# Encoding:utf-8
import StringIO  # Python 2 module name; kept because code outside this view may use it

from lxml import etree, html
from lxml.html.clean import Cleaner

from apihelper import info, info_save
# Demo script: parse a deliberately hostile HTML sample with lxml.html and
# remove the whole <body> subtree from the resulting document.

# Sample markup to parse. The leading backslash keeps the literal from
# starting with an empty line.
strHTML = """\
<script type= "Text/javascript" src= "Evil-site" ></script>
<link rel= "Alternate" type= "Text/rss" src= "Evil-rss" >
<style>
Body {Background-image:url (javascript:do_evil)};
div {color:expression (evil)};
</style>
<body onload= "evil_function ()" >
<!--I am interpreted for evil! -->
<a href= "javascript:evil_function ()" >a link</a>
<a href= "#" onclick= "Evil_function ()" >another link</a>
<p onclick= "evil_function ()" >a paragraph</p>
<div style= "Display:none" >secret evil!</div>
<object> of evil! </object>
<iframe src= "Evil-site" ></iframe>
<form action= "Evil-site" >
Password: <input type= "Password" name= "Password" >
</form>
<blink>annoying evil!</blink>
<a href= "evil-site" >spam spam spam!</a>
<image src= "evil!" >
<div id= ' nav ' class= ' nav ' >this is nav</div>
</body>
"""
# Minimal alternative input for quick experiments:
# strHTML = """
# <body><div>aaa</div></body>
# """

etree.DEBUG = 1
# Single-argument print() call so the line is valid on Python 2 and Python 3.
print(etree.LIBXML_VERSION)

# Parser that decodes its input as UTF-8.
utf8_parser = html.HTMLParser(encoding='utf8')

# To read the markup from a file instead of the inline sample:
# f = open(xmlfile)
# xml = f.read()
# f.close()
doc = html.fromstring(strHTML, parser=utf8_parser)

# Other ways to select or remove nodes that were tried here:
# weather = doc.xpath('body/object')
# doc.find('.//body/object').drop_tag()   # removes only the tag pair, keeps its content
# doc.find('.//body/object').drop_tree()  # removes the element and everything inside it

# find() returns None when nothing matches, so guard before dropping the
# subtree instead of failing with AttributeError.
body = doc.find('.//body')
if body is not None:
    body.drop_tree()

# Uncomment to inspect the resulting document:
# print(info_save(doc))
# print(html.tostring(doc))
# lxml.html: deleting a node's whole subtree (drop_tree) versus only its tag pair (drop_tag)