用SAX將ddt.xml解析成html。當然啦,如果得到了xml對應的xsl檔案,也可以直接用libxml2將其轉換成html。
複製代碼 代碼如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#---------------------------------------
# 程式:XML解析器
# 版本:01.0
# 作者:mupeng
# 日期:2013-12-18
# 語言:Python 2.7
# 功能:將xml解析成對應的html
# 註解:該程式用xml.sax模組的parse函數解析XML,並建置事件
# 繼承ContentHandler並重寫其事件處理函數
# Dispatcher主要用於相應標籤的起始、結束事件的派發
#---------------------------------------
from xml.sax.handler import ContentHandler
from xml.sax import parse
class Dispatcher:
    """Route SAX element events to tag-specific handler methods.

    Used as a mixin next to ``ContentHandler`` (see ``Website``): the SAX
    parser calls ``startElement`` / ``endElement``, and both forward to
    ``dispatch``, which picks a handler method by name.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Call the handler for an element event, if one exists.

        The handler name is ``prefix + name.capitalize()``, e.g.
        ``startTitle`` for ``('start', 'title')``. ``str.capitalize``
        lowercases everything after the first character, so the tag
        ``pubDate`` maps to ``endPubdate``. When no such callable
        attribute exists, ``'default' + prefix.capitalize()``
        (``defaultStart`` / ``defaultEnd``) is tried instead. If neither
        exists the event is ignored.

        prefix -- 'start' or 'end'.
        name   -- element name as reported by the parser.
        attrs  -- element attributes; accepted so callers can pass them,
                  but handlers are invoked without arguments, so they are
                  not forwarded.
        """
        handler = getattr(self, prefix + name.capitalize(), None)
        if not callable(handler):
            # No tag-specific handler: fall back to the generic one.
            handler = getattr(self, 'default' + prefix.capitalize(), None)
        if callable(handler):
            handler()

    def startElement(self, name, attrs):
        """SAX callback for an opening tag; forwards to ``dispatch``."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback for a closing tag; forwards to ``dispatch``."""
        self.dispatch('end', name)
# Website: SAX content handler that writes an HTML rendering of the parsed
# feed to 'ddt_SAX.html'. Element events arrive through Dispatcher and are
# handled by the start<Tag>/end<Tag> methods below; character data is
# buffered in self.temp by characters() and consumed by the end<Tag> handlers.
# NOTE(review): from the body of startChannel onward this listing is damaged:
# the HTML inside the triple-quoted strings has been stripped, <BR>/<P> tags
# are interleaved with the statements, line breaks and indentation are lost,
# and the `if __name__ == '__main__'` entry point sits in the middle of the
# endUrl text. The class cannot run as shown; restore it from the original
# script before changing any logic.
class Website(Dispatcher, ContentHandler):
# Opens the output file and resets all parsing state.
# NOTE(review): ContentHandler.__init__ is not called here -- confirm that
# is intentional.
def __init__(self):
# Output HTML file; written by the handlers and closed in endChannel.
self.fout = open('ddt_SAX.html', 'w')
# True between startImage and endImage.
self.imagein = False
# NOTE(review): desflag is not referenced anywhere else in this listing.
self.desflag = False
# True between startItem and endItem.
self.item = False
# The six fields below are filled from self.temp by the end<Tag> handler
# of the same name (endTitle, endLink, endGuid, endUrl, endPubdate,
# endDescription).
self.title = ''
self.link = ''
self.guid = ''
self.url = ''
self.pubdate = ''
self.description = ''
# Buffer that characters() appends non-blank text chunks to.
self.temp = ''
# NOTE(review): prx is not referenced anywhere else in this listing.
self.prx = ''
def startChannel(self):
self.fout.write('''\n\n RSS-''')<br><br> def endChannel(self):<BR> self.fout.write('''<BR> <tr><td height="20"></td></tr><BR> </table><BR> </center><BR> <BR> </body><BR> </html><BR> ''')<BR> self.fout.close()</p><P> def characters(self, chars):<BR> if chars.strip():<BR> #chars = chars.strip()<BR> self.temp += chars<BR> #print self.temp<br><br> <BR> def startTitle(self):<br><br> if self.item:<BR> self.fout.write('''<BR> <tr bgcolor="#eeeeee">\n<td style="padding-top:5px;padding-left:5px;" height="30">\n<B><BR> ''')<br><br> def endTitle(self):<br><br> if not self.imagein and not self.item:<BR> self.title = self.temp<BR> self.temp = ''<BR> self.fout.write(self.title.encode('gb2312'))<br><br> #self.title = self.temp<BR> self.fout.write('''<BR> \n\n\n
\n
\n
\n
\n
\n ''')
if self.item: self.title = self.temp self.temp = '' self.fout.write(self.title.encode('gb2312')) self.fout.write(''' |
''') def startImage(self): self.imagein = True def endImage(self): self.imagein = False
def startLink(self): if self.imagein: self.fout.write(''' def endLink(self): self.link = self.temp self.temp = '' if self.imagein: self.fout.write(self.link.encode('gb2312')) self.fout.write('''" target="_blank">\n ''') elif self.item: #self.link = self.temp pass else: self.fout.write(self.link) self.fout.write(''' " target=" _blank "> ''') self.fout.write(self.title.encode('gb2312')) self.fout.write(''' |
''') self.fout.write(self.description.encode('gb2312')) self.fout.write(''' |
| ¸´ÖÆ´ËÒ³Á´½Ó ÎÒҪǶÈë¸ÃÐÂÎÅÁÐ±íµ½ÎÒµÄÒ³Ãæ£¨¼òµ¥¡¢¿ìËÙ¡¢ÊµÊ±¡¢Ãâ·Ñ£© |
def startUrl(self):
if self.imagein:
self.fout.write(''' def endUrl(self):
self.url = self.temp
self.temp = ''
if self.imagein:
self.fout.write(self.url.encode('gb2312'))
self.fout.write('''" border="0">\n
#程式入口
if __name__ == '__main__':
parse('ddt.xml', Website())
''')
''') if self.item: #self.url = self.temp pass def defaultStart(self): pass def defaultEnd(self): self.temp = '' def startDescription(self): pass def endDescription(self): self.description = self.temp self.temp = '' if self.item: #self.fout.write('¡¡¡¡') self.fout.write(self.description.encode('gb2312'))
def endGuid(self): self.guid = self.temp def endPubdate(self): if not self.temp.startswith('http'): self.pubdate = self.temp self.temp = '' else: self.pubdate = '' def startItem(self): self.item = True def endItem(self): self.item = False self.fout.write(''' |
self.fout.write(self.link) self.fout.write(''' " target="_blank"> ''') self.fout.write(self.guid) self.fout.write(''' |
''') self.fout.write(self.pubdate) self.fout.write(''' |
|
''')