使用python解析xml成對應的html樣本分享

來源:互聯網
上載者:User
使用 SAX 將 ddt.xml 解析成 html(輸出檔為 ddt_SAX.html)。當然啦,如果已取得該 xml 對應的 xsl 檔案,也可以直接用 libxml2 將其轉換成 html。

複製代碼 代碼如下:


#!/usr/bin/env python
# -*- coding: utf-8 -*-
#---------------------------------------
# 程式:XML解析器
# 版本:01.0
# 作者:mupeng
# 日期:2013-12-18
# 語言:Python 2.7
# 功能:將xml解析成對應的html
# 註解:該程式用xml.sax模組的parse函數解析XML,並建置事件
# 繼承ContentHandler並重寫其事件處理函數
# Dispatcher主要用於相應標籤的起始、結束事件的派發
#---------------------------------------
from xml.sax.handler import ContentHandler
from xml.sax import parse

class Dispatcher:
    """Mixin that routes SAX element events to per-tag handler methods.

    For an element ``name`` the handler looked up is ``start<Name>`` or
    ``end<Name>``, where ``<Name>`` is ``name.capitalize()`` (so ``pubDate``
    maps to ``Pubdate``). When no such callable attribute exists,
    ``defaultStart`` / ``defaultEnd`` is used instead, if defined.
    Handlers are always invoked without arguments.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Call the handler for ``prefix`` + ``name``, or the default one.

        prefix -- event kind; the callers in this class pass 'start' or 'end'.
        name   -- element name as reported by the parser.
        attrs  -- element attributes; accepted for interface compatibility
                  but not forwarded to the handler.
        """
        handler = getattr(self, prefix + name.capitalize(), None)
        if not callable(handler):
            # No tag-specific handler: fall back to default<Prefix>.
            handler = getattr(self, 'default' + prefix.capitalize(), None)
        if callable(handler):
            handler()

    def startElement(self, name, attrs):
        """SAX callback: dispatch the opening of element ``name``."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback: dispatch the closing of element ``name``."""
        self.dispatch('end', name)

class Website(Dispatcher, ContentHandler):

def __init__(self):
    """Open the HTML output file and reset all parsing state."""
    # Initialise the SAX base class explicitly; an explicit base call works
    # regardless of whether ContentHandler is a new-style class.
    ContentHandler.__init__(self)
    # Output file; written by the element handlers and closed in endChannel.
    self.fout = open('ddt_SAX.html', 'w')
    # Flags toggled by startImage/endImage and startItem/endItem.
    self.imagein = False
    self.desflag = False
    self.item = False
    # Text captured for the most recent element of each kind.
    self.title = ''
    self.link = ''
    self.guid = ''
    self.url = ''
    self.pubdate = ''
    self.description = ''
    # Buffer that characters() appends to until an end handler consumes it.
    self.temp = ''
    self.prx = ''
def startChannel(self):

self.fout.write('''\n\n RSS-''')<br><br> def endChannel(self):<BR> self.fout.write('''<BR> <tr><td height="20"></td></tr><BR> </table><BR> </center><BR> <BR> </body><BR> </html><BR> ''')<BR> self.fout.close()</p><P> def characters(self, chars):<BR> if chars.strip():<BR> #chars = chars.strip()<BR> self.temp += chars<BR> #print self.temp<br><br> <BR> def startTitle(self):<br><br> if self.item:<BR> self.fout.write('''<BR> <tr bgcolor="#eeeeee">\n<td style="padding-top:5px;padding-left:5px;" height="30">\n<B><BR> ''')<br><br> def endTitle(self):<br><br> if not self.imagein and not self.item:<BR> self.title = self.temp<BR> self.temp = ''<BR> self.fout.write(self.title.encode('gb2312'))<br><br> #self.title = self.temp<BR> self.fout.write('''<BR> \n\n\n

\n
\n









\n \n
\n
''')

if self.item:
self.title = self.temp
self.temp = ''
self.fout.write(self.title.encode('gb2312'))
self.fout.write('''


''')

def startImage(self):
    """Mark that parsing is now inside an <image> element."""
    self.imagein = True

def endImage(self):
    """Mark that the <image> element has been closed."""
    self.imagein = False

def startLink(self):
if self.imagein:
self.fout.write('''

def endLink(self):
self.link = self.temp
self.temp = ''
if self.imagein:
self.fout.write(self.link.encode('gb2312'))
self.fout.write('''" target="_blank">\n ''')
elif self.item:
#self.link = self.temp
pass
else:
self.fout.write(self.link)
self.fout.write(''' " target="
_blank
"> ''')
self.fout.write(self.title.encode('gb2312'))
self.fout.write('''


''')
self.fout.write(self.description.encode('gb2312'))
self.fout.write('''
¸´ÖÆ´ËÒ³Á´½Ó ÎÒҪǶÈë¸ÃÐÂÎÅÁÐ±íµ½ÎÒµÄÒ³Ãæ£¨¼òµ¥¡¢¿ìËÙ¡¢ÊµÊ±¡¢Ãâ·Ñ£©


def startUrl(self):
if self.imagein:
self.fout.write(''' def endUrl(self):
self.url = self.temp
self.temp = ''
if self.imagein:
self.fout.write(self.url.encode('gb2312'))
self.fout.write('''" border="0">\n










# Program entry point
if __name__ == '__main__':
    # Parse ddt.xml, delivering the SAX events to a Website handler.
    parse('ddt.xml', Website())

''') ''')
  • ''')
    if self.item:
    #self.url = self.temp
    pass

    def defaultStart(self):
        """Fallback start handler: intentionally does nothing."""
    def defaultEnd(self):
        """Fallback end handler: discard the text collected so far."""
        self.temp = ''
    def startDescription(self):
        """Start handler for <description>: intentionally does nothing."""
    def endDescription(self):
        """Store the buffered text as the description and reset the buffer.

        While inside an item (``self.item`` is true) the description is also
        written to the output file, encoded as gb2312.
        """
        self.description, self.temp = self.temp, ''
        if self.item:
            self.fout.write(self.description.encode('gb2312'))

    def endGuid(self):
        """Store the buffered text as the guid and reset the buffer."""
        self.guid = self.temp
        # Clear the buffer as the other end* handlers do; otherwise the guid
        # text stays in self.temp and is prepended to the next element's text.
        self.temp = ''
    def endPubdate(self):
        """Store the buffered text as the publication date.

        Text that starts with 'http' is not accepted as a date and yields an
        empty pubdate. The buffer is cleared in both cases so rejected text
        cannot leak into the next element.
        """
        text = self.temp
        self.temp = ''
        self.pubdate = '' if text.startswith('http') else text
    def startItem(self):
        """Mark that parsing is now inside an <item> element."""
        self.item = True
    def endItem(self):
    self.item = False
    self.fout.write('''


    self.fout.write(self.link)
    self.fout.write(''' " target="_blank"> ''')
    self.fout.write(self.guid)
    self.fout.write('''

    ''')
    self.fout.write(self.pubdate)
    self.fout.write('''

    聯繫我們

    該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

    如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

    A Free Trial That Lets You Build Big!

    Start building with 50+ products and up to 12 months usage for Elastic Compute Service

    • Sales Support

      1 on 1 presale consultation

    • After-Sales Support

      24/7 Technical Support 6 Free Tickets per Quarter Faster Response

    • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.