This article shows, by example, how to use Python to parse XML into the corresponding HTML; refer to it if you need to do the same. The example uses SAX to parse dd.xml into HTML. Of course, if you have the XSL file that corresponds to the XML, you can use libxml2 to convert it to HTML directly.
The code is as follows:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#---------------------------------------
# Program: XML parser
# Version 01.0
# Author: mupeng
# Date: 2013-12-18
# Programming language: Python 2.7
# Function: parse xml into the corresponding html
# Annotation: This program uses the parse function of the xml.sax module to parse XML and generate events.
# It subclasses ContentHandler and overrides its event-handler methods.
# Dispatcher distributes the start and end events of each tag to the corresponding handler method.
#---------------------------------------
# SAX building blocks: ContentHandler is the event-callback base class and
# parse() drives a handler over an XML document.
from xml.sax.handler import ContentHandler
from xml.sax import parse
class Dispatcher:
    """Mixin that routes SAX element events to per-tag handler methods.

    A start event for tag ``title`` is routed to ``self.startTitle()`` and an
    end event to ``self.endTitle()``.  When no tag-specific method exists, the
    generic ``self.defaultStart()`` / ``self.defaultEnd()`` is used instead.
    If neither exists the event is silently ignored.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Invoke the handler for one element event, if there is one.

        prefix -- event kind, ``'start'`` or ``'end'``.
        name   -- tag name as reported by the parser.  It is passed through
                  ``str.capitalize()``, so ``pubDate`` maps to ``...Pubdate``.
        attrs  -- element attributes; accepted for interface compatibility
                  but not forwarded, because handlers are called without
                  arguments.
        """
        method = getattr(self, prefix + name.capitalize(), None)
        if not callable(method):
            # No tag-specific handler: fall back to the generic one.
            method = getattr(self, 'default' + prefix.capitalize(), None)
        if callable(method):
            method()

    def startElement(self, name, attrs):
        """SAX callback: route an opening tag to its ``start*`` handler."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback: route a closing tag to its ``end*`` handler."""
        self.dispatch('end', name)
Class Website (Dispatcher, ContentHandler ):
Def _ init _ (self ):
Self. fout = open('ddt_SAX.html ', 'w ')
Self. imagein = False
Self. desflag = False
Self. item = False
Self. title =''
Self. link =''
Self. guid =''
Self. url =''
Self. pubdate =''
Self. description =''
Self. temp =''
Self. prx =''
Def startChannel (self ):
Self. fout. write ('''\ N\ NRSS-''') <br> def endChannel (self): <BR> self. fout. write (''' <BR> <tr> <td height = "20"> </td> </tr> <BR> </table> <BR> </center> <BR> script <BR> function GetTimeDiff (str) <BR >{< BR> if (str = '') <BR >{< BR> return ''; <BR >}</P> <P> var pubDate = new Date (str); <BR> var nowDate = new Date (); <BR> var diffMilSeconds = nowDate. valueOf ()-pubDate. valueOf (); <BR> var days = diffMilSeconds/86400000; <BR> days = parseInt (days ); </P> <P> diffMilSeconds = diffMilSeconds-(days * 86400000); <BR> var hours = diffMilSeconds/3600000; <BR> hours = parseInt (hours ); </P> <P> diffMilSeconds = diffMilSeconds-(hours * 3600000); <BR> var minutes = diffMilSeconds/60000; <BR> minutes = parseInt (minutes ); </P> <P> diffMilSeconds = diffMilSeconds-(minutes * 60000); <BR> var seconds = diffMilSeconds/1000; <BR> seconds = parseInt (seconds ); <br> var returnStr = "± response©· ² ¼ Ê ± ä 䣰 "+ pubDate. toLocaleString (); </P> <P> if (days> 0) <BR >{< BR> returnStr = returnStr + "£°{}à Ö öô ú" + days +" {}" + hours + "{{}±" + minutes + "· ööó £.©"; <BR >}< BR> else if (hours> 0) <BR >{< BR> returnStr = returnStr + "£°{}à Ï öô ú" + hours +" {{}± "+ minutes +" · ööó £.©"; <BR >}< BR> else if (minutes> 0) <BR >{< BR> returnStr = returnStr + "£°à à ë Ö öô ú" + minutes +" · ööó £.©"; <BR >}</P> <P> return returnStr; </P> <P >}</P> <P> function GetSpanText () <BR >{< BR> var pubDate; <BR> var pubDateArray; <BR> var spanArray = document. getElementsByTagName ("span"); </P> <P> for (var I = 0; I <spanArray. length; I ++) <BR >{< BR> pubDate = spanArray [I]. innerHTML; <BR> document. getElementsByTagName ("span") [I]. innerHTML = GetTimeDiff (pubDate); <BR >}</P> <P> GetSpanText (); <BR> script <BR> </body> <BR> </ptml> <BR> ''') <BR> self. fout. close () </P> <P> def characters (self, chars): <BR> if chars. strip (): <BR> # chars = chars. strip () <BR> self. temp + = chars <BR> # print self. temp <br> <BR> def startTitle (self): <br> if self. 
item: <BR> self. fout. write (''' <BR> <tr bgcolor = "# eeeeee"> \ n <td style = "padding-top: 5px; padding-left: 5px; "height =" 30 "> \ n <B> <BR> ''') <br> def endTitle (self ): <br> if not self. imagein and not self. item: <BR> self. title = self. temp <BR> self. temp = ''<BR> self. fout. write (self. title. encode ('gb2312') <br> # self. title = self. temp <BR> self. fout. write (''' <BR>\ N\ N\ N
\ N
Script \ n
Function copyLink ()
{
ClipboardData. setData ("Text", window. location. href );
Alert ("RSS á ");
}
Function subscibeLink ()
{
Var str = window. location. pathname;
While (str. match (/^ \//))
{
Str = str. replace (/^ \//,"");
}
Window. open ("http://rss.sina.com.cn/my_sina_web_rss_news.html? Url = "+ str," _ self ");
}
Script \ n
\ N
\ N
\ N ''')
If self. item: Self. title = self. temp Self. temp ='' Self. fout. write (self. title. encode ('gb2312 ')) Self. fout. write (''' |
''')Def startImage (self ): Self. imagein = True Def endImage (self ): Self. imagein = False
Def startLink (self ): If self. imagein: Self. fout. write (''' Def endLink (self ): Self. link = self. temp Self. temp ='' If self. imagein: Self. fout. write (self. link. encode ('gb2312 ')) Self. fout. write (''' "target =" _ blank "> \ n ''') Elif self. item: # Self. link = self. temp Pass Else: Self. fout. write (self. link) Self. fout. write (''' "target =" _ Blank "> ''') Self. fout. write (self. title. encode ('gb2312 ')) Self. fout. write (''' |
''') Self. fout. write (self. description. encode ('gb2312 ')) Self. fout. write (''' |
Comment 'ömāmá '***************************************************************?© |
Def startUrl (self ):
If self. imagein:
Self. fout. write (''' def endUrl (self ):
Self. url = self. temp
Self. temp =''
If self. imagein:
Self. fout. write (self. url. encode ('gb2312 '))
Self. fout. write (''' "border =" 0 "> \ n
# Program entry
If _ name _ = '_ main __':
Parse ('ddt. XML', Website ())
''')
''') If self. item: # Self. url = self. temp PassDef defaultStart (self ): Pass Def defaultEnd (self ): Self. temp ='' Def startDescription (self ): Pass Def endDescription (self ): Self. description = self. temp Self. temp ='' If self. item: # Self. fout. write ('Zookeeper successful failed ('¡¡¡¡') Self. fout. write (self. description. encode ('gb2312 '))
Def endGuid (self ): Self. guid = self. temp Def endPubdate (self ): If not self. temp. startswith ('http '): Self. pubdate = self. temp Self. temp ='' Else: Self. pubdate ='' Def startItem (self ): Self. item = True Def endItem (self ): Self. item = False Self. fout. write (''' |
Self. fout. write (self. link) Self. fout. write (''' "target =" _ blank "> ''') Self. fout. write (self. guid) Self. fout. write (''' |
''') Self. fout. write (self. pubdate) Self. fout. write (''' |
|
''')