Using Python to parse XML into the corresponding HTML: an example

Source: Internet
Author: User
Tags: dname, xsl, file
This article shows, by example, how to use Python to parse XML into the corresponding HTML. The example uses SAX to parse dd.xml into HTML; refer to it if you need it. Of course, if you have the XSL file that corresponds to the XML, you can use libxml2 directly to convert it into HTML.

The code is as follows:


#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#---------------------------------------
# Program: XML parser
# Version 01.0
# Author: mupeng
# Date: 2013-12-18
# Programming language: Python 2.7
# Function: parse xml into the corresponding html
# Annotation: This program uses the parse function of the xml.sax module to parse XML and generate events.
# It inherits from ContentHandler and overrides its event handler functions.
# Dispatcher is mainly used to dispatch the start and end events of the corresponding tags.
#---------------------------------------
From xml. sax. handler import ContentHandler
From xml. sax import parse

class Dispatcher:
    """Mixin that routes SAX element events to per-tag handler methods.

    A start event for tag ``foo`` is routed to ``self.startFoo()`` and an end
    event to ``self.endFoo()``.  When no tag-specific handler exists, the
    generic ``self.defaultStart()`` / ``self.defaultEnd()`` is used instead,
    if it is defined.  Handlers are always invoked without arguments.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Call the handler for the event ``prefix`` on the tag ``name``.

        prefix -- event kind; the callers in this class pass 'start' or 'end'.
        name   -- tag name; it goes through ``str.capitalize``, so the tag
                  ``pubDate`` is looked up as ``<prefix>Pubdate``.
        attrs  -- element attributes; accepted for interface compatibility
                  but not forwarded to the handler.

        Nothing happens when neither the specific nor the default handler is
        a callable attribute of ``self``.
        """
        handler = getattr(self, prefix + name.capitalize(), None)
        if not callable(handler):
            # No tag-specific handler: fall back to defaultStart/defaultEnd.
            handler = getattr(self, 'default' + prefix.capitalize(), None)
        if callable(handler):
            handler()

    def startElement(self, name, attrs):
        """SAX callback: dispatch the opening of element ``name``."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback: dispatch the closing of element ``name``."""
        self.dispatch('end', name)

Class Website (Dispatcher, ContentHandler ):

Def _ init _ (self ):
Self. fout = open('ddt_SAX.html ', 'w ')
Self. imagein = False
Self. desflag = False
Self. item = False
Self. title =''
Self. link =''
Self. guid =''
Self. url =''
Self. pubdate =''
Self. description =''
Self. temp =''
Self. prx =''
Def startChannel (self ):

Self. fout. write ('''\ N\ NRSS-''') <br> def endChannel (self): <BR> self. fout. write (''' <BR> <tr> <td height = "20"> </td> </tr> <BR> </table> <BR> </center> <BR> script <BR> function GetTimeDiff (str) <BR >{< BR> if (str = '') <BR >{< BR> return ''; <BR >}</P> <P> var pubDate = new Date (str); <BR> var nowDate = new Date (); <BR> var diffMilSeconds = nowDate. valueOf ()-pubDate. valueOf (); <BR> var days = diffMilSeconds/86400000; <BR> days = parseInt (days ); </P> <P> diffMilSeconds = diffMilSeconds-(days * 86400000); <BR> var hours = diffMilSeconds/3600000; <BR> hours = parseInt (hours ); </P> <P> diffMilSeconds = diffMilSeconds-(hours * 3600000); <BR> var minutes = diffMilSeconds/60000; <BR> minutes = parseInt (minutes ); </P> <P> diffMilSeconds = diffMilSeconds-(minutes * 60000); <BR> var seconds = diffMilSeconds/1000; <BR> seconds = parseInt (seconds ); <br> var returnStr = "± response©· ² ¼ Ê ± ä 䣰 "+ pubDate. toLocaleString (); </P> <P> if (days> 0) <BR >{< BR> returnStr = returnStr + "£°{}à Ö öô ú" + days +" {}" + hours + "{{}±" + minutes + "· ööó £.©"; <BR >}< BR> else if (hours> 0) <BR >{< BR> returnStr = returnStr + "£°{}à Ï öô ú" + hours +" {{}± "+ minutes +" · ööó £.©"; <BR >}< BR> else if (minutes> 0) <BR >{< BR> returnStr = returnStr + "£°à à ë Ö öô ú" + minutes +" · ööó £.©"; <BR >}</P> <P> return returnStr; </P> <P >}</P> <P> function GetSpanText () <BR >{< BR> var pubDate; <BR> var pubDateArray; <BR> var spanArray = document. getElementsByTagName ("span"); </P> <P> for (var I = 0; I <spanArray. length; I ++) <BR >{< BR> pubDate = spanArray [I]. innerHTML; <BR> document. getElementsByTagName ("span") [I]. innerHTML = GetTimeDiff (pubDate); <BR >}</P> <P> GetSpanText (); <BR> script <BR> </body> <BR> </ptml> <BR> ''') <BR> self. fout. close () </P> <P> def characters (self, chars): <BR> if chars. strip (): <BR> # chars = chars. strip () <BR> self. temp + = chars <BR> # print self. temp <br> <BR> def startTitle (self): <br> if self. 
item: <BR> self. fout. write (''' <BR> <tr bgcolor = "# eeeeee"> \ n <td style = "padding-top: 5px; padding-left: 5px; "height =" 30 "> \ n <B> <BR> ''') <br> def endTitle (self ): <br> if not self. imagein and not self. item: <BR> self. title = self. temp <BR> self. temp = ''<BR> self. fout. write (self. title. encode ('gb2312') <br> # self. title = self. temp <BR> self. fout. write (''' <BR>\ N\ N\ N

\ N
Script \ n

Function copyLink ()
{
ClipboardData. setData ("Text", window. location. href );
Alert ("RSS á ");
}

Function subscibeLink ()
{
Var str = window. location. pathname;
While (str. match (/^ \//))
{
Str = str. replace (/^ \//,"");
}
Window. open ("http://rss.sina.com.cn/my_sina_web_rss_news.html? Url = "+ str," _ self ");

}
Script \ n










\ N \ N
\ N
''')

If self. item:
Self. title = self. temp
Self. temp =''
Self. fout. write (self. title. encode ('gb2312 '))
Self. fout. write ('''


''')

Def startImage (self ):
Self. imagein = True

Def endImage (self ):
Self. imagein = False

Def startLink (self ):
If self. imagein:
Self. fout. write ('''

Def endLink (self ):
Self. link = self. temp
Self. temp =''
If self. imagein:
Self. fout. write (self. link. encode ('gb2312 '))
Self. fout. write (''' "target =" _ blank "> \ n ''')
Elif self. item:
# Self. link = self. temp
Pass
Else:
Self. fout. write (self. link)
Self. fout. write (''' "target ="
_ Blank
"> ''')
Self. fout. write (self. title. encode ('gb2312 '))
Self. fout. write ('''


''')
Self. fout. write (self. description. encode ('gb2312 '))
Self. fout. write ('''
Comment 'ömāmá '***************************************************************?©


Def startUrl (self ):
If self. imagein:
Self. fout. write (''' def endUrl (self ):
Self. url = self. temp
Self. temp =''
If self. imagein:
Self. fout. write (self. url. encode ('gb2312 '))
Self. fout. write (''' "border =" 0 "> \ n










# Program entry
If _ name _ = '_ main __':
Parse ('ddt. XML', Website ())

''') ''')
''')
If self. item:
# Self. url = self. temp
Pass

Def defaultStart (self ):
Pass
Def defaultEnd (self ):
Self. temp =''
Def startDescription (self ):
Pass
Def endDescription (self ):
Self. description = self. temp
Self. temp =''
If self. item:
# Self. fout. write ('Zookeeper successful failed ('¡¡¡¡')
Self. fout. write (self. description. encode ('gb2312 '))

Def endGuid (self ):
Self. guid = self. temp
Def endPubdate (self ):
If not self. temp. startswith ('http '):
Self. pubdate = self. temp
Self. temp =''
Else:
Self. pubdate =''
Def startItem (self ):
Self. item = True
Def endItem (self ):
Self. item = False
Self. fout. write ('''


Self. fout. write (self. link)
Self. fout. write (''' "target =" _ blank "> ''')
Self. fout. write (self. guid)
Self. fout. write ('''

''')
Self. fout. write (self. pubdate)
Self. fout. write ('''

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.