The company's internal interface returns its result string in one of two forms: a PHP array or XML. A PHP array cannot be used directly from Python, and the XML is not standard (some node names begin with a digit), so it cannot be parsed with the standard XML modules either. I therefore wrote a simple script to parse the XML, for use in interface testing.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re

# XML declaration at the very start of the document. Non-greedy so that it
# stops at the first "?>" instead of swallowing later processing instructions.
_HEAD_RE = re.compile(r'<\?xml.*?\?>', re.DOTALL)

# encoding="..." pseudo-attribute inside the XML declaration.
_ENCODING_RE = re.compile(r'encoding\s*=\s*["\']\s*([A-Za-z0-9._\-]+)',
                          re.IGNORECASE)

_CDATA_OPEN = '<![CDATA['
_CDATA_CLOSE = ']]>'


class xmlparse:
    """Minimal regex-based reader for simple, attribute-free XML.

    Intended for XML that standard parsers reject (for example node names
    that start with a digit).  It is NOT a general XML parser: nodes must
    have no attributes, and a node nested inside a node of the same name is
    matched incorrectly because matching is a non-greedy regex search.

    Attributes:
        xmlstr: the input exactly as passed to the constructor.
        xmldom: the document as text, with the XML declaration removed.
        xmlnodelist: every match found for the last path step of the most
            recent successful ``parse`` call (raw, CDATA not unwrapped).
        xpath: the path given to the most recent ``parse`` call.
    """

    def __init__(self, xmlstr):
        """Store *xmlstr* (text or bytes) and prepare the searchable body."""
        self.xmlstr = xmlstr
        self.xmldom = self.__convet2utf8()
        self.xmlnodelist = []
        self.xpath = ''

    def __convet2utf8(self):
        """Return the document body as text without the XML declaration.

        Byte input is decoded with the encoding named in the declaration
        (UTF-8 when none is declared); text input is returned as is.

        Raises:
            LookupError: the declared encoding is unknown to Python.
            UnicodeDecodeError: the bytes do not match the declared encoding.
        """
        headstr = self.__get_head()
        # The declaration is matched at offset 0 and is ASCII, so its length
        # is the same number of bytes or characters in the input.
        body = self.xmlstr[len(headstr):]
        if isinstance(body, bytes):
            declared = _ENCODING_RE.search(headstr)
            encoding = declared.group(1) if declared else 'utf-8'
            body = body.decode(encoding)
        return body

    def __get_head(self):
        """Return the leading ``<?xml ... ?>`` declaration, or '' if absent."""
        raw = self.xmlstr
        if isinstance(raw, bytes):
            # latin-1 maps every byte to one character, so this never fails
            # and keeps offsets identical to the byte offsets.
            raw = raw.decode('latin-1')
        found = _HEAD_RE.match(raw)
        return found.group() if found else ''

    def parse(self, xpath):
        """Return the text of the node addressed by *xpath*.

        *xpath* is a '/'-separated list of node names, each optionally
        followed by ``[n]``.  Unlike real XPath, ``n`` is a 0-based index
        into the matches of that step (``/product_id[1]`` is the second
        ``product_id``).  Each step searches anywhere inside the text
        selected by the previous step, not only among direct children.
        A value wrapped in ``<![CDATA[ ... ]]>`` is unwrapped.

        An empty path returns the whole document body.

        Raises:
            IndexError: a step has no match at the requested index.
            ValueError: the text inside ``[...]`` is not an integer.
        """
        self.xpath = xpath

        steps = []
        for xnode in xpath.split('/'):
            if not xnode:
                continue
            bracket = xnode.find('[')
            if bracket > -1:
                index = int(xnode[bracket + 1:-1])
                xnode = xnode[:bracket]
            else:
                index = 0
            name = re.escape(xnode)  # node names are literals, not regex
            pattern = re.compile('<%s>(.*?)</%s>' % (name, name), re.DOTALL)
            steps.append((xnode, pattern, index))

        xmlnodestr = self.xmldom
        xmlnodelist = []
        for xnode, pattern, index in steps:
            xmlnodelist = pattern.findall(xmlnodestr)
            try:
                xmlnodestr = xmlnodelist[index]
            except IndexError:
                raise IndexError('node %r has no match at index %d (xpath %r)'
                                 % (xnode, index, xpath))
            # Only unwrap a complete CDATA section; slicing blindly would
            # eat the tail of a value that merely starts with the marker.
            if (xmlnodestr.startswith(_CDATA_OPEN)
                    and xmlnodestr.endswith(_CDATA_CLOSE)):
                xmlnodestr = xmlnodestr[len(_CDATA_OPEN):-len(_CDATA_CLOSE)]

        self.xmlnodelist = xmlnodelist
        return xmlnodestr


if '__main__' == __name__:
    # Sample payload from the article; its tags are not strictly well nested,
    # which the regex-based lookup tolerates.
    xmlstr = ('<?xml version="1.0" encoding="utf-8" standalone="yes"?>'
              '<resultObject>'
              '<a><product_id>aaaaa</product_id>'
              '<product_name><![CDATA[bbbbb]]></a>'
              '<b><product_id>bbbbb</product_id>'
              '<product_name><![CDATA[bbbbb]]></b></product_name>'
              '</resultObject>')
    xpath1 = '/product_id'
    xpath2 = '/product_id[1]'
    xpath3 = '/a/product_id'

    xp = xmlparse(xmlstr)
    print('xmlstr: %s' % xp.xmlstr)
    print('xmldom: %s' % xp.xmldom)
    for demo_path in (xpath1, xpath2, xpath3):
        print('------------------------------')
        getstr = xp.parse(demo_path)
        print('xpath: %s' % xp.xpath)
        print('get list: %s' % xp.xmlnodelist)
        print('get string: %s' % getstr)
Run Result:
xmlstr: <?xml version="1.0" encoding="utf-8" standalone="yes"?><resultObject><a><product_id>aaaaa</product_id><product_name><![CDATA[bbbbb]]></a><b><product_id>bbbbb</product_id><product_name><![CDATA[bbbbb]]></b></product_name></resultObject>
xmldom: <resultObject><a><product_id>aaaaa</product_id><product_name><![CDATA[bbbbb]]></a><b><product_id>bbbbb</product_id><product_name><![CDATA[bbbbb]]></b></product_name></resultObject>
------------------------------
xpath: /product_id
get list: ['aaaaa', 'bbbbb']
get string: aaaaa
------------------------------
xpath: /product_id[1]
get list: ['aaaaa', 'bbbbb']
get string: bbbbb
------------------------------
xpath: /a/product_id
get list: ['aaaaa']
get string: aaaaa
Because the returned XML is simple and has no nodes with attributes, it is easy to handle. Testing did reveal one bug, however: when a node is nested inside another node of the same name, the regular expression matches the wrong closing tag. This can be worked around by avoiding such nested nodes in the XPath; otherwise a more complex parsing mechanism would have to be written.