Take the following XML file as an example:
The file content is as follows:
<?xml version="1.0" encoding="utf-8"?>
<root>
<childs>
<child name='a'>1</child>
<child value="2">2</child>
</childs>
</root>
Below are several ways to parse XML files in Python, each implemented with a standard-library module.
Method 1: use the xml.sax module, which automatically traverses every node:
The code is as follows:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.sax.handler import ContentHandler
from xml.sax import parse


class Testhandle(ContentHandler):
    """SAX handler that prints every parse event and collects the text chunks.

    Args:
        inlist: list to which each chunk of character data reported by the
            parser is appended, in document order.
    """

    def __init__(self, inlist):
        ContentHandler.__init__(self)
        self.inlist = inlist

    # The callback names must be spelled exactly startElement / endElement;
    # any other spelling is never invoked by the SAX parser.
    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        print('name: %s attrs: %s' % (name, list(attrs.keys())))

    def endElement(self, name):
        """Print the name of the element that has just been closed."""
        print('endname %s' % name)

    def characters(self, chars):
        """Print a chunk of character data and append it to ``inlist``."""
        print('chars %s' % chars)
        self.inlist.append(chars)
if __name__ == '__main__':
    # Parse test.xml; the handler prints each event and fills lt with the
    # character data it receives.
    lt = []
    parse('test.xml', Testhandle(lt))
    print(lt)
Results:
Output:
name: root attrs: []
chars
name: childs attrs: []
chars
name: child attrs: [u'name']
chars 1
endname child
chars
name: child attrs: [u'value']
chars 2
endname child
chars
endname childs
chars
endname root
[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']
Method 2: use the xml.dom.minidom module to get the root node and then look up specific nodes on demand:
The code is as follows:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.dom import minidom

# Sample document. The three elements are wrapped in a single root element
# because a well-formed XML document must have exactly one; without it
# minidom.parseString() raises a parse error.
xmlstr = '''<?xml version='1.0' encoding='UTF-8'?>
<hash>
<request name='a'>/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
</hash>
'''


def doxml(xmlstr, show_help=True):
    """Parse an XML string with minidom and print its root and child nodes.

    Args:
        xmlstr: the XML document text to parse.
        show_help: when true (the default), finish by printing the built-in
            help for the last child element that was visited.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())
    # documentElement is the root element even when comments or a doctype
    # precede it, which firstChild is not.
    root = dom.documentElement
    print('root:')
    print(root.toxml())
    element = None
    for child in root.childNodes:
        print(child.toxml())
        # Only element nodes have attributes; skip text, comments, etc.
        if child.nodeType != child.ELEMENT_NODE:
            continue
        element = child
        print('child node attribute name: %s' % child.getAttribute('name'))
        print('child node name: %s' % child.nodeName)
        print('child node len: %s' % len(child.childNodes))
        # An element may be empty or start with a nested element, so only
        # read .data when the first child really is character data.
        first = child.firstChild
        has_text = first is not None and first.nodeType in (
            first.TEXT_NODE, first.CDATA_SECTION_NODE)
        print('child data: %s' % (first.data if has_text else ''))
        print('=======================================')
    if show_help:
        print('more help info to:')
        if element is not None:
            # One help() call on the node documents all of its members;
            # passing attribute-name strings would look up unrelated topics.
            help(element)


if __name__ == '__main__':
    doxml(xmlstr)
Results:
Output:
Dom:
<?xml version="1.0" ?><request name="a">/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
root:
<request name="a">/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
<request name="a">/2/photos/square/type.xml</request>
child node attribute name: a
child node name: request
child node len: 1
child data: /2/photos/square/type.xml
=======================================
more help info to:
Both approaches have their own advantages. Python offers many XML processing modules; only two of them are used here.
===== Supplementary notes =====
In practice, Python's minidom turned out to be unable to parse XML in other encodings: it can only parse UTF-8 content, and the encoding declared in the XML header must also be UTF-8, otherwise it reports an error.
The workaround found online is to replace the encoding declaration in the XML header, convert the content to UTF-8, and then parse it with minidom. Testing shows that this works, although it feels a little cumbersome.
This section is the second part: code that wraps Python's XML module (minidom) for writing XML.
===== Writing XML content =====
The code is as follows:
#!/usr/bin/env python
# encoding: utf-8
import codecs
from xml.dom import minidom


class Xmlwrite(object):
    """Build an XML result document with minidom and save it to a file.

    Nodes are addressed with a simple path syntax relative to the root
    element: segments are separated by '/', and a segment may carry a
    zero-based index in brackets, e.g. '/case[1]/' is the second <case>
    child of the root. Attribute predicates are not supported.

    Args:
        resultfile: path of the file written by save_xml().
    """

    def __init__(self, resultfile):
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the empty document and remember its root element."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Return the node addressed by ``xpath``, or None if it is missing.

        Raises:
            ValueError: if a bracketed index is not an integer.
        """
        node = self.root
        for segment in xpath.split('/'):
            segment = segment.strip()
            if not segment:
                continue
            # Split 'name[idx]' explicitly so that a segment without an
            # index keeps its full name and defaults to the first match.
            name, bracket, rest = segment.partition('[')
            index = int(rest.rstrip(']')) if bracket else 0
            matches = [child for child in node.childNodes
                       if child.nodeName == name]
            if not 0 <= index < len(matches):
                return None
            node = matches[index]
        return node

    def write_node(self, parent, nodename, value, attribute=None, cdata=False):
        """Create element ``nodename`` and append it under ``parent``.

        Args:
            parent: path of the parent node (see the class docstring).
            nodename: tag name of the new element.
            value: text content; None or '' creates an empty element.
                Numbers are converted to text.
            attribute: optional dict of attribute names to values.
            cdata: store the value in a CDATA section instead of plain text.

        If the parent path is malformed or does not exist, a message is
        printed and the node is appended to the root element instead.
        """
        node = self.dom.createElement(nodename)
        if value is not None and value != '':
            if isinstance(value, (int, float)):
                # minidom can only serialize text data.
                value = str(value)
            if cdata:
                nodedata = self.dom.createCDATASection(value)
            else:
                nodedata = self.dom.createTextNode(value)
            node.appendChild(nodedata)
        if attribute and isinstance(attribute, dict):
            for key, attr_value in attribute.items():
                node.setAttribute(key, attr_value)
        try:
            parentnode = self.__get_spec_node(parent)
        except ValueError:
            parentnode = None
        if parentnode is None:
            print('Get parent node Fail and use the Root as parent node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append <starttime> to the root."""
        self.write_node('/', 'starttime', time)

    def write_end_time(self, time):
        """Append <endtime> to the root."""
        self.write_node('/', 'endtime', time)

    def write_pass_count(self, count):
        """Append <passcount> to the root."""
        self.write_node('/', 'passcount', count)

    def write_fail_count(self, count):
        """Append <failcount> to the root."""
        self.write_node('/', 'failcount', count)

    def write_case(self):
        """Append a new, empty <case> element to the root."""
        self.write_node('/', 'case', None)

    def write_case_no(self, index, value):
        """Append <no> to the case with the given zero-based index."""
        self.write_node('/case[%s]/' % index, 'no', value)

    def write_case_url(self, index, value):
        """Append <url> to the case with the given zero-based index."""
        self.write_node('/case[%s]/' % index, 'url', value)

    def write_case_dbdata(self, index, value):
        """Append <dbdata> to the case with the given zero-based index."""
        self.write_node('/case[%s]/' % index, 'dbdata', value)

    def write_case_apidata(self, index, value):
        """Append <apidata> to the case with the given zero-based index."""
        self.write_node('/case[%s]/' % index, 'apidata', value)

    def write_case_dbsql(self, index, value):
        """Append <dbsql> (as CDATA) to the case with the given index."""
        self.write_node('/case[%s]/' % index, 'dbsql', value, cdata=True)

    def write_case_apixpath(self, index, value):
        """Append <apixpath> to the case with the given zero-based index."""
        self.write_node('/case[%s]/' % index, 'apixpath', value)

    def save_xml(self):
        """Write the document to ``resultfile`` as UTF-8."""
        # The with-block closes the file even if serialization fails.
        with codecs.open(self.resultfile, 'w', encoding='utf-8') as myfile:
            self.dom.writexml(myfile, encoding='utf-8')
if __name__ == '__main__':
    # Demo: build a result document with two cases and save it.
    xr = Xmlwrite(r'D:\test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')
    # Create the two <case> containers before filling them by index.
    xr.write_case()
    xr.write_case()
    xr.write_case_no(0, '0')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_dbsql(0, 'select * from')
    xr.write_case_dbdata(0, 'dbtata')
    xr.write_case_apixpath(0, '/xpath')
    xr.write_case_apidata(0, 'apidata')
    xr.write_case_no(1, '1')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_dbsql(1, 'select 1 from')
    xr.write_case_dbdata(1, 'dbtata1')
    xr.write_case_apixpath(1, '/xpath1')
    xr.write_case_apidata(1, 'apidata1')
    xr.save_xml()
The code above wraps minidom and supports writing nodes addressed by an XPath-like path. It does not support XPath matching with predicates (for example, by attribute), but it does support matching by index.
For example, /root/child[1] refers to the second child node of root, because indexes start at 0.