<?xml version="1.0" encoding="utf-8" ?><root><childs><child name='first' >1</child><child value="2">2</child></childs></root>
第一種方式,自動遍歷所有節點:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""SAX example: visit every node of test.xml and collect its text chunks."""
from xml.sax.handler import ContentHandler
from xml.sax import parse


class TestHandle(ContentHandler):
    """SAX handler that traces each event and records character data.

    Every start tag, end tag and text chunk is printed as it is reported
    by the parser.  Each text chunk (including whitespace-only chunks
    between elements) is also appended to the list given at construction.
    """

    def __init__(self, inlist):
        """Store *inlist*, the caller-owned list that receives text chunks."""
        # Initialise the base class so its own state is set up as well.
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        # A single pre-formatted argument keeps the output identical on
        # Python 2 (print statement) and Python 3 (print function).
        print('name: %s attrs: %s' % (name, attrs.keys()))

    def endElement(self, name):
        """Print the name of the element that was just closed."""
        print('endname %s' % name)

    def characters(self, chars):
        """Print a chunk of character data and append it to ``inlist``."""
        print('chars %s' % chars)
        self.inlist.append(chars)


if __name__ == '__main__':
    lt = []
    parse('test.xml', TestHandle(lt))
    print(lt)
結果:
name: root attrs: []chars name: childs attrs: []chars name: child attrs: [u'name']chars 1endname childchars name: child attrs: [u'value']chars 2endname childchars endname childschars endname root[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']
第二種:擷取根節點,按需尋找指定節點:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""DOM example: parse an XML string with minidom and inspect the root's children."""
from xml.dom import minidom

xmlstr = '''<?xml version="1.0" encoding="UTF-8"?><hash> <request name='first'>/2/photos/square/type.xml</request> <error_code>21301</error_code> <error>auth faild!</error></hash>'''


def doxml(xmlstr):
    """Parse *xmlstr* and print the document, its root and each child of the root.

    For every element child the function prints its ``name`` attribute,
    tag name, number of child nodes and the text of its first child
    (an empty string when the element has no leading text), followed by
    the list of public attribute/method names available on the node.

    Args:
        xmlstr: XML document as a string.

    Returns:
        None.  All results are printed.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())

    # documentElement is the root element even when a comment or a
    # processing instruction precedes it; dom.firstChild would not be.
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        # Only elements support getAttribute(); skip text, comments, etc.
        if child.nodeType != child.ELEMENT_NODE:
            continue

        print('child node attribute name: %s' % child.getAttribute('name'))
        print('child node name: %s' % child.nodeName)
        print('child node len: %s' % len(child.childNodes))

        # An element may be empty or may start with a nested element, so
        # only read .data when the first child really carries text.
        first = child.firstChild
        if first is not None and first.nodeType in (first.TEXT_NODE,
                                                    first.CDATA_SECTION_NODE):
            data = first.data
        else:
            data = ''
        print('child data: %s' % data)

        print('=======================================')
        print('more help info to see:')
        # help() was previously called with the attribute *name* (a plain
        # string), which looks up an unrelated help topic and prints None.
        # List the public member names instead so they can be looked up.
        for med in dir(child):
            if not med.startswith('_'):
                print(med)


if __name__ == '__main__':
    doxml(xmlstr)
結果:
Dom:<?xml version="1.0" ?><hash> <request name="first">/2/photos/square/type.xml</request> <error_code>21301</error_code> <error>auth faild!</error></hash>root:<hash> <request name="first">/2/photos/square/type.xml</request> <error_code>21301</error_code> <error>auth faild!</error></hash> <request name="first">/2/photos/square/type.xml</request>child node attribute name: firstchild node name: requestchild node len: 1child data: /2/photos/square/type.xml=======================================more help info to see:
兩種方法各有其優點,python的xml處理模組太多,目前只用到這2個。
================================================補充的分割線======================================================
實際工作中發現python的minidom無法解析其它編碼的xml,只能解析utf-8的編碼,而其xml檔案的頭部申明也必須是utf-8,為其它編碼會報錯誤。網上的解決辦法都是替換xml檔案頭部的編碼申明,然後轉換編碼為utf-8再用minidom解碼,實際測試為可行,不過有點累贅的感覺。
================================================寫xml內容的分割線======================================================
#!/usr/bin/env python
# encoding: utf-8
"""Small minidom wrapper that appends nodes addressed by a simple indexed path."""
from xml.dom import minidom


class xmlwrite(object):
    """Build an XML result document under an ``<api>`` root and save it.

    Parent nodes are addressed with a minimal path syntax that is resolved
    relative to the root element: steps are separated by ``/`` and may
    carry a zero-based index, e.g. ``/Case[1]/`` is the second ``Case``
    child of the root.  Attribute predicates are not supported.
    """

    def __init__(self, resultfile):
        """Remember the output path and create an empty document."""
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the DOM document and cache its root element."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Resolve *xpath* relative to the root element.

        Returns:
            The addressed node, the root itself for an empty path such as
            ``'/'``, or ``None`` when any step does not exist.

        Raises:
            ValueError: if an index is malformed (e.g. ``Case[x]``).
        """
        parentnode = self.root
        for step in xpath.split('/'):
            step = step.strip()
            if not step:
                continue
            bracket = step.find('[')
            if bracket > -1:
                if not step.endswith(']'):
                    raise ValueError('malformed index in path step %r' % step)
                name = step[:bracket]
                index = int(step[bracket + 1:-1])
            else:
                # No index given: take the whole step as the name.  Slicing
                # with the -1 returned by find() would drop its last char.
                name = step
                index = 0
            matches = [child for child in parentnode.childNodes
                       if child.nodeName == name]
            if not 0 <= index < len(matches):
                return None
            parentnode = matches[index]
        return parentnode

    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
        """Append a new element under the node addressed by *parent*.

        Args:
            parent: path of the parent node (see the class docstring).
            nodename: tag name of the element to create.
            value: element content; ``None`` or ``''`` creates an empty
                element, any other value is written as text.
            attribute: optional dict of attribute names to values.
            CDATA: write *value* as a CDATA section instead of plain text.

        If the parent cannot be resolved a message is printed and the
        element is appended to the root instead.
        """
        node = self.dom.createElement(nodename)
        # Compare against None/'' explicitly so that values such as 0 are
        # still written instead of being dropped as falsy.
        if value is not None and value != '':
            text = '%s' % (value,)
            if CDATA:
                nodedata = self.dom.createCDATASection(text)
            else:
                nodedata = self.dom.createTextNode(text)
            node.appendChild(nodedata)
        if attribute and isinstance(attribute, dict):
            for key, attr_value in attribute.items():
                node.setAttribute(key, attr_value)
        try:
            parentnode = self.__get_spec_node(parent)
        except (ValueError, AttributeError, TypeError):
            # Malformed index or a non-string path.
            parentnode = None
        if parentnode is None:
            print('Get parent Node Fail, Use the Root as parent Node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append a ``StartTime`` element to the root."""
        self.write_node('/', 'StartTime', time)

    def write_end_time(self, time):
        """Append an ``EndTime`` element to the root."""
        self.write_node('/', 'EndTime', time)

    def write_pass_count(self, count):
        """Append a ``PassCount`` element to the root."""
        self.write_node('/', 'PassCount', count)

    def write_fail_count(self, count):
        """Append a ``FailCount`` element to the root."""
        self.write_node('/', 'FailCount', count)

    def write_case(self):
        """Append a new, empty ``Case`` element to the root."""
        self.write_node('/', 'Case', None)

    def write_case_no(self, index, value):
        """Append a ``No`` element to the ``Case`` at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'No', value)

    def write_case_url(self, index, value):
        """Append a ``URL`` element to the ``Case`` at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'URL', value)

    def write_case_dbdata(self, index, value):
        """Append a ``DBData`` element to the ``Case`` at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'DBData', value)

    def write_case_apidata(self, index, value):
        """Append an ``APIData`` element to the ``Case`` at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'APIData', value)

    def write_case_dbsql(self, index, value):
        """Append a ``DBSQL`` CDATA element to the ``Case`` at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'DBSQL', value, CDATA=True)

    def write_case_apixpath(self, index, value):
        """Append an ``APIXPath`` element to the ``Case`` at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'APIXPath', value)

    def save_xml(self):
        """Serialise the document as UTF-8 and write it to ``resultfile``."""
        # toxml(encoding=...) returns encoded bytes, so the file content
        # really matches the declared encoding; ``with`` closes the file
        # even if the write fails.
        data = self.dom.toxml(encoding='utf-8')
        with open(self.resultfile, 'wb') as myfile:
            myfile.write(data)


if __name__ == '__main__':
    xr = xmlwrite(r'D:\test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')
    xr.write_case()
    xr.write_case()
    xr.write_case_no(0, '0')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_dbsql(0, 'select * from ')
    xr.write_case_dbdata(0, 'dbtata')
    xr.write_case_apixpath(0, '/xpath')
    xr.write_case_apidata(0, 'apidata')
    xr.write_case_no(1, '1')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_dbsql(1, 'select 1 from ')
    xr.write_case_dbdata(1, 'dbtata1')
    xr.write_case_apixpath(1, '/xpath1')
    xr.write_case_apidata(1, 'apidata1')
    xr.save_xml()
封裝了minidom,支援通過xpath來寫節點,不支援xpath帶屬性的匹配,但支援帶索引的匹配(索引從0開始,路徑相對於根節點,不需要包含根節點名稱)。如:/Case[1]/, 表示根節點下的第2個Case節點