python解析xml模組

來源:互聯網
上載者:User
<?xml version="1.0" encoding="utf-8" ?>
<root>
<childs>
<child name='first' >1</child>
<child value="2">2</child>
</childs>
</root>

第一種方式,自動遍曆所有節點:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""SAX demo: walk every node of test.xml and collect its character data."""
from xml.sax import parse
from xml.sax.handler import ContentHandler


class TestHandle(ContentHandler):
    """SAX content handler that logs each event and records character data.

    Every chunk passed to ``characters`` (including whitespace-only chunks
    between elements) is appended to the list supplied by the caller.
    """

    def __init__(self, inlist):
        """Store *inlist*, the caller-owned list that receives text chunks."""
        # Explicit base call (not super()) so the handler also works where
        # ContentHandler is an old-style class (Python 2).
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        # Single pre-formatted argument keeps the output identical whether
        # print is a statement (Python 2) or a function (Python 3).
        print('name: %s attrs: %s' % (name, attrs.keys()))

    def endElement(self, name):
        """Print the name of the element being closed."""
        print('endname %s' % name)

    def characters(self, chars):
        """Print the text chunk and append it to the collecting list."""
        print('chars %s' % chars)
        self.inlist.append(chars)


if __name__ == '__main__':
    lt = []
    parse('test.xml', TestHandle(lt))
    print(lt)

結果:

name: root attrs: []chars name: childs attrs: []chars name: child attrs: [u'name']chars 1endname childchars name: child attrs: [u'value']chars 2endname childchars endname childschars endname root[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']

第二種:擷取根節點,按需尋找指定節點:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""minidom demo: fetch the root element and inspect its direct children."""
from xml.dom import minidom

xmlstr = '''<?xml version="1.0" encoding="UTF-8"?>
<hash>
    <request name='first'>/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
</hash>'''


def doxml(xmlstr, show_help=True):
    """Parse *xmlstr* with minidom and print details of the root's children.

    Args:
        xmlstr: XML document text to parse.
        show_help: when true (the default), also print ``help()`` output for
            every attribute of each child element. This is very verbose;
            pass ``False`` to get only the node summary.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())

    # documentElement is the root element even when a comment or processing
    # instruction precedes it; dom.firstChild would return that node instead.
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        # Only elements have attributes; skip text, comments and the like.
        if child.nodeType != child.ELEMENT_NODE:
            continue
        print('child node attribute name: %s' % child.getAttribute('name'))
        print('child node name: %s' % child.nodeName)
        print('child node len: %s' % len(child.childNodes))
        # An empty element has no first child, and a nested element has no
        # ``data``; fall back to an empty string instead of raising.
        print('child data: %s' % getattr(child.firstChild, 'data', ''))
        print('=======================================')
        if show_help:
            print('more help info to see:')
            for med in dir(child):
                # Document the attribute object itself, not its name string.
                help(getattr(child, med, None))


if __name__ == '__main__':
    doxml(xmlstr)

結果:

Dom:<?xml version="1.0" ?><hash>    <request name="first">/2/photos/square/type.xml</request>    <error_code>21301</error_code>    <error>auth faild!</error></hash>root:<hash>    <request name="first">/2/photos/square/type.xml</request>    <error_code>21301</error_code>    <error>auth faild!</error></hash>    <request name="first">/2/photos/square/type.xml</request>child node attribute name: firstchild node name: requestchild node len: 1child data: /2/photos/square/type.xml=======================================more help info to see:

兩種方法各有其優點,python的xml處理模組太多,目前只用到這2個。

================================================補充的分割線======================================================

實際工作中發現python的minidom無法解析其它編碼的xml,只能解析utf-8的編碼,而其xml檔案的頭部申明也必須是utf-8,為其它編碼會報錯誤。網上的解決辦法都是替換xml檔案頭部的編碼申明,然後轉換編碼為utf-8再用minidom解碼,實際測試為可行,不過有點累贅的感覺。

================================================寫xml內容的分割線======================================================

#!/usr/bin/env python
# encoding: utf-8
"""minidom wrapper that writes result nodes addressed by a simple path."""
from xml.dom import minidom


class xmlwrite(object):
    """Build an XML result document and save it to a file.

    Nodes are addressed with a minimal path syntax relative to the root
    element: steps are separated by ``/`` and may carry a zero-based index,
    e.g. ``/Case[1]/`` is the second ``Case`` child of the root. A step
    without an index means the first matching child. Attribute predicates
    are not supported.
    """

    def __init__(self, resultfile):
        """Remember the output path and create an empty ``<api>`` document."""
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the DOM document and cache its root element."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Return the node addressed by *xpath*, or ``None`` if it is absent.

        Raises:
            ValueError: if an index inside ``[...]`` is not an integer.
        """
        current = self.root
        for step in xpath.split('/'):
            step = step.strip()
            if not step:
                # Leading/trailing/double slashes produce empty steps.
                continue
            bracket = step.find('[')
            if bracket > -1:
                tag = step[:bracket]
                index = int(step[bracket + 1:-1])
            else:
                # No index given: keep the whole step as the tag name.
                tag = step
                index = 0
            matches = [child for child in current.childNodes
                       if child.nodeName == tag]
            if not 0 <= index < len(matches):
                return None
            current = matches[index]
        return current

    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
        """Append a new element named *nodename* under the *parent* path.

        Args:
            parent: path of the parent node (see the class docstring). When
                it cannot be resolved, a message is printed and the element
                is attached to the root instead.
            nodename: tag name of the element to create.
            value: element content; ``None`` or ``''`` creates an empty
                element. Non-string values are converted to text.
            attribute: optional dict of attribute names to values.
            CDATA: store *value* in a CDATA section instead of a text node.
        """
        node = self.dom.createElement(nodename)
        if value is not None and value != '':
            # '%s' formatting turns numbers into text and leaves str/unicode
            # untouched, so counts such as 0 are written rather than dropped.
            text = '%s' % (value,)
            if CDATA:
                payload = self.dom.createCDATASection(text)
            else:
                payload = self.dom.createTextNode(text)
            node.appendChild(payload)
        # Attributes are independent of whether the element has content.
        if attribute and isinstance(attribute, dict):
            for key, attr_value in attribute.items():
                node.setAttribute(key, '%s' % (attr_value,))
        try:
            parentnode = self.__get_spec_node(parent)
        except (AttributeError, ValueError):
            # Non-string path or malformed index: treat as "not found".
            parentnode = None
        if parentnode is None:
            print('Get parent Node Fail, Use the Root as parent Node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append ``<StartTime>`` to the root."""
        self.write_node('/', 'StartTime', time)

    def write_end_time(self, time):
        """Append ``<EndTime>`` to the root."""
        self.write_node('/', 'EndTime', time)

    def write_pass_count(self, count):
        """Append ``<PassCount>`` to the root."""
        self.write_node('/', 'PassCount', count)

    def write_fail_count(self, count):
        """Append ``<FailCount>`` to the root."""
        self.write_node('/', 'FailCount', count)

    def write_case(self):
        """Append a new, empty ``<Case>`` to the root."""
        self.write_node('/', 'Case', None)

    def write_case_no(self, index, value):
        """Append ``<No>`` to the ``Case`` at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'No', value)

    def write_case_url(self, index, value):
        """Append ``<URL>`` to the ``Case`` at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'URL', value)

    def write_case_dbdata(self, index, value):
        """Append ``<DBData>`` to the ``Case`` at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'DBData', value)

    def write_case_apidata(self, index, value):
        """Append ``<APIData>`` to the ``Case`` at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'APIData', value)

    def write_case_dbsql(self, index, value):
        """Append ``<DBSQL>`` (as CDATA) to the ``Case`` at *index*."""
        self.write_node('/Case[%s]/' % index, 'DBSQL', value, CDATA=True)

    def write_case_apixpath(self, index, value):
        """Append ``<APIXPath>`` to the ``Case`` at zero-based *index*."""
        self.write_node('/Case[%s]/' % index, 'APIXPath', value)

    def save_xml(self):
        """Serialize the document as UTF-8 and write it to ``resultfile``."""
        # toxml(encoding=...) returns encoded bytes, so the file content
        # really is UTF-8 as the XML declaration states; ``with`` closes the
        # file even if the write fails.
        with open(self.resultfile, 'wb') as myfile:
            myfile.write(self.dom.toxml(encoding='utf-8'))


if __name__ == '__main__':
    xr = xmlwrite(r'D:\test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')
    xr.write_case()
    xr.write_case()
    xr.write_case_no(0, '0')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_dbsql(0, 'select * from ')
    xr.write_case_dbdata(0, 'dbtata')
    xr.write_case_apixpath(0, '/xpath')
    xr.write_case_apidata(0, 'apidata')
    xr.write_case_no(1, '1')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_dbsql(1, 'select 1 from ')
    xr.write_case_dbdata(1, 'dbtata1')
    xr.write_case_apixpath(1, '/xpath1')
    xr.write_case_apidata(1, 'apidata1')
    xr.save_xml()

封裝了minidom,支援通過xpath來寫節點,不支援xpath帶屬性的匹配,但支援帶索引的匹配。如:/root/child[1], 表示root的第2個child節點

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.