Parsing XML files in Python: a worked example
This example shows how to parse XML files in Python. It is shared here for your reference; the approach is as follows:
The content of the XML file (saved as simple.xml, the name the code below opens) is as follows:
<?xml version="1.0" ?> <!--Simple xml document__chapter 8--> <book> <title> sample xml thing </title> <author> <name> <first> ma </first> <last> xiaoju </last> </name> <affiliation> Springs Widgets, Inc. </affiliation> </author> <chapter number="1"> <title> First </title> <para> I think widgets are greate.You should buy lots of them forom <company> Spirngy Widgts, Inc </company> </para> </chapter> </book>
Python code:
from xml.dom import minidom, Node
import re
import textwrap


class SampleScanner:
    """Walk a parsed ``<book>`` document and print selected fields.

    Scanning happens in the constructor: every top-level ``<book>`` element
    is dispatched to ``handle_book``, which prints the book title, the
    author's name and affiliation, and each chapter's number, title and the
    company mentioned in its paragraphs.
    """

    def __init__(self, doc):
        """Constructor.

        doc: a ``minidom.Document``; each top-level <book> element in it is
        scanned immediately.
        """
        assert isinstance(doc, minidom.Document)
        for child in doc.childNodes:
            if child.nodeType == Node.ELEMENT_NODE and child.tagName == "book":
                self.handle_book(child)

    def handle_book(self, node):
        """Print the title of a <book> and dispatch its <author>/<chapter>."""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "title":
                print("Book titile is: %s" % self.gettext(child.childNodes))
            elif child.tagName == "author":
                self.handle_author(child)
            elif child.tagName == "chapter":
                self.handle_chapter(child)

    def handle_chapter(self, node):
        """Print a <chapter>'s number and title, then scan its <para> children."""
        number = node.getAttribute("number")
        print("number: %s" % number)
        # getElementsByTagName searches all descendants, not just direct
        # children; gettext() flattens whatever elements it returns.
        title_node = node.getElementsByTagName("title")
        print("title: %s" % self.gettext(title_node))
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "para":
                self.handle_chapter_para(child)

    def handle_chapter_para(self, node):
        """Print the text of every <company> element inside a <para>."""
        company = self.gettext(node.getElementsByTagName("company"))
        print("chapter:para:company %s" % company)

    def handle_author(self, node):
        """Print the <name> and <affiliation> of an <author> element."""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "name":
                self.handle_author_name(child)
            elif child.tagName == "affiliation":
                print("affiliation: %s" % self.gettext(child.childNodes))

    def handle_author_name(self, node):
        """Print the <first> and <last> parts of a <name> element."""
        first = ""
        last = ""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "first":
                first = self.gettext(child.childNodes)
            elif child.tagName == "last":
                last = self.gettext(child.childNodes)
        print("firstname:%s,lastname:%s" % (first, last))

    def gettext(self, nodelist):
        """Return the text under *nodelist* with whitespace runs collapsed.

        nodelist: an iterable of DOM nodes
        return: the concatenated character data of all text and CDATA nodes
        found in the list and, recursively, in their descendants; every run
        of whitespace is replaced by one space (the result is not stripped).
        """
        retlist = []
        for node in nodelist:
            if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                # Use .data, not .wholeText: wholeText already concatenates
                # adjacent text/CDATA siblings, so appending it once per
                # sibling would duplicate their content.
                retlist.append(node.data)
            elif node.hasChildNodes():
                retlist.append(self.gettext(node.childNodes))
        return re.sub(r"\s+", " ", "".join(retlist))


if __name__ == "__main__":
    doc = minidom.parse("simple.xml")
    sample = SampleScanner(doc)
I hope this article will help you with Python programming.
XML file operations in Python
Here is some example code; you only need to adapt it to add a child node.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# author: wklken@yeah.net
# date: 2012-05-25
# version: 0.1

from xml.etree.ElementTree import ElementTree, Element


def read_xml(in_path):
    '''Read and parse an XML file.

    in_path: path of the XML file
    return: ElementTree
    '''
    tree = ElementTree()
    tree.parse(in_path)
    return tree


def write_xml(tree, out_path):
    '''Write an XML tree to a file, UTF-8 encoded with an XML declaration.

    tree: XML tree
    out_path: path to write to
    '''
    tree.write(out_path, encoding="UTF-8", xml_declaration=True)


def if_match(node, kv_map):
    '''Determine whether a node carries all of the given attribute values.

    node: node
    kv_map: map of attribute names to the values they must have
    return: True if every key in kv_map has the same value on the node
            (so an empty kv_map always matches), otherwise False
    '''
    return all(node.get(key) == kv_map.get(key) for key in kv_map)


# --------------- search -----

def find_nodes(tree, path):
    '''Find all nodes matching a path.

    tree: XML tree
    path: node path
    '''
    return tree.findall(path)


def get_node_by_keyvalue(nodelist, kv_map):
    '''Select the nodes whose attributes match the given values.

    nodelist: node list
    kv_map: map of attribute names to the values to match
    return: list of the nodes from nodelist accepted by if_match, in order
    '''
    return [node for node in nodelist if if_match(node, kv_map)]


# ------------- change -----

# NOTE: the source text is cut off at this point ("...... remaining full
# text>"). Only the beginning of change_node_properties is visible; it is
# reproduced below as a comment rather than completed by guesswork.
#
# def change_node_properties(nodelist, kv_map, is_delete=False):
#     '''Modify/add/delete node attributes and attribute values.
#
#     nodelist: node list
#     kv_map: attribute and attribute value map
#     '''
#     for node in nodelist:
#         for key in kv_map:
#             if is_delete:
#                 ...... remaining full text>
How does Python parse XML documents?
Import: from xml.dom import minidom (xml is the top-level package, dom is a subpackage nested inside xml, and minidom is a module in xml.dom.)
xmldoc = minidom.parse('path of the XML file to be parsed')