A recent project required parsing an XML file, extracting the chat ID, the lt value, the timestamp and other information, and saving the results to Excel. 1. Parse the XML and extract the data using minidom from Python's built-in xml.dom package (lxml can be used as well). The minidom calls used are as follows:
minidom.parse(): parses the file and returns a DOM object
_get_documentElement(): the DOM is a tree structure; this returns the root node of that tree
getElementsByTagName(): finds the nodes beneath the root by tag name
getAttribute(): gets the value of an attribute of a DOM node
The extraction code is as follows:
class Get_xml():
    """Extract chat values and timing attributes from an XML result file.

    The file is expected to contain one sample element per request, carrying
    ``lb`` (label), ``rc`` (response code), ``ts`` (timestamp) and ``lt``
    attributes, with the response body stored in a child element.

    NOTE(review): the tag and attribute names below follow the JMeter XML
    result-log layout (``httpSample`` / ``responseData`` / ``lt``).  XML names
    are case-sensitive, so confirm them against the real input file.
    """

    # Tag of the per-request sample element.
    SAMPLE_TAG = "httpSample"
    # Tag of the child element that holds the response body text.
    RESPONSE_TAG = "responseData"
    # Sample label -> regular expression whose first group is the value to
    # extract from the response body.  The labels must match the sampler names
    # used in the test plan.  Patterns are applied case-insensitively because
    # the exact casing of the payload keys could not be confirmed.
    PATTERNS = {
        "Send Message": r"""["']chatId["']\s*:\s*["'](.*?)["']""",
        "Receive Message": r"info%3A%22(.*?)%22",
    }

    def __init__(self, address):
        """Parse the XML document and collect its sample nodes.

        Args:
            address: path of the XML file, or an open file-like object
                (anything ``xml.dom.minidom.parse`` accepts).
        """
        from xml.dom import minidom

        # Parse the file into a DOM document.
        self.doc = minidom.parse(address)
        # The DOM is a tree; documentElement is its root node.
        self.root = self.doc.documentElement
        # All sample elements below the root, in document order.
        self.httpsample_nodes = self.root.getElementsByTagName(self.SAMPLE_TAG)

    def getxmldata(self):
        """Return the extracted value and timing data of each matching sample.

        Only samples whose ``lb`` label has an entry in ``PATTERNS`` and whose
        ``rc`` attribute equals ``"200"`` are considered.  Samples whose
        response body does not match the pattern are skipped.

        Returns:
            A list of ``(value, ts, lt, lb)`` tuples, in document order.
        """
        import re

        data_list = []
        for sample in self.httpsample_nodes:
            label = sample.getAttribute("lb")
            pattern = self.PATTERNS.get(label)
            if pattern is None or sample.getAttribute("rc") != "200":
                continue
            # Read the body from this sample's own child element rather than
            # indexing a document-wide list: a sample without a response
            # element would otherwise shift every later pairing.
            found = re.search(pattern, self._response_text(sample), re.IGNORECASE)
            if found is None:
                continue
            value = found.group(1)
            print("D:", value)
            data_list.append(
                (value, sample.getAttribute("ts"), sample.getAttribute("lt"), label)
            )
        return data_list

    def _response_text(self, sample):
        """Return the text of the sample's direct response child, or ''."""
        for child in sample.childNodes:
            if child.nodeName == self.RESPONSE_TAG:
                return "".join(
                    node.data
                    for node in child.childNodes
                    if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE)
                )
        return ""
2. Save to Excel: export the data to an Excel file
This uses the openpyxl package. Workbook() from openpyxl.workbook creates the file in memory, and it is written to disk at the end.
wb = load_workbook(filename=xxxx.xlsx): reads the Excel file whose path is xxxx.xlsx
wb = Workbook(): creates a Workbook object
ew = ExcelWriter(workbook=wb): creates a new ExcelWriter, which is later used to save
wb.create_sheet(0, 'xxx'): creates a new sheet at position 0 with the name xxx
ws = wb.worksheets[0]: opens the sheet at position 0, i.e. the first sheet
ws.cell(row=1, column=1).value = xxx: writes the data xxx into row 1, column 1
ew.save(filename=xxxx.xlsx): exports the data to the local file xxxx.xlsx
An example of exporting to Excel is as follows:
import os

import openpyxl
from openpyxl import writer, load_workbook
# Workbook builds the spreadsheet in memory; it is written to disk only when saved
from openpyxl.workbook import Workbook, workbook
from openpyxl.writer.excel import ExcelWriter
try:
    from openpyxl.cell import get_column_letter
except ImportError:  # newer openpyxl releases expose it from openpyxl.utils
    from openpyxl.utils import get_column_letter
# if __name__ == "__main__":
def importexcel(match, dest_filename):
    """Write chat send/receive timing records to an Excel workbook.

    The workbook at ``dest_filename`` is loaded when the file already exists;
    otherwise a new workbook is created.  A sheet titled
    "Chat send receive Request" is inserted as the first sheet, a header row
    is written to row 1, and one row per record follows from row 2 on.  The
    workbook is then saved to ``dest_filename``.

    Args:
        match: iterable of mappings, one per output row, providing the
            ``Chatid``, ``Accept_timestamp``, ``Send_timestamp``,
            ``Timestamp_gap``, ``Accept_lt`` and ``Response_time`` values.
        dest_filename: path of the ``.xlsx`` file to create or update.

    Raises:
        KeyError: if a record lacks one of the required keys.
    """
    # (column header, record key) in output column order.
    columns = (
        ("Chartid", "Chatid"),
        ("Receive Timestamp", "Accept_timestamp"),
        ("Send Timestamp", "Send_timestamp"),
        ("Timestamp difference", "Timestamp_gap"),
        ("Receive LT", "Accept_lt"),
        ("Send to received response time", "Response_time"),
    )

    if os.path.exists(dest_filename):
        wb = load_workbook(filename=dest_filename)
    else:
        wb = Workbook()

    # Keyword arguments: the positional order of create_sheet's parameters
    # differs between openpyxl versions.
    wb.create_sheet(title="Chat send receive Request", index=0)
    # Open the first sheet, i.e. the one just inserted at position 0.
    ws = wb.worksheets[0]
    ws.title = "Chat send receive Request"

    # Address cells by row/column; the coordinate-string form cell('A1') is
    # not accepted by current openpyxl releases.
    for col, (header, _key) in enumerate(columns, start=1):
        ws.cell(row=1, column=col).value = header

    for row, record in enumerate(match, start=2):
        for col, (_header, key) in enumerate(columns, start=1):
            ws.cell(row=row, column=col).value = _field(record, key)
        print(record, row)

    # Workbook.save is the public way to write the file to disk.
    wb.save(dest_filename)


def _field(record, key):
    """Return record[key], falling back to a case-insensitive key match."""
    try:
        return record[key]
    except KeyError:
        # NOTE(review): the exact casing of the keys produced by the caller
        # could not be confirmed, so differently-cased keys are accepted too.
        wanted = key.lower()
        for candidate in record:
            if str(candidate).lower() == wanted:
                return record[candidate]
        raise
Use Python to extract the contents of an XML file and save them to Excel