Libxml is a C language library that provides functions such as reading, creating, and operating XML data. For details about its APIs, refer to the official documentation. Here we only introduce some common usage.
Libxml defines several node types when operating XML data:
/*
 * Node type codes listed for libxml's xmlElementType.
 * Each node of a document tree carries one of these values in its
 * `type` field.
 */
enum xmlElementType {
    XML_ELEMENT_NODE = 1,
    XML_ATTRIBUTE_NODE = 2,
    XML_TEXT_NODE = 3,
    XML_CDATA_SECTION_NODE = 4,
    XML_ENTITY_REF_NODE = 5,
    XML_ENTITY_NODE = 6,
    XML_PI_NODE = 7,
    XML_COMMENT_NODE = 8,
    XML_DOCUMENT_NODE = 9,
    XML_DOCUMENT_TYPE_NODE = 10,
    XML_DOCUMENT_FRAG_NODE = 11,
    XML_NOTATION_NODE = 12,
    XML_HTML_DOCUMENT_NODE = 13,
    XML_DTD_NODE = 14,
    XML_ELEMENT_DECL = 15,
    XML_ATTRIBUTE_DECL = 16,
    XML_ENTITY_DECL = 17,
    XML_NAMESPACE_DECL = 18,
    XML_XINCLUDE_START = 19,
    XML_XINCLUDE_END = 20,
    XML_DOCB_DOCUMENT_NODE = 21
};
The most commonly used types are XML_ELEMENT_NODE, XML_TEXT_NODE, and XML_ATTRIBUTE_NODE, i.e. element nodes, text nodes, and attribute nodes. They are all of the xmlNode struct type, and a node's type can be read from curNode->type. Each node type corresponds to a different kind of XML data. In the following XML document, story and storyinfo are element nodes, while "John Fleck" is a text node. Structurally, the "John Fleck" text node is a child node of the <author> element node.
Test XML:
<?xml version="1.0"?><story> <storyinfo> <author>John Fleck</author> <datewritten>June 2, 2002</datewritten> <keyword>example keyword</keyword> </storyinfo> <body>
1. Parse the document:
xmlDocPtr doc;
xmlNodePtr cur;

/* Load and parse the file; a NULL document means parsing failed. */
doc = xmlParseFile(docname);
if (!doc) {
    fprintf(stderr, "Document not parsed successfully. \n");
    return;
}

/* Fetch the root element; a document without one is reported as empty. */
cur = xmlDocGetRootElement(doc);
if (!cur) {
    fprintf(stderr, "empty document\n");
    xmlFreeDoc(doc);
    return;
}

/* A non-zero comparison result means the root element is not <story>. */
if (xmlStrcmp(cur->name, (const xmlChar *)"story") != 0) {
    fprintf(stderr, "document of the wrong type, root node != story");
    xmlFreeDoc(doc);
    return;
}
2. Search nodes:
/*
 * Print the text content of every <keyword> child element of `cur`.
 *
 * doc: document that `cur` belongs to; passed to xmlNodeListGetString.
 * cur: parent element whose direct children are scanned. A NULL `cur`
 *      is accepted and results in no output.
 *
 * Each matching child produces one "keyword: <text>" line on stdout.
 * A <keyword> element with no text content prints an empty value.
 */
void parseStory (xmlDocPtr doc, xmlNodePtr cur)
{
    if (cur == NULL) {
        return;
    }

    for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
        xmlChar *key;

        if (xmlStrcmp(cur->name, (const xmlChar *)"keyword") != 0) {
            continue;
        }

        /* Returns NULL when the element has no text children. */
        key = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
        if (key == NULL) {
            /* Passing NULL to %s is undefined behaviour; print an empty value. */
            printf("keyword: %s\n", "");
            continue;
        }

        printf("keyword: %s\n", (const char *)key);
        xmlFree(key); /* the string returned above is owned by the caller */
    }
}
3. Document Retrieval Using XPath:
In addition to traversing the document tree to find a node, libxml2 also supports XPath expressions, which return the set of nodes matching a given search rule. For XPath syntax, see http://www.w3schools.com/xpath/xpath_operators.asp and http://www.w3.org/TR/xpath. Expressions can be tried out online at http://www.xpathtester.com/test. Some simple usage: "/Information/CameraSet" is an absolute path, "//CameraItem" is a relative path that matches CameraItem nodes anywhere in the document, numbers can be compared directly with an equals sign (for example Index=1), and text is compared against text (for example text()='abc').
char expr[128];sprintf(expr, "/Information/CameraSet/CameraItem[IP[text()='%s']]", pItem->sIP);sprintf(expr, "/Information/CameraSet/CameraItem[Index=%d]/VarTrafficLine", iIndex);xpObjPtr = getnodeset(doc, (xmlChar*)expr);
4. Add nodes and obtain text:
You can use xmlNodeSetContent(cur, (const xmlChar *)"sssss") to set the text of an element node. Other methods are also available, as shown in the following code. To get text, use xmlNodeGetContent(cur) or xmlNodeListGetString(doc, nodelist, inLine). The first returns the concatenated content of all text and entity-reference nodes among the children of cur; the second returns only the content of the text and entity-reference nodes in the given node list.
Newnode = xmlnewnode (null, (const xmlchar *) "newnode"); xmladdchild (cur, newnode); xmlnewtextchild (newnode, null, (const xmlchar *) "code ", (const xmlchar *) "1001"); xmlnewtextchild (newnode, null, (const xmlchar *) "name", (const xmlchar *) "anewnode "); textnode = xmlnewtext (const xmlchar *) "abcdefg"); // Add the text xmladdchild (cur, textnode) to an existing element node );
5. Delete a node:
/* Detach the node from its parent and siblings first ... */
xmlUnlinkNode(cur);
/* ... then release it; cur must not be used after this call. */
xmlFreeNode(cur);
After the specified node is deleted this way, the remaining nodes are relinked automatically, so the rest of the tree structure stays intact. For example, after the <author> node is deleted, the first child of <storyinfo> becomes <datewritten>, and the previous sibling of <datewritten> is NULL instead of <author> as it was before the deletion.
Code:
#include <stdio.h>
#include <libxml/parser.h>
#include <libxml/xpath.h>

/*
 * Parse the XML file `docname`.
 *
 * Returns the parsed document, or NULL (after printing a message to
 * stderr) when parsing fails. The caller releases the document with
 * xmlFreeDoc.
 */
xmlDocPtr getdoc (char *docname)
{
    xmlDocPtr doc = xmlParseFile(docname);

    if (doc == NULL) {
        fprintf(stderr,"Document not parsed successfully. \n");
        return NULL;
    }
    return doc;
}

/*
 * Evaluate the XPath expression `xpath` against `doc`.
 *
 * Returns the XPath result object when it holds a non-empty node set;
 * the caller releases it with xmlXPathFreeObject. Returns NULL, after
 * printing a message to stdout, when the context cannot be created,
 * the expression cannot be evaluated, or the node set is empty.
 */
xmlXPathObjectPtr getnodeset (xmlDocPtr doc, xmlChar *xpath)
{
    xmlXPathContextPtr context;
    xmlXPathObjectPtr result;

    context = xmlXPathNewContext(doc);
    if (context == NULL) {
        printf("Error in xmlXPathNewContext\n");
        return NULL;
    }

    /* The context is only needed for the evaluation itself. */
    result = xmlXPathEvalExpression(xpath, context);
    xmlXPathFreeContext(context);
    if (result == NULL) {
        printf("Error in xmlXPathEvalExpression\n");
        return NULL;
    }

    if (xmlXPathNodeSetIsEmpty(result->nodesetval)) {
        xmlXPathFreeObject(result);
        printf("No result\n");
        return NULL;
    }
    return result;
}

/*
 * Usage: prog docname
 *
 * Parses `docname`, selects every node matching "//root" and prints the
 * text content of each match as "keyword1: <text>".
 *
 * Returns 0 on success and 1 when the arguments are missing or the
 * document cannot be parsed.
 */
int main(int argc, char **argv)
{
    xmlChar *xpath = (xmlChar*) "//root";
    xmlDocPtr doc;
    xmlXPathObjectPtr result;
    int i;

    if (argc <= 1) {
        printf("Usage: %s docname\n", argv[0]);
        return 1;
    }

    doc = getdoc(argv[1]);
    if (doc == NULL) {
        /* getdoc already reported the error; nothing to query. */
        xmlCleanupParser();
        return 1;
    }

    result = getnodeset(doc, xpath);
    if (result != NULL) {
        xmlNodeSetPtr nodeset = result->nodesetval;

        for (i = 0; i < nodeset->nodeNr; i++) {
            xmlChar *keyword = xmlNodeListGetString(doc, nodeset->nodeTab[i]->xmlChildrenNode, 1);

            if (keyword == NULL) {
                /* No text content: avoid passing NULL to %s. */
                printf("keyword1: %s\n", "");
                continue;
            }
            printf("keyword1: %s\n", (const char *)keyword);
            xmlFree(keyword);
        }
        xmlXPathFreeObject(result);
    }

    xmlFreeDoc(doc);
    xmlCleanupParser();
    return 0;
}