/** * Read XML content by the way * @param lablenames the name of the node to read * @param file_path_name file Absolute path * @return */ public static WebMagic ReadXML (List<String>lablenames, String file_path_name) {WebMagic WebMagic = new WebMagic (); try {documentbuilderfactory factory = documentbuilderfactory. newinstance (); Documentbuilder builder = Factory.newdocumentbuilder (); Org.w3c.dom.Document Document = builder.parse (new File (File_path_name)); Org.w3c.dom.Element rootelement = Document.getdocumentelement (); for (int i = 0; I<lablenames.size(); i++) {NodeList list= Rootelement.getelementsbytagname (Lablenames.get (i)); org.w3c.dom.Element Element= (org.w3c.dom.Element)List. Item (0); System.out.println (Element.getchildnodes (). Item (0). Getnodevalue ()); if ("StartURL". Equals (Element.getnodename ())) {Webmagic.setstarturl (Element.getchildnodes (). Item (0) . Getnodevalue ()); } if ("Regexstarturl". Equals (Element.getnodename ())) {Webmagic.setregexstarturl (element . Getchildnodes (). Item (0). Getnodevalue ()); } if ("LabelName". Equals (Element.getnodename ())) {Webmagic.setlabelname (Element.getchil Dnodes (). Item (0). Getnodevalue ()); } if ("Labeltype". Equals (Element.getnodename ())) {Webmagic.setlabeltype (Element.getchil Dnodes (). Item (0). Getnodevalue ()); } if ("Regexdescendants". Equals (Element.getnodename ())) {Webmagic.sEtregexdescendants (Element.getchildnodes (). Item (0). Getnodevalue ()); }}} catch (Exception e) {System.out.println ("Exception:" + e.getmessage ()); } return webMagic; }
Reading XML content by node name