// NOTE(review): this file was reconstructed from whitespace-collapsed, case-mangled text.
// The casing of project-defined identifiers (AnfleCrawler.Common, AnalyzerBase, PageCrawler,
// PageLandEntity, PushUrl, QueryNodes, ...) is a best-effort PascalCase guess - confirm
// against the real project declarations before building.
using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AnfleCrawler.DataAnalyzer
{
    /// <summary>
    /// Analyzer for company pages on qy.58.com: depth 0 handles listing pages,
    /// depth 1 handles a single company page and saves it as a company entity.
    /// </summary>
    internal class Qy58 : AnalyzerBase
    {
        /// <summary>
        /// Runs the base initialization and pushes the first listing page at depth 0.
        /// </summary>
        /// <param name="crawler">Crawler instance forwarded to the base class.</param>
        public override void Init(PageCrawler crawler)
        {
            base.Init(crawler);

            var url = new Uri("http://qy.58.com/caohejing/pn1/?PGTID=14177711280840.45006677554920316&ClickID=1");
            // Presumably an example of a company detail page URL, kept for reference:
            // http://qy.58.com/19583455460359/?PGTID=14177659184690.5166369006238447&clickid=4
            Crawler.PushUrl(url, 0);
        }

        /// <summary>
        /// Processes one landed page according to its depth.
        /// Depth 0: runs paging and pushes every link found in the company list at depth 1.
        /// Depth 1: collects the company attributes, fills a <c>CompanyEntity</c> and saves it.
        /// Any other depth is ignored.
        /// </summary>
        /// <param name="current">The page being analyzed; its Depth and Url are read here.</param>
        protected override void AnalyzeInternal(PageLandEntity current)
        {
            var lander = Crawler.Lander;
            var pHandler = CreateContentHandler(current);

            switch (current.Depth)
            {
                case 0:
                    {
                        // HACK is not defined in this file; presumably a member inherited from
                        // the base class - TODO confirm.
                        pHandler.AjaxBlocks.Add(HACK);
                        var dom = lander.GetDocument(pHandler);

                        // NOTE(review): the selectors below contain a space after the dot and
                        // odd capitalisation; they look garbled - verify against the live markup.
                        DoPerPaging(current, dom.DocumentNode, ". Next");
                        foreach (var node in QueryNodes(dom.DocumentNode, ". Complist a"))
                        {
                            var url = GetHref(node, current.Url);
                            Crawler.PushUrl(url, 1);
                        }
                    }
                    break;

                case 1:
                    {
                        var dom = lander.GetDocument(pHandler);
                        var attr = new AttributeFiller();

                        // NOTE(review): selector strings look garbled (see depth 0) - verify.
                        attr.Append("name:{0}", QueryTexts(dom.DocumentNode, ". Compt").First());

                        // The first header cell is skipped; every remaining <th> is a label whose
                        // value lives in the sibling node that follows it.
                        foreach (var th in QueryNodes(dom.DocumentNode, ". basicmsg table th").Skip(1))
                        {
                            string sTh = th.InnerText, sTd;

                            // NOTE(review): these labels must equal the header text on the page
                            // and the keys of the mapping dictionary below; they appear to have
                            // been translated - confirm against the site.
                            switch (sTh)
                            {
                                case "Contact Phone":
                                case "Email":
                                    {
                                        // These two values are published as images, so the image
                                        // is downloaded and passed to OCR.
                                        var imgNode = QueryNode(th.NextSibling, "img");
                                        byte[] imgRaw;
                                        using (var client = new System.Net.WebClient())
                                        {
                                            imgRaw = client.DownloadData(GetHref(imgNode, current.Url, attrName: "src"));
                                        }

                                        // The stream has to stay open for as long as the bitmap is
                                        // in use, so both are disposed together after OCR returns.
                                        using (var stream = new System.IO.MemoryStream(imgRaw))
                                        using (var img = new System.Drawing.Bitmap(stream))
                                        {
                                            sTd = OCR(img);
                                        }
                                    }
                                    break;

                                case "Company Address":
                                    sTd = QueryTexts(th.NextSibling, "span").First();
                                    break;

                                default:
                                    sTd = th.NextSibling.InnerText.HtmlTrim();
                                    break;
                            }

                            attr.Append("{0}:{1}", sTh, sTd);
                        }

                        var bo = new CompanyEntity();
                        bo.City = "Shanghai";
                        bo.GroupName = "caohejing Enterprise";
                        bo.PageUrl = current.Url.OriginalString;
                        bo.UpdateDate = DateTime.Now;

                        // Maps the page labels collected above to names passed to FillEntity
                        // (presumably CompanyEntity property names - TODO confirm).
                        // NOTE(review): " Scale" carries a leading space unlike every other
                        // value; likely a garbling artifact - verify before relying on it.
                        attr.FillEntity(bo, new Dictionary<string, string>()
                        {
                            {"Company Nature", "Nature"},
                            {"Company Industry", "Industry"},
                            {"Company Size", " Scale"},
                            {"Contact Person", "ContactPerson"},
                            {"Corporate website", "Website"},
                            {"Contact Phone", "Tel"},
                            {"Email", "Email"},
                            {"Company Address", "Address"},
                        });

                        Repository.SaveCompany(bo);
                        Crawler.OutWrite("save Enterprise {0}", bo.Name);
                    }
                    break;
            }
        }
    }
}
58.com Qiyi