Version 1 cannot yet write its output to a .txt file, and a few display errors remain whose cause is still unclear.
For example:
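The following stray markup still needs to be filtered out:
Market price: >Market price ¥299</> < class=
Price: >Price $99</span>
</div>
</li>
<
Messy tag fragments like these get mixed into the output; they should be stripped so that only the market price and the price are left on each line.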
To get the job finished, use the brute-force approach first:
Appendstringtoatextfile.append(new File("D://list.txt"), "Test 1");
Store all the information in D:/list.txt, then batch-replace all the stray symbols (again by brute force) and save a clean "D:/list.txt".
In addition, I still need to download each product image and then show the local path of the saved image in the list.
package newfolder;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Walks the product search result pages of s.vancl.com, starting at
 * {@code /search.aspx}, and prints four fields of every product entry
 * (link, name, market price, price) to standard output.
 *
 * <p>The page is not parsed as HTML. It is cut into one text segment per
 * product at a fixed marker, every segment is split at double quotes, and the
 * fields are read from fixed positions in the resulting array. The positions
 * are tied to the exact markup the site served when this was written.
 */
public class catchillfinalv1 {

    /** Scheme and host that the relative next-page link is appended to. */
    private static final String SITE_ROOT = "http://s.vancl.com";

    /** Text that introduces every product entry in the result page. */
    private static final String ITEM_MARKER = "li class=\"sclistarea";

    /**
     * Text used to locate the pagination link.
     * NOTE(review): this literal (like the output labels below) looks
     * machine-translated; confirm the marker against the live page.
     */
    private static final String NEXT_PAGE_MARKER = "next page";

    // Positions of the four fields in the LAST product segment of a page,
    // counted from the start. That segment also contains the rest of the page,
    // so the from-the-end offsets used for the other segments do not apply.
    private static final int LAST_ITEM_LINK = 62;
    private static final int LAST_ITEM_NAME = 64;
    private static final int LAST_ITEM_MARKET_PRICE = 69;
    private static final int LAST_ITEM_PRICE = 71;

    /**
     * Downloads the document at {@code urlstring} and returns it as one string
     * with every line terminated by {@code "\n"}. The stream is decoded with
     * the platform default charset.
     *
     * <p>As a diagnostic, the number of pieces obtained by splitting the page
     * at {@code "href"} is printed to standard output.
     *
     * @param urlstring absolute HTTP URL of the page to fetch
     * @return the page text, or {@code null} if anything went wrong (the stack
     *         trace is printed to standard error in that case)
     */
    public static String gethtml(String urlstring) {
        HttpURLConnection conn = null;
        BufferedReader br = null;
        try {
            URL url = new URL(urlstring);
            conn = (HttpURLConnection) url.openConnection();
            br = new BufferedReader(new InputStreamReader(conn.getInputStream()));

            StringBuilder html = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                html.append(line).append("\n");
            }

            String test = html.toString();
            System.out.println(test.split("href").length);
            return test;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            // Release the stream and the connection even when reading failed.
            if (br != null) {
                try {
                    br.close();
                } catch (Exception ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Fetches the first result page, prints its products, then follows the
     * next-page link until a page cannot be fetched or has no such link.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = SITE_ROOT + "/search.aspx";
        int printed = 0;
        String nextpageurl;
        do {
            nextpageurl = "";
            String test = catchillfinalv1.gethtml(url);
            if (test == null) {
                // gethtml has already printed the reason.
                break;
            }
            printed = printItems(test, printed);

            nextpageurl = findNextPagePath(test);
            if (!nextpageurl.isEmpty()) {
                System.out.println("Next Page Link:" + nextpageurl);
                // The link is relative (/search.aspx?...), so the host has to
                // be put in front of it before it can be fetched.
                url = SITE_ROOT + nextpageurl;
            }
        } while (!nextpageurl.isEmpty()); // compare contents, not references
    }

    /**
     * Prints every product found in {@code html} and returns the running item
     * count after this page.
     */
    private static int printItems(String html, int printedSoFar) {
        // segments[0] is the text before the first product; every later
        // element starts right after one ITEM_MARKER.
        String[] segments = html.split(ITEM_MARKER);
        int last = segments.length - 1;
        int number = printedSoFar;

        // All products but the last one: the segment ends where the next
        // product starts, so the fields sit at fixed offsets from the end.
        for (int j = 1; j < last; j++) {
            String[] output = segments[j].split("\"");
            number++;
            if (output.length < 10) {
                System.err.println("Skipping item " + number + ": unexpected markup");
                continue;
            }
            System.out.println("th" + number + "commodity");
            System.out.println("item link:" + output[output.length - 10]);
            System.out.println("item name:" + output[output.length - 8]);
            System.out.println("Market Price:" + output[output.length - 3].replace("span", ""));
            System.out.println("Price:" + output[output.length - 1]);
        }

        // Last product of the page: read the fields from the front instead.
        if (last >= 1) {
            String[] output = segments[last].split("\"");
            number++;
            if (output.length <= LAST_ITEM_PRICE) {
                System.err.println("Skipping item " + number + ": unexpected markup");
            } else {
                System.out.println("the current number is" + number + "item");
                System.out.println("item link:" + output[LAST_ITEM_LINK]);
                System.out.println("item name:" + output[LAST_ITEM_NAME]);
                System.out.println("Market Price:" + output[LAST_ITEM_MARKET_PRICE]);
                System.out.println("Price:" + output[LAST_ITEM_PRICE]);
            }
        }
        return number;
    }

    /**
     * Returns the quoted value that directly precedes the end of the text
     * following the first NEXT_PAGE_MARKER, or an empty string when the page
     * has no such marker (the last page).
     */
    private static String findNextPagePath(String html) {
        String[] page = html.split(NEXT_PAGE_MARKER);
        if (page.length < 2) {
            return "";
        }
        String[] count = page[1].split("\"");
        if (count.length < 2) {
            return "";
        }
        return count[count.length - 2];
    }
}