Version 1 cannot yet write its output to a .txt file, and some items are still displayed incorrectly for reasons that are not yet clear.
 
 
 
For example:
 
Markup fragments like the following still need to be filtered out:
 
 
 
 
 
Market price: >Market price ¥299</> <Class =
 
Price: >Price $99</Span>
 
 
 
</Div>
 
</LI>
 
 
<
 
 
 
Some items contain stray markup symbols like these; only the market price and the price should be left on each line.
 
 
 
To get the job finished, use the brute-force approach first:
 
AppendStringToATextFile.append(new File("D://list.txt"), "Test 1");
 
Append all of the information to D:/list.txt, then use a crude batch find-and-replace to strip out the stray symbols, leaving a clean "D:/list.txt".
 
 
 
In addition, I still need to download each product image and show the local path of the saved image in the list.
 
 
 
 
 
package newfolder;
 
 
 
 
 
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
 
 
 
/**
 * Downloads search-result pages starting at {@code http://s.vancl.com/search.aspx}
 * and prints the link, name, market price and price of every listed product to
 * standard output, following the "next page" link until none is found.
 *
 * <p>Parsing is purely positional: the page is cut at each item marker, every
 * piece is split on double quotes, and fields are read from fixed token
 * positions. Pieces that do not have enough tokens are skipped with a message on
 * standard error instead of aborting the whole run.
 */
public class catchillfinalv1 {

    /** Scheme and host that site-relative next-page links are appended to. */
    private static final String BASE_URL = "http://s.vancl.com";

    /** First result page to download. */
    private static final String START_URL = BASE_URL + "/search.aspx";

    /**
     * Text that starts every product entry in the page markup.
     * NOTE(review): the exact letter case of the class name in the live markup
     * could not be confirmed from this file, so it is matched case-insensitively.
     */
    private static final Pattern ITEM_MARKER =
            Pattern.compile("li class=\"sclistarea", Pattern.CASE_INSENSITIVE);

    /**
     * Text used to locate the pager link.
     * NOTE(review): this must equal the link text used by the site's pager;
     * confirm against the live page.
     */
    private static final String NEXT_PAGE_MARKER = "next page";

    /** Number of products on a full result page; used only for numbering the output. */
    private static final int ITEMS_PER_PAGE = 40;

    /** Connect/read timeout so that a stalled server cannot hang the program. */
    private static final int TIMEOUT_MILLIS = 15_000;

    // Token positions counted back from the END of an item's quote-delimited
    // tokens. Used for every item that is followed by another item marker.
    private static final int LINK_FROM_END = 10;
    private static final int NAME_FROM_END = 8;
    private static final int MARKET_PRICE_FROM_END = 3;
    private static final int PRICE_FROM_END = 1;

    // Absolute token positions for the last item on a page. Its piece runs on
    // into the rest of the page, so counting from the end is not possible there.
    private static final int LAST_LINK = 62;
    private static final int LAST_NAME = 64;
    private static final int LAST_MARKET_PRICE = 69;
    private static final int LAST_PRICE = 71;

    /**
     * Downloads the resource at the given URL and returns it as text.
     *
     * <p>Lines are joined with {@code '\n'}, and a trailing {@code '\n'} follows
     * the last line. The charset is taken from the response's content type when
     * it names one, otherwise UTF-8 is used.
     *
     * @param urlstring absolute URL to download
     * @return the downloaded text, or {@code null} if the URL is malformed or
     *         the download fails (the failure is reported on standard error)
     */
    public static String gethtml(String urlstring) {
        URLConnection conn = null;
        try {
            URL url = new URL(urlstring);
            conn = url.openConnection();
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            conn.setReadTimeout(TIMEOUT_MILLIS);

            StringBuilder html = new StringBuilder();
            // try-with-resources closes the reader (and the underlying stream)
            // even when reading fails half-way.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), charsetOf(conn)))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    html.append(line).append("\n");
                }
            }
            return html.toString();
        } catch (IOException | RuntimeException e) {
            // Callers rely on null (not an exception) to signal failure.
            System.err.println("Failed to download " + urlstring + ": " + e);
            return null;
        } finally {
            if (conn instanceof HttpURLConnection) {
                ((HttpURLConnection) conn).disconnect();
            }
        }
    }

    /** Returns the charset named in the connection's content type, or UTF-8 if none is usable. */
    private static Charset charsetOf(URLConnection conn) {
        String contentType = conn.getContentType();
        if (contentType != null) {
            String key = "charset=";
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (param.regionMatches(true, 0, key, 0, key.length())) {
                    String name = param.substring(key.length()).trim().replace("\"", "");
                    try {
                        return Charset.forName(name);
                    } catch (IllegalArgumentException ignored) {
                        // Unknown or illegal charset name: fall back to the default below.
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }

    /**
     * Walks the result pages from the start URL and prints every product found.
     * Stops when a page cannot be downloaded or has no next-page link.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String url = START_URL;
        int pageIndex = 0;
        String nextPageUrl = "";
        do {
            String html = gethtml(url);
            if (html == null) {
                System.err.println("Stopping: could not download " + url);
                break;
            }

            printItems(html, pageIndex);

            nextPageUrl = findNextPageUrl(html);
            if (!nextPageUrl.isEmpty()) {
                System.out.println("Next Page Link:" + nextPageUrl);
                // The link is site-relative (e.g. /search.aspx?...), so the
                // scheme and host have to be put in front of it.
                url = BASE_URL + nextPageUrl;
                pageIndex++;
            }
            // Strings are compared by content; comparing references with != would
            // never detect the empty string reliably.
        } while (!nextPageUrl.isEmpty());
    }

    /** Prints every product found in one page; {@code pageIndex} is zero-based and only affects numbering. */
    private static void printItems(String html, int pageIndex) {
        String[] pieces = ITEM_MARKER.split(html);
        // pieces[0] is the markup in front of the first product, so it is skipped.
        int itemCount = pieces.length - 1;
        for (int j = 1; j <= itemCount; j++) {
            String[] tokens = pieces[j].split("\"");
            int number = ITEMS_PER_PAGE * pageIndex + j;
            if (j < itemCount) {
                printRegularItem(tokens, number);
            } else {
                printLastItem(tokens, number);
            }
        }
    }

    /** Prints an item whose piece ends at the next item marker, reading fields relative to the end. */
    private static void printRegularItem(String[] tokens, int number) {
        int n = tokens.length;
        if (n < LINK_FROM_END) {
            System.err.println("Skipping item " + number + ": unexpected markup (" + n + " tokens)");
            return;
        }
        System.out.println("th" + number + "commodity ");
        System.out.println("item link:" + tokens[n - LINK_FROM_END]);
        System.out.println("item name:" + tokens[n - NAME_FROM_END]);
        // The market-price token still carries span tags; drop the tag name.
        System.out.println("Market Price:" + tokens[n - MARKET_PRICE_FROM_END].replace("span", ""));
        System.out.println("Price:" + tokens[n - PRICE_FROM_END]);
    }

    /** Prints the last item of a page, reading fields at fixed positions from the start of its piece. */
    private static void printLastItem(String[] tokens, int number) {
        int n = tokens.length;
        if (n <= LAST_PRICE) {
            System.err.println("Skipping item " + number + ": unexpected markup (" + n + " tokens)");
            return;
        }
        System.out.println("the current number is" + number + "item ");
        System.out.println("item link:" + tokens[LAST_LINK]);
        System.out.println("item name:" + tokens[LAST_NAME]);
        System.out.println("Market Price:" + tokens[LAST_MARKET_PRICE]);
        System.out.println("Price:" + tokens[LAST_PRICE]);
    }

    /**
     * Extracts the next-page link from a page.
     *
     * <p>Takes the text that follows the first occurrence of the next-page marker
     * (up to the next occurrence, if any) and returns its second-to-last
     * quote-delimited token.
     * NOTE(review): this presumably relies on the pager appearing twice on the
     * page, so that the token is the href of the second link; verify against the
     * live markup.
     *
     * @return the link as found in the markup, or an empty string if the marker is absent
     */
    private static String findNextPageUrl(String html) {
        String[] parts = html.split(NEXT_PAGE_MARKER);
        if (parts.length < 2) {
            return "";
        }
        String[] tokens = parts[1].split("\"");
        if (tokens.length < 2) {
            return "";
        }
        return tokens[tokens.length - 2];
    }
}