Web crawlers are quite interesting, so here is the simplest possible program: one that fetches the HTML source of the Baidu homepage. It is very basic for now, but later posts will go deeper.
package test;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;

/**
 * Minimal crawler example: sends a GET request to the Baidu homepage and
 * prints the HTML that comes back to standard output.
 */
public class Main {

    /**
     * Fetches the page line by line, accumulates it in memory, and prints it.
     *
     * <p>Any failure while connecting or reading is reported on standard
     * output together with a stack trace; whatever was read before the
     * failure is still printed afterwards.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // The link that will be accessed.
        String url = "https://www.baidu.com/";
        // Accumulates the page content; StringBuilder avoids the quadratic
        // cost of repeated String concatenation inside the read loop.
        StringBuilder result = new StringBuilder();

        try {
            // Turn the string into a URL object.
            URL realUrl = new URL(url);
            // Initialize a connection to that URL.
            URLConnection connection = realUrl.openConnection();
            // Start the actual connection.
            connection.connect();

            // try-with-resources closes the reader even if reading fails.
            // Decode explicitly as UTF-8 so the output does not depend on the
            // platform default charset.
            // NOTE(review): assumes the page is served as UTF-8 - confirm
            // against the response's Content-Type if other sites are crawled.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                // Temporarily holds each line as it is read.
                String line;
                while ((line = reader.readLine()) != null) {
                    // readLine() strips the line terminator, so add one back.
                    result.append(line).append('\n');
                }
            }
        } catch (Exception e) {
            System.out.println("Send GET request exception!" + e);
            e.printStackTrace();
        }

        System.out.println(result);
    }
}
Java: fetching the source code of the Baidu homepage