廢話不多說,就拿 URLConnection 類舉個例子,寫一段擷取網站原始碼的程式:
/**
 * Demo: downloads the source of a web page and saves it to a local file.
 *
 * <p>Reads a host/path from standard input, prefixes it with {@code http://},
 * fetches the page line by line and writes the text to {@code d:1.html}.
 */
public class urldemo {
    /**
     * Prompts for an address, fetches the page and stores it on disk.
     * Any failure is reported by printing the stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Scanner scan = new Scanner(System.in);
        System.out.println("請輸入網址:");
        String urlstr = "http://" + scan.next();
        try {
            URL url = new URL(urlstr);
            URLConnection urlconn = url.openConnection();

            // The whole page is collected first so that a failed download
            // never leaves a partially written output file behind.
            StringBuilder sb = new StringBuilder();
            // NOTE(review): no charset is given, so the default charset is used
            // for both reading and writing; pages in another encoding may be garbled.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(urlconn.getInputStream()))) {
                String s;
                while ((s = reader.readLine()) != null) {
                    // readLine() strips the line terminator; put one back so the
                    // saved HTML keeps its line structure.
                    sb.append(s).append(System.lineSeparator());
                }
            }

            // NOTE(review): "d:1.html" has no separator after the drive letter, so on
            // Windows it is relative to the current directory of drive D - confirm intent.
            try (BufferedWriter bw = new BufferedWriter(new FileWriter("d:1.html"))) {
                bw.write(sb.toString());
            }
            System.out.println("另存新檔成功!");
        } catch (Exception e) {
            // Demo-level handling: just show what went wrong.
            e.printStackTrace();
        }
    }
}
大家都知道 URL 就是統一資源定位器(Uniform Resource Locator),
它由協議名和資源名兩部分組成。
下面是一個處理中文亂碼的方法:
package mynet;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Date;
// NOTE(review): JDK-internal type that is not exported to application code on JDK 9+;
// retained only because the file already imported it.
import sun.net.www.protocol.http.HttpURLConnection;
/**
 * Demo: fetches a fixed web page and prints it decoded as UTF-8, so that
 * non-ASCII (e.g. Chinese) text is not garbled by the default charset.
 */
public class urldemo {
    /**
     * Prints the response date and content type of the page, then its body.
     * I/O problems (including a malformed URL) are reported on standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println( "starting... ");
        try {
            URL url = new URL( "http://www.111cn.net ");
            // Only URLConnection methods are needed, so no HTTP-specific cast is made.
            URLConnection urlcon = url.openConnection();
            System.out.println( "the date is : " + new Date(urlcon.getDate()));
            System.out.println( "content_type : " + urlcon.getContentType());
            try (InputStream in = urlcon.getInputStream()) {
                // Decode explicitly as UTF-8 instead of relying on the default charset.
                String webpage = new String(readFully(in), StandardCharsets.UTF_8);
                System.out.println(webpage);
            }
        } catch (IOException e) {
            // MalformedURLException is an IOException, so one handler covers both.
            System.out.println( " " + e);
        }
    }

    /**
     * Reads the stream until end-of-file and returns everything that was read.
     * available() is not used: it only reports what can be read without blocking.
     *
     * @param in stream to drain; not closed by this method
     * @return all bytes read from {@code in}
     * @throws IOException if reading fails
     */
    private static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] chunk = new byte[8192];
        int count;
        while ((count = in.read(chunk)) != -1) {
            collected.write(chunk, 0, count);
        }
        return collected.toByteArray();
    }
}