import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Downloads an HTML page and collects the {@code https://} URLs found in its
 * {@code src="..."} attributes.
 *
 * <p>Running {@link #main(String[])} prints the number of URLs found followed by
 * one URL per line.
 */
public class TestIndex {

    /** Base address of the server that hosts the page. */
    private static final String ROOT_URL = "http://localhost/apk/";

    /** Opening of a {@code src} attribute whose value is an https URL. */
    private static final String SRC_PREFIX = "src=\"https://";

    /** Number of characters to skip to get past {@code src="} to the URL itself. */
    private static final int SRC_ATTR_LENGTH = "src=\"".length();

    /** URLs collected by the most recent successful {@link #getData()} call. */
    private static List<String> imageUrlList = new ArrayList<String>();

    /** Address of the page that is downloaded and scanned. */
    private final String listUrl = ROOT_URL + "test-index.htm";

    /**
     * Fetches the page, then prints the URL count and each URL on its own line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        TestIndex ti = new TestIndex();
        ti.getData();
        System.out.println(imageUrlList.size());
        for (String url : imageUrlList) {
            System.out.println(url);
        }
    }

    /**
     * Opens a connection to {@code urlStr} and returns its response stream.
     *
     * @param urlStr address to connect to
     * @return the response body stream; the caller is responsible for closing it
     * @throws IOException if the URL is malformed or the connection fails
     */
    private InputStream getNetInputStream(String urlStr) throws IOException {
        URL url = new URL(urlStr);
        URLConnection conn = url.openConnection();
        conn.connect();
        return conn.getInputStream();
    }

    /**
     * Downloads {@link #listUrl} and replaces the contents of {@link #imageUrlList}
     * with the URLs found in it.
     *
     * <p>If the download fails the error is reported on standard error and the
     * list is left as it was.
     */
    private void getData() {
        StringBuilder html = new StringBuilder();
        // try-with-resources closes the reader (and the underlying stream) even
        // when reading fails part-way through.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(getNetInputStream(listUrl), StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                // Lines are joined without a separator, as the page is scanned as one string.
                html.append(line);
            }
        } catch (IOException e) {
            System.err.println("Failed to read " + listUrl + ": " + e);
            return;
        }

        imageUrlList.clear();
        imageUrlList.addAll(extractImageUrls(html.toString()));
    }

    /**
     * Extracts every {@code https://} URL that appears as the value of a
     * double-quoted {@code src} attribute.
     *
     * <p>A URL runs from just after {@code src="} up to (not including) the next
     * double quote, so the result does not depend on which attribute follows
     * {@code src}.
     *
     * @param html markup to scan; {@code null} is treated as empty
     * @return the URLs in document order; empty if none are found
     */
    static List<String> extractImageUrls(String html) {
        List<String> urls = new ArrayList<String>();
        if (html == null) {
            return urls;
        }

        int from = 0;
        while (true) {
            int attrStart = html.indexOf(SRC_PREFIX, from);
            if (attrStart < 0) {
                break;
            }
            int urlStart = attrStart + SRC_ATTR_LENGTH;
            int urlEnd = html.indexOf('"', urlStart);
            if (urlEnd < 0) {
                // Unterminated attribute: nothing reliable left to extract.
                break;
            }
            urls.add(html.substring(urlStart, urlEnd));
            // Resume after the closing quote so the same attribute is never matched twice.
            from = urlEnd + 1;
        }
        return urls;
    }
}
The program above parses out the URLs contained in the .htm file.
Results:
20https://encrypted-tbn3.gstatic.com/images?q=tbn:and9gcrvqgujsvdbncm3mvigiyiue87bnlyjuy2bnsap8kuotanrc_ Css5mvawhttps://encrypted-tbn2.gstatic.com/images?q=tbn:and9gcthd8cyjotmcgyjzxx5ls-xpxaalh1_yocoscqi5_ 7OKL29SNTBCZ7Q2YOJHTTPS://ENCRYPTED-TBN0.GSTATIC.COM/IMAGES?Q=TBN: and9gctl-fzkmsppxuwzmtitgcv9udxmrwr1pg0lw8mud9wkwiloasxqebemnvjzhttps://encrypted-tbn3.gstatic.com/images?q= Tbn:and9gcqwbmizjixkhv2iotbp7zsy6kd5g5vpzvtbtljyyr5nwttki2-0_u93ql4ehttps://encrypted-tbn1.gstatic.com/images? Q=tbn:and9gcslrli_gtvguehu7cofe1emdrjxpdvs42itqxkla0g75s31nbfaq2u1le4https://encrypted-tbn3.gstatic.com/images ? q=tbn:and9gcskrlygxss8dr_7k3muvogq1ve45lghz0zehiedd9llziaomce7iaqn8hohttps://encrypted-tbn0.gstatic.com/ images?q=tbn:and9gctu__ousj4r4ekbu4joi2zadhohpvqiby3-sfni8fypn8wvc9kjg_awuk_whttps:// encrypted-tbn3.gstatic.com/images?q=tbn:and9gcr3bf7ytshj813a5_wwzpxiy4mbemqz5nlw3qv1npxozqvjh7qly-qyscghttps:/ /encrypted-tbn0.gstatic.com/images?q=tbn:and9gctob4njpqvwnzn0xeasnxyhxggoqhxdype6kzimtfv9k52eire3iysA6IXMHTTPS://ENCRYPTED-TBN1.GSTATIC.COM/IMAGES?Q=TBN: and9gctkkw0lpqdb2eqmupwdqdvm9dtentq1mrvmnivoqtn37p3m0opsx4me9i4ohttps://encrypted-tbn0.gstatic.com/images?q= Tbn:and9gcszgzmf_3hmddktz91yp5zqi-egwlcenz0u446sxt2nqyuwlwri_v_bviwihttps://encrypted-tbn0.gstatic.com/images? q=tbn:and9gctqf-55t5gm3dldaoafpdliyk0esnvm6-bsb4-b2rqteyd5ggockxokexm-https://encrypted-tbn2.gstatic.com/ images?q=tbn:and9gcrorjo4tfexmx47ze6vh0ylco0iq2hbsohyimjci9msryg_pf1whhbqg76qhttps:// encrypted-tbn1.gstatic.com/images?q=tbn:and9gcrrdegt1koey51dlwrjabvmjblcez7fpl2mztyym6onvxocrcq030ft1gehttps:/ /encrypted-tbn1.gstatic.com/images?q=tbn:and9gcttnqpte0uq9ue9nsg25geo1kw_-hcn69oztqkimbhrxkwlanutyhwkd9xmhttps ://encrypted-tbn0.gstatic.com/images?q=tbn: and9gcrnrdxzmufkaboggyv2sc0gmticosl2lb3v1fbmowntvbzxhkymw4icwbfjhttps://encrypted-tbn0.gstatic.com/images?q= Tbn:and9gcqr40cef75nwcj5dg-oektb9zk6mhktu7vnfoyah5ioy34goc3c9ptdkqwphttps://encrypted-tbn2.gstatic.com/images? 
Q=tbn:and9gcqunyhrvebppqhznwnqrijhbfp0x34grf7pkw6pdt4ggepb2K9G-P71SGGHHTTPS://ENCRYPTED-TBN1.GSTATIC.COM/IMAGES?Q=TBN: And9gcr9us9qblbtjaw47gulxci8shkn4i61gyst2ijebtzzgsmdi8gmyqqpiiwhttps://encrypted-tbn0.gstatic.com/images?q=tbn : and9gcsirw-ibbzjm9ztn60r9qe1_fimjt494qgx12tqslsibyplufvwyvsgz1i
Fetch an HTML page over the network and parse out the URLs it contains