Scraping a large amount of ringtone data from the Shoujiduoduo ringtone website with multi-threaded Java

Source: Internet
Author: User
Tags http request

I have always wanted to practice crawling data with multiple threads in Java.

One day I found that the ringtone website (http://www.shoujiduoduo.com/main/) has a lot of data.

By looking at the Ajax requests made by their front end, I found the endpoint that returns the ringtone data:


http://www.shoujiduoduo.com/ringweb/ringweb.php?type=getlist&listid={category id}&page={Paging page number}

By changing listid and page, it is easy to fetch the ringtone data from the server as JSON. After parsing that JSON,

you can see that every response contains an element such as {"hasmore":1,"curpage":1}; the value of hasmore tells the crawler whether there is a next page to fetch.

However, the JSON returned by the link above does not contain the download address of the ringtone.

You will soon notice that clicking "Download" on the page sends another request.

With the following request, you can get the download address of a ringtone:

http://www.shoujiduoduo.com/ringweb/ringweb.php?type=geturl&act=down&rid={Ringtones ID}


So their data is very easy to scrape, and I got started...

The source code has been published on GitHub; anyone who is interested can take a look.

Github:https://github.com/yongbo000/duoduoaudiorobot

Code on:

package me.yongbo.DuoduoRingRobot;

import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Import Com.google.gson.JsonParser; * * * @author yongbo_ * @created 2013/4/16 * * */public class Duoduoringrobotclient implements Runnable {public static St Ring Get_ringinfo_url = "http://www.shoujiduoduo.com/ringweb/ringweb.php?type=getlist&listid=%1$d&page=%2
$d "; public static String Get_down_url = "http://www.shoujiduoduo.com/ringweb/ringweb.php?type=geturl&act=down&
Rid=%1$d "; The public static String error_msg = "Listid is%1$d robot An error occurred and has stopped automatically.
The current page is%2$d ";p ublic static String status_msg =" Start crawling data, current Listid:%1$d, Current page:%2$d "; PubLic static string file_dir = "e:/ringdata/";p ublic static string file_name = "Listid=%1$d.txt";p rivate boolean errorflag =
false;private int listid;private int page;
private int endpage = -1;private int hasmore = 1;
Private DBHelper DBHelper;  /** * constructor * @param listid Menu ID * @param page start Page * @param endpage End Page Number */public duoduoringrobotclient (int listid, int beginpage, int endpage) {This.listid = Listid;this.page = Beginpage;this.endpage = Endpage;this.dbhelper = new DbHelper (
);} /** * constructor * @param listid Menu ID * @param page start Page * * */public duoduoringrobotclient (int listid, int page) {This (Listid, p
Age,-1);} /** * Get ringtones * */public void getrings () {String URL = String.Format (get_ringinfo_url, Listid, page);
String responsestr = httpget (URL); hasmore = Gethasmore (RESPONSESTR);
page = Getnextpage (RESPONSESTR); Ringparse (Responsestr.replaceall ("\\{\" hasmore\ ": [0-9]*,\" curpage\ ": [0-9]*\\},", ""). ReplaceAll (",]", "]"));} /** * Initiates HTTP request * @param weburl Request Connection Address * */public String httpget (String weburl) {URL url; URLConnection Conn; StringBuilder sb = new StringBuilder (); String resultstr = ""; try {url = new URL (weburl); conn = Url.openconnection (); Conn.connect (); InputStream is = Conn.getinput Stream (); InputStreamReader ISR = new InputStreamReader (IS); BufferedReader bufreader = new BufferedReader (ISR); String linetext;while ((Linetext = Bufreader.readline ())!= null) {sb.append (linetext);} ResultStr = Sb.tostring ();} catch (Exception e) {Errorflag = true;//writes the error to Txtwritetofile (String.Format (error_msg, Listid, page)); return resultstr;} /** * Converts the JSON string into a ring object and deposits it in TXT * @param json JSON string * */public void Ringparse (string json) {Ring Ring = NULL; jsonelement element = new Jsonparser (). Parse (JSON);
Jsonarray array = Element.getasjsonarray ();//traversal array iterator<jsonelement> it = Array.iterator (); Gson Gson = new Gson () while (It.hasnext () &&!errorflag) {jsonelement e = It.next ();//Jsonelement Convert to JavaBean object ri ng = Gson.fromjson (E, Ring.class); Ring.setdownurl (Getringdownurl ring.GetId ()); if (isavailablering (ring)) {System.out.println (ring.tostring ());//Optionally write to the database or write to the text//writetofile (
Ring.tostring ()); Writetodatabase (ring);}}}  /** * Write txt * @param data string * */public void WriteToFile (String data) {string path = File_dir + String.Format (file_name, Listid); File dir = new file (File_dir); File File = new file (path); FileWriter FW = Null;if (!dir.exists ()) {dir.mkdirs ();} try {if (!file.exists ()) {file.createnewfile ();}
FW = new FileWriter (file, true); Fw.write (data); Fw.write ("\ r \ n"); Fw.flush ();} catch (IOException e) {//TODO auto-generated catch Blocke.printstacktrace ();} Finally {try {if (fw!= null) {Fw.close ();} catch (IOException e) {/TODO auto-generated catch Blocke.printstacktrace ();} }}/** * Write to Database * @param ring An instance of a ring * */public void writetodatabase (Ring ring) {Dbhelper.execute ("addring", Ring);} @Ov erridepublic void Run () {while (Hasmore = = 1 &&!errorflag) {if (EndPage!=-1) {if (page > EndPage) {break;}} System.out.println (String.Format (status_msg,Listid, page)); Getrings (); System.out.println (String.Format ("This page data writes to finish"));
System.out.println ("Ending ...");} 
 private int Gethasmore (String resultstr) {Pattern p = pattern.compile ("\ hasmore\":([0-9]*), \ curpage\:([0-9]*)];  
 Matcher match = P.matcher (RESULTSTR);
  if (Match.find ()) {return Integer.parseint (Match.group (1));
return 0; private int getnextpage (String resultstr) {Pattern p = pattern.compile ("\ hasmore\":([0-9]*), \ curpage\ ":([0-9]*)"); Matcher match = P.matcher (RESULTSTR); if (Match.find ()) {return Integer.parseint (Match.group (2));}
return 0;} /** * Determines whether the current ring satisfies the condition. When the ring's name is greater than 50 characters or the duration is a decimal, the condition is excluded.
* @param ring Current instance of ring Object * */private Boolean isavailablering (Ring ring) {Pattern p = pattern.compile ("^[1-9][0-9]*$");
Matcher match = P.matcher (Ring.getduration ()); if (!match.find ()) {return false;} if (Ring.getname (). Length () > | | ring.getartist (). Length () > | | ring.getdownurl (). Length () = 0) {return false;
}return true;} /** * Get ringtones Download address * @param The ID of the RID ring* */public string Getringdownurl (string rid) {string url = String.Format (get_down_url, RID); String responsestr = httpget (URL); return responsestr;}}
Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.