Collect Web site data in large quantities in JavaScript

Source: Internet
Author: User

Recently, in order to write a paper, I needed to collect user data from the site in large batches (each user's personal homepage is public), so I wrote a script to collect it. The script has to be pasted into the browser's web console. In the end, more than 3,000 records were collected.

/*

Collection items

Collection criteria

User number

MU class network user number is seven digits increments, the range is within 5000000. therefore, The collection will be randomly collected from 1000000 to 5000000 of the data in the partition Section.

Length of study

Mu class has recorded the total length of study for users, such as 110 34 Minutes. To facilitate post-statistics, the data is converted into Minutes.

Integral

Points reflect the degree to which a user participates in a social Network.

Experience

Compared with the length of study, experience can directly reflect the degree of completion of the Course.

Number of learning courses

To prevent users from taking the course without learning, the course is based on the admission criteria for courses with more than 5% of the learning Schedule.

Average degree of completion

means the average degree of completion of a student's Course.

Number of courses completed

Courses completed for courses of 80% and Above.

Number of participating paths

The number of participants in the learning path of the Mu-class Network.

Path Completion degree

The degree of completion of the learning Path.

*/varUserInfo =function(user,link,i) {varuser =user; varlink =link;  this. Name =i; var_this_ = this;  this. Getbasicinfo (user);  this. Getcoursenum (user);  this. Getroute (user,link); SetTimeout (function() {console.log (_this_.name+ "\ t" +_this_.learntime+ "\ t" +_this_.credit+ "\ t" +_this_.mp+ "\ t" +_this_. coursenum+ "\ t" +_this_.learnave+ "\ t" +_this_.finish+ "\ t" +_this_.routenum+ "\ t" +_this_.routeave);    User.close (); },10000);};Userinfo.prototype ={//Convenient value-taking function;Getelem:function(user,classname,tag) {varInfor =user.document.getelementsbyclassname (className) [0].getelementsbytagname (tag) [0].innerhtml; returninfor; },//get basic information: name, length of study, integral, experience Value. Getbasicinfo:function(d) {varuser =d; //Get time;                    varTime = this. Getelem (user, "u-info-learn", "em"); varCuthour =/\d+ (? =\w{2})/g; varhour = parseint (time.match (cuthour)) *60; varCutminute =/\d+ (?! \w{2})/g; varminute =parseint (time.match (cutminute)); if(isNaN (hour)) {varLearntime =minute; }Else{                        varLearntime = hour+minute; }                     this. Learntime =learntime; //Get credit;                    varCredit = parsefloat ( this. Getelem (user, "u-info-credit", "em"));  this. Credit =credit ; //Get mp;                    varMP = parsefloat ( this. Getelem (user, "u-info-mp", "em"));  this. MP =mp; },//get the number of courses and completion of coursesGetcoursenum:function(d) {varSelf = this; varuser =d; varCoursenum = 0, Learnsum= 0, Finish= 0; //get the number of courses and the completion of a page                                functiongetonepage (obj) {if(obj.document.getElementsByClassName ("course-one"). 
length > 0){                        varCourse = Obj.document.getElementsByClassName ("course-one");  for(i = 0;i<course.length;i++){                            varHaslearn = Course[i].getelementsbyclassname ("i-left") [0].innerhtml, Cutword=/\w\w/g; varHaslearn =parsefloat (haslearn.replace (cutword, "")); if(haslearn > 5) {coursenum= Coursenum + 1; Learnsum= Learnsum +haslearn; if(haslearn > 80) {finish= Finish + 1;                        }                            };                        }; Self. Coursenum=coursenum; if(coursenum!== 0) {self.learnave= (learnsum/coursenum). toFixed (2);} self.finish=finish;                }                }; //get current pageGetonepage (user); //Get additional pages                if(user.document.getElementsByClassName ("text-page-tag"). length > 0){                    varPages = User.document.getElementsByClassName ("text-page-tag"); if(pages.length > 1){                         for(i=1;i<pages.length;i++){                            !function(i) {varpage = window.open (pages[i].getattribute ("href")); SetTimeout (functionget () {if(page.document.getElementsByClassName ("course-one"). length > 0) {getonepage (page);                                            Page.close (); Self. Coursenum=coursenum; Self.learnave= (learnsum/coursenum). toFixed (2); Self.finish =finish; }Else{console.log ("page" + i + "didn ' t load!");                                    }                                },5000);//What if some pages do not open in 4S, resulting in the inability to take a value? } (i);                    };                }; }                //get a list of other pages                if(user.document.getElementsByClassName ("page"). length > 0){                varLastPage =user.document.getelementsbyclassname ("page") [0].lastchild.getattribute ("href")); varAllpage = parseint (lastpage.match (/\d$/)); if(allpage>7){                    varOtherlink = Lastpage.match (/^.*= (? 
=\d)/g);  for(i=8;i <= allpage;i++){                            !function(i) {varpage = window.open (otherlink+i); SetTimeout (functionget () {if(page.document.getElementsByClassName ("course-one"). length > 0) {getonepage (page);                                        Page.close (); Self. Coursenum=coursenum; Self.learnave= (learnsum/coursenum). toFixed (2); Self.finish =finish; }Else{console.log ("page" + i + "didn ' t load!");                                }                            },4000);//Remember to find a good network, to prevent some of the page does not open in 4S, resulting in a value cannot be Taken. } (i);                };                }; }    },//get number of participating pathsGetroute:function(d,link) {varuser =d; varSelf = this; varCurrpage =link; varOpenpage = window.open (currpage.replace ("courses", "plans")) SetTimeout (function(){                if(openPage.document.getElementsByClassName ("plans-item"). length > 0){                    varRoute = OpenPage.document.getElementsByClassName ("plans-item"); varRoutenum =route.length; varRoutelearn = OpenPage.document.getElementsByClassName ("plans-list-progress"); varroutesum = 0,learnsum = 0;  for(i=0;i<routenum;i++){                        varHaslearn = parsefloat (routelearn[i].innerhtml.replace (/\w+/, "")); if(haslearn>2) {routesum= Routesum +haslearn; Learnsum+=1;                    };                    }; if(learnsum > 0) {routeave= routesum/learnsum; Self.routeave =routeave; } self.routenum=routenum;                };            Openpage.close (); },5000)                }};

Open Pagevarn = math.round (math.random () *1000000+4000000);//take a random number between 3000000-4000000varMin =n;functionOpenLink () {if(n< min+1000) {//fetch 1000 copies of datavarlink = "http://www.imooc.com/u/" + n + "/courses"; varuser =window.open (link); SetTimeout (function(){ if(user.document.getElementsByClassName ("user-name"). length > 0){ NewUserInfo (user,link,n); }Else{console.log (n+ "page didn ' t exit!") }; },4000); SetTimeout (function() {n = N+1;openlink ();},4000)}};openlink ();

Collect Web site data in large quantities in JavaScript

Related Article

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.