Recently, in order to write a paper, I needed to collect user data from the site in bulk (each user's personal homepage is public), so I wrote a small script to do the collecting. It has to be pasted into the browser's Web Console. In the end, data for more than 3,000 users were collected.
/*
 * Collection items and collection criteria
 *
 * User number
 *   imooc user numbers are incrementing seven-digit numbers below 5000000.
 *   Users are therefore sampled at random, partition by partition, from the
 *   range 1000000 to 5000000.
 * Length of study
 *   The site records each user's total study time (e.g. 110 hours 34 minutes).
 *   To simplify later statistics the value is converted into minutes.
 * Integral (credit)
 *   Points reflect the degree to which a user participates in the social network.
 * Experience
 *   Compared with the length of study, experience more directly reflects how
 *   far courses have been completed.
 * Number of learning courses
 *   To exclude users who enrolled without learning, a course only counts when
 *   its learning progress is more than 5%.
 * Average degree of completion
 *   The average completion percentage over a user's counted courses.
 * Number of courses completed
 *   Courses whose progress is 80% and above.
 * Number of participating paths
 *   The number of learning paths the user has joined.
 * Path completion degree
 *   The degree of completion of the learning paths.
 */

/**
 * Collects the statistics of one user and prints them as one tab-separated
 * line to the console 10 seconds after construction, then closes the window.
 *
 * @param {Window} user - already loaded pop-up showing the user's course page
 * @param {string} link - URL of that course page (contains "courses")
 * @param {number} i    - user number; printed as the first column
 */
var UserInfo = function (user, link, i) {
  var self = this;
  this.name = i;
  this.getBasicInfo(user);
  this.getCourseNum(user);
  this.getRoute(user, link);
  // The extra pages are read by 4-5 s timers, so the result is printed only
  // after 10 s to give them time to finish.
  setTimeout(function () {
    console.log(
      self.name + "\t" + self.learnTime + "\t" + self.credit + "\t" + self.mp + "\t" +
      self.courseNum + "\t" + self.learnAve + "\t" + self.finish + "\t" +
      self.routeNum + "\t" + self.routeAve
    );
    user.close();
  }, 10000);
};

/**
 * Converts the study-time text into minutes.
 *
 * Two numbers are read as hours and minutes. A single number is read as hours
 * when it is followed by at least two non-word characters and as minutes
 * otherwise.
 * NOTE(review): this mirrors the original look-ahead on two non-word
 * characters, which presumably tells a two-character hour unit from a
 * one-character minute unit - confirm against the live page markup.
 *
 * @param {string} text - inner HTML of the study-time element
 * @returns {number} total minutes, or NaN when the text holds no number
 */
UserInfo.parseLearnMinutes = function (text) {
  // Whitespace would otherwise be mistaken for part of the unit.
  var compact = String(text).replace(/\s+/g, "");
  var numbers = compact.match(/\d+/g);
  if (!numbers) {
    return NaN;
  }
  if (numbers.length > 1) {
    return parseInt(numbers[0], 10) * 60 + parseInt(numbers[1], 10);
  }
  var value = parseInt(numbers[0], 10);
  return /\d\W{2}/.test(compact) ? value * 60 : value;
};

/**
 * Returns the first (optionally decimal) number found in a text.
 *
 * @param {string} text - e.g. the inner HTML of a progress label
 * @returns {number} the number, or NaN when there is none
 */
UserInfo.firstNumber = function (text) {
  var found = /\d+(?:\.\d+)?/.exec(String(text));
  return found ? parseFloat(found[0]) : NaN;
};

UserInfo.prototype = {
  /**
   * Convenience accessor: inner HTML of the first `tag` element inside the
   * first element carrying `className` in the given window.
   */
  getElem: function (user, className, tag) {
    return user.document
      .getElementsByClassName(className)[0]
      .getElementsByTagName(tag)[0].innerHTML;
  },

  /**
   * Reads the basic figures: study time in minutes (learnTime), credit and
   * experience (mp).
   *
   * @param {Window} d - the user's page
   */
  getBasicInfo: function (d) {
    this.learnTime = UserInfo.parseLearnMinutes(this.getElem(d, "u-info-learn", "em"));
    this.credit = parseFloat(this.getElem(d, "u-info-credit", "em"));
    this.mp = parseFloat(this.getElem(d, "u-info-mp", "em"));
  },

  /**
   * Counts the user's courses and their completion over all course-list pages
   * and stores courseNum, learnAve (string with two decimals) and finish.
   * Pages other than the current one are opened in pop-ups and read by timers.
   *
   * @param {Window} d - the user's (first) course page
   */
  getCourseNum: function (d) {
    var self = this;
    var user = d;
    var courseNum = 0;
    var learnSum = 0;
    var finish = 0;

    // Adds the courses of one page to the running totals and publishes them.
    function getOnePage(obj) {
      var courses = obj.document.getElementsByClassName("course-one");
      if (courses.length === 0) {
        return;
      }
      for (var k = 0; k < courses.length; k++) {
        var label = courses[k].getElementsByClassName("i-left")[0];
        var learned = label ? UserInfo.firstNumber(label.innerHTML) : NaN;
        if (learned > 5) {
          courseNum += 1;
          learnSum += learned;
          // NOTE(review): the criteria comment says "80% and above" while the
          // code has always compared with "> 80"; the code is kept - confirm.
          if (learned > 80) {
            finish += 1;
          }
        }
      }
      self.courseNum = courseNum;
      if (courseNum !== 0) {
        // Guarded so that no "NaN" average is stored when nothing qualifies.
        self.learnAve = (learnSum / courseNum).toFixed(2);
      }
      self.finish = finish;
    }

    // Opens one further page and reads it after `delay` ms. The pop-up is
    // closed whether or not it loaded, so failed pages do not pile up.
    function collectLater(url, label, delay) {
      var page = window.open(url);
      setTimeout(function () {
        if (page.document.getElementsByClassName("course-one").length > 0) {
          getOnePage(page);
        } else {
          // A slow network can leave the page unloaded when the timer fires.
          console.log("page " + label + " didn't load!");
        }
        page.close();
      }, delay);
    }

    // Current page.
    getOnePage(user);

    // Pages linked directly from the pager (the first entry is skipped).
    var tags = user.document.getElementsByClassName("text-page-tag");
    for (var p = 1; p < tags.length; p++) {
      collectLater(tags[p].getAttribute("href"), p, 5000);
    }

    // Remaining pages (8 and up), derived from the pager's last link.
    var pagers = user.document.getElementsByClassName("page");
    if (pagers.length > 0) {
      var last = pagers[0].lastChild;
      var href = last && typeof last.getAttribute === "function" ? last.getAttribute("href") : null;
      // \d+ (not \d): the page count may have more than one digit.
      var countMatch = href ? /\d+$/.exec(href) : null;
      var prefixMatch = href ? /^.*=(?=\d)/.exec(href) : null;
      if (countMatch && prefixMatch) {
        var allPage = parseInt(countMatch[0], 10);
        for (var q = 8; q <= allPage; q++) {
          collectLater(prefixMatch[0] + q, q, 4000);
        }
      }
    }
  },

  /**
   * Opens the user's learning-path page (the course URL with "courses"
   * replaced by "plans") and, after 5 s, stores routeNum (number of paths)
   * and routeAve (mean progress of the paths whose progress exceeds 2).
   *
   * @param {Window} d    - the user's page (unused, kept for the call signature)
   * @param {string} link - URL of the user's course page
   */
  getRoute: function (d, link) {
    var self = this;
    var openPage = window.open(link.replace("courses", "plans"));
    setTimeout(function () {
      var items = openPage.document.getElementsByClassName("plans-item");
      if (items.length > 0) {
        var progress = openPage.document.getElementsByClassName("plans-list-progress");
        var routeSum = 0;
        var learnCount = 0;
        for (var k = 0; k < items.length; k++) {
          var learned = progress[k] ? UserInfo.firstNumber(progress[k].innerHTML) : NaN;
          if (learned > 2) {
            routeSum += learned;
            learnCount += 1;
          }
        }
        if (learnCount > 0) {
          self.routeAve = routeSum / learnCount;
        }
        self.routeNum = items.length;
      }
      openPage.close();
    }, 5000);
  }
};
// Open pages: visit 1000 consecutive user numbers, one every 4 seconds,
// starting from a randomly chosen number.
var n = Math.round(Math.random() * 1000000 + 4000000); // random start between 4000000 and 5000000
var min = n;

/**
 * Opens the course page of user `n` in a pop-up, waits 4 s for it to load,
 * starts a UserInfo collection when the page shows a user name, and then
 * moves on to the next user until 1000 users have been visited.
 */
function openLink() {
  if (n >= min + 1000) { // fetch 1000 users in total
    return;
  }
  var id = n; // fixed copy: n changes before later callbacks run
  var link = "http://www.imooc.com/u/" + id + "/courses";
  var user = window.open(link);
  setTimeout(function () {
    try {
      if (user.document.getElementsByClassName("user-name").length > 0) {
        // UserInfo closes the window itself once it has printed its result.
        new UserInfo(user, link, id);
      } else {
        console.log(id + " page didn't exist!");
        user.close(); // nothing to collect; do not leave the pop-up open
      }
    } finally {
      // Keep the chain going even when one page could not be read.
      n = n + 1;
      openLink();
    }
  }, 4000);
}

openLink();
Collecting website data in bulk with JavaScript