I. Basic site metrics (i.e., statistics produced by analyzing logs of website user behavior)
1. PV: Page View (page view count; a record is generated whenever a page is accessed, regardless of IP; usually counted per day/week/month). 2. UV: Unique Visitors (Unique Visitor; cookie-based — multiple visits by the same user on the same day count as one). 3. VV: Visit View (session-based; a visitor entering the site and eventually closing all of its pages counts as one visit). 4. IP: number of distinct IP addresses (each distinct IP is recorded; repeated accesses from the same IP count once). 5. Website traffic usually refers to the number of visits to a website — how many users visited the site and how many pages they viewed; for a virtual hosting provider, traffic refers to the amount of data transferred by users while visiting the site.
II. Example of UV statistics (i.e., how many people visit the site from each province each day)
1. Requirements analysis: 1) Examine the data we receive, identify what the records have in common, and decide what the map, shuffle, and reduce phases should each do. 2) Define the output we want, e.g. a list of (date, province, UV count). 2. Implementation plan: 1) Determine what delimiter separates the fields and whether we need a custom data type. 2) Roughly: filter out invalid records, combine the fields we need into a key, then deduplicate and accumulate the records by province. 3) A custom data type is not required here: use Text to combine the field values, pass them to the reduce method, use a HashMap keyed by date and province to accumulate the counts, and write the combined output in cleanup.
III. UV statistics code example
Webuvmr.java============package Com.bigdata_senior. Webuvmr;import java.io.ioexception;import java.util.hashmap;import Java.util.map;import java.util.Set;import Org.apache.commons.lang.stringutils;import Org.apache.hadoop.conf.configuration;import Org.apache.hadoop.fs.Path ; Import Org.apache.hadoop.io.longwritable;import Org.apache.hadoop.io.nullwritable;import Org.apache.hadoop.io.text;import Org.apache.hadoop.mapreduce.job;import Org.apache.hadoop.mapreduce.Mapper; Import Org.apache.hadoop.mapreduce.reducer;import Org.apache.hadoop.mapreduce.lib.input.fileinputformat;import Org.apache.hadoop.mapreduce.lib.output.fileoutputformat;public class WEBUVMR {//mapper Classprivate static class Wordcountmapper extends Mapper<longwritable, text, text, nullwritable>{private text Mapoutkey = new text (); overridepublic void Map (longwritable key, Text value, Context context) throws IOException, interruptedexception {String li Nevalue = Value.tostring (); String [] strvalue = Linevalue.split ("\ t");(> Strvalue.length) {return;} String Guididvalue = strvalue[5];if (Stringutils.isblank (Guididvalue)) {return;} String Tracktimevalue = strvalue[17];if (Stringutils.isblank (Tracktimevalue)) {return;} String DateValue = tracktimevalue.substring (0,10), Integer proviceidvalue = Integer.max_value;try{if ( Stringutils.isblank (strvalue[23]) {return;} Proviceidvalue = integer.valueof (strvalue[23]);} catch (Exception e) {return;} Mapoutkey.set (datevalue+ "\ T" +proviceidvalue+ "_" +guididvalue);//system.out.println ("Key-->" +mapOutKey+ " Value--> "+nullwritable.get ()); Context.write (Mapoutkey, Nullwritable.get ());}} Reduce classprivate Static class Wordcountreduce extends Reducer<text, nullwritable, Text, longwritable>{ Private map<string,integer> datemap;private Text Outputkey = new text ();p rivate longwritable outputvalue = new LongW Ritable (); @Overrideprotected void Setup (context context) throws Ioexception,interruptedexception {Datemap = new HashMap <String,Integer> ();} 
@Overridepublic VOID reduce (Text key, iterable<nullwritable> Values,context Context) throws IOException, Interruptedexception { String date = Key.tostring (). Split ("_") [0];if (Datemap.containskey (date)) {Integer PREUV = datemap.get (date);// System.out.println ("====->" +PREUV); Integer UV = Preuv + 1;datemap.put (date, UV);} Else{datemap.put (date, 1);} System.out.println (Datemap.tostring ());} @Overrideprotected void Cleanup (context context) throws ioexception,interruptedexception {set<string> Dateset = Datemap.keyset ();//system.out.println (dateset.tostring ()); for (String date:dateset) {Integer UV = Datemap.get (date) ; Outputkey.set (date); Outputvalue.set (UV); System.out.println ("Result:-->key" +outputkey+ "value-->" +outputvalue); Context.write (Outputkey, Outputvalue );}}} driverpublic int Run (string[] args) throws Exception {Configuration configuration = new configuration (); Job Job = job.getinstance (configuration, This.getclass (). Getsimplename ()); Job.setjarbyclass (This.getclass ());// InputPath InPAth = new Path (args[0]); Fileinputformat.addinputpath (Job,inpath);//outputpath Outpath = new Path (args[1]); Fileoutputformat.setoutputpath (Job, Outpath);//mapperjob.setmapperclass (Wordcountmapper.class); Job.setmapoutputkeyclass (Text.class); Job.setmapoutputvalueclass (nullwritable.class);// Reducejob.setreducerclass (Wordcountreduce.class); Job.setoutputkeyclass (Text.class); Job.setOutputValueClass ( Longwritable.class);//submit Jobboolean issuccess = Job.waitforcompletion (true); return issuccess? 0:1;} public static void Main (string[] args) throws Exception {args = new string[]{"hdfs://hadoop09-linux-01.ibeifeng.com:8020 /user/liuwl/tmp/webuv/input "," Hdfs://hadoop09-linux-01.ibeifeng.com:8020/user/liuwl/tmp/webuv/output4 "};//run Jobint status = New WEBUVMR (). run (args); System.exit (status);}}
Hadoop 2.x Web UV example