Not much to say here; let's go straight to the code.
The task is to compute traffic statistics over the raw traffic log, and to write the statistics of users from different provinces to different output files (a partitioner sketch for that split appears after the reducer below).
Code
package zhouls.bigdata.myMapReduce.flowsum;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

public class FlowBean implements WritableComparable<FlowBean> {

    private String phoneNB;
    private long up_flow;
    private long d_flow;
    private long s_flow;

    // During deserialization the reflection mechanism needs to call the no-arg
    // constructor, so an empty constructor is defined explicitly
    public FlowBean() {}

    // To make it easy to initialize the object's data, add a constructor with parameters
    public FlowBean(String phoneNB, long up_flow, long d_flow) {
        this.phoneNB = phoneNB;
        this.up_flow = up_flow;
        this.d_flow = d_flow;
        this.s_flow = up_flow + d_flow;
    }

    public String getPhoneNB() {
        return phoneNB;
    }

    public void setPhoneNB(String phoneNB) {
        this.phoneNB = phoneNB;
    }

    public long getUp_flow() {
        return up_flow;
    }

    public void setUp_flow(long up_flow) {
        this.up_flow = up_flow;
    }

    public long getD_flow() {
        return d_flow;
    }

    public void setD_flow(long d_flow) {
        this.d_flow = d_flow;
    }

    public long getS_flow() {
        return s_flow;
    }

    public void setS_flow(long s_flow) {
        this.s_flow = s_flow;
    }

    // Serialize the object's fields into the output stream
    public void write(DataOutput out) throws IOException {
        out.writeUTF(phoneNB);
        out.writeLong(up_flow);
        out.writeLong(d_flow);
        out.writeLong(s_flow);
    }

    // Deserialize the object's fields from the input stream;
    // fields must be read in exactly the order in which they were serialized
    public void readFields(DataInput in) throws IOException {
        phoneNB = in.readUTF();
        up_flow = in.readLong();
        d_flow = in.readLong();
        s_flow = in.readLong();
    }

    @Override
    public String toString() {
        return "" + up_flow + "\t" + d_flow + "\t" + s_flow;
    }

    // Sort in descending order of total flow
    public int compareTo(FlowBean o) {
        return s_flow > o.getS_flow() ? -1 : 1;
    }
}
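The write/readFields pair is the whole serialization contract this bean has with Hadoop, and the read order really must mirror the write order. Below is a minimal round-trip sketch (the demo class name and the sample phone number and byte counts are made up for illustration) that pushes a bean through a byte buffer, which is essentially what the framework does when shuffling it between nodes:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTripDemo {
    public static void main(String[] args) throws IOException {
        // Serialize a bean into a byte buffer using its own write() method
        FlowBean original = new FlowBean("13726230503", 2481, 24681);
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize it back; readFields() consumes the fields in the same
        // order that write() produced them
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        // Prints 2481, 24681 and 27162 separated by tabs (see toString above)
        System.out.println(copy);
    }
}

If readFields read the long fields before the UTF string, the first readLong would consume bytes belonging to the encoded phone number and every field after it would be garbage, which is why the order comment above matters.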
package zhouls.bigdata.myMapReduce.flowsum;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * FlowBean is a custom data type of ours. For it to be transferred between Hadoop
 * nodes it has to follow Hadoop's serialization mechanism, i.e. it must implement
 * the corresponding Hadoop serialization interface.
 */
public class FlowSumMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    // Take one line of the log, split it into fields, extract the fields we need
    // (phone number, upstream flow, downstream flow), wrap them as a KV pair and emit it
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {

        // Take one line of data
        String line = value.toString();

        // Split it into individual fields
        String[] fields = StringUtils.split(line, "\t");

        // Extract the fields we need
        String phoneNB = fields[1];
        long u_flow = Long.parseLong(fields[7]);
        long d_flow = Long.parseLong(fields[8]);

        // Wrap the data as a KV pair and emit it
        context.write(new Text(phoneNB), new FlowBean(phoneNB, u_flow, d_flow));
    }
}
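The mapper assumes a tab-separated log line in which the phone number sits at index 1 and the upstream/downstream byte counts at indices 7 and 8 of the split array. Here is a standalone sketch of just that split-and-parse step; the log line below is hypothetical, and only its column positions matter:

import org.apache.commons.lang.StringUtils;

public class FieldExtractionDemo {
    public static void main(String[] args) {
        // Hypothetical tab-separated log line; the mapper only uses columns 1, 7 and 8
        String line = "1363157985066\t13726230503\t00-FD-07-A4-72-B8\t120.196.100.82"
                + "\texample.com\t24\t27\t2481\t24681\t200";
        String[] fields = StringUtils.split(line, "\t");

        System.out.println("phone = " + fields[1]);                 // 13726230503
        System.out.println("up    = " + Long.parseLong(fields[7])); // 2481
        System.out.println("down  = " + Long.parseLong(fields[8])); // 24681
    }
}

Note that StringUtils.split from commons-lang collapses adjacent separators and never returns empty tokens, so a line with a missing middle field would shift the remaining columns to the left; this version-1 code does not guard against that.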
package zhouls.bigdata.myMapReduce.flowsum;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowSumReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    // The framework passes in one group of data at a time, e.g.
    // <1387788654, {flowbean, flowbean, flowbean, ...}>, and calls our reduce method once per group.
    // The business logic in reduce is to traverse the values, add them up, and output the sum
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context)
            throws IOException, InterruptedException {

        long up_flow_counter = 0;
        long d_flow_counter = 0;

        for (FlowBean bean : values) {
            up_flow_counter += bean.getUp_flow();
            d_flow_counter += bean.getD_flow();
        }

        context.write(key, new FlowBean(key.toString(), up_flow_counter, d_flow_counter));
    }
}
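The per-province split mentioned in the introduction is not part of this version-1 code; it would normally be done with a custom Partitioner, so that each province lands on its own reduce task and therefore its own output file. Below is a minimal sketch, assuming a hypothetical (made-up) mapping from phone-number prefix to province number:

package zhouls.bigdata.myMapReduce.flowsum;

import java.util.HashMap;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical sketch: route each phone number to a partition (reduce task) by province.
// The prefix-to-province table below is invented purely for illustration.
public class ProvincePartitioner extends Partitioner<Text, FlowBean> {

    private static final HashMap<String, Integer> provinceMap = new HashMap<String, Integer>();
    static {
        provinceMap.put("137", 0);
        provinceMap.put("138", 1);
        provinceMap.put("139", 2);
    }

    @Override
    public int getPartition(Text key, FlowBean value, int numPartitions) {
        String prefix = key.toString().substring(0, 3);
        Integer province = provinceMap.get(prefix);
        // Unknown prefixes all fall into the last partition
        return province == null ? numPartitions - 1 : province;
    }
}

Wiring it in would mean adding job.setPartitionerClass(ProvincePartitioner.class) and job.setNumReduceTasks(4) to the runner below, so that each reduce task writes its own part-r-0000N file.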
package zhouls.bigdata.myMapReduce.flowsum;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// This is the canonical way to write the job description and submission class
public class FlowSumRunner extends Configured implements Tool {

    public int run(String[] arg0) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(FlowSumRunner.class);

        job.setMapperClass(FlowSumMapper.class);
        job.setReducerClass(FlowSumReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        FileInputFormat.addInputPath(job, new Path(arg0[0]));   // file input path
        FileOutputFormat.setOutputPath(job, new Path(arg0[1])); // file output path

        job.waitForCompletion(true);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        // Cluster paths
//      String[] args0 = {"hdfs://HadoopMaster:9000/flowsum/HTTP_20130313143750.dat",
//              "hdfs://HadoopMaster:9000/out/flowsum"};

        // Local paths
        String[] args0 = {"./data/flowsum/HTTP_20130313143750.dat",
                "./out/flowsum/"};

        int ec = ToolRunner.run(new Configuration(), new FlowSumRunner(), args0);
        System.exit(ec);
    }
}
Hadoop MapReduce Programming API Starter Series Web traffic version 1 (22)