Not much to say here — let's go straight to the code.
Statistics of the highest scores for male and female students of each age group
Due to space constraints, the sample input data is laid out row by row as shown below.
Code
package zhouls.bigdata.myMapReduce.Gender;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
*
* @function statistics of the highest scores of men and women in different age groups
*
*
*/
/*
Alice<tab>23<tab>female<tab>45
bob<tab>34<tab>male<tab>89
chris<tab>67<tab>male<tab>97
kristine<tab>38<tab>female<tab>53
Connor<tab>25<tab>male<tab>27
Daniel<tab>78<tab>male<tab>95
james<tab>34<tab>male<tab>79
alex<tab>52<tab>male<tab>69
Nancy<tab>7<tab>female<tab>98
Adam<tab>9<tab>male<tab>37
Jacob<tab>7<tab>male<tab>23
mary<tab>6<tab>female<tab>93
clara<tab>87<tab>female<tab>72
monica<tab>56<tab>female<tab>92
*/
public class Gender extends configured implements Tool {
/*
*
* @function Mapper Parse input data and output as required
* @input key= line offset value= student data
* @output Key=gender Value=name+age+score
*
*/
public static class Pcmapper extends Mapper<object, text, text, text>
{
public void Map (Object key, Text value, Context context) throws IOException, Interruptedexception
{//Take alice<tab>23<tab>female<tab>45
string[] tokens = value.tostring (). Split ("<tab>");//use delimiter <tab> to parse data into arrays tokens
Get Alice23 Female45
i.e. Tokens[0] tokens[1] tokens[2] tokens[3]
String gender = tokens[2].tostring ();//gender
String Nameagescore = tokens[0] + "\ T" + tokens[1] + "\ T" + tokens[3];
Output Key=gender Value=name+age+score
Output Key=female value=alice+23+45
Context.write (new text (gender), new text (Nameagescore));//Writes (female, alice+ 23+ 45) to the context
}
}
public static class Myhashpartitioner extends Partitioner<text, text>
{
/** Use {@link object#hashcode ()} to partition. */
@Override
public int getpartition (text key, text Value,int numreducetasks)
{
Return (Key.hashcode ())% Numreducetasks;
}
}
/**
*
* @function partitioner Select reduce partition according to age
*
*/
public static class Pcpartitioner extends Partitioner<text, text>
{
@Override
public int getpartition (text key, text value, int numreducetasks)
{
TODO auto-generated Method Stub
string[] Nameagescore = value.tostring (). Split ("\ t");
String age = nameagescore[1];//Student
int ageint = Integer.parseint (age);//Partitioning by ages
Default specified partition 0
if (Numreducetasks = = 0)
return 0;
Age less than or equal to 20, specify partition 0
if (Ageint <= 20) {
return 0;
}
Age greater than 20, less than or equal to 50, specifying partition 1
if (Ageint > && ageint <= 50) {
return 1% Numreducetasks;
}
Remaining age, specify partition 2
Else
return 2% Numreducetasks;
}
}
/**
*
* @function define combiner merge Mapper output results
*
*/
public static class Pccombiner extends Reducer<text, text, text, text>
{
Private text text = new text ();
public void reduce (Text key, iterable<text> values, context context) throws IOException, Interruptedexception br>{
int maxscore = Integer.min_value;
String name = "";
String age = "";
int score = 0;
for (Text val:values)
{
string[] Valtokens = val.tostring (). Split ("\\t");
Score = Integer.parseint (valtokens[2]);
if (Score > Maxscore)
{
name = Valtokens[0];
age = valtokens[1];
Maxscore = score;
}
}
Text.set (name + "\ T" + age + "\ T" + maxscore);
Context.write (key, text);
}
}
/*
*
* @function Reducer statistics for the highest score of different ages and genders
* input key=gender value=name+age+score
* Output key=name Value=age+gender+score
*
*/
Static class Pcreducer extends Reducer<text, text, text, text>
{
@Ov Erride
public void reduce (Text key, iterable<text> values, context context) throws IOException, Interruptedexception
{
int maxscore = Integer.min_value;
String name = "";
String age = "";
String gender = "";
int score = 0;
//Based on key, iterate the values collection to find the highest score
for (Text val:values)
{
string[] Valtokens = val.tostring (). Split ("\\t");
Score = Integer.parseint (valtokens[2]);
if (Score > Maxscore)
{
name = Valtokens[0];
age = valtokens[1];
Gender = key.tostring ();
Maxscore = score;
}
}
Context.write (new text (name), new text ("age-" + age + "\ T" + Gender + "\tscore-" + Maxscore));
}
}
/**
* @function task-driven approach
* @param args
* @return
* @throws Exception
*/
@Override
public int run (string[] args) throws Exception
{
TODO auto-generated Method Stub
Configuration conf = new configuration ();//Read config file
Path MyPath = new Path (args[1]);
FileSystem HDFs = Mypath.getfilesystem (conf);
if (Hdfs.isdirectory (MyPath))
{
Hdfs.delete (MyPath, true);
}
@SuppressWarnings ("deprecation")
Job Job = new Job (conf, "gender");//Create a new task
Job.setjarbyclass (Gender.class);//Main class
Job.setmapperclass (pcmapper.class);//mapper
Job.setreducerclass (pcreducer.class);//reducer
Job.setpartitionerclass (Myhashpartitioner.class);
Job.setpartitionerclass (Pcpartitioner.class);//Set Partitioner class
Job.setnumreducetasks (3);//reduce number set to 3
Job.setmapoutputkeyclass (text.class);//map output Key type
Job.setmapoutputvalueclass (text.class);//map Output value type
Job.setcombinerclass (Pccombiner.class);//Set Combiner class
Job.setoutputkeyclass (Text.class);//Output result key type
Job.setoutputvalueclass (Text.class);//Output result value type
Fileinputformat.addinputpath (Job, New Path (args[0]));//input path
Fileoutputformat.setoutputpath (Job, New Path (args[1]));//Output path
Job.waitforcompletion (TRUE);//Submit Task
return 0;
}
/**
* @function Main method
* @param args
* @throws Exception
*/
public static void Main (string[] args) throws Exception
{
String[] Args0 = {
"Hdfs://hadoopmaster:9000/gender/gender.txt",
"Hdfs://hadoopmaster:9000/out/partition/"};
String[] Args0 = {
"./data/gender/gender.txt",
"./out/gender"};
int EC = Toolrunner.run (New Configuration (), New Gender (), ARGS0);
System.exit (EC);
}
}
Introduction to the Hadoop MapReduce programming API series — student score statistics, part 2 (article 18).