Introduction to the Hadoop MapReduce Programming API Series: Student Score Statistics, Part 2 (18)

Last Update:2016-12-12 Source: Internet

Author: User

Tags iterable hadoop mapreduce

Developer on Alibaba Cloud: Build your first app with APIs, SDKs, and tutorials on the Alibaba Cloud. Read more >

Without further ado, let's go straight to the code.

Goal: find the highest score among male and among female students within each age group.

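To avoid mistakes caused by invisible whitespace, the rows of the input data are written with an explicit, literal `<tab>` marker between fields, as in the sample data shown in the code below.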

Code

package zhouls.bigdata.myMapReduce.Gender;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
*
* @function statistics of the highest scores of men and women in different age groups
*
*
*/

/*
Alice<tab>23<tab>female<tab>45
bob<tab>34<tab>male<tab>89
chris<tab>67<tab>male<tab>97
kristine<tab>38<tab>female<tab>53
Connor<tab>25<tab>male<tab>27
Daniel<tab>78<tab>male<tab>95
james<tab>34<tab>male<tab>79
alex<tab>52<tab>male<tab>69
Nancy<tab>7<tab>female<tab>98
Adam<tab>9<tab>male<tab>37
Jacob<tab>7<tab>male<tab>23
mary<tab>6<tab>female<tab>93
clara<tab>87<tab>female<tab>72
monica<tab>56<tab>female<tab>92
*/
public class Gender extends configured implements Tool {
/*
*
* @function Mapper Parse input data and output as required
* @input key= line offset value= student data
* @output Key=gender Value=name+age+score
*
*/
public static class Pcmapper extends Mapper<object, text, text, text>
{
public void Map (Object key, Text value, Context context) throws IOException, Interruptedexception
{//Take alice<tab>23<tab>female<tab>45
string[] tokens = value.tostring (). Split ("<tab>");//use delimiter <tab> to parse data into arrays tokens
Get Alice23 Female45
i.e. Tokens[0] tokens[1] tokens[2] tokens[3]
String gender = tokens[2].tostring ();//gender
String Nameagescore = tokens[0] + "\ T" + tokens[1] + "\ T" + tokens[3];
Output Key=gender Value=name+age+score
Output Key=female value=alice+23+45
Context.write (new text (gender), new text (Nameagescore));//Writes (female, alice+ 23+ 45) to the context
}
}
public static class Myhashpartitioner extends Partitioner<text, text>
{
/** Use {@link object#hashcode ()} to partition. */
@Override
public int getpartition (text key, text Value,int numreducetasks)
{
Return (Key.hashcode ())% Numreducetasks;
}

}
/**
*
* @function partitioner Select reduce partition according to age
*
*/
public static class Pcpartitioner extends Partitioner<text, text>
{

@Override
public int getpartition (text key, text value, int numreducetasks)
{
TODO auto-generated Method Stub
string[] Nameagescore = value.tostring (). Split ("\ t");
String age = nameagescore[1];//Student
int ageint = Integer.parseint (age);//Partitioning by ages

Default specified partition 0
if (Numreducetasks = = 0)
return 0;

Age less than or equal to 20, specify partition 0
if (Ageint <= 20) {
return 0;
}
Age greater than 20, less than or equal to 50, specifying partition 1
if (Ageint > && ageint <= 50) {

return 1% Numreducetasks;
}
Remaining age, specify partition 2
Else
return 2% Numreducetasks;
}
}

/**
*
* @function define combiner merge Mapper output results
*
*/
public static class Pccombiner extends Reducer<text, text, text, text>
{
Private text text = new text ();

public void reduce (Text key, iterable<text> values, context context) throws IOException, Interruptedexception br>{
int maxscore = Integer.min_value;
String name = "";
String age = "";
int score = 0;
for (Text val:values)
{
string[] Valtokens = val.tostring (). Split ("\\t");
Score = Integer.parseint (valtokens[2]);
if (Score > Maxscore)
{
name = Valtokens[0];
age = valtokens[1];
Maxscore = score;
}
}
Text.set (name + "\ T" + age + "\ T" + maxscore);
Context.write (key, text);
}
}

/*
*
* @function Reducer statistics for the highest score of different ages and genders
* input key=gender value=name+age+score
* Output key=name Value=age+gender+score
*
*/
Static class Pcreducer extends Reducer<text, text, text, text>
{
@Ov Erride
public void reduce (Text key, iterable<text> values, context context) throws IOException, Interruptedexception
{
int maxscore = Integer.min_value;
String name = "";
String age = "";
String gender = "";
int score = 0;
//Based on key, iterate the values collection to find the highest score
for (Text val:values)
{
string[] Valtokens = val.tostring (). Split ("\\t");
Score = Integer.parseint (valtokens[2]);
if (Score > Maxscore)
{
name = Valtokens[0];
age = valtokens[1];
Gender = key.tostring ();
Maxscore = score;
}
}
Context.write (new text (name), new text ("age-" + age + "\ T" + Gender + "\tscore-" + Maxscore));
}
}

/**
* @function task-driven approach
* @param args
* @return
* @throws Exception
*/
@Override
public int run (string[] args) throws Exception
{
TODO auto-generated Method Stub
Configuration conf = new configuration ();//Read config file

Path MyPath = new Path (args[1]);
FileSystem HDFs = Mypath.getfilesystem (conf);
if (Hdfs.isdirectory (MyPath))
{
Hdfs.delete (MyPath, true);
}

@SuppressWarnings ("deprecation")
Job Job = new Job (conf, "gender");//Create a new task
Job.setjarbyclass (Gender.class);//Main class
Job.setmapperclass (pcmapper.class);//mapper
Job.setreducerclass (pcreducer.class);//reducer

Job.setpartitionerclass (Myhashpartitioner.class);
Job.setpartitionerclass (Pcpartitioner.class);//Set Partitioner class
Job.setnumreducetasks (3);//reduce number set to 3

Job.setmapoutputkeyclass (text.class);//map output Key type
Job.setmapoutputvalueclass (text.class);//map Output value type

Job.setcombinerclass (Pccombiner.class);//Set Combiner class

Job.setoutputkeyclass (Text.class);//Output result key type
Job.setoutputvalueclass (Text.class);//Output result value type

Fileinputformat.addinputpath (Job, New Path (args[0]));//input path
Fileoutputformat.setoutputpath (Job, New Path (args[1]));//Output path
Job.waitforcompletion (TRUE);//Submit Task
return 0;
}
/**
* @function Main method
* @param args
* @throws Exception
*/
public static void Main (string[] args) throws Exception
{
String[] Args0 = {
"Hdfs://hadoopmaster:9000/gender/gender.txt",
"Hdfs://hadoopmaster:9000/out/partition/"};

String[] Args0 = {
"./data/gender/gender.txt",
"./out/gender"};

int EC = Toolrunner.run (New Configuration (), New Gender (), ARGS0);
System.exit (EC);
}
}

Introduction to the Hadoop MapReduce Programming API Series: Student Score Statistics, Part 2 (18)

This article is an English version of an article which is originally in the Chinese language on aliyun.com and is provided for information purposes only. This website makes no representation or warranty of any kind, either expressed or implied, as to the accuracy, completeness, ownership, or reliability of the article or any translations thereof. If you have any concerns or complaints relating to the article, please send an email, providing a detailed description of the concern or complaint, to info-contact@alibabacloud.com. A staff member will contact you within 5 working days. Once verified, infringing content will be removed immediately.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

Get Started for Free

Sales Support

1 on 1 presale consultation

Chat Contact Sales
After-Sales Support

24/7 Technical Support 6 Free Tickets per Quarter Faster Response

Open a Ticket
Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.

Learn More