Introduction to the Hadoop MapReduce Programming API series Statistics student score 2 (18)

Source: Internet
Author: User
Tags iterable hadoop mapreduce

Without further ado, here is the code.

Statistics of the highest scores for male and female students of each age group

To avoid errors caused by whitespace, the sample data rows below are laid out with an explicit <tab> marker between fields.

Code

package zhouls.bigdata.myMapReduce.Gender;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
*
* @function statistics of the highest scores of men and women in different age groups
*
*
*/

/*
Alice<tab>23<tab>female<tab>45
bob<tab>34<tab>male<tab>89
chris<tab>67<tab>male<tab>97
kristine<tab>38<tab>female<tab>53
Connor<tab>25<tab>male<tab>27
Daniel<tab>78<tab>male<tab>95
james<tab>34<tab>male<tab>79
alex<tab>52<tab>male<tab>69
Nancy<tab>7<tab>female<tab>98
Adam<tab>9<tab>male<tab>37
Jacob<tab>7<tab>male<tab>23
mary<tab>6<tab>female<tab>93
clara<tab>87<tab>female<tab>72
monica<tab>56<tab>female<tab>92
*/
public class Gender extends configured implements Tool {
/*
*
* @function Mapper Parse input data and output as required
* @input key= line offset value= student data
* @output Key=gender Value=name+age+score
*
*/
public static class Pcmapper extends Mapper<object, text, text, text>
{
public void Map (Object key, Text value, Context context) throws IOException, Interruptedexception
{//Take alice<tab>23<tab>female<tab>45
string[] tokens = value.tostring (). Split ("<tab>");//use delimiter <tab> to parse data into arrays tokens
Get Alice23 Female45
i.e. Tokens[0] tokens[1] tokens[2] tokens[3]
String gender = tokens[2].tostring ();//gender
String Nameagescore = tokens[0] + "\ T" + tokens[1] + "\ T" + tokens[3];
Output Key=gender Value=name+age+score
Output Key=female value=alice+23+45
Context.write (new text (gender), new text (Nameagescore));//Writes (female, alice+ 23+ 45) to the context
}
}
public static class Myhashpartitioner extends Partitioner<text, text>
{
/** Use {@link object#hashcode ()} to partition. */
@Override
public int getpartition (text key, text Value,int numreducetasks)
{
Return (Key.hashcode ())% Numreducetasks;
}

}
/**
*
* @function partitioner Select reduce partition according to age
*
*/
public static class Pcpartitioner extends Partitioner<text, text>
{

@Override
public int getpartition (text key, text value, int numreducetasks)
{
TODO auto-generated Method Stub
string[] Nameagescore = value.tostring (). Split ("\ t");
String age = nameagescore[1];//Student
int ageint = Integer.parseint (age);//Partitioning by ages

Default specified partition 0
if (Numreducetasks = = 0)
return 0;

Age less than or equal to 20, specify partition 0
if (Ageint <= 20) {
return 0;
}
Age greater than 20, less than or equal to 50, specifying partition 1
if (Ageint > && ageint <= 50) {

return 1% Numreducetasks;
}
Remaining age, specify partition 2
Else
return 2% Numreducetasks;
}
}

/**
*
* @function define combiner merge Mapper output results
*
*/
public static class Pccombiner extends Reducer<text, text, text, text>
{
Private text text = new text ();

public void reduce (Text key, iterable<text> values, context context) throws IOException, Interruptedexception br>{
int maxscore = Integer.min_value;
String name = "";
String age = "";
int score = 0;
for (Text val:values)
{
string[] Valtokens = val.tostring (). Split ("\\t");
Score = Integer.parseint (valtokens[2]);
if (Score > Maxscore)
{
name = Valtokens[0];
age = valtokens[1];
Maxscore = score;
}
}
Text.set (name + "\ T" + age + "\ T" + maxscore);
Context.write (key, text);
}
}

/*
*
* @function Reducer statistics for the highest score of different ages and genders
* input key=gender value=name+age+score
* Output key=name Value=age+gender+score
*
*/
Static class Pcreducer extends Reducer<text, text, text, text>
{
@Ov Erride
public void reduce (Text key, iterable<text> values, context context) throws IOException, Interruptedexception
{
int maxscore = Integer.min_value;
String name = "";
String age = "";
String gender = "";
int score = 0;
//Based on key, iterate the values collection to find the highest score
for (Text val:values)
{
string[] Valtokens = val.tostring (). Split ("\\t");
Score = Integer.parseint (valtokens[2]);
if (Score > Maxscore)
{
name = Valtokens[0];
age = valtokens[1];
Gender = key.tostring ();
Maxscore = score;
}
}
Context.write (new text (name), new text ("age-" + age + "\ T" + Gender + "\tscore-" + Maxscore));
}
}

/**
* @function task-driven approach
* @param args
* @return
* @throws Exception
*/
@Override
public int run (string[] args) throws Exception
{
TODO auto-generated Method Stub
Configuration conf = new configuration ();//Read config file

Path MyPath = new Path (args[1]);
FileSystem HDFs = Mypath.getfilesystem (conf);
if (Hdfs.isdirectory (MyPath))
{
Hdfs.delete (MyPath, true);
}

@SuppressWarnings ("deprecation")
Job Job = new Job (conf, "gender");//Create a new task
Job.setjarbyclass (Gender.class);//Main class
Job.setmapperclass (pcmapper.class);//mapper
Job.setreducerclass (pcreducer.class);//reducer

Job.setpartitionerclass (Myhashpartitioner.class);
Job.setpartitionerclass (Pcpartitioner.class);//Set Partitioner class
Job.setnumreducetasks (3);//reduce number set to 3

Job.setmapoutputkeyclass (text.class);//map output Key type
Job.setmapoutputvalueclass (text.class);//map Output value type

Job.setcombinerclass (Pccombiner.class);//Set Combiner class

Job.setoutputkeyclass (Text.class);//Output result key type
Job.setoutputvalueclass (Text.class);//Output result value type

Fileinputformat.addinputpath (Job, New Path (args[0]));//input path
Fileoutputformat.setoutputpath (Job, New Path (args[1]));//Output path
Job.waitforcompletion (TRUE);//Submit Task
return 0;
}
/**
* @function Main method
* @param args
* @throws Exception
*/
public static void Main (string[] args) throws Exception
{
String[] Args0 = {
"Hdfs://hadoopmaster:9000/gender/gender.txt",
"Hdfs://hadoopmaster:9000/out/partition/"};

String[] Args0 = {
"./data/gender/gender.txt",
"./out/gender"};


int EC = Toolrunner.run (New Configuration (), New Gender (), ARGS0);
System.exit (EC);
}
}

Introduction to the Hadoop MapReduce Programming API series Statistics student score 2 (18)

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of the page confusing, please write us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.