import org.apache.spark._
import org.apache.spark.SparkConf
import org.apache.spark.Partitioner
import java.util.Date
import java.text.SimpleDateFormat
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapred.TextOutputFormat

/**
 * Minimal word-count job demonstrating how to plug a custom [[Partitioner]]
 * into `reduceByKey`. Reads a text file from HDFS, splits each line into
 * words, counts occurrences, and writes the result back to HDFS under a
 * timestamped path.
 */
object partitioner {

  def main(args: Array[String]): Unit = {
    // Timestamp used to make the app name and output path unique per run.
    val time = new SimpleDateFormat("MMddHHmm").format(new Date())

    val sparkConf = new SparkConf().setAppName("Wordcount_" + time)
    // Hadoop config key must be all-lowercase: "mapreduce.framework.name".
    sparkConf.set("mapreduce.framework.name", "yarn")
    val sc = new SparkContext(sparkConf)

    // NOTE(review): input path reconstructed from a whitespace/case-mangled
    // literal — confirm host ("namenode") and directory casing against the
    // actual HDFS layout.
    val textFile = sc
      .textFile("hdfs://namenode:9000/data/mapreduce/chuping/test_in_1/new5", 1)
      .cache()

    val result = textFile
      // NOTE(review): delimiter was garbled to "T" by case-mangling; the
      // original was almost certainly the tab character "\t" — confirm.
      .flatMap(line => line.split("\t"))
      .map(word => (word, 1))
      // reduceByKey(partitioner, func) overload: the custom partitioner
      // decides which reduce partition each key is routed to.
      .reduceByKey(new testPartitioner, _ + _)

    result.saveAsTextFile("hdfs://namenode:9000/data/zk/test/partitioner" + time)

    sc.stop()
  }
}

/**
 * Trivial custom partitioner: declares 3 partitions but routes every key to
 * partition 1, i.e. all data lands on a single reducer. Useful only as a
 * demonstration that `getPartition` controls reduce-side placement.
 */
class testPartitioner extends Partitioner {
  val numPartitions = 3
  // Send every key to reduce partition 1 (the "first few reduce" note in the
  // original source meant: this value specifies which reducer receives the key).
  def getPartition(key: Any): Int = 1
}
This program is only a test: it merely performs a word count and does not show the real value of a custom Partitioner. In actual production, however, uses for a custom Partitioner abound.
Using Partitioner in Spark