package com.twq.javaapi.java7;import org.apache.spark.sparkconf;import org.apache.spark.api.java.javardd;import org.apache.spark.api.java.javasparkcontext;import org.apache.spark.api.java.function.function2;import org.apache.spark.api.java.function.voidfunction; Import scala. Tuple2;import java.io.serializable;import java.util.arrays;import java.util.comparator;import java.util.Iterator;import java.util.concurrent.TimeUnit;/** * Created by tangweiqun on 2017/9/16. */public class baseactionapitest { public static void main (String[] args) { sparkconf conf = new sparkconf (). Setappname ("AppName"). Setmaster ("local"); javasparkcontext sc = new javasparkcontext (conf); javardd<integer> listrdd = sc.parallelize (Arrays.aslist (1, 2, 4, 3, 3, 6), 2); //results: [1, 2, 4, 3, 3, 6] system.out.println ("collect = " + listrdd.collect ()); //results:[1, 2] system.out.println ("Take (2) = " + listrdd.take (2)); //Results:[6, 4] system.out.println ("Top (2) = " + listrdd.top (2)); //Result: 1 system.out.println ("first = " + listrdd.first ()); //results:1 systEm.out.println ("min = " + listrdd.min (New asccomparator ())); //Result: 6 system.out.println ("min = " + listrdd.min (New desccomparator ())); // Results: 6 system.out.println ("max = " + Listrdd.max (New asccomparator ())); //results:1 system.out.println ("max = " + listrdd.max (new Desccomparator ())); //results:[1, 2] system.out.println ("takeordered (2) = " + listrdd.takeordered (2)); //Results:[1, 2] system.out.println ("takeordered(2) = " + listrdd.takeordered (2, new asccomparator ())); //Results:[6, 4] System.out.println ("takeordered (2) = " + listrdd.takeordered (2, new Desccomparator ())); listrdd.foreach (new VoidFunction< Integer> () { @Override public void call (Integer Element) throws Exception { //This performance is too poor, the time to traverse each element will need to call the more time-consuming getinitnumber &NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;//recommends using 
Foreachpartition instead of the foreach Operation &nBsp; integer initnumber = getinitnumber ("foreach"); system.out.println (( Element + initnumber) + "========="); } }); listrdd.foreachpartition (new voidfunction<iterator<integer>> () { @Override public void call (iterator<integer> Integeriterator) throws Exception { The function of //and Foreach api is the same, but one is to apply the function to each record, which is to apply the function to each partition //If you have a more time-consuming operation, you only need to perform this operation once per partition, then use this function //this time-consuming operation can be connected to the database and other operations, do not need to calculate every time to connect to the database, One partition can only be connected once on the line Integer initnumber = getinitnumber ("foreach"); while (Integeriterator.hasnext ()) { System.out.println ((Integeriterator.next () + initnumber) + "========="); } } }); integer reduceresult = listrdd.reduce (new function2<integer, integer, Integer> () { @Override public integer call (Integer Ele1, integer ele2) throws Exception { return ele1 + ele2; } }); //Results:19 System.out.println ("reduceresult = " + reduceresult); integer treereduceresult = listrdd.treereduce (new function2<integer, Integer, integer> () &NBSP;{&NBsp; @Override public integer call (Integer integer, integer &NBSP;INTEGER2) throws Exception { return integer + integer2; &NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;}&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;&NBSP;},&NBSP;3); //Results:19 System.out.println ("treereduceresult = " + treereduceresult); The function of //and reduce is similar, except that it is necessary to add an initial value of 0 when calculating each partition, and then add the value calculated by each partition together with this initial value integer foldresult = listrdd.fold (0, new Function2< Integer, integer, integer> () &nbsP { @Override public integer call (Integer integer, 
INTEGER&NBSP;INTEGER2) throws Exception { return integer + integer2; } }); //Results:19 System.out.println ("foldresult = " + foldresult); //First initializes the initial value of the data type that we want to return //then applies function one to each element in each partition (ACC, value) => (acc._1 + value, acc._2 + 1) for aggregation //finally the data application letter generated for each partitionNumber (ACC1,&NBSP;ACC2) => (acc1._1 + acc2._1, acc1._2 + acc2._2) for aggregation tuple2 aggregateresult = listrdd.aggregate (new Tuple2<integer, integer> (0, 0), new function2<tuple2<integer, integer>, integer, Tuple2<Integer, Integer>> () { @Override public tuple2 <integer, integer> call (tuple2<integer, integer> acc, integer Integer) throws Exception { return new tuple2<> (Acc._1 + integer, &NBSP;ACC._2&NBSP;+&NBSP;1); } }, new function2<tuple2<integer, integer>, tuple2 <Integer, Integer>, Tuple2<Integer, Integer>> () { @Override public tuple2<integer, integer> call (tuple2<integer, integer> &NBSP;ACC1,&NBSP;TUPLE2<INTEGER,&NBSP;INTEGER>&NBSP;ACC2) throws Exception { return New tuple2<> (acc1._1 + acc2._1, acc1._2 + acc2._2); } }); //results: (19,6) System.out.println ("aggregateresult = " + aggregateresult); tuple2 treeaggregateresult = listrdd.treeaggregate (New Tuple2<Integer , integer> (0, 0), new Function2<Tuple2<Integer, Integer>, Integer, Tuple2< Integer, integer>> () { @Override public tuple2<integer, integer> call (Tuple2<Integer, integer> acc, integer integer) throws Exception { return new Tuple2<> (acc._1 + integer, acc._2 + 1); } } , new function2<tuple2<integer, integer>, tuple2<integer, integer>, tuple2<integer, inteGer>> () { @Override public Tuple2<Integer, Integer> Call (TUPLE2<INTEGER,&NBSP;INTEGER>&NBSP;ACC1,&NBSP;TUPLE2<INTEGER,&NBSP;INTEGER>&NBSP;ACC2) throws exception { return new Tuple2<> (acc1._1 + acc2._1, acc1._2 + acc2._2); } }); //results: (19,6) &Nbsp; system.out.println ("treeaggregateresult = " + Treeaggregateresult); } public 
static integer Getinitnumber (String source) { system.out.println (" get init number from " + source + ", may be take Much time ... "); try { timeunit.seconds.sleep (1); } catch (interruptedexception e) { e.printstacktrace (); } return 1; } Private static class asccomparator implements comparator<integer>, serializable { @Override public int compare ( JAVA.LANG.INTEGER&NBSP;O1,&NBSP;JAVA.LANG.INTEGER&NBSP;O2) { return o1 - o2; } } private static class DescComparator implements comparator<integer>, serializable { @Override public int compare (Java.lang.Integer &NBSP;O1,&NBSP;JAVA.LANG.INTEGER&NBSP;O2) { return o2 - o1; } }}
For the detailed principles behind actions such as reduce, treeReduce, fold, aggregate, and treeAggregate, refer to the rationale section of the Spark core RDD API documentation.
Spark 2.x in-depth series, part six: the RDD Java API explained (second installment).