Spark 2.x in-depth series, part six: the RDD Java API explained (part two)

Source: Internet
Author: User

package com.twq.javaapi.java7;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.concurrent.TimeUnit;

/**
 * Created by tangweiqun on 2017/9/16.
 *
 * Demonstrates the basic action APIs of {@link JavaRDD}:
 * collect, take, top, first, min/max, takeOrdered,
 * foreach/foreachPartition, reduce/treeReduce, fold,
 * and aggregate/treeAggregate.
 *
 * The sample RDD is parallelize([1, 2, 4, 3, 3, 6], 2) — six elements
 * split into two partitions — so every printed result below is fixed.
 */
public class BaseActionApiTest {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("AppName").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaRDD<Integer> listRDD = sc.parallelize(Arrays.asList(1, 2, 4, 3, 3, 6), 2);

        // Result: [1, 2, 4, 3, 3, 6]
        System.out.println("collect = " + listRDD.collect());
        // Result: [1, 2]  (first two elements in partition order)
        System.out.println("take(2) = " + listRDD.take(2));
        // Result: [6, 4]  (two largest, descending)
        System.out.println("top(2) = " + listRDD.top(2));
        // Result: 1
        System.out.println("first = " + listRDD.first());
        // Result: 1
        System.out.println("min = " + listRDD.min(new AscComparator()));
        // Result: 6  (a descending comparator inverts min/max)
        System.out.println("min = " + listRDD.min(new DescComparator()));
        // Result: 6
        System.out.println("max = " + listRDD.max(new AscComparator()));
        // Result: 1
        System.out.println("max = " + listRDD.max(new DescComparator()));
        // Result: [1, 2]
        System.out.println("takeOrdered(2) = " + listRDD.takeOrdered(2));
        // Result: [1, 2]
        System.out.println("takeOrdered(2) = " + listRDD.takeOrdered(2, new AscComparator()));
        // Result: [6, 4]
        System.out.println("takeOrdered(2) = " + listRDD.takeOrdered(2, new DescComparator()));

        listRDD.foreach(new VoidFunction<Integer>() {
            @Override
            public void call(Integer element) throws Exception {
                // Poor performance: the expensive getInitNumber runs once PER ELEMENT.
                // Prefer foreachPartition over foreach for this kind of setup cost.
                Integer initNumber = getInitNumber("foreach");
                System.out.println((element + initNumber) + "=========");
            }
        });

        listRDD.foreachPartition(new VoidFunction<Iterator<Integer>>() {
            @Override
            public void call(Iterator<Integer> integerIterator) throws Exception {
                // Same effect as foreach, but the function is applied once per
                // PARTITION rather than once per record. Any expensive setup
                // (e.g. opening a database connection) therefore runs only once
                // per partition instead of once per element.
                Integer initNumber = getInitNumber("foreachPartition");
                while (integerIterator.hasNext()) {
                    System.out.println((integerIterator.next() + initNumber) + "=========");
                }
            }
        });

        Integer reduceResult = listRDD.reduce(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer ele1, Integer ele2) throws Exception {
                return ele1 + ele2;
            }
        });
        // Result: 19  (1 + 2 + 4 + 3 + 3 + 6)
        System.out.println("reduceResult = " + reduceResult);

        // treeReduce behaves like reduce but combines partial results in a
        // multi-level tree; the second argument is the tree depth.
        Integer treeReduceResult = listRDD.treeReduce(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer integer, Integer integer2) throws Exception {
                return integer + integer2;
            }
        }, 3);
        // Result: 19
        System.out.println("treeReduceResult = " + treeReduceResult);

        // Similar to reduce, except the initial value (0 here) is applied when
        // computing each partition and again when merging the per-partition results.
        Integer foldResult = listRDD.fold(0, new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer integer, Integer integer2) throws Exception {
                return integer + integer2;
            }
        });
        // Result: 19
        System.out.println("foldResult = " + foldResult);

        // aggregate: start from an initial value of the RESULT type (here a
        // (sum, count) pair), then apply
        //   (acc, value) -> (acc._1 + value, acc._2 + 1)
        // to every element within each partition, and finally merge the
        // per-partition accumulators with
        //   (acc1, acc2) -> (acc1._1 + acc2._1, acc1._2 + acc2._2).
        Tuple2<Integer, Integer> aggregateResult = listRDD.aggregate(
                new Tuple2<Integer, Integer>(0, 0),
                new Function2<Tuple2<Integer, Integer>, Integer, Tuple2<Integer, Integer>>() {
                    @Override
                    public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> acc, Integer integer)
                            throws Exception {
                        return new Tuple2<>(acc._1 + integer, acc._2 + 1);
                    }
                },
                new Function2<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>>() {
                    @Override
                    public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> acc1,
                                                         Tuple2<Integer, Integer> acc2) throws Exception {
                        return new Tuple2<>(acc1._1 + acc2._1, acc1._2 + acc2._2);
                    }
                });
        // Result: (19,6)  — sum of elements and their count
        System.out.println("aggregateResult = " + aggregateResult);

        // treeAggregate is to aggregate what treeReduce is to reduce:
        // the per-partition results are merged through a tree of intermediate steps.
        Tuple2<Integer, Integer> treeAggregateResult = listRDD.treeAggregate(
                new Tuple2<Integer, Integer>(0, 0),
                new Function2<Tuple2<Integer, Integer>, Integer, Tuple2<Integer, Integer>>() {
                    @Override
                    public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> acc, Integer integer)
                            throws Exception {
                        return new Tuple2<>(acc._1 + integer, acc._2 + 1);
                    }
                },
                new Function2<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>>() {
                    @Override
                    public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> acc1,
                                                         Tuple2<Integer, Integer> acc2) throws Exception {
                        return new Tuple2<>(acc1._1 + acc2._1, acc1._2 + acc2._2);
                    }
                });
        // Result: (19,6)
        System.out.println("treeAggregateResult = " + treeAggregateResult);
    }

    /**
     * Simulates an expensive initialization step (e.g. opening a DB connection)
     * by sleeping for one second, then returns the constant 1.
     *
     * @param source label identifying which caller requested the init, for logging
     * @return always 1
     */
    public static Integer getInitNumber(String source) {
        System.out.println("get init number from " + source + ", may be take much time...");
        try {
            TimeUnit.SECONDS.sleep(1);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        return 1;
    }

    /** Ascending comparator; Serializable so Spark can ship it to executors. */
    private static class AscComparator implements Comparator<Integer>, Serializable {
        @Override
        public int compare(java.lang.Integer o1, java.lang.Integer o2) {
            // Integer.compare avoids the overflow risk of (o1 - o2).
            return Integer.compare(o1, o2);
        }
    }

    /** Descending comparator; Serializable so Spark can ship it to executors. */
    private static class DescComparator implements Comparator<Integer>, Serializable {
        @Override
        public int compare(java.lang.Integer o1, java.lang.Integer o2) {
            // Integer.compare avoids the overflow risk of (o2 - o1).
            return Integer.compare(o2, o1);
        }
    }
}



For the detailed principles behind APIs such as reduce, treeReduce, fold, aggregate, and treeAggregate, you can refer to the rationale of the Spark core RDD API.

Spark 2.x in-depth series, part six: the RDD Java API explained (part two)

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.