Spark 2.x in-depth series, part six: the RDD Java API explained (part two)

Source: Internet
Author: User

package com.twq.javaapi.java7;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.concurrent.TimeUnit;

/**
 * Created by tangweiqun on 2017/9/16.
 *
 * Demonstrates the basic action APIs of {@link JavaRDD}:
 * collect, take, top, first, min/max, takeOrdered,
 * foreach/foreachPartition, reduce/treeReduce, fold,
 * and aggregate/treeAggregate.
 *
 * The sample RDD is parallelize([1, 2, 4, 3, 3, 6], 2) — six elements
 * split into two partitions — so every printed result below is fixed.
 */
public class BaseActionApiTest {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("AppName").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaRDD<Integer> listRDD = sc.parallelize(Arrays.asList(1, 2, 4, 3, 3, 6), 2);

        // Result: [1, 2, 4, 3, 3, 6]
        System.out.println("collect = " + listRDD.collect());
        // Result: [1, 2]  (first two elements in partition order)
        System.out.println("take(2) = " + listRDD.take(2));
        // Result: [6, 4]  (two largest, descending)
        System.out.println("top(2) = " + listRDD.top(2));
        // Result: 1
        System.out.println("first = " + listRDD.first());
        // Result: 1
        System.out.println("min = " + listRDD.min(new AscComparator()));
        // Result: 6  (a descending comparator inverts min/max)
        System.out.println("min = " + listRDD.min(new DescComparator()));
        // Result: 6
        System.out.println("max = " + listRDD.max(new AscComparator()));
        // Result: 1
        System.out.println("max = " + listRDD.max(new DescComparator()));
        // Result: [1, 2]
        System.out.println("takeOrdered(2) = " + listRDD.takeOrdered(2));
        // Result: [1, 2]
        System.out.println("takeOrdered(2) = " + listRDD.takeOrdered(2, new AscComparator()));
        // Result: [6, 4]
        System.out.println("takeOrdered(2) = " + listRDD.takeOrdered(2, new DescComparator()));

        listRDD.foreach(new VoidFunction<Integer>() {
            @Override
            public void call(Integer element) throws Exception {
                // Poor performance: the expensive getInitNumber runs once PER ELEMENT.
                // Prefer foreachPartition over foreach for this kind of setup cost.
                Integer initNumber = getInitNumber("foreach");
                System.out.println((element + initNumber) + "=========");
            }
        });

        listRDD.foreachPartition(new VoidFunction<Iterator<Integer>>() {
            @Override
            public void call(Iterator<Integer> integerIterator) throws Exception {
                // Same effect as foreach, but the function is applied once per
                // PARTITION rather than once per record. Any expensive setup
                // (e.g. opening a database connection) therefore runs only once
                // per partition instead of once per element.
                Integer initNumber = getInitNumber("foreachPartition");
                while (integerIterator.hasNext()) {
                    System.out.println((integerIterator.next() + initNumber) + "=========");
                }
            }
        });

        Integer reduceResult = listRDD.reduce(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer ele1, Integer ele2) throws Exception {
                return ele1 + ele2;
            }
        });
        // Result: 19  (1 + 2 + 4 + 3 + 3 + 6)
        System.out.println("reduceResult = " + reduceResult);

        // treeReduce behaves like reduce but combines partial results in a
        // multi-level tree; the second argument is the tree depth.
        Integer treeReduceResult = listRDD.treeReduce(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer integer, Integer integer2) throws Exception {
                return integer + integer2;
            }
        }, 3);
        // Result: 19
        System.out.println("treeReduceResult = " + treeReduceResult);

        // Similar to reduce, except the initial value (0 here) is applied when
        // computing each partition and again when merging the per-partition results.
        Integer foldResult = listRDD.fold(0, new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer integer, Integer integer2) throws Exception {
                return integer + integer2;
            }
        });
        // Result: 19
        System.out.println("foldResult = " + foldResult);

        // aggregate: start from an initial value of the RESULT type (here a
        // (sum, count) pair), then apply
        //   (acc, value) -> (acc._1 + value, acc._2 + 1)
        // to every element within each partition, and finally merge the
        // per-partition accumulators with
        //   (acc1, acc2) -> (acc1._1 + acc2._1, acc1._2 + acc2._2).
        Tuple2<Integer, Integer> aggregateResult = listRDD.aggregate(
                new Tuple2<Integer, Integer>(0, 0),
                new Function2<Tuple2<Integer, Integer>, Integer, Tuple2<Integer, Integer>>() {
                    @Override
                    public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> acc, Integer integer)
                            throws Exception {
                        return new Tuple2<>(acc._1 + integer, acc._2 + 1);
                    }
                },
                new Function2<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>>() {
                    @Override
                    public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> acc1,
                                                         Tuple2<Integer, Integer> acc2) throws Exception {
                        return new Tuple2<>(acc1._1 + acc2._1, acc1._2 + acc2._2);
                    }
                });
        // Result: (19,6)  — sum of elements and their count
        System.out.println("aggregateResult = " + aggregateResult);

        // treeAggregate is to aggregate what treeReduce is to reduce:
        // the per-partition results are merged through a tree of intermediate steps.
        Tuple2<Integer, Integer> treeAggregateResult = listRDD.treeAggregate(
                new Tuple2<Integer, Integer>(0, 0),
                new Function2<Tuple2<Integer, Integer>, Integer, Tuple2<Integer, Integer>>() {
                    @Override
                    public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> acc, Integer integer)
                            throws Exception {
                        return new Tuple2<>(acc._1 + integer, acc._2 + 1);
                    }
                },
                new Function2<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>>() {
                    @Override
                    public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> acc1,
                                                         Tuple2<Integer, Integer> acc2) throws Exception {
                        return new Tuple2<>(acc1._1 + acc2._1, acc1._2 + acc2._2);
                    }
                });
        // Result: (19,6)
        System.out.println("treeAggregateResult = " + treeAggregateResult);
    }

    /**
     * Simulates an expensive initialization step (e.g. opening a DB connection)
     * by sleeping for one second, then returns the constant 1.
     *
     * @param source label identifying which caller requested the init, for logging
     * @return always 1
     */
    public static Integer getInitNumber(String source) {
        System.out.println("get init number from " + source + ", may be take much time...");
        try {
            TimeUnit.SECONDS.sleep(1);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        return 1;
    }

    /** Ascending comparator; Serializable so Spark can ship it to executors. */
    private static class AscComparator implements Comparator<Integer>, Serializable {
        @Override
        public int compare(java.lang.Integer o1, java.lang.Integer o2) {
            // Integer.compare avoids the overflow risk of (o1 - o2).
            return Integer.compare(o1, o2);
        }
    }

    /** Descending comparator; Serializable so Spark can ship it to executors. */
    private static class DescComparator implements Comparator<Integer>, Serializable {
        @Override
        public int compare(java.lang.Integer o1, java.lang.Integer o2) {
            // Integer.compare avoids the overflow risk of (o2 - o1).
            return Integer.compare(o2, o1);
        }
    }
}



For the detailed principles behind APIs such as reduce, treeReduce, fold, aggregate, and treeAggregate, you can refer to the rationale of the Spark core RDD API.

Spark 2.x in-depth series, part six: the RDD Java API explained (part two)

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.