Various transformation operators in Spark (Java edition)

package cn.spark.study.core;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.VoidFunction;

import scala.Tuple2;

/**
 * Hands-on with Spark's transformation operators
 * @author DD
 */
public class TransformationOperation {

    public static void main(String[] args) {
        mapTest();
        filterTest();
        flatMapTest();
        groupByKeyTest();
        reduceByKeyTest();
        sortByKeyTest();
        joinTest();
    }

    /**
     * map operator
     * Case: multiply each element in the collection by 2
     */
    private static void mapTest() {
        SparkConf conf = new SparkConf().setAppName("map").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5);
        JavaRDD<Integer> numberRDD = sc.parallelize(numbers);

        JavaRDD<Integer> multipleNumberRDD = numberRDD.map(new Function<Integer, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Integer call(Integer number) throws Exception {
                return number * 2;
            }
        });

        multipleNumberRDD.foreach(new VoidFunction<Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(Integer number) throws Exception {
                System.out.print(number + " ");
            }
        });

        sc.close();
    }

    /**
     * filter operator
     * Case: keep only the even numbers in the collection
     */
    private static void filterTest() {
        SparkConf conf = new SparkConf().setAppName("filter").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        JavaRDD<Integer> numberRDD = sc.parallelize(numbers);

        // filter also takes a Function, but its call method returns a Boolean:
        // each element of the original RDD is passed to call; return true to keep
        // the element in the new RDD, false to drop it.
        JavaRDD<Integer> evenNumberRDD = numberRDD.filter(new Function<Integer, Boolean>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Boolean call(Integer number) throws Exception {
                return number % 2 == 0;
            }
        });

        evenNumberRDD.foreach(new VoidFunction<Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(Integer number) throws Exception {
                System.out.println(number);
            }
        });

        sc.close();
    }

    /**
     * flatMap operator
     * Case: split lines of text into words
     */
    private static void flatMapTest() {
        SparkConf conf = new SparkConf().setAppName("flatMap").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<String> lineList = Arrays.asList("hello", "hello me", "hello world");
        JavaRDD<String> lines = sc.parallelize(lineList);

        // flatMap receives each element of the original RDD and may return multiple
        // elements for it, wrapped in an Iterable; all returned elements are
        // flattened into the new RDD, which is therefore usually larger than the
        // original one. Here, each line is split into its words.
        JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Iterable<String> call(String line) throws Exception {
                return Arrays.asList(line.split(" "));
            }
        });

        words.foreach(new VoidFunction<String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(String word) throws Exception {
                System.out.println(word);
            }
        });

        sc.close();
    }
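    /**
     * The cases above use anonymous inner classes, the classic style of Spark's
     * Java API. On Java 8, each single-method Function can be written as a lambda
     * instead. A minimal sketch of mapTest in that style (mapTestLambda is a
     * hypothetical addition, assuming Spark is run with Java 8):
     */
    private static void mapTestLambda() {
        SparkConf conf = new SparkConf().setAppName("mapLambda").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaRDD<Integer> numberRDD = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
        // map and foreach take single-method interfaces, so lambdas work directly
        numberRDD.map(n -> n * 2).foreach(n -> System.out.print(n + " "));

        sc.close();
    }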
Setmaster ("local");    Javasparkcontext sc = new Javasparkcontext (conf); List<tuple2<string, integer>> scores = arrays.aslist (New tuple2<string, integer> ("cl Ass1 ",", New tuple2<string, integer> ("Class2", "page"), New tuple2<string, Int    Eger> ("Class1"), New tuple2<string, integer> ("Class2", 65));    Create Javapairrdd javapairrdd<string, integer> Scoresrdd = Sc.parallelizepairs (scores);    javapairrdd<string, iterable<integer>> groupscores = Scoresrdd.groupbykey (); Groupscores.foreach (New voidfunction<tuple2<String,iterable<integer>>> () {@Override public void call (Tuple2<string, iterable<integer& Gt;> arg0) throws Exception {//TODO auto-generated Method Stub System.out.println ("Class:" +arg0            . _1);            Iterator<integer> it = Arg0._2.iterator ();            while (It.hasnext ()) {System.out.println (It.next ());        } System.out.println ("====================================");    }    }); Sc.close ();} /** * Reducebykey operator * Case: Ask each class total */private static void Reducebykeytest () {sparkconf conf = new sparkconf (). Setappna    Me ("Reducebykey"). Setmaster ("local");    Javasparkcontext sc = new Javasparkcontext (conf); List<tuple2<string, integer>> scores = arrays.aslist (New tuple2<string, integer> ("Class1", 80            ), New tuple2<string, integer> ("Class2", "page"), New tuple2<string, integer> ("Class1", 90), New Tuple2<string, INteger> ("Class2", 65));    javapairrdd<string, integer> Scoresrdd = Sc.parallelizepairs (scores); javapairrdd<string, integer> totalscores = Scoresrdd.reducebykey (new Function2<integer, Integer, Integer>        () {private static final long serialversionuid = 1L; @Override public integer Call (integer arg0, integer arg1) throws Exception {//TODO auto-generated Meth        OD stub return arg0+arg1;    }    }); Totalscores.foreach (New voidfunction<tuple2<string,integer>> () {@Override public void call (TUPL E2<string, integer> arg0) throws Exception {//TODO auto-generated method stub System.out.pri        Ntln (arg0._1+ ":" +arg0._2);    }    }); Sc.close ();} /** * Sortbykey operator * Case study: Sort student scores */private static void Sortbykeytest () {sparkconf conf = new sparkconf (). Setappname    ("Sortbykey"). Setmaster ("local");    Javasparkcontext sc = new Javasparkcontext (conf); List<tUple2<integer, string>> scores = arrays.aslist (New Tuple2<integer, string> (Ten, "Leo"), New Tuple2<integer, string> ("KSC"), New Tuple2<integer, string> ("my"), new TUPL    E2<integer, string> ("Jack"));    Javapairrdd<integer, string> Scoresrdd = Sc.parallelizepairs (scores);    Default true Ascending, False descending javapairrdd<integer, string> Sortedrdd = Scoresrdd.sortbykey (); Sortedrdd.foreach (New voidfunction<tuple2<integer,string>> () {@Override public void call (Tuple2        <integer, string> arg0) throws Exception {System.out.println (arg0._1+ ":" +arg0._2);    }    }); Sc.close ();} /** * Join * case: Print student results */private static void Jointest () {sparkconf conf = new sparkconf (). Setappnam    E ("Joinandcogroup"). 
Setmaster ("local");    Javasparkcontext sc = new Javasparkcontext (conf); List<tuple2<integer, string>> StudeNtslist = arrays.aslist (New Tuple2<integer, string> (1, "Leo"), New Tuple2<integer, STRING&G    t; (2, "Jack"), New Tuple2<integer, string> (3, "Tom")); List<tuple2<integer, integer>> scoreslist = arrays.aslist (New Tuple2<integer, Integer> (1,100    ), New Tuple2<integer, integer> (2,90), New Tuple2<integer, integer> (3,60));    Parallelization of two sets Javapairrdd<integer, string> Studentsrdd = Sc.parallelizepairs (studentslist);    Javapairrdd<integer, integer> Scoresrdd = Sc.parallelizepairs (scoreslist); Use the join operator to correlate two Rdd Javapairrdd<integer, tuple2<string, integer>> studentscores = Studentsrdd.join (    SCORESRDD);        Studentscores.foreach (New voidfunction<tuple2<integer,tuple2<string,integer>>> () {@Override            public void Call (Tuple2<integer, tuple2<string, integer>> arg0) throws Exception { TODO auto-Generated method stub System.out.println ("Student ID:" +arg0._1);            System.out.println ("Student Name:" +arg0._2._1);            System.out.println ("Student score:" +arg0._2._2);        System.out.println ("=========================================="); }    });}

}
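To run the class outside an IDE, it can be packaged into a jar and launched with spark-submit. A minimal sketch, assuming the build produces a jar named spark-study.jar (a hypothetical name; the master is already set to "local" in the code, so no --master flag is needed):

    spark-submit --class cn.spark.study.core.TransformationOperation spark-study.jar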
