sparkconf sparkconf =Newsparkconf (); sparkconf. Setappname ("Internal_func"). Setmaster ("Local"); Javasparkcontext Javasparkcontext=NewJavasparkcontext (sparkconf); SqlContext SqlContext=NewSqlContext (Javasparkcontext); List<String> list =NewArraylist<string>(); List.add ("The"); List.add ("2,11"); List.add ("2,111"); List.add ("2,111"); List.add ("3,1111"); List.add ("3,11111"); Javardd<String> rdd_str = javasparkcontext.parallelize (list, 5); Javardd<Row> Rdd_row = Rdd_str.map (NewFunction<string, row>() {@Override PublicRow Call (String v1)throwsException {String ary[]= V1.split (","); returnRowfactory.create (Ary[0], Long.parselong (ary[1])); } }); List<StructField> FieldList =NewArraylist<structfield>(); Fieldlist.add (Datatypes.createstructfield ("Name", Datatypes.stringtype,true)); Fieldlist.add (Datatypes.createstructfield ("SC", Datatypes.longtype,true)); Structtype tmp=Datatypes.createstructtype (FieldList); DataFrame DF=sqlcontext.createdataframe (Rdd_row, TMP); Df.registertemptable ("TMP_SC"); DataFrame Df_agg= Sqlcontext.sql ("Select Name,count (Distinct (SC)) from TMP_SC Group by name");//Post-weight group summation statisticsdf_agg.show ();
Spark SQL: group-by deduplicated count (UV statistics)