// The recent project uses Spark SQL for statistical analysis of the data;
// noting it down here. Straight to the code:
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext

/**
 * Spark SQL statistics over customer / transaction data (Spark 1.0-era
 * SchemaRDD API: createSchemaRDD, registerTempTable, registerFunction).
 */
object Sparksql {

  // a: basic customer information — customer number, national ID number, gender.
  case class a(custom_id: String, id_code: String, sex: String)

  // b: customer transaction information — customer number, amount, status.
  case class b(custom_id: String, money: String, status: Int)

  def main(args: Array[String]): Unit = {
    // The data volume is small; testing showed local[*] to be more efficient
    // than plain `local` or running on YARN. App name is "Sparksql".
    val sc = new SparkContext("local[*]", "Sparksql")
    val sqlContext = new SQLContext(sc)
    import sqlContext.createSchemaRDD // implicit RDD -> SchemaRDD conversion

    // Safe string-to-int conversion; 9999 is the sentinel for unparsable input.
    def toInt(s: String): Int =
      try s.toInt catch { case _: Exception => 9999 }

    // Build the two RDDs. Fields are separated by a doubled \u0001 character.
    // NOTE(review): the original column indexes were garbled in the source
    // ("t (+)", "t ()") and the constructors were named tbclient/tbtrans
    // although the case classes are a/b; the indexes below are a best guess —
    // confirm against the actual .dat layout.
    val a_rdd = sc.textFile("hdfs://172.16.30.2:25000/usr/tmpdata/a.dat")
      .map(_.split("\u0001\u0001"))
      .map(t => a(t(0), t(4), t(5)))
    val b_rdd = sc.textFile("hdfs://172.16.30.3:25000/usr/tmpdata/b.dat")
      .map(_.split("\u0001\u0001"))
      .map(t => b(t(0), t(1), toInt(t(2))))

    // Convert the ordinary RDDs to SchemaRDDs and register them as temp tables.
    a_rdd.registerTempTable("a_rdd")
    b_rdd.registerTempTable("b_rdd")

    // Length of the ID number; used in SQL to keep only 18-digit IDs.
    def myfun2(id_code: String): Int = id_code.length

    // Chinese zodiac animal from the birth year (characters 6-10 of an
    // 18-digit ID are yyyy; 1900 was a year of the Rat). Note that Scala's
    // substring(from, until) excludes `until`, unlike Oracle's SUBSTR.
    def myfun5(id_code: String): String =
      if (id_code.length == 18) {
        val years = Array("Rat", "Ox", "Tiger", "Rabbit", "Dragon", "Snake",
          "Horse", "Sheep", "Monkey", "Rooster", "Dog", "Pig")
        val year = toInt(id_code.substring(6, 10))
        years((year - 1900) % years.length)
      } else ""

    // Decade-of-birth bucket from the birth year in the ID number.
    // The original "2000 ~ 2010" upper bound was garbled ("age < )");
    // restored to 2010 from the bucket label.
    def myfun3(id_code: String): String =
      if (id_code.length == 18) {
        val y = toInt(id_code.substring(6, 10))
        if (y >= 1910 && y < 1920) "1910 ~ 1920"
        else if (y >= 1920 && y < 1930) "1920 ~ 1930"
        else if (y >= 1930 && y < 1940) "1930 ~ 1940"
        else if (y >= 1940 && y < 1950) "1940 ~ 1950"
        else if (y >= 1950 && y < 1960) "1950 ~ 1960"
        else if (y >= 1960 && y < 1970) "1960 ~ 1970"
        else if (y >= 1970 && y < 1980) "1970 ~ 1980"
        else if (y >= 1980 && y < 1990) "1980 ~ 1990"
        else if (y >= 1990 && y < 2000) "1990 ~ 2000"
        else if (y >= 2000 && y < 2010) "2000 ~ 2010"
        else if (y >= 2010 && y < 2014) "After 2010"
        else ""
      } else ""

    // Spending-amount bucket. BUG FIX: the original compared the amount
    // strings lexicographically (e.g. "900000" < "95000" is true), which
    // assigns wrong buckets; compare numerically instead. Also fixed the
    // garbled top bracket ("5000000" -> 50000000 ~ 100000000).
    def myfun4(money: String): String = {
      val m = try money.toDouble catch { case _: Exception => -1.0 }
      if (m >= 10000 && m < 50000) "10000 ~ 50000"
      else if (m >= 50000 && m < 60000) "50000 ~ 60000"
      else if (m >= 60000 && m < 70000) "60000 ~ 70000"
      else if (m >= 70000 && m < 80000) "70000 ~ 80000"
      else if (m >= 80000 && m < 100000) "80000 ~ 100000"
      else if (m >= 100000 && m < 150000) "100000 ~ 150000"
      else if (m >= 150000 && m < 200000) "150000 ~ 200000"
      else if (m >= 200000 && m < 1000000) "200000 ~ 1000000"
      else if (m >= 1000000 && m < 10000000) "1000000 ~ 10000000"
      else if (m >= 10000000 && m < 50000000) "10000000 ~ 50000000"
      else if (m >= 50000000 && m < 100000000) "50000000 ~ 100000000"
      else ""
    }

    // Western zodiac sign from birth month/day (characters 10-14 = MMdd).
    // BUG FIX: the Capricorn condition needed grouping parentheses and a
    // logical || instead of bitwise |.
    def myfun1(id_code: String): String =
      if (id_code.length == 18) {
        val md = toInt(id_code.substring(10, 14))
        if (md >= 120 && md <= 219) "Aquarius"
        else if (md >= 220 && md <= 320) "Pisces"
        else if (md >= 321 && md <= 420) "Aries"
        else if (md >= 421 && md <= 521) "Taurus"
        else if (md >= 522 && md <= 621) "Gemini"
        else if (md >= 622 && md <= 722) "Cancer"
        else if (md >= 723 && md <= 823) "Leo"
        else if (md >= 824 && md <= 923) "Virgo"
        else if (md >= 924 && md <= 1023) "Libra"
        else if (md >= 1024 && md <= 1122) "Scorpio"
        else if (md >= 1123 && md <= 1222) "Sagittarius"
        else if ((md >= 1223 && md <= 1231) || (md >= 101 && md <= 119)) "Capricorn"
        else "Invalid"
      } else ""

    // Register the UDFs. BUG FIX: fun2 is used in every query's WHERE clause
    // but was never registered in the original.
    sqlContext.registerFunction("fun1", (x: String) => myfun1(x))
    sqlContext.registerFunction("fun2", (y: String) => myfun2(y))
    sqlContext.registerFunction("fun3", (z: String) => myfun3(z))
    sqlContext.registerFunction("fun4", (m: String) => myfun4(m))
    sqlContext.registerFunction("fun5", (n: String) => myfun5(n))

    // Constellation statistics. fun2(id_code) = 18 must appear in the WHERE
    // clause: the length guard inside fun1 protects only the first column,
    // not the grouped count.
    val result1 = sqlContext.sql(
      "select fun1(id_code), count(*) from a_rdd t " +
        "where fun2(id_code) = 18 group by fun1(id_code)")

    // Chinese-zodiac statistics.
    val result2 = sqlContext.sql(
      "select fun5(a.id_code), count(*) from a_rdd a " +
        "where fun2(a.id_code) = 18 group by fun5(a.id_code)")

    // Distinct customers and total amount per spending bucket.
    // NOTE(review): this Spark version's parser rejects subqueries in the
    // WHERE clause (see the error discussed below in the article); the
    // original string was also missing its closing quote — restored here.
    val result3 = sqlContext.sql(
      "select fun4(a.money), count(distinct a.custom_id), sum(a.money) " +
        "from b_rdd a where a.status = 8 and a.custom_id in " +
        "(select b.custom_id from a_rdd b where fun2(b.id_code) = 18) " +
        "group by fun4(a.money)")

    // Print result3; results can also be saved to the local FS or HDFS.
    result3.collect().foreach(println)
    result2.saveAsTextFile("file:///tmp/age")
  }
}
When testing result3, the following error was reported:
Exception in thread "main" java.lang.RuntimeException: [1.101] failure: ``NOT'' expected but `select' found

select fun5(a.id_code), count(*) from a_rdd a where fun2(a.id_code) = 18 and a.custom_id in (select distinct b.custom_id from b_rdd b where b.status = 8) group by fun5(a.id_code)
                                                                                             ^
at scala.sys.package$.error(package.scala:27)
at org.apache.spark.sql.catalyst.SqlParser.apply(SqlParser.scala:60)
at org.apache.spark.sql.SQLContext.parseSql(SQLContext.scala:74)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:267)
at Sparksql$.main(Sparksql.scala:198)
at Sparksql.main(Sparksql.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)
From this error seen while debugging, it appears that this version of Spark SQL does not support subqueries inside WHERE conditions (just a guess).
If that guess is wrong, corrections from more experienced readers are welcome.
This article is from the "one step. One Step" blog, be sure to keep this source http://snglw.blog.51cto.com/5832405/1632901
Preliminary application of Sparksql