spark-shell --master spark://namenode01:7077 --executor-memory 2g --driver-class-path /app/spark141/lib/mysql-connector-java-5.1.6-bin.jar

hdfs dfs -put README.md ./

val file = sc.textFile("hdfs:///user/hadoop/README.md").filter(line => line.contains("Spark"))

val wordcount = sc.textFile("hdfs:///user/hadoop/README.md").flatMap(_.split(' ')).map((_, 1)).reduceByKey(_ + _)
wordcount.saveAsTextFile("/data/result")

// sort by count
val wordcount2 = sc.textFile("hdfs:///user/hadoop/README.md").flatMap(_.split(' ')).map((_, 1)).reduceByKey(_ + _).map(x => (x._2, x._1)).sortByKey().map(x => (x._2, x._1))
wordcount2.saveAsTextFile("/data/wordcount2")

// Start the Hive metastore service (Spark SQL demo):
nohup hive --service metastore > metastore.log 2>&1 &
// Note: to use Hive from Spark, copy the hive-site.xml file into conf/.
// Next, push the MySQL connector jar to every node:
pssh "cp /app/hive/lib/mysql-connector-java-5.1.6-bin.jar /app/spark141/lib/"

spark-shell --master spark://namenode01:7077 --executor-memory 2g --driver-class-path /app/spark141/lib/mysql-connector-java-5.1.6-bin.jar

val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)
hiveContext.sql("use test")
hiveContext.sql("show tables").collect().foreach(println)

spark-sql --driver-class-path /app/spark141/lib/mysql-connector-java-5.1.6-bin.jar
// Used just like Hive — write SQL directly:
use test;
show tables;

// parallelize demo
val num = sc.parallelize(1 to 10)
val alpha = sc.parallelize('a' to 'z')
val num2 = num.map(_ * 2).collect().foreach(println)
val num3 = num.map(_ % 3 == 0).collect().foreach(println)
val num3 = num.filter(_ % 3 == 0).collect().foreach(println)
num.reduce(_ + _)
num.reduce(_ * _)
num.reduceByKey(_ + _)
num.sortBy(x => x, false)
// k-v demo
val kv1 = sc.parallelize(List(("A", 1), ("B", 2), ("C", 3), ("A", 4), ("B", 5)))
kv1.sortByKey().collect   // note: sortByKey's parentheses cannot be omitted; ascending order
kv1.sortByKey(false).collect   // descending order
// How to sort by value? Swap key and value, sort, then swap back:
kv1.map(x => (x._2, x._1)).sortByKey().map(x => (x._2, x._1)).collect
kv1.sortBy(x => x).collect
kv1.groupByKey().collect
kv1.reduceByKey(_ + _).collect

val kv2 = sc.parallelize(List(("A", 4), ("A", 4), ("C", 3), ("A", 4), ("B", 5)))
kv2.distinct.collect
kv1.union(kv2).collect

val kv3 = sc.parallelize(List(("A", 10), ("B", 20), ("D", 30)))
kv1.join(kv3).collect
kv1.cogroup(kv3).collect

val kv4 = sc.parallelize(List(List(1, 2), List(3, 4)))
kv4.flatMap(x => x.map(_ + 1)).collect
Spark Basic Code Demo