1. SQL in Oracle
2. Hive/Shark version
package cn.com.gzkit.spark

import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.PairRDDFunctions
object test2 {

  /**
   * Joins three pipe-delimited HDFS datasets (PSMIS custacct, GIS custacct,
   * multi-question records) via two left outer joins, then saves the rows
   * that matched on NEITHER side to HDFS and prints their count.
   *
   * NOTE(review): this source was recovered from a badly mangled paste —
   * keywords were auto-capitalized and the array indices were lost (they
   * appeared as `m (+)`). The column indices below are best-effort
   * reconstructions; confirm them against the real file layouts before
   * relying on the output.
   */
  def main(args: Array[String]): Unit = {
    val master = "spark://kit-b1:7077"
    // Classic (pre-SparkConf) constructor: (master, appName, sparkHome, jars).
    // jarOfClass ships this application's jar to the workers.
    val sc = new SparkContext(master, "HdfsTest", System.getenv("SPARK_HOME"),
      SparkContext.jarOfClass(this.getClass))

    // split('|') uses the Char overload, so no regex-escaping is needed
    // (split("|") would be an empty-matching regex — a classic trap).
    val file1 = sc.textFile("hdfs://kit-b1/demodata/utf8_a_v_pwyzl_custacct_psmis.txt")
      .map(_.split('|'))
    // (key, value) pairs for the second join — indices reconstructed, TODO confirm.
    val file2 = sc.textFile("hdfs://kit-b1/demodata/utf8_a_v_pwyzl_custacct_gis.txt")
      .map(_.split('|'))
      .map(m => (m(1), m(0)))
    // 4-tuple per record; _._1 is later used as the join key,
    // _._2 / _._3 are filtered on — indices reconstructed, TODO confirm.
    val file3 = sc.textFile("hdfs://kit-b1/demodata/utf8_tb_show_multi_question.txt")
      .map(_.split('|'))
      .map(m => (m(0), m(1), m(2), m(3)))

    file1.cache()
    file2.cache()
    file3.cache()
    // count() forces evaluation so the cached RDDs are materialized up front.
    file1.count()
    file2.count()
    file3.count()

    println("-----------begin-----------------")
    // Keep only records whose status column is "0" (index reconstructed — TODO confirm).
    val t = file1.filter(_(1) == "0")
    // Key each surviving record on its id column for the join with file3.
    val t1 = t.map(r => (r(0), r))
    // file3 rows for one specific question id and answer text.
    // NOTE(review): the leading/trailing spaces in the answer literal are
    // preserved from the original — they look intentional for raw-field matching.
    val q = file3
      .filter(_._2 == "105754659")
      .filter(_._3 == " only compared to System ")
    val q1 = q.map(r => (r._1, r))
    val tq = t1.leftOuterJoin(q1)
    println("-----------end-----------------")

    println("-----------begin2-----------------")
    // Re-key on a column of the original record so we can join against file2.
    // Index reconstructed — TODO confirm which column matches file2's key.
    val t2 = tq.map(m => (m._2._1(0), m._2))
    val s = file2
    val ts = t2.leftOuterJoin(s)
    // Keep rows that matched neither file2 (outer Option) nor file3 (inner Option).
    val rs = ts
      .filter(_._2._2.isEmpty)
      .filter(_._2._1._2.isEmpty)
    println("Save file")
    rs.saveAsTextFile("hdfs://kit-b1/demodata/test/02")
    println("Save file End")
    val v2 = rs.count()
    println("v2=" + v2)
    println("-----------end2-----------------")
    sc.stop()
  }
}