// Spark driver script: read netflow records from the HBase table "FLOW", group the
// per-flow byte counts ("dbytes") by (srcIP_srcPort_protocol) and by the flow's
// start/end time-of-day window, then for every bucket compute a 95% confidence
// baseline interval: mean +/- 1.960 * stddev (via MLlib Statistics.colStats).
//
// Assumes `sc: SparkContext` is already in scope (spark-shell / REPL style script).

val conf = HBaseConfiguration.create()
// Point the HBase client at the cluster's HBase and Hadoop configuration files.
conf.addResource(new Path("/opt/cloudera/parcels/CDH-5.4.4-1.cdh5.4.4.p0.4/lib/hbase/conf/hbase-site.xml"))
conf.addResource(new Path("/opt/cloudera/parcels/CDH-5.4.4-1.cdh5.4.4.p0.4/lib/hadoop/etc/hadoop/core-site.xml"))
conf.set(TableInputFormat.INPUT_TABLE, "FLOW")

// A server-side filter could be added here instead of scanning the full table, e.g.:
// val scan = new Scan()
// scan.setFilter(new SingleColumnValueFilter("basic".getBytes, "age".getBytes,
//   CompareOp.GREATER_OR_EQUAL, Bytes.toBytes(...)))
// conf.set(TableInputFormat.SCAN, convertScanToString(scan))

val usersRDD = sc.newAPIHadoopRDD(
  conf,
  classOf[TableInputFormat],
  classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
  classOf[org.apache.hadoop.hbase.client.Result])

val data1 = usersRDD.count()
// Timestamp layout of the "stime"/"ltime" columns.
val sf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSSS")
println("Data length: " + data1)

// key1 = "ip_port_protocol" -> (key2 = "startH:startM_endH:endM" -> observed byte counts)
var map = HashMap[String, HashMap[String, collection.mutable.ArrayBuffer[Double]]]()

// NOTE(review): collect() pulls the whole table to the driver — acceptable only for
// small data sets; a reduceByKey-style aggregation would scale better.
// Side-effect-only iteration, so foreach (the original used .map and discarded results).
usersRDD.collect().foreach { case (_, result) =>
  val key = Bytes.toInt(result.getRow)
  println("key: " + key)

  // All flow attributes live in column family "f".
  val ip = Bytes.toString(result.getValue("f".getBytes, "saddr".getBytes))
  val port = Bytes.toString(result.getValue("f".getBytes, "sport".getBytes))
  val startTimeLong = Bytes.toString(result.getValue("f".getBytes, "stime".getBytes))
  val endTimeLong = Bytes.toString(result.getValue("f".getBytes, "ltime".getBytes))
  val protocol = Bytes.toString(result.getValue("f".getBytes, "proto".getBytes))
  val sumTime = Bytes.toString(result.getValue("f".getBytes, "dur".getBytes))
  val sum = Bytes.toString(result.getValue("f".getBytes, "dbytes".getBytes)).toDouble
  println("ip: " + ip + ", port: " + port + ", startTime: " + startTimeLong +
    ", endTime: " + endTimeLong + ", protocol: " + protocol + ", sum: " + sum)

  // Bucket each flow by its start/end hour:minute window,
  // e.g. ip+port+udp over 14:02_14:07.
  val startTimeDate = sf.parse(startTimeLong)
  val endTimeDate = sf.parse(endTimeLong)
  // NOTE(review): Date.getHours/getMinutes are deprecated (Calendar is the modern
  // equivalent); kept for identical behavior on the JVM default timezone.
  val startHours = startTimeDate.getHours
  val startMinutes = startTimeDate.getMinutes
  val endHours = endTimeDate.getHours
  val endMinutes = endTimeDate.getMinutes

  val key1 = ip + "_" + port + "_" + protocol
  println("key1: " + key1)
  val key2 = startHours + ":" + startMinutes + "_" + endHours + ":" + endMinutes
  println("key2: " + key2)

  map.get(key1) match {
    case Some(innerMap) =>
      println("-------------------- map is NOT null: " + innerMap.size + " --------------------")
      innerMap.get(key2) match {
        case Some(bucket) =>
          bucket += sum
        case None =>
          // BUGFIX: the original silently dropped the value when key1 existed but
          // key2 was a new time window; start a new bucket for it instead.
          innerMap += (key2 -> collection.mutable.ArrayBuffer(sum))
      }
    case None =>
      println("-------------------- map is null --------------------")
      // First time this ip/port/protocol is seen: create both nested structures.
      val sumArray = collection.mutable.ArrayBuffer[Double]()
      sumArray += sum
      val secondMap = HashMap[String, collection.mutable.ArrayBuffer[Double]]()
      secondMap += (key2 -> sumArray)
      map += (key1 -> secondMap)
  }
  println("map size -----------------: " + map.size)
}
println("map size: " + map.size)

// For every (ip_port_protocol, time-window) bucket, compute mean/variance of the
// byte counts and derive the 95% baseline band.
map.foreach { case (resultKey1, resultVal1) =>
  println("-------------------- Statistics Start --------------------")
  println("resultKey1: " + resultKey1)
  resultVal1.foreach { case (resultKey2, resultVal2) =>
    println("resultKey2: " + resultKey2)
    println("----------------- resultVal2: " + resultVal2.length)
    resultVal2.foreach(v => println("------------------------ v: " + v))

    // Each observation becomes a 1-dimensional vector so colStats can summarize it.
    val dataArray = resultVal2.map(v => Vectors.dense(v))
    val summary: MultivariateStatisticalSummary = Statistics.colStats(sc.parallelize(dataArray))
    println("-------------------- variance: " + summary.variance + " --------------------")
    println("-------------------- mean apply 0: " + summary.mean.toArray.apply(0) + " --------------------")
    println("-------------------- variance apply 0: " + summary.variance.apply(0) + " --------------------")

    // 1.960 is the two-sided z-score for a 95% confidence interval; hoist the
    // shared mean/stddev instead of recomputing them for each bound.
    val mean0 = summary.mean.toArray.apply(0)
    val stddev0 = math.sqrt(summary.variance.apply(0))
    val upBase = mean0 + 1.960 * stddev0
    val downBase = mean0 - 1.960 * stddev0
    println("------------------- " + upBase + " ---------- " + downBase)

    val df = new DecimalFormat(".##")
    val upBaseString = df.format(upBase)
    val downBaseString = df.format(downBase)
    println("ip port: " + resultKey1 + ", time: " + resultKey2 +
      ", upBase: " + upBase + ", downBase: " + downBase)
  }
}
println("-------------------- BaseLine end --------------------")
sc.stop()
Example: Spark reads flow records from HBase and computes per-bucket traffic baselines (mean ± 1.96·σ) with MLlib.