// Spark Streaming and Flume integration — push-based example follows.
package cn.my.sparkStream

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming._
import org.apache.spark.streaming.flume._

/**
 * Push-based Flume integration: a Flume Avro sink pushes events to this
 * Spark Streaming receiver (FlumeUtils.createStream), which word-counts
 * each event body per batch.
 *
 * Usage: FlumeEventCount <host> <port>
 */
object SparkFlumePush {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println("Usage: FlumeEventCount <host> <port>")
      System.exit(1)
    }
    // Quiet Spark's verbose INFO logging (project helper).
    LogLevel.setStreamingLogLevels()
    val Array(host, port) = args
    // NOTE(review): batch interval was garbled in the original; 2000 ms matches
    // the pull-based example below — confirm against the source tutorial.
    val batchInterval = Milliseconds(2000)
    // Create the context and set the batch size.
    val sparkConf = new SparkConf().setAppName("FlumeEventCount").setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, batchInterval)
    // Create a flume stream: Spark acts as an Avro server that Flume pushes to.
    val stream = FlumeUtils.createStream(ssc, host, port.toInt, StorageLevel.MEMORY_ONLY_SER_2)
    // Print out the count of events received from this server in each batch.
    stream.count().map(cnt => "Received " + cnt + " flume events.").print()
    // Take the event body out of each message — the body is the real payload —
    // then run a classic word count over it.
    stream
      .flatMap(t => new String(t.event.getBody.array()).split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .print()
    ssc.start()
    ssc.awaitTermination()
  }
}
package cn.my.sparkStream

import java.net.InetSocketAddress

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming._
import org.apache.spark.streaming.flume._
/**
 * Pull-based (polling) Flume integration: Spark pulls events from a Flume
 * spark sink instead of having Flume push them.
 */
object SparkFlumePull {
  /**
   * Entry point. Polls events from a Flume spark sink and word-counts each
   * event body per batch.
   *
   * Usage: FlumeEventCount <host> <port>
   */
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println("Usage: FlumeEventCount <host> <port>")
      System.exit(1)
    }
    // Quiet Spark's verbose INFO logging (project helper).
    LogLevel.setStreamingLogLevels()
    val Array(host, port) = args
    val batchInterval = Milliseconds(2000)
    // Create the context and set the batch size.
    val sparkConf = new SparkConf().setAppName("FlumeEventCount").setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, batchInterval)
    // Single-sink form: poll one Flume spark sink directly.
    //   val flumeStream = FlumeUtils.createPollingStream(ssc, host, port.toInt)
    //
    // When there is more than one sink, pass every sink address instead:
    //   def createPollingStream(
    //     jssc: JavaStreamingContext,
    //     addresses: Array[InetSocketAddress],
    //     storageLevel: StorageLevel)
    val flumeSinkList = Array(new InetSocketAddress("mini1", 8888))
    val flumeStream =
      FlumeUtils.createPollingStream(ssc, flumeSinkList, StorageLevel.MEMORY_ONLY_2)
    // Print out the count of events received from the sink(s) in each batch.
    flumeStream.count().map(cnt => "Received " + cnt + " flume events.").print()
    // Take the event body out of each message — the body is the real payload —
    // then run a classic word count over it.
    flumeStream
      .flatMap(t => new String(t.event.getBody.array()).split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .print()
    ssc.start()
    ssc.awaitTermination()
  }
}
// Reference: http://spark.apache.org/docs/1.6.3/streaming-flume-integration.html
// (Spark and Flume integration guide)