Spark Streaming Kafka Example

// scalastyle:off println
package org.apache.spark.examples.streaming

import kafka.serializer.StringDecoder

import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.streaming.scheduler.StreamingListener

import scala.util.parsing.json.JSON

/**
 * Consumes messages from one or more Kafka topics to analyze logs.
 * Calculates counts against a threshold over a sliding time window.
 */
object LogAnalysisB {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println(s"""
        |Usage: LogAnalysisB <brokers> <topics>
        |  <brokers> is a list of one or more Kafka brokers
        |  <topics> is a list of one or more Kafka topics to consume from
        |
        """.stripMargin)
      System.exit(1)
    }

    val WINDOW_LENGTH = new Duration(30 * 1000)
    val SLIDE_INTERVAL = new Duration(10 * 1000)

    StreamingExamples.setStreamingLogLevels()

    val Array(brokers, topics) = args
    val sparkConf = new SparkConf().setAppName("ELK Log Analysis Windows Threshold")
    val ssc = new StreamingContext(sparkConf, SLIDE_INTERVAL)
    ssc.addStreamingListener(new RuleFileListenerB())

    // Create the direct Kafka stream from the given brokers and topics
    val topicsSet = topics.split(",").toSet
    val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers)
    val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc, kafkaParams, topicsSet)

    // Parse each message into a structured log record and apply the sliding window
    val lines = messages.map(_._2).map(HostAppLog.parseLogLine)
    val windowDStream = lines.window(WINDOW_LENGTH, SLIDE_INTERVAL)

    windowDStream.foreachRDD { logs =>
      // Top 3 messages by count within the current window
      val topChar = logs
        .map(log => (log.msg, 1))
        .reduceByKey(_ + _)
        .top(3)(OrderingUtils.SecondValueOrdering)
      println("$" * 120)
      println(s"""Top Endpoints: ${topChar.mkString("[", ",", "]")}""")

      // (host + app) pairs whose messages contain "A" more than 5 times in the window
      val topTest = logs
        .map(log => (log.host + log.app, if (log.msg.contains("A")) 1 else 0))
        .reduceByKey(_ + _)
        .filter(_._2 > 5)
        .take(10)
      println(s"""A > 5 times: ${topTest.mkString("[", ",", "]")}""")
    }

    // Start the computation
    ssc.start()
    ssc.awaitTermination()
  }

  def wc(ssc: StreamingContext, map: Map[Any, Any]): Unit = {
    if (map.get("message").toString().contains("A")) {
      println("Find A in message: " + map.toString())
    }
  }
}

class RuleFileListenerB extends StreamingListener {

  override def onBatchStarted(
      batchStarted: org.apache.spark.streaming.scheduler.StreamingListenerBatchStarted) {
    println("-" * 140)
    println("Check whether the rule file's modified date has changed; if it has, reload the configuration file")
    // val source = scala.io.Source.fromFile("d:/code/scala/test")
    // val lines = try source.mkString finally source.close()
    // println(lines)
    println("-" * 140)
  }
}
// scalastyle:on println
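The listing calls HostAppLog.parseLogLine but does not include that class. A minimal sketch of what it could look like, assuming each Kafka message is a line of the form "host app message" (the field names host, app, and msg are taken from how the listing uses the parsed record; the line format itself is an assumption):

// Hypothetical log record and parser; the "host app message" layout is assumed for illustration.
case class HostAppLog(host: String, app: String, msg: String)

object HostAppLog {
  def parseLogLine(line: String): HostAppLog = {
    // Split into at most three fields; everything after the second space is the message body
    val parts = line.split(" ", 3)
    if (parts.length == 3) HostAppLog(parts(0), parts(1), parts(2))
    else HostAppLog("unknown", "unknown", line)
  }
}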
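The listing also passes OrderingUtils.SecondValueOrdering to top(3) without showing it. Since top needs an Ordering over the (message, count) pairs, a minimal sketch could be an Ordering that compares pairs by their second element (this helper is not part of Spark; it is an assumed companion object):

// Hypothetical helper: order (key, count) pairs by their count so RDD.top returns the most frequent entries.
object OrderingUtils {
  object SecondValueOrdering extends Ordering[(String, Int)] {
    override def compare(a: (String, Int), b: (String, Int)): Int = a._2 compare b._2
  }
}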
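RuleFileListenerB only prints a reminder on each batch; the actual reload of the rule file is left commented out. A minimal sketch of that check, reusing the file path and Source calls from the commented code (the watcher object and the reload callback are assumptions for illustration):

import java.io.File

// Hypothetical watcher: remember the rule file's last-modified timestamp and
// re-read the file whenever it changes, handing the new contents to a reload callback.
object RuleFileWatcher {
  private val ruleFile = new File("d:/code/scala/test")
  @volatile private var lastModified = 0L

  def reloadIfChanged(reload: String => Unit): Unit = {
    val modified = ruleFile.lastModified()
    if (modified != lastModified) {
      lastModified = modified
      val source = scala.io.Source.fromFile(ruleFile)
      val rules = try source.mkString finally source.close()
      reload(rules)
    }
  }
}

Calling RuleFileWatcher.reloadIfChanged from onBatchStarted would perform the check once per batch, which is what the listener's printed message describes.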