Kafka --> Storm --> MongoDB

Source: Internet
Author: User
Tags: emit, MongoClient, ZooKeeper, StringBuffer

Objective:

A spout reads data from Kafka and passes it to bolts that count the occurrences of each word; the running counts are then written to MongoDB.

Storm calls the spout's nextTuple() method in a continuous loop, and each piece of data the spout emits triggers one call to the receiving bolt's execute() method.

In short, spouts transmit data and bolts process it.
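Concretely, the data flows through four components: SentenceSpout (reads from Kafka) --> SplitSentenceBolt (splits each sentence into words) --> WordCountBolt (keeps a running count per word) --> ReportBolt (prints the totals and writes them to MongoDB).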

MongoUtil: the MongoDB utility class

package storm;

import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;

public class MongoUtil {

    private MongoUtil() {}

    private static MongoClient mongo;
    private static DB db;
    private static DBCollection collection;

    static {
        mongo = new MongoClient("192.168.170.185", 27017);
        db = mongo.getDB("mySpout");
        collection = db.getCollection("myBolt");
    }

    // Returns how many documents have _id = 1 (either 0 or 1).
    public static Long getCount() {
        return collection.count(new BasicDBObject("_id", 1L));
    }

    // Inserts the result document the first time a result is written.
    public static void insert(String substring) {
        DBObject obj = new BasicDBObject();
        obj.put("_id", 1);
        obj.put("bolt", substring);
        collection.insert(obj);
    }

    // Replaces the existing result document with the latest counts.
    public static void update(String substring) {
        DBObject obj = new BasicDBObject();
        obj.put("_id", 1);
        DBObject obj2 = collection.findOne(obj);
        obj2.put("bolt", substring);
        collection.update(obj, obj2);
    }

}
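Given how insert() and update() are written, the myBolt collection only ever holds a single document, keyed by _id = 1, whose bolt field is the serialized count string built by ReportBolt further down. So the stored document looks roughly like this (the counts shown are illustrative):

{ "_id" : 1, "bolt" : "{storm:3,kafka:2}" }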

SentenceSpout: the spout that reads data from Kafka and emits it into the topology.

package storm;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.serializer.StringDecoder;
import kafka.utils.VerifiableProperties;

public class SentenceSpout extends BaseRichSpout {

    private SpoutOutputCollector collector;
    private ConsumerConnector consumer;
    private Map conf;

    @Override
    public void open(Map map, TopologyContext context, SpoutOutputCollector collector) {
        // Put the initialization here in open(); initializing elsewhere may cause errors.
        this.conf = map;
        this.collector = collector;
        Properties props = new Properties();

        // ZooKeeper connection
        props.put("zookeeper.connect", "192.168.170.185:2181");

        // Consumer group
        props.put("group.id", "testgroup");

        // ZooKeeper timeouts and offset handling
        props.put("zookeeper.session.timeout.ms", "4000");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");
        props.put("auto.offset.reset", "smallest");

        // Serializer class
        props.put("serializer.class", "kafka.serializer.StringEncoder");

        ConsumerConfig config = new ConsumerConfig(props);
        this.consumer = kafka.consumer.Consumer.createJavaConsumerConnector(config);
    }

    @Override
    public void nextTuple() {
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put("helloworld", new Integer(1));

        StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());
        StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());
        Map<String, List<KafkaStream<String, String>>> consumerMap =
                consumer.createMessageStreams(topicCountMap, keyDecoder, valueDecoder);
        KafkaStream<String, String> stream = consumerMap.get("helloworld").get(0);
        ConsumerIterator<String, String> it = stream.iterator();

        // Emit one tuple per Kafka message.
        while (it.hasNext()) {
            this.collector.emit(new Values(it.next().message()));
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }

}
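One caveat worth noting: createMessageStreams() is re-invoked on every nextTuple() call, and the while loop blocks on the consumer iterator, so a single call to nextTuple() never returns. A common refinement is to create the stream once in open() and emit at most one message per call. The following is a sketch of that pattern, not the article's code; the class name is hypothetical and the consumer.timeout.ms setting is an assumption needed to make hasNext() non-blocking:

package storm;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.ConsumerTimeoutException;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.serializer.StringDecoder;
import kafka.utils.VerifiableProperties;

// Hypothetical variant of SentenceSpout: stream created once, one emit per nextTuple().
public class SentenceSpoutNonBlocking extends BaseRichSpout {

    private SpoutOutputCollector collector;
    private ConsumerConnector consumer;
    private ConsumerIterator<String, String> it;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        Properties props = new Properties();
        props.put("zookeeper.connect", "192.168.170.185:2181");
        props.put("group.id", "testgroup");
        props.put("auto.offset.reset", "smallest");
        // Assumption: make hasNext() time out instead of blocking forever.
        props.put("consumer.timeout.ms", "100");
        this.consumer = kafka.consumer.Consumer.createJavaConsumerConnector(new ConsumerConfig(props));

        // Create the message stream once, here in open(), and keep its iterator.
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put("helloworld", new Integer(1));
        Map<String, List<KafkaStream<String, String>>> streams = consumer.createMessageStreams(
                topicCountMap,
                new StringDecoder(new VerifiableProperties()),
                new StringDecoder(new VerifiableProperties()));
        this.it = streams.get("helloworld").get(0).iterator();
    }

    @Override
    public void nextTuple() {
        try {
            // Emit at most one message per call so Storm's spout loop stays responsive.
            if (it.hasNext()) {
                collector.emit(new Values(it.next().message()));
            }
        } catch (ConsumerTimeoutException e) {
            // No message arrived within consumer.timeout.ms; return and let Storm call again.
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }
}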
SplitSentenceBolt: the bolt that splits sentences into words.

package storm;

import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

public class SplitSentenceBolt extends BaseRichBolt {

    private OutputCollector collector;
    private Map stormConf;

    @Override
    public void prepare(Map map, TopologyContext context, OutputCollector collector) {
        this.stormConf = map;
        this.collector = collector;
    }

    @Override
    public void execute(Tuple tuple) {
        String str = tuple.getStringByField("sentence");
        // Split the sentence on spaces and emit one tuple per word.
        String[] split = str.split(" ");
        for (String word : split) {
            this.collector.emit(new Values(word));
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }

}
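Because the spout above emits tuples without message IDs, Storm does not track them, and this bolt does not need to ack anything. If at-least-once delivery were wanted, a sketch of the change (not part of the article's code; it also requires the spout to emit with a message ID) would anchor each emit to its input tuple and then ack it:

// Hypothetical variant of SplitSentenceBolt.execute() with anchoring and acking.
@Override
public void execute(Tuple tuple) {
    String str = tuple.getStringByField("sentence");
    for (String word : str.split(" ")) {
        // Anchoring ties each emitted word tuple to the input sentence tuple,
        // so a downstream failure causes the whole sentence to be replayed.
        this.collector.emit(tuple, new Values(word));
    }
    // Ack the input tuple once all of its words have been emitted.
    this.collector.ack(tuple);
}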

WordCountBolt: the bolt that counts words.

package storm;

import java.util.HashMap;
import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

public class WordCountBolt extends BaseRichBolt {

    private Map boltConf;
    private OutputCollector collector;
    private HashMap<String, Long> counts = null;

    @Override
    public void prepare(Map map, TopologyContext context, OutputCollector collector) {
        this.boltConf = map;
        this.collector = collector;
        this.counts = new HashMap<String, Long>();
    }

    @Override
    public void execute(Tuple tuple) {
        String word = tuple.getStringByField("word");
        // Increment the running count for this word and emit the new total.
        this.counts.put(word, this.counts.containsKey(word) ? this.counts.get(word) + 1 : 1L);
        this.collector.emit(new Values(word, counts.get(word)));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }

}
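For example, if the spout delivers the sentence "storm kafka storm", this bolt receives three word tuples and emits ("storm", 1), ("kafka", 1), and ("storm", 2). The fieldsGrouping declared in the topology below guarantees that every occurrence of a given word reaches the same WordCountBolt task, so the in-memory map never undercounts.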

ReportBolt: the bolt that prints the running totals and writes them to MongoDB.

package storm;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.storm.Config;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;

public class ReportBolt extends BaseRichBolt {

    private HashMap<String, Long> counts = null;
    private Map boltConf;
    private StringBuffer buf = null;

    @Override
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        this.boltConf = conf;
        this.counts = new HashMap<String, Long>();
        this.buf = new StringBuffer();
    }

    @Override
    public void execute(Tuple tuple) {
        String word = tuple.getStringByField("word");
        Long count = tuple.getLongByField("count");
        this.counts.put(word, count);

        System.out.println("------ Statistical results ------");
        List<String> keys = new ArrayList<String>();
        keys.addAll(this.counts.keySet());

        // Build a string of the form {word1:count1,word2:count2}.
        buf.append("{");
        for (String key : keys) {
            buf.append(key + ":" + this.counts.get(key)).append(",");
            System.out.println(key + " : " + this.counts.get(key));
        }
        System.out.println("------------------");
        buf.append("}");
        // Remove the trailing comma before the closing brace.
        String substring = buf.delete(buf.length() - 2, buf.length() - 1).toString();

        // Insert the result document on the first write, update it afterwards.
        Long existing = MongoUtil.getCount();
        if (existing <= 0) {
            MongoUtil.insert(substring);
        } else {
            MongoUtil.update(substring);
        }
        buf.delete(0, buf.length());
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Terminal sink: this bolt emits nothing downstream.
    }

    /*
    @Override
    public Map<String, Object> getComponentConfiguration() {
        HashMap<String, Object> hashMap = new HashMap<String, Object>();
        hashMap.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 10);
        return hashMap;
    }
    */
}
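The commented-out getComponentConfiguration() method hints at a known refinement: writing to MongoDB on every tuple is expensive, and enabling that configuration makes Storm deliver a system tick tuple to this bolt every 10 seconds, which can drive periodic flushes instead. A sketch of that pattern follows; it is not the article's code, it assumes the commented-out method above is enabled plus an import of org.apache.storm.Constants, and flushToMongo() is a hypothetical helper wrapping the StringBuffer/MongoUtil logic shown above:

// Hypothetical variant of ReportBolt.execute() driven by tick tuples.
@Override
public void execute(Tuple tuple) {
    if (Constants.SYSTEM_COMPONENT_ID.equals(tuple.getSourceComponent())
            && Constants.SYSTEM_TICK_STREAM_ID.equals(tuple.getSourceStreamId())) {
        // Tick tuple: flush the accumulated counts to MongoDB once per interval.
        flushToMongo(); // hypothetical helper containing the insert/update logic above
    } else {
        // Normal tuple: only update the in-memory map; no database I/O here.
        this.counts.put(tuple.getStringByField("word"), tuple.getLongByField("count"));
    }
}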

WordCountTopology: assembling the Storm components into a topology.

package storm;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;

public class WordCountTopology {

    private static final String SENTENCE_SPOUT_ID = "sentence-spout";
    private static final String SPLIT_BOLT_ID = "split-bolt";
    private static final String COUNT_BOLT_ID = "count-bolt";
    private static final String REPORT_BOLT_ID = "report-bolt";
    private static final String TOPOLOGY_NAME = "word-count-topology";

    public static void main(String[] args) throws Exception {

        // Instantiate the spout and the bolts.
        SentenceSpout spout = new SentenceSpout();
        SplitSentenceBolt splitBolt = new SplitSentenceBolt();
        WordCountBolt countBolt = new WordCountBolt();
        ReportBolt reportBolt = new ReportBolt();

        // Create a TopologyBuilder instance.
        TopologyBuilder builder = new TopologyBuilder();

        // Register SentenceSpout.
        builder.setSpout(SENTENCE_SPOUT_ID, spout);
        // Register SplitSentenceBolt and subscribe to the tuples emitted by SentenceSpout.
        // shuffleGrouping distributes tuples randomly and evenly across the
        // SplitSentenceBolt instances.
        builder.setBolt(SPLIT_BOLT_ID, splitBolt).shuffleGrouping(SENTENCE_SPOUT_ID);
        // Register WordCountBolt and subscribe to the tuples emitted by SplitSentenceBolt.
        // fieldsGrouping routes tuples with the same value of the named field to the
        // same WordCountBolt instance.
        builder.setBolt(COUNT_BOLT_ID, countBolt).fieldsGrouping(SPLIT_BOLT_ID, new Fields("word"));
        // Register ReportBolt and subscribe to the tuples emitted by WordCountBolt.
        // globalGrouping routes all tuples to a single ReportBolt instance.
        builder.setBolt(REPORT_BOLT_ID, reportBolt).globalGrouping(COUNT_BOLT_ID);

        // Create a configuration object.
        Config conf = new Config();

        // LocalCluster represents a complete Storm cluster inside the local development
        // environment. Local mode is a simple way to develop and test, eliminating the
        // overhead of repeated deployment to a distributed cluster, and it also makes
        // breakpoint debugging very convenient.
        LocalCluster cluster = new LocalCluster();

        // Submit the topology to the cluster and let it run.
        cluster.submitTopology(TOPOLOGY_NAME, conf, builder.createTopology());

        // After the sleep elapses, kill the topology and shut down the local cluster.
        Thread.sleep(300000000);
        cluster.killTopology(TOPOLOGY_NAME);
        cluster.shutdown();
    }
}
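LocalCluster is only for development. To deploy the same topology to a real Storm cluster, the standard pattern replaces the LocalCluster section of main() with StormSubmitter; the sketch below is not part of the original article, and the worker count is an illustrative assumption:

// Sketch: replace the LocalCluster block in main() with a cluster submission.
// Requires: import org.apache.storm.StormSubmitter;
Config conf = new Config();
conf.setNumWorkers(2); // illustrative worker count; tune for your cluster
StormSubmitter.submitTopology(TOPOLOGY_NAME, conf, builder.createTopology());
// Then package the topology as a jar and run it with the Storm CLI:
//   storm jar <your-jar> storm.WordCountTopology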
