Building on the previous word-count example, make the following changes:
1. Use Storm's reliable message-processing mechanism (anchoring and acking)
2. Configure the worker, executor, and task counts
3. Submit the topology in cluster mode
The data-source spout:
package com.zhch.v2;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Spout that reads a text file (path taken from the "wordsFile" config entry)
 * and emits one "sentence" tuple per line.
 *
 * Reliable processing: every emitted tuple carries a unique message id and is
 * kept in {@code pending} until acked; on fail/timeout the tuple is re-emitted
 * with the same id.
 */
public class SentenceSpout extends BaseRichSpout {

    private FileReader fileReader = null;
    // Set once the whole file has been emitted, so nextTuple() stops re-reading it.
    private boolean completed = false;
    // Emitted-but-not-yet-acked tuples, keyed by message id.
    private ConcurrentHashMap<UUID, Values> pending;
    private SpoutOutputCollector collector;

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }

    @Override
    public void open(Map config, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        this.pending = new ConcurrentHashMap<UUID, Values>();
        try {
            this.fileReader = new FileReader(config.get("wordsFile").toString());
        } catch (Exception e) {
            // Bug fix: preserve the cause instead of discarding it.
            throw new RuntimeException("Error reading file [" + config.get("wordsFile") + "]", e);
        }
    }

    @Override
    public void nextTuple() {
        if (completed) {
            // File already consumed: back off instead of busy-spinning.
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // restore interrupt status
            }
            // Bug fix: without this return, the exhausted reader was
            // re-wrapped and re-read on every nextTuple() call.
            return;
        }
        String line;
        BufferedReader reader = new BufferedReader(fileReader);
        try {
            while ((line = reader.readLine()) != null) {
                Values values = new Values(line);
                UUID msgId = UUID.randomUUID();
                this.pending.put(msgId, values);
                // Emit with a message id so Storm tracks the tuple tree.
                this.collector.emit(values, msgId);
            }
        } catch (Exception e) {
            throw new RuntimeException("Error reading tuple", e);
        } finally {
            completed = true;
        }
    }

    @Override
    public void ack(Object msgId) {
        // Tuple fully processed downstream: drop it from the pending list.
        this.pending.remove(msgId);
    }

    @Override
    public void fail(Object msgId) {
        // Tuple failed or timed out: re-emit it with the same message id.
        this.collector.emit(this.pending.get(msgId), msgId);
    }
}
Implement the sentence-splitting bolt:
package com.zhch.v2;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

import java.util.Map;

/**
 * Bolt that splits each incoming "sentence" tuple on single spaces and emits
 * one "word" tuple per token. Each output tuple is anchored to the input tuple
 * so that a downstream failure replays the whole sentence (reliable processing).
 */
public class SplitSentenceBolt extends BaseRichBolt {

    private OutputCollector collector;

    @Override
    public void prepare(Map config, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple tuple) {
        String sentence = tuple.getStringByField("sentence");
        String[] words = sentence.split(" ");
        for (String word : words) {
            // Anchor the output tuple to the input tuple: if any word tuple
            // fails downstream, the spout replays the original sentence.
            this.collector.emit(tuple, new Values(word));
        }
        // Ack only after all word tuples have been emitted.
        this.collector.ack(tuple);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}
Implement the word-count bolt:
package com.zhch.v2;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * Bolt that maintains an in-memory word count and, after every tuple, rewrites
 * the full count table to /home/grid/stormdata/result.txt.
 *
 * NOTE(review): rewriting the whole file per tuple is expensive and, with
 * multiple executors, each task clobbers the shared path with only its own
 * partial counts — acceptable for this tutorial, not for production.
 */
public class WordCountBolt extends BaseRichBolt {

    private OutputCollector collector;
    // Per-task running counts; fieldsGrouping on "word" keeps each word on one task.
    private HashMap<String, Long> counts = null;

    @Override
    public void prepare(Map config, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.counts = new HashMap<String, Long>();
    }

    @Override
    public void execute(Tuple tuple) {
        String word = tuple.getStringByField("word");
        Long count = this.counts.get(word);
        if (count == null) {
            count = 0L;
        }
        count++;
        this.counts.put(word, count);

        // Dump the current counts; the writer is always closed in finally.
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(new FileWriter("/home/grid/stormdata/result.txt"));
            Iterator<String> keys = this.counts.keySet().iterator();
            while (keys.hasNext()) {
                String w = keys.next();
                Long c = this.counts.get(w);
                writer.write(w + " : " + c);
                writer.newLine();
                writer.flush();
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
                writer = null;
            }
        }
        // Ack after the count has been recorded and persisted.
        this.collector.ack(tuple);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}
Implement the word-count topology:
Package Com.zhch.v2;import Backtype.storm.config;import Backtype.storm.localcluster;import Backtype.storm.stormsubmitter;import Backtype.storm.topology.topologybuilder;import Backtype.storm.tuple.Fields; public class Wordcounttopology {private static final String sentence_spout_id = "Sentence-spout"; private static final String split_bolt_id = "Split-bolt"; private static final String count_bolt_id = "Conut-bolt"; private static final String Topology_name = "Word-count-topology-v2"; public static void Main (string[] args) throws Exception {sentencespout spout = new Sentencespout (); Splitsentencebolt Spiltbolt = new Splitsentencebolt (); Wordcountbolt Countbolt = new Wordcountbolt (); Topologybuilder builder = new Topologybuilder (); Builder.setspout (sentence_spout_id, SPOUT, 2); Use 2 spout executor Builder.setbolt (split_bolt_id, Spiltbolt, 2). Setnumtasks (4)//Use 2 Spiltbolt executor,4 task . shufflegrouping (sentence_spOUT_ID); Builder.setbolt (count_bolt_id, Countbolt, 2)//use 2 Countbolt executor. fieldsgrouping (split_bolt_id, New Fi ELDs ("word")); Config config = new config (); Config.put ("Wordsfile", Args[0]); if (args! = null && args.length > 1) {config.setnumworkers (2);//Use of 2 worker processes// Cluster mode start Stormsubmitter.submittopology (args[1], config, builder.createtopology ()); } else {Localcluster cluster = new Localcluster (); Cluster.submittopology (topology_name, config, builder.createtopology ()); try {thread.sleep (5 * 1000); } catch (Interruptedexception e) {} cluster.killtopology (topology_name); Cluster.shutdown (); } }}
After writing the program, package it with `mvn clean install`, then submit the jar to the Storm cluster:
[grid@master stormtarget]$ storm jar storm02-1.0-SNAPSHOT.jar com.zhch.v2.WordCountTopology /home/grid/stormdata/input.txt word-count-topology-v2
Run result:
[[email protected] stormdata]$ cat result.txt Can:1second:1simple:1set:1data:2unbounded:1has:1apache: 1open:1over:1free:1easy:2fast:: 1reliably:1any:1with:1million:1is:6learning:1analytics:1torm:1nod E:1processed:2what:1batch:1operate:1will:1language:1fault-tolerant:1[[email protected] stormData]$ CA T result.txt to:3for:2distributed:2use:2used:1storm:4it:1online:1cases:: 1of:2programming:1more:1cl Ocked:1scalable:1processing:2guarantees:1be:2etl:1continuous:1it:2hadoop:1makes:1your:1a:4at:1di D:1fun:1machine:1up:1and:5process:1rpc:1many:1system:1source:1realtime:3benchmark:1per:2doing: 1lot:1streams:1computation:2tuples:1[[email protected] stormdata]$ cat result.txt to:3for:2distributed: 2use:2used:1storm:4it:1online:1cases:: 1of:2programming:1more:1clocked:1scalable:1processing:2guara Ntees:1be:2etl:1coNtinuous:1it:2hadoop:1makes:1your:1a:4at:1did:1fun:1machine:1up:1and:5process:1rpc:1many:1sy Stem:1source:1realtime:3benchmark:1per:2doing:1lot:1streams:1computation:2tuples:1
Storm Experiment — Word Count 2