Make the following change relative to the previous word-count version: use the Direct Grouping policy so that words sharing the same initial letter are always sent to the same word-count task.
Data source spout
package com.zhch.v3;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Data-source spout: reads the file named by the "Wordsfile" config entry and
 * emits each line as a one-field tuple ("sentence"), anchored with a random
 * UUID message id so tuples can be acked/replayed (guaranteed processing).
 */
public class SentenceSpout extends BaseRichSpout {
    private FileReader fileReader = null;
    // Set once the input file has been fully emitted; nextTuple() then idles.
    private boolean completed = false;
    // msgId -> emitted values, kept until acked so fail() can re-emit.
    private ConcurrentHashMap<UUID, Values> pending;
    private SpoutOutputCollector collector;

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }

    @Override
    public void open(Map map, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        this.pending = new ConcurrentHashMap<UUID, Values>();
        try {
            this.fileReader = new FileReader(map.get("Wordsfile").toString());
        } catch (Exception e) {
            // Preserve the cause; use the same config key as the lookup above.
            throw new RuntimeException("Error reading file [" + map.get("Wordsfile") + "]", e);
        }
    }

    @Override
    public void nextTuple() {
        if (completed) {
            // File already consumed: idle briefly instead of busy-spinning,
            // then return (the original fell through and re-read a spent reader).
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
            return;
        }
        String line;
        BufferedReader reader = new BufferedReader(fileReader);
        try {
            // Emit the whole file in one nextTuple() call, tracking each tuple
            // in `pending` until it is acked.
            while ((line = reader.readLine()) != null) {
                Values values = new Values(line);
                UUID msgId = UUID.randomUUID();
                this.pending.put(msgId, values);
                this.collector.emit(values, msgId);
            }
        } catch (Exception e) {
            throw new RuntimeException("Error reading tuple", e);
        } finally {
            completed = true;
        }
    }

    @Override
    public void ack(Object msgId) {
        // Fully processed: no replay needed, drop the cached tuple.
        this.pending.remove(msgId);
    }

    @Override
    public void fail(Object msgId) {
        // Replay the cached tuple with the same message id.
        this.collector.emit(this.pending.get(msgId), msgId);
    }
}
Implement the sentence-splitting bolt
package com.zhch.v3;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

import java.util.List;
import java.util.Map;

/**
 * Splits each incoming sentence into words and uses emitDirect() to route every
 * word straight to a specific downstream count-bolt task, chosen from the
 * word's first letter — so all words sharing an initial letter land on the
 * same task (the Direct Grouping policy this version demonstrates).
 */
public class SplitSentenceBolt extends BaseRichBolt {
    private OutputCollector collector;
    // Task ids of the downstream word-count bolt, captured at prepare() time.
    private List<Integer> numCounterTasks;

    @Override
    public void prepare(Map map, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        // Get the taskId list of the downstream count bolt.
        this.numCounterTasks = context.getComponentTasks(WordCountTopology.COUNT_BOLT_ID);
    }

    @Override
    public void execute(Tuple tuple) {
        String sentence = tuple.getStringByField("sentence");
        String[] words = sentence.split(" ");
        for (String word : words) {
            Integer taskId = this.numCounterTasks.get(this.getWordCountIndex(word));
            // Anchored direct emit: the target task is chosen here, not by Storm.
            collector.emitDirect(taskId, tuple, new Values(word));
        }
        this.collector.ack(tuple);
    }

    /**
     * Maps a word to an index into the downstream task list: the word's first
     * (upper-cased) letter modulo the number of count-bolt tasks. Empty/blank
     * words map to index 0.
     */
    public Integer getWordCountIndex(String word) {
        word = word.trim().toUpperCase();
        if (word.isEmpty()) {
            return 0;
        }
        // First letter of the word, modulo downstream taskId list length.
        return word.charAt(0) % numCounterTasks.size();
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}
implements the word count bolt
package com.zhch.v3;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * Counts word occurrences per task and, after every tuple, rewrites this
 * task's complete counts map to /home/grid/stormdata/result.txt.
 * NOTE(review): every task writes the same path, so on a multi-task setup the
 * tasks on one host overwrite each other's file — acceptable for this demo.
 */
public class WordCountBolt extends BaseRichBolt {
    private OutputCollector collector;
    // In-memory counts for the words routed to this task.
    private HashMap<String, Long> counts = null;

    @Override
    public void prepare(Map map, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.counts = new HashMap<String, Long>();
    }

    @Override
    public void execute(Tuple tuple) {
        String word = tuple.getStringByField("word");
        Long count = this.counts.get(word);
        if (count == null) {
            count = 0L;
        }
        count++;
        this.counts.put(word, count);

        // Dump the full counts map; try-with-resources replaces the original
        // manual finally-close (which could NPE if the writer never opened).
        try (BufferedWriter writer =
                     new BufferedWriter(new FileWriter("/home/grid/stormdata/result.txt"))) {
            Iterator<String> keys = this.counts.keySet().iterator();
            while (keys.hasNext()) {
                String w = keys.next();
                Long c = this.counts.get(w);
                writer.write(w + " : " + c);
                writer.newLine();
                writer.flush();
            }
        } catch (Exception e) {
            // Best-effort persistence: a failed write must not fail the tuple.
            e.printStackTrace();
        }
        this.collector.ack(tuple);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}
implements the word count topology
Package Com.zhch.v3;import Backtype.storm.config;import Backtype.storm.localcluster;import Backtype.storm.stormsubmitter;import Backtype.storm.topology.topologybuilder;public class WordCountTopology { public static final String sentence_spout_id = "Sentence-spout"; public static final String split_bolt_id = "Split-bolt"; public static final String count_bolt_id = "Count-bolt"; public static final String topology_name = "Word-count-topology-v3"; public static void Main (string[] args) throws Exception {sentencespout spout = new Sentencespout (); Splitsentencebolt Spiltbolt = new Splitsentencebolt (); Wordcountbolt Countbolt = new Wordcountbolt (); Topologybuilder builder = new Topologybuilder (); Builder.setspout (sentence_spout_id, SPOUT, 2); Builder.setbolt (split_bolt_id, Spiltbolt, 2). Setnumtasks (4). shufflegrouping (sentence_spout_id); Builder.setbolt (count_bolt_id, Countbolt, 2). Directgrouping (SPLIT_BOLT_ID); Use the Direct Grouping grouping policy config config = new config (); Config.put ("Wordsfile", Args[0]); if (args! = null && args.length > 1) {config.setnumworkers (2); Cluster mode start Stormsubmitter.submittopology (args[1], config, builder.createtopology ()); } else {Localcluster cluster = new Localcluster (); Cluster.submittopology (topology_name, config, builder.createtopology ()); try {thread.sleep (5 * 1000); } catch (Interruptedexception e) {} cluster.killtopology (topology_name); Cluster.shutdown (); } }}
submit to Storm cluster
storm jar storm02-1.0-SNAPSHOT.jar com.zhch.v3.WordCountTopology /home/grid/stormdata/input.txt word-count-topology-v3
Operation Result:
[email protected] stormdata]$ cat Result.txt second:1can:1set:1simple:1use:2unbounded:1used:1it:1storm: 4online:1cases:: 1open:1apache:1of:2over:1more:1clocked:1easy:2scalable:1any:1guarantees:1etl:1mi Llion:1continuous:1is:6with:1it:2makes:1your:1a:4at:1machine:1analytics:1up:1and:5many:1system : 1source:1what:1operate:1will:1computation:2streams:1[[email protected] stormdata]$ cat Result.txt to:3for : 2data:2distributed:2has:1free:1programming:1reliably:1fast:: 1processing:2be:2hadoop:1did:1fun:1 Learning:1torm:1process:1rpc:1node:1processed:2per:2realtime:3benchmark:1batch:1doing:1lot:1langua Ge:1tuples:1fault-tolerant:1
Storm Experiment--word Count 3