Storm Experiment--word Count 2

Source: Internet
Author: User

Make the following changes based on the previous word count:
1. Using a reliable message processing mechanism
2. Configure worker, executor, task count
3. Commit using cluster mode

Data source spout
package com.zhch.v2;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Data-source spout: reads a text file line by line and emits each line as a
 * one-field tuple ("sentence").
 *
 * Reliable-message handling: every emitted tuple is tagged with a random UUID
 * msgId and kept in {@code pending} until Storm calls {@link #ack(Object)};
 * on {@link #fail(Object)} (failure or timeout) the tuple is re-emitted.
 */
public class SentenceSpout extends BaseRichSpout {
    private FileReader fileReader = null;
    // Set once the file has been fully read; nextTuple then just idles.
    private boolean completed = false;
    // In-flight tuples keyed by msgId; ConcurrentHashMap because ack/fail may
    // be called from a different thread than nextTuple.
    private ConcurrentHashMap<UUID, Values> pending;
    private SpoutOutputCollector collector;

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }

    @Override
    public void open(Map map, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        this.pending = new ConcurrentHashMap<UUID, Values>();
        try {
            // "wordsFile" is supplied via topology Config (see WordCountTopology).
            this.fileReader = new FileReader(map.get("wordsFile").toString());
        } catch (Exception e) {
            // Preserve the cause so the real failure is visible in the worker log.
            throw new RuntimeException("Error reading file [" + map.get("wordsFile") + "]", e);
        }
    }

    @Override
    public void nextTuple() {
        if (completed) {
            // Nothing left to emit; sleep to avoid busy-spinning the spout thread.
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
        String line;
        BufferedReader reader = new BufferedReader(fileReader);
        try {
            while ((line = reader.readLine()) != null) {
                Values values = new Values(line);
                UUID msgId = UUID.randomUUID();
                this.pending.put(msgId, values);
                // Emit with a msgId so Storm tracks the tuple tree (reliable processing).
                this.collector.emit(values, msgId);
            }
        } catch (Exception e) {
            throw new RuntimeException("Error reading tuple", e);
        } finally {
            completed = true;
        }
    }

    @Override
    public void ack(Object msgId) {
        // Tuple fully processed: drop it from the pending list.
        this.pending.remove(msgId);
    }

    @Override
    public void fail(Object msgId) {
        // Failure or timeout: re-emit the cached tuple under the same msgId.
        this.collector.emit(this.pending.get(msgId), msgId);
    }
}

Implement statement splitting Bolt

Package Com.zhch.v2;import Backtype.storm.task.outputcollector;import Backtype.storm.task.topologycontext;import Backtype.storm.topology.outputfieldsdeclarer;import Backtype.storm.topology.base.baserichbolt;import Backtype.storm.tuple.fields;import Backtype.storm.tuple.tuple;import Backtype.storm.tuple.values;import    Java.util.map;public class Splitsentencebolt extends Baserichbolt {private Outputcollector collector; @Override public void prepare (map map, Topologycontext Topologycontext, Outputcollector outputcollector) {this.    Collector = Outputcollector;        } @Override public void execute (tuple tuple) {String sentence = Tuple.getstringbyfield ("sentence");        string[] Words = Sentence.split ("");        for (String word:words) {this.collector.emit (tuple, new Values (word));//anchor the output tuple and input tuple for reliable message processing    } this.collector.ack (tuple); } @Override public void Declareoutputfields (Outputfieldsdeclarer outputfieldsdeclarER) {outputfieldsdeclarer.declare (New fields ("word")); }}

implement word count bolt 

Package Com.zhch.v2;import Backtype.storm.task.outputcollector;import Backtype.storm.task.topologycontext;import Backtype.storm.topology.outputfieldsdeclarer;import Backtype.storm.topology.base.baserichbolt;import Backtype.storm.tuple.fields;import Backtype.storm.tuple.tuple;import Java.io.bufferedwriter;import Java.io.filewriter;import Java.util.hashmap;import Java.util.iterator;import Java.util.map;public class    Wordcountbolt extends Baserichbolt {private Outputcollector collector;    Private Hashmap<string, long> counts = null; @Override public void prepare (map map, Topologycontext Topologycontext, Outputcollector outputcollector) {this.        Collector = Outputcollector;    this.counts = new hashmap<string, long> ();        } @Override public void execute (tuple tuple) {String word = Tuple.getstringbyfield ("word");        Long count = This.counts.get (word);        if (count = = null) {count = 0L;        } count++; This.counTs.put (Word, count);        BufferedWriter writer = null;            try {writer = new BufferedWriter (New FileWriter ("/home/grid/stormdata/result.txt"));            iterator<string> keys = This.counts.keySet (). Iterator ();                while (Keys.hasnext ()) {String w = keys.next ();                Long C = This.counts.get (w);                Writer.write (w + ":" + C);                Writer.newline ();            Writer.flush ();        }} catch (Exception e) {e.printstacktrace ();                } finally {if (writer! = null) {try {writer.close ();                } catch (Exception e) {e.printstacktrace ();            } writer = null;    }} this.collector.ack (tuple); } @Override public void Declareoutputfields (Outputfieldsdeclarer outputfieldsdeclarer) {Outputfieldsdeclarer    . Declare (New fields ("word", "count")); }}

implement word count topology 

Package Com.zhch.v2;import Backtype.storm.config;import Backtype.storm.localcluster;import Backtype.storm.stormsubmitter;import Backtype.storm.topology.topologybuilder;import Backtype.storm.tuple.Fields;    public class Wordcounttopology {private static final String sentence_spout_id = "Sentence-spout";    private static final String split_bolt_id = "Split-bolt";    private static final String count_bolt_id = "Conut-bolt";    private static final String Topology_name = "Word-count-topology-v2";        public static void Main (string[] args) throws Exception {sentencespout spout = new Sentencespout ();        Splitsentencebolt Spiltbolt = new Splitsentencebolt ();        Wordcountbolt Countbolt = new Wordcountbolt ();        Topologybuilder builder = new Topologybuilder (); Builder.setspout (sentence_spout_id, SPOUT, 2);                Use 2 spout executor Builder.setbolt (split_bolt_id, Spiltbolt, 2). Setnumtasks (4)//Use 2 Spiltbolt executor,4 task . shufflegrouping (sentence_spOUT_ID); Builder.setbolt (count_bolt_id, Countbolt, 2)//use 2 Countbolt executor. fieldsgrouping (split_bolt_id, New Fi        ELDs ("word"));        Config config = new config ();        Config.put ("Wordsfile", Args[0]); if (args! = null && args.length > 1) {config.setnumworkers (2);//Use of 2 worker processes//        Cluster mode start Stormsubmitter.submittopology (args[1], config, builder.createtopology ());            } else {Localcluster cluster = new Localcluster ();            Cluster.submittopology (topology_name, config, builder.createtopology ());            try {thread.sleep (5 * 1000);            } catch (Interruptedexception e) {} cluster.killtopology (topology_name);        Cluster.shutdown (); }    }}

after writing the program, use the command mvn clean install package, and then submit to the Storm cluster

[grid@hadoop stormTarget]$ storm jar Storm02-1.0-SNAPSHOT.jar com.zhch.v2.WordCountTopology /home/grid/stormData/input.txt Word-Count-Topology-V2

Operation Result:

[[email protected] stormdata]$ cat result.txt Can:1second:1simple:1set:1data:2unbounded:1has:1apache: 1open:1over:1free:1easy:2fast:: 1reliably:1any:1with:1million:1is:6learning:1analytics:1torm:1nod E:1processed:2what:1batch:1operate:1will:1language:1fault-tolerant:1[[email protected] stormData]$ CA T result.txt to:3for:2distributed:2use:2used:1storm:4it:1online:1cases:: 1of:2programming:1more:1cl Ocked:1scalable:1processing:2guarantees:1be:2etl:1continuous:1it:2hadoop:1makes:1your:1a:4at:1di D:1fun:1machine:1up:1and:5process:1rpc:1many:1system:1source:1realtime:3benchmark:1per:2doing: 1lot:1streams:1computation:2tuples:1[[email protected] stormdata]$ cat result.txt to:3for:2distributed: 2use:2used:1storm:4it:1online:1cases:: 1of:2programming:1more:1clocked:1scalable:1processing:2guara Ntees:1be:2etl:1coNtinuous:1it:2hadoop:1makes:1your:1a:4at:1did:1fun:1machine:1up:1and:5process:1rpc:1many:1sy Stem:1source:1realtime:3benchmark:1per:2doing:1lot:1streams:1computation:2tuples:1








Storm Experiment--word Count 2

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.