先寫個 Java 版的,近期會對照實現 Clojure 版的,並提供 Clojure 實現中宏的介紹。
入口類
package jvm.storm.starter;import jvm.storm.starter.wordcount.SplitSentence;import jvm.storm.starter.wordcount.WordCount;import jvm.storm.starter.wordcount.WordCountSpout;import backtype.storm.Config;import backtype.storm.StormSubmitter;import backtype.storm.generated.AlreadyAliveException;import backtype.storm.generated.InvalidTopologyException;import backtype.storm.topology.InputDeclarer;import backtype.storm.topology.TopologyBuilder;import backtype.storm.tuple.Fields;/** * @author guiqiangl E-mail:larry.lv.word@gmail.com * @version 建立時間:2011-11-24 下午04:40:26 * */public class WordCountStart {public static void main (String[] args){ TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("1", new WordCountSpout(""), 5);//發射器 InputDeclarer fieldsGrouping = builder.setBolt("2", new SplitSentence(), 5); fieldsGrouping.fieldsGrouping("1", new Fields("word")); builder.setBolt("3", new WordCount(), 5) .fieldsGrouping("2", new Fields("word")); Config conf = new Config(); conf.setDebug(false); // 本地模式// LocalCluster cluster = new LocalCluster(); // cluster.submitTopology("rolling-demo", conf, builder.createTopology()); //遠程啟動 conf.setNumWorkers(20); conf.setMaxSpoutPending(5000); try {StormSubmitter.submitTopology("rolling-demo", conf, builder.createTopology());} catch (AlreadyAliveException e) {e.printStackTrace();} catch (InvalidTopologyException e) {e.printStackTrace();} //結束// cluster.killTopology("rolling-demo");// cluster.shutdown();}}
發射器
package jvm.storm.starter.wordcount;import java.util.Map;import java.util.Random;import org.apache.log4j.Logger;import backtype.storm.spout.SpoutOutputCollector;import backtype.storm.task.TopologyContext;import backtype.storm.topology.IRichSpout;import backtype.storm.topology.OutputFieldsDeclarer;import backtype.storm.tuple.Fields;import backtype.storm.tuple.Values;/** * @author guiqiangl E-mail:larry.lv.word@gmail.com * @version 建立時間:2011-11-24 下午04:41:34 * */public class WordCountSpout implements IRichSpout {private static final long serialVersionUID = -620768344883063619L;public static Logger LOG = Logger.getLogger(WordCountSpout.class); SpoutOutputCollector _collector; public WordCountSpout(String string) { }public void open(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, SpoutOutputCollector collector) { _collector = collector; } public void close() { } public void nextTuple() { String[] words = new String[] {"nathan", "mike", "jackson", "golda", "bertels"}; Random rand = new Random(); String word = words[rand.nextInt(words.length)]; _collector.emit(new Values(word)); try {Thread.sleep(1000);} catch (InterruptedException e) {e.printStackTrace();} } public void ack(Object msgId) { } public void fail(Object msgId) { } public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word")); }@Overridepublic boolean isDistributed() {return false;}}
單詞拆分:
package jvm.storm.starter.wordcount;import java.util.Map;import backtype.storm.task.OutputCollector;import backtype.storm.task.TopologyContext;import backtype.storm.topology.IRichBolt;import backtype.storm.topology.OutputFieldsDeclarer;import backtype.storm.tuple.Fields;import backtype.storm.tuple.Tuple;import backtype.storm.tuple.Values;/** * @author guiqiangl E-mail:larry.lv.word@gmail.com * @version 建立時間:2011-11-24 下午04:48:29 * */public class SplitSentence implements IRichBolt {private static final long serialVersionUID = -424523368294777576L;OutputCollector _collector; public void prepare(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, OutputCollector collector) { _collector = collector; } public void execute(Tuple tuple) { String sentence = tuple.getString(0); for(String word: sentence.split(" ")) { _collector.emit(tuple, new Values(word));//anchoring //_collector.emit(new Values(word));//unanchoring } _collector.ack(tuple); } public void cleanup() { } public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word")); }}
計數:
package jvm.storm.starter.wordcount;import java.io.BufferedWriter;import java.io.FileWriter;import java.io.IOException;import java.util.HashMap;import java.util.Iterator;import java.util.Map;import backtype.storm.task.OutputCollector;import backtype.storm.task.TopologyContext;import backtype.storm.topology.IRichBolt;import backtype.storm.topology.OutputFieldsDeclarer;import backtype.storm.tuple.Fields;import backtype.storm.tuple.Tuple;/** * @author guiqiangl E-mail:larry.lv.word@gmail.com * @version 建立時間:2011-11-24 下午04:56:13 * */public class WordCount implements IRichBolt {private static final long serialVersionUID = -6706714875516091987L;public Map<String, Integer> counterMap = new HashMap<String, Integer>();OutputCollector _collector; BufferedWriter output = null; public void prepare(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, OutputCollector collector) { _collector = collector; try { output = new BufferedWriter(new FileWriter("/home/hadoop/案頭/wordcount.txt" , true));} catch (IOException e) {e.printStackTrace();try {output.close();} catch (IOException e1) {e1.printStackTrace();}} } public void execute(Tuple tuple) { String sentence = tuple.getString(0); Integer count = counterMap.get(sentence); if(count == null){ count = 0; } count ++; counterMap.put(sentence, count); Iterator<String> iterator = counterMap.keySet().iterator(); while(iterator.hasNext()){ String next = iterator.next(); try { System.out.print(next + ":" + counterMap.get(next) + " ");output.write(next + ":" + counterMap.get(next) + " ");output.flush();} catch (IOException e) {e.printStackTrace();try {output.close();} catch (IOException e1) {e1.printStackTrace();}} } System.out.println(); _collector.ack(tuple); } public void cleanup() { } public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word")); }}