Java implementation of Spark Streaming and Kafka integration for stream computing

2017/6/26 update: I have since taken over the search system and picked up a lot of new experience in the past six months. Rather than rewriting this rough post, please read the newer article at http://blog.csdn.net/yujishi2/article/details/73849237 for a fuller picture before looking at the rough code below.

Background: There are quite a few articles about Spark Streaming online, but most of the implementations are in Scala. Our e-commerce real-time recommendation project is Java-oriented, and we hit a few pitfalls along the way, so I wrote up the Java version of the implementation. The code is rather stream-of-consciousness; go easy on it, and discussion is welcome.

Pipeline: Spark Streaming reads the user's real-time click data from Kafka, filters it, reads the item similarity matrix from Redis and the user's history from the DB, computes interest in real time, and writes the result to Redis for the API layer to read and display; a copy is also written to HDFS for offline precision and recall evaluation.

Note: as I understand it, in a large-scale real-time recommendation system collaborative filtering is generally used only to generate the candidate set, and the interest calculation here would be replaced by a CTR-estimation rerank (among other strategies), i.e. calculateInterest would call an online rerank service for the ordering.
12/13 update: recall is unchanged; ordering currently uses CTR estimation plus business rules, with LTR to follow.
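To make that rerank remark concrete, here is a minimal sketch of what calling an online rerank service from calculateInterest could look like. RerankService, CtrRuleRerankService, and scoreCtr are hypothetical names made up for illustration; they are not part of the project code below.

import java.util.*;

// Hypothetical interface: reorders the CF candidate set by predicted CTR plus rules
interface RerankService {
    List<Long> rerank(String userId, List<Long> candidateGoodsIds);
}

class CtrRuleRerankService implements RerankService {
    @Override
    public List<Long> rerank(String userId, List<Long> candidateGoodsIds) {
        final Map<Long, Double> ctr = new HashMap<Long, Double>();
        for (Long goodsId : candidateGoodsIds) {
            // scoreCtr is a stand-in for a call to the online CTR model service
            ctr.put(goodsId, scoreCtr(userId, goodsId));
        }
        List<Long> reranked = new ArrayList<Long>(candidateGoodsIds);
        Collections.sort(reranked, new Comparator<Long>() {
            @Override
            public int compare(Long a, Long b) {
                return ctr.get(b).compareTo(ctr.get(a)); // descending predicted CTR
            }
        });
        return reranked;
    }

    private double scoreCtr(String userId, Long goodsId) {
        return 0.0; // placeholder: the real score would come from the model service
    }
}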
Enough preamble, on to the code:
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.*;
import com.alibaba.fastjson.JSON;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.*;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.*;
import org.apache.spark.streaming.kafka.KafkaUtils;
import scala.Tuple2;

public class Main {
    static final String ZK_QUORUM = "*.*.*.*:2181,*.*.*.*:2181,*.*.*.*:2181/kafka";
    static final String GROUP = "test-consumer-group";
    static final String TOPICS = "user_trace";
    static final String NUM_THREAD = "64";

    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("main.java.computingCenter");
        // Create the context with a 2 second batch size: read from Kafka every two seconds
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));
        int numThreads = Integer.parseInt(NUM_THREAD);
        Map<String, Integer> topicMap = new HashMap<String, Integer>();
        for (String topic : TOPICS.split(",")) {
            topicMap.put(topic, numThreads);
        }
        JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(jssc, ZK_QUORUM, GROUP, topicMap);
        JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
            public String call(Tuple2<String, String> tuple2) {
                return tuple2._2();
            }
        });
        JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            public Iterable<String> call(String lines) {
                // Kafka record format: {"topic":"user_trace","partitionKey":"0","timestamp":1471524044018,
                //   "data":"0=163670589171371918%3a196846178238302087","logId":"0","contentType":"application/x-www-form-urlencoded"}
                List<String> arr = new ArrayList<String>();
                for (String s : lines.split(" ")) {
                    Map j = JSON.parseObject(s);
                    String s2 = "";
                    try {
                        String s1 = URLDecoder.decode(j.get("data").toString(), "UTF-8");
                        s2 = s1.split("=")[1];
                    } catch (UnsupportedEncodingException e) {
                        e.printStackTrace();
                    }
                    arr.add(s2);
                }
                return arr;
            }
        });
        JavaPairDStream<String, String> goodsSimilarityLists = words.filter(new Function<String, Boolean>() {
            @Override
            public Boolean call(String s) throws Exception {
                // Filter out malformed records; a valid record is "userId:goodsId"
                return s.split(":").length == 2;
            }
        }).mapPartitionsToPair(new PairFlatMapFunction<Iterator<String>, String, String>() {
            // Process each partition, pair by pair
            @Override
            public Iterable<Tuple2<String, String>> call(Iterator<String> it) throws Exception {
                ArrayList<Tuple2<String, String>> result = new ArrayList<Tuple2<String, String>>();
                while (it.hasNext()) {
                    String x = it.next();
                    String userId = x.split(":")[0];
                    String goodsId = x.split(":")[1];
                    System.out.println(x);
                    try {
                        // This service reads from Redis, computes interest in real time, and
                        // writes the recommendation result back to Redis for the API layer
                        CalculateInterestService service = new CalculateInterestService();
                        LinkedHashMap<Long, Double> recommendMap = null;
                        try {
                            recommendMap = service.calculateInterest(userId, goodsId);
                        } catch (Exception e) {
                            e.printStackTrace();
                        }
                        // Serialize the recommend list as "timestamp:id1{/c}id2{/c}...idN"
                        String text = "";
                        int count = 0;
                        for (Map.Entry<Long, Double> entry : recommendMap.entrySet()) {
                            text = text + entry.getKey();
                            if (count == recommendMap.size() - 1) {
                                break;
                            }
                            count = count + 1;
                            text = text + "{/c}";
                        }
                        text = System.currentTimeMillis() + ":" + text;
                        result.add(new Tuple2<String, String>(userId, text));
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
                return result;
            }
        });
        goodsSimilarityLists.foreachRDD(new Function<JavaPairRDD<String, String>, Void>() {
            @Override
            public Void call(JavaPairRDD<String, String> rdd) throws Exception {
                // Print the RDD to ease debugging
                System.out.println(rdd.collect());
                return null;
            }
        });
        JavaPairDStream<Text, Text> goodsSimilarityListsText = goodsSimilarityLists.mapToPair(new PairFunction<Tuple2<String, String>, Text, Text>() {
            @Override
            public Tuple2<Text, Text> call(Tuple2<String, String> ori) throws Exception {
                // Convert to org.apache.hadoop.io.Text so saveAsHadoopFiles can write to HDFS
                return new Tuple2(new Text(ori._1), new Text(ori._2));
            }
        });
        // Write a copy to HDFS for offline evaluation
        goodsSimilarityListsText.saveAsHadoopFiles("/user/hadoop/recommend_list/rl", "123", Text.class, Text.class, SequenceFileOutputFormat.class);
        jssc.start();
        jssc.awaitTermination();
    }
}
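As a side note, the flatMap parsing above is easy to sanity-check in isolation. Here is a minimal sketch using the sample record from the code comment, with the same fastjson JSON.parseObject and java.net.URLDecoder calls as the job (ParseSmokeTest is just an illustrative name):

import java.net.URLDecoder;
import java.util.Map;
import com.alibaba.fastjson.JSON;

public class ParseSmokeTest {
    public static void main(String[] args) throws Exception {
        // The sample record from the comment in the flatMap above
        String s = "{\"topic\":\"user_trace\",\"partitionKey\":\"0\",\"timestamp\":1471524044018,"
                + "\"data\":\"0=163670589171371918%3a196846178238302087\",\"logId\":\"0\","
                + "\"contentType\":\"application/x-www-form-urlencoded\"}";
        Map j = JSON.parseObject(s);
        // Decode the form-encoded payload and keep the part after "="
        String decoded = URLDecoder.decode(j.get("data").toString(), "UTF-8");
        String pair = decoded.split("=")[1];
        System.out.println(pair); // 163670589171371918:196846178238302087
    }
}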
import java.sql.*;
import java.util.*;

public class CalculateInterestService {
    private String dictKey = "greate_item_sim_2.0";
    private String recommendTable = "great_recommend_table_2.0";
    static final String HIGO_BASE_URL = "jdbc:mysql://*.*.*.*:3212/*";
    static final String HIGO_BASE_USER = "*";
    static final String HIGO_BASE_PASS = "*";

    public LinkedHashMap<Long, Double> calculateInterest(String userId, String traceGoodsId) {
        LinkedHashMap<Long, Double> sortedMap = new LinkedHashMap<Long, Double>();
        String[] simGoods = RedisHelper.getInstance().hget(dictKey, traceGoodsId).split(",");
        // The user history should be stored in action:goodsId:timestamp format; to be
        // refactored once BI writes it into a separate table
        HashMap<Long, String> userTrace = null;
        try {
            userTrace = getUserTrace(userId);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            return sortedMap;
        }
        HashMap<Long, Double> recommendMap = new HashMap<Long, Double>();
        String[] simGoodsIds = new String[simGoods.length];
        for (int i = 0; i < simGoods.length; i++) {
            simGoodsIds[i] = simGoods[i].split(":")[0];
        }
        List<String> pSimGoodsIds = RedisHelper.getInstance().hmget(dictKey, simGoodsIds);
        HashMap<Long, String> predictSimGoodsIds = new HashMap<Long, String>();
        for (int i = 0; i < simGoodsIds.length; i++) {
            predictSimGoodsIds.put(Long.parseLong(simGoodsIds[i]), pSimGoodsIds.get(i));
        }
        for (String item : simGoods) {
            // Needs optimization
            Double totalSum = 0.0;
            Double sum = 0.0;
            Long originGoodsId = Long.parseLong(item.split(":")[0]);
            for (String predictGoods : predictSimGoodsIds.get(originGoodsId).split(",")) {
                Long goodsId = Long.parseLong(predictGoods.split(":")[0]);
                Double sim = Double.valueOf(predictGoods.split(":")[1]);
                totalSum = totalSum + sim;
                Double score = 0.0;
                if (!userTrace.containsKey(goodsId)) {
                    // TODO: the user rating matrix is too sparse and needs SVD to fill it in;
                    // items without an action default to a score of 0.1 for now
                    userTrace.put(goodsId, "default");
                }
                String action = userTrace.get(goodsId);
                if (action.equals("click")) {
                    score = 0.2;
                } else if (action.equals("favorate")) {
                } else if (action.equals("add_cart")) {
                    score = 0.6;
                } else if (action.equals("order")) {
                    score = 0.8;
                } else if (action.equals("default")) {
                    score = 0.1;
                }
                // The similarity dictionary should be stored in goodsId:sim format; to be refactored
                sum = sum + score * sim;
            }
            Double predictResult = sum / totalSum;
            recommendMap.put(originGoodsId, predictResult);
        }
        // Sort the recommend list by predicted interest, descending
        List<Map.Entry<Long, Double>> list = new ArrayList<Map.Entry<Long, Double>>(recommendMap.entrySet());
        Collections.sort(list, new Comparator<Map.Entry<Long, Double>>() {
            @Override
            public int compare(Map.Entry<Long, Double> o1, Map.Entry<Long, Double> o2) {
                return o2.getValue().compareTo(o1.getValue());
            }
        });
        for (Map.Entry<Long, Double> tmpEntry : list) {
            sortedMap.put(tmpEntry.getKey(), tmpEntry.getValue());
        }
        writeRecommendListToRedis(userId, sortedMap);
        return sortedMap;
    }

    private HashMap<Long, String> getUserTrace(String userId) throws ClassNotFoundException {
        // SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc);
        Class.forName("com.mysql.jdbc.Driver");
        PreparedStatement stmt = null;
        Connection conn = null;
        UserTrace userTrace = new UserTrace();
        try {
            conn = DriverManager.getConnection(HIGO_BASE_URL, HIGO_BASE_USER, HIGO_BASE_PASS);
            String sql = "select * from t_pandora_goods_record where account_id=" + userId;
            stmt = (PreparedStatement) conn.prepareStatement(sql);
            ResultSet rs = stmt.executeQuery();
            while (rs.next()) {
                userTrace.setId(Long.parseLong(rs.getString(1)));
                userTrace.setAccountId(Long.parseLong(rs.getString(2)));
                userTrace.setGoodsIds(rs.getString(3));
                userTrace.setMtime(rs.getString(4));
            }
            stmt.close();
            conn.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
        String[] goodsActionTimestamp = userTrace.getGoodsIds().split(",");
        HashMap<Long, String> hm = new HashMap<Long, String>();
        for (String ac : goodsActionTimestamp) {
            Long goodsId = Long.parseLong(ac.split(":")[0]);
            // Hack: the next step is for BI to write the user history into the table in
            // action:goodsId:timestamp format (the timestamp will later join the weighting);
            // until then, every recorded item is treated as a click:
            // String action = ac.split(":")[1];
            // String timestamp = ac.split(":")[2];
            String action = "click";
            hm.put(goodsId, action);
        }
        return hm;
    }

    private void writeRecommendListToRedis(String userId, LinkedHashMap<Long, Double> sortedMap) {
        String recommendList = "";
        int count = 0;
        for (Map.Entry<Long, Double> entry : sortedMap.entrySet()) {
            recommendList = recommendList + entry.getKey();
            if (count == sortedMap.size() - 1) {
                break;
            }
            count = count + 1;
            recommendList = recommendList + ",";
        }
        RedisHelper.getInstance().hset(recommendTable, userId, recommendList);
    }
}
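To spell out what the scoring loop in calculateInterest computes: the predicted interest in a candidate item is the similarity-weighted average of the user's action scores over that item's neighbors, i.e. interest = sum(score_i * sim_i) / sum(sim_i). A toy calculation with made-up similarities:

public class InterestToyExample {
    public static void main(String[] args) {
        // Candidate goods g with two neighbors; the similarities are made up for the example
        double simA = 0.9, simB = 0.3;
        // The user clicked neighbor A (score 0.2); no action on B, so the default 0.1 applies
        double scoreA = 0.2, scoreB = 0.1;
        double interest = (scoreA * simA + scoreB * simB) / (simA + simB);
        System.out.println(interest); // (0.18 + 0.03) / 1.2 ≈ 0.175
    }
}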