Data collected from Kafka can be written to a variety of OLTP databases through a custom Flume sink. The following example writes data to the distributed key-value database Aerospike through a custom sink implementation.
1. The custom sink code is as follows:
package kafka_sink.asd;

import java.io.IOException;
import java.net.ConnectException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.aerospike.client.AerospikeException;
import com.aerospike.client.Bin;
import com.aerospike.client.Host;
import com.aerospike.client.Key;
import com.aerospike.client.Record;
import com.aerospike.client.async.AsyncClient;
import com.aerospike.client.async.AsyncClientPolicy;
import com.aerospike.client.listener.RecordListener;
import com.aerospike.client.listener.WriteListener;
import com.aerospike.client.policy.Policy;
import com.aerospike.client.policy.WritePolicy;

import org.apache.flume.Channel;
import org.apache.flume.Constants;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class Asdsink extends Abstractsink implements configurable {//private String myprop;
public static final String TOPIC_HDR = "TOPIC";
public static final String KEY_HDR = "KEY";
Private String mz_tag_topic;
Private Aerospikeclient asd_client;
Private String Asd_host1;
Private String Asd_host2;
private int asd_port;
Private String Asd_name_space = "cm";
Private String Mz_set_name;
Private String Mz_bin_name;
private int batchsize;//A transaction of the number of event, the overall submission of private writepolicy write_policy;
Private Policy Policy;
Async Read and Write private asyncclient asd_async_client;
Private Asyncclientpolicy Async_client_policy;
Private Boolean completed;
@Override public void Configure {//string MyProp = context.getstring ("MyProp", "DefaultValue"); Process the MyProp value (e.g. validation)//Store MyProp for later RetrievAl by Process () method//this.myprop = MyProp;
Asd_host1 = context.getstring ("Asd_host1", "127.0.0.1");
Asd_host2 = context.getstring ("Asd_host2", "127.0.0.1");
Asd_port = Context.getinteger ("Asd_port", 3000);
Set_name = context.getstring ("Set_name", "xxx");
Bin_name = context.getstring ("Bin_name", "xxx");
BatchSize = Context.getinteger ("BatchSize", 1000);
System.out.printf ("asd_host1:%s\n", asd_host1);
System.out.printf ("asd_host2:%s\n", asd_host2);
System.out.printf ("asd_port:%d\n", Asd_port);
System.out.printf ("set_name:%s\n", set_name);
System.out.printf ("bin_name:%s\n", bin_name);
System.out.printf ("batchsize:%d\n", batchsize); @Override public void Start () {//Initialize the "connection to" external repository (e.g. HDFS) that//
This Sink would forward Events to..
Host[] hosts = new host[] {new host (ASD_HOST1, 3000), new host (ASD_HOST2, 3000)};
Async_client_policy = new Asyncclientpolicy (); Async_client_policy.asyncmaxCommands = 300;
Async_client_policy.failifnotconnected = true;
Asd_async_client = new Asyncclient (Async_client_policy, hosts);
Policy = new policy ();
Policy.timeout = 20;
Write_policy = new Writepolicy ();
Write_policy.timeout = 20; @Override public void Stop () {//Disconnect from the external respository and does any//additional Cleanu
P (e.g. releasing resources or nulling-out//field values).
Asd_async_client.close ();
@Override public Status process () throws Eventdeliveryexception {status status = NULL;
Start transaction Channel ch = getchannel ();
Transaction Txn = Ch.gettransaction ();
Txn.begin ();
try {//This try clause includes whatever Channel operations you want to do long processedevent = 0;
for (; processedevent < batchsize; processedevent++) {Event event = Ch.take ();
Byte[] Eventbody; if (event!= null) {eventbody = event.geTbody ();
String line= new String (Eventbody, "UTF-8");
if (line.length () > 0) {string[] Key_tag = Line.split ("T");
if (key_tag.length = = 2) {String tmp_key = key_tag[0];
String Tmp_tag = key_tag[1];
Key As_key = new key (Asd_name_space, Set_name, Tmp_key);
Bin Ad_bin = new Bin (bin_name, Tmp_tag);
Try{completed = false;
Asd_async_client.get (policy,new readhandler (Asd_async_client,policy,write_policy, As_key, Ad_bin), As_key);
Waittillcomplete ();
catch (Throwable t) {System.out.println ("[error][process]" + t.tostring ());
}}}//Send the Event to the external repository.
Storesomedata (e);
status = Status.ready;
Txn.commit ();
catch (Throwable t) {txn.rollback (); Log exception, handle individual exceptions as needed status = Status.backoff;
Re-throw all Errors if (T instanceof Error) {System.out.println ("[error][process]" + t.tostring ());
throw (Error) t;
} txn.close ();
return status;
Private class Writehandler implements Writelistener {private final asyncclient client;
Private final writepolicy policy;
Private final key key;
Private Final Bin bin;
private int failcount = 0;
Public Writehandler (asyncclient client, Writepolicy policy, key key, Bin bin) {this.client = client;
This.policy = policy;
This.key = key;
This.bin = bin;
}//Write success callback.
public void onsuccess (key key) {try {//Write succeeded. catch (Exception e) {System.out.printf ("[Error][writehandler]failed to put:namespace=%s set=%s key=%s
Tion=%s\n ", Key.namespace, Key.setname, Key.userkey, E.getmessage ());
} notifycompleted (); } public void OnFailure (AEROSPIKEEXCEption e) {//retry up to 2.
if (++failcount <= 2) {Throwable t = e.getcause ();
Check for common socket errors. if (t!= null && (t instanceof connectexception | | t instanceof ioexception)) {//console.info ("
Retrying put: "+ Key.userkey);
try {client.put (policy, this, key, bin);
Return
The catch (Exception ex) {//Fall through to the error case. System.out.printf ("[Error][writehandler]failed to put:namespace=%s set=%s key=%s bin_name=% bin_value=%s exception=%s
\ n ", Key.namespace, Key.setname, key.userkey,bin.name,bin.value.tostring (), E.getmessage ());
}} notifycompleted ();
} Private class Readhandler implements Recordlistener {private final asyncclient client;
Private final Policy Policy; Private FInal Writepolicy Write_policy;
Private final key key;
Private Final Bin bin;
private int failcount = 0; Public Readhandler (asyncclient client, Policy policy,writepolicy Write_policy, key key, Bin bin) {this.client = client
;
This.policy = policy;
This.write_policy = Write_policy;
This.key = key;
This.bin = bin;
}//Read success callback. public void onsuccess (key key, record record) {try {//Read succeeded.
Now call write.
if (record!= null) {String str = record.getstring ("Mz_tag");
if (str!= null && str.length () > 0) {Pattern p101 = Pattern.compile ("(101\\d{4})");
Pattern p102 = Pattern.compile ("(102\\d{4})");
Pattern p103 = Pattern.compile ("(103\\d{4})");
String tags= "";
Matcher m101 = P101.matcher (str);
while (M701.find ()) {tags = = ("," + m701.group (1)); } Matcher m102 = P102.matcher (str);
while (M102.find ()) {tags = = ("," + m102.group (1));
} Matcher m103 = P103.matcher (str);
while (M103.find ()) {tags = = ("," + m103.group (1)); } if (Tags.length () > 0) {String value_new = (bin.value.toString ()
+ tags);
Bin New_bin = new Bin ("Mz_tag", value_new);
Client.put (write_policy,new writehandler (Client,write_policy, Key, New_bin), key,new_bin); else {client.put (write_policy,new writehandler (Client,write_policy, Key, bin), key
, bin); } else {client.put (write_policy,new writehandler (CLI
Ent,write_policy, Key, bin), Key,bin); } else {Client.put (write_pOlicy,new Writehandler (Client,write_policy, Key, bin), Key,bin); The catch (Exception e) {System.out.printf ("[Error][readhandler]failed to get:namespace=%s set=%s key
=%s exception=%s\n ", Key.namespace, Key.setname, Key.userkey, E.getmessage ());
}//Error callback.
public void OnFailure (Aerospikeexception e) {//retry up to 2.
if (++failcount <= 2) {Throwable t = e.getcause ();
Check for common socket errors. if (t!= null && (t instanceof connectexception | | t instanceof ioexception)) {//console.info ("
Retrying get: "+ Key.userkey);
try {client.get (policy, this, key);
Return
The catch (Exception ex) {//Fall through to the error case. System.out.printf ("[Error][readhandler]failed to get:namespace=%s set=%s key=%s exceptioN=%s\n ", Key.namespace, Key.setname, Key.userkey, E.getmessage ());
}} notifycompleted ();
Private synchronized void Waittillcomplete () {while (! Completed) {try {super.wait ();
' catch (Interruptedexception IE) {}} ' private synchronized void notifycompleted () {completed = true;
Super.notify (); }
}
2. Use Maven to package the custom sink into a jar. The Maven configuration file (pom.xml) is as follows:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.apache.flume.flume-ng-sinks</groupId>
  <artifactId>flume-ng-aerospike-master-sink</artifactId>
  <name>flume Kafka sink</name>
  <version>1.0.0</version>
  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
      </plugin>
    </plugins>
  </build>
  <dependencies>
    <dependency>
      <groupId>org.apache.flume</groupId>
      <artifactId>flume-ng-sdk</artifactId>
      <version>1.5.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flume</groupId>
      <artifactId>flume-ng-core</artifactId>
      <version>1.5.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flume</groupId>
      <artifactId>flume-ng-configuration</artifactId>
      <version>1.5.2</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>1.6.1</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.10</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>com.aerospike</groupId>
      <artifactId>aerospike-client</artifactId>
      <version>[3.0.0,)</version>
    </dependency>
  </dependencies>
</project>
3. Put the packaged jar into Flume's lib directory.
4. Add the custom sink's configuration to a properties file in Flume's conf directory:
a1.sinks.k1_1.type = kafka_sink.asd.Asdsink
a1.sinks.k1_1.asd_host1 = 127.0.0.1
a1.sinks.k1_1.asd_host2 = 192.168.0.1
a1.sinks.k1_1.asd_port = 3000
a1.sinks.k1_1.set_name = Test_set_name
a1.sinks.k1_1.bin_name = Test_bin_name
a1.sinks.k1_1.batchSize = 10000
Related information:
1. Flume official website Document: http://flume.apache.org/FlumeDeveloperGuide.html
2. Apache Maven Primer (Part 1): http://blog.csdn.net/yanshu2012/article/details/50722088
3. Apache Maven Primer (Part 2): http://blog.csdn.net/yanshu2012/article/details/50722621