This article describes how to read data from Kafka with a custom Flume source.
Code (KafkaSource.java):
/*******************************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 ******************************************************************************/
package org.apache.flume.source.kafka;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.conf.ConfigurationException;
import org.apache.flume.event.SimpleEvent;
import org.apache.flume.source.AbstractSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A Flume Source that reads messages from Kafka. It has been used in a
 * production environment with good performance: a single source can read
 * over 100k messages per second from Kafka.<p>
 * <tt>zookeeper.connect: </tt> the ZooKeeper connection string Kafka uses.<p>
 * <tt>topic: </tt> the Kafka topic to read from.<p>
 * <tt>group.id: </tt> the group id of the consumer group.<p>
 */
public class KafkaSource extends AbstractSource implements Configurable, PollableSource {
    private static final Logger log = LoggerFactory.getLogger(KafkaSource.class);
    private ConsumerConnector consumer;
    private ConsumerIterator<byte[], byte[]> it;
    private String topic;

    public Status process() throws EventDeliveryException {
        List<Event> eventList = new ArrayList<Event>();
        MessageAndMetadata<byte[], byte[]> message;
        Event event;
        Map<String, String> headers;
        String strMessage;
        try {
            if (it.hasNext()) {
                message = it.next();
                event = new SimpleEvent();
                headers = new HashMap<String, String>();
                headers.put("timestamp", String.valueOf(System.currentTimeMillis()));
                // Prepend a timestamp to the original Kafka message (see the
                // note at the end of this post).
                strMessage = String.valueOf(System.currentTimeMillis()) + "|" + new String(message.message());
                log.debug("Message: {}", strMessage);
                event.setBody(strMessage.getBytes());
                event.setHeaders(headers);
                eventList.add(event);
            }
            getChannelProcessor().processEventBatch(eventList);
            return Status.READY;
        } catch (Exception e) {
            log.error("KafkaSource EXCEPTION, {}", e.getMessage());
            return Status.BACKOFF;
        }
    }

    public void configure(Context context) {
        topic = context.getString("topic");
        if (topic == null) {
            throw new ConfigurationException("Kafka topic must be specified.");
        }
        try {
            this.consumer = KafkaSourceUtil.getConsumer(context);
        } catch (IOException e) {
            log.error("IOException occurred, {}", e.getMessage());
        } catch (InterruptedException e) {
            log.error("InterruptedException occurred, {}", e.getMessage());
        }
        if (consumer == null) {
            throw new ConfigurationException("Unable to create Kafka consumer.");
        }
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, new Integer(1));
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
        if (consumerMap == null) {
            throw new ConfigurationException("consumerMap is null");
        }
        List<KafkaStream<byte[], byte[]>> topicList = consumerMap.get(topic);
        if (topicList == null || topicList.isEmpty()) {
            throw new ConfigurationException("topicList is null or empty");
        }
        KafkaStream<byte[], byte[]> stream = topicList.get(0);
        it = stream.iterator();
    }

    @Override
    public synchronized void stop() {
        consumer.shutdown();
        super.stop();
    }
}
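The process() above delivers at most one event per call, which pays channel-transaction overhead per message. A common refinement is to drain a batch of messages per call. Below is a minimal sketch of such a variant, reusing the fields of the class above; the batch size of 100 and the assumption that consumer.timeout.ms is set in the source properties (so that hasNext() throws ConsumerTimeoutException instead of blocking forever) are mine, not part of the original code.

    // Batched variant of process(); assumes consumer.timeout.ms is configured.
    public Status process() throws EventDeliveryException {
        List<Event> eventList = new ArrayList<Event>();
        try {
            // 100 is an illustrative batch size.
            for (int i = 0; i < 100 && it.hasNext(); i++) {
                MessageAndMetadata<byte[], byte[]> message = it.next();
                Event event = new SimpleEvent();
                Map<String, String> headers = new HashMap<String, String>();
                headers.put("timestamp", String.valueOf(System.currentTimeMillis()));
                event.setBody(message.message());
                event.setHeaders(headers);
                eventList.add(event);
            }
        } catch (kafka.consumer.ConsumerTimeoutException e) {
            // No message arrived within consumer.timeout.ms; deliver what we have.
        }
        if (eventList.isEmpty()) {
            return Status.BACKOFF;
        }
        getChannelProcessor().processEventBatch(eventList);
        return Status.READY;
    }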
Code (KafkaSourceUtil.java):
/*******************************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 ******************************************************************************/
package org.apache.flume.source.kafka;

import java.io.IOException;
import java.util.Map;
import java.util.Properties;

import com.google.common.collect.ImmutableMap;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.javaapi.consumer.ConsumerConnector;

import org.apache.flume.Context;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class KafkaSourceUtil {
    private static final Logger log = LoggerFactory.getLogger(KafkaSourceUtil.class);

    public static Properties getKafkaConfigProperties(Context context) {
        log.info("context={}", context.toString());
        Properties props = new Properties();
        ImmutableMap<String, String> contextMap = context.getParameters();
        for (Map.Entry<String, String> entry : contextMap.entrySet()) {
            String key = entry.getKey();
            // Forward everything except Flume's own "type" and "channel"
            // settings to the Kafka consumer configuration.
            if (!key.equals("type") && !key.equals("channel")) {
                props.setProperty(entry.getKey(), entry.getValue());
                log.info("key={},value={}", entry.getKey(), entry.getValue());
            }
        }
        return props;
    }

    public static ConsumerConnector getConsumer(Context context) throws IOException, InterruptedException {
        ConsumerConfig consumerConfig = new ConsumerConfig(getKafkaConfigProperties(context));
        ConsumerConnector consumer = Consumer.createJavaConsumerConnector(consumerConfig);
        return consumer;
    }
}
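For illustration, the following sketch (not part of the original code) feeds a hand-built Flume Context through getKafkaConfigProperties() to show which keys are forwarded to the Kafka consumer configuration. The parameter values and the class name KafkaSourceUtilDemo are illustrative; org.apache.flume.Context accepts a Map in its constructor.

import java.util.HashMap;
import java.util.Map;
import org.apache.flume.Context;

public class KafkaSourceUtilDemo {
    public static void main(String[] args) {
        Map<String, String> params = new HashMap<String, String>();
        params.put("type", "org.apache.flume.source.kafka.KafkaSource"); // filtered out
        params.put("channel", "ch0");                                    // filtered out
        params.put("zookeeper.connect", "node3:2181");                   // forwarded
        params.put("group.id", "test");                                  // forwarded
        Context context = new Context(params);
        // Prints only zookeeper.connect and group.id.
        System.out.println(KafkaSourceUtil.getKafkaConfigProperties(context));
    }
}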
Configuration file: (/etc/flume/conf/flume-kafka-file.properties)
agent_log.sources = kafka0
agent_log.channels = ch0
agent_log.sinks = sink0

agent_log.sources.kafka0.channels = ch0
agent_log.sinks.sink0.channel = ch0

agent_log.sources.kafka0.type = org.apache.flume.source.kafka.KafkaSource
agent_log.sources.kafka0.zookeeper.connect = node3:2181,node4:2181,node5:2181
agent_log.sources.kafka0.topic = kkt-test-topic
agent_log.sources.kafka0.group.id = test

agent_log.channels.ch0.type = memory
agent_log.channels.ch0.capacity = 2048
agent_log.channels.ch0.transactionCapacity = 1000

agent_log.sinks.sink0.type = file_roll
agent_log.sinks.sink0.sink.directory = /data/flumeng/data/test
agent_log.sinks.sink0.sink.rollInterval = 300
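To check the pipeline end to end, one can publish a few test messages to the topic and watch files appear under /data/flumeng/data/test. Below is a minimal sketch using the Kafka 0.8 Java producer API; the broker address node3:9092 and the class name TestProducer are assumptions, so substitute your own metadata.broker.list.

import java.util.Properties;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

public class TestProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("metadata.broker.list", "node3:9092"); // assumed broker address
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        Producer<String, String> producer =
                new Producer<String, String>(new ProducerConfig(props));
        for (int i = 0; i < 10; i++) {
            producer.send(new KeyedMessage<String, String>("kkt-test-topic", "test message " + i));
        }
        producer.close();
    }
}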
Startup script:
sudo su -l -s /bin/bash flume -c '/usr/lib/flume/bin/flume-ng agent --conf /etc/flume/conf --conf-file /etc/flume/conf/flume-kafka-file.properties -name agent_log -Dflume.root.logger=INFO,console'
Note: the timestamp lines in process() (the "timestamp" header and the strMessage construction) prepend a timestamp to the original data.
Versions: flume-1.4.0.2.1.1.0 + kafka_2.8.0-0.8.0
Reference: https://github.com/baniuyao/flume-kafka
Libraries used for compilation:
    flume-ng-configuration-1.4.0.2.1.1.0-385
    flume-ng-core-1.4.0.2.1.1.0-385
    flume-ng-sdk-1.4.0.2.1.1.0-385
    flume-tools-1.4.0.2.1.1.0-385
    guava-11.0.2
    kafka_2.8.0-0.8.0
    log4j-1.2.15
    scala-compiler
    scala-library
    slf4j-api-1.6.1
    slf4j-log4j12-1.6.1
    zkclient-0.3
    zookeeper-3.3.4