Tags: Big Data, Kafka, Java
[TOC]
Kafka Notes (2): Using the Kafka Java API
All of the test code below uses the following topic:
```
$ kafka-topics.sh --describe --topic hadoop --zookeeper uplooking01:2181,uplooking02:2181,uplooking03:2181
Topic:hadoop    PartitionCount:3    ReplicationFactor:3    Configs:
    Topic: hadoop    Partition: 0    Leader: 103    Replicas: 103,101,102    Isr: 103,101,102
    Topic: hadoop    Partition: 1    Leader: 101    Replicas: 101,102,103    Isr: 101,102,103
    Topic: hadoop    Partition: 2    Leader: 102    Replicas: 102,103,101    Isr: 102,103,101
```
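The topic is assumed to exist already. For reference, a command along the following lines creates a 3-partition, 3-replica topic on a ZooKeeper-based Kafka version (a sketch only; the brokers decide the actual leader/replica placement, so it may not match the listing above exactly):

```
$ kafka-topics.sh --create --topic hadoop --partitions 3 --replication-factor 3 \
    --zookeeper uplooking01:2181,uplooking02:2181,uplooking03:2181
```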
Kafka Java API: producer
For details on how to use the producer API, see the code comments of org.apache.kafka.clients.producer.KafkaProducer; they are very thorough. The code and tests are given directly below.
Code: KafkaProducerOps.java
```java
package com.uplooking.bigdata.kafka.producer;

import com.uplooking.bigdata.kafka.constants.Constants;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import java.util.Random;

/**
 * KafkaProducerOps produces data to a Kafka topic.
 * <p>
 * Producer
 */
public class KafkaProducerOps {
    public static void main(String[] args) throws IOException {
        /**
         * Load the configuration file.
         * Format of the file:
         *     key=value
         *
         * Avoid hard-coding values in the program; keep them configurable.
         */
        Properties properties = new Properties();
        InputStream in = KafkaProducerOps.class.getClassLoader().getResourceAsStream("producer.properties");
        properties.load(in);

        /**
         * Two generic type parameters:
         * the first is the type of a record's key,
         * the second is the type of a record's value.
         */
        String[] girls = new String[]{"姚慧瑩", "劉向前", "周 新", "楊柳"};
        Producer<String, String> producer = new KafkaProducer<String, String>(properties);

        String topic = properties.getProperty(Constants.KAFKA_PRODUCER_TOPIC);
        String key = "1";
        String value = "今天的姑娘們很美";
        ProducerRecord<String, String> producerRecord =
                new ProducerRecord<String, String>(topic, key, value);
        producer.send(producerRecord);
        producer.close();
    }
}
```
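Note that send() is asynchronous and returns a Future, so the call above does not by itself confirm delivery. Below is a minimal sketch of the callback variant of send(), shown as a drop-in replacement for the producer.send(producerRecord) call above (the Callback interface is part of the same producer API; this snippet is not in the original example):

```java
// Sketch: asynchronous send with a delivery callback, replacing producer.send(producerRecord) above.
producer.send(producerRecord, new org.apache.kafka.clients.producer.Callback() {
    public void onCompletion(org.apache.kafka.clients.producer.RecordMetadata metadata, Exception exception) {
        if (exception != null) {
            // the send failed (e.g. brokers unreachable, record too large)
            exception.printStackTrace();
        } else {
            // metadata reports where the record was written
            System.out.println("sent to partition " + metadata.partition() + " at offset " + metadata.offset());
        }
    }
});
```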
Constants.java
```java
package com.uplooking.bigdata.kafka.constants;

public interface Constants {
    /**
     * Property key for the topic to produce to.
     */
    String KAFKA_PRODUCER_TOPIC = "producer.topic";
}
```
producer.properties
```properties
############################# Producer Basics #############################

# list of brokers used for bootstrapping knowledge about the rest of the cluster
# format: host1:port1,host2:port2 ...
bootstrap.servers=uplooking01:9092,uplooking02:9092,uplooking03:9092

# specify the compression codec for all data generated: none, gzip, snappy, lz4
compression.type=none

# name of the partitioner class for partitioning events; default partition spreads data randomly
# partitioner.class=

# the maximum amount of time the client will wait for the response of a request
#request.timeout.ms=

# how long `KafkaProducer.send` and `KafkaProducer.partitionsFor` will block for
#max.block.ms=

# the producer will wait for up to the given delay to allow other records to be sent so that the sends can be batched together
#linger.ms=

# the maximum size of a request in bytes
#max.request.size=

# the default batch size in bytes when batching multiple records sent to a partition
#batch.size=

# the total bytes of memory the producer can use to buffer records waiting to be sent to the server
#buffer.memory=

##### custom property: the topic to produce to
producer.topic=hadoop

key.serializer=org.apache.kafka.common.serialization.StringSerializer
value.serializer=org.apache.kafka.common.serialization.StringSerializer
```
This is essentially the producer configuration file that ships in Kafka's config directory, with the changes above applied. For the meaning of each field, see the code comments of org.apache.kafka.clients.producer.KafkaProducer.
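If you would rather not load a file at all, the same settings can be assembled in code. A minimal sketch, assuming a hypothetical class name ProducerConfigInCode and the same broker list as above:

```java
import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;

public class ProducerConfigInCode {
    public static void main(String[] args) {
        // Sketch: same settings as producer.properties, built in code instead of loaded from a file.
        Properties props = new Properties();
        props.put("bootstrap.servers", "uplooking01:9092,uplooking02:9092,uplooking03:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("compression.type", "none");

        Producer<String, String> producer = new KafkaProducer<String, String>(props);
        producer.close();
    }
}
```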
Test
Start a console consumer in a terminal to listen for messages on the topic:
```
$ kafka-console-consumer.sh --topic hadoop --zookeeper uplooking01:2181
```
Then run the producer program and check the consumer terminal output:
```
$ kafka-console-consumer.sh --topic hadoop --zookeeper uplooking01:2181
今天的姑娘們很美
```
Kafka Java API: consumer
Code: KafkaConsumerOps.java
```java
package com.uplooking.bigdata.kafka.consumer;

import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collection;
import java.util.Properties;

public class KafkaConsumerOps {
    public static void main(String[] args) throws IOException {
        Properties properties = new Properties();
        InputStream in = KafkaConsumerOps.class.getClassLoader().getResourceAsStream("consumer.properties");
        properties.load(in);

        Consumer<String, String> consumer = new KafkaConsumer<String, String>(properties);
        Collection<String> topics = Arrays.asList("hadoop");
        // subscribe the consumer to the topic
        consumer.subscribe(topics);

        ConsumerRecords<String, String> consumerRecords = null;
        while (true) {
            // pull data from the topic
            consumerRecords = consumer.poll(1000);
            // iterate over every record returned by this poll
            for (ConsumerRecord<String, String> consumerRecord : consumerRecords) {
                long offset = consumerRecord.offset();
                int partition = consumerRecord.partition();
                Object key = consumerRecord.key();
                Object value = consumerRecord.value();
                System.out.format("%d\t%d\t%s\t%s\n", offset, partition, key, value);
            }
        }
    }
}
```
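The consumer above relies on automatic offset commits. A common variant is to commit manually once a batch has been processed; the following is a minimal sketch of the poll loop only, assuming enable.auto.commit=false is added to consumer.properties (not part of the original example):

```java
// Sketch: poll loop with manual offset commit, assuming enable.auto.commit=false in consumer.properties.
while (true) {
    ConsumerRecords<String, String> records = consumer.poll(1000);
    for (ConsumerRecord<String, String> record : records) {
        // process the record first ...
        System.out.format("%d\t%d\t%s\t%s\n", record.offset(), record.partition(), record.key(), record.value());
    }
    // ... then commit the offsets returned by this poll in one go
    consumer.commitSync();
}
```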
consumer.properties
```properties
# Zookeeper connection string
# comma separated host:port pairs, each corresponding to a zk
# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002"
zookeeper.connect=uplooking01:2181,uplooking02:2181,uplooking03:2181

bootstrap.servers=uplooking01:9092,uplooking02:9092,uplooking03:9092

# timeout in ms for connecting to zookeeper
zookeeper.connection.timeout.ms=6000

# consumer group id
group.id=test-consumer-group

# consumer timeout
#consumer.timeout.ms=5000

key.deserializer=org.apache.kafka.common.serialization.StringDeserializer
value.deserializer=org.apache.kafka.common.serialization.StringDeserializer
```
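Note that the KafkaConsumer used in the code above connects through bootstrap.servers; the zookeeper.* entries are left over from the old consumer's configuration file template and are ignored by org.apache.kafka.clients.consumer.KafkaConsumer (it will typically just log them as unknown configs).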
Test
Run the consumer code first, then the producer code; the consumer terminal shows output like the following:
```
2    0    1    今天的姑娘們很美
```
(the columns are: offset, partition, key, value)
Kafka Java API: partition
You can decide which partition a message is written to by providing a custom partitioner; all that is needed is a class that implements the Partitioner interface.
Code: MyKafkaPartitioner.java
```java
package com.uplooking.bigdata.kafka.partitioner;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

import java.util.Map;
import java.util.Random;

/**
 * A custom partitioner that assigns partitions based on the record key.
 * <p>
 * The decision could be based on the hashCode of the key or the value,
 * or on any business rule that spreads the data across partitions.
 * Requirement here: take the hashCode of the user-supplied key, modulo the number of partitions.
 */
public class MyKafkaPartitioner implements Partitioner {

    public void configure(Map<String, ?> configs) {
    }

    /**
     * Choose a partition for the given record.
     *
     * @param topic      topic name
     * @param key        key
     * @param keyBytes   serialized key
     * @param value      value
     * @param valueBytes serialized value
     * @param cluster    metadata of the current cluster
     */
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        Integer partitionNums = cluster.partitionCountForTopic(topic);
        int targetPartition = -1;
        if (key == null || keyBytes == null) {
            targetPartition = new Random().nextInt(10000) % partitionNums;
        } else {
            int hashCode = key.hashCode();
            // use the absolute value so a negative hashCode cannot produce a negative partition number
            targetPartition = Math.abs(hashCode) % partitionNums;
            System.out.println("key: " + key + ", value: " + value + ", hashCode: " + hashCode
                    + ", partition: " + targetPartition);
        }
        return targetPartition;
    }

    public void close() {
    }
}
```
KafkaProducerOps.java
```java
package com.uplooking.bigdata.kafka.producer;

import com.uplooking.bigdata.kafka.constants.Constants;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import java.util.Random;

/**
 * KafkaProducerOps produces data to a Kafka topic.
 * <p>
 * Producer
 */
public class KafkaProducerOps {
    public static void main(String[] args) throws IOException {
        /**
         * Load the configuration file.
         * Format of the file:
         *     key=value
         *
         * Avoid hard-coding values in the program; keep them configurable.
         */
        Properties properties = new Properties();
        InputStream in = KafkaProducerOps.class.getClassLoader().getResourceAsStream("producer.properties");
        properties.load(in);

        /**
         * Two generic type parameters:
         * the first is the type of a record's key,
         * the second is the type of a record's value.
         */
        String[] girls = new String[]{"姚慧瑩", "劉向前", "周 新", "楊柳"};
        Producer<String, String> producer = new KafkaProducer<String, String>(properties);

        Random random = new Random();
        int start = 1;
        for (int i = start; i <= start + 9; i++) {
            String topic = properties.getProperty(Constants.KAFKA_PRODUCER_TOPIC);
            String key = i + "";
            String value = "今天的<--" + girls[random.nextInt(girls.length)] + "-->很美很美哦~";
            ProducerRecord<String, String> producerRecord =
                    new ProducerRecord<String, String>(topic, key, value);
            producer.send(producerRecord);
        }
        producer.close();
    }
}
```
Keep using the consumer code from earlier, and additionally point producer.properties at the custom partitioner, as follows:
```properties
partitioner.class=com.uplooking.bigdata.kafka.partitioner.MyKafkaPartitioner
```
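Equivalently, when the producer configuration is built in code (as in the earlier programmatic sketch), the partitioner can be registered through the ProducerConfig constant for the same key:

```java
// Sketch: registering the custom partitioner programmatically; equivalent to the properties entry above.
props.put(org.apache.kafka.clients.producer.ProducerConfig.PARTITIONER_CLASS_CONFIG,
        "com.uplooking.bigdata.kafka.partitioner.MyKafkaPartitioner");
```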
Test
Run the consumer code first, then the producer code, and check the terminal output.
Producer terminal output (mainly what the custom partitioner prints):
```
key: 1, value: 今天的<--劉向前-->很美很美哦~, hashCode: 49, partition: 1
key: 2, value: 今天的<--楊柳-->很美很美哦~, hashCode: 50, partition: 2
key: 3, value: 今天的<--姚慧瑩-->很美很美哦~, hashCode: 51, partition: 0
key: 4, value: 今天的<--周 新-->很美很美哦~, hashCode: 52, partition: 1
key: 5, value: 今天的<--劉向前-->很美很美哦~, hashCode: 53, partition: 2
key: 6, value: 今天的<--周 新-->很美很美哦~, hashCode: 54, partition: 0
key: 7, value: 今天的<--周 新-->很美很美哦~, hashCode: 55, partition: 1
key: 8, value: 今天的<--劉向前-->很美很美哦~, hashCode: 56, partition: 2
key: 9, value: 今天的<--楊柳-->很美很美哦~, hashCode: 57, partition: 0
key: 10, value: 今天的<--姚慧瑩-->很美很美哦~, hashCode: 1567, partition: 1
```
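This matches the partitioner's rule: the topic has 3 partitions, so for key "1" the hashCode is 49 and 49 % 3 = 1, i.e. partition 1; likewise key "10" has hashCode 1567 and 1567 % 3 = 1.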
Consumer terminal output:
```
3    0    3     今天的<--姚慧瑩-->很美很美哦~
4    0    6     今天的<--周 新-->很美很美哦~
5    0    9     今天的<--楊柳-->很美很美哦~
0    2    2     今天的<--楊柳-->很美很美哦~
1    2    5     今天的<--劉向前-->很美很美哦~
2    2    8     今天的<--劉向前-->很美很美哦~
1    1    1     今天的<--劉向前-->很美很美哦~
2    1    4     今天的<--周 新-->很美很美哦~
3    1    7     今天的<--周 新-->很美很美哦~
4    1    10    今天的<--姚慧瑩-->很美很美哦~
```
(the columns are: offset, partition, key, value)