Use Apache Flume to read messages from a JMS message queue (ActiveMQ) and write them to HDFS. The Flume agent configuration is as follows:
flume-agent.conf
# Name the components on this agent
agentHdfs.sources = jms_source
agentHdfs.sinks = hdfs_sink
agentHdfs.channels = mem_channel

# Describe/configure the source
agentHdfs.sources.jms_source.type = jms
agentHdfs.sources.jms_source.initialContextFactory = org.apache.activemq.jndi.ActiveMQInitialContextFactory
agentHdfs.sources.jms_source.connectionFactory = ConnectionFactory
# ActiveMQ queue name
agentHdfs.sources.jms_source.destinationName = business_data
agentHdfs.sources.jms_source.providerURL = tcp://hadoop-master:61616
agentHdfs.sources.jms_source.destinationType = QUEUE

# Describe the sink
agentHdfs.sinks.hdfs_sink.type = hdfs
agentHdfs.sinks.hdfs_sink.hdfs.path = hdfs://hadoop-master/data/flume/%y-%m-%d/%H
agentHdfs.sinks.hdfs_sink.hdfs.filePrefix = %{hostname}/events-
agentHdfs.sinks.hdfs_sink.hdfs.maxOpenFiles = 5000
agentHdfs.sinks.hdfs_sink.hdfs.batchSize = 500
agentHdfs.sinks.hdfs_sink.hdfs.fileType = DataStream
agentHdfs.sinks.hdfs_sink.hdfs.writeFormat = Text
agentHdfs.sinks.hdfs_sink.hdfs.rollSize = 0
agentHdfs.sinks.hdfs_sink.hdfs.rollCount = 1000000
agentHdfs.sinks.hdfs_sink.hdfs.rollInterval = 600
agentHdfs.sinks.hdfs_sink.hdfs.useLocalTimeStamp = true

# Use a channel which buffers events in memory
agentHdfs.channels.mem_channel.type = memory
agentHdfs.channels.mem_channel.capacity = 1000
agentHdfs.channels.mem_channel.transactionCapacity = 100

# Bind the source and sink to the channel
agentHdfs.sources.jms_source.channels = mem_channel
agentHdfs.sinks.hdfs_sink.channel = mem_channel
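Start the agent with this file and the agent name agentHdfs, for example: flume-ng agent --conf conf --conf-file flume-agent.conf --name agentHdfs -Dflume.root.logger=INFO,console (the --conf directory is whatever holds your Flume environment files). Note that the %{hostname} escape in hdfs.filePrefix expects a "hostname" header on each event, e.g. added by an interceptor; otherwise drop it from the prefix.

For a quick end-to-end test, a small JMS producer can push a message onto the business_data queue. The sketch below is only an illustration, not part of the original setup: it assumes the ActiveMQ client library is on the classpath, and reuses the broker URL and queue name from the configuration above; the class name and payload text are made up.

BusinessDataProducer.java
import javax.jms.Connection;
import javax.jms.MessageProducer;
import javax.jms.Session;
import javax.jms.TextMessage;
import org.apache.activemq.ActiveMQConnectionFactory;

public class BusinessDataProducer {
    public static void main(String[] args) throws Exception {
        // Same broker URL as providerURL in the Flume source configuration
        ActiveMQConnectionFactory factory =
                new ActiveMQConnectionFactory("tcp://hadoop-master:61616");
        Connection connection = factory.createConnection();
        connection.start();
        try {
            // Non-transacted session with automatic acknowledgement
            Session session = connection.createSession(false, Session.AUTO_ACKNOWLEDGE);
            // Same queue name as destinationName in the Flume source configuration
            MessageProducer producer =
                    session.createProducer(session.createQueue("business_data"));
            // Sample payload; the Flume JMS source turns the text body into the event body,
            // which the HDFS sink writes as a line of the DataStream file
            TextMessage message = session.createTextMessage("hello from JMS to HDFS");
            producer.send(message);
        } finally {
            connection.close();
        }
    }
}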