# Receive the files (three Flume agents, started with the commands below)
# Launch the three Flume agents; agent names are case-sensitive and must
# match the agent prefix used inside each config file.
flume-ng agent --conf conf --conf-file conf1.conf --name a1
flume-ng agent --conf conf --conf-file conf2.conf --name hdfs-agent
flume-ng agent --conf conf --conf-file conf3.conf --name file-agent
conf1.conf:
# Tier-1 agent "a1": tail a spool directory and forward events over Avro.
# Flume property keys are case-sensitive, so component names must match
# exactly between the declaration lines and the per-component settings.
a1.sources = tail
a1.channels = c1
a1.sinks = avro-forward-sink

# Use a durable file channel (the memory channel overflowed; see note below).
a1.channels.c1.type = file
#a1.channels.c1.capacity = 1000
#a1.channels.c1.transactionCapacity = 100

# Spooling-directory source: ingest files dropped into spoolDir.
a1.sources.tail.type = spooldir
a1.sources.tail.spoolDir = /path/to/folder/

# Avro sink: forward events to the tier-2 collector agent.
a1.sinks.avro-forward-sink.type = avro
a1.sinks.avro-forward-sink.hostname = HOSTNAME/IP
a1.sinks.avro-forward-sink.port = 12345

# Bind the source and sink to the channel
a1.sources.tail.channels = c1
a1.sinks.avro-forward-sink.channel = c1
conf2.conf:
# Tier-2 agent "hdfs-agent": receive events over Avro and write them to HDFS.
# The agent name and every component name must use consistent casing.
hdfs-agent.sources = avro-collect
hdfs-agent.sinks = hdfs-write
hdfs-agent.channels = ch1

# Durable file channel (memory channel overflowed under load; see note below).
hdfs-agent.channels.ch1.type = file
#hdfs-agent.channels.ch1.capacity = 1000
#hdfs-agent.channels.ch1.transactionCapacity = 100

# Avro source: listens for the tier-1 agent's avro sink connections.
hdfs-agent.sources.avro-collect.type = avro
hdfs-agent.sources.avro-collect.bind = 10.59.123.69
hdfs-agent.sources.avro-collect.port = 12345

# HDFS sink: write events as text under the target directory.
hdfs-agent.sinks.hdfs-write.type = hdfs
hdfs-agent.sinks.hdfs-write.hdfs.path = hdfs://namenode/user/usera/test/
hdfs-agent.sinks.hdfs-write.hdfs.writeFormat = Text

# Bind the source and sink to the channel
hdfs-agent.sources.avro-collect.channels = ch1
hdfs-agent.sinks.hdfs-write.channel = ch1
Start the conf2.conf agent first, then start the conf1.conf agent,
because the Avro source must be running before the Avro sink can connect to it.
# When using a memory channel, the following error occurred:
org.apache.flume.ChannelException: Unable to put batch on required channel:
org.apache.flume.channel.MemoryChannel{name: ch1}
# After changing to a file channel the error went away.
OK.
# Batch-rename the spooled files: strip the ".completed" suffix
# Strip the trailing ".completed" suffix from every matching file in the
# current directory. Only match files that actually carry the suffix, and
# quote expansions so names with spaces survive.
for f in *.completed; do
    mv "$f" "${f%.completed}"
done
Sqoop: load data from Hive into Oracle:
# Export the Hive table's warehouse directory into Oracle.
# -D oraoop.disabled=true must come right after the tool name;
# the table name is upper case because Oracle stores identifiers in
# upper case and Sqoop's column lookup is case-sensitive.
sqoop export -D oraoop.disabled=true \
  --connect "jdbc:oracle:thin:@(description=(address=(protocol=tcp)(host=hostname)(port=port))(connect_data=(service_name=sname)))" \
  --username user_user \
  --password pwd \
  --table EVAN_TEST \
  --fields-terminated-by '\001' \
  -m 1 \
  --export-dir /path/to/folder/
#### Note: the table name must be in upper case; otherwise Sqoop reports an exception that it could not find the column information.
Flume practices, and Sqoop export from Hive to Oracle