1. Writing Avro files to HDFS in Java
1 ImportJava.io.File;2 Importjava.io.IOException;3 ImportJava.io.OutputStream;4 ImportJava.nio.ByteBuffer;5 6 ImportOrg.apache.avro.Schema;7 Importorg.apache.avro.file.CodecFactory;8 ImportOrg.apache.avro.file.DataFileWriter;9 ImportOrg.apache.avro.generic.GenericData;Ten ImportOrg.apache.avro.generic.GenericDatumWriter; One ImportOrg.apache.avro.generic.GenericRecord; A Importorg.apache.commons.io.FileUtils; - Importorg.apache.hadoop.conf.Configuration; - ImportOrg.apache.hadoop.fs.FileSystem; the ImportOrg.apache.hadoop.fs.Path; - Importorg.apache.hadoop.io.IOUtils; - - Public classHdfsavrotest { + - Public Static FinalString Schema_json = "{\" type\ ": \" record\ ", \" name\ ": \" smallfilestest\ "," ++ "\" fields\ ": [" A+ "{\" name\ ": \" " at+ "username" -+ "\", \ "type\": \ "string\"}, " -+ "{\" name\ ": \" " -+ "Password" -+ "\", \ "type\": \ "string\"}]} "; - Public Static FinalSchema schema =NewSchema.parser (). Parse (Schema_json); in - Public Static voidWritetoavro (File srcpath, OutputStream outputstream) to throwsIOException { +datafilewriter<object> writer =NewDatafilewriter<object>( - NewGenericdatumwriter<object> ()). 
Setsyncinterval (100); the Writer.setcodec (Codecfactory.snappycodec ()); * writer.create (SCHEMA, outputstream); $ for(Object obj:FileUtils.listFiles (Srcpath,NULL,false)) {Panax NotoginsengFile File =(File) obj; -String filename =File.getabsolutepath (); the byteContent[] =Fileutils.readfiletobytearray (file); +Genericrecord record =NewGenericdata.record (SCHEMA); ARecord.put ("username", filename); theRecord.put ("Password", Bytebuffer.wrap (content)); + Writer.append (record); - } $Ioutils.cleanup (NULL, writer); $Ioutils.cleanup (NULL, outputstream); - } - the Public Static voidMain (string[] args)throwsException { -Configuration config =NewConfiguration ();WuyiFileSystem HDFs =filesystem.get (config); theFile SourceDir =NewFile (args[0]); -Path DestFile =NewPath (args[1]); WuOutputStream OS =hdfs.create (destfile); - Writetoavro (SourceDir, OS); About } $}
2. Reading an Avro file on HDFS in Java
1 Importjava.io.IOException;2 ImportJava.io.InputStream;3 4 ImportOrg.apache.avro.file.DataFileStream;5 ImportOrg.apache.avro.generic.GenericDatumReader;6 ImportOrg.apache.avro.generic.GenericRecord;7 Importorg.apache.hadoop.conf.Configuration;8 ImportOrg.apache.hadoop.fs.FileSystem;9 ImportOrg.apache.hadoop.fs.Path;Ten Importorg.apache.hadoop.io.IOUtils; One A Public classHdfsreadavro { - - the Public Static voidReadfromavro (InputStream is)throwsIOException { -datafilestream<object> reader =NewDatafilestream<object>(IS, - NewGenericdatumreader<object>()); - for(Object o:reader) { +Genericrecord r =(Genericrecord) o; -System.out.println (R.get ("username") + ":" +r.get ("password")); + } AIoutils.cleanup (NULL, is); atIoutils.cleanup (NULL, reader); - } - - Public Static voidMain (string[] args)throwsException { -Configuration config =NewConfiguration (); -FileSystem HDFs =filesystem.get (config); inPath DestFile =NewPath (args[0]); -InputStream is =Hdfs.open (destfile); to Readfromavro (IS); + } -}
Reading and writing Avro files on HDFS in Java