Hadoop MapReduce: rewriting DBOutputFormat to update a MySQL database

Source: Internet
Author: User
As described in http://blog.csdn.net/sunflower_cao/article/details/28266939, you can write reduce results into a MySQL database by implementing Writable and DBWritable. However, there has always been one problem: the output can only be inserted, never updated. As a result, the data produced by different MapReduce jobs could only be inserted into different tables, and to use the data you had to create views or query it with complicated SQL. Today I investigated and found that DBOutputFormat can be rewritten to perform updates.

Code:

TblsWritable.java

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

/**
 * Rewritten DBWritable
 *
 * @author caozw TblsWritable writes data to mysql
 */
public class TblsWritable implements Writable, DBWritable {
    String initTime;
    String new_android_user;
    String new_ios_user;
    String new_total_user;
    String iosUserTotal;
    String androidUserTotal;
    String userTotal;

    public TblsWritable() {
    }

    public TblsWritable(String initTime, String new_android_user,
            String new_ios_user, String new_total_user,
            String androidUserTotal, String iosUserTotal, String userTotal) {
        this.initTime = initTime;
        this.new_android_user = new_android_user;
        this.new_ios_user = new_ios_user;
        this.new_total_user = new_total_user;
        this.androidUserTotal = androidUserTotal;
        this.iosUserTotal = iosUserTotal;
        this.userTotal = userTotal;
    }

    // Binding order is the contract with constructQuery() below:
    // parameters 1-6 fill the SET clause, parameter 7 fills the WHERE clause.
    @Override
    public void write(PreparedStatement statement) throws SQLException {
        statement.setString(1, this.new_android_user);
        statement.setString(2, this.new_ios_user);
        statement.setString(3, this.new_total_user);
        statement.setString(4, this.androidUserTotal);
        statement.setString(5, this.iosUserTotal);
        statement.setString(6, this.userTotal);
        statement.setString(7, this.initTime);
    }

    @Override
    public void readFields(ResultSet resultSet) throws SQLException {
        this.new_android_user = resultSet.getString(1);
        this.new_ios_user = resultSet.getString(2);
        this.new_total_user = resultSet.getString(3);
        this.androidUserTotal = resultSet.getString(4);
        this.iosUserTotal = resultSet.getString(5);
        this.userTotal = resultSet.getString(6);
        this.initTime = resultSet.getString(7);
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(this.new_android_user);
        out.writeUTF(this.new_ios_user);
        out.writeUTF(this.new_total_user);
        out.writeUTF(this.androidUserTotal);
        out.writeUTF(this.iosUserTotal);
        out.writeUTF(this.userTotal);
        out.writeUTF(this.initTime);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.new_android_user = in.readUTF();
        this.new_ios_user = in.readUTF();
        this.new_total_user = in.readUTF();
        this.androidUserTotal = in.readUTF();
        this.iosUserTotal = in.readUTF();
        this.userTotal = in.readUTF();
        this.initTime = in.readUTF();
    }

    @Override
    public String toString() {
        return this.initTime + " " + this.new_android_user + " "
                + this.new_ios_user + " " + this.new_total_user + " "
                + this.androidUserTotal + " " + this.iosUserTotal + " "
                + this.userTotal;
    }
}


WriteDataToMysql.java

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

/**
 * Write mapreduce result data to mysql
 *
 * @author caozw
 */
public class WriteDataToMysql {
    // Column order matters: fieldNames[0] is the WHERE key,
    // the remaining six are the SET columns.
    public static String[] fieldNames = { "INITTIME", "NEW_ANDROID_USER",
            "NEW_IOS_USER", "NEW_USER_TOTAL", "ANDROID_USER_TOTAL",
            "IOS_USER_TOTAL", "USER_TOTAL" };
    public static String table = "USER_INFO_STATIC";

    public static class ConnMysqlMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {
        enum Counter {
            LINESKIP,
        }

        private final static IntWritable one = new IntWritable(1);
        private final static IntWritable zero = new IntWritable(0);

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            try {
                String line = value.toString();
                String[] strings = line.split("\t");
                String initTime = strings[1];
                String devType = strings[4];
                if (initTime.length() == 19) {
                    SimpleDateFormat sdf = new SimpleDateFormat(
                            "yyyy-MM-dd HH:mm:ss");
                    // Validates the timestamp; a ParseException skips the line.
                    Date date = sdf.parse(initTime);
                    // Emit one per iOS record and zero per Android record,
                    // keyed by day (the first 10 characters of the timestamp).
                    if ("1".equals(devType)) {
                        context.write(new Text(initTime.substring(0, 10)), one);
                    } else {
                        context.write(new Text(initTime.substring(0, 10)), zero);
                    }
                } else {
                    // System.err.println(initTime);
                    context.getCounter(Counter.LINESKIP).increment(1);
                }
            } catch (ArrayIndexOutOfBoundsException e) {
                context.getCounter(Counter.LINESKIP).increment(1);
                return;
            } catch (ParseException e) {
                context.getCounter(Counter.LINESKIP).increment(1);
                return;
            }
        }
    }

    public static class ConnMysqlReducer extends
            Reducer<Text, IntWritable, TblsWritable, TblsWritable> {
        static int iosUserTotal = 0;
        static int androidUserTotal = 0;

        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int android = 0;
            int ios = 0;
            int total = 0;
            for (Iterator<IntWritable> itr = values.iterator(); itr.hasNext();) {
                total++;
                if (0 == itr.next().get()) {
                    android++;
                } else {
                    ios++;
                }
            }
            iosUserTotal += ios;
            androidUserTotal += android;
            /*
             * System.err.println(key.toString() + ":" + String.valueOf(android)
             * + ":" + String.valueOf(ios) + ":" + String.valueOf(total));
             */
            context.write(
                    new TblsWritable(key.toString(), String.valueOf(android),
                            String.valueOf(ios), String.valueOf(total),
                            String.valueOf(androidUserTotal),
                            String.valueOf(iosUserTotal),
                            String.valueOf(androidUserTotal + iosUserTotal)),
                    null);
        }
    }

    public static void main(String args[]) throws IOException,
            InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
                "jdbc:mysql://127.0.0.1:3306/XINGXUNTONG", "hadoop", "123456");
        Job job = new Job(conf, "test mysql connection");
        job.setJarByClass(WriteDataToMysql.class);

        job.setMapperClass(ConnMysqlMapper.class);
        job.setReducerClass(ConnMysqlReducer.class);

        // These define the map output types here; the reduce output
        // goes to the database instead.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setSpeculativeExecution(false);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(MysqlDBOutputFormat.class);
        // job.setOutputFormatClass(DBOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        // DBOutputFormat.setOutput(job, "test", "initTime", "new_user_total");
        // DBOutputFormat.setOutput(job, table, fieldNames);
        MysqlDBOutputFormat.setOutput(job, table, fieldNames);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
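For reference, the job reads tab-separated log lines (with initTime in column 2 and the device type in column 5) from args[0]. A typical invocation looks something like the following; the jar name and input path are hypothetical, and the MySQL JDBC driver jar must also be available on the task classpath:

hadoop jar writedatatomysql.jar WriteDataToMysql /logs/user-events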


MysqlDBOutputFormat.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;

/**
 * A OutputFormat that sends the reduce output to a SQL table.
 *
 * {@link MysqlDBOutputFormat} accepts <key,value> pairs, where
 * key has a type extending DBWritable. Returned {@link RecordWriter}
 * writes only the key to the database with a batch SQL query.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class MysqlDBOutputFormat<K extends DBWritable, V>
        extends OutputFormat<K, V> {

    private static final Log LOG = LogFactory.getLog(MysqlDBOutputFormat.class);

    public void checkOutputSpecs(JobContext context)
            throws IOException, InterruptedException {
    }

    public OutputCommitter getOutputCommitter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        return new FileOutputCommitter(FileOutputFormat.getOutputPath(context),
                context);
    }

    /**
     * A RecordWriter that writes the reduce output to a SQL table
     */
    @InterfaceStability.Evolving
    public class DBRecordWriter extends RecordWriter<K, V> {

        private Connection connection;
        private PreparedStatement statement;

        public DBRecordWriter() throws SQLException {
        }

        public DBRecordWriter(Connection connection,
                PreparedStatement statement) throws SQLException {
            this.connection = connection;
            this.statement = statement;
            this.connection.setAutoCommit(false);
        }

        public Connection getConnection() {
            return connection;
        }

        public PreparedStatement getStatement() {
            return statement;
        }

        /** {@inheritDoc} */
        public void close(TaskAttemptContext context) throws IOException {
            try {
                statement.executeBatch();
                connection.commit();
            } catch (SQLException e) {
                try {
                    connection.rollback();
                } catch (SQLException ex) {
                    LOG.warn(StringUtils.stringifyException(ex));
                }
                throw new IOException(e.getMessage());
            } finally {
                try {
                    statement.close();
                    connection.close();
                } catch (SQLException ex) {
                    throw new IOException(ex.getMessage());
                }
            }
        }

        /** {@inheritDoc} */
        public void write(K key, V value) throws IOException {
            try {
                key.write(statement);
                statement.addBatch();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Constructs the query used as the prepared statement to insert data.
     *
     * @param table
     *            the table to insert into
     * @param fieldNames
     *            the fields to insert into. If field names are unknown,
     *            supply an array of nulls.
     */
    public String constructQuery(String table, String[] fieldNames) {
        if (fieldNames == null) {
            throw new IllegalArgumentException("Field names may not be null");
        }
        StringBuilder query = new StringBuilder();
        query.append("UPDATE ").append(table);
        System.err.println("fieldNames.length:" + fieldNames.length);
        if (fieldNames.length > 0) {
            query.append(" SET ");
            query.append(fieldNames[1] + " = ?");
            query.append("," + fieldNames[2] + " = ?");
            query.append("," + fieldNames[3] + " = ?");
            query.append("," + fieldNames[4] + " = ?");
            query.append("," + fieldNames[5] + " = ?");
            query.append("," + fieldNames[6] + " = ?");
            query.append(" WHERE ");
            query.append(fieldNames[0] + " = ?");
            System.err.println(query.toString());
            return query.toString();
        } else {
            return null;
        }
    }

    /** {@inheritDoc} */
    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
            throws IOException {
        DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
        String tableName = dbConf.getOutputTableName();
        String[] fieldNames = dbConf.getOutputFieldNames();

        if (fieldNames == null) {
            fieldNames = new String[dbConf.getOutputFieldCount()];
        }

        try {
            Connection connection = dbConf.getConnection();
            PreparedStatement statement = null;

            statement = connection.prepareStatement(
                    constructQuery(tableName, fieldNames));
            return new DBRecordWriter(connection, statement);
        } catch (Exception ex) {
            throw new IOException(ex.getMessage());
        }
    }

    /**
     * Initializes the reduce-part of the job with
     * the appropriate output settings
     *
     * @param job The job
     * @param tableName The table to insert data into
     * @param fieldNames The field names in the table.
     */
    public static void setOutput(Job job, String tableName,
            String... fieldNames) throws IOException {
        if (fieldNames.length > 0 && fieldNames[0] != null) {
            DBConfiguration dbConf = setOutput(job, tableName);
            dbConf.setOutputFieldNames(fieldNames);
        } else {
            if (fieldNames.length > 0) {
                setOutput(job, tableName, fieldNames.length);
            } else {
                throw new IllegalArgumentException(
                        "Field names must be greater than 0");
            }
        }
    }

    /**
     * Initializes the reduce-part of the job
     * with the appropriate output settings
     *
     * @param job The job
     * @param tableName The table to insert data into
     * @param fieldCount the number of fields in the table.
     */
    public static void setOutput(Job job, String tableName, int fieldCount)
            throws IOException {
        DBConfiguration dbConf = setOutput(job, tableName);
        dbConf.setOutputFieldCount(fieldCount);
    }

    private static DBConfiguration setOutput(Job job, String tableName)
            throws IOException {
        job.setOutputFormatClass(MysqlDBOutputFormat.class);
        job.setReduceSpeculativeExecution(false);

        DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
        dbConf.setOutputTableName(tableName);
        return dbConf;
    }
}


MysqlDBOutputFormat.java takes DBOutputFormat.java from the Hadoop source code and rewrites its constructQuery method so that it generates an UPDATE statement instead of an INSERT statement.
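Note that because the generated statement is a plain UPDATE, it affects zero rows when no row matches the WHERE clause. This output format can therefore only refresh statistics for dates that already exist in USER_INFO_STATIC; the rows presumably have to be inserted by some other means first (or see the MERGE idea at the end of this post).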

constructQuery in DBOutputFormat.java

public String constructQuery(String table, String[] fieldNames) {
    if (fieldNames == null) {
        throw new IllegalArgumentException("Field names may not be null");
    }
    StringBuilder query = new StringBuilder();
    query.append("INSERT INTO ").append(table);
    if (fieldNames.length > 0 && fieldNames[0] != null) {
        query.append(" (");
        for (int i = 0; i < fieldNames.length; i++) {
            query.append(fieldNames[i]);
            if (i != fieldNames.length - 1) {
                query.append(",");
            }
        }
        query.append(")");
    }
    query.append(" VALUES (");
    for (int i = 0; i < fieldNames.length; i++) {
        query.append("?");
        if (i != fieldNames.length - 1) {
            query.append(",");
        }
    }
    query.append(");");
    return query.toString();
}
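With the seven field names defined in WriteDataToMysql above, this original method generates:

INSERT INTO USER_INFO_STATIC (INITTIME,NEW_ANDROID_USER,NEW_IOS_USER,NEW_USER_TOTAL,ANDROID_USER_TOTAL,IOS_USER_TOTAL,USER_TOTAL) VALUES (?,?,?,?,?,?,?);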


After rewriting:

public String constructQuery(String table, String[] fieldNames) {
    if (fieldNames == null) {
        throw new IllegalArgumentException("Field names may not be null");
    }
    StringBuilder query = new StringBuilder();
    query.append("UPDATE ").append(table);
    System.err.println("fieldNames.length:" + fieldNames.length);
    if (fieldNames.length > 0) {
        query.append(" SET ");
        query.append(fieldNames[1] + " = ?");
        query.append("," + fieldNames[2] + " = ?");
        query.append("," + fieldNames[3] + " = ?");
        query.append("," + fieldNames[4] + " = ?");
        query.append("," + fieldNames[5] + " = ?");
        query.append("," + fieldNames[6] + " = ?");
        query.append(" WHERE ");
        query.append(fieldNames[0] + " = ?");
        System.err.println(query.toString());
        return query.toString();
    } else {
        return null;
    }
}
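With the same field names, the System.err.println(query.toString()) call in the rewritten method prints:

UPDATE USER_INFO_STATIC SET NEW_ANDROID_USER = ?,NEW_IOS_USER = ?,NEW_USER_TOTAL = ?,ANDROID_USER_TOTAL = ?,IOS_USER_TOTAL = ?,USER_TOTAL = ? WHERE INITTIME = ?

which is exactly the statement TblsWritable.write() fills: parameters 1-6 for the SET clause and parameter 7 for the WHERE clause.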


Going by normal Java thinking, it should be possible to update MySQL simply by extending DBOutputFormat and overriding constructQuery, but no matter what I tried it refused to work. I am writing it down for now and will try again when I have time.

Here is the code for that overriding approach (the test failed); please kindly advise if you know what is wrong.

import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

public class MysqlOutputFormat1<K extends DBWritable, V>
        extends DBOutputFormat<K, V> {

    public String constructQuery(String table, String[] fieldNames) {
        if (fieldNames == null) {
            throw new IllegalArgumentException("Field names may not be null");
        }
        StringBuilder query = new StringBuilder();
        query.append("UPDATE ").append(table);
        System.err.println("fieldNames.length:" + fieldNames.length);
        if (fieldNames.length > 0) {
            query.append(" SET ");
            query.append(fieldNames[1] + " = ?");
            query.append("," + fieldNames[2] + " = ?");
            query.append("," + fieldNames[3] + " = ?");
            query.append("," + fieldNames[4] + " = ?");
            query.append("," + fieldNames[5] + " = ?");
            query.append("," + fieldNames[6] + " = ?");
            query.append(" WHERE ");
            query.append(fieldNames[0] + " = ?");
            System.err.println(query.toString());
            return query.toString();
        } else {
            return null;
        }
    }
}
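One plausible explanation for the failure, judging from the setOutput() code shown in MysqlDBOutputFormat above: DBOutputFormat.setOutput() internally calls job.setOutputFormatClass(DBOutputFormat.class), which silently replaces any output format class registered earlier, so the parent's INSERT-building constructQuery runs instead of the override. If that is the cause, re-registering the subclass after calling setOutput() should be enough. An untested sketch:

// Untested sketch: DBOutputFormat.setOutput() resets the job's output
// format class back to DBOutputFormat, so the subclass must be
// registered again afterwards for its constructQuery override to run.
DBOutputFormat.setOutput(job, table, fieldNames);    // stores table/field config
job.setOutputFormatClass(MysqlOutputFormat1.class);  // must come after setOutput()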


In addition, I feel that you could generate a MERGE statement instead, achieving "update if the row exists, insert if it does not" in a single SQL statement; see the reference page http://en.wikipedia.org/wiki/Merge_%28SQL%29

-- http://en.wikipedia.org/wiki/Merge_%28SQL%29
MERGE INTO tablename USING table_reference ON (condition)
  WHEN MATCHED THEN
    UPDATE SET column1 = value1 [, column2 = value2 ...]
  WHEN NOT MATCHED THEN
    INSERT (column1 [, column2 ...]) VALUES (value1 [, value2 ...])
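MySQL itself does not implement the SQL-standard MERGE, but its INSERT ... ON DUPLICATE KEY UPDATE achieves the same upsert effect. Below is an untested sketch of a constructQuery variant along those lines; it assumes the key column fieldNames[0] (INITTIME here) carries a PRIMARY KEY or UNIQUE index, and note that TblsWritable.write() would then have to bind initTime as parameter 1 rather than last:

public String constructQuery(String table, String[] fieldNames) {
    if (fieldNames == null) {
        throw new IllegalArgumentException("Field names may not be null");
    }
    StringBuilder query = new StringBuilder();
    query.append("INSERT INTO ").append(table).append(" (");
    for (int i = 0; i < fieldNames.length; i++) {
        query.append(fieldNames[i]);
        if (i != fieldNames.length - 1) {
            query.append(",");
        }
    }
    query.append(") VALUES (");
    for (int i = 0; i < fieldNames.length; i++) {
        query.append("?");
        if (i != fieldNames.length - 1) {
            query.append(",");
        }
    }
    query.append(") ON DUPLICATE KEY UPDATE ");
    // Overwrite every non-key column with the value just supplied above.
    for (int i = 1; i < fieldNames.length; i++) {
        query.append(fieldNames[i]).append(" = VALUES(")
             .append(fieldNames[i]).append(")");
        if (i != fieldNames.length - 1) {
            query.append(",");
        }
    }
    return query.toString();
}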
