Dual-embedded LSTM for QA Matching: a Dual-Embedding LSTM Chat Matching Model


First, the model structure.

An RNN, typically an LSTM, is generally used as the encoder in sequence models; there are plenty of papers about LSTMs that you can find on Google.

I have tested the following model myself and it works well. As long as you have a corpus, it can be used to train a chatbot with deep learning.

Embedding + bidirectional LSTM + fully connected + max-pooling
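Before the full implementation, here is a minimal sketch of one such encoder under the TF 1.x graph API; the sizes and placeholder names here are illustrative assumptions, not the values used in the code below:

import tensorflow as tf

vocab_size, embed_dim, rnn_dim = 10000, 128, 256   # illustrative sizes
tokens = tf.placeholder(tf.int32, [None, None])    # [batch, time] token ids
lengths = tf.placeholder(tf.int32, [None])         # true lengths before padding

# Embedding lookup: ids -> dense vectors.
embed = tf.get_variable('embed', [vocab_size, embed_dim])
x = tf.nn.embedding_lookup(embed, tokens)          # [batch, time, embed_dim]

# Bidirectional LSTM over the sequence.
fw = tf.nn.rnn_cell.LSTMCell(rnn_dim)
bw = tf.nn.rnn_cell.LSTMCell(rnn_dim)
outputs, _ = tf.nn.bidirectional_dynamic_rnn(fw, bw, x, sequence_length=lengths,
                                             dtype=tf.float32)
h = tf.concat(outputs, 2)                          # [batch, time, 2*rnn_dim]

# Max-pooling over time gives a fixed-size sentence vector.
pooled = tf.reduce_max(h, 1)                       # [batch, 2*rnn_dim]

# Fully connected projection.
W = tf.get_variable('W', [2 * rnn_dim, 2 * rnn_dim])
sentence_vec = tf.matmul(pooled, W)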

Finally, the code: the TensorFlow implementation of the model.

import tensorflow as tf

# `hparams` (vocab_size, embedding_dim, rnn_dim) is assumed to be defined elsewhere.
# Updated to the TF 1.x API (the original used pre-1.0 calls such as tf.pack/tf.mul).
def model_imp(ques, ques_len, ans, ans_len, ans_f, ans_f_len, batch_size):
    n_characters = 2
    # Two independent embedding tables -- the "dual embedding" of the title.
    w_embed = tf.get_variable(
        'w_embed', shape=[hparams.vocab_size, n_characters * hparams.embedding_dim],
        initializer=tf.random_uniform_initializer(-1.0, 1.0))
    w_embed_2 = tf.get_variable(
        'w_embed_2', shape=[hparams.vocab_size, n_characters * hparams.embedding_dim],
        initializer=tf.random_uniform_initializer(-1.0, 1.0))

    # 1.2 --- rnn for question ---
    ques_1 = tf.nn.embedding_lookup(w_embed, ques, name='ques_1')
    ques_2 = tf.nn.embedding_lookup(w_embed_2, ques, name='ques_2')

    # 1.2.0 --- calculate the distribution for the question ---
    with tf.variable_scope('character'):
        cell = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias=2.0,
                                       use_peepholes=True, state_is_tuple=True)
        cell_r = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias=2.0,
                                         use_peepholes=True, state_is_tuple=True)
        output, state = tf.nn.bidirectional_dynamic_rnn(
            cell, cell_r, tf.concat([ques_1, ques_2], 2),
            sequence_length=ques_len, dtype=tf.float32)
        # Max-pool the concatenated forward/backward outputs over time.
        character_information = tf.reduce_max(tf.concat([output[0], output[1]], 2), 1)
        T = tf.get_variable('T', shape=[hparams.rnn_dim * 2, n_characters])
        # Soft weighting over the two embedding "characters".
        character_dist = tf.expand_dims(
            tf.nn.softmax(tf.matmul(character_information, T), -1), 1)
        # Alternative: a hard one-hot assignment instead of the soft distribution.
        # character = tf.argmax(tf.matmul(character_information, T), 1)
        # character_dist = tf.expand_dims(
        #     tf.one_hot(character, n_characters, on_value=1.0, off_value=0.0), 1)
        print(character_dist.get_shape())

    # 1.2.1 --- Three different ques-ans combinations ---
    with tf.variable_scope('rnn_ques'):
        cell = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias=2.0,
                                       use_peepholes=True, state_is_tuple=True)
        cell_r = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias=2.0,
                                         use_peepholes=True, state_is_tuple=True)
        output_ques, state_ques = tf.nn.bidirectional_dynamic_rnn(
            cell, cell_r, ques_1, sequence_length=ques_len, dtype=tf.float32)
    with tf.variable_scope('rnn_ques2'):
        cell_2 = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias=2.0,
                                         use_peepholes=True, state_is_tuple=True)
        cell_r_2 = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias=2.0,
                                           use_peepholes=True, state_is_tuple=True)
        output_ques_2, state_ques = tf.nn.bidirectional_dynamic_rnn(
            cell_2, cell_r_2, ques_2, sequence_length=ques_len, dtype=tf.float32)

    ques_output_1 = tf.reduce_max(tf.concat([output_ques[0], output_ques[1]], 2), 1)
    ques_output_2 = tf.reduce_max(tf.concat([output_ques_2[0], output_ques_2[1]], 2), 1)
    # Mix the two question encodings with the soft character distribution.
    ques_output = tf.matmul(character_dist,
                            tf.stack([ques_output_1, ques_output_2], axis=1))
    ques_output = tf.squeeze(ques_output, [1])
    M = tf.get_variable('M', shape=[hparams.rnn_dim * 2, hparams.rnn_dim * 2],
                        initializer=tf.random_uniform_initializer(-1.0, 1.0))
    ques_output = tf.matmul(ques_output, M)

    # 1.3 --- rnn for ans ---
    ans_1 = tf.nn.embedding_lookup(w_embed, ans, name='ans_1')
    ans_f_1 = tf.nn.embedding_lookup(w_embed, ans_f, name='ans_f_1')
    ans_2 = tf.nn.embedding_lookup(w_embed_2, ans, name='ans_2')
    ans_f_2 = tf.nn.embedding_lookup(w_embed_2, ans_f, name='ans_f_2')
    # Encode the true and false answers with the same (reused) question RNNs.
    with tf.variable_scope('rnn_ques', reuse=True):
        output_1, state = tf.nn.bidirectional_dynamic_rnn(
            cell, cell_r, ans_1, sequence_length=ans_len, dtype=tf.float32)
        output_f1, state = tf.nn.bidirectional_dynamic_rnn(
            cell, cell_r, ans_f_1, sequence_length=ans_f_len, dtype=tf.float32)
    with tf.variable_scope('rnn_ques2', reuse=True):
        output_2, state = tf.nn.bidirectional_dynamic_rnn(
            cell_2, cell_r_2, ans_2, sequence_length=ans_len, dtype=tf.float32)
        output_f2, state = tf.nn.bidirectional_dynamic_rnn(
            cell_2, cell_r_2, ans_f_2, sequence_length=ans_f_len, dtype=tf.float32)

    ans_output_1 = tf.reduce_max(tf.concat([output_1[0], output_1[1]], 2), 1)
    ans_output_2 = tf.reduce_max(tf.concat([output_2[0], output_2[1]], 2), 1)
    ans_output = tf.matmul(character_dist,
                           tf.stack([ans_output_1, ans_output_2], axis=1))
    ans_output = tf.squeeze(ans_output, [1])
    ans_output_f1 = tf.reduce_max(tf.concat([output_f1[0], output_f1[1]], 2), 1)
    ans_output_f2 = tf.reduce_max(tf.concat([output_f2[0], output_f2[1]], 2), 1)
    ans_output_f = tf.matmul(character_dist,
                             tf.stack([ans_output_f1, ans_output_f2], axis=1))
    ans_output_f = tf.squeeze(ans_output_f, [1])

    # 1.4 --- the prediction part ---
    ques_output = tf.nn.l2_normalize(ques_output, 1)
    ans_output = tf.nn.l2_normalize(ans_output, 1)
    ans_output_f = tf.nn.l2_normalize(ans_output_f, 1)
    prob = [ques_output, ans_output]
    # Cosine similarity to the true and the false answer.
    simi = tf.reduce_sum(tf.multiply(ques_output, ans_output), 1)
    simi_f = tf.reduce_sum(tf.multiply(ques_output, ans_output_f), 1)
    # Max-margin (triplet hinge) loss with a margin of 0.25.
    loss = tf.maximum(0.0, 0.25 - simi + simi_f)
    loss_ = tf.reduce_mean(loss)
    return prob, loss_
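The loss at the end is a max-margin triplet hinge on cosine similarities: after L2 normalization, the dot products simi and simi_f are cosines, and the loss pushes the true answer to score at least 0.25 higher than the false one. A minimal training sketch follows, assuming TF 1.x, fixed-length padded id matrices, and illustrative placeholder names; the data pipeline is yours to supply:

import tensorflow as tf

ques = tf.placeholder(tf.int32, [None, 20])     # question token ids (padded)
ques_len = tf.placeholder(tf.int32, [None])     # true lengths before padding
ans = tf.placeholder(tf.int32, [None, 20])      # true answer ids
ans_len = tf.placeholder(tf.int32, [None])
ans_f = tf.placeholder(tf.int32, [None, 20])    # sampled false answer ids
ans_f_len = tf.placeholder(tf.int32, [None])

prob, loss = model_imp(ques, ques_len, ans, ans_len, ans_f, ans_f_len, batch_size=64)
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Feed batches from your corpus iterator, e.g.:
    # sess.run([train_op, loss], feed_dict={ques: q_ids, ques_len: q_lens, ...})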

 
