Dual-embedded LSTM for QA matching: a dual-embedding LSTM chat matching model
First, the model structure.
For this kind of model, an RNN (here an LSTM) is generally used as the encoder for the sequences; there are plenty of papers on LSTMs that a quick Google search will turn up.
I have tested the following model myself and it works well. As long as you have a corpus, it can be used to train a chatbot with deep learning.
Embedding + bidirectional LSTM + fully connected layer + max-pooling
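Read as a pipeline, each of those pieces is a standard building block. Before the full dual-embedding code, here is a minimal sketch of one such encoder tower. This is my own illustration rather than code from the post; the names encode, vocab_size, embedding_dim and rnn_dim are assumptions, and it uses the same (pre-1.0) TensorFlow API as the code below:

import tensorflow as tf

def encode(inputs, seq_len, vocab_size, embedding_dim, rnn_dim):
    # embedding layer: token ids -> dense vectors, shape [batch, time, embedding_dim]
    w_embed = tf.get_variable('w_embed', shape=[vocab_size, embedding_dim],
                              initializer=tf.random_uniform_initializer(-1.0, 1.0))
    embedded = tf.nn.embedding_lookup(w_embed, inputs)
    # bidirectional LSTM over the sequence
    cell_fw = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
    cell_bw = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, embedded,
                                                 sequence_length=seq_len, dtype=tf.float32)
    # max-pooling over time on the concatenated forward/backward outputs -> [batch, 2*rnn_dim]
    pooled = tf.reduce_max(tf.concat(2, [outputs[0], outputs[1]]), 1)
    # fully connected projection
    W = tf.get_variable('W_fc', shape=[2 * rnn_dim, 2 * rnn_dim])
    return tf.matmul(pooled, W)

The dual-embedding model below builds two such towers from two separate embedding matrices and mixes their pooled outputs with a learned, question-dependent distribution.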
Finally, the code from the earlier TensorFlow part: the model implementation. Note that it is written against the pre-1.0 TensorFlow API (tf.concat(axis, values), tf.pack, tf.batch_matmul, tf.mul).
import tensorflow as tf

# `hparams` (vocab_size, embedding_dim, rnn_dim) is assumed to be defined at module level.

def model_imp(ques, ques_len, ans, ans_len, ans_f, ans_f_len, batch_size):
    n_characters = 2
    w_embed = tf.get_variable('w_embed', shape=[hparams.vocab_size, n_characters * hparams.embedding_dim],
                              initializer=tf.random_uniform_initializer(-1.0, 1.0))
    w_embed_2 = tf.get_variable('w_embed_2', shape=[hparams.vocab_size, n_characters * hparams.embedding_dim],
                                initializer=tf.random_uniform_initializer(-1.0, 1.0))

    # 1.2 --- rnn for question ---
    ques_1 = tf.nn.embedding_lookup(w_embed, ques, name='ques_1')
    ques_2 = tf.nn.embedding_lookup(w_embed_2, ques, name='ques_2')

    # 1.2.0 --- calculate the distribution for the question ---
    with tf.variable_scope('character') as vs_latent_character:
        cell = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True)
        cell_r = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True)
        output, state = tf.nn.bidirectional_dynamic_rnn(cell, cell_r, tf.concat(2, [ques_1, ques_2]),
                                                        sequence_length=ques_len, dtype=tf.float32)
        character_information = tf.reduce_max(tf.concat(2, [output[0], output[1]]), 1)
        T = tf.get_variable('T', shape=[hparams.rnn_dim * 2, n_characters])
        # question-dependent mixing weights over the two embedding spaces
        character_dist = tf.expand_dims(tf.nn.softmax(tf.matmul(character_information, T), -1), 1)
        # character = tf.argmax(tf.matmul(character_information, T), 1)
        # character_dist = tf.expand_dims(tf.one_hot(character, n_characters, on_value=1.0, off_value=0.0), 1)
        print(character_dist.get_shape())

    # 1.2.1 --- Three different ques-ans combinations ---
    with tf.variable_scope('rnn_ques') as vs_ques:
        cell = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True)
        cell_r = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True)
        output_ques, state_ques = tf.nn.bidirectional_dynamic_rnn(cell, cell_r, ques_1,
                                                                  sequence_length=ques_len, dtype=tf.float32)
    with tf.variable_scope('rnn_ques2') as vs_ques:
        cell_2 = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True)
        cell_r_2 = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True)
        output_ques_2, state_ques = tf.nn.bidirectional_dynamic_rnn(cell_2, cell_r_2, ques_2,
                                                                    sequence_length=ques_len, dtype=tf.float32)
    ques_output_1 = tf.reduce_max(tf.concat(2, [output_ques[0], output_ques[1]]), 1)
    ques_output_2 = tf.reduce_max(tf.concat(2, [output_ques_2[0], output_ques_2[1]]), 1)
    # mix the two question encodings with the question-dependent weights
    ques_output = tf.batch_matmul(character_dist, tf.pack([ques_output_1, ques_output_2], axis=1))
    ques_output = tf.squeeze(ques_output, [1])
    M = tf.get_variable('M', shape=[hparams.rnn_dim * 2, hparams.rnn_dim * 2],
                        initializer=tf.random_uniform_initializer(-1.0, 1.0))
    ques_output = tf.matmul(ques_output, M)

    # 1.3 --- rnn for ans ---
    ans_1 = tf.nn.embedding_lookup(w_embed, ans, name='ans')
    ans_f_1 = tf.nn.embedding_lookup(w_embed, ans_f, name='ans_f')
    ans_2 = tf.nn.embedding_lookup(w_embed_2, ans, name='ans')
    ans_f_2 = tf.nn.embedding_lookup(w_embed_2, ans_f, name='ans_f')
    with tf.variable_scope('rnn_ques', reuse=True) as vs_ans:
        output_1, state = tf.nn.bidirectional_dynamic_rnn(cell, cell_r, ans_1,
                                                          sequence_length=ans_len, dtype=tf.float32)
        output_f1, state = tf.nn.bidirectional_dynamic_rnn(cell, cell_r, ans_f_1,
                                                           sequence_length=ans_f_len, dtype=tf.float32)
    with tf.variable_scope('rnn_ques2', reuse=True) as vs_ans:
        output_2, state = tf.nn.bidirectional_dynamic_rnn(cell_2, cell_r_2, ans_2,
                                                          sequence_length=ans_len, dtype=tf.float32)
        output_f2, state = tf.nn.bidirectional_dynamic_rnn(cell_2, cell_r_2, ans_f_2,
                                                           sequence_length=ans_f_len, dtype=tf.float32)
    ans_output_1 = tf.reduce_max(tf.concat(2, [output_1[0], output_1[1]]), 1)
    ans_output_2 = tf.reduce_max(tf.concat(2, [output_2[0], output_2[1]]), 1)
    ans_output = tf.batch_matmul(character_dist, tf.pack([ans_output_1, ans_output_2], axis=1))
    ans_output = tf.squeeze(ans_output, [1])
    ans_output_f1 = tf.reduce_max(tf.concat(2, [output_f1[0], output_f1[1]]), 1)
    ans_output_f2 = tf.reduce_max(tf.concat(2, [output_f2[0], output_f2[1]]), 1)
    ans_output_f = tf.batch_matmul(character_dist, tf.pack([ans_output_f1, ans_output_f2], axis=1))
    ans_output_f = tf.squeeze(ans_output_f, [1])

    # 1.4 --- the prediction part ---
    ques_output = tf.nn.l2_normalize(ques_output, 1)
    ans_output = tf.nn.l2_normalize(ans_output, 1)
    ans_output_f = tf.nn.l2_normalize(ans_output_f, 1)
    prob = [ques_output, ans_output]
    simi = tf.reduce_sum(tf.mul(ques_output, ans_output), 1)
    simi_f = tf.reduce_sum(tf.mul(ques_output, ans_output_f), 1)
    # pairwise hinge loss with margin 0.25: the true answer should score higher than the false one
    loss = tf.maximum(0.0, 0.25 - simi + simi_f)
    loss_ = tf.reduce_mean(loss)
    return prob, loss_
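model_imp only builds the graph and returns (prob, loss_); the training driver is not shown here. Below is a minimal, hypothetical wiring sketch, assuming integer token ids padded to a fixed length, an hparams namedtuple living in the same module as model_imp (the values shown are placeholders, not tuned settings), and a plain Adam optimizer:

import collections
import tensorflow as tf

# Hypothetical hyperparameter container; model_imp reads `hparams` as a module-level global.
HParams = collections.namedtuple('HParams', ['vocab_size', 'embedding_dim', 'rnn_dim'])
hparams = HParams(vocab_size=40000, embedding_dim=100, rnn_dim=256)

batch_size = 64
max_len = 30  # assumed fixed padded sequence length

# Padded token-id inputs: question, true answer, and a sampled false answer, plus their real lengths.
ques = tf.placeholder(tf.int64, [batch_size, max_len])
ans = tf.placeholder(tf.int64, [batch_size, max_len])
ans_f = tf.placeholder(tf.int64, [batch_size, max_len])
ques_len = tf.placeholder(tf.int64, [batch_size])
ans_len = tf.placeholder(tf.int64, [batch_size])
ans_f_len = tf.placeholder(tf.int64, [batch_size])

prob, loss = model_imp(ques, ques_len, ans, ans_len, ans_f, ans_f_len, batch_size)
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

Here ans is the correct answer for ques and ans_f is presumably a sampled incorrect one; at inference time, candidate answers can be ranked by the cosine similarity of the two l2-normalized vectors returned in prob.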