This article is based on my notes from the video tutorials at http://i.youku.com/deeplearning101.
This is a simple binary classification problem: the goal is to tell pictures of cats from pictures of dogs. The dataset is available at http://pan.baidu.com/s/1dFd8kmt (password: psor).
First we write a file called input_data.py, whose purpose is to read the training images and return them as batches.
input_data.py
# coding=utf-8
import tensorflow as tf
import numpy as np
import os

# file_dir = '/home/hjxu/pycharmprojects/tf_examples/dog_cat/data/train/'


# Get the file paths and labels
def get_files(file_dir):
    # file_dir: folder path
    # return: shuffled image paths and labels
    cats = []
    label_cats = []
    dogs = []
    label_dogs = []
    # Load the data paths and assign the label values
    for file in os.listdir(file_dir):
        name = file.split('.')
        if name[0] == 'cat':
            cats.append(file_dir + file)
            label_cats.append(0)
        else:
            dogs.append(file_dir + file)
            label_dogs.append(1)
    print("There are %d cats\nThere are %d dogs" % (len(cats), len(dogs)))

    # Shuffle the file order
    image_list = np.hstack((cats, dogs))
    label_list = np.hstack((label_cats, label_dogs))
    temp = np.array([image_list, label_list])
    temp = temp.transpose()  # transpose
    np.random.shuffle(temp)

    image_list = list(temp[:, 0])
    label_list = list(temp[:, 1])
    label_list = [int(i) for i in label_list]

    return image_list, label_list

# img_list, label_list = get_files(file_dir)


# Generate batches of the same size
def get_batch(image, label, image_w, image_h, batch_size, capacity):
    # image, label: the image list and label list to batch
    # image_w, image_h: width and height of the pictures
    # batch_size: how many pictures per batch
    # capacity: queue capacity
    # return: batches of images and labels

    # Convert the python.list type into a format TF can recognize
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # Generate the queue
    input_queue = tf.train.slice_input_producer([image, label])
    image_contents = tf.read_file(input_queue[0])
    label = input_queue[1]
    image = tf.image.decode_jpeg(image_contents, channels=3)

    # Unify the picture size
    # Method from the video:
    # image = tf.image.resize_image_with_crop_or_pad(image, image_w, image_h)
    # My method:
    image = tf.image.resize_images(image, [image_h, image_w],
                                   method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    image = tf.cast(image, tf.float32)
    # image = tf.image.per_image_standardization(image)  # standardize the data

    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=64,   # threads
                                              capacity=capacity)
    # This line is superfluous:
    # label_batch = tf.reshape(label_batch, [batch_size])

    return image_batch, label_batch


# Test code:
# import matplotlib.pyplot as plt
#
# batch_size = 2
# capacity =        # value lost in the original
# img_w = 208
# img_h = 208
#
# image_list, label_list = get_files(file_dir)
# image_batch, label_batch = get_batch(image_list, label_list, img_w, img_h, batch_size, capacity)
#
# with tf.Session() as sess:
#     i = 0
#     coord = tf.train.Coordinator()
#     threads = tf.train.start_queue_runners(coord=coord)
#     try:
#         while not coord.should_stop() and i < 5:
#             img, label = sess.run([image_batch, label_batch])
#             for j in np.arange(batch_size):
#                 print("label: %d" % label[j])
#                 plt.imshow(img[j, :, :, :])
#                 plt.show()
#             i += 1
#     except tf.errors.OutOfRangeError:
#         print("done!")
#     finally:
#         coord.request_stop()
#     coord.join(threads)
The function get_files(file_dir) collects all the training data (pictures and labels) under the given path file_dir and returns it as lists.
Since the first 12500 training images are all cats and the last 12500 are all dogs, training directly in this order would probably hurt the result (my guess), so the order has to be shuffled. Whether you shuffle when reading the data or when training is your own choice (the video says shuffling at read time is faster). Because each picture corresponds one-to-one with its label, the two must be shuffled together.
The np.hstack() method stitches the cat and dog pictures and labels together into image_list and label_list; hstack(a, b) simply concatenates a and b. For example, cats and dogs are each vectors of length 12500; after hstack(cats, dogs), image_list has length 25000, and label_list likewise has length 25000. The corresponding image_list and label_list are then merged: temp has size 2x25000, becomes 25000x2 after transpose(), and np.random.shuffle() then shuffles its rows.
Finally, the image_list and label_list columns are extracted from temp and used as the function's return values. Note that at this point the contents of label_list are strings, so the line label_list = [int(i) for i in label_list] is added to convert them to int.
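This shuffle-together pattern can be checked in isolation; here is a minimal standalone NumPy sketch (the toy file names and labels below are made up for the example):

import numpy as np

cats = ['cat.0.jpg', 'cat.1.jpg']   # hypothetical file names
dogs = ['dog.0.jpg', 'dog.1.jpg']
label_cats, label_dogs = [0, 0], [1, 1]

image_list = np.hstack((cats, dogs))               # shape (4,)
label_list = np.hstack((label_cats, label_dogs))
temp = np.array([image_list, label_list]).transpose()  # shape (4, 2)
np.random.shuffle(temp)                            # shuffles rows, keeping each pair intact

image_list = list(temp[:, 0])
label_list = [int(i) for i in temp[:, 1]]          # labels came back as strings
print(image_list, label_list)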
model.py
# coding=utf-8
import tensorflow as tf


def inference(images, batch_size, n_classes):
    # conv1
    with tf.variable_scope('conv1') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 3, 16],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[16],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)

    # pool1 and norm1
    with tf.variable_scope('pooling1_lrn') as scope:
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
                          beta=0.75, name='norm1')

    # conv2
    with tf.variable_scope('conv2') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 16, 16],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[16],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name='conv2')

    # pool2 and norm2
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
                          beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1],
                               padding='SAME', name='pooling2')

    # local3 (fully connected)
    with tf.variable_scope('local3') as scope:
        reshape = tf.reshape(pool2, shape=[batch_size, -1])
        dim = reshape.get_shape()[1].value
        weights = tf.get_variable('weights',
                                  shape=[dim, 128],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[128],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)

    # local4
    with tf.variable_scope('local4') as scope:
        weights = tf.get_variable('weights',
                                  shape=[128, 128],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[128],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')

    # softmax
    with tf.variable_scope('softmax_linear') as scope:
        weights = tf.get_variable('softmax_linear',
                                  shape=[128, n_classes],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[n_classes],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')

    return softmax_linear


def losses(logits, labels):
    with tf.variable_scope('loss') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='xentropy_per_example')
        loss = tf.reduce_mean(cross_entropy, name='loss')
        tf.summary.scalar(scope.name + '/loss', loss)
    return loss


def trainning(loss, learning_rate):
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op


def evaluation(logits, labels):
    with tf.variable_scope('accuracy') as scope:
        correct = tf.nn.in_top_k(logits, labels, 1)
        correct = tf.cast(correct, tf.float16)
        accuracy = tf.reduce_mean(correct)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy
The function get_batch() splits the pictures into batches, because loading all 25000 pictures into memory at once is neither realistic nor necessary. The image and label parameters passed in are the image_list and label_list returned by get_files(); these are Python list types, so they have to be converted into the tensor format that TensorFlow can recognize.
Queues are used here to fetch the data. Queue operations involve threads, which I don't really understand, so I can only describe them at a general level; to study them systematically, see the official documentation. (The original post quotes an explanatory figure here.)
My general understanding: each training step takes one batch from the queue into the network for training, and meanwhile new pictures from the training set are injected into the queue, over and over again. The queue acts as a data pipeline between the training set and the network model, and the training data is fed to the network through it. (I'm not sure this is correct; corrections welcome.)
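This mental model can be checked with a tiny standalone queue, independent of the cat/dog data. A minimal sketch for TF 1.x (the toy data below is made up; by default slice_input_producer also shuffles and cycles forever):

import tensorflow as tf

data = [10, 20, 30, 40, 50]  # a toy "training library" of five samples
queue_item = tf.train.slice_input_producer([data], num_epochs=None)[0]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    # Background threads keep the queue filled while the main thread consumes it.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for _ in range(7):
        print(sess.run(queue_item))  # each run dequeues one item
    coord.request_stop()
    coord.join(threads)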
Back to the program: slice_input_producer() is used to build the queue, with the image and the label put into a list and passed as an argument to the function. The image and label are then taken out of the queue. Note that after reading a picture with read_file(), it must be decoded according to its image format. The training data in this example is JPEG, so the decode_jpeg() decoder is used; other formats need other decoders, which can be looked up in the official API. Also note that the decoded data type is uint8, while the model's conv2d() layer later requires float32 input, so a type conversion is needed (after the standardization step was deleted).
Because the pictures in the training set vary in size, they also need to be brought to a uniform size (img_w and img_h). The video uses the resize_image_with_crop_or_pad() method, which crops or pads outward from the image center: if a picture exceeds the target size, only the middle region is kept. A dog might keep its torso but lose its head, and training with such images would surely suffer. So I changed this a little and used resize_images() to scale the image instead of cropping it, with NEAREST_NEIGHBOR interpolation (with the other interpolation methods the resulting images came out distorted; I don't know the exact reason).
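The difference between the two approaches is easy to see on a single image. A sketch, assuming some local test file 'cat.0.jpg' (a hypothetical name):

import tensorflow as tf

raw = tf.read_file('cat.0.jpg')              # hypothetical test image
img = tf.image.decode_jpeg(raw, channels=3)  # e.g. 374x500x3, uint8

# Video method: keeps only the central 208x208 window (or pads small images),
# so a large picture loses everything outside the middle region.
cropped = tf.image.resize_image_with_crop_or_pad(img, 208, 208)

# My method: scales the whole picture down to 208x208, so nothing is cut off.
scaled = tf.image.resize_images(img, [208, 208],
                                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

with tf.Session() as sess:
    print(sess.run(cropped).shape)  # (208, 208, 3)
    print(sess.run(scaled).shape)   # (208, 208, 3)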
After resizing, the video also applies per_image_standardization(); but with that step added, the displayed picture looks ruined. Each channel alone is normal, yet the three channels together look wrong. Removing the standardization step made the result normal again, so the standardization line is commented out here.
Then tf.train.batch() is used to form the batches. An alternative is tf.train.shuffle_batch(), but since the data was already shuffled, the plain batch() is fine here. In the video, the label is also reshape()d after batching; in my opinion this step is redundant, since the shape returned by batch() already meets our requirement. Commenting it out has no effect, and the pictures can be fetched normally.
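For reference, had the lists not been pre-shuffled, the batch() call inside get_batch() could be swapped for tf.train.shuffle_batch(). A sketch (the min_after_dequeue value is my own choice, not from the original):

# Drop-in alternative inside get_batch() when the input order is NOT pre-shuffled.
# min_after_dequeue is how many elements stay buffered so the mix is good;
# capacity must be larger than min_after_dequeue.
image_batch, label_batch = tf.train.shuffle_batch(
    [image, label],
    batch_size=batch_size,
    num_threads=64,
    capacity=capacity,
    min_after_dequeue=capacity // 2)  # assumed value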
Finally, the resulting image_batch and label_batch are returned: image_batch is a 4D tensor of shape [batch, height, width, channels], and label_batch is a 1D tensor of shape [batch].
The commented-out test code above can be used to check whether the pictures are fetched successfully. Because the pictures have been converted to float32, the colors shown by imshow() will look a bit strange: imshow() is meant to display uint8 data (gray values in the range 0~255), and after the conversion to float32 the values fall outside the range imshow() expects for floats, hence the odd colors. This does not affect the model training later on.
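If the odd colors bother you during testing, the fetched batch can be cast back to uint8 purely for display. A sketch, where img and j come from the test loop above:

import numpy as np
import matplotlib.pyplot as plt

# img comes from: img, label = sess.run([image_batch, label_batch])
# Clip to the valid range and cast back to uint8 just for display;
# the float32 tensors fed to the network are left untouched.
plt.imshow(np.clip(img[j], 0, 255).astype(np.uint8))
plt.show()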
training.py
# coding=utf-8
import os
import numpy as np
import tensorflow as tf
import input_data
import model

N_CLASSES = 2            # cats and dogs
IMG_W = 208              # resize the images; too big and training takes a long time
IMG_H = 208
BATCH_SIZE = 16          # value lost in the original; 16 is an assumption
CAPACITY = 2000          # value lost in the original; 2000 is an assumption
MAX_STEP = 10000         # generally greater than 10K
learning_rate = 0.0001   # generally less than 0.0001

train_dir = '/home/hjxu/pycharmprojects/tf_examples/dog_cat/data/train/'
logs_train_dir = '/home/hjxu/pycharmprojects/tf_examples/dog_cat/log/train/'

train, train_label = input_data.get_files(train_dir)
train_batch, train_label_batch = input_data.get_batch(train, train_label,
                                                      IMG_W, IMG_H,
                                                      BATCH_SIZE, CAPACITY)

train_logits = model.inference(train_batch, BATCH_SIZE, N_CLASSES)
train_loss = model.losses(train_logits, train_label_batch)
train_op = model.trainning(train_loss, learning_rate)
train__acc = model.evaluation(train_logits, train_label_batch)

summary_op = tf.summary.merge_all()   # aggregates all the log summaries

# Start a session
sess = tf.Session()
# Create a writer to write the log files
train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
# Create a saver to store the trained model
saver = tf.train.Saver()
# Initialize all nodes
sess.run(tf.global_variables_initializer())

# Queue monitoring
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

try:
    # Run MAX_STEP steps of training, one batch per step
    for step in np.arange(MAX_STEP):
        if coord.should_stop():
            break
        # Run the op nodes below (a question: why isn't train_logits run here?)
        _, tra_loss, tra_acc = sess.run([train_op, train_loss, train__acc])

        # Every 50 steps, print the current loss and acc, log them, and write to the writer
        if step % 50 == 0:
            print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc * 100.0))
            summary_str = sess.run(summary_op)
            train_writer.add_summary(summary_str, step)

        # Every 2000 steps, save the trained model once
        if step % 2000 == 0 or (step + 1) == MAX_STEP:
            checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)
except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')
finally:
    coord.request_stop()
coord.join(threads)
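On the question in the comment above: as I understand it, sess.run([train_op, train_loss, train__acc]) already executes train_logits implicitly. train_op was built by minimizing train_loss, which in turn was computed from train_logits, and TensorFlow runs every op a fetched node depends on. So train_logits only needs to be fetched explicitly if you want its value back.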
This run produces some files in the logs folder, which we can visualize with TensorBoard; the next blog post will note TensorBoard's basic usage.
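In the meantime, TensorBoard can already be pointed at that folder with its standard command-line invocation (the path is the logs_train_dir used above):

tensorboard --logdir=/home/hjxu/pycharmprojects/tf_examples/dog_cat/log/train/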
Reference blogs:
http://blog.csdn.net/qq_16137569/article/details/72802387
http://blog.csdn.net/xinyu3307/article/details/74943033