Import the required libraries and define the model parameters.
From __future__ import Absolute_import to __future__ Import division from __future__ import print_function import OS im Port re import sys import tarfile from six.moves import urllib import tensorflow as tf import cifar10_input FLAGS = tf.
App.flags.FLAGS # Basic model parameters.
Tf.app.flags.DEFINE_integer (' Batch_size ', 128, "" "number of images to process in a batch." ") Tf.app.flags.DEFINE_string (' Data_dir ', '/tmp/cifar10_data ', "" "" Path to the CIFAR-10 data Direc
Tory. "" ")
Tf.app.flags.DEFINE_boolean (' use_fp16 ', False, "" "Train the Model using FP16." ")
# Global Constants describing the CIFAR-10 data set. Image_size = Cifar10_input. Image_size num_classes = cifar10_input. Num_classes Num_examples_per_epoch_for_train = cifar10_input. Num_examples_per_epoch_for_train num_examples_per_epoch_for_eval = cifar10_input.
Num_examples_per_epoch_for_eval # Constants describing the training process. Moving_avErage_decay = 0.9999 # The decay to use for the moving average.
Num_epochs_per_decay = 350.0 # epochs after which learning rate.
Learning_rate_decay_factor = 0.1 # LEARNING RATE decay FACTOR.
Initial_learning_rate = 0.1 # INITIAL LEARNING RATE. # If A model is trained with multiple GPUs, prefix all Op names with Tower_name # to differentiate the operations.
Note that this prefix is removed from the # names to the summaries when visualizing a model.
Tower_name = ' TOWER ' data_url = ' http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz '
Create a summary helper that records a histogram and a scalar for an activation tensor. Note that the scalar records the fraction of zero elements in x, which measures the sparsity of x.
def _activation_summary(x):
    """Helper to create summaries for activations.

    Creates a summary that provides a histogram of activations.
    Creates a summary that measures the sparsity of activations.

    Args:
        x: Tensor

    Returns:
        Nothing.
    """
    # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU
    # training session. This helps the clarity of presentation on
    # TensorBoard.
    tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
    tf.summary.histogram(tensor_name + '/activations', x)
    # zero_fraction is the share of zero elements, i.e. the sparsity.
    tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
Create a helper that builds a variable stored on cpu:0. The `initializer` parameter specifies how the variable is initialized, for example `initializer=tf.constant_initializer(0.0)` or `initializer=tf.truncated_normal_initializer(stddev=stddev, dtype=dtype)`.
def _variable_on_cpu(name, shape, initializer):
    """Helper to create a Variable stored on CPU memory.

    Args:
        name: name of the variable
        shape: list of ints
        initializer: initializer for Variable

    Returns:
        Variable Tensor
    """
    with tf.device('/cpu:0'):
        # The variable dtype follows the --use_fp16 flag.
        dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
        var = tf.get_variable(name, shape, initializer=initializer,
                              dtype=dtype)
    return var
Create another variable-building helper. Unlike the one above, it initializes the variable from a truncated normal distribution with the given standard deviation. In addition, if you want weight decay applied to the variable, specify the parameter `wd`; the weight decay term is then added to the 'losses' collection with `tf.add_to_collection`.
def _variable_with_weight_decay(name, shape, stddev, wd):
    """Helper to create an initialized Variable with weight decay.

    Note that the Variable is initialized with a truncated normal
    distribution. A weight decay is added only if one is specified.

    Args:
        name: name of the variable
        shape: list of ints
        stddev: standard deviation of a truncated Gaussian
        wd: add L2Loss weight decay multiplied by this float. If None,
            weight decay is not added for this Variable.

    Returns:
        Variable Tensor
    """
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
    var = _variable_on_cpu(
        name,
        shape,
        tf.truncated_normal_initializer(stddev=stddev, dtype=dtype))
    # Compute the weight decay term and add it to the 'losses' collection.
    if wd is not None:
        weight_decay = tf.multiply(tf.nn.l2_loss(var), wd,
                                   name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
    return var
Call `cifar10_input.distorted_inputs` to generate distorted images that augment the training set.
def distorted_inputs():
    """Construct distorted input for CIFAR training using the Reader ops.

    Returns:
        images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3]
            size.
        labels: Labels. 1D tensor of [batch_size] size.

    Raises:
        ValueError: If no data_dir
    """
    if not FLAGS.data_dir:
        raise ValueError('Please supply a data_dir')
    data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    images, labels = cifar10_input.distorted_inputs(
        data_dir=data_dir, batch_size=FLAGS.batch_size)
    # With --use_fp16 both tensors are cast to half precision.
    if FLAGS.use_fp16:
        images = tf.cast(images, tf.float16)
        labels = tf.cast(labels, tf.float16)
    return images, labels
If you do not want distorted images, call `cifar10_input.inputs` instead; it is intended for testing rather than training.
def inputs(eval_data):
    """Construct input for CIFAR evaluation using the Reader ops.

    Args:
        eval_data: bool, indicating if one should use the train or eval
            data set.

    Returns:
        images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3]
            size.
        labels: Labels. 1D tensor of [batch_size] size.

    Raises:
        ValueError: If no data_dir
    """
    if not FLAGS.data_dir:
        raise ValueError('Please supply a data_dir')
    data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    images, labels = cifar10_input.inputs(eval_data=eval_data,
                                          data_dir=data_dir,
                                          batch_size=FLAGS.batch_size)
    # With --use_fp16 both tensors are cast to half precision.
    if FLAGS.use_fp16:
        images = tf.cast(images, tf.float16)
        labels = tf.cast(labels, tf.float16)
    return images, labels
The following builds the network model. The first layer is a convolution layer built with `tf.nn.conv2d`.
def inference(images):
    """Build the CIFAR-10 model.

    Args:
        images: Images returned from distorted_inputs() or inputs().

    Returns:
        Logits.
    """
    # We instantiate all variables using tf.get_variable() instead of
    # tf.Variable() in order to share variables across multiple GPU training
    # runs. If we only ran this model on a single GPU, we could simplify this
    # function by replacing all instances of tf.get_variable() with
    # tf.Variable().
    #
    # conv1
    # NOTE(review): the channel count (64) of conv1/conv2 was lost in this
    # copy of the code and is restored from the upstream TensorFlow CIFAR-10
    # tutorial model -- confirm.
    with tf.variable_scope('conv1') as scope:
        kernel = _variable_with_weight_decay('weights',
                                             shape=[5, 5, 3, 64],
                                             stddev=5e-2,
                                             wd=0.0)
        conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [64],
                                  tf.constant_initializer(0.0))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv1)

    # Pooling and normalization.
    # pool1
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool1')
    # norm1
    norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                      name='norm1')

    # The following layers are similar to the above, except that the weights
    # of the local3 and local4 layers use weight decay (wd=0.004).
    # conv2
    with tf.variable_scope('conv2') as scope:
        kernel = _variable_with_weight_decay('weights',
                                             shape=[5, 5, 64, 64],
                                             stddev=5e-2,
                                             wd=0.0)
        conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [64],
                                  tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv2)

    # norm2
    norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                      name='norm2')
    # pool2
    pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool2')

    # local3
    with tf.variable_scope('local3') as scope:
        # Move everything into depth so we can perform a single matrix
        # multiply.
        reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])
        dim = reshape.get_shape()[1].value
        weights = _variable_with_weight_decay('weights', shape=[dim, 384],
                                              stddev=0.04, wd=0.004)
        biases = _variable_on_cpu('biases', [384],
                                  tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases,
                            name=scope.name)
        _activation_summary(local3)

    # local4
    with tf.variable_scope('local4') as scope:
        weights = _variable_with_weight_decay('weights', shape=[384, 192],
                                              stddev=0.04, wd=0.004)
        biases = _variable_on_cpu('biases', [192],
                                  tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases,
                            name=scope.name)
        _activation_summary(local4)

    # Output layer. Softmax is not applied here because the loss function
    # below uses tf.nn.sparse_softmax_cross_entropy_with_logits, which
    # computes the softmax itself; the function therefore returns the tensor
    # that would be the input of the softmax layer.
    with tf.variable_scope('softmax_linear') as scope:
        weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES],
                                              stddev=1 / 192.0, wd=0.0)
        biases = _variable_on_cpu('biases', [NUM_CLASSES],
                                  tf.constant_initializer(0.0))
        softmax_linear = tf.add(tf.matmul(local4, weights), biases,
                                name=scope.name)
        _activation_summary(softmax_linear)

    return softmax_linear
Define the function that computes the loss. It uses cross entropy as the loss function and also adds the weight decay terms of the parameters that require weight decay.
def loss(logits, labels):
    """Add L2Loss to all the trainable variables.

    Add summary for "Loss" and "Loss/avg".

    Args:
        logits: Logits from inference().
        labels: Labels from distorted_inputs or inputs(). 1-D tensor of
            shape [batch_size]

    Returns:
        Loss tensor of type float.
    """
    # Calculate the average cross entropy loss across the batch.
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)

    # The total loss is defined as the cross entropy loss plus all of the
    # weight decay terms (L2 loss).
    return tf.add_n(tf.get_collection('losses'), name='total_loss')
The following function records the losses: a moving average is applied to the entries of `tf.get_collection('losses')` and to `total_loss`, so that their smoothed values are recorded as well. The function returns the op that computes the moving averages.
def _add_loss_summaries(total_loss):
    """Add summaries for losses in CIFAR-10 model.

    Generates moving average for all losses and associated summaries for
    visualizing them.

    Args:
        total_loss: Total loss from loss().

    Returns:
        loss_averages_op: op for generating moving averages of losses.
    """
    # Compute the moving average of all individual losses and the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])

    # Attach a scalar summary to all individual losses and the total loss; do
    # the same for the averaged version of the losses.
    for l in losses + [total_loss]:
        # Name each loss as '(raw)' and name the moving average version of
        # the loss as the original loss name.
        tf.summary.scalar(l.op.name + ' (raw)', l)
        tf.summary.scalar(l.op.name, loss_averages.average(l))

    return loss_averages_op
The following function trains the model.
def train(total_loss, global_step):
    """Train CIFAR-10 model.

    Create an optimizer and apply to all trainable variables. Add moving
    average for all trainable variables.

    Args:
        total_loss: Total loss from loss().
        global_step: Integer Variable counting the number of training steps
            processed.

    Returns:
        train_op: op for training.
    """
    # Variables that affect learning rate.
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    # Use tf.train.exponential_decay to make the learning rate decay.
    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    tf.summary.scalar('learning_rate', lr)

    # Generate moving averages of all losses and associated summaries.
    loss_averages_op = _add_loss_summaries(total_loss)

    # tf.control_dependencies sets control dependencies: the ops created
    # inside the 'with' block can only run after the ops in the dependency
    # list have been executed. That is: compute the loss, then compute the
    # gradients, then update the parameters.
    # Compute gradients.
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)

    # Apply gradients.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Record the variables and gradients, and create (through
    # variable_averages.apply) the shadow variables that store the moving
    # average of each trainable variable.
    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)

    # Add histograms for gradients.
    for grad, var in grads:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # tf.no_op is an op that does nothing by itself, but because it carries
    # control dependencies it can be used to drive the flow: running it as
    # train_op triggers the gradient application and the moving-average
    # update. The function thus returns a single op for training.
    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op