1 VGG Network Summary
VGG can be seen as a study, built on AlexNet, of how deepening the network improves performance. The overall structure is still five convolutional stages followed by three fully connected layers, with the five convolutional stages separated by pooling layers. Each of the five stages stacks several convolutional layers together, and the network uses smaller kernels and a larger number of kernels to improve performance: for example, AlexNet uses kernels as large as 11x11x96, whereas VGG generally uses 3x3 kernels but with many more of them, up to around 3x3x256. That is, by reducing the filter size and increasing the number of layers, it achieves the same effect.
The VGG model is much larger than AlexNet: the trained parameters come to about 500M, and training takes a long time. Fortunately, pre-trained parameters are available, such as VGG-16 and VGG-19; both perform well and can be downloaded from the Internet.
2 VGG Network Model
There are several configurations in total, labeled A through E. From top to bottom, each of them consists of five convolutional stages followed by three fully connected layers.
3 Implementation of VGG network
Source: https://github.com/boyw165/tensorflow-vgg
With this code you can train your own VGG model, or load an existing VGG model to classify images. The code of the VGG19 model is shown below; it is nicely written, with everything contained in a single class.
Import NumPy as NP import TensorFlow as tf _vgg_mean = [103.939, 116.779, 123.68] class Vgg19: "" "A VGG-19 Ne
Twork implementation using TensorFlow Library.
The network takes an image of size 224x224 with RGB channels and returns category scores of size 1000.
The network configuration:-rgb:224x224x3-bgr:224x224x3-conv1:224x224x64-conv2:112x112x128
-conv3:56x56x256-conv4:28x28x512-conv5:14x14x512-fc6:25088 (=7x7x512) x4096-fc7:4096x4096
-fc8:4096x1000 "" "width = 224" The fixed WIDTH of the input image. "
HEIGHT = 224 "The fixed height of the input image."
CHANNELS = 3 "The fixed CHANNELS number of the input image."
Model = {} "the model storing the kernels, weights and biases."
Model_save_path = None "The model save path, especially for the training process." Model_save_freq = 0 "" The frequency to save the model in the training process. E.G. Save the model every iteration.
"" "Learning_rate = 0.05" learning rate for the gradient descent. " _inputrgb = None _inputbgr = None _inputnormalizedbgr = None _conv1_1 = None _conv1_2 = None _pool1 = None _conv2_1 = None _conv2_2 = None _pool2 = None _conv3_1 = None _conv3_2 = None _conv3_3 = N
One _conv3_4 = None _pool3 = None _conv4_1 = None _conv4_2 = None _conv4_3 = None _conv4_4 = None
_pool4 = None _conv5_1 = None _conv5_2 = None _conv5_3 = None _conv5_4 = None _pool5 = None _fc6 = None _relu6 = None _fc7 = None _relu7 = None _fc8 = None _preds = None "the predictions tensor, shape of [?,] "_loss = none _optimizer = None _train_labels = None" The train labels tensor
, a placeholder. " def __init__ (self, model=none, Model_save_path=none, Model_save_freq=0): "" ":p Aram Model:the model either for Back-propagation or:p Aram model_save_path:the MoD
El path for training process.
:p Aram Model_save_freq:save the model (in training process) every N iterations.
Forward-propagation.
"" "Self.model = Self._init_empty_model () If not model else model Self.model_save_path = Model_save_path
Self.model_save_freq = model_save_freq # Define the train labels. Self._train_labels = Tf.placeholder (Tf.float32, [None, +]) # Defin
E the input placeholder with RGB channels.
# size:224x224x3 Self._inputrgb = Tf.placeholder (Tf.float32, [None,
Vgg19.width, Vgg19.height, Vgg19.channels] # Convert RGB to BGR Order # SIZE:224X224X3 red, green, blue = Tf.split (3, 3, Self._inputrgb) SELF._INPUTBGR = Tf.concat (3, [ Blue, green, Red,] # normalize the input so and the elements all has NEA
Rly equal # variances. # size:224x224x3 SELF._INPUTNORMALIZEDBGR = Tf.concat (3, [blue-_vgg_mean[0], green-_
VGG_MEAN[1], red-_vgg_mean[2],]) # Setup the vgg-net graph. # size:224x224x64 self._conv1_1 = Self._conv_layer (SELF._INPUTNORMALIZEDBGR, "conv1_1") Self._conv1_2 = S Elf._conv_layer (self._conv1_1, "Conv1_2") # size:112x112x64 self._pool1 = Self._max_pool (Self._conv1_2, ' Pool1 ') # size:112x112x128 self._conv2_1 = Self._conv_layer (self._pool1, "Conv2_1") self._conv2_ 2 = Self._conv_layer (Self._conv2_1, "Conv2_2") # size:56x56x128 self._pool2 = Self._max_pool (self._conv2_ 2, ' pooL2 ') # size:56x56x256 self._conv3_1 = Self._conv_layer (self._pool2, "conv3_1") Self._conv3_2 = S Elf._conv_layer (Self._conv3_1, "conv3_2") Self._conv3_3 = Self._conv_layer (Self._conv3_2, "Conv3_3") self. _conv3_4 = Self._conv_layer (Self._conv3_3, "Conv3_4") # size:28x28x256 self._pool3 = Self._max_pool (self. _conv3_4, ' Pool3 ') # size:28x28x512 self._conv4_1 = Self._conv_layer (self._pool3, "Conv4_1") sel
F._conv4_2 = Self._conv_layer (Self._conv4_1, "conv4_2") Self._conv4_3 = Self._conv_layer (Self._conv4_2, "Conv4_3") Self._conv4_4 = Self._conv_layer (Self._conv4_3, "Conv4_4") # size:14x14x512 Self._pool4 = self._ Max_pool (self._conv4_4, ' Pool4 ') # size:14x14x512 Self._conv5_1 = Self._conv_layer (Self._pool4, "Conv5_1 ") Self._conv5_2 = Self._conv_layer (Self._conv5_1," conv5_2 ") Self._conv5_3 = Self._conv_layer (self._conv5 _2, "Conv5_3") sElf._conv5_4 = Self._conv_layer (Self._conv5_3, "Conv5_4") # size:7x7x512 SELF._POOL5 = Self._max_pool (sel
F._conv5_4, ' Pool5 ') # size:25088 (=7x7x512) x4096 self._fc6 = Self._fc_layer (Self._pool5, "Fc6")
Self._relu6 = Tf.nn.relu (self._fc6) # size:4096x4096 self._fc7 = Self._fc_layer (Self._relu6, "Fc7")
SELF._RELU7 = Tf.nn.relu (self._fc7) # size:4096x1000 Self._fc8 = Self._fc_layer (Self._relu7, "Fc8")
# for predicting.
Self._preds = Tf.nn.softmax (Self._fc8, name= "prediction") # for training.
Self._loss = Tf.nn.softmax_cross_entropy_with_logits (Self._fc8, Self._train_labels) Self._optimizer = tf.train \.
Gradientdescentoptimizer (self.learning_rate) \ Minimize (self._loss) @property def Inputrgb (self): "" "the input RGB images tensor of channels in RGB OrdeR. Shape must be of [?, 224, 224, 3] "" Return SELF._INPUTRGB @property def inputbgr (sel
f): "" "the input RGB images tensor of channels in BGR order. Shape must is of [?, 224, 224, 3] "" Return SELF._INPUTBGR @property def preds (self): ""
"The prediction (s) tensor, shape of [?, 1000]." "" "Return self._preds @property def train_labels (self):" "" The train labels tensor, sha
PE of [?, 1000].
"" "Return Self._train_labels @property def loss (self):" "" the loss tensor. "" "Return Self._loss @property def optimizer (self):" "" the optimizer tensor, used for t
He training. "" "Return Self._optimizer def _avg_pool (self, value, name): Return Tf.nn.avg_pool (Value, ksize=[1, 2 , 2, 1], strides=[1, 2, 2, 1], padding= ' same ', Name=name) def _max_pool (self, value, name): Return Tf.nn.max_pool (Value, Ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding= ' same ', Name=name) def _conv_layer (self, value, name): with TF. Variable_scope (name): filt = Self._get_conv_filter (name) conv = tf.nn.conv2d (value, filt, [1, 1, 1, 1], padding= ' same ') conv_biases = Self._get_bias (name) bias = Tf.nn.bias_add (CONV, conv_biases ) Relu = Tf.nn.relu (bias) return Relu def _fc_layer (self, value, name): With Tf.vari
Able_scope (name): Shape = Value.get_shape (). As_list () Dim = 1 for D in Shape[1:]:
Dim *= D x = tf.reshape (value, [-1, dim]) Weights = self._get_fc_weight (name) biases = Self._get_bias (name) # Fully connected layer. Note that the ' + ' operation automatically # broadcasts the BIASEs. FC = Tf.nn.bias_add (Tf.matmul (x, Weights), biases) return FC Def _get_conv_filter (self, name): R Eturn TF. Variable (self.model[name][0], name= "filter") def _get_bias (self, name): Return TF. Variable (self.model[name][1], name= "biases") def _get_fc_weight (self, name): Return TF. Variable (self.model[name][0], name= "weights") def _init_empty_model (self): Self.model = {# all T
He following things follows [0] = weights, [1] = biases.
# conv-layer 1. "Conv1_1": [Np.ndarray ([3, 3, 3, 3]), Np.ndarray ([+])], "conv1_2": [Np.ndarray ([3,
), Np.ndarray ([+])], # conv-layer 2. "Conv2_1": [Np.ndarray ([3, 3, 3, +]), Np.ndarray ([+])], "conv2_2": [Np.ndarray ([
, 3, [+]]), Np.ndarray ([+])], # Conv-layer 3. "Conv3_1": [Np.ndarray ([3, 3, +,]), Np.ndarray ([[[]]], "conv3_2": [Np.ndarra
Y ([3, 3, [Np.ndarray]), "Conv3_3": [Np.ndarray ([3, 3, 256, 256]), Np.ndarray ([[+]]), "Conv3_4": [Np.ndarray ([3, 3, []]), NP.
Ndarray ([[+]]), # Conv-layer 4. "Conv4_1": [Np.ndarray ([3, 3, +, +]), Np.ndarray ([+])], "Conv4_2": [Np.ndarray ([
3, 3, (+), Np.ndarray]), "Conv4_3": [Np.ndarray ([3, 3, 512, 512]), Np.ndarray ([+])], "Conv4_4": [Np.ndarray ([3, 3, +]), Np.nda
Rray ([+])], # Conv-layer 5.