# Batch Normalization

```python
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

ACTIVATION = tf.nn.tanh
N_LAYERS = 7
N_HIDDEN_UNITS = 30  # hidden layer width; the value was lost in the source, 30 is assumed


def fix_seed(seed=1):
    # make the run reproducible
    np.random.seed(seed)
    tf.set_random_seed(seed)


def plot_his(inputs, inputs_norm):
    # plot a histogram of the inputs to every layer
    for j, all_inputs in enumerate([inputs, inputs_norm]):
        for i, input in enumerate(all_inputs):
            plt.subplot(2, len(all_inputs), j * len(all_inputs) + (i + 1))
            plt.cla()
            if i == 0:
                the_range = (-7, 10)
            else:
                the_range = (-1, 1)
            plt.hist(input.ravel(), bins=15, range=the_range, color='#0000FF')
            plt.yticks(())
            if j == 1:
                plt.xticks(the_range)
            else:
                plt.xticks(())
            ax = plt.gca()
            ax.spines['right'].set_color('none')
            ax.spines['top'].set_color('none')
            plt.title("%s normalizing" % ("Without" if j == 0 else "With"))
    plt.title('Matplotlib, BN, histogram -- Jason Niu')
    plt.draw()
    plt.pause(0.001)


def built_net(xs, ys, norm):
    def add_layer(inputs, in_size, out_size, activation_function=None, norm=False):
        # weights and biases (deliberately bad initialization for this demo)
        Weights = tf.Variable(tf.random_normal([in_size, out_size], mean=0., stddev=1.))
        biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)

        # fully connected product
        Wx_plus_b = tf.matmul(inputs, Weights) + biases

        # normalize the fully connected product
        if norm:
            # batch normalize
            fc_mean, fc_var = tf.nn.moments(Wx_plus_b, axes=[0])
            scale = tf.Variable(tf.ones([out_size]))
            shift = tf.Variable(tf.zeros([out_size]))
            epsilon = 0.001

            # apply a moving average to mean and var when training on batches
            ema = tf.train.ExponentialMovingAverage(decay=0.5)

            def mean_var_with_update():
                ema_apply_op = ema.apply([fc_mean, fc_var])
                with tf.control_dependencies([ema_apply_op]):
                    return tf.identity(fc_mean), tf.identity(fc_var)

            mean, var = mean_var_with_update()

            Wx_plus_b = tf.nn.batch_normalization(Wx_plus_b, mean, var, shift, scale, epsilon)
            # equivalent to doing BN by hand:
            # Wx_plus_b = (Wx_plus_b - fc_mean) / tf.sqrt(fc_var + 0.001)
            # Wx_plus_b = Wx_plus_b * scale + shift

        # activation
        if activation_function is None:
            outputs = Wx_plus_b
        else:
            outputs = activation_function(Wx_plus_b)

        return outputs  # activated output

    fix_seed(1)

    if norm:
        # BN for the first input
        fc_mean, fc_var = tf.nn.moments(xs, axes=[0])
        scale = tf.Variable(tf.ones([1]))
        shift = tf.Variable(tf.zeros([1]))
        epsilon = 0.001

        # apply a moving average to mean and var when training on batches
        ema = tf.train.ExponentialMovingAverage(decay=0.5)

        def mean_var_with_update():
            ema_apply_op = ema.apply([fc_mean, fc_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(fc_mean), tf.identity(fc_var)

        mean, var = mean_var_with_update()
        xs = tf.nn.batch_normalization(xs, mean, var, shift, scale, epsilon)

    # record the inputs of every layer
    layers_inputs = [xs]

    # build hidden layers
    for l_n in range(N_LAYERS):
        layer_input = layers_inputs[l_n]
        in_size = layers_inputs[l_n].get_shape()[1].value

        output = add_layer(
            layer_input,     # input
            in_size,         # input size
            N_HIDDEN_UNITS,  # output size
            ACTIVATION,      # activation function
            norm,            # normalize before activation
        )
        layers_inputs.append(output)

    # build the output layer
    prediction = add_layer(layers_inputs[-1], N_HIDDEN_UNITS, 1, activation_function=None)

    cost = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1]))
    train_op = tf.train.GradientDescentOptimizer(0.001).minimize(cost)
    return [train_op, cost, layers_inputs]


fix_seed(1)
x_data = np.linspace(-7, 10, 2500)[:, np.newaxis]  # horizontal axis: -7 ~ 10
np.random.shuffle(x_data)
noise = np.random.normal(0, 8, x_data.shape)
y_data = np.square(x_data) - 5 + noise

xs = tf.placeholder(tf.float32, [None, 1])  # [num_samples, num_features]
ys = tf.placeholder(tf.float32, [None, 1])

# build two networks, with and without BN, for comparison
train_op, cost, layers_inputs = built_net(xs, ys, norm=False)
train_op_norm, cost_norm, layers_inputs_norm = built_net(xs, ys, norm=True)

sess = tf.Session()
# use the old initializer on TF < 0.12 (the minor-version threshold was lost in the source, 12 is assumed)
if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
    init = tf.initialize_all_variables()
else:
    init = tf.global_variables_initializer()
sess.run(init)

# record cost
cost_his = []
cost_his_norm = []
record_step = 5

plt.ion()
plt.figure(figsize=(7, 3))
for i in range(250):   # number of training steps; the value was lost in the source, 250 (2500 samples / batches of 10) is assumed
    if i % 50 == 0:    # plotting interval; the value was lost in the source, 50 is assumed
        # plot histograms
        all_inputs, all_inputs_norm = sess.run([layers_inputs, layers_inputs_norm],
                                               feed_dict={xs: x_data, ys: y_data})
        plot_his(all_inputs, all_inputs_norm)

    # train both networks on one batch per step
    sess.run([train_op, train_op_norm],
             feed_dict={xs: x_data[i * 10:i * 10 + 10], ys: y_data[i * 10:i * 10 + 10]})

    if i % record_step == 0:
        # record cost
        cost_his.append(sess.run(cost, feed_dict={xs: x_data, ys: y_data}))
        cost_his_norm.append(sess.run(cost_norm, feed_dict={xs: x_data, ys: y_data}))

# plot the cost (error) curves
plt.ioff()
plt.figure()
plt.title('Matplotlib, BN, error_curve -- Jason Niu')
plt.plot(np.arange(len(cost_his)) * record_step, np.array(cost_his), label='no BN')    # no norm
plt.plot(np.arange(len(cost_his)) * record_step, np.array(cost_his_norm), label='BN')  # norm
plt.legend()
plt.show()
```
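For reference, the per-layer transform that `tf.nn.batch_normalization` applies inside `add_layer` (and that the two commented-out lines spell out by hand) is the standard batch-normalization formula, where $\mu_B$ and $\sigma_B^2$ are the batch mean and variance returned by `tf.nn.moments`, $\gamma$ and $\beta$ are the `scale` and `shift` variables, and $\epsilon = 0.001$:

$$\hat{x} = \frac{x - \mu_B}{\sqrt{\sigma_B^2 + \epsilon}}, \qquad y = \gamma\,\hat{x} + \beta$$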
In summary, this example shows how TensorFlow's batch-normalization (BN) algorithm, applied to every layer of a multilayer network learning a quadratic function from the input data, speeds up training: it plots a histogram of each layer's inputs and compares the error curves with and without BN.
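One implementation detail worth calling out is the `mean_var_with_update` helper in `built_net`: the control dependency on `ema.apply` forces the exponential moving averages of the batch statistics to be refreshed every time the mean and variance tensors are evaluated, i.e. on every training step. Below is a minimal standalone sketch of that pattern, assuming the TensorFlow 1.x API; the tensor names here are illustrative, not part of the original script:

```python
import tensorflow as tf  # TensorFlow 1.x API assumed

x = tf.placeholder(tf.float32, [None, 1])
batch_mean, batch_var = tf.nn.moments(x, axes=[0])  # per-batch statistics

ema = tf.train.ExponentialMovingAverage(decay=0.5)  # keeps running (shadow) copies

def mean_var_with_update():
    # ema.apply creates/updates shadow variables for the batch statistics;
    # the control dependency makes that update run whenever the returned
    # mean/var tensors are evaluated, i.e. on every training step
    ema_apply_op = ema.apply([batch_mean, batch_var])
    with tf.control_dependencies([ema_apply_op]):
        return tf.identity(batch_mean), tf.identity(batch_var)

mean, var = mean_var_with_update()
# the accumulated running averages can later be read back with
# ema.average(batch_mean) and ema.average(batch_var)
```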