Refer to:
Machine Learning public course notes (5): Neural Networks
CS224d Notes 3 -- Neural Networks
Deep Learning and Natural Language Processing (4): Stanford CS224d Assignment 1 with Solutions
CS224d Problem Set 1 assignment
Softmax
import numpy as np

def softmax(x):
    # Assumes a 2-D input where each row is one example.
    assert len(x.shape) > 1
    x -= np.max(x, axis=1, keepdims=True)   # subtract the row-wise max for numerical stability
    x = np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
    return x
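As a quick check of the numerical-stability trick (my own snippet, not part of the assignment files), softmax should be unchanged when a constant is added to every entry of a row, which is exactly why subtracting the row-wise max is safe:

import numpy as np
from q1_softmax import softmax   # the function defined above

# Adding 1000 to the first row must not change its probabilities;
# it only prevents overflow in np.exp for large inputs.
x = np.array([[1001.0, 1002.0], [3.0, 4.0]])
expected = np.array([[0.26894142, 0.73105858],
                     [0.26894142, 0.73105858]])
assert np.allclose(softmax(x.copy()), expected)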
sigmoid & sigmoid_grad:
import numpy as np

def sigmoid(x):
    result = 1.0 / (1.0 + np.exp(-x))
    return result

def sigmoid_grad(f):
    # f is the output of sigmoid, i.e. f = sigmoid(x); the derivative is f * (1 - f).
    f = f * (1.0 - f)
    return f
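A small numerical check I find useful here (not part of the assignment handout): sigmoid_grad expects the output of sigmoid, f = sigmoid(x), and its result should match a central-difference estimate of the derivative:

import numpy as np
from q2_sigmoid import sigmoid, sigmoid_grad   # the functions defined above

x = np.array([0.5, -1.0, 2.0])
h = 1e-6
numeric = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)   # finite-difference estimate
analytic = sigmoid_grad(sigmoid(x))                     # f * (1 - f) with f = sigmoid(x)
assert np.allclose(numeric, analytic, atol=1e-6)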
gradcheck_naive:
import numpy as np
import random

def gradcheck_naive(f, x):
    """ Gradient check for a function f
    - f should be a function that takes a single argument and outputs the cost and its gradients
    - x is the point (numpy array) to check the gradient at
    """
    rndstate = random.getstate()
    random.setstate(rndstate)
    fx, grad = f(x)   # Evaluate function value at original point
    h = 1e-4

    # Iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        ### Try modifying x[ix] with h defined above to compute numerical gradients.
        ### Make sure you call random.setstate(rndstate) before calling f(x) each time;
        ### this will make it possible to test cost functions with built-in randomness later.
        ### YOUR CODE HERE:
        old_val = x[ix]
        x[ix] = old_val - h
        random.setstate(rndstate)
        (fxh1, _) = f(x)
        x[ix] = old_val + h
        random.setstate(rndstate)
        (fxh2, _) = f(x)
        numgrad = (fxh2 - fxh1) / (2 * h)   # central difference
        x[ix] = old_val
        ### END YOUR CODE

        # Compare gradients
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > 1e-5:
            print "Gradient check failed."
            print "First gradient error found at index %s" % str(ix)
            print "Your gradient: %f \t Numerical gradient: %f" % (grad[ix], numgrad)
            return

        it.iternext()   # Step to next dimension

    print "Gradient check passed!"
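A minimal usage sketch (the exact test values are my own): f must return a (cost, gradient) pair, so for f(x) = sum(x**2) the analytic gradient is 2x and each call should print "Gradient check passed!":

import numpy as np
from q2_gradcheck import gradcheck_naive   # the function defined above

quad = lambda x: (np.sum(x ** 2), x * 2.0)   # cost and its analytic gradient

gradcheck_naive(quad, np.array(123.456))     # scalar test
gradcheck_naive(quad, np.random.randn(3, ))  # 1-D test
gradcheck_naive(quad, np.random.randn(4, 5)) # 2-D test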
neural.py
import numpy as np
import random

from q1_softmax import softmax
from q2_sigmoid import sigmoid, sigmoid_grad
from q2_gradcheck import gradcheck_naive


def forward_backward_prop(data, labels, params, dimensions):
    """ Forward and backward propagation for a two-layer sigmoidal network.

    Compute the forward propagation and the cross entropy cost, and the
    backward propagation for the gradients of all parameters.
    """
    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    N, D = data.shape
    # data   -- N x D
    # W1     -- D x H
    # b1     -- 1 x H
    # W2     -- H x V
    # b2     -- 1 x V
    # labels -- N x V

    ### YOUR CODE HERE: forward propagation
    z1 = np.dot(data, W1) + b1   # N x H
    a1 = sigmoid(z1)             # N x H
    z2 = np.dot(a1, W2) + b2     # N x V
    a2 = softmax(z2)             # N x V

    # Cross entropy cost
    # First method:
    # B = np.exp(z2)                          # N x V
    # b = np.sum(B, axis=1) + 1e-8            # N x 1
    # z = np.log(b)                           # N x 1
    # cost = np.sum(z) - np.sum(z2 * labels)
    # cost /= N
    # Second method:
    cost = -np.sum(np.log(a2[labels == 1])) / N
    ### END YOUR CODE
    # cost = b2[0, -1]

    ### YOUR CODE HERE: backward propagation
    delta2 = a2 - labels                               # N x V    delta2 = a2 - y
    gradb2 = np.sum(delta2, axis=0)                    # 1 x V    gradb2 <-- delta2
    gradb2 /= N
    gradW2 = np.dot(a1.T, delta2)                      # H x V    gradW2 = a1.T * delta2
    gradW2 /= N
    delta1 = sigmoid_grad(a1) * np.dot(delta2, W2.T)   # N x H    delta1 = f'(a1) * (delta2 * W2.T)
    gradb1 = np.sum(delta1, axis=0)                    # 1 x H    gradb1 <-- delta1
    gradb1 /= N
    gradW1 = np.dot(data.T, delta1)                    # D x H    gradW1 = x.T * delta1
    gradW1 /= N
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad


def sanity_check():
    """ Set up fake data and parameters for the neural network, and test using gradcheck. """
    print "Running sanity check..."

    N = 20
    dimensions = [10, 5, 10]
    data = np.random.randn(N, dimensions[0])   # each row will be a datum, 20 x 10
    labels = np.zeros((N, dimensions[2]))
    for i in xrange(N):
        labels[i, random.randint(0, dimensions[2] - 1)] = 1   # one-hot vector

    params = np.random.randn((dimensions[0] + 1) * dimensions[1] +
                             (dimensions[1] + 1) * dimensions[2], )

    gradcheck_naive(lambda params: forward_backward_prop(data, labels, params, dimensions),
                    params)


if __name__ == "__main__":
    sanity_check()
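For reference, a compact summary of the backward-pass formulas the code above implements (this write-up is mine; notation follows the shape comments in the code, with X the data matrix and y the one-hot labels):

\begin{aligned}
J &= -\frac{1}{N}\sum_{n=1}^{N}\sum_{k} y_{nk}\,\log a^{(2)}_{nk},
\qquad a^{(2)} = \mathrm{softmax}(z^{(2)}),\quad a^{(1)} = \sigma(z^{(1)}) \\[4pt]
\delta^{(2)} &= a^{(2)} - y \qquad (N\times V) \\[4pt]
\nabla_{W_2} J &= \tfrac{1}{N}\, a^{(1)\top}\delta^{(2)},
\qquad \nabla_{b_2} J = \tfrac{1}{N}\sum_n \delta^{(2)}_{n} \\[4pt]
\delta^{(1)} &= \bigl(\delta^{(2)} W_2^{\top}\bigr)\odot a^{(1)}\odot\bigl(1-a^{(1)}\bigr)
\qquad (N\times H) \\[4pt]
\nabla_{W_1} J &= \tfrac{1}{N}\, X^{\top}\delta^{(1)},
\qquad \nabla_{b_1} J = \tfrac{1}{N}\sum_n \delta^{(1)}_{n}
\end{aligned}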
CS224d Assignment 1 "Neural Network Basics"