Caffe Source Code Analysis--softmax_layer.cpp
// Copyright Yangqing Jia

#include <algorithm>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/util/math_functions.hpp"

using std::max;

namespace caffe {

// Set up the softmax layer.
template <typename Dtype>
void SoftmaxLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  CHECK_EQ(bottom.size(), 1) << "Softmax Layer takes a single blob as input.";
  CHECK_EQ(top->size(), 1) << "Softmax Layer takes a single blob as output.";
  // Allocate space for the output.
  (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
      bottom[0]->height(), bottom[0]->width());
  // sum_multiplier_ is filled with ones; it can be viewed as a row vector
  // (a matrix with a single row) that assists the matrix computations below.
  sum_multiplier_.Reshape(1, bottom[0]->channels(),
      bottom[0]->height(), bottom[0]->width());
  Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data();
  for (int i = 0; i < sum_multiplier_.count(); ++i) {
    multiplier_data[i] = 1.;
  }
  // Allocate the temporary blob scale_. Its size is num, so it can be seen
  // as a column vector with one entry per input example.
  scale_.Reshape(bottom[0]->num(), 1, 1, 1);
}

template <typename Dtype>
void SoftmaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = (*top)[0]->mutable_cpu_data();
  Dtype* scale_data = scale_.mutable_cpu_data();
  // Treat the output as num rows (one per input example) of dim elements each.
  int num = bottom[0]->num();
  int dim = bottom[0]->count() / bottom[0]->num();
  memcpy(top_data, bottom_data, sizeof(Dtype) * bottom[0]->count());
  // We need to subtract the max to avoid numerical issues, compute the exp,
  // and then normalize.
  // Find the maximum value of each row.
  for (int i = 0; i < num; ++i) {
    scale_data[i] = bottom_data[i * dim];
    for (int j = 0; j < dim; ++j) {
      scale_data[i] = max(scale_data[i], bottom_data[i * dim + j]);
    }
  }
  // Subtraction: through a matrix multiplication, every element in each of the
  // num rows of top_data has that row's maximum subtracted from it. Very
  // ingenious. (gemm computes C = alpha * op(A) * op(B) + beta * C.)
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
      scale_data, sum_multiplier_.cpu_data(), 1., top_data);
  // Exponentiation: take exp element-wise.
  caffe_exp<Dtype>(num * dim, top_data, top_data);
  // Sum: after the exp, each row is summed into scale_data.
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_data,
      sum_multiplier_.cpu_data(), 0., scale_data);
  // Division: each row is divided by its sum.
  for (int i = 0; i < num; ++i) {
    caffe_scal<Dtype>(dim, Dtype(1.) / scale_data[i], top_data + i * dim);
  }
}
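The "subtraction" step is the trick worth dwelling on: caffe_cpu_gemm treats scale_data as a num x 1 matrix and sum_multiplier_ (all ones) as a 1 x dim matrix, so their product is a num x dim matrix whose row i is filled with row i's maximum; with alpha = -1 and beta = 1 that product is subtracted from top_data in place. A minimal standalone sketch of the same broadcast written with plain loops (broadcast_subtract and the values in main are illustrative, not part of Caffe):

#include <cstdio>
#include <vector>

// Emulates caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1,
//                         -1., scale, ones, 1., top):
// top(num x dim) += (-1) * scale(num x 1) * ones(1 x dim)
void broadcast_subtract(int num, int dim,
                        const std::vector<float>& scale,
                        std::vector<float>& top) {
  for (int i = 0; i < num; ++i)
    for (int j = 0; j < dim; ++j)
      top[i * dim + j] += -1.f * scale[i] * 1.f;  // ones[j] == 1
}

int main() {
  int num = 2, dim = 3;
  std::vector<float> top = {1, 2, 3,
                            4, 6, 5};
  std::vector<float> scale = {3, 6};  // per-row maxima
  broadcast_subtract(num, dim, scale, top);
  for (float v : top) std::printf("%g ", v);  // -2 -1 0 -2 0 -1
  std::printf("\n");
  return 0;
}

The same rank-1 GEMM reappears in Backward_cpu below, where the per-row value being broadcast is an inner product rather than a maximum.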
template <typename Dtype>
Dtype SoftmaxLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* top_data = top[0]->cpu_data();
  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
  Dtype* scale_data = scale_.mutable_cpu_data();
  int num = top[0]->num();
  int dim = top[0]->count() / top[0]->num();
  memcpy(bottom_diff, top_diff, sizeof(Dtype) * top[0]->count());
  // Compute inner1d(top_diff, top_data) and subtract it from the bottom diff:
  // for each row, take the inner product of top_diff and top_data.
  for (int i = 0; i < num; ++i) {
    scale_data[i] = caffe_cpu_dot<Dtype>(dim, top_diff + i * dim,
        top_data + i * dim);
  }
  // Subtraction: every element of bottom_diff has its row's inner product
  // subtracted from it (the same rank-1 GEMM trick as in Forward_cpu).
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
      scale_data, sum_multiplier_.cpu_data(), 1., bottom_diff);
  // Element-wise multiplication by the softmax output.
  caffe_mul<Dtype>(top[0]->count(), bottom_diff, top_data, bottom_diff);
  return Dtype(0);
}

INSTANTIATE_CLASS(SoftmaxLayer);

}  // namespace caffe
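Those three steps implement the softmax gradient dL/dx_j = y_j * (dL/dy_j - sum_k dL/dy_k * y_k), where y is the softmax output: copy top_diff, subtract each row's inner product of top_diff and y, then multiply element-wise by y. A small self-contained check of that formula against a finite-difference gradient on one row (all names and values here are illustrative, not Caffe code):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Softmax of one row, with the same max-subtraction as Forward_cpu.
std::vector<double> softmax(const std::vector<double>& x) {
  double m = x[0];
  for (double v : x) m = std::max(m, v);
  std::vector<double> y(x.size());
  double sum = 0;
  for (size_t j = 0; j < x.size(); ++j) { y[j] = std::exp(x[j] - m); sum += y[j]; }
  for (double& v : y) v /= sum;
  return y;
}

int main() {
  std::vector<double> x = {0.5, -1.0, 2.0};
  std::vector<double> top_diff = {0.3, -0.2, 0.1};  // dL/dy, arbitrary
  std::vector<double> y = softmax(x);

  // Backward_cpu's recipe: (top_diff - <top_diff, y>) * y, element-wise.
  double dot = 0;
  for (size_t j = 0; j < y.size(); ++j) dot += top_diff[j] * y[j];
  std::vector<double> bottom_diff(y.size());
  for (size_t j = 0; j < y.size(); ++j)
    bottom_diff[j] = (top_diff[j] - dot) * y[j];

  // Finite-difference check of dL/dx_j, where L = sum_k top_diff[k] * y_k.
  const double eps = 1e-6;
  for (size_t j = 0; j < x.size(); ++j) {
    std::vector<double> xp = x, xm = x;
    xp[j] += eps; xm[j] -= eps;
    std::vector<double> yp = softmax(xp), ym = softmax(xm);
    double lp = 0, lm = 0;
    for (size_t k = 0; k < y.size(); ++k) { lp += top_diff[k] * yp[k]; lm += top_diff[k] * ym[k]; }
    std::printf("j=%zu analytic=%.6f numeric=%.6f\n",
                j, bottom_diff[j], (lp - lm) / (2 * eps));
  }
  return 0;
}

The analytic and numeric values should agree to several decimal places, which is a quick way to convince yourself that the GEMM-plus-elementwise-multiply recipe really is the softmax Jacobian applied to top_diff.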
Author: linger
Original article: http://blog.csdn.net/lingerlanlan/article/details/32700431