I. A brief introduction to the role of Filler
The purpose of the filler layer is to initialize the weights according to the parameters given in the proto file. Several initialization schemes are provided: constant initialization (constant), Gaussian-distribution initialization (gaussian), positive-unit-ball initialization (positive_unitball), uniform-distribution initialization (uniform), Xavier initialization (xavier), MSRA initialization (msra), and bilinear initialization (bilinear).
II. A detailed introduction to the Filler classes. First, look at the factory function of the Filler hierarchy; it makes the overall structure clear.
Template <typename dtype>
filler<dtype>* getfiller (const fillerparameter& param) {
const std: :string& type = Param.type ();
if (type = = "Constant") {return
new constantfiller<dtype> (param);
} else if (type = = "Gaussian") {
Retu RN new gaussianfiller<dtype> (param);
} else if (type = = "Positive_unitball") {return
new positiveunitballfiller<dtype> (param);
} else if (type = = "Uniform") {return
new uniformfiller<dtype> (param);
} else if (type = = "Xavier") {return
new Xavier Filler<dtype> (param);
} else if (type = = "Msra") {return
new msrafiller<dtype> (param);
} else if (type = = "Bilinear") {
Retu RN new bilinearfiller<dtype> (param);
} else {
CHECK (false) << "Unknown filler name:" << param.type ();
}
Return (filler<dtype>*) (NULL);
This obtains the corresponding filler according to the given parameter; from this code you can see that the proto file is where the initialization scheme for the weights is specified.
1) The base class Filler
Template <typename dtype>
class Filler {public
:
//Constructor
explicit filler (const fillerparameter & param): FILLER_PARAM_ (param) {}//
destructor, and is virtual function
virtual ~filler () {}
//pure virtual function, inheriting subclasses must implement
virtual void Fill (blob<dtype>* Blob) = 0;
Protected:
fillerparameter filler_param_; Class Filler
2) Subclasses that inherit from Filler
2-1 Constant initialization class
Template <typename dtype>
class constantfiller:public filler<dtype> {public
:
Explicit Constantfiller (const fillerparameter& param)
: filler<dtype> (param) {}
virtual void Fill (blob< dtype>* blob) {
//Get data pointer
dtype*/blob->mutable_cpu_data ();
Gets the data length
const int count = Blob->count ();
Gets the constant value of the constant initialization
const DTYPE value = This->filler_param_.value ();
CHECK (count);
for (int i = 0; i < count; ++i) {
Data[i] = value;//is initialized for each element to constant value
}
check_eq (this->filler_param_.s Parse (),-1)
<< "sparsity not supported by this filler.";
2-2 Uniform-distribution initialization class
Template <typename dtype>
class uniformfiller:public filler<dtype> {public
:
Explicit Uniformfiller (const fillerparameter& param)
: filler<dtype> (param) {}
virtual void Fill (blob< dtype>* blob) {
//check whether the element in the Blob is 0
check (blob->count ());
Invoke Caffe_rng_uniform to initialize
caffe_rng_uniform<dtype> (Blob->count (), Dtype (THIS->FILLER_PARAM_. Min ()),
Dtype (This->filler_param_.max ()), Blob->mutable_cpu_data ());
Uniform distribution initialization is a
check_eq (This->filler_param_.sparse (),-1) << "sparsity not supported by the sparse feature" ().
Filler. ";
}
};
2-3 Gaussian-distribution initialization class (supports the sparse feature)
Template <typename dtype> class gaussianfiller:public filler<dtype> {public:explicit GaussianFiller (con St fillerparameter& param): filler<dtype> (param) {} virtual void Fill (blob<dtype>* Blob) {Dt
ype* data = Blob->mutable_cpu_data ();
CHECK (Blob->count ()); Invokes Caffe_rng_gaussian initialization, in which the mean value of the Gaussian distribution is entered and the standard deviation caffe_rng_gaussian<dtype> (Blob->count (), Dtype (this->
Filler_param_.mean ()), Dtype (THIS->FILLER_PARAM_.STD ()), Blob->mutable_cpu_data ());
int sparse = This->filler_param_.sparse ();
Check sparse > 1 check_ge (Sparse,-1);
if (sparse >= 0) {//If sparse is enabled//sparse initialization is implemented for "weight" blobs; i.e. matrices. These have num = = Channels = 1; Width is number of inputs; The height is//number of outputs.
The ' sparse ' variable specifies the mean number//of Non-zero input weights for a given output.
Check_ge (Blob->num_axes (), 1); Assume that the weight of the shape is the number of output units x input unit//blob->shape (0) = number of output cells const int num_outputs = blob->shape (0); Probability of not being 0 = 1/Output cell number//So 0 probability = 1-1/output Unit number Dtype non_zero_probability = dtype (sparse)/Dtype (num_output
s);
Creates a new Rand_vec, where the user holds the value generated by the Bernoulli distribution (two-item distribution) Rand_vec_.reset (new Syncedmemory (Blob->count () * sizeof (int)));
int* mask = reinterpret_cast<int*> (Rand_vec_->mutable_cpu_data ());
Caffe_rng_bernoulli (Blob->count (), non_zero_probability, mask); for (int i = 0; i < Blob->count (); ++i) {data[i] *= mask[i];//Each data element is multiplied by the sample value of the generated two-item distribution}}}
Rotected:shared_ptr<syncedmemory> rand_vec_; };
2-4 PositiveUnitballFiller initialization. If the concept is unfamiliar, see http://math.stackexchange.com/questions/520002/unit-ball-with-p-norm — each row of the weight matrix is normalized so it lies on the positive part of the unit L1 ball.
Positiveunitballfiller First fills w//with a uniform distribution and then sums the elements in W by rows, then each element of the row is divided by the row's and template <typename dtype> class Positiveunitballfiller:public filler<dtype> {public:explicit positiveunitballfiller (const FILLERPARAMETER&AM P param): filler<dtype> (param) {} virtual void Fill (blob<dtype>* Blob) {dtype* data = Blob->mut
Able_cpu_data (); Dcheck (Blob->count ());//I wonder why this place is distributed evenly to the weight caffe_rng_uniform<dtype> (blob->count (), 0, 1, b, Dcheck//first).
Lob->mutable_cpu_data ()); We expect the filler to is called very frequently, so we'll//just use a simple implementation//Count
/num = input Dimension int Dim = Blob->count ()/Blob->num ();
Check (Dim),//Verify that the input dimension is less than 0 for (int i = 0; i < blob->num (); ++i) {//traverse the number of hidden cells (or the number of output cells) Dtype sum = 0; for (int j = 0; J < Dim; ++j) {sum + = Data[i * Dim + j];//sum + = Data[i][j] that is, to sum the rows by line} for ( int j = 0; J < Dim;
++J) { Data[i * Dim + j]/= sum;//each row is divided by the row's and} check_eq (This->filler_param_.sparse (),-1) <<
"Sparsity not supported by this filler." }
};
2-5 XavierFiller initialization (for convolution kernels)
What is not understood here is shape (num, a, B, c) where A * b * c = fan_in and num * b * c = fan_out//fan in and fan out defined//thanks to Wang Feng, it was later learned that B*c=kernel s Ize//A is the input channel//num is the output of the channel template <typename dtype> class Xavierfiller:public filler<dtype> {p Ublic:explicit xavierfiller (const fillerparameter& param): filler<dtype> (param) {} virtual void Fill (blob<dtype>* Blob)
{CHECK (Blob->count ());
int fan_in = Blob->count ()/Blob->num ();
int fan_out = Blob->count ()/blob->channels (); Dtype n = fan_in; Default to Fan_in if (This->filler_param_.variance_norm () ==//If the parameter defines variance normalization then n = fan in + fan out Fillerparameter_
Variancenorm_average) {n = (fan_in + fan_out)/Dtype (2);
else if (this->filler_param_.variance_norm () = = Fillerparameter_variancenorm_fan_out) {n = fan_out; Dtype scale = sqrt (Dtype (3)/n);//scale = \frac{sqrt{3}}{n}//then initialized with a uniform distribution of [-scale,scale] caffe_rng_unif Orm<dtyPe> (Blob->count (),-scale, scale, blob->mutable_cpu_data ());
Check_eq (This->filler_param_.sparse (),-1) << "sparsity not supported by this filler."; }
};
2-6 MSRAFiller initialization (for convolution kernels)
// MSRA (He) initialization: Gaussian with mean 0 and standard deviation
// sqrt(2 / n), where n defaults to fan_in and may be switched to
// (fan_in + fan_out) / 2 or fan_out via variance_norm.
// Sparsity is not supported by this filler.
template <typename Dtype>
class MSRAFiller : public Filler<Dtype> {
 public:
  explicit MSRAFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    int fan_in = blob->count() / blob->num();
    int fan_out = blob->count() / blob->channels();
    Dtype n = fan_in;  // Default to fan_in.
    if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_AVERAGE) {
      n = (fan_in + fan_out) / Dtype(2);
    } else if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_FAN_OUT) {
      n = fan_out;
    }
    // The standard deviation is sqrt(2 / n).
    Dtype std = sqrt(Dtype(2) / n);
    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(0), std,
        blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
2-7 BilinearFiller initialization (used for deconvolution kernels)
This initialization is used by deconvolution layers and does not support the sparse feature.
The derivation of the formula is not covered here.
// Fills a square 4-D filter with weights for bilinear interpolation, as
// commonly used to initialize deconvolution (upsampling) kernels.
// Sparsity is not supported by this filler.
template <typename Dtype>
class BilinearFiller : public Filler<Dtype> {
 public:
  explicit BilinearFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK_EQ(blob->num_axes(), 4) << "Blob must be 4 dim.";
    CHECK_EQ(blob->width(), blob->height()) << "Filter must be square";
    Dtype* data = blob->mutable_cpu_data();
    // f is the filter width divided by 2, rounded up.
    int f = ceil(blob->width() / 2.);
    // c is the center coordinate of the bilinear kernel.
    float c = (2 * f - 1 - f % 2) / (2. * f);
    for (int i = 0; i < blob->count(); ++i) {
      float x = i % blob->width();                    // Column index.
      float y = (i / blob->width()) % blob->height(); // Row index.
      data[i] = (1 - fabs(x / f - c)) * (1 - fabs(y / f - c));
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
III. Introduction to the helper functions used by Filler
Because the fillers use several random-number generation routines, here is a look at the related implementations in math_functions:
(1) Gaussian-distribution random number generation.
Implementation on the CPU (calls Boost's library directly):
Template <typename dtype>
void Caffe_rng_gaussian (const int N, const dtype A,
const Dtype Sigma, dtype* R) {
check_ge (n, 0);
CHECK (r);
Check_gt (sigma, 0);
Direct call in boost is too distributed.
boost::normal_distribution<dtype> Random_distribution (A, sigma);
boost::variate_generator<caffe::rng_t*, boost::normal_distribution<dtype> >
variate_generator ( Caffe_rng (), random_distribution);
for (int i = 0; i < n; ++i) {
R[i] = Variate_generator ();
}
}
Implementation on the GPU (calls the cuRAND library directly):
Template <>
void Caffe_gpu_rng_gaussian (const int n, const float MU, const float sigma,
float* r) {
cura Nd_check (
curandgeneratenormal (Caffe::curand_generator (), R, N, Mu, sigma));
}
Template <>
void Caffe_gpu_rng_gaussian (const int n, const double MU, const double sigma,
double* r) {
C Urand_check (
curandgeneratenormaldouble (Caffe::curand_generator (), R, N, Mu, sigma));
}
(2) Uniform-distribution random number generation.
CPU:
Template <typename dtype>
void caffe_rng_uniform (const int N, const dtype A, const Dtype B, dtype* R) {
CHEC K_ge (n, 0);
CHECK (r);
Check_le (A, b);
Call Boost's library
boost::uniform_real<dtype> random_distribution (A, caffe_nextafter<dtype> (b));
boost::variate_generator<caffe::rng_t*, boost::uniform_real<dtype> >
variate_generator (caffe_rng (), random_distribution);
for (int i = 0; i < n; ++i) {
R[i] = Variate_generator ();
}
}
GPU:
void caffe_gpu_rng_uniform (const int n, unsigned int* r) {Curand_check (curandgenerate
Generator (), R, N));
} template <> void caffe_gpu_rng_uniform<float> (const int n, const float A, const float B,
float* r) {Curand_check (Curandgenerateuniform (Caffe::curand_generator (), R, N));
Const FLOAT range = b-a;
if (range!= static_cast<float> (1)) {caffe_gpu_scal (n, range, r);
} if (a!= static_cast<float> (0)) {caffe_gpu_add_scalar (n, a, R);
} template <> void caffe_gpu_rng_uniform<double> (const int n, const double A, const double B,
double* r) {Curand_check (curandgenerateuniformdouble (Caffe::curand_generator (), R, N));
Const double range = b-a;
if (range!= static_cast<double> (1)) {caffe_gpu_scal (n, range, r);
} if (a!= static_cast<double> (0)) {caffe_gpu_add_scalar (n, a, R); }
}
(3) Bernoulli-distribution random number generation (note there is no GPU implementation of this one):
Template <typename dtype>
void Caffe_rng_bernoulli (const int n, const dtype p, int* r) {
check_ge (n, 0);
check (r);
Check_ge (p, 0);
Check_le (P, 1);
Boost::bernoulli_distribution<dtype> random_distribution (p);
boost::variate_generator<caffe::rng_t*, boost::bernoulli_distribution<dtype> >
variate_generator (Caffe_rng (), random_distribution);
for (int i = 0; i < n; ++i) {
R[i] = Variate_generator ();
}
}
void Caffe_rng_bernoulli (const int n, const dtype p, unsigned int* r) {
check_ge (n, 0);
CHECK (r);
Check_ge (p, 0);
Check_le (P, 1);
Boost::bernoulli_distribution<dtype> random_distribution (p);
boost::variate_generator<caffe::rng_t*, boost::bernoulli_distribution<dtype> >
variate_ Generator (Caffe_rng (), random_distribution);
for (int i = 0; i < n; ++i) {
R[i] = static_cast<unsigned int> (variate_generator ());
}
IV. Summary
This article mainly introduced the implementation of each weight-initialization algorithm in Filler; for the underlying theory, refer to the relevant papers. There is really not much more to dig into in Filler — it has been covered almost completely.