SqueezeNet comes from the 2016 paper "SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size".
SqueezeNet's main contribution is the Fire module. As shown in the picture above, a Fire module consists of a squeeze part and an expand part: the squeeze part contains s 1x1 convolution kernels, the expand part contains e1 1x1 and e3 3x3 convolution kernels, and they satisfy s < e1 + e3.
With this substitution, the model shrinks by roughly 50x while maintaining accuracy.
Test program:
typedef std::p air<string, float> prediction;
Class Classifier {public:classifier (const string& model_file, const string& Trained_file,
Const string& Mean_file, const string& label_file);
Std::vector<prediction> classify (const cv::mat& img, int N = 5);
Private:void Setmean (const string& mean_file);
Std::vector<float> Predict (const cv::mat& IMG);
void Wrapinputlayer (std::vector<cv::mat>* input_channels);
void preprocess (const cv::mat& img, std::vector<cv::mat>* input_channels);
private:shared_ptr<net<float> > net_;
Cv::size Input_geometry_;
int Num_channels_;
Cv::mat Mean_;
Std::vector<string> Labels_;
};
Classifier::classifier (const string& model_file, const string& Trained_file,
Const string& Mean_file, const string& label_file) {#ifdef cpu_only Caffe::set_mode (CAFFE::CPU);
#else Caffe::set_mode (CAFFE::GPU); #endif/* Load the network.
*/Net_.reset (new Net<float> (Model_file, TEST));
Net_->copytrainedlayersfrom (Trained_file);
Check_eq (Net_->num_inputs (), 1) << "Network should have one input."
Check_eq (Net_->num_outputs (), 1) << "Network should have one output."
blob<float>* Input_layer = net_->input_blobs () [0];
Num_channels_ = Input_layer->channels ();
CHECK (Num_channels_ = 3 | | num_channels_ = 1) << "Input layer should have 1 or 3 channels.";
Input_geometry_ = Cv::size (Input_layer->width (), Input_layer->height ()); /* Load the Binaryproto mean file.
* * Setmean (mean_file); /* Load labels.
* * Std::ifstream labels (label_file.c_str ());
CHECK (labels) << "Unable to open labels file" << label_file;
String line;
while (Std::getline (labels, line)) Labels_.push_back (string); Blob< float>* Output_layer = Net_->output_blobs () [0]; Check_eq (Labels_.size (), Output_layer->channels ()) << "Number of labels is different from the output layer di
Mension. ";} static bool Paircompare (const std::p air<float, int>& lhs, const std::p air<float, int&
gt;& RHS) {return lhs.first > Rhs.first;} /* Return the indices of the top N values of Vector v./static std::vector<int> Argmax (const std::vector<float&
gt;& V, int N) {std::vector<std::p air<float, int> > pairs;
for (size_t i = 0; i < v.size (); ++i) Pairs.push_back (Std::make_pair (v[i), static_cast<int> (i)));
std::p artial_sort (Pairs.begin (), Pairs.begin () + N, Pairs.end (), paircompare);
std::vector<int> result;
for (int i = 0; i < N; ++i) Result.push_back (Pairs[i].second);
return result; }/* Return to top N predictions. * * std::vector<prediction> classifier::classify (const cv::mat& IMG,int N) {std::vector<float> output = Predict (IMG);
n = std::min<int> (Labels_.size (), n);
std::vector<int> MAXN = Argmax (output, N);
Std::vector<prediction> predictions;
for (int i = 0; i < N; ++i) {int idx = maxn[i];
Predictions.push_back (Std::make_pair (Labels_[idx], output[idx));
return predictions; }/* Load the mean file in Binaryproto format.
*/void Classifier::setmean (const string& mean_file) {Blobproto blob_proto;
Readprotofrombinaryfileordie (Mean_file.c_str (), &blob_proto);
* Convert from Blobproto to blob<float> * * blob<float> Mean_blob; Mean_blob.
Fromproto (Blob_proto);
Check_eq (Mean_blob.channels (), Num_channels_) << "Number of channels of mean file doesn ' t match input layer."; /* The format of the mean file is planar 32-bit float BGR or grayscale.
* * std::vector<cv::mat> channels;
float* data = Mean_blob.mutable_cpu_data (); for (int i = 0; i < Num_channels_; ++i) {/* Extract an individual channel. */Cv::mat Channel (Mean_blob.height (), Mean_blob.width (), CV_32FC1, data
);
Channels.push_back (channel);
Data + + mean_blob.height () * Mean_blob.width (); }/* Merge the separate channels into a single image.
* * Cv::mat mean;
Cv::merge (channels, mean); /* Compute The global mean pixel value and create a mean image * filled with this value.
* * Cv::scalar Channel_mean = Cv::mean (mean);
Mean_ = Cv::mat (Input_geometry_, Mean.type (), Channel_mean); std::vector<float> classifier::P redict (const cv::mat& img) {blob<float>* Input_layer = Net_->inpu
T_blobs () [0];
Input_layer->reshape (1, Num_channels_, Input_geometry_.height, input_geometry_.width); /* Forward dimension change to all layers.
* * Net_->reshape ();
Std::vector<cv::mat> Input_channels;
Wrapinputlayer (&input_channels);
Preprocess (IMG, &input_channels);
Net_->forward (); /* Copy the output layer to a std::vector * * blob<float>* Output_layer = net_->output_blobs () [0];
Const float* BEGIN = Output_layer->cpu_data ();
Const float* END = begin + Output_layer->channels ();
Return std::vector<float> (begin, end); }/* Wrap the input layer of the network in separate Cv::mat objects * (one per channel). This is way we save one memcpy operation and we * don ' t need to rely on cudamemcpy2d. The last preprocessing * operation'll write the separate channels directly to the input * layer. */void Classifier::wrapinputlayer (std::vector<cv::mat>* input_channels) {blob<float>* Input_layer = net_
->input_blobs () [0];
int width = input_layer->width ();
int height = input_layer->height ();
float* input_data = Input_layer->mutable_cpu_data ();
for (int i = 0; i < input_layer->channels (); ++i) {Cv::mat channel (height, width, cv_32fc1, input_data);
Input_channels->push_back (channel); Input_data += width * height; } void Classifier::P reprocess (const cv::mat& img, std::vector<cv::mat>* Input_cha
Nnels) {* * Convert the input image to the input image format of the network. */Cv::mat sample;
if (img.channels () = = 3 && num_channels_ = 1) Cv::cvtcolor (IMG, sample, Cv::color_bgr2gray);
else if (img.channels () = = 4 && num_channels_ = 1) Cv::cvtcolor (IMG, sample, Cv::color_bgra2gray);
else if (img.channels () = = 4 && num_channels_ = 3) Cv::cvtcolor (IMG, sample, CV::COLOR_BGRA2BGR);
else if (img.channels () = = 1 && num_channels_ = 3) Cv::cvtcolor (IMG, sample, CV::COLOR_GRAY2BGR);
else sample = IMG;
Cv::mat sample_resized;
if (Sample.size ()!= input_geometry_) cv::resize (sample, sample_resized, Input_geometry_);
else sample_resized = sample;
Cv::mat sample_float;
if (Num_channels_ = = 3) Sample_resized.convertto (Sample_float, CV_32FC3); else Sample_rEsized.convertto (Sample_float, CV_32FC1);
Cv::mat sample_normalized;
Cv::subtract (Sample_float, Mean_, sample_normalized); /* This operation'll write the separate BGR planes directly to the * input layer of the network because it is wrapped By the Cv::mat * objects in Input_channels.
* * Cv::split (sample_normalized, *input_channels);
CHECK (reinterpret_cast<float*> (input_channels->at (0). Data) = = Net_->input_blobs () [0]->cpu_data ()]
<< "input channels are not wrapping the input layer of the network.";
int main (int argc, char** argv) {argc = 6; if (argc!= 6) {std::cerr << "Usage:" << argv[0] << "Deploy.prototxt Network.caffem
Odel "<<" Mean.binaryproto labels.txt img.jpg "<< Std::endl;
return 1;
}:: Google::initgooglelogging (argv[0]);
/*string model_file = argv[1];
String trained_file = Argv[2];
String mean_file = Argv[3]; String Label_file = argv[4];*//*string model_file = ".//caffenet//deploy.prototxt"; String trained_file = ".//caffenet//bvlc_reference_caffenet.caffemodel"; */String Model_file = ".//alexnet//
Deploy.prototxt ";
String trained_file = ".//alexnet//bvlc_alexnet.caffemodel";
/*string model_file = ". \\SqueezeNet_v1.0\\deploy.prototxt"; String trained_file = ". \\SqueezeNet_v1.0\\squeezenet_v1.0.caffemodel"; */String mean_file = "Imagenet_mean.binarypro
To ";
String label_file = "Synset_words.txt";
Classifier classifier (Model_file, Trained_file, Mean_file, label_file);
string file = Argv[5];
string file = "Cat.jpg";
string file = "Fish-bike.jpg";
Std::cout << "----------prediction for" << file << "----------" << Std::endl;
clock_t start, end;
start = Clock ();
Cv::mat img = cv::imread (file,-1);
CHECK (!img.empty ()) << "Unable to decode image" << file; std::vector<prediction> predictions = classifier. Classify (img);
End = (double) (1000 * (Clock ()-start)/clocks_per_sec); Std::cout << std::fixed << std::setprecision (4) << gp.second << "" <<gp.first << en
dl
Std::cout << "Time:" << end << "MS" << Std::endl; /* Print the top N predictions.
* for (size_t i = 0; i < predictions.size (); ++i) {prediction p = predictions[i]; Std::cout << std::fixed << std::setprecision (4) << p.second << "-\" "<< p.fi
RST << "\" "<< Std::endl;
} cv::imshow (File, IMG);
Cv::waitkey (); #else int main (int argc, char** argv) {LOG (FATAL) << "This example requires;
compile with Use_opencv. ";}
Model Size:
Alexnet Model Size: 232M
Caffenet Model Size: 232M
squeezenet_v1.0 Model Size: 4.76M
squeezenet_v1.1 Model Size: 4.72M
Experimental results:
Test 1:
Alexnet:
Caffenet:
squeezenet_v1.0:
squeezenet_v1.1:
Test 2:
Alexnet:
Caffenet:
squeezenet_v1.0:
squeezenet_v1.1:
References:
https://github.com/BVLC/caffe/tree/master/models
https://github.com/DeepScale/SqueezeNet