Image classification with a well-trained Caffe network.
Input: a color or grayscale image.
Note: for MNIST handwritten-digit classification this code cannot be used directly — that model skips mean-image subtraction and instead normalizes input pixel values directly to the range [0, 1].
#include <caffe/caffe.hpp>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iosfwd>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
using namespace Caffe; Nolint (build/namespaces)
Using Std::string;
/* Pair (label, confidence) representing a prediction. */
/* PAIR (label, confidence) forecast */
typedef std::p air<string, float> prediction;
/* Category Interface class Classifier */
Class Classifier {
Public
Classifier (const string& Model_file,
Const string& Trained_file,
Const string& Mean_file,
Const string& label_file);
Std::vector<prediction> classify (const cv::mat& img, int N = 4); Classification, returns the first 4 array of predicted values by default
Private
void Setmean (const string& mean_file);
Std::vector<float> Predict (const cv::mat& img);
void Wrapinputlayer (std::vector<cv::mat>* input_channels);
void preprocess (const cv::mat& IMG,
std::vector<cv::mat>* input_channels);
Private
shared_ptr<net<float> > net_;
Cv::size Input_geometry_;
int Num_channels_; Number of network channels
Cv::mat Mean_; Mean image
Std::vector<string> Labels_; Target label Array
};
The above declares the classification class Classifier.
The class is implemented as follows:
/* Build a classifier: load the network, its trained weights, the mean
 * image and the class labels, and validate that they are consistent. */
Classifier::Classifier(const string& model_file,
                       const string& trained_file,
                       const string& mean_file,
                       const string& label_file) {
#ifdef CPU_ONLY
  Caffe::set_mode(Caffe::CPU);
#else
  Caffe::set_mode(Caffe::GPU);
#endif

  /* Load the network. */
  net_.reset(new Net<float>(model_file, TEST));
  net_->CopyTrainedLayersFrom(trained_file);

  CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
  CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output.";

  Blob<float>* input_layer = net_->input_blobs()[0];
  num_channels_ = input_layer->channels();
  CHECK(num_channels_ == 3 || num_channels_ == 1)
      << "Input layer should have 1 or 3 channels.";
  input_geometry_ = cv::Size(input_layer->width(), input_layer->height());

  /* Load the binaryproto mean file. */
  SetMean(mean_file);

  /* Load the class labels, one per line. */
  std::ifstream labels(label_file.c_str());
  CHECK(labels) << "Unable to open labels file " << label_file;
  string line;
  while (std::getline(labels, line))
    labels_.push_back(string(line));  // was push_back(string): pushed the type name, not the line

  /* One label per output channel. */
  Blob<float>* output_layer = net_->output_blobs()[0];
  CHECK_EQ(labels_.size(), output_layer->channels())
      << "Number of labels is different from the output layer dimension.";
}
/* Order (value, index) pairs by descending value. */
static bool PairCompare(const std::pair<float, int>& lhs,
                        const std::pair<float, int>& rhs) {
  return lhs.first > rhs.first;
}
/* Return the indices of the top N values of vector v,
 * largest value first. */
static std::vector<int> Argmax(const std::vector<float>& v, int N) {
  /* Clamp N so pairs.begin() + N never runs past the end
   * (undefined behavior when N > v.size() in the original). */
  N = std::min<int>(N, static_cast<int>(v.size()));

  std::vector<std::pair<float, int> > pairs;
  pairs.reserve(v.size());
  for (size_t i = 0; i < v.size(); ++i)
    pairs.push_back(std::make_pair(v[i], static_cast<int>(i)));

  /* Only the first N entries need to be sorted (by descending value). */
  std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end(),
                    [](const std::pair<float, int>& lhs,
                       const std::pair<float, int>& rhs) {
                      return lhs.first > rhs.first;
                    });

  std::vector<int> result;
  result.reserve(N);
  for (int i = 0; i < N; ++i)
    result.push_back(pairs[i].second);
  return result;
}
/* Return the top N predictions. Classify and return the largest first N predictions */
Std::vector<prediction> classifier::classify (const cv::mat& img, int N) {
std::vector<float> output = Predict (IMG);
std::vector<int> MAXN = Argmax (output, N);
Std::vector<prediction> predictions;
for (int i = 0; i < N; ++i) {
int idx = maxn[i];
Predictions.push_back (Std::make_pair (Labels_[idx], output[idx])); /[(tags, confidence level),...] Array of Predictor values
}
return predictions;
}
/* Load the mean file in Binaryproto format. */
void Classifier::setmean (const string& mean_file) {
Blobproto Blob_proto;
Readprotofrombinaryfileordie (Mean_file.c_str (), &blob_proto);
/* Convert from Blobproto to blob<float> */
Blob<float> Mean_blob;
Mean_blob. Fromproto (Blob_proto);
Check_eq (Mean_blob.channels (), Num_channels_)
<< "Number of channels of mean file doesn ' t match input layer.";
/* The format of the mean file is planar 32-bit float BGR or grayscale. */
Std::vector<cv::mat> channels;
float* data = Mean_blob.mutable_cpu_data ();
for (int i = 0; i < Num_channels_; ++i) {
/* Extract an individual channel. */
Cv::mat Channel (Mean_blob.height (), Mean_blob.width (), CV_32FC1, data);
Channels.push_back (channel);
Data + = Mean_blob.height () * Mean_blob.width ();
}
/* Merge the separate channels into a single image. */
Cv::mat mean;
Cv::merge (channels, mean);
/* Compute The global mean pixel value and create a mean image
* Filled with this value. */
Cv::scalar Channel_mean = Cv::mean (mean);
Mean_ = Cv::mat (Input_geometry_, Mean.type (), Channel_mean);
}
/* Category */
Std::vector<float> Classifier::P redict (const cv::mat& img) {
blob<float>* Input_layer = net_->input_blobs () [0];
Input_layer->reshape (1, Num_channels_,
Input_geometry_.height, Input_geometry_.width);
/* Forward dimension change to all layers. */
Net_->reshape ();
Std::vector<cv::mat> Input_channels;
Wrapinputlayer (&input_channels);
Preprocess (IMG, &input_channels); Data preprocessing
Net_->forwardprefilled ();
/* Copy the output layer to a std::vector */
blob<float>* Output_layer = net_->output_blobs () [0];
Const float* BEGIN = Output_layer->cpu_data ();
Const float* END = begin + Output_layer->channels ();
Return std::vector<float> (begin, end);
}
/* Wrap The input layer of the network in separate Cv::mat objects
* (one per channel). This, we save one memcpy operation and we
* don ' t need to rely on cudamemcpy2d. The last preprocessing
* Operation would write the separate channels directly to the input
* Layer.
*/
void Classifier::wrapinputlayer (std::vector<cv::mat>* input_channels) {
blob<float>* Input_layer = net_->input_blobs () [0];
int width = input_layer->width ();
int height = input_layer->height ();
float* input_data = Input_layer->mutable_cpu_data ();
for (int i = 0; i < input_layer->channels (); ++i) {
Cv::mat channel (height, width, cv_32fc1, input_data);
Input_channels->push_back (channel);
Input_data + = width * height;
}
}
Data preprocessing
void Classifier::P reprocess (const cv::mat& IMG,
std::vector<cv::mat>* input_channels) {
/* Convert the input image to the input image format of the network. */
Cv::mat sample;
Channel data is converted according to Settings
if (img.channels () = = 3 && Num_channels_ = = 1)
Cv::cvtcolor (IMG, sample, Cv_bgr2gray);
else if (img.channels () = = 4 && Num_channels_ = = 1)
Cv::cvtcolor (IMG, sample, Cv_bgra2gray);
else if (img.channels () = = 4 && Num_channels_ = = 3)
Cv::cvtcolor (IMG, sample, CV_BGRA2BGR);
else if (img.channels () = = 1 && num_channels_ = = 3)
Cv::cvtcolor (IMG, sample, CV_GRAY2BGR);
Else
Sample = IMG;
Cv::mat sample_resized;
if (sample.size ()! = Input_geometry_)
Cv::resize (sample, sample_resized, Input_geometry_);
Else
sample_resized = sample;
Cv::mat sample_float;
if (Num_channels_ = = 3)
Sample_resized.convertto (Sample_float, CV_32FC3); Three-channel (color)
Else
Sample_resized.convertto (Sample_float, CV_32FC1); Single channel (grayscale)
Cv::mat sample_normalized;
Cv::subtract (Sample_float, Mean_, sample_normalized);
/* This operation would write the separate BGR planes directly to the
* Input layer of the network because it is wrapped by the Cv::mat
* Objects in Input_channels.
This operation writes data BGR directly to the input-layer object Input_channels */
Cv::split (sample_normalized, *input_channels);
CHECK (reinterpret_cast<float*> (input_channels->at (0). Data)
= = Net_->input_blobs () [0]->cpu_data ())
<< "input channels is not wrapping the input layer of the network.";
}
The code above is only lightly commented; by default Classify returns the N highest-confidence classes (4 in this version).
The classification object is used as follows:
//==============================================================
Main ()
//==============================================================
int main (int argc, char** argv) {
if (argc! = 6) {
Std::cerr << "Usage:" << argv[0]
<< "Deploy.prototxt Network.caffemodel"
<< "Mean.binaryproto labels.txt img.jpg" << Std::endl;
return 1;
}
:: Google::initgooglelogging (argv[0]);
String model_file = Argv[1];
String trained_file = Argv[2];
String mean_file = Argv[3];
String label_file = Argv[4];
Classifier Classifier (Model_file, Trained_file, Mean_file, label_file); Creating classifiers
string file = Argv[5];
Std::cout << "----------prediction for" << file << "----------" << Std::endl;
Cv::mat img = cv::imread (file,-1); Reading the image to be classified
CHECK (!img.empty ()) << "Unable to decode image" << file;
std::vector<prediction> predictions = classifier. Classify (IMG); Classification
/* Print the top N predictions. Print the first n predicted values */
for (size_t i = 0; i < predictions.size (); ++i) {
Prediction p = predictions[i];
Std::cout << std::fixed << std::setprecision (4) << p.second << "-\" "
<< p.first << "\" "<< Std::endl;
}
}
End of example: image classification with a trained Caffe network.