#include <assert.h>
#include <fstream>
#include <sstream>
#include <iostream>
#include <cmath>
#include <sys/stat.h>
#include <time.h>
#include <cuda_runtime_api.h>
#include <unordered_map>
#include <algorithm>
#include <float.h>
#include <string.h>
#include <chrono>
#include <iterator>

#include "NvInfer.h"
#include "NvCaffeParser.h"
#include "common.h"
#include "BatchStream.h"
#include "LegacyCalibrator.h"

using namespace nvinfer1;
using namespace nvcaffeparser1;

static Logger gLogger;
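
// This sample scores a Caffe image-classification model with TensorRT ("GIE")
// at FP32, FP16 and INT8 precision and reports top-1/top-5 accuracy plus
// per-image and per-batch timing. Logger, CHECK and BatchStream come from the
// accompanying sample headers included above.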
// stuff we know about the network and the Caffe input/output blobs
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "prob";
const char* gNetworkName{nullptr};
std::string locateFile(const std::string& input)
{
    std::vector<std::string> dirs;
    dirs.push_back(std::string("data/int8/") + gNetworkName + std::string("/"));
    dirs.push_back(std::string("data/") + gNetworkName + std::string("/"));
    return locateFile(input, dirs);
}

bool caffeToGIEModel(const std::string& deployFile,            // name for Caffe prototxt
                     const std::string& modelFile,             // name for model
                     const std::vector<std::string>& outputs,  // network outputs
                     unsigned int maxBatchSize,                // batch size - NB must be at least as large as the batch we want to run with
                     DataType dataType,
                     IInt8Calibrator* calibrator,
                     nvinfer1::IHostMemory*& gieModelStream)
{
    // create the builder
    IBuilder* builder = createInferBuilder(gLogger);

    // parse the caffe model to populate the network, then set the outputs
    INetworkDefinition* network = builder->createNetwork();
    ICaffeParser* parser = createCaffeParser();
    if ((dataType == DataType::kINT8 && !builder->platformHasFastInt8())
        || (dataType == DataType::kHALF && !builder->platformHasFastFp16()))
        return false;
    const IBlobNameToTensor* blobNameToTensor = parser->parse(locateFile(deployFile).c_str(),
                                                              locateFile(modelFile).c_str(),
                                                              *network,
                                                              dataType == DataType::kINT8 ? DataType::kFLOAT : dataType);

    // specify which tensors are outputs
    for (auto& s : outputs)
        network->markOutput(*blobNameToTensor->find(s.c_str()));
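
    // note: for the INT8 path the weights above are still parsed as FP32;
    // TensorRT quantizes them at build time, and the calibrator set below
    // supplies activation ranges by streaming calibration batches through
    // the network during buildCudaEngine()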
    // build the engine
    builder->setMaxBatchSize(maxBatchSize);
    builder->setMaxWorkspaceSize(1 << 30);
    builder->setAverageFindIterations(1);
    builder->setMinFindIterations(1);
    builder->setDebugSync(true);
    builder->setInt8Mode(dataType == DataType::kINT8);
    builder->setHalf2Mode(dataType == DataType::kHALF);
    builder->setInt8Calibrator(calibrator);

    ICudaEngine* engine = builder->buildCudaEngine(*network);
    assert(engine);

    // we don't need the network any more, and we can destroy the parser
    network->destroy();
    parser->destroy();

    // serialize the engine, then close everything down
    gieModelStream = engine->serialize();
    engine->destroy();
    builder->destroy();
    return true;
}
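
// copies one input batch to the GPU, runs it through the engine, copies the
// results back, and returns the elapsed GPU time in milliseconds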
float doInference(IExecutionContext& context, float* input, float* output, int batchSize)
{
    const ICudaEngine& engine = context.getEngine();
    // input and output buffer pointers that we pass to the engine - the engine
    // requires exactly IEngine::getNbBindings() of these, but in this case we
    // know that there is exactly one input and one output.
    assert(engine.getNbBindings() == 2);
    void* buffers[2];
    float ms{0.0f};
    // in order to bind the buffers, we need to know the names of the input and output tensors.
    // note that indices are guaranteed to be less than IEngine::getNbBindings()
    int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME),
        outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);

    // create GPU buffers and a stream
    DimsCHW inputDims = static_cast<DimsCHW&&>(context.getEngine().getBindingDimensions(context.getEngine().getBindingIndex(INPUT_BLOB_NAME)));
    DimsCHW outputDims = static_cast<DimsCHW&&>(context.getEngine().getBindingDimensions(context.getEngine().getBindingIndex(OUTPUT_BLOB_NAME)));
    size_t inputSize = batchSize * inputDims.c() * inputDims.h() * inputDims.w() * sizeof(float),
           outputSize = batchSize * outputDims.c() * outputDims.h() * outputDims.w() * sizeof(float);
    CHECK(cudaMalloc(&buffers[inputIndex], inputSize));
    CHECK(cudaMalloc(&buffers[outputIndex], outputSize));

    CHECK(cudaMemcpy(buffers[inputIndex], input, inputSize, cudaMemcpyHostToDevice));

    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));
    cudaEvent_t start, end;
    CHECK(cudaEventCreateWithFlags(&start, cudaEventBlockingSync));
    CHECK(cudaEventCreateWithFlags(&end, cudaEventBlockingSync));
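
    // enqueue() is asynchronous; recording events on the same stream around it
    // measures GPU execution time only, and cudaEventBlockingSync lets the host
    // thread block instead of spinning when we synchronize on the end event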
    cudaEventRecord(start, stream);
    context.enqueue(batchSize, buffers, stream, nullptr);
    cudaEventRecord(end, stream);
    cudaEventSynchronize(end);
    cudaEventElapsedTime(&ms, start, end);
    cudaEventDestroy(start);
    cudaEventDestroy(end);

    CHECK(cudaMemcpy(output, buffers[outputIndex], outputSize, cudaMemcpyDeviceToHost));
    CHECK(cudaFree(buffers[inputIndex]));
    CHECK(cudaFree(buffers[outputIndex]));
    CHECK(cudaStreamDestroy(stream));
    return ms;
}
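
// counts how many samples in the batch rank their correct label within the
// top 'threshold' scores (threshold = 1 gives top-1, threshold = 5 gives top-5)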
int calculateScore(float* batchProb, float* labels, int batchSize, int outputSize, int threshold)
{
    int success = 0;
    for (int i = 0; i < batchSize; i++)
    {
        float* prob = batchProb + outputSize * i, correct = prob[(int) labels[i]];

        int better = 0;
        for (int j = 0; j < outputSize; j++)
            if (prob[j] >= correct)
                better++;
        if (better <= threshold)
            success++;
    }
    return success;
}
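
// INT8 entropy calibrator: feeds calibration batches from a BatchStream to
// TensorRT through getBatch(), and reads/writes the resulting calibration
// table on disk so later runs can skip the calibration pass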
class Int8EntropyCalibrator : public IInt8EntropyCalibrator
{
public:
    Int8EntropyCalibrator(BatchStream& stream, int firstBatch, bool readCache = true)
        : mStream(stream), mReadCache(readCache)
    {
        DimsNCHW dims = mStream.getDims();
        mInputCount = mStream.getBatchSize() * dims.c() * dims.h() * dims.w();
        CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
        mStream.reset(firstBatch);
    }
    virtual ~Int8EntropyCalibrator()
    {
        CHECK(cudaFree(mDeviceInput));
    }

    int getBatchSize() const override { return mStream.getBatchSize(); }
    bool getBatch(void* bindings[], const char* names[], int nbBindings) override
    {
        if (!mStream.next())
            return false;

        CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice));
        assert(!strcmp(names[0], INPUT_BLOB_NAME));
        bindings[0] = mDeviceInput;
        return true;
    }

    const void* readCalibrationCache(size_t& length) override
    {
        mCalibrationCache.clear();
        std::ifstream input(calibrationTableName(), std::ios::binary);
        input >> std::noskipws;
        if (mReadCache && input.good())
            std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(mCalibrationCache));

        length = mCalibrationCache.size();
        return length ? &mCalibrationCache[0] : nullptr;
    }

    void writeCalibrationCache(const void* cache, size_t length) override
    {
        std::ofstream output(calibrationTableName(), std::ios::binary);
        output.write(reinterpret_cast<const char*>(cache), length);
    }

private:
    static std::string calibrationTableName()
    {
        assert(gNetworkName);
        return std::string("CalibrationTable") + gNetworkName;
    }
    BatchStream mStream;
    bool mReadCache{true};

    size_t mInputCount;
    void* mDeviceInput{nullptr};
    std::vector<char> mCalibrationCache;
};
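
// builds an engine at the requested precision, scores nbScoreBatches batches
// against their labels, and returns the (top-1, top-5) accuracy pair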
std::pair<float, float> scoreModel(int batchSize, int firstBatch, int nbScoreBatches, DataType datatype, IInt8Calibrator* calibrator, bool quiet = false)
{
    IHostMemory* gieModelStream{nullptr};
    bool valid = false;
    if (gNetworkName == std::string("mnist"))
        valid = caffeToGIEModel("deploy.prototxt", "mnist_lenet.caffemodel", std::vector<std::string>{OUTPUT_BLOB_NAME}, batchSize, datatype, calibrator, gieModelStream);
    else
        valid = caffeToGIEModel("deploy.prototxt", std::string(gNetworkName) + ".caffemodel", std::vector<std::string>{OUTPUT_BLOB_NAME}, batchSize, datatype, calibrator, gieModelStream);
    if (!valid)
    {
        std::cout << "Engine could not be created at this precision" << std::endl;
        return std::pair<float, float>(0, 0);
    }

    // create engine and deserialize model
    IRuntime* infer = createInferRuntime(gLogger);
    ICudaEngine* engine = infer->deserializeCudaEngine(gieModelStream->data(), gieModelStream->size(), nullptr);
    if (gieModelStream) gieModelStream->destroy();
    IExecutionContext* context = engine->createExecutionContext();

    BatchStream stream(batchSize, nbScoreBatches);
    stream.skip(firstBatch);

    DimsCHW outputDims = static_cast<DimsCHW&&>(context->getEngine().getBindingDimensions(context->getEngine().getBindingIndex(OUTPUT_BLOB_NAME)));
    int outputSize = outputDims.c() * outputDims.h() * outputDims.w();
    int top1{0}, top5{0};
    float totalTime{0.0f};
    std::vector<float> prob(batchSize * outputSize, 0);
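
    // score every batch: accumulate GPU time and running top-1/top-5 counts,
    // printing a dot every 10 batches and a newline every 800 as progress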
    while (stream.next())
    {
        totalTime += doInference(*context, stream.getBatch(), &prob[0], batchSize);

        top1 += calculateScore(&prob[0], stream.getLabels(), batchSize, outputSize, 1);
        top5 += calculateScore(&prob[0], stream.getLabels(), batchSize, outputSize, 5);

        std::cout << (!quiet && stream.getBatchesRead() % 10 == 0 ? "." : "") << (!quiet && stream.getBatchesRead() % 800 == 0 ? "\n" : "") << std::flush;
    }
    int imagesRead = stream.getBatchesRead() * batchSize;
    float t1 = float(top1) / float(imagesRead), t5 = float(top5) / float(imagesRead);

    if (!quiet)
    {
        std::cout << "\nTop1: " << t1 << ", Top5: " << t5 << std::endl;
        std::cout << "Processing " << imagesRead << " images averaged " << totalTime / imagesRead << " ms/image and " << totalTime / stream.getBatchesRead() << " ms/batch." << std::endl;
    }
    context->destroy();
    engine->destroy();
    infer->destroy();
    return std::make_pair(t1, t5);
}
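
// entry point: the first argument selects the network (e.g. "mnist");
// optional batch=/start=/score= arguments and the "search"/"legacy" flags
// are parsed in the loop below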
int main(int argc, char** argv)
{
    if (argc < 2)
    {
        std::cout << "Please provide the network as the argument" << std::endl;
        exit(0);
    }
    gNetworkName = argv[1];

    // by default we score over 40K images starting at 10000, so we don't score those used to search calibration
    int batchSize = 100, firstScoreBatch = 100, nbScoreBatches = 400;
    bool search = false;
    CalibrationAlgoType calibrationAlgo = CalibrationAlgoType::kENTROPY_CALIBRATION;

    for (int i = 2; i < argc; i++)
    {
        if (!strncmp(argv[i], "batch=", 6))
            batchSize = atoi(argv[i] + 6);
        else if (!strncmp(argv[i], "start=", 6))
            firstScoreBatch = atoi(argv[i] + 6);
        else if (!strncmp(argv[i], "score=", 6))
            nbScoreBatches = atoi(argv[i] + 6);
        else if (!strncmp(argv[i], "search", 6))
            search = true;
        else if (!strncmp(argv[i], "legacy", 6))
            calibrationAlgo = CalibrationAlgoType::kLEGACY_CALIBRATION;
        else
        {
            std::cout << "Unrecognized argument " << argv[i] << std::endl;
            exit(0);
        }
    }
    // the quantile/cutoff search only applies to the legacy calibrator
    if (calibrationAlgo == CalibrationAlgoType::kENTROPY_CALIBRATION)
    {
        search = false;
    }

    if (batchSize > 128)
    {
        std::cout << "Please provide batch size <= 128" << std::endl;
        exit(0);
    }

    if ((firstScoreBatch + nbScoreBatches) * batchSize > 500000)
    {
        std::cout << "Only 50000 images available" << std::endl;
        exit(0);
    }

    std::cout.precision(6);
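
    // CAL_BATCH_SIZE, NB_CAL_BATCHES and FIRST_CAL_BATCH are calibration
    // constants presumably defined in the sample headers included at the top
    // (LegacyCalibrator.h); the same stream feeds either calibrator below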
    BatchStream calibrationStream(CAL_BATCH_SIZE, NB_CAL_BATCHES);

    std::cout << "\nFP32 run:" << nbScoreBatches << " batches of size " << batchSize << " starting at " << firstScoreBatch << std::endl;
    scoreModel(batchSize, firstScoreBatch, nbScoreBatches, DataType::kFLOAT, nullptr);

    std::cout << "\nFP16 run:" << nbScoreBatches << " batches of size " << batchSize << " starting at " << firstScoreBatch << std::endl;
    scoreModel(batchSize, firstScoreBatch, nbScoreBatches, DataType::kHALF, nullptr);

    std::cout << "\nINT8 run:" << nbScoreBatches << " batches of size " << batchSize << " starting at " << firstScoreBatch << std::endl;
    if (calibrationAlgo == CalibrationAlgoType::kENTROPY_CALIBRATION)
    {
        Int8EntropyCalibrator calibrator(calibrationStream, FIRST_CAL_BATCH);
        scoreModel(batchSize, firstScoreBatch, nbScoreBatches, DataType::kINT8, &calibrator);
    }
    else
    {
        std::pair<double, double> parameters = getQuantileAndCutoff(gNetworkName, search);
        Int8LegacyCalibrator calibrator(calibrationStream, FIRST_CAL_BATCH, parameters.first, parameters.second);
        scoreModel(batchSize, firstScoreBatch, nbScoreBatches, DataType::kINT8, &calibrator);
    }
    shutdownProtobufLibrary();
    return 0;
}