Using the TensorRT INT8 sample (deep learning)

Source: Internet
Author: User
Tags assert cmath
#include <assert.h> #include <fstream> #include <sstream> #include <iostream> #include < cmath> #include <sys/stat.h> #include <cmath> #include <time.h> #include <cuda_runtime_api.h > #include <unordered_map> #include <algorithm> #include <float.h> #include <string.h> # Include <chrono> #include <iterator> #include "NvInfer.h" #include "NvCaffeParser.h" #include "common.h" #in
Clude "BatchStream.h" #include "LegacyCalibrator.h" using namespace Nvinfer1;

using namespace Nvcaffeparser1;

Static Logger Glogger;
Stuff we know about the network and the Caffe input/output blobs const CHAR* = "Data";
Const char* output_blob_name = "prob";

Const char* GNETWORKNAME{NULLPTR};
    std::string locatefile (const std::string& input) {std::vector<std::string> dirs;
    Dirs.push_back (std::string ("data/int8/") + Gnetworkname + std::string ("/")); Dirs.push_back (std::string ("Data/") + Gnetworkname + std::string ("/"));
return LocateFile (input, dirs); BOOL Caffetogiemodel (const std::string& Deployfile,//name for Caffe Prototxt Const STD::S            tring& Modelfile,//name for model const std::vector<std::string>& outputs, Network outputs unsigned int maxbatchsize,//batch SIZE-NB-must as at least as LA Rge as the batch we want to run with) DataType DataType, iint8calibrator* calibrator, Nvinfer1::ihostmemory *&

    Amp;giemodelstream) {//Create the builder ibuilder* builder = Createinferbuilder (Glogger); Parse the Caffe model to populate the network, then set the outputs inetworkdefinition* = Network
    Enetwork ();

    icaffeparser* parser = Createcaffeparser (); if ((DataType = = Datatype::kint8 &&!builder->platformhasfastint8 ()) | | (DataType = = Datatype::khalf &&!builder->platformHASFASTFP16 ()) return false; Const iblobnametotensor* blobnametotensor = Parser->parse (LocateFile (deployfile). C_str (), LocateFile (ModelFile) . C_STR (), *network, DataType = = Datatype::kint8?

    Datatype::kfloat:datatype); Specify which tensors are outputs for (auto& s:outputs) network->markoutput (*blobnametotensor->

    Find (S.c_str ()));
    Build the engine builder->setmaxbatchsize (maxbatchsize);
    Builder->setmaxworkspacesize (1 << 30);
    Builder->setaveragefinditerations (1);
    Builder->setminfinditerations (1);
    Builder->setdebugsync (TRUE);
    Builder->setint8mode (DataType = = datatype::kint8);
    Builder->sethalf2mode (DataType = = datatype::khalf);

    Builder->setint8calibrator (calibrator);
    icudaengine* engine = Builder->buildcudaengine (*network);

    ASSERT (engine); We don ' t need the network any more, and we can destroy the parser Network->destroy ();

    Parser->destroy ();
    Serialize the engine, then close everything down Giemodelstream = Engine->serialize ();
    Engine->destroy ();
    Builder->destroy ();
return true; Float Doinference (iexecutioncontext& context, float* input, float* output, int batchsize) {Const ICUDAENGINE&A mp
    engine = Context.getengine ();
    Input and output buffer pointers that we pass to the Engine-the engine requires exactly iengine::getnbbindings (),
    Of these, the But in the case we know the there is exactly one input and one output.
    ASSERT (engine.getnbbindings () = = 2);
    void* buffers[2];

    float ms{0.0f};
    In order to bind the buffers, we need to know the names of the input and output tensors. Note This indices are guaranteed to be less than iengine::getnbbindings () int inputindex = Engine.getbindingindex (I

    Nput_blob_name), Outputindex = Engine.getbindingindex (output_blob_name);
  Create GPU buffers and a stream  DIMSCHW inputdims = static_cast<dimschw&&> (Context.getengine (). Getbindingdimensions (
    Context.getengine (). Getbindingindex (Input_blob_name)); DIMSCHW outputdims = static_cast<dimschw&&> (Context.getengine (). Getbindingdimensions (

    Context.getengine (). Getbindingindex (Output_blob_name)); size_t inputsize = BATCHSIZE*INPUTDIMS.C () *inputdims.h () *INPUTDIMS.W () * sizeof (float), outputsize = BatchSize *
    OUTPUTDIMS.C () * OUTPUTDIMS.H () * OUTPUTDIMS.W () * sizeof (float);
    CHECK (Cudamalloc (&buffers[inputindex], inputsize));

    CHECK (Cudamalloc (&buffers[outputindex], outputsize));

    CHECK (cudamemcpy (Buffers[inputindex], input, inputsize, cudamemcpyhosttodevice));
    cudastream_t stream;
    CHECK (Cudastreamcreate (&stream));
    cudaevent_t start, end;
    CHECK (Cudaeventcreatewithflags (&start, Cudaeventblockingsync));
    CHECK (Cudaeventcreatewithflags (&end, Cudaeventblockingsync));
    Cudaeventrecord (start, stream); Context.enqueUE (batchsize, buffers, stream, nullptr);
    Cudaeventrecord (end, stream);
    Cudaeventsynchronize (end);
    Cudaeventelapsedtime (&ms, start, end);
    Cudaeventdestroy (start);

    Cudaeventdestroy (end);
    CHECK (cudamemcpy (output, Buffers[outputindex], outputsize, cudamemcpydevicetohost));
    CHECK (Cudafree (Buffers[inputindex]));
    CHECK (Cudafree (Buffers[outputindex]));
    CHECK (Cudastreamdestroy (stream));
return MS; int Calculatescore (float* batchprob, float* labels, int batchsize, int outputsize, int threshold) {int success =
    0; for (int i = 0; i < batchsize i++) {float* prob = Batchprob + outputsize*i, correct = prob[(int) labels[i]

        ];
        int better = 0;
        for (int j = 0; J < Outputsize; J +) if (Prob[j] >= correct) better++;
    if (better <= threshold) success++;
return success; Class Int8entropycalibrator:public Iint8entropycalibrator {public:int8entRopycalibrator (batchstream& stream, int firstbatch, bool Readcache = True): Mstream (Stream), Mreadcache (READC
        Ache) {DIMSNCHW dims = Mstream.getdims ();
        Minputcount = Mstream.getbatchsize () * DIMS.C () * DIMS.H () * DIMS.W ();
        CHECK (Cudamalloc (&mdeviceinput, Minputcount * sizeof (float));
    Mstream.reset (Firstbatch);
    Virtual ~int8entropycalibrator () {CHECK (Cudafree (mdeviceinput));

    int getbatchsize () const override {return mstream.getbatchsize ();}
            BOOL Getbatch (void* bindings[], const char* names[], int nbbindings) override {if (!mstream.next ())

        return false;
        CHECK (cudamemcpy (Mdeviceinput, Mstream.getbatch (), Minputcount * sizeof (float), cudamemcpyhosttodevice));
        ASSERT (!STRCMP (names[0], input_blob_name));
        Bindings[0] = mdeviceinput;
    return true; Const void* Readcalibrationcache (size_t& length) override {McalibratiOncache.clear ();
        Std::ifstream input (Calibrationtablename (), std::ios::binary);
        Input >> STD::NOSKIPWS; if (Mreadcache && input.good ()) std::copy (std::istream_iterator<char> (input), Std::istream_itera

        Tor<char> (), Std::back_inserter (Mcalibrationcache));
        Length = Mcalibrationcache.size (); return length?
    &mcalibrationcache[0]: nullptr; } void Writecalibrationcache (const void* cache, size_t length) override {Std::ofstream output (calibratio
        Ntablename (), std::ios::binary);
    Output.write (Reinterpret_cast<const char*> (cache), length);
        } private:static std::string Calibrationtablename () {assert (gnetworkname);
    Return std::string ("calibrationtable") + gnetworkname;
    } Batchstream Mstream;

    BOOL mreadcache{true};
    size_t Minputcount;
    void* mdeviceinput{nullptr};
Std::vector<char> Mcalibrationcache;

}; std::p AIR&LT;FLOat, float> scoremodel (int batchsize, int firstbatch, int nbscorebatches, DataType DataType, iint8calibrator* Calibrat
    OR, bool quiet = False) {ihostmemory *giemodelstream{nullptr};
    bool valid = FALSE; if (Gnetworkname = = std::string ("mnist")) valid = Caffetogiemodel ("Deploy.prototxt", "Mnist_lenet.caffemodel", std
    :: Vector < std::string > {output_blob_name}, batchsize, datatype, calibrator, Giemodelstream); else valid = Caffetogiemodel ("Deploy.prototxt", std::string (gnetworkname) + ". Caffemodel", Std::vector < std::s

    Tring > {output_blob_name}, batchsize, datatype, calibrator, Giemodelstream);
        if (!valid) {std::cout << "Engine could not being created at this precision" << Std::endl;
    Return std::p air<float, float> (0,0);
    //Create engine and deserialize model.
    iruntime* infer = Createinferruntime (Glogger); icudaengine* engine = Infer->deserializecudaengine (giemodelstream->Data (), Giemodelstream->size (), nullptr);
    if (Giemodelstream) Giemodelstream->destroy ();

    iexecutioncontext* context = Engine->createexecutioncontext ();
    Batchstream Stream (batchsize, nbscorebatches);

    Stream.skip (Firstbatch); DIMSCHW outputdims = static_cast<dimschw&&> (Context->getengine (). Getbindingdimensions (context-
    >getengine (). Getbindingindex (Output_blob_name));
    int outputsize = OUTPUTDIMS.C () *outputdims.h () *OUTPUTDIMS.W ();
    int top1{0}, top5{0};
    float totaltime{0.0f};

    std::vector<float> Prob (batchsize * outputsize, 0);

        while (Stream.next ()) {totaltime + = Doinference (*context, Stream.getbatch (), &prob[0], batchsize);
        Top1 + + calculatescore (&prob[0], stream.getlabels (), BatchSize, outputsize, 1);

        TOP5 + + calculatescore (&prob[0], stream.getlabels (), BatchSize, Outputsize, 5); Std::cout << (!quiet && stream.getbatchesread ()% 10 = 0? ".": "") << (!quiet && stream.getbatchesread ()% 800 = 0?)
    \ n ":") << Std::flush;
    int imagesread = Stream.getbatchesread () *batchsize;

    float T1 = float (top1)/float (imagesread), T5 = float (top5)/float (imagesread);
        if (!quiet) {std::cout << "\NTOP1: << T1 <<", TOP5: "<< T5 << Std::endl; Std::cout << "Processing" << imagesread << "Images averaged" << Totaltime/imagesread &
    lt;< "Ms/image and" << totaltime/stream.getbatchesread () << "Ms/batch." << Std::endl;
    } Context->destroy ();
    Engine->destroy ();
    Infer->destroy ();
return Std::make_pair (T1, T5); int main (int argc, char** argv) {if (ARGC < 2) {Std::cout << "please provide the network
        As the argument << Std::endl;
    Exit (0);

    } gnetworkname = argv[1]; int batchsize = Firstscorebatch = m, NbSCorebatches = 400; By default we score over 40K images starting at 10000, so we don ' t score those used to search calibration bool Sear
    ch = false;

    Calibrationalgotype Calibrationalgo = calibrationalgotype::kentropy_calibration;  for (int i = 2; i < argc i++) {if (!strncmp (argv[i), "Batch=", 6)) BatchSize = Atoi (Argv[i] +
        6);
        else if (!strncmp (Argv[i], "start=", 6)) Firstscorebatch = Atoi (Argv[i] + 6);
        else if (!strncmp (Argv[i], "score=", 6)) Nbscorebatches = Atoi (Argv[i] + 6);
        else if (!strncmp (Argv[i], "search", 6)) search = true;
        else if (!strncmp (Argv[i], "legacy", 6)) Calibrationalgo = calibrationalgotype::klegacy_calibration;
            else {std::cout << "unrecognized argument" << argv[i] << Std::endl;
        Exit (0);
      } if (Calibrationalgo = = calibrationalgotype::kentropy_calibration) {  Search = false;
        } if (BatchSize > 128) {std::cout << "Please provide batch size <= 128" << Std::endl;
    Exit (0); } if ((Firstscorebatch + nbscorebatches) *batchsize > 500000) {std::cout << "only 50000 images A
        vailable "<< Std::endl;
    Exit (0);


    } std::cout.precision (6);

    Batchstream Calibrationstream (cal_batch_size, nb_cal_batches); Std::cout << "\nfp32 run:" << nbscorebatches << "Batches of size" << batchsize << "Starti
    ng at "<< Firstscorebatch << Std::endl;

    Scoremodel (BatchSize, Firstscorebatch, Nbscorebatches, Datatype::kfloat, nullptr); Std::cout << "\nfp16 run:" << nbscorebatches << "Batches of size" << batchsize << "Starti
    ng at "<< Firstscorebatch << Std::endl;

    Scoremodel (BatchSize, Firstscorebatch, Nbscorebatches, Datatype::khalf, nullptr); Std::cout << "\ninT8 run: "<< nbscorebatches <<" Batches of size "<< batchsize <<" starting at "<< FIRSTSC
    Orebatch << Std::endl; if (Calibrationalgo = = calibrationalgotype::kentropy_calibration) {int8entropycalibrator calibrator (Calibrati
        OnStream, First_cal_batch);
    Scoremodel (BatchSize, Firstscorebatch, Nbscorebatches, Datatype::kint8, &calibrator);
        else {std::p air<double, double> parameters = Getquantileandcutoff (gnetworkname, search);
        Int8legacycalibrator calibrator (Calibrationstream, First_cal_batch, Parameters.first, Parameters.second);
    Scoremodel (BatchSize, Firstscorebatch, Nbscorebatches, Datatype::kint8, &calibrator);
    } shutdownprotobuflibrary ();
return 0; }

Contact Us

The content on this page is sourced from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no affiliation with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.