The data and models used in this article can be downloaded from the CSDN resource page.
Link:
Network definition file
LST files for the training and test data
This article mainly reorganizes the original example code to make it easier to call and train. The main reference is the Gluon SSD example.

1. SSD Network Model definition
ssd.py
import mxnet as mx
import matplotlib.pyplot as plt
import os.path as osp
import mxnet.image as image
from mxnet import gluon
from mxnet import nd
from mxnet.contrib.ndarray import MultiBoxPrior
from mxnet.gluon import nn

def box_to_rect(box, color, linewidth=3):
    """convert an anchor box to a matplotlib rectangle"""
    box = box.asnumpy()
    return plt.Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1],
                         fill=False, edgecolor=color, linewidth=linewidth)

def class_predictor(num_anchors, num_classes):
    """return a layer to predict classes"""
    return nn.Conv2D(num_anchors * (num_classes + 1), 3, padding=1)

def box_predictor(num_anchors):
    """return a layer to predict delta locations"""
    return nn.Conv2D(num_anchors * 4, 3, padding=1)

def down_sample(num_filters):
    """stack two Conv-BatchNorm-ReLU blocks and then a pooling layer
    to halve the feature size"""
    out = nn.HybridSequential()
    for _ in range(2):
        out.add(nn.Conv2D(num_filters, 3, strides=1, padding=1))
        out.add(nn.BatchNorm(in_channels=num_filters))
        out.add(nn.Activation('relu'))
    out.add(nn.MaxPool2D(2))
    return out

def flatten_prediction(pred):
    return nd.flatten(nd.transpose(pred, axes=(0, 2, 3, 1)))

def concat_predictions(preds):
    return nd.concat(*preds, dim=1)

def body():
    """return the body network"""
    out = nn.HybridSequential()
    for nfilters in [16, 32, 64]:
        out.add(down_sample(nfilters))
    return out

def toy_ssd_model(num_anchors, num_classes):
    """return SSD modules"""
    downsamples = nn.Sequential()
    downsamples.add(down_sample(128))
    downsamples.add(down_sample(128))
    downsamples.add(down_sample(128))

    class_preds = nn.Sequential()
    box_preds = nn.Sequential()
    for scale in range(5):
        class_preds.add(class_predictor(num_anchors, num_classes))
        box_preds.add(box_predictor(num_anchors))

    return body(), downsamples, class_preds, box_preds

def toy_ssd_forward(x, body, downsamples, class_preds, box_preds, sizes, ratios):
    # extract features with the body network
    x = body(x)

    # for each scale, add anchors, box and class predictions,
    # then compute the input to the next scale
    default_anchors = []
    predicted_boxes = []
    predicted_classes = []

    for i in range(5):
        default_anchors.append(MultiBoxPrior(x, sizes=sizes[i], ratios=ratios[i]))
        predicted_boxes.append(flatten_prediction(box_preds[i](x)))
        predicted_classes.append(flatten_prediction(class_preds[i](x)))
        if i < 3:
            x = downsamples[i](x)
        elif i == 3:
            # simply use the pooling layer
            x = nd.Pooling(x, global_pool=True, pool_type='max', kernel=(4, 4))

    return default_anchors, predicted_classes, predicted_boxes

class ToySSD(gluon.Block):
    def __init__(self, num_classes, **kwargs):
        super(ToySSD, self).__init__(**kwargs)
        # anchor box sizes for the 5 feature scales
        self.anchor_sizes = [[.2, .272], [.37, .447], [.54, .619],
                             [.71, .79], [.88, .961]]
        # anchor box ratios for the 5 feature scales
        self.anchor_ratios = [[1, 2, .5]] * 5
        self.num_classes = num_classes

        with self.name_scope():
            self.body, self.downsamples, self.class_preds, self.box_preds = toy_ssd_model(4, num_classes)

    def forward(self, x):
        default_anchors, predicted_classes, predicted_boxes = toy_ssd_forward(
            x, self.body, self.downsamples, self.class_preds, self.box_preds,
            self.anchor_sizes, self.anchor_ratios)
        # concatenate anchors, class predictions, box predictions from the different layers
        anchors = concat_predictions(default_anchors)
        box_preds = concat_predictions(predicted_boxes)
        class_preds = concat_predictions(predicted_classes)
        # it is better to have class predictions reshaped for softmax computation
        class_preds = nd.reshape(class_preds, shape=(0, -1, self.num_classes + 1))

        return anchors, class_preds, box_preds

from mxnet.contrib.ndarray import MultiBoxTarget
def training_targets(default_anchors, class_predicts, labels):
    class_predicts = nd.transpose(class_predicts, axes=(0, 2, 1))
    z = MultiBoxTarget(*[default_anchors, labels, class_predicts])
    box_target = z[0]  # box offset target for (x, y, width, height)
    box_mask = z[1]    # mask is used to ignore box offsets we don't want to penalize, e.g. negative samples
    cls_target = z[2]  # cls_target is an array of labels for all anchor boxes
    return box_target, box_mask, cls_target

class FocalLoss(gluon.loss.Loss):
    def __init__(self, axis=-1, alpha=0.25, gamma=2, batch_axis=0, **kwargs):
        super(FocalLoss, self).__init__(None, batch_axis, **kwargs)
        self._axis = axis
        self._alpha = alpha
        self._gamma = gamma

    def hybrid_forward(self, F, output, label):
        output = F.softmax(output)
        pt = F.pick(output, label, axis=self._axis, keepdims=True)
        # focal loss: -alpha * (1 - pt)^gamma * log(pt)
        loss = -self._alpha * ((1 - pt) ** self._gamma) * F.log(pt)
        return F.mean(loss, axis=self._batch_axis, exclude=True)

class SmoothL1Loss(gluon.loss.Loss):
    def __init__(self, batch_axis=0, **kwargs):
        super(SmoothL1Loss, self).__init__(None, batch_axis, **kwargs)

    def hybrid_forward(self, F, output, label, mask):
        loss = F.smooth_l1((output - label) * mask, scalar=1.0)
        return F.mean(loss, self._batch_axis, exclude=True)

import numpy as np
import cv2

def preprocess(image):
    """take an image and apply preprocessing"""
    # resize to data_shape
    #image = cv2.resize(image, (data_shape, data_shape))
    #mask = np.zeros((max(image.shape), max(image.shape), 3))
    #mask[(mask.shape[0]-image.shape[0])/2:(mask.shape[0]+image.shape[0])/2,
    #     (mask.shape[1]-image.shape[1])/2:(mask.shape[1]+image.shape[1])/2, :] = image
    #image = cv2.resize(mask, (256, 256))
    # swap BGR to RGB
    image = image[:, :, (2, 1, 0)]
    # convert to float before subtracting mean
    image = image.astype(np.float32)
    # subtract mean
    image -= np.array([128, 128, 128])
    # organize as [batch-channel-height-width]
    image = np.transpose(image, (2, 0, 1))
    image = image[np.newaxis, :]
    # convert to ndarray
    image = nd.array(image)
    return image

def display(img, out, thresh=0.5):
    import random
    import matplotlib as mpl
    mpl.rcParams['figure.figsize'] = (10, 10)
    pens = dict()
    plt.clf()
    plt.imshow(img)
    for det in out:
        cid = int(det[0])
        if cid < 0:
            continue
        score = det[1]
        if score < thresh:
            continue
        if cid not in pens:
            pens[cid] = (random.random(), random.random(), random.random())
        scales = [img.shape[1], img.shape[0]] * 2
        xmin, ymin, xmax, ymax = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)]
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                             fill=False, edgecolor=pens[cid], linewidth=3)
        plt.gca().add_patch(rect)
        text = class_names[cid]  # class_names is expected to be defined in the calling module
        plt.gca().text(xmin, ymin - 2, '{:s} {:.3f}'.format(text, score),
                       bbox=dict(facecolor=pens[cid], alpha=0.5),
                       fontsize=12, color='white')
    #plt.show()
    plt.savefig('result.jpg')
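As a quick sanity check of the network definition, here is a minimal sketch (not part of the original files; it assumes ssd.py is on the import path and uses the 256x256 input size hard-coded in the training script below) that instantiates ToySSD and prints the shapes of its three outputs:

# check_ssd.py -- hypothetical helper, not part of the original article
import mxnet as mx
from mxnet import nd
from ssd import ToySSD

net = ToySSD(num_classes=1)  # one foreground class ('dog'); background is handled internally
net.initialize(mx.init.Xavier(magnitude=2))

x = nd.zeros((1, 3, 256, 256))  # dummy batch of one 256x256 RGB image
anchors, class_preds, box_preds = net(x)

# anchors:     (1, num_anchors, 4)               -- corner-encoded default boxes
# class_preds: (1, num_anchors, num_classes + 1) -- reshaped for softmax
# box_preds:   (1, num_anchors * 4)              -- flattened box offsets
print(anchors.shape, class_preds.shape, box_preds.shape)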
2. Data loading and model training
model.py
from ssd import *
import time
from mxnet import autograd as ag
from mxnet.contrib.ndarray import MultiBoxDetection
import logging

head = '%(asctime)-15s %(message)s'
logging.basicConfig(level=logging.DEBUG, format=head)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler("log.txt")
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)

def train(net, start_epoch, epochs, lr, ctx):
    box_loss = SmoothL1Loss()
    cls_loss = FocalLoss()
    cls_metric = mx.metric.Accuracy()
    box_metric = mx.metric.MAE()

    data_shape = 256
    batch_size = 64
    train_data, test_data, class_names, num_class = get_iterators(data_shape, batch_size)
    train_data.reshape(label_shape=(6, 5))
    train_data = test_data.sync_label_shape(train_data)

    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr, 'wd': 0.0005})

    log_interval = 20
    for epoch in range(start_epoch, epochs + start_epoch):
        # reset the iterator and tick
        train_data.reset()
        cls_metric.reset()
        box_metric.reset()
        tic = time.time()
        # iterate over all batches
        for i, batch in enumerate(train_data):
            btic = time.time()
            # record gradients
            with ag.record():
                x = batch.data[0].as_in_context(ctx)
                y = batch.label[0].as_in_context(ctx)
                default_anchors, class_predictions, box_predictions = net(x)
                box_target, box_mask, cls_target = training_targets(
                    default_anchors, class_predictions, y)
                # losses
                loss1 = cls_loss(class_predictions, cls_target)
                loss2 = box_loss(box_predictions, box_target, box_mask)
                # sum all losses
                loss = loss1 * 0.1 + loss2
                # backpropagate
                loss.backward()
            # apply the gradients
            trainer.step(batch_size)
            # update metrics
            cls_metric.update([cls_target], [nd.transpose(class_predictions, (0, 2, 1))])
            box_metric.update([box_target], [box_predictions * box_mask])
            if (i + 1) % log_interval == 0:
                name1, val1 = cls_metric.get()
                name2, val2 = box_metric.get()
                print('[Epoch %d Batch %d] speed: %f samples/s, training: %s=%f, %s=%f'
                      % (epoch, i, batch_size / (time.time() - btic), name1, val1, name2, val2))

        # end of epoch logging
        name1, val1 = cls_metric.get()
        name2, val2 = box_metric.get()
        print('[Epoch %d] training: %s=%f, %s=%f' % (epoch, name1, val1, name2, val2))
        print('[Epoch %d] time cost: %f' % (epoch, time.time() - tic))
        valid(net, test_data, epoch, ctx)

    # we can save the trained parameters to disk
    net.save_params('ssd_%d.params' % (epochs + start_epoch))

def valid(net, valid_data, epoch, ctx):
    cls_metric = mx.metric.Accuracy()
    box_metric = mx.metric.MAE()
    # reset the iterator and tick
    valid_data.reset()
    cls_metric.reset()
    box_metric.reset()
    tic = time.time()
    # iterate over all batches
    for i, batch in enumerate(valid_data):
        with ag.record():
            x = batch.data[0].as_in_context(ctx)
            y = batch.label[0].as_in_context(ctx)
            default_anchors, class_predictions, box_predictions = net(x)
            box_target, box_mask, cls_target = training_targets(
                default_anchors, class_predictions, y)
        # update metrics
        cls_metric.update([cls_target], [nd.transpose(class_predictions, (0, 2, 1))])
        box_metric.update([box_target], [box_predictions * box_mask])
    name1, val1 = cls_metric.get()
    name2, val2 = box_metric.get()
    print('[Epoch %d] validation: %s=%f, %s=%f' % (epoch, name1, val1, name2, val2))
    print('[Epoch %d] time cost: %f' % (epoch, time.time() - tic))

def test(net, ctx):
    image = cv2.imread('/home/muyouhang/dog/data/normalsize/n02085620_242.jpg')
    x = preprocess(image)
    anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors],
                               force_suppress=True, clip=False)
    print(output[0].asnumpy())
    #display(image[:, :, (2, 1, 0)], output[0].asnumpy(), thresh=0.3)

def get_iterators(data_shape, batch_size):
    class_names = ['dog']
    num_class = len(class_names)
    train_iter = image.ImageDetIter(
        batch_size=batch_size,
        data_shape=(3, data_shape, data_shape),
        path_imglist='/home/muyouhang/dog/data/train.lst',
        path_root='/home/muyouhang/dog/data/normalsize/',
        mean=True)
    val_iter = image.ImageDetIter(
        batch_size=batch_size,
        data_shape=(3, data_shape, data_shape),
        path_imglist='/home/muyouhang/dog/data/valid.lst',
        path_root='/home/muyouhang/dog/data/normalsize/',
        mean=True)
    return train_iter, val_iter, class_names, num_class
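Before launching a full training run, it can help to verify the data pipeline in isolation. A minimal sketch (assuming the .lst files and image directory referenced in get_iterators exist; 256 and 64 mirror the data_shape and batch_size hard-coded in train):

from model import get_iterators

train_data, test_data, class_names, num_class = get_iterators(data_shape=256, batch_size=64)
batch = train_data.next()
print(batch.data[0].shape)   # (64, 3, 256, 256), images in NCHW layout
print(batch.label[0].shape)  # (64, N, 5): per object, [class_id, xmin, ymin, xmax, ymax]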
3. Training
train.py
import sys
import numpy as np
import logging
from model import *
from ssd import *

start_iter = int(sys.argv[1])
epochs = int(sys.argv[2])
lr = float(sys.argv[3])
finetune = int(sys.argv[4])
model_name = 'ssd'

mx.random.seed(1)
ctx = mx.gpu(1)

if finetune == 1:
    # resume training from a saved checkpoint
    net = ToySSD(1)
    net.load_params(model_name + '_%d.params' % (start_iter), ctx)
    train(net, epochs=epochs, lr=lr, start_epoch=start_iter, ctx=ctx)
elif finetune == -1:
    # load a saved checkpoint and run the test
    net = ToySSD(1)
    net.load_params(model_name + '_%d.params' % (start_iter), ctx)
    test(net, ctx=ctx)
else:
    # train from scratch
    net = ToySSD(1)
    net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
    net.collect_params().reset_ctx(ctx)
    train(net, epochs=epochs, lr=lr, start_epoch=start_iter, ctx=ctx)
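The script takes four positional arguments: the starting epoch (which also selects the checkpoint file name), the number of epochs to run, the learning rate, and a finetune flag (1 resumes training from ssd_<start>.params, -1 loads a checkpoint and runs test, any other value trains from scratch). Hypothetical invocations, assuming the data paths in model.py are in place:

python train.py 0 30 0.1 0     # train from scratch for 30 epochs at lr 0.1
python train.py 30 10 0.01 1   # resume from ssd_30.params and train 10 more epochs
python train.py 40 0 0 -1      # load ssd_40.params and run detection on the test image

In test mode, each row of the printed detection array follows the MultiBoxDetection layout that display also relies on: [class_id, score, xmin, ymin, xmax, ymax], with box coordinates normalized to [0, 1].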
4. Test results