The data and models used in this article can be downloaded from the CSDN resource page.
Link:
Network definition file
LST files for the training and test data
This article mainly reorganizes the original example code to make it easier to call and train. The main reference is the Gluon SSD example.

1. SSD Network Model definition
ssd.py
import mxnet as mx
import matplotlib.pyplot as plt
import os.path as osp
import mxnet.image as image
from mxnet import gluon
from mxnet import nd
from mxnet.contrib.ndarray import MultiBoxPrior
from mxnet.gluon import nn

def box_to_rect(box, color, linewidth=3):
    """convert an anchor box to a matplotlib rectangle"""
    box = box.asnumpy()
    return plt.Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1],
                         fill=False, edgecolor=color, linewidth=linewidth)

def class_predictor(num_anchors, num_classes):
    """return a layer to predict classes"""
    return nn.Conv2D(num_anchors * (num_classes + 1), 3, padding=1)

def box_predictor(num_anchors):
    """return a layer to predict delta locations"""
    return nn.Conv2D(num_anchors * 4, 3, padding=1)

def down_sample(num_filters):
    """stack two Conv-BatchNorm-ReLU blocks and then a pooling layer
    to halve the feature size"""
    out = nn.HybridSequential()
    for _ in range(2):
        out.add(nn.Conv2D(num_filters, 3, strides=1, padding=1))
        out.add(nn.BatchNorm(in_channels=num_filters))
        out.add(nn.Activation('relu'))
    out.add(nn.MaxPool2D(2))
    return out

def flatten_prediction(pred):
    return nd.flatten(nd.transpose(pred, axes=(0, 2, 3, 1)))

def concat_predictions(preds):
    return nd.concat(*preds, dim=1)

def body():
    """return the body network"""
    out = nn.HybridSequential()
    for nfilters in [16, 32, 64]:
        out.add(down_sample(nfilters))
    return out

def toy_ssd_model(num_anchors, num_classes):
    """return SSD modules"""
    downsamples = nn.Sequential()
    downsamples.add(down_sample(128))
    downsamples.add(down_sample(128))
    downsamples.add(down_sample(128))

    class_preds = nn.Sequential()
    box_preds = nn.Sequential()
    for scale in range(5):
        class_preds.add(class_predictor(num_anchors, num_classes))
        box_preds.add(box_predictor(num_anchors))

    return body(), downsamples, class_preds, box_preds

def toy_ssd_forward(x, body, downsamples, class_preds, box_preds, sizes, ratios):
    # extract features with the body network
    x = body(x)

    # for each scale, add anchors, box and class predictions,
    # then compute the input to the next scale
    default_anchors = []
    predicted_boxes = []
    predicted_classes = []

    for i in range(5):
        default_anchors.append(MultiBoxPrior(x, sizes=sizes[i], ratios=ratios[i]))
        predicted_boxes.append(flatten_prediction(box_preds[i](x)))
        predicted_classes.append(flatten_prediction(class_preds[i](x)))
        if i < 3:
            x = downsamples[i](x)
        elif i == 3:
            # simply use the pooling layer
            x = nd.Pooling(x, global_pool=True, pool_type='max', kernel=(4, 4))

    return default_anchors, predicted_classes, predicted_boxes

class ToySSD(gluon.Block):
    def __init__(self, num_classes, **kwargs):
        super(ToySSD, self).__init__(**kwargs)
        # anchor box sizes for the 5 feature scales
        self.anchor_sizes = [[.2, .272], [.37, .447], [.54, .619],
                             [.71, .79], [.88, .961]]
        # anchor box ratios for the 5 feature scales
        self.anchor_ratios = [[1, 2, .5]] * 5
        self.num_classes = num_classes

        with self.name_scope():
            self.body, self.downsamples, self.class_preds, self.box_preds = toy_ssd_model(4, num_classes)

    def forward(self, x):
        default_anchors, predicted_classes, predicted_boxes = toy_ssd_forward(
            x, self.body, self.downsamples, self.class_preds, self.box_preds,
            self.anchor_sizes, self.anchor_ratios)
        # concatenate anchors, class predictions, box predictions from the different layers
        anchors = concat_predictions(default_anchors)
        box_preds = concat_predictions(predicted_boxes)
        class_preds = concat_predictions(predicted_classes)
        # it is better to have class predictions reshaped for softmax computation
        class_preds = nd.reshape(class_preds, shape=(0, -1, self.num_classes + 1))

        return anchors, class_preds, box_preds

from mxnet.contrib.ndarray import MultiBoxTarget
def training_targets(default_anchors, class_predicts, labels):
    class_predicts = nd.transpose(class_predicts, axes=(0, 2, 1))
    z = MultiBoxTarget(*[default_anchors, labels, class_predicts])
    box_target = z[0]  # box offset target for (x, y, width, height)
    box_mask = z[1]    # mask is used to ignore box offsets we don't want to penalize, e.g. negative samples
    cls_target = z[2]  # cls_target is an array of labels for all anchor boxes
    return box_target, box_mask, cls_target

class FocalLoss(gluon.loss.Loss):
    def __init__(self, axis=-1, alpha=0.25, gamma=2, batch_axis=0, **kwargs):
        super(FocalLoss, self).__init__(None, batch_axis, **kwargs)
        self._axis = axis
        self._alpha = alpha
        self._gamma = gamma

    def hybrid_forward(self, F, output, label):
        output = F.softmax(output)
        pt = F.pick(output, label, axis=self._axis, keepdims=True)
        # focal loss: -alpha * (1 - pt)^gamma * log(pt)
        loss = -self._alpha * ((1 - pt) ** self._gamma) * F.log(pt)
        return F.mean(loss, axis=self._batch_axis, exclude=True)

class SmoothL1Loss(gluon.loss.Loss):
    def __init__(self, batch_axis=0, **kwargs):
        super(SmoothL1Loss, self).__init__(None, batch_axis, **kwargs)

    def hybrid_forward(self, F, output, label, mask):
        loss = F.smooth_l1((output - label) * mask, scalar=1.0)
        return F.mean(loss, self._batch_axis, exclude=True)

import numpy as np
import cv2

def preprocess(image):
    """take an image and apply preprocessing"""
    # resize to data_shape
    #image = cv2.resize(image, (data_shape, data_shape))
    #mask = np.zeros((max(image.shape), max(image.shape), 3))
    #mask[(mask.shape[0]-image.shape[0])/2:(mask.shape[0]+image.shape[0])/2,
    #     (mask.shape[1]-image.shape[1])/2:(mask.shape[1]+image.shape[1])/2, :] = image
    #image = cv2.resize(mask, (256, 256))
    # swap BGR to RGB
    image = image[:, :, (2, 1, 0)]
    # convert to float before subtracting mean
    image = image.astype(np.float32)
    # subtract mean
    image -= np.array([128, 128, 128])
    # organize as [batch-channel-height-width]
    image = np.transpose(image, (2, 0, 1))
    image = image[np.newaxis, :]
    # convert to ndarray
    image = nd.array(image)
    return image

def display(img, out, thresh=0.5):
    import random
    import matplotlib as mpl
    mpl.rcParams['figure.figsize'] = (10, 10)
    pens = dict()
    plt.clf()
    plt.imshow(img)
    for det in out:
        cid = int(det[0])
        if cid < 0:
            continue
        score = det[1]
        if score < thresh:
            continue
        if cid not in pens:
            pens[cid] = (random.random(), random.random(), random.random())
        scales = [img.shape[1], img.shape[0]] * 2
        xmin, ymin, xmax, ymax = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)]
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                             fill=False, edgecolor=pens[cid], linewidth=3)
        plt.gca().add_patch(rect)
        text = class_names[cid]  # class_names is expected to be defined in the calling module
        plt.gca().text(xmin, ymin - 2, '{:s} {:.3f}'.format(text, score),
                       bbox=dict(facecolor=pens[cid], alpha=0.5),
                       fontsize=12, color='white')
    #plt.show()
    plt.savefig('result.jpg')
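As a quick sanity check of the network definition, here is a minimal sketch (not part of the original files; it assumes ssd.py is on the import path and uses the 256x256 input size hard-coded in the training script below) that instantiates ToySSD and prints the shapes of its three outputs:

# check_ssd.py -- hypothetical helper, not part of the original article
import mxnet as mx
from mxnet import nd
from ssd import ToySSD

net = ToySSD(num_classes=1)  # one foreground class ('dog'); background is handled internally
net.initialize(mx.init.Xavier(magnitude=2))

x = nd.zeros((1, 3, 256, 256))  # dummy batch of one 256x256 RGB image
anchors, class_preds, box_preds = net(x)

# anchors:     (1, num_anchors, 4)               -- corner-encoded default boxes
# class_preds: (1, num_anchors, num_classes + 1) -- reshaped for softmax
# box_preds:   (1, num_anchors * 4)              -- flattened box offsets
print(anchors.shape, class_preds.shape, box_preds.shape)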
2. Data loading and model training
model.py
from ssd import *
import time
from mxnet import autograd as ag
from mxnet.contrib.ndarray import MultiBoxDetection
import logging

head = '%(asctime)-15s %(message)s'
logging.basicConfig(level=logging.DEBUG, format=head)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler("log.txt")
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)

def train(net, start_epoch, epochs, lr, ctx):
    box_loss = SmoothL1Loss()
    cls_loss = FocalLoss()
    cls_metric = mx.metric.Accuracy()
    box_metric = mx.metric.MAE()

    data_shape = 256
    batch_size = 64
    train_data, test_data, class_names, num_class = get_iterators(data_shape, batch_size)
    train_data.reshape(label_shape=(6, 5))
    train_data = test_data.sync_label_shape(train_data)

    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr, 'wd': 0.0005})

    log_interval = 20
    for epoch in range(start_epoch, epochs + start_epoch):
        # reset the iterator and tick
        train_data.reset()
        cls_metric.reset()
        box_metric.reset()
        tic = time.time()
        # iterate over all batches
        for i, batch in enumerate(train_data):
            btic = time.time()
            # record gradients
            with ag.record():
                x = batch.data[0].as_in_context(ctx)
                y = batch.label[0].as_in_context(ctx)
                default_anchors, class_predictions, box_predictions = net(x)
                box_target, box_mask, cls_target = training_targets(
                    default_anchors, class_predictions, y)
                # losses
                loss1 = cls_loss(class_predictions, cls_target)
                loss2 = box_loss(box_predictions, box_target, box_mask)
                # sum all losses
                loss = loss1 * 0.1 + loss2
                # backpropagate
                loss.backward()
            # apply the gradients
            trainer.step(batch_size)
            # update metrics
            cls_metric.update([cls_target], [nd.transpose(class_predictions, (0, 2, 1))])
            box_metric.update([box_target], [box_predictions * box_mask])
            if (i + 1) % log_interval == 0:
                name1, val1 = cls_metric.get()
                name2, val2 = box_metric.get()
                print('[Epoch %d Batch %d] speed: %f samples/s, training: %s=%f, %s=%f'
                      % (epoch, i, batch_size / (time.time() - btic), name1, val1, name2, val2))

        # end of epoch logging
        name1, val1 = cls_metric.get()
        name2, val2 = box_metric.get()
        print('[Epoch %d] training: %s=%f, %s=%f' % (epoch, name1, val1, name2, val2))
        print('[Epoch %d] time cost: %f' % (epoch, time.time() - tic))
        valid(net, test_data, epoch, ctx)

    # we can save the trained parameters to disk
    net.save_params('ssd_%d.params' % (epochs + start_epoch))

def valid(net, valid_data, epoch, ctx):
    cls_metric = mx.metric.Accuracy()
    box_metric = mx.metric.MAE()
    # reset the iterator and tick
    valid_data.reset()
    cls_metric.reset()
    box_metric.reset()
    tic = time.time()
    # iterate over all batches
    for i, batch in enumerate(valid_data):
        with ag.record():
            x = batch.data[0].as_in_context(ctx)
            y = batch.label[0].as_in_context(ctx)
            default_anchors, class_predictions, box_predictions = net(x)
            box_target, box_mask, cls_target = training_targets(
                default_anchors, class_predictions, y)
        # update metrics
        cls_metric.update([cls_target], [nd.transpose(class_predictions, (0, 2, 1))])
        box_metric.update([box_target], [box_predictions * box_mask])
    name1, val1 = cls_metric.get()
    name2, val2 = box_metric.get()
    print('[Epoch %d] validation: %s=%f, %s=%f' % (epoch, name1, val1, name2, val2))
    print('[Epoch %d] time cost: %f' % (epoch, time.time() - tic))

def test(net, ctx):
    image = cv2.imread('/home/muyouhang/dog/data/normalsize/n02085620_242.jpg')
    x = preprocess(image)
    anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors],
                               force_suppress=True, clip=False)
    print(output[0].asnumpy())
    #display(image[:, :, (2, 1, 0)], output[0].asnumpy(), thresh=0.3)

def get_iterators(data_shape, batch_size):
    class_names = ['dog']
    num_class = len(class_names)
    train_iter = image.ImageDetIter(
        batch_size=batch_size,
        data_shape=(3, data_shape, data_shape),
        path_imglist='/home/muyouhang/dog/data/train.lst',
        path_root='/home/muyouhang/dog/data/normalsize/',
        mean=True)
    val_iter = image.ImageDetIter(
        batch_size=batch_size,
        data_shape=(3, data_shape, data_shape),
        path_imglist='/home/muyouhang/dog/data/valid.lst',
        path_root='/home/muyouhang/dog/data/normalsize/',
        mean=True)
    return train_iter, val_iter, class_names, num_class
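Before launching a full training run, it can help to verify the data pipeline in isolation. A minimal sketch (assuming the .lst files and image directory referenced in get_iterators exist; 256 and 64 mirror the data_shape and batch_size hard-coded in train):

from model import get_iterators

train_data, test_data, class_names, num_class = get_iterators(data_shape=256, batch_size=64)
batch = train_data.next()
print(batch.data[0].shape)   # (64, 3, 256, 256), images in NCHW layout
print(batch.label[0].shape)  # (64, N, 5): per object, [class_id, xmin, ymin, xmax, ymax]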
3. Training
train.py
import sys
import numpy as np
import logging
from model import *
from ssd import *

start_iter = int(sys.argv[1])
epochs = int(sys.argv[2])
lr = float(sys.argv[3])
finetune = int(sys.argv[4])
model_name = 'ssd'

mx.random.seed(1)
ctx = mx.gpu(1)

if finetune == 1:
    # resume training from a saved checkpoint
    net = ToySSD(1)
    net.load_params(model_name + '_%d.params' % (start_iter), ctx)
    train(net, epochs=epochs, lr=lr, start_epoch=start_iter, ctx=ctx)
elif finetune == -1:
    # load a saved checkpoint and run the test
    net = ToySSD(1)
    net.load_params(model_name + '_%d.params' % (start_iter), ctx)
    test(net, ctx=ctx)
else:
    # train from scratch
    net = ToySSD(1)
    net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
    net.collect_params().reset_ctx(ctx)
    train(net, epochs=epochs, lr=lr, start_epoch=start_iter, ctx=ctx)
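The script takes four positional arguments: the starting epoch (which also selects the checkpoint file name), the number of epochs to run, the learning rate, and a finetune flag (1 resumes training from ssd_<start>.params, -1 loads a checkpoint and runs test, any other value trains from scratch). Hypothetical invocations, assuming the data paths in model.py are in place:

python train.py 0 30 0.1 0     # train from scratch for 30 epochs at lr 0.1
python train.py 30 10 0.01 1   # resume from ssd_30.params and train 10 more epochs
python train.py 40 0 0 -1      # load ssd_40.params and run detection on the test image

In test mode, each row of the printed detection array follows the MultiBoxDetection layout that display also relies on: [class_id, score, xmin, ymin, xmax, ymax], with box coordinates normalized to [0, 1].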
4. Test results