"MXNet" Seventh play _ Classifier demo signal

Last Update:2018-05-28 Source: Internet

Author: User

Tags shuffle mxnet dataloader

Developer on Alibaba Cloud: Build your first app with APIs, SDKs, and tutorials on the Alibaba Cloud. Read more ＞

Unzip the file command:

with zipfile.ZipFile('../data/kaggle_cifar10/' + fin, 'r') as zin:
    zin.extractall('../data/kaggle_cifar10/')

Copy File command:

shutil.copy(source_file, target_file)

Organize your data

We have two folders, '../data/kaggle_cifar10/train' and '../data/kaggle_cifar10/test', plus an index file that records each file name and its category.

Our goal is to create a copy of the data under a new folder that contains three sub-folders: train_valid, train, and valid. Each of them holds one sub-folder per category, which in turn contains the pictures belonging to that category.

import os
import shutil


def _copy_into(src_file, *dest_parts):
    """Copy ``src_file`` into the directory ``os.path.join(*dest_parts)``.

    The destination directory is created first if it does not exist yet.
    """
    dest_dir = os.path.join(*dest_parts)
    # exist_ok avoids the check-then-create race of a manual os.path.exists test.
    os.makedirs(dest_dir, exist_ok=True)
    shutil.copy(src_file, dest_dir)


def reorg_cifar10_data(data_dir, label_file, train_dir, test_dir, input_dir,
                       valid_ratio):
    """Copy the raw images into per-label folders for training and validation.

    Under ``data_dir/input_dir`` this creates ``train_valid``, ``train`` and
    ``valid`` (each with one sub-folder per label) plus ``test/unknown``.

    Example argument values used by this tutorial:
        data_dir:    '../data/kaggle_cifar10'
        label_file:  'trainLabels.csv'
        train_dir:   'train'
        test_dir:    'test'
        input_dir:   'train_valid_test'
        valid_ratio: 0.1

    Args:
        data_dir: Root folder holding the label file and the image folders.
        label_file: CSV file (relative to ``data_dir``) with a header row
            followed by ``index,label`` rows.
        train_dir: Folder (relative to ``data_dir``) with training images
            whose file names start with the integer index, e.g. ``12.png``.
        test_dir: Folder (relative to ``data_dir``) with the test images.
        input_dir: Output folder (relative to ``data_dir``).
        valid_ratio: Fraction of the training images to keep for validation.

    Raises:
        ValueError: If the label file has no data rows, or if ``valid_ratio``
            leaves either the training or the validation split empty.
    """
    # Read the training labels, skipping the header row (column names).
    with open(os.path.join(data_dir, label_file), 'r') as f:
        lines = f.readlines()[1:]
    tokens = [line.rstrip().split(',') for line in lines if line.strip()]
    # {index: label}
    idx_label = {int(idx): label for idx, label in tokens}
    labels = set(idx_label.values())
    if not labels:
        raise ValueError('no labels found in %s' % label_file)

    src_train = os.path.join(data_dir, train_dir)
    train_files = os.listdir(src_train)
    num_train = len(train_files)
    # Number of images that go to the 'train' split (the rest go to 'valid').
    num_train_tuning = int(num_train * (1 - valid_ratio))
    if not 0 < num_train_tuning < num_train:
        raise ValueError(
            'valid_ratio=%r leaves an empty train or valid split for %d images'
            % (valid_ratio, num_train))
    # Per-label quota of images for the 'train' split.
    num_train_tuning_per_label = num_train_tuning // len(labels)
    label_count = {}

    out_root = os.path.join(data_dir, input_dir)

    # Organize the training and validation sets.
    for train_file in train_files:
        # The file name without its extension is the index into the label map.
        idx = int(train_file.split('.')[0])
        label = idx_label[idx]
        src = os.path.join(src_train, train_file)
        # Every training image goes into train_valid/<label>.
        _copy_into(src, out_root, 'train_valid', label)
        # Fill the per-label 'train' quota first; the overflow goes to 'valid'.
        if (label not in label_count
                or label_count[label] < num_train_tuning_per_label):
            _copy_into(src, out_root, 'train', label)
            label_count[label] = label_count.get(label, 0) + 1
        else:
            _copy_into(src, out_root, 'valid', label)

    # Organize the test set: all test images live under test/unknown.
    src_test = os.path.join(data_dir, test_dir)
    test_out = os.path.join(out_root, 'test', 'unknown')
    os.makedirs(test_out, exist_ok=True)
    for test_file in os.listdir(src_test):
        shutil.copy(os.path.join(src_test, test_file), test_out)


train_dir = 'train'
test_dir = 'test'
batch_size = 128
data_dir = '../data/kaggle_cifar10'
# NOTE(review): the Kaggle CIFAR-10 label file is presumably named
# 'trainLabels.csv' - confirm the capitalisation against the local copy.
label_file = 'trainLabels.csv'
input_dir = 'train_valid_test'
valid_ratio = 0.1

if __name__ == '__main__':
    reorg_cifar10_data(data_dir, label_file, train_dir, test_dir, input_dir,
                       valid_ratio)

Preprocessing

# Preprocessing: build the image transforms, datasets, data loaders and loss.
from mxnet import autograd
from mxnet import gluon
from mxnet import init
from mxnet import nd
from mxnet.gluon.data import vision
from mxnet.gluon.data.vision import transforms
import numpy as np

transform_train = transforms.Compose([
    # Other transforms that could be enabled here:
    # transforms.CenterCrop(32)
    # transforms.RandomFlipTopBottom(),
    # transforms.RandomColorJitter(brightness=0.0, contrast=0.0,
    #                              saturation=0.0, hue=0.0),
    # transforms.RandomLighting(0.0),
    # transforms.Cast('float32'),
    # transforms.Resize(32),

    # Randomly crop according to scale and ratio, then resize to a 32x32
    # square.
    transforms.RandomResizedCrop(32, scale=(0.08, 1.0),
                                 ratio=(3.0 / 4.0, 4.0 / 3.0)),
    # Randomly flip the picture left/right.
    transforms.RandomFlipLeftRight(),
    # Scale pixel values to (0, 1) and change the layout from
    # height * width * channel to channel * height * width.
    transforms.ToTensor(),
    # Standardize each channel of the picture.
    transforms.Normalize([0.4914, 0.4822, 0.4465],
                         [0.2023, 0.1994, 0.2010]),
])

# At test time no augmentation is applied beyond normalization of the image.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465],
                         [0.2023, 0.1994, 0.2010]),
])

# '../data/kaggle_cifar10/train_valid_test/'
input_str = data_dir + '/' + input_dir + '/'

# Read the original image files. flag=1 indicates that the input images have
# three channels (color).
train_ds = vision.ImageFolderDataset(input_str + 'train', flag=1)
valid_ds = vision.ImageFolderDataset(input_str + 'valid', flag=1)
train_valid_ds = vision.ImageFolderDataset(input_str + 'train_valid', flag=1)
test_ds = vision.ImageFolderDataset(input_str + 'test', flag=1)

# transform_first applies the transform to the image only, not to the label.
loader = gluon.data.DataLoader
train_data = loader(train_ds.transform_first(transform_train),
                    batch_size, shuffle=True, last_batch='keep')
valid_data = loader(valid_ds.transform_first(transform_test),
                    batch_size, shuffle=True, last_batch='keep')
train_valid_data = loader(train_valid_ds.transform_first(transform_train),
                          batch_size, shuffle=True, last_batch='keep')
# The test set is not shuffled, so predictions keep the dataset order.
test_data = loader(test_ds.transform_first(transform_test),
                   batch_size, shuffle=False, last_batch='keep')

# Cross-entropy loss function.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

mxnet.gluon.data.vision.ImageFolderDataset
mxnet.gluon.data.DataLoader

The preprocessing is attached when the DataLoader is built (via transform_first), so the ImageFolderDataset objects can still be used later to get the original, untransformed pictures.

The data is now ready.

Model definition

from mxnet.gluon import nn
from mxnet import init
from mxnet import nd


class Residual(nn.HybridBlock):
    """Residual block: two 3x3 conv + batch-norm layers with a skip connection.

    Args:
        channels: Number of output channels of every convolution in the block.
        same_shape: When True the first convolution uses stride 1. When False
            it uses stride 2, and a 1x1 stride-2 convolution is applied to the
            input so it can be added to the block output.
        **kwargs: Forwarded to ``nn.HybridBlock``.
    """

    def __init__(self, channels, same_shape=True, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.same_shape = same_shape
        with self.name_scope():
            strides = 1 if same_shape else 2
            self.conv1 = nn.Conv2D(channels, kernel_size=3, padding=1,
                                   strides=strides)
            self.bn1 = nn.BatchNorm()
            self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1)
            self.bn2 = nn.BatchNorm()
            if not same_shape:
                # Projection for the skip path when the main path downsamples.
                self.conv3 = nn.Conv2D(channels, kernel_size=1,
                                       strides=strides)

    def hybrid_forward(self, F, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if not self.same_shape:
            x = self.conv3(x)
        return F.relu(out + x)


class ResNet(nn.HybridBlock):
    """Small ResNet made of a conv stem, three residual stages and a classifier.

    Args:
        num_classes: Number of units of the final dense layer.
        verbose: When True, print the output shape of every sub-block during
            the forward pass.
        **kwargs: Forwarded to ``nn.HybridBlock``.
    """

    def __init__(self, num_classes, verbose=False, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            net = self.net = nn.HybridSequential()
            # Module 1: convolution stem.
            net.add(nn.Conv2D(channels=32, kernel_size=3, strides=1,
                              padding=1))
            net.add(nn.BatchNorm())
            net.add(nn.Activation(activation='relu'))
            # Module 2: three residual blocks, 32 channels.
            for _ in range(3):
                net.add(Residual(channels=32))
            # Module 3: downsample, then two more blocks at 64 channels.
            net.add(Residual(channels=64, same_shape=False))
            for _ in range(2):
                net.add(Residual(channels=64))
            # Module 4: downsample, then two more blocks at 128 channels.
            net.add(Residual(channels=128, same_shape=False))
            for _ in range(2):
                net.add(Residual(channels=128))
            # Module 5: pooling and classifier.
            # NOTE(review): pool_size=8 presumably matches the 8x8 feature map
            # left by two stride-2 stages on 32x32 inputs - confirm if the
            # input size changes.
            net.add(nn.AvgPool2D(pool_size=8))
            net.add(nn.Flatten())
            net.add(nn.Dense(num_classes))

    def hybrid_forward(self, F, x):
        """Apply every sub-block in order, optionally printing output shapes."""
        out = x
        for i, b in enumerate(self.net):
            out = b(out)
            if self.verbose:
                # NOTE(review): out.shape is presumably only available on
                # NDArray inputs (i.e. before hybridize()) - confirm.
                print('Block %d output: %s' % (i + 1, out.shape))
        return out


def get_net(ctx):
    """Build a 10-class ResNet and Xavier-initialize it on ``ctx``."""
    num_outputs = 10
    net = ResNet(num_outputs)
    net.initialize(ctx=ctx, init=init.Xavier())
    return net

Training

gb.accuracy(output, label)

trainer.set_learning_rate(trainer.learning_rate * lr_decay)

gb.evaluate_accuracy(valid_data, net, ctx)

import datetime
import sys
sys.path.append('..')
import gluonbook as gb


def train(net, train_data, valid_data, num_epochs, lr, wd, ctx, lr_period,
          lr_decay):
    """Train ``net`` with SGD + momentum and print one summary line per epoch.

    Relies on the module-level names ``gluon``, ``autograd``, ``nd`` and
    ``softmax_cross_entropy`` defined in the preprocessing section.

    Args:
        net: The network to train.
        train_data: Iterable of ``(data, label)`` batches; ``len(train_data)``
            is used as the number of batches when averaging.
        valid_data: Iterable of validation batches, or None to skip validation.
        num_epochs: Number of passes over ``train_data``.
        lr: Initial learning rate.
        wd: Weight decay.
        ctx: Device context that the data, labels and network are placed on.
        lr_period: Every ``lr_period`` epochs the learning rate is decayed.
        lr_decay: Multiplicative learning-rate decay factor.
    """
    trainer = gluon.Trainer(
        net.collect_params(), 'sgd',
        {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})

    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        train_loss = 0.0
        train_acc = 0.0
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for data, label in train_data:
            label = label.astype('float32').as_in_context(ctx)
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            # Normalize by the real size of this batch: with last_batch='keep'
            # the final batch can be smaller than the configured batch size.
            trainer.step(data.shape[0])
            train_loss += nd.mean(loss).asscalar()
            train_acc += gb.accuracy(output, label)
        cur_time = datetime.datetime.now()
        # total_seconds() keeps the day component that .seconds would drop.
        elapsed = int((cur_time - prev_time).total_seconds())
        h, remainder = divmod(elapsed, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        if valid_data is not None:
            valid_acc = gb.evaluate_accuracy(valid_data, net, ctx)
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, Valid acc %f, "
                         % (epoch, train_loss / len(train_data),
                            train_acc / len(train_data), valid_acc))
        else:
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, "
                         % (epoch, train_loss / len(train_data),
                            train_acc / len(train_data)))
        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))

Run the actual training:

# Hyper-parameters for the training run with a held-out validation set.
ctx = gb.try_gpu()
num_epochs = 1
learning_rate = 0.1
weight_decay = 5e-4
lr_period = 80
lr_decay = 0.1

net = get_net(ctx)
net.hybridize()
train(net, train_data, valid_data, num_epochs, learning_rate,
      weight_decay, ctx, lr_period, lr_decay)

Prediction

import numpy as np
import pandas as pd

# Training: fit a fresh network on train + valid (no validation set this time).
net = get_net(ctx)
net.hybridize()
train(net, train_valid_data, None, num_epochs, learning_rate,
      weight_decay, ctx, lr_period, lr_decay)

# Prediction: collect the arg-max class index for every test image.
preds = []
for data, label in test_data:
    output = net(data.as_in_context(ctx))
    preds.extend(output.argmax(axis=1).astype(int).asnumpy())

# Ids are sorted as strings ('1', '10', '100', ...).
# NOTE(review): this presumably mirrors the file-name order in which the test
# dataset lists its images - confirm against ImageFolderDataset.
sorted_ids = list(range(1, len(test_ds) + 1))
sorted_ids.sort(key=str)

df = pd.DataFrame({'id': sorted_ids, 'label': preds})
# Map class indices back to label names.
df['label'] = df['label'].apply(lambda x: train_valid_ds.synsets[x])
df.to_csv('submission.csv', index=False)

"MXNet" Seventh play _ Classifier demo signal

This article is an English version of an article which is originally in the Chinese language on aliyun.com and is provided for information purposes only. This website makes no representation or warranty of any kind, either expressed or implied, as to the accuracy, completeness ownership or reliability of the article or any translations thereof. If you have any concerns or complaints relating to the article, please send an email, providing a detailed description of the concern or complaint, to info-contact@alibabacloud.com. A staff member will contact you within 5 working days. Once verified, infringing content will be removed immediately.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

Get Started for Free

Sales Support

1 on 1 presale consultation

Chat Contact Sales
After-Sales Support

24/7 Technical Support 6 Free Tickets per Quarter Faster Response

Open a Ticket
Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.

Learn More