"MXNet" Seventh play _ Classifier demo signal

Tags: shuffle, mxnet, dataloader

Command to unzip the downloaded archive:

import zipfile

# fin is the name of the downloaded archive (defined elsewhere).
with zipfile.ZipFile('../data/kaggle_cifar10/' + fin, 'r') as zin:
    zin.extractall('../data/kaggle_cifar10/')

Command to copy a file:

shutil.copy(source_file, target_file)
Organize your data

We have two folders, '../data/kaggle_cifar10/train' and '../data/kaggle_cifar10/test', plus an index file that maps each file name to its category.

Our goal is to create a copy of the data under a new folder containing three subfolders: train_valid, train, and valid. Each of these holds one folder per category, which in turn contains the images of that category (the target layout is sketched below).
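For reference, this is the layout the function below produces (a sketch reconstructed from the code, not copied from the original post):

../data/kaggle_cifar10/train_valid_test/
    train_valid/<label>/    # all labeled training images
    train/<label>/          # roughly (1 - valid_ratio) of the images per label
    valid/<label>/          # the remaining images per label
    test/unknown/           # all test images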

import os
import shutil

def reorg_cifar10_data(data_dir, label_file, train_dir, test_dir,
                       input_dir, valid_ratio):
    """After processing, create three new folders to hold the data:
    train_valid, train, valid.
    data_dir:    '../data/kaggle_cifar10'
    label_file:  'trainLabels.csv'
    train_dir:   'train'
    test_dir:    'test'
    input_dir:   'train_valid_test'
    valid_ratio: 0.1
    """
    # Read the training data labels.
    # Open the CSV index: '../data/kaggle_cifar10/trainLabels.csv'
    with open(os.path.join(data_dir, label_file), 'r') as f:
        # Skip the header row (column names) of the file.
        lines = f.readlines()[1:]
        tokens = [l.rstrip().split(',') for l in lines]
        # {index: label}
        idx_label = dict(((int(idx), label) for idx, label in tokens))
    # Set of labels.
    labels = set(idx_label.values())

    # Number of training images in '../data/kaggle_cifar10/train'.
    num_train = len(os.listdir(os.path.join(data_dir, train_dir)))
    # Number of images kept for training (the rest go to valid).
    num_train_tuning = int(num_train * (1 - valid_ratio))
    # Sanity check.
    assert 0 < num_train_tuning < num_train
    # Number of training images to keep per label.
    num_train_tuning_per_label = num_train_tuning // len(labels)
    label_count = dict()

    def mkdir_if_not_exist(path):
        if not os.path.exists(os.path.join(*path)):
            os.makedirs(os.path.join(*path))

    # Organize the training and validation sets.
    # Loop over the training images in '../data/kaggle_cifar10/train'.
    for train_file in os.listdir(os.path.join(data_dir, train_dir)):
        # Strip the extension to get the index.
        idx = int(train_file.split('.')[0])
        # Index -> label.
        label = idx_label[idx]
        # '../data/kaggle_cifar10/train_valid_test/train_valid' + label name.
        mkdir_if_not_exist([data_dir, input_dir, 'train_valid', label])
        # Copy the image.
        shutil.copy(os.path.join(data_dir, train_dir, train_file),
                    os.path.join(data_dir, input_dir, 'train_valid', label))
        # Make sure the train folder gets enough images of each label;
        # the rest go to the valid folder.
        if label not in label_count or label_count[label] < num_train_tuning_per_label:
            # '../data/kaggle_cifar10/train_valid_test/train' + label name.
            mkdir_if_not_exist([data_dir, input_dir, 'train', label])
            shutil.copy(os.path.join(data_dir, train_dir, train_file),
                        os.path.join(data_dir, input_dir, 'train', label))
            label_count[label] = label_count.get(label, 0) + 1
        else:
            mkdir_if_not_exist([data_dir, input_dir, 'valid', label])
            shutil.copy(os.path.join(data_dir, train_dir, train_file),
                        os.path.join(data_dir, input_dir, 'valid', label))

    # Organize the test set.
    # '../data/kaggle_cifar10/train_valid_test/test/unknown' stores the test images.
    mkdir_if_not_exist([data_dir, input_dir, 'test', 'unknown'])
    for test_file in os.listdir(os.path.join(data_dir, test_dir)):
        shutil.copy(os.path.join(data_dir, test_dir, test_file),
                    os.path.join(data_dir, input_dir, 'test', 'unknown'))

train_dir = 'train'
test_dir = 'test'
batch_size = 128
data_dir = '../data/kaggle_cifar10'
label_file = 'trainLabels.csv'
input_dir = 'train_valid_test'
valid_ratio = 0.1
reorg_cifar10_data(data_dir, label_file, train_dir, test_dir,
                   input_dir, valid_ratio)
Preprocessing
# Preprocessing
from mxnet import autograd
from mxnet import gluon
from mxnet import init
from mxnet import nd
from mxnet.gluon.data import vision
from mxnet.gluon.data.vision import transforms
import numpy as np

transform_train = transforms.Compose([
    # transforms.CenterCrop(32),
    # transforms.RandomFlipTopBottom(),
    # transforms.RandomColorJitter(brightness=0.0, contrast=0.0,
    #                              saturation=0.0, hue=0.0),
    # transforms.RandomLighting(0.0),
    # transforms.Cast('float32'),
    # transforms.Resize(32),
    # Randomly crop according to scale and ratio, then resize to a 32x32 square.
    transforms.RandomResizedCrop(32, scale=(0.08, 1.0), ratio=(3.0/4.0, 4.0/3.0)),
    # Randomly flip the image horizontally.
    transforms.RandomFlipLeftRight(),
    # Scale pixel values into (0, 1) and change the layout from
    # height * width * channel to channel * height * width.
    transforms.ToTensor(),
    # Standardize each channel of the image.
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])

# At test time, no augmentation is needed beyond normalizing the image.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])

# '../data/kaggle_cifar10/train_valid_test/'
input_str = data_dir + '/' + input_dir + '/'

# Read the raw image files. flag=1 means the input images have three channels (color).
train_ds = vision.ImageFolderDataset(input_str + 'train', flag=1)
valid_ds = vision.ImageFolderDataset(input_str + 'valid', flag=1)
train_valid_ds = vision.ImageFolderDataset(input_str + 'train_valid', flag=1)
test_ds = vision.ImageFolderDataset(input_str + 'test', flag=1)

loader = gluon.data.DataLoader
train_data = loader(train_ds.transform_first(transform_train), batch_size,
                    shuffle=True, last_batch='keep')
valid_data = loader(valid_ds.transform_first(transform_test), batch_size,
                    shuffle=True, last_batch='keep')
train_valid_data = loader(train_valid_ds.transform_first(transform_train),
                          batch_size, shuffle=True, last_batch='keep')
test_data = loader(test_ds.transform_first(transform_test), batch_size,
                   shuffle=False, last_batch='keep')

# Cross-entropy loss function.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
mxnet.gluon.data.vision.ImageFolderDataset
mxnet.gluon.data.DataLoader

The preprocessing is attached at the DataLoader stage (via transform_first) rather than baked into the dataset, so the ImageFolderDataset objects can still be used later to access the original images.
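A minimal way to see this (a sketch, assuming the datasets and loaders built above): indexing the dataset yields the raw height * width * channel uint8 image, while batches from the DataLoader arrive transformed to channel * height * width float32.

raw_img, raw_label = train_ds[0]
print(raw_img.shape, raw_img.dtype)   # (32, 32, 3) uint8

for data, label in train_data:
    print(data.shape, data.dtype)     # (128, 3, 32, 32) float32
    break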

Data preparation is now complete.

Model definition
from mxnet.gluon import nn
from mxnet import nd

class Residual(nn.HybridBlock):
    def __init__(self, channels, same_shape=True, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.same_shape = same_shape
        with self.name_scope():
            strides = 1 if same_shape else 2
            self.conv1 = nn.Conv2D(channels, kernel_size=3,
                                   padding=1, strides=strides)
            self.bn1 = nn.BatchNorm()
            self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1)
            self.bn2 = nn.BatchNorm()
            if not same_shape:
                self.conv3 = nn.Conv2D(channels, kernel_size=1,
                                       strides=strides)

    def hybrid_forward(self, F, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if not self.same_shape:
            x = self.conv3(x)
        return F.relu(out + x)

class ResNet(nn.HybridBlock):
    def __init__(self, num_classes, verbose=False, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            net = self.net = nn.HybridSequential()
            # Module 1
            net.add(nn.Conv2D(channels=32, kernel_size=3, strides=1, padding=1))
            net.add(nn.BatchNorm())
            net.add(nn.Activation(activation='relu'))
            # Module 2
            for _ in range(3):
                net.add(Residual(channels=32))
            # Module 3
            net.add(Residual(channels=64, same_shape=False))
            for _ in range(2):
                net.add(Residual(channels=64))
            # Module 4
            net.add(Residual(channels=128, same_shape=False))
            for _ in range(2):
                net.add(Residual(channels=128))
            # Module 5
            net.add(nn.AvgPool2D(pool_size=8))
            net.add(nn.Flatten())
            net.add(nn.Dense(num_classes))

    def hybrid_forward(self, F, x):
        out = x
        for i, b in enumerate(self.net):
            out = b(out)
            if self.verbose:
                print('Block %d output: %s' % (i + 1, out.shape))
        return out

def get_net(ctx):
    num_outputs = 10
    net = ResNet(num_outputs)
    net.initialize(ctx=ctx, init=init.Xavier())
    return net
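A quick sanity check of the architecture (a sketch; it feeds one random 32x32 color image through the network, and verbose=True prints the output shape after every child block):

net = ResNet(10, verbose=True)
net.initialize(init=init.Xavier())
x = nd.random.uniform(shape=(1, 3, 32, 32))
y = net(x)
print(y.shape)   # (1, 10)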
Training
gb.accuracy(output, label)
trainer.set_learning_rate(trainer.learning_rate * lr_decay)
gb.evaluate_accuracy(valid_data, net, ctx)
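The first and third calls come from the gluonbook helper package; rough sketches of what they compute (reconstructed here for illustration, not the exact library source):

def accuracy(output, label):
    # Fraction of the batch where the argmax prediction matches the label.
    return nd.mean(output.argmax(axis=1) == label.astype('float32')).asscalar()

def evaluate_accuracy(data_iter, net, ctx):
    # Mean batch accuracy over a whole data iterator.
    acc, n = 0.0, 0
    for data, label in data_iter:
        label = label.astype('float32').as_in_context(ctx)
        output = net(data.as_in_context(ctx))
        acc += nd.mean(output.argmax(axis=1) == label).asscalar()
        n += 1
    return acc / n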
import datetime
import sys
sys.path.append('..')
import gluonbook as gb

def train(net, train_data, valid_data, num_epochs, lr, wd, ctx,
          lr_period, lr_decay):
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        train_loss = 0.0
        train_acc = 0.0
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for data, label in train_data:
            label = label.astype('float32').as_in_context(ctx)
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            trainer.step(batch_size)
            train_loss += nd.mean(loss).asscalar()
            train_acc += gb.accuracy(output, label)
        cur_time = datetime.datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = 'Time %02d:%02d:%02d' % (h, m, s)
        if valid_data is not None:
            valid_acc = gb.evaluate_accuracy(valid_data, net, ctx)
            epoch_str = ('Epoch %d. Loss: %f, Train acc %f, Valid acc %f, '
                         % (epoch, train_loss / len(train_data),
                            train_acc / len(train_data), valid_acc))
        else:
            epoch_str = ('Epoch %d. Loss: %f, Train acc %f, '
                         % (epoch, train_loss / len(train_data),
                            train_acc / len(train_data)))
        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))

Now run the actual training:

ctx = gb.try_gpu()
num_epochs = 1
learning_rate = 0.1
weight_decay = 5e-4
lr_period = 80
lr_decay = 0.1

net = get_net(ctx)
net.hybridize()
train(net, train_data, valid_data, num_epochs, learning_rate,
      weight_decay, ctx, lr_period, lr_decay)
Prediction
import numpy as np
import pandas as pd

# Train on the full training set (train + valid) before predicting.
net = get_net(ctx)
net.hybridize()
train(net, train_valid_data, None, num_epochs, learning_rate,
      weight_decay, ctx, lr_period, lr_decay)

# Predict on the test set.
preds = []
for data, label in test_data:
    output = net(data.as_in_context(ctx))
    preds.extend(output.argmax(axis=1).astype(int).asnumpy())

# Sort ids as strings so they match the lexicographic file order
# ImageFolderDataset used when reading the test images.
sorted_ids = list(range(1, len(test_ds) + 1))
sorted_ids.sort(key=lambda x: str(x))
df = pd.DataFrame({'id': sorted_ids, 'label': preds})
df['label'] = df['label'].apply(lambda x: train_valid_ds.synsets[x])
df.to_csv('submission.csv', index=False)

  

  

"MXNet" Seventh play _ Classifier demo signal

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.