Command to unzip a file:
with zipfile.ZipFile('../data/kaggle_cifar10/' + fin, 'r') as zin:
    zin.extractall('../data/kaggle_cifar10/')
Command to copy a file:
shutil.copy(source_file, target_file)
Organizing the data
We have two folders, '../data/kaggle_cifar10/train' and '../data/kaggle_cifar10/test', plus an index file that records each file name and its category.
Our goal is to create a copy under a new folder containing three folders, train_valid, train and valid; each of them holds one sub-folder per category, which contains the pictures belonging to that category.
import os
import shutil


def _copy_into(src_path, dest_dir):
    """Copy ``src_path`` into ``dest_dir``, creating the directory if needed."""
    os.makedirs(dest_dir, exist_ok=True)
    shutil.copy(src_path, dest_dir)


def reorg_cifar10_data(data_dir, label_file, train_dir, test_dir, input_dir,
                       valid_ratio):
    """Copy the raw CIFAR-10 pictures into per-label folders.

    Under ``data_dir/input_dir`` this creates ``train_valid``, ``train`` and
    ``valid`` (each with one sub-folder per label) plus ``test/unknown``.

    Args:
        data_dir: Dataset root, e.g. ``'../data/kaggle_cifar10'``.
        label_file: CSV index inside ``data_dir`` with a header row followed
            by ``<index>,<label>`` rows, e.g. ``'trainLabels.csv'``.
        train_dir: Folder inside ``data_dir`` with the training pictures,
            named ``<index>.<ext>``.
        test_dir: Folder inside ``data_dir`` with the test pictures.
        input_dir: Output folder (inside ``data_dir``) that receives the
            reorganized copy, e.g. ``'train_valid_test'``.
        valid_ratio: Fraction of the training pictures held out for
            validation, e.g. ``0.1``.

    Raises:
        AssertionError: If the split would leave the training part or the
            validation part empty.
    """
    # Read the label index, skipping the header row and any blank lines.
    with open(os.path.join(data_dir, label_file), 'r') as f:
        lines = f.readlines()[1:]
    tokens = [line.rstrip().split(',') for line in lines if line.strip()]
    # {index: label}
    idx_label = {int(idx): label for idx, label in tokens}
    labels = set(idx_label.values())

    train_root = os.path.join(data_dir, train_dir)
    train_files = os.listdir(train_root)
    num_train = len(train_files)
    # Number of pictures that go to 'train' (the rest go to 'valid').
    num_train_tuning = int(num_train * (1 - valid_ratio))
    assert 0 < num_train_tuning < num_train
    # Quota of 'train' pictures per label.
    num_train_tuning_per_label = num_train_tuning // len(labels)
    label_count = {}

    out_root = os.path.join(data_dir, input_dir)

    # Organize the training and validation sets.
    for train_file in train_files:
        # The file name without its extension is the index.
        idx = int(train_file.split('.')[0])
        label = idx_label[idx]
        src = os.path.join(train_root, train_file)

        # Every training picture goes into train_valid/<label>.
        _copy_into(src, os.path.join(out_root, 'train_valid', label))

        # Fill train/<label> up to its quota; the remainder goes to valid.
        if (label not in label_count
                or label_count[label] < num_train_tuning_per_label):
            _copy_into(src, os.path.join(out_root, 'train', label))
            label_count[label] = label_count.get(label, 0) + 1
        else:
            _copy_into(src, os.path.join(out_root, 'valid', label))

    # Organize the test set: all test pictures live in test/unknown.
    test_root = os.path.join(data_dir, test_dir)
    test_out = os.path.join(out_root, 'test', 'unknown')
    os.makedirs(test_out, exist_ok=True)
    for test_file in os.listdir(test_root):
        shutil.copy(os.path.join(test_root, test_file), test_out)


train_dir = 'train'
test_dir = 'test'
batch_size = 128
data_dir = '../data/kaggle_cifar10'
# Kaggle ships the label index under this name.
label_file = 'trainLabels.csv'
input_dir = 'train_valid_test'
valid_ratio = 0.1

# Only copy the dataset when run as a script/notebook, not on import.
if __name__ == '__main__':
    reorg_cifar10_data(data_dir, label_file, train_dir, test_dir, input_dir,
                       valid_ratio)
Preprocessing
# Preprocessing
from mxnet import autograd
from mxnet import gluon
from mxnet import init
from mxnet import nd
from mxnet.gluon.data import vision
from mxnet.gluon.data.vision import transforms
import numpy as np

# Augmentation applied to training pictures.
transform_train = transforms.Compose([
    # Other transforms that could be enabled here:
    # transforms.CenterCrop(32)
    # transforms.RandomFlipTopBottom(),
    # transforms.RandomColorJitter(brightness=0.0, contrast=0.0,
    #                              saturation=0.0, hue=0.0),
    # transforms.RandomLighting(0.0),
    # transforms.Cast('float32'),
    # transforms.Resize(32),

    # Randomly crop according to scale and ratio, then resize to a 32x32
    # square.
    transforms.RandomResizedCrop(32, scale=(0.08, 1.0),
                                 ratio=(3.0 / 4.0, 4.0 / 3.0)),
    # Randomly flip the picture left/right.
    transforms.RandomFlipLeftRight(),
    # Scale pixel values into (0, 1) and change the layout from
    # height * width * channel to channel * height * width.
    transforms.ToTensor(),
    # Standardize each channel of the picture.
    transforms.Normalize([0.4914, 0.4822, 0.4465],
                         [0.2023, 0.1994, 0.2010]),
])

# At test time no augmentation beyond normalization is needed.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465],
                         [0.2023, 0.1994, 0.2010]),
])

# '../data/kaggle_cifar10/train_valid_test/'
input_str = data_dir + '/' + input_dir + '/'

# Read the original image files. flag=1 means the input images have three
# channels (color).
train_ds = vision.ImageFolderDataset(input_str + 'train', flag=1)
valid_ds = vision.ImageFolderDataset(input_str + 'valid', flag=1)
train_valid_ds = vision.ImageFolderDataset(input_str + 'train_valid', flag=1)
test_ds = vision.ImageFolderDataset(input_str + 'test', flag=1)

loader = gluon.data.DataLoader
train_data = loader(train_ds.transform_first(transform_train),
                    batch_size, shuffle=True, last_batch='keep')
valid_data = loader(valid_ds.transform_first(transform_test),
                    batch_size, shuffle=True, last_batch='keep')
train_valid_data = loader(train_valid_ds.transform_first(transform_train),
                          batch_size, shuffle=True, last_batch='keep')
# The test set is not shuffled.
test_data = loader(test_ds.transform_first(transform_test),
                   batch_size, shuffle=False, last_batch='keep')

# Cross-entropy loss function.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
mxnet.gluon.data.vision.ImageFolderDataset
mxnet.gluon.data.DataLoader
The preprocessing is attached where the DataLoader is built (via transform_first), so the ImageFolderDataset objects can still be used later to get the original picture set.
The data is now ready.
Model definition
from mxnet.gluon import nn
from mxnet import nd


class Residual(nn.HybridBlock):
    """Residual block: two 3x3 conv + batch-norm layers with a skip connection.

    Args:
        channels: Number of output channels of the convolutions.
        same_shape: If True the block uses stride 1. If False the first
            convolution uses stride 2 and the skip path goes through a
            1x1, stride-2 convolution before being added.
    """

    def __init__(self, channels, same_shape=True, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.same_shape = same_shape
        with self.name_scope():
            strides = 1 if same_shape else 2
            self.conv1 = nn.Conv2D(channels, kernel_size=3, padding=1,
                                   strides=strides)
            self.bn1 = nn.BatchNorm()
            self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1)
            self.bn2 = nn.BatchNorm()
            if not same_shape:
                # Projection applied to the skip path in hybrid_forward.
                self.conv3 = nn.Conv2D(channels, kernel_size=1,
                                       strides=strides)

    def hybrid_forward(self, F, x):
        """Return relu(bn2(conv2(relu(bn1(conv1(x))))) + skip(x))."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if not self.same_shape:
            x = self.conv3(x)
        return F.relu(out + x)


class ResNet(nn.HybridBlock):
    """Small ResNet built from ``Residual`` blocks.

    Args:
        num_classes: Number of units of the final dense layer.
        verbose: If True, print the output shape of every sub-block during
            the forward pass.
    """

    def __init__(self, num_classes, verbose=False, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            net = self.net = nn.HybridSequential()
            # Module 1
            net.add(nn.Conv2D(channels=32, kernel_size=3, strides=1,
                              padding=1))
            net.add(nn.BatchNorm())
            net.add(nn.Activation(activation='relu'))
            # Module 2
            for _ in range(3):
                net.add(Residual(channels=32))
            # Module 3
            net.add(Residual(channels=64, same_shape=False))
            for _ in range(2):
                net.add(Residual(channels=64))
            # Module 4
            net.add(Residual(channels=128, same_shape=False))
            for _ in range(2):
                net.add(Residual(channels=128))
            # Module 5
            net.add(nn.AvgPool2D(pool_size=8))
            net.add(nn.Flatten())
            net.add(nn.Dense(num_classes))

    def hybrid_forward(self, F, x):
        """Run ``x`` through every sub-block in order and return the result."""
        out = x
        for i, b in enumerate(self.net):
            out = b(out)
            if self.verbose:
                print('Block %d output: %s' % (i + 1, out.shape))
        return out


def get_net(ctx):
    """Build a 10-class ResNet and Xavier-initialize it on ``ctx``."""
    num_outputs = 10
    net = ResNet(num_outputs)
    net.initialize(ctx=ctx, init=init.Xavier())
    return net
Training
gb.accuracy(output, label)
trainer.set_learning_rate(trainer.learning_rate * lr_decay)
gb.evaluate_accuracy(valid_data, net, ctx)
import datetime
import sys
sys.path.append('..')
import gluonbook as gb


def train(net, train_data, valid_data, num_epochs, lr, wd, ctx, lr_period,
          lr_decay):
    """Train ``net`` with SGD and print one summary line per epoch.

    Args:
        net: Network to train.
        train_data: Iterable of ``(data, label)`` training batches.
        valid_data: Iterable of validation batches, or None to skip
            validation.
        num_epochs: Number of epochs to run.
        lr: Initial learning rate.
        wd: Weight decay.
        ctx: Context the batches are moved to.
        lr_period: The learning rate is decayed every ``lr_period`` epochs.
        lr_decay: Factor the learning rate is multiplied by at each decay.
    """
    trainer = gluon.Trainer(
        net.collect_params(), 'sgd',
        {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})

    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        train_loss = 0.0
        train_acc = 0.0
        # Decay the learning rate every lr_period epochs (never at epoch 0).
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)

        for data, label in train_data:
            label = label.astype('float32').as_in_context(ctx)
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            trainer.step(batch_size)
            train_loss += nd.mean(loss).asscalar()
            train_acc += gb.accuracy(output, label)

        # Wall-clock time spent on this epoch, as HH:MM:SS.
        cur_time = datetime.datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)

        if valid_data is not None:
            valid_acc = gb.evaluate_accuracy(valid_data, net, ctx)
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, Valid acc %f, "
                         % (epoch, train_loss / len(train_data),
                            train_acc / len(train_data), valid_acc))
        else:
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, "
                         % (epoch, train_loss / len(train_data),
                            train_acc / len(train_data)))
        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))
Running the actual training:
# Hyper-parameters for the run.
ctx = gb.try_gpu()
num_epochs = 1
learning_rate = 0.1
weight_decay = 5e-4
lr_period = 80
lr_decay = 0.1

# Build the network and train it on the train split, validating on valid.
net = get_net(ctx)
net.hybridize()
train(net, train_data, valid_data, num_epochs, learning_rate, weight_decay,
      ctx, lr_period, lr_decay)
Prediction
import numpy as np
import pandas as pd

# Training: retrain from scratch on train + valid, without validation.
net = get_net(ctx)
net.hybridize()
train(net, train_valid_data, None, num_epochs, learning_rate, weight_decay,
      ctx, lr_period, lr_decay)

# Prediction: collect the arg-max class index for every test picture.
preds = []
for data, _ in test_data:
    output = net(data.as_in_context(ctx))
    preds.extend(output.argmax(axis=1).astype(int).asnumpy())

# Ids 1..N ordered by their string form.
# NOTE(review): presumably this matches the file-name order in which
# ImageFolderDataset enumerates the test pictures - confirm.
sorted_ids = list(range(1, len(test_ds) + 1))
sorted_ids.sort(key=str)

df = pd.DataFrame({'id': sorted_ids, 'label': preds})
# Map class indices back to the label names.
df['label'] = df['label'].apply(lambda x: train_valid_ds.synsets[x])
df.to_csv('submission.csv', index=False)
"MXNet" Seventh play _ Classifier demo signal