Downloading LeCun's MNIST data set and loading it with struct
import os
import struct

import numpy as np

# Default location of the uncompressed MNIST IDX files, relative to the
# current working directory.
_DEFAULT_DATA_DIR = os.path.join("..", "dataset")


def _load_idx(path, header_format):
    """Read one IDX file and return ``(header, values)``.

    ``header`` is the tuple of big-endian unsigned integers described by
    ``header_format`` (magic number first, then the dimension sizes).
    ``values`` is a flat integer array holding one entry per payload byte.
    """
    # The context manager closes the file even if reading fails.
    with open(path, "rb") as binfile:
        buffers = binfile.read()

    head = struct.unpack_from(header_format, buffers, 0)
    print("head,", head)
    offset = struct.calcsize(header_format)

    # Everything after the magic number is a dimension size; their product
    # is the number of payload bytes.
    count = 1
    for dim in head[1:]:
        count *= dim

    # frombuffer decodes the payload without building a multi-million
    # element Python tuple first; astype(int) yields the same default
    # integer dtype that reshaping a tuple of Python ints would produce,
    # and also makes the result a writable copy.
    values = np.frombuffer(buffers, dtype=np.uint8, count=count, offset=offset)
    return head, values.astype(int)


def loadimageset(which=0, data_dir=None):
    """Load an MNIST image file in IDX3 format.

    Args:
        which: 0 selects the training images (``train-images-idx3-ubyte``);
            any other value selects the test images
            (``t10k-images-idx3-ubyte``).
        data_dir: directory containing the files; defaults to ``../dataset``.

    Returns:
        Integer array of shape ``[image count, header[2], header[3]]``
        (``[60000, 28, 28]`` for the standard training set).
    """
    print("load image set")
    if data_dir is None:
        data_dir = _DEFAULT_DATA_DIR
    if which == 0:
        file_name = "train-images-idx3-ubyte"
    else:
        file_name = "t10k-images-idx3-ubyte"

    head, pixels = _load_idx(os.path.join(data_dir, file_name), ">IIII")
    imgs = np.reshape(pixels, [head[1], head[2], head[3]])
    print("load imgs finished")
    return imgs


def loadlabelset(which=0, data_dir=None):
    """Load an MNIST label file in IDX1 format.

    Args:
        which: 0 selects the training labels (``train-labels-idx1-ubyte``);
            any other value selects the test labels
            (``t10k-labels-idx1-ubyte``).
        data_dir: directory containing the files; defaults to ``../dataset``.

    Returns:
        Integer array of shape ``[label count, 1]``.
    """
    print("load label set")
    if data_dir is None:
        data_dir = _DEFAULT_DATA_DIR
    if which == 0:
        file_name = "train-labels-idx1-ubyte"
    else:
        file_name = "t10k-labels-idx1-ubyte"

    head, values = _load_idx(os.path.join(data_dir, file_name), ">II")
    labels = np.reshape(values, [head[1], 1])
    print("load label finished")
    return labels


if __name__ == "__main__":
    imgs = loadimageset()
    # Optional visual check with the project-local plotutil helper:
    #   import plotutil as pu
    #   pu.showImgMatrix(imgs[0])
    loadlabelset()
An easy-to-use reader for training
import gzip
import pickle
import struct

import numpy as np


class MnistReader:
    """Serve mini-batches from a pickled, gzip-compressed MNIST file.

    The file must unpickle to ``(train_set, valid_set, test_set)`` where
    each set is an ``(images, labels)`` pair of arrays.
    """

    # Length of a one-hot label vector (digits 0-9).
    NUM_CLASSES = 10
    # Shape each image is reshaped to when data_dim == 3.
    IMAGE_SHAPE = [28, 28, 1]

    def __init__(self, mnist_path, data_dim=1, one_hot=True):
        """Load the data set and prepare the batch iterators.

        Args:
            mnist_path: the path of mnist.pkl.gz.
            data_dim: 1 leaves images as stored (``[n, 784]``); 3 reshapes
                every image to ``[28, 28, 1]``.
            one_hot: if true, labels are returned one-hot encoded
                (like ``[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]``).
        """
        self.mnist_path = mnist_path
        self.data_dim = data_dim
        self.one_hot = one_hot
        self.load_minist(mnist_path)
        # Materialise the pairs: they are sliced, measured with len() and
        # shuffled in place, none of which a lazy zip iterator supports.
        self.train_datalabel = list(zip(self.train_x, self.train_y))
        self.valid_datalabel = list(zip(self.valid_x, self.valid_y))
        # Index of the next training batch within the current epoch.
        self.batch_offset_train = 0

    def _format_batch(self, pairs):
        """Turn (image, label) pairs into the configured output lists."""
        imgs = []
        labels = []
        for image, label in pairs:
            if self.data_dim == 3:
                image = np.reshape(image, self.IMAGE_SHAPE)
            imgs.append(image)
            if self.one_hot:
                encoded = np.zeros(self.NUM_CLASSES)
                encoded[label] = 1
                labels.append(encoded)
            else:
                labels.append(label)
        return imgs, labels

    def next_batch_train(self, batch_size):
        """Return the next training batch.

        Batches within an epoch do not overlap. Once every full batch has
        been served, the training pairs are reshuffled and a new epoch
        starts; a trailing partial batch is skipped.

        Returns:
            A list of images with shape [784] or [28, 28, 1] each,
            depending on ``self.data_dim``, and a list of labels that are
            scalars or length-10 vectors, depending on ``self.one_hot``.

        Raises:
            ValueError: if ``batch_size`` is not positive or exceeds the
                number of training samples (no full batch would exist).
        """
        total = len(self.train_datalabel)
        if batch_size <= 0 or batch_size > total:
            raise ValueError(
                "batch_size must be between 1 and %d, got %r"
                % (total, batch_size)
            )

        if self.batch_offset_train >= total // batch_size:
            # Epoch exhausted: start over with a fresh ordering.
            self.batch_offset_train = 0
            np.random.shuffle(self.train_datalabel)

        # batch_offset_train counts batches, so scale it to a sample index.
        start = self.batch_offset_train * batch_size
        self.batch_offset_train += 1
        return self._format_batch(
            self.train_datalabel[start:start + batch_size]
        )

    def next_batch_val(self, batch_size):
        """Return a random batch drawn from the validation set.

        The validation pairs are shuffled on every call and the first
        ``batch_size`` of them are returned, in the same format as
        :meth:`next_batch_train`.
        """
        np.random.shuffle(self.valid_datalabel)
        return self._format_batch(self.valid_datalabel[0:batch_size])

    def load_minist(self, dataset):
        """Read the pickled data set at ``dataset`` into train/valid/test attributes."""
        print("load dataset")
        # NOTE: unpickling can execute arbitrary code; only open files from
        # a trusted source. encoding="latin1" lets Python 3 read pickles
        # of NumPy arrays that were written by Python 2.
        with gzip.open(dataset, "rb") as f:
            train_set, valid_set, test_set = pickle.load(f, encoding="latin1")
        self.train_x, self.train_y = train_set
        self.valid_x, self.valid_y = valid_set
        self.test_x, self.test_y = test_set
        print("train image,label shape:",
              self.train_x.shape, self.train_y.shape)
        print("valid image,label shape:",
              self.valid_x.shape, self.valid_y.shape)
        print("test image,label shape:",
              self.test_x.shape, self.test_y.shape)
        print("load dataset end")


if __name__ == "__main__":
    mnist = MnistReader('../dataset/mnist.pkl.gz', data_dim=3)
    data, label = mnist.next_batch_train(batch_size=1)
    print(data)
    print(label)
The third way of loading the data requires gzip and struct
import gzip
import os
import struct

import numpy as np


def _read(image, label, minist_dir='your_dir/'):
    """Read one gzip-compressed MNIST image/label file pair.

    Args:
        image: file name of the IDX3 image archive inside ``minist_dir``.
        label: file name of the IDX1 label archive inside ``minist_dir``.
        minist_dir: directory that holds both archives.

    Returns:
        ``(images, labels)`` where ``labels`` is a 1-D ``int8`` array and
        ``images`` is a ``uint8`` array of shape
        ``(len(labels), rows, cols)`` with ``rows``/``cols`` taken from
        the image file header.
    """
    with gzip.open(os.path.join(minist_dir, label), 'rb') as flbl:
        # Header: magic number and item count, both big-endian uint32.
        _magic, _num = struct.unpack(">II", flbl.read(8))
        # frombuffer gives a read-only view of the bytes; copy() returns
        # a writable array that owns its data.
        labels = np.frombuffer(flbl.read(), dtype=np.int8).copy()
    with gzip.open(os.path.join(minist_dir, image), 'rb') as fimg:
        # Header: magic number, image count, rows, cols.
        _magic, _num, rows, cols = struct.unpack(">IIII", fimg.read(16))
        pixels = np.frombuffer(fimg.read(), dtype=np.uint8).copy()
        images = pixels.reshape(len(labels), rows, cols)
    return images, labels


def get_data(minist_dir='your_dir/'):
    """Load the four standard MNIST archives from ``minist_dir``.

    Returns:
        ``[train_img, train_label, test_img, test_label]``.
    """
    train_img, train_label = _read(
        'train-images-idx3-ubyte.gz',
        'train-labels-idx1-ubyte.gz',
        minist_dir)
    test_img, test_label = _read(
        't10k-images-idx3-ubyte.gz',
        't10k-labels-idx1-ubyte.gz',
        minist_dir)
    return [train_img, train_label, test_img, test_label]
Reading MNIST image data with Python