When reading image data in TensorFlow, one often runs into subtle data-type problems. Today I hit one while converting images to TFRecord format: the images could not be read back correctly. It turned out the error came from how NumPy handles strings. For compatibility with C, np.array truncates trailing '\x00' bytes from a string. The image data had been converted to a byte string with tobytes(), giving a string like '\x92\x99...', and I needed to turn the list of images into an array for subsequent shuffling — it was this conversion from list to ndarray that introduced the problem.
When reading the image data back, I found that some saved images did not match the expected size for this dataset (227 x 227 x 3 = 154587 bytes); the error always occurred on images with fewer than 154587 bytes. After adding the statement assert(len(img) == 154587) and running again, the console showed the following error:
Traceback (most recent call last):
  File "/home/mokii/rgb-d/sun-alexnet/tmp.py", line 280, in <module>
    img2tfrecords(['/home/mokii/rgb-d/sunrgbd/statistic/trainrgblist.csv'])
  File "/home/mokii/rgb-d/sun-alexnet/tmp.py", line ..., in img2tfrecords
    ndarray2tfrecords(train, '/home/mokii/rgb-d/sunrgbd/data/train/rgb/trainrgb.tfrecords')
  File "/home/mokii/rgb-d/sun-alexnet/tmp.py", line ..., in ndarray2tfrecords
    assert(len(img) == 154587)
AssertionError
The code is shown below. It is adapted from the classic approach, and the other parts are correct. The main functions produce the TFRecords and include debug code that checks the length of the decoded data. In the end, the error turned out to come from the code marked by the comments:
#-*-coding:utf-8-*-import OS import CSV import numpy as NP from PIL import Image import tensorflow as TF import sys Reload (SYS) sys.setdefaultencoding (' UTF8 ') # todo wait to write Num_examples_per_epoch_for_train = $ test_num = 0 File _num = 1 image_size = Mb def createcsv (folderlist): for imgfolder in folderlist:csvfile = file (Imgfolder +
'. csv ', ' w+ ') imglist = Os.listdir (imgfolder) writer = Csv.writer (csvfile) for filename in imglist: Writer.writerow ([(Imgfolder + '/' + filename), Filename[0:3] + "\ n"]) Csvfile.close () def Ndarray2tfrecor DS (Ndarray, tfpath): writer = Tf.python_io.
Tfrecordwriter (Tfpath) for img in ndarray:example = Tf.train.Example (Features=tf.train.features (feature={ "Label": Tf.train.Feature (Int64_list=tf.train.int64list (Value=[int (img[1))), ' Img_raw ': tf.train.Fe Ature (Bytes_list=tf.train.byteslist (Value=[img[0])) # Here by changing to [Img[0:1].tobytes ()} or [img[0:1]. ToString ()] is Writer.write) (example. Serializetostring ()) # BEGIN Todo:print (' here in ndarray2tfrecords ') IMGs = Example.features.fea ture[' Img_raw '].bytes_list.value labels = example.features.feature[' label '].int64_list.value img = imgs[0 ] Label = Labels[0] Assert (len (imgs) = = 1) assert (len (labels) = = 1) print (' Len (img) = ', Len (IMG)) ASSERT (Len (img) = = 154587) # end TODO Writer.close () def img2tfrecords (file_list): arr
ay = [] for i in File_list:with open (I, ' RB ') as F:reader = Csv.reader (f) # cnt = 0
For line in Reader:img_path = line[0] img = Image.open (img_path) img = Img.resize ((image_size, image_size)) img = Np.asarray (img) img = Np.require (img
, Dtype=np.uint8, requirements= ' C ') if Img.ndim!= 3: print ' Error ' print img_path Img_raw = img.tobytes () Img_class = line
[1] categories = [' bathroom ', ' bedroom ', ' classroom ', ' computer_room ', ' conference_room ', ' corridor ', ' Dining_area ', ' dining_room ', ' discussion_area ', ' furniture_store ', ' home_o ffice ', ' kitchen ', ' lab ', ' lecture_theatre ', ' library ', ' living_room ', ' Office ', ' Rest_space ', ' study_space '] for I in xrange (): if categories[i] = = IMG_CL
Ass:label = Int (i) array.append ([Img_raw, label]) array = Np.array (array)
Perm = Np.arange (7984) np.random.shuffle (perm) train = array[perm[:7200]] validation = array[perm[7200:]] Ndarray2tfrecords (Train, '/home/mokii/rgb-d/sunrgbd/data/train/rgb/trainrgb.tfrecords ') ndarray2tfrecords ( Validation, '/home/mokii/rgb-d/suNrgbd/data/validation/rgb/validationrgb.tfrecords ') def inputs (File_list, batch_size): Filename_queue = Tf.train.str Ing_input_producer (file_list) reader = tf. Tfrecordreader () _, Serialized_example = Reader.read (filename_queue) features = Tf.parse_single_example (serialized_e Xample, features={' label ': TF. Fixedlenfeature ([], Tf.int64), ' Img_raw ': TF.
Fixedlenfeature ([], tf.string),}) img = Tf.decode_raw (features[' Img_raw '), tf.uint8) img = Tf.reshape (IMG, [Image_size, Image_size, 3]) # img = Tf.cast (IMG, tf.float32) label = tf.cast (features[' label '], TF. Int64) # label = tf.reshape (label, [1]) Min_fraction_of_examples_in_queue = 0.5 Min_queue_examples = Int (num_examples_per _epoch_for_train * min_fraction_of_examples_in_queue) # print (' label: ', label) return _generate_image_and_label_batch (IMG, label, min_queue_examples, Batch_size def _generate_image_and_label_batch (image, label, Min_queue_examples, batch_size): Num_preprocess_threads = 5 Images, Label_batch = Tf.train.batch ([image, label], batch_size=1, Num_threads=1, Capacity=1) return I Mages, label_batch if __name__ = = "__main__": # createcsv (folder_list) img2tfrecords (['/HOME/MOKII/RGB-D/SUNRGBD
/statistic/trainrgblist.csv ']) print ' done! ' DataSet = "/home/mokii/rgb-d/sunrgbd/data/train/rgb/trainrgb.tfrecords" cnt = 0 for serialized_example in Tf.pytho N_io.tf_record_iterator (DataSet): CNT + + 1 Print CNT example = Tf.train.Example () example. Parsefromstring (serialized_example) IMGs = example.features.feature[' Img_raw '].bytes_list.value labels = example.f eatures.feature[' label '].int64_list.value img = imgs[0] label = Labels[0] Assert (len (IMGs) = 1) Assert (len (labels) = = 1) # print (' Len (img) = ', Len (IMG)) ASSERT (Len (img) = = 154587) img =Tf.decode_raw (IMG, tf.uint8) img = Tf.reshape (IMG, [Image_size, Image_size, 3]) # img = Tf.cast (img, tf.float32) Label = tf.cast (label, Tf.int64)
After some searching, I finally concluded that the problem happened as follows — which was a bit disheartening:
>>> a = '\x01\x00'
>>> a = np.array([a])
>>> a
array(['\x01'],
      dtype='|S2')
>>> a.tobytes()
'\x01\x00'
>>> a[0].tobytes()
'\x01'
>>> a[0:1].tobytes()
'\x01\x00'
>>>
You can work around the bug as shown in the comment above (using a[0:1].tobytes() instead of a[0].tobytes()), but my colleague said this is still not very reliable. The best solution is to bypass NumPy's automatic truncation of trailing '\x00' bytes in strings altogether — for example, by keeping the raw byte strings and labels in separate plain Python lists instead of packing them into a NumPy string array.
#-*-coding:utf-8-*-import OS import CSV import numpy as NP from PIL import Image import tensorflow as TF import sys r Eload (SYS) sys.setdefaultencoding (' UTF8 ') #todo wait to write Num_examples_per_epoch_for_train = Test_num = 0 File_n Um = 1 image_size = Mb def createcsv (folderlist): for imgfolder in folderlist:csvfile = File (imgfolder+ '. C
SV ', ' w+ ') imglist = Os.listdir (imgfolder) writer = Csv.writer (csvfile) for filename in imglist: Writer.writerow ([(imgfolder+ '/' +filename), Filename[0:3] + "\ n"]) Csvfile.close () def Ndarray2tfre Cords (images, labels, tfpath): writer = Tf.python_io. Tfrecordwriter (Tfpath) for IMG, label in zip (images, labels): example = Tf.train.Example (Features=tf.train.fea Tures (feature={"label": Tf.train.Feature (Int64_list=tf.train.int64list (value=[int (label))), ' img _raw ': Tf.train.Feature (Bytes_list=tf.train.byteslist (Value=[img.tobytes ()))))
Writer.write (example. Serializetostring ()) writer.close () def img2tfrecords (file_list): Image_list = [] Label_list = [] for I In File_list:with open (I, ' RB ') as F:reader = Csv.reader (f) CNT = 0 for Line in Reader:img_path = line[0] img = Image.open (img_path) img = img
. Resize ((image_size, image_size)) Img_raw = Np.asarray (img) if Img_raw.ndim!= 3:
print ' Error ' Print img_path # im = Img.split ()
# If Len (IM)!= 3: # print ' Error ' # print Img_path # Img_raw = img.tobytes (' raw ') # Img_class = line[1] Categories =[' bathroom ',
' Bedroom ', ' classroom ', ' computer_room ', ' conference_room ', ' Corridor ', ' Dining_area ', ' dining_room ' , ' Discussion_area ', ' furniture_store ', ' home_office ', ' kitchen ', ' lab ', ' lecture_theatre ', ' library ', ' living_room '
, ' Office ', ' Rest_space ', ' study_space '] for I in Xrange (19): If categories[i] = = Img_class:label = Int (i) # array.append ([Img_raw, Labe L]) image_list.append (Img_raw) label_list.append (label) # assert (Len (a)
Rray[-1][0] = = 154587) # cnt + + 1 # If CNT >= 325: # Break image_list= Np.array (image_list) label_list = Np.array (label_list) perm = Np.arange (7984) Np.random.shuffle (p ERM) train_img = image_list[perm[:7200]] [Train_label = label_list[perm[:7200]] validation_img = Image_list[per M[7200:]] Validation_label = label_list[perm[7200:]] # images = image_list # labels = label_list Ndarray 2tfrecords (train_img, trAin_label, '/home/mokii/rgb-d/sunrgbd/data/train/rgb/trainrgb.tfrecords ') ndarray2tfrecords (validation_img, Validation_label, '/home/mokii/rgb-d/sunrgbd/data/validation/rgb/validationrgb.tfrecords ') # ndarray2tfrecords ( Array, '/home/mokii/rgb-d/sunrgbd/data/test/rgb/testrgb.tfrecords ') def inputs (file_list,batch_size): Filename_que UE = Tf.train.string_input_producer (file_list) reader = tf. Tfrecordreader () _, Serialized_example = Reader.read (filename_queue) features = Tf.parse_single_example (serialized _example, features={' label ': TF. Fixedlenfeature ([], Tf.int64), ' Img_raw ': TF. Fixedlenfeature ([], tf.string),}) img = Tf.decode_raw (features[' Img_raw '), TF . 
uint8) img = Tf.reshape (IMG, [Image_size, Image_size, 3]) # img = Tf.cast (IMG, tf.float32) label = Tf.cast (fe atures[' label '],TF.Int64) # label = tf.reshape (label, [1]) Min_fraction_of_examples_in_queue = 0.5 Min_queue_examples = Int (num_ Examples_per_epoch_for_train * min_fraction_of_examples_in_queue) # print (' label: ', label) return _generate_image_a Nd_label_batch (IMG, label, Min_queue_examples, Batch_size) def _generate_image_and_label_batch (image, label, Min_ Queue_examples, batch_size): Num_preprocess_threads = 5 images, Label_batch = Tf.train.shuffle_batch ([i Mage, label], Batch_size=batch_size, Num_threads=num_preprocess_threads, Capacity=min_queue_exa Mples + 3 * batch_size, Min_after_dequeue=min_queue_examples) return images, Tf.reshape (Label_batch, [batch_s Ize,-1]) images, Label_batch = Tf.train.shuffle_batch ([image, label], batch_size=44, num _threads=1, capacity=2, min_after_dequeue=1) # images, Label_batch = Tf.train.batch (# [IMA GE, label], # Batch_size=1, # Num_threads=1, # Capacity=1) return images, label_batch if __name__ = "__main__": #crea
Tecsv (folder_list) img2tfrecords (['/home/mokii/rgb-d/sunrgbd/statistic/trainrgblist.csv ']) print ' DONE! ' DataSet = "/home/mokii/rgb-d/sunrgbd/data/train/rgb/trainrgb.tfrecords" # DataSet = "/home/mokii/rgb-d/sunrgbd/data/ Validation/rgb/validationrgb.tfrecords "cnt = 0 for serialized_example in Tf.python_io.tf_record_iterator (dataset) : CNT + + 1 Print CNT example = Tf.train.Example () example. Parsefromstring (serialized_example) IMGs = example.features.feature[' Img_raw '].bytes_list.value labels = e xample.features.feature[' label '].int64_list.value img = imgs[0] label = Labels[0] Assert (Len (img
s) = = 1 assert (len (labels) = = 1) print (' Len (img) = ', Len (IMG)) ASSERT (Len (img) = 154587) img = Tf.decode_raw (IMG, tf.uint8) img = Tf.reshapE (IMG, [Image_size, Image_size, 3]) # img = Tf.cast (IMG, tf.float32) label = tf.cast (label, Tf.int64)