TensorFlow image preprocessing: a NumPy data-reading pitfall

Source: Internet
Author: User
Tags: assert, shuffle, string, format

When reading image data in TensorFlow, subtle data-type problems come up all the time. This one appeared while converting images to TFRecord format and reading them back, and the culprit turned out to be NumPy's handling of strings: for compatibility with C, np.array truncates trailing '\x00' bytes at the end of a string. The images had been serialized with tobytes(), which turns the pixel values into a raw byte string (displayed as hex escapes such as '\x92\x99\...'), and I needed to collect the list of [image, label] pairs into an array so it could be shuffled. Converting that list into an ndarray is where things went wrong.
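As a quick illustration of the serialization step involved (a minimal sketch of my own, not taken from the script below; the synthetic Image.new picture just stands in for a real photo), a resized 227x227 RGB image should always serialize to exactly 154587 bytes:

import numpy as np
from PIL import Image

# synthetic stand-in for a real photo, so the sketch runs on its own
img = Image.new('RGB', (640, 480))
img = img.resize((227, 227))
arr = np.asarray(img, dtype=np.uint8)   # shape (227, 227, 3)
raw = arr.tobytes()                     # raw byte string that goes into the TFRecord
print(len(raw))                         # 227 * 227 * 3 = 154587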


When reading the saved data back, I found that some records did not match the expected image dimensions (227 x 227 x 3 = 154587 bytes); certain images always came back with fewer than 154587 bytes. After adding an assert(len(img) == 154587) check and running again, the console reported the following error:

Traceback (most recent call last):
  File "/home/mokii/rgb-d/sun-alexnet/tmp.py", line 280, in <module>
    img2tfrecords(['/home/mokii/rgb-d/sunrgbd/statistic/trainrgblist.csv'])
  File "/home/mokii/rgb-d/sun-alexnet/tmp.py", line , in img2tfrecords
    ndarray2tfrecords(train, '/home/mokii/rgb-d/sunrgbd/data/train/rgb/trainrgb.tfrecords')
  File "/home/mokii/rgb-d/sun-alexnet/tmp.py", line , in ndarray2tfrecords
    assert(len(img) == 154587)
AssertionError

The code is below. It is adapted from the standard recipe, and everything else in it is fine; the main block builds the TFRecords and then, for debugging, decodes the records again and checks the length of the data. The error turned out to come from the line marked by the comment:

# -*- coding: utf-8 -*-
import os
import csv
import numpy as np
from PIL import Image
import tensorflow as tf
import sys

reload(sys)
sys.setdefaultencoding('utf8')

# TODO: wait to write
num_examples_per_epoch_for_train = 7200   # assumed value (matches the 7200-image training split); the original number was lost
test_num = 0
file_num = 1
image_size = 227   # 227 * 227 * 3 = 154587


def createcsv(folderlist):
    for imgfolder in folderlist:
        csvfile = file(imgfolder + '.csv', 'w+')
        imglist = os.listdir(imgfolder)
        writer = csv.writer(csvfile)
        for filename in imglist:
            writer.writerow([(imgfolder + '/' + filename), filename[0:3] + "\n"])
        csvfile.close()


def ndarray2tfrecords(ndarray, tfpath):
    writer = tf.python_io.TFRecordWriter(tfpath)
    for img in ndarray:
        example = tf.train.Example(features=tf.train.Features(feature={
            "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[int(img[1])])),
            # the fix: change this to [img[0:1].tobytes()] or [img[0:1].tostring()]
            'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img[0]]))
        }))
        writer.write(example.SerializeToString())
        # BEGIN TODO
        print('here in ndarray2tfrecords')
        imgs = example.features.feature['img_raw'].bytes_list.value
        labels = example.features.feature['label'].int64_list.value
        img = imgs[0]
        label = labels[0]
        assert(len(imgs) == 1)
        assert(len(labels) == 1)
        print('len(img) = ', len(img))
        assert(len(img) == 154587)
        # END TODO
    writer.close()


def img2tfrecords(file_list):
    array = []
    for i in file_list:
        with open(i, 'rb') as f:
            reader = csv.reader(f)
            # cnt = 0
            for line in reader:
                img_path = line[0]
                img = Image.open(img_path)
                img = img.resize((image_size, image_size))
                img = np.asarray(img)
                img = np.require(img, dtype=np.uint8, requirements='C')
                if img.ndim != 3:
                    print 'error'
                    print img_path
                img_raw = img.tobytes()
                img_class = line[1]
                categories = ['bathroom', 'bedroom', 'classroom', 'computer_room', 'conference_room',
                              'corridor', 'dining_area', 'dining_room', 'discussion_area',
                              'furniture_store', 'home_office', 'kitchen', 'lab', 'lecture_theatre',
                              'library', 'living_room', 'office', 'rest_space', 'study_space']
                for i in xrange(19):
                    if categories[i] == img_class:
                        label = int(i)
                array.append([img_raw, label])
    array = np.array(array)

    perm = np.arange(7984)
    np.random.shuffle(perm)
    train = array[perm[:7200]]
    validation = array[perm[7200:]]
    ndarray2tfrecords(train, '/home/mokii/rgb-d/sunrgbd/data/train/rgb/trainrgb.tfrecords')
    ndarray2tfrecords(validation, '/home/mokii/rgb-d/sunrgbd/data/validation/rgb/validationrgb.tfrecords')


def inputs(file_list, batch_size):
    filename_queue = tf.train.string_input_producer(file_list)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example, features={
        'label': tf.FixedLenFeature([], tf.int64),
        'img_raw': tf.FixedLenFeature([], tf.string),
    })
    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [image_size, image_size, 3])
    # img = tf.cast(img, tf.float32)
    label = tf.cast(features['label'], tf.int64)
    # label = tf.reshape(label, [1])
    min_fraction_of_examples_in_queue = 0.5
    min_queue_examples = int(num_examples_per_epoch_for_train * min_fraction_of_examples_in_queue)
    # print('label: ', label)
    return _generate_image_and_label_batch(img, label, min_queue_examples, batch_size)


def _generate_image_and_label_batch(image, label, min_queue_examples, batch_size):
    num_preprocess_threads = 5
    images, label_batch = tf.train.batch([image, label], batch_size=1, num_threads=1, capacity=1)
    return images, label_batch


if __name__ == "__main__":
    # createcsv(folder_list)
    img2tfrecords(['/home/mokii/rgb-d/sunrgbd/statistic/trainrgblist.csv'])
    print 'done!'
    dataset = "/home/mokii/rgb-d/sunrgbd/data/train/rgb/trainrgb.tfrecords"
    cnt = 0
    for serialized_example in tf.python_io.tf_record_iterator(dataset):
        cnt += 1
        print cnt
        example = tf.train.Example()
        example.ParseFromString(serialized_example)
        imgs = example.features.feature['img_raw'].bytes_list.value
        labels = example.features.feature['label'].int64_list.value
        img = imgs[0]
        label = labels[0]
        assert(len(imgs) == 1)
        assert(len(labels) == 1)
        # print('len(img) = ', len(img))
        assert(len(img) == 154587)
        img = tf.decode_raw(img, tf.uint8)
        img = tf.reshape(img, [image_size, image_size, 3])
        # img = tf.cast(img, tf.float32)
        label = tf.cast(label, tf.int64)


After some searching, I finally traced the problem to the following behavior, which left me somewhat speechless:

>>> import numpy as np
>>> a = '\x01\x00'
>>> a = np.array([a])
>>> a
array(['\x01'],
      dtype='|S2')
>>> a.tobytes()
'\x01\x00'
>>> a[0].tobytes()
'\x01'
>>> a[0:1].tobytes()
'\x01\x00'
>>> 
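For comparison, a quick check of my own (not from the original post): if the list is turned into an array with dtype=object, NumPy stores the Python strings themselves rather than fixed-width 'S' strings, and the trailing '\x00' survives:

>>> import numpy as np
>>> a = '\x01\x00'
>>> b = np.array([a], dtype=object)
>>> b[0]
'\x01\x00'
>>> len(b[0])
2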


The bug can be patched as noted in the comment above (slice with img[0:1].tobytes() instead of indexing a single element), but as a senior labmate of mine pointed out, that is still not very robust. The best option is to bypass NumPy's automatic truncation of trailing '\x00' bytes altogether.
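That is what the revised script below does. As a rough sketch of the pattern with toy data (the names and sizes here are only illustrative): keep the decoded uint8 arrays and the labels in plain Python lists, shuffle an index permutation, and call tobytes() only at the moment each record is written, so the raw byte strings never pass through np.array:

import numpy as np

# toy stand-ins for the real images and labels (illustrative only)
image_list = [np.zeros((227, 227, 3), dtype=np.uint8) for _ in range(4)]
label_list = [0, 1, 2, 3]

perm = np.arange(len(image_list))
np.random.shuffle(perm)

for i in perm:
    raw = image_list[i].tobytes()        # serialize at write time, full buffer kept
    assert len(raw) == 227 * 227 * 3     # 154587 bytes, never truncated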

# -*- coding: utf-8 -*-
import os
import csv
import numpy as np
from PIL import Image
import tensorflow as tf
import sys

reload(sys)
sys.setdefaultencoding('utf8')

# TODO: wait to write
num_examples_per_epoch_for_train = 7200   # assumed value (matches the 7200-image training split); the original number was lost
test_num = 0
file_num = 1
image_size = 227


def createcsv(folderlist):
    for imgfolder in folderlist:
        csvfile = file(imgfolder + '.csv', 'w+')
        imglist = os.listdir(imgfolder)
        writer = csv.writer(csvfile)
        for filename in imglist:
            writer.writerow([(imgfolder + '/' + filename), filename[0:3] + "\n"])
        csvfile.close()


def ndarray2tfrecords(images, labels, tfpath):
    writer = tf.python_io.TFRecordWriter(tfpath)
    for img, label in zip(images, labels):
        example = tf.train.Example(features=tf.train.Features(feature={
            "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[int(label)])),
            'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img.tobytes()]))
        }))
        writer.write(example.SerializeToString())
    writer.close()


def img2tfrecords(file_list):
    image_list = []
    label_list = []
    for i in file_list:
        with open(i, 'rb') as f:
            reader = csv.reader(f)
            cnt = 0
            for line in reader:
                img_path = line[0]
                img = Image.open(img_path)
                img = img.resize((image_size, image_size))
                img_raw = np.asarray(img)
                if img_raw.ndim != 3:
                    print 'error'
                    print img_path
                # im = img.split()
                # if len(im) != 3:
                #     print 'error'
                #     print img_path
                # img_raw = img.tobytes('raw')
                img_class = line[1]
                categories = ['bathroom', 'bedroom', 'classroom', 'computer_room', 'conference_room',
                              'corridor', 'dining_area', 'dining_room', 'discussion_area',
                              'furniture_store', 'home_office', 'kitchen', 'lab', 'lecture_theatre',
                              'library', 'living_room', 'office', 'rest_space', 'study_space']
                for i in xrange(19):
                    if categories[i] == img_class:
                        label = int(i)
                # array.append([img_raw, label])
                image_list.append(img_raw)
                label_list.append(label)
                # assert(len(array[-1][0]) == 154587)
                # cnt += 1
                # if cnt >= 325:
                #     break

    image_list = np.array(image_list)
    label_list = np.array(label_list)
    perm = np.arange(7984)
    np.random.shuffle(perm)
    train_img = image_list[perm[:7200]]
    train_label = label_list[perm[:7200]]
    validation_img = image_list[perm[7200:]]
    validation_label = label_list[perm[7200:]]
    # images = image_list
    # labels = label_list
    ndarray2tfrecords(train_img, train_label, '/home/mokii/rgb-d/sunrgbd/data/train/rgb/trainrgb.tfrecords')
    ndarray2tfrecords(validation_img, validation_label, '/home/mokii/rgb-d/sunrgbd/data/validation/rgb/validationrgb.tfrecords')
    # ndarray2tfrecords(array, '/home/mokii/rgb-d/sunrgbd/data/test/rgb/testrgb.tfrecords')


def inputs(file_list, batch_size):
    filename_queue = tf.train.string_input_producer(file_list)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example, features={
        'label': tf.FixedLenFeature([], tf.int64),
        'img_raw': tf.FixedLenFeature([], tf.string),
    })
    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [image_size, image_size, 3])
    # img = tf.cast(img, tf.float32)
    label = tf.cast(features['label'], tf.int64)
    # label = tf.reshape(label, [1])
    min_fraction_of_examples_in_queue = 0.5
    min_queue_examples = int(num_examples_per_epoch_for_train * min_fraction_of_examples_in_queue)
    # print('label: ', label)
    return _generate_image_and_label_batch(img, label, min_queue_examples, batch_size)


def _generate_image_and_label_batch(image, label, min_queue_examples, batch_size):
    num_preprocess_threads = 5
    images, label_batch = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_size,
        min_after_dequeue=min_queue_examples)
    return images, tf.reshape(label_batch, [batch_size, -1])

    images, label_batch = tf.train.shuffle_batch(
        [image, label], batch_size=44, num_threads=1, capacity=2, min_after_dequeue=1)
    # images, label_batch = tf.train.batch(
    #     [image, label], batch_size=1, num_threads=1, capacity=1)
    return images, label_batch


if __name__ == "__main__":
    # createcsv(folder_list)
    img2tfrecords(['/home/mokii/rgb-d/sunrgbd/statistic/trainrgblist.csv'])
    print 'done!'
    dataset = "/home/mokii/rgb-d/sunrgbd/data/train/rgb/trainrgb.tfrecords"
    # dataset = "/home/mokii/rgb-d/sunrgbd/data/validation/rgb/validationrgb.tfrecords"
    cnt = 0
    for serialized_example in tf.python_io.tf_record_iterator(dataset):
        cnt += 1
        print cnt
        example = tf.train.Example()
        example.ParseFromString(serialized_example)
        imgs = example.features.feature['img_raw'].bytes_list.value
        labels = example.features.feature['label'].int64_list.value
        img = imgs[0]
        label = labels[0]
        assert(len(imgs) == 1)
        assert(len(labels) == 1)
        print('len(img) = ', len(img))
        assert(len(img) == 154587)
        img = tf.decode_raw(img, tf.uint8)
        img = tf.reshape(img, [image_size, image_size, 3])
        # img = tf.cast(img, tf.float32)
        label = tf.cast(label, tf.int64)
 

