Making TFRecords with TensorFlow

Source: Internet
Author: User
Tags glob shuffle wrapper

Another installment of notes.
The first step is to read the image file names and their labels; here I use a simple cat-vs-dog classification dataset.

def _find_image_files(data_dir, labels_file):
    """Find all jpg files under data_dir and pair each with a binary label.

    Filenames like cat.0.jpg / dog.0.jpg encode their own label, so an image
    whose basename contains 'cat' gets label 0 and everything else gets 1.
    labels_file is unused for this dataset (the names carry the label) but is
    kept for interface parity with datasets that do have a label file.

    Returns:
        (filenames, labels): two aligned tuples, shuffled together.
    """
    jpeg_file_path = '%s/*.jpg' % data_dir  # glob pattern for every jpg file
    matching_files = tf.gfile.Glob(jpeg_file_path)
    labels = [0 if 'cat' in os.path.basename(f) else 1 for f in matching_files]
    # Zip files and labels together BEFORE shuffling so they stay aligned.
    paired = list(zip(matching_files, labels))
    shuffle(paired)
    filenames, labels = zip(*paired)
    return filenames, labels

Next, specify the data format to be saved — the Example protocol buffer, TensorFlow's standard record structure. Here I store only the encoded image bytes and the label.

def _convert_to_example(image_buffer, label):
    """Build a tf.train.Example holding the encoded image bytes and its label.

    Args:
        image_buffer: raw encoded image bytes (e.g. the jpeg file contents).
        label: integer class label (0 = cat, 1 = dog).

    Returns:
        A tf.train.Example ready to be serialized into a TFRecord.
    """
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/label': _int64_feature(label),
        'image/encoded': _bytes_feature(image_buffer),
    }))
    return example

Then write the data. We can write all the images and labels to a single file, or split them across several files: first partition the images and labels into chunks, then write each chunk to its own TFRecords file.

def _process_image_files(output_directory, name, filenames, labels, num_shards):
    """Write (filename, label) pairs into num_shards TFRecord shard files.

    Shard k is written to '<output_directory>/<name>-000kk-of-000nn'
    (e.g. train-00001-of-00008: shard index out of the total shard count).
    Images that cannot be read are skipped with a printed warning instead of
    aborting the whole run.
    """
    num_images = len(filenames)
    # num_shards+1 evenly spaced cut points yield num_shards contiguous
    # [start, end) index ranges over the image list.
    num_batch = np.linspace(0, num_images, num_shards + 1).astype(int)
    for counter in range(num_shards):
        output_filename = '%s-%.5d-of-%.5d' % (name, counter, num_shards)
        output_file = os.path.join(output_directory, output_filename)
        start, end = num_batch[counter], num_batch[counter + 1]
        writer = tf.python_io.TFRecordWriter(output_file)
        for j in range(start, end):
            filename, label = filenames[j], labels[j]
            try:
                with tf.gfile.FastGFile(filename, 'rb') as f:
                    image_buffer = f.read()  # raw encoded image bytes
            except Exception as e:
                print(e)
                continue  # skip unreadable files instead of failing the shard
            example = _convert_to_example(image_buffer, label)
            writer.write(example.SerializeToString())
            print('writing {} picture, the filename is {}, the label is {}, '
                  'the shard is {}'.format(j, filename, label, counter))
        writer.close()

Putting the pieces together:

def _process_dataset(output_directory, name, directory, labels_file, num_shards):
    """Find the images in `directory` and write them as num_shards TFRecord files."""
    filenames, labels = _find_image_files(directory, labels_file)
    _process_image_files(output_directory, name, filenames, labels, num_shards)

def main(unused_argv):
    """Entry point for tf.app.run: write the cat/dog images to 8 TFRecord shards."""
    # The cat/dog data has no separate label file (the filenames encode the
    # label), so labels_file is passed as an empty string; handle it here if
    # your dataset does have one.
    _process_dataset('tfdata', 'train', '../../cat_dog/train', '', 8)
if __name__ == '__main__':
    tf.app.run()

The output of the execution process

Writing 24993 picture, the filename is ... \.. \cat_dog\train\dog.10861.jpg, the label is 1, the Shard is 7
writing 24994 the picture, the filename is. \.. \cat_dog\train\dog.7031.jpg, the label is 1, the Shard is 7
writing 24995 the picture, the filename is. \.. \cat_dog\train\cat.7885.jpg, the label is 0, the Shard is 7
writing 24996 the picture, the filename is. \.. \cat_dog\train\dog.8770.jpg, the label is 1, the Shard is 7
writing 24997 the picture, the filename is. \.. \cat_dog\train\dog.6193.jpg, the label is 1, the Shard is 7
writing 24998 the picture, the filename is. \.. \cat_dog\train\cat.11390.jpg, the label is 0, the Shard is 7
writing 24999 the picture, the filename is. \.. \cat_dog\train\cat.5946.jpg, label is 0, Shard is 7

After the files are written we can read them back. First, load the written TFRecords files:

# Build a dataset that streams serialized Examples from the TFRecord files.
dataset = tf.data.TFRecordDataset(filenames)

With this dataset we can do whatever we want: to shuffle the data we can use dataset.shuffle(1024); to repeat it — i.e. loop over the data for several training epochs — we can use dataset.repeat(); and so on. Before doing any of that, though, we must parse our serialized Example records back into image format, which is what the dataset.map(parse_function) call is for.

    def _parse_function(example_proto):
        """Decode one serialized Example into (resized_image, int32_label)."""
        features = {
            'image/label': tf.FixedLenFeature((), tf.int64, default_value=0),
            'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        }
        parsed = tf.parse_single_example(example_proto, features)

        label = tf.cast(parsed['image/label'], tf.int32)
        encoded = tf.image.decode_image(parsed['image/encoded'])
        # encoded = tf.image.decode_jpeg(parsed['image/encoded'])  # with this, the next line is unnecessary
        # decode_image leaves the static shape unknown, so it must be set
        # before resize_images will accept the tensor.
        encoded.set_shape([None, None, None])
        # Resize to a common size — without it the examples cannot be batched.
        # (Alternatively, batch yourself, e.g. the old multithreaded queues.)
        encoded = tf.image.resize_images(encoded, (224, 224))
        return encoded, label

The following is a summary of all the code

# -*- coding: utf-8 -*-
"""Write a cat/dog image folder into sharded TFRecord files and read it back."""
import tensorflow as tf
import six
import os
import numpy as np
from random import shuffle


def _int64_feature(value):
    """Wrapper for inserting int64 features into an Example proto."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def _float_feature(value):
    """Wrapper for inserting float features into an Example proto."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def _bytes_feature(value):
    """Wrapper for inserting bytes features into an Example proto."""
    if isinstance(value, six.string_types):
        value = six.binary_type(value, encoding='utf-8')
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def _convert_to_example(image_buffer, label):
    """Build a tf.train.Example with the encoded image bytes and its label."""
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/label': _int64_feature(label),
        'image/encoded': _bytes_feature(image_buffer),
    }))
    return example


def _process_image_files(output_directory, name, filenames, labels, num_shards):
    """Write (filename, label) pairs into num_shards TFRecord shard files."""
    num_images = len(filenames)
    # num_shards+1 cut points -> num_shards [start, end) ranges, e.g. [0, 100, 200, ...].
    num_batch = np.linspace(0, num_images, num_shards + 1).astype(int)
    for counter in range(num_shards):
        # e.g. train-00001-of-00008: shard index out of total shard count.
        output_filename = '%s-%.5d-of-%.5d' % (name, counter, num_shards)
        output_file = os.path.join(output_directory, output_filename)
        start, end = num_batch[counter], num_batch[counter + 1]
        writer = tf.python_io.TFRecordWriter(output_file)
        for j in range(start, end):
            filename, label = filenames[j], labels[j]
            try:
                with tf.gfile.FastGFile(filename, 'rb') as f:
                    image_buffer = f.read()  # raw encoded image bytes
            except Exception as e:
                print(e)
                continue  # skip unreadable files instead of aborting the shard
            example = _convert_to_example(image_buffer, label)
            writer.write(example.SerializeToString())
            print('writing {} picture, the filename is {}, the label is {}, '
                  'the shard is {}'.format(j, filename, label, counter))
        writer.close()


def _find_image_files(data_dir, labels_file):
    """Glob all jpgs in data_dir; label 0 for cats, 1 for dogs; shuffle aligned."""
    jpeg_file_path = '%s/*.jpg' % data_dir
    matching_files = tf.gfile.Glob(jpeg_file_path)
    labels = [0 if 'cat' in os.path.basename(f) else 1 for f in matching_files]
    paired = list(zip(matching_files, labels))
    shuffle(paired)
    filenames, labels = zip(*paired)
    return filenames, labels


def _process_dataset(output_directory, name, directory, labels_file, num_shards):
    """Find the images in `directory` and write them as num_shards TFRecord files."""
    filenames, labels = _find_image_files(directory, labels_file)
    _process_image_files(output_directory, name, filenames, labels, num_shards)


def main(unused_argv):
    """Entry point: write the cat/dog images to 8 TFRecord shards under tfdata/."""
    # No separate label file for cat/dog data (names encode the label).
    _process_dataset('tfdata', 'train', '../../cat_dog/train', '', 8)


def input_function(filenames):
    """Build an input pipeline over TFRecord files; return the next-batch op."""

    def _parse_function(example_proto):
        """Decode one serialized Example into (resized_image, int32_label)."""
        features = {
            'image/label': tf.FixedLenFeature((), tf.int64, default_value=0),
            'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        }
        parsed = tf.parse_single_example(example_proto, features)
        label = tf.cast(parsed['image/label'], tf.int32)
        encoded = tf.image.decode_image(parsed['image/encoded'])
        # decode_image leaves the static shape unknown; it must be set before
        # resize_images will accept the tensor.
        encoded.set_shape([None, None, None])
        # Resize to a common size, otherwise the examples cannot be batched.
        encoded = tf.image.resize_images(encoded, (224, 224))
        return encoded, label

    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(_parse_function)
    dataset = dataset.repeat()
    # NOTE(review): the batch size was lost in the original formatting —
    # 32 is a placeholder; adjust to the value you actually need.
    dataset = dataset.batch(32)
    iterator = dataset.make_one_shot_iterator()
    next_element = iterator.get_next()
    return next_element


if __name__ == '__main__':
    tf.app.run()

We can verify that the data reads back correctly:

# -*- coding: utf-8 -*-
"""Smoke-test the TFRecord pipeline: show one image, then drain the iterator."""
import tensorflow as tf
from build1 import input_function
import matplotlib.pyplot as plt
import numpy as np

filenames = ['tfdata/train-00000-of-00008', 'tfdata/train-00001-of-00008']
next_element = input_function(filenames)

with tf.Session() as sess:
    img, lab = sess.run(next_element)
    plt.imshow(img[0].astype(int))  # np.int is deprecated; plain int works
    print(img[0].shape)
    while True:
        try:
            print(sess.run(next_element))
        except tf.errors.OutOfRangeError:
            break  # dataset exhausted
            

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.