Three ways to load data

1. Loading directly with NumPy
```python
# Assumes train_data (N, H, W) and train_label (N,) are NumPy arrays
# and batch_size is set beforehand.
for i in range(train_data.shape[0] // batch_size):
    # Slice out one batch and add a channel axis: (batch, H, W, 1)
    image_batch = train_data[i * batch_size:(i + 1) * batch_size, :, :, np.newaxis]
    label_batch = train_label[i * batch_size:(i + 1) * batch_size]
```
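Because this manual slicing always visits the samples in the same order, it is common to reshuffle once per epoch. A minimal sketch of that pattern (the array names and shapes mirror the snippet above and are placeholder assumptions):

```python
import numpy as np

# Placeholder data, just for illustration.
train_data = np.random.rand(1000, 28, 28)
train_label = np.random.randint(0, 10, size=1000)
batch_size = 64

for epoch in range(2):
    # Reshuffle the sample order at the start of every epoch.
    perm = np.random.permutation(train_data.shape[0])
    train_data, train_label = train_data[perm], train_label[perm]
    for i in range(train_data.shape[0] // batch_size):
        image_batch = train_data[i * batch_size:(i + 1) * batch_size, :, :, np.newaxis]
        label_batch = train_label[i * batch_size:(i + 1) * batch_size]
        # ... run one training step on image_batch / label_batch here
```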
2. Loading with tf.data.Dataset.from_tensor_slices

```python
import tensorflow as tf
from tensorflow import keras

def load_dataset():
    # Step 0: prepare a dataset. Build your own, or load one from
    # tf.keras.datasets (which returns NumPy arrays); mnist is used here.
    (x, y), (x_test, y_test) = keras.datasets.mnist.load_data()
    # Step 1: wrap the arrays with tf.data.Dataset.from_tensor_slices
    db_train = tf.data.Dataset.from_tensor_slices((x, y))
    db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    # Step 2: shuffle. These methods return new datasets rather than
    # mutating in place, so the result must be assigned back.
    db_train = db_train.shuffle(1000)
    db_test = db_test.shuffle(1000)
    # Step 3: preprocessing (the preprocess function is defined below)
    db_train = db_train.map(preprocess)
    db_test = db_test.map(preprocess)
    # Step 4: batch size, feed 64 samples at a time
    db_train = db_train.batch(64)
    db_test = db_test.batch(64)
    # Step 5: number of epochs (repeat twice); the test set doesn't need this
    db_train = db_train.repeat(2)
    return db_train, db_test

def preprocess(images, labels):
    '''
    Minimal preprocessing: convert NumPy data to Tensors, one-hot encode the
    labels (for a classification problem), and normalize the images.
    The dataset yields (image, label) pairs, so the arguments must be in
    that order.
    '''
    # Normalize the images to [0, 1] while casting to float32
    images = tf.cast(images, dtype=tf.float32) / 255.
    # Cast the labels to int32, then one-hot encode them
    labels = tf.cast(labels, dtype=tf.int32)
    labels = tf.one_hot(labels, depth=10)
    return images, labels
```
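With eager execution (TensorFlow 2.x), the returned datasets can be iterated directly as Python iterables; a short usage sketch:

```python
db_train, db_test = load_dataset()

# Each iteration yields one batch: images (64, 28, 28), labels (64, 10).
for step, (images, labels) in enumerate(db_train):
    print(step, images.shape, labels.shape)
    break  # remove to iterate the full (repeated) dataset
```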
3. Loading with tf.train.string_input_producer

This queue-based input pipeline is TensorFlow 1.x API (in TF 2.x it survives only under tf.compat.v1, and tf.data is preferred):

```python
import tensorflow as tf

filename_queue = tf.train.string_input_producer(["file0.csv", "file1.csv"])

reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Default values, in case of empty columns. Also specifies the type of the
# decoded result.
record_defaults = [[1], [1], [1], [1], [1]]
col1, col2, col3, col4, col5 = tf.decode_csv(
    value, record_defaults=record_defaults)
# decode_csv yields scalar tensors, so stack them into a feature vector.
features = tf.stack([col1, col2, col3, col4])

with tf.Session() as sess:
    # Start populating the filename queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    for i in range(1200):
        # Retrieve a single instance:
        example, label = sess.run([features, col5])

    coord.request_stop()
    coord.join(threads)
```
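For comparison, a rough tf.data equivalent of the same CSV pipeline under TensorFlow 2.x. The filenames and the five-column layout are carried over from the snippet above; this is a sketch, not a drop-in replacement:

```python
import tensorflow as tf

record_defaults = [[1], [1], [1], [1], [1]]

def parse_line(line):
    # Same layout as above: four feature columns plus one label column.
    col1, col2, col3, col4, col5 = tf.io.decode_csv(line, record_defaults)
    features = tf.stack([col1, col2, col3, col4])
    return features, col5

dataset = (tf.data.TextLineDataset(["file0.csv", "file1.csv"])
           .map(parse_line)
           .batch(32))

for features, labels in dataset.take(1):
    print(features.shape, labels.shape)
```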