How do you use TFRecord in TensorFlow?

Posted on 2022-09-05 02:52:37

  1. How do I replace the MNIST dataset in the code below with TFRecord data?
  2. Assume the TFRecord datasets are already prepared: train.tfrecords and test.tfrecords are both in the same directory as the .py file.
  3. The TFRecord reading code already exists:
def read_and_decode(filename):
    # Build a queue of input file names
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    # Parse one serialized Example into its named features
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'label': tf.FixedLenFeature([], tf.int64),
                                           'img_raw': tf.FixedLenFeature([], tf.string),
                                       })
    # Decode the raw bytes back into a uint8 image and normalize to [-0.5, 0.5]
    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [512, 288, 3])
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
    label = tf.cast(features['label'], tf.int32)
    return img, label
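
For reference, a record in this img_raw/label layout would be written along these lines. This is a minimal sketch of my own (not from the original post), assuming each img is a 512x288x3 uint8 NumPy array and label is an int:

import numpy as np
import tensorflow as tf

writer = tf.python_io.TFRecordWriter('train.tfrecords')
for img, label in my_dataset:  # my_dataset is a placeholder for your own (image, label) pairs
    example = tf.train.Example(features=tf.train.Features(feature={
        'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[int(label)])),
        'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img.tobytes()])),
    }))
    writer.write(example.SerializeToString())
writer.close()
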
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

mnist = input_data.read_data_sets("/tmp/tensorflow/mnist/input_data", one_hot=True)

# Parameters
learning_rate = 0.001
training_iters = 200000
batch_size = 64
display_step = 20

# Network Parameters
n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)
dropout = 0.75  # Dropout, probability to keep units

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)  # dropout (keep probability)


def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))


# Create custom model
def conv2d(name, l_input, w, b):
    return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(l_input, w, strides=[1, 1, 1, 1], padding='SAME'), b), name=name)


def max_pool(name, l_input, k):
    return tf.nn.max_pool(l_input, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME', name=name)


def norm(name, l_input, lsize=4):
    return tf.nn.lrn(l_input, lsize, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name=name)


def dnn(_x, _weights, _biases, _dropout):
    _x = tf.nn.dropout(_x, _dropout)
    d1 = tf.nn.relu(tf.nn.bias_add(tf.matmul(_x, _weights['wd1']), _biases['bd1']), name="d1")

    d2x = tf.nn.dropout(d1, _dropout)
    d2 = tf.nn.relu(tf.nn.bias_add(tf.matmul(d2x, _weights['wd2']), _biases['bd2']), name="d2")

    dout = tf.nn.dropout(d2, _dropout)
    out = tf.matmul(dout, _weights['out']) + _biases['out']
    return out


# Store layers weight & bias
weights = {
    'wd1': tf.Variable(tf.random_normal([784, 600], stddev=0.01)),
    'wd2': tf.Variable(tf.random_normal([600, 480], stddev=0.01)),
    'out': tf.Variable(tf.random_normal([480, 10]))
}

biases = {
    'bd1': tf.Variable(tf.random_normal([600])),
    'bd2': tf.Variable(tf.random_normal([480])),
    'out': tf.Variable(tf.random_normal([10]))
}

# Construct model
pred = dnn(x, weights, biases, keep_prob)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred))  # recent TF 1.x requires named arguments here
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

# Summaries for TensorBoard
tf.summary.scalar("loss", cost)
tf.summary.scalar("accuracy", accuracy)
# Merge all summaries to a single operator
merged_summary_op = tf.summary.merge_all()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    summary_writer = tf.summary.FileWriter('/tmp/logs/ex12_dnn', graph=sess.graph)
    step = 1
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Fit training using batch data
        sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout})
        if step % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
            # Calculate batch loss
            loss = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
            print("Iter " + str(step * batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) + ", Training Accuracy= " + "{:.5f}".format(acc))
            summary_str = sess.run(merged_summary_op, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
            summary_writer.add_summary(summary_str, step)
        step += 1
    print("Optimization Finished!")
    # Calculate accuracy for 256 mnist test images
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={x: mnist.test.images[:256], y: mnist.test.labels[:256], keep_prob: 1.}))
    # 98%

I don't know exactly how to use it; I changed the code several times, but every run threw an error.

The error looked like this:

ValueError: Only call `softmax_cross_entropy_with_logits` with named arguments (labels=..., logits=..., ...)
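
This ValueError means that in recent TF 1.x releases the loss must be built with keyword arguments, i.e. the cost line has to read:

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred))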

Below is the modified, runnable partial code. No need to reply; the solution is as follows.

The following code runs, but it is not the MNIST conversion code.
  • decode
def read_and_decode(filename, batch_size):
    # Build a queue from the file name, limited to 50 epochs
    filename_queue = tf.train.string_input_producer([filename], num_epochs=50)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)  # returns the file name and the serialized record
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'data': tf.FixedLenFeature([], tf.string),
        }
    )
    data = tf.decode_raw(features['data'], tf.float32)
    data = tf.reshape(data, [961])
    label = tf.cast(features['label'], tf.int32)
    # capacity must exceed min_after_dequeue by at least one batch, otherwise
    # dequeue_many can never be satisfied and the queue deadlocks
    data_batch, label_batch = tf.train.shuffle_batch([data, label],
                                                     batch_size=batch_size,
                                                     num_threads=64,
                                                     capacity=3000,
                                                     min_after_dequeue=2000)
    return data_batch, tf.reshape(label_batch, [batch_size])
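
One detail the snippet above relies on but does not show: with num_epochs set, the epoch counter is a local variable, so local variables must be initialized and the queue runners started before anything can be dequeued. A minimal consumption sketch (names of my own choosing):

data_batch, label_batch = read_and_decode('train.tfrecords', batch_size=64)

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            data, labels = sess.run([data_batch, label_batch])
            # ... run a training step on data/labels here ...
    except tf.errors.OutOfRangeError:
        pass  # raised once the 50 epochs are exhausted
    finally:
        coord.request_stop()
        coord.join(threads)
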
  • encode
import numpy as np
import tensorflow as tf


def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


# id, label and dotrow come from your own data source
example = tf.train.Example(
    features=tf.train.Features(
        feature={
            'id': _int64_feature(int(id)),
            'label': _int64_feature(int(label)),
            "data": _bytes_feature(np.array(dotrow).tostring())  ### key line: serialize the array's raw bytes (tostring() is the old name for tobytes())
        }
    )
)
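
To answer question 1 directly, the MNIST conversion can follow the same pattern. A sketch of my own (not from the post), reusing the _int64_feature/_bytes_feature helpers above; with one_hot=False the labels come back as plain integers, and each image is a float32 vector of length 784, so the reader would reshape to [784] rather than [961]:

from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import tensorflow as tf

mnist = input_data.read_data_sets("/tmp/tensorflow/mnist/input_data", one_hot=False)

writer = tf.python_io.TFRecordWriter('train.tfrecords')
for img, label in zip(mnist.train.images, mnist.train.labels):
    example = tf.train.Example(features=tf.train.Features(feature={
        'label': _int64_feature(int(label)),
        'data': _bytes_feature(np.array(img, dtype=np.float32).tostring()),
    }))
    writer.write(example.SerializeToString())
writer.close()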


Comments (2)

十年不长 2022-09-12 02:52:37

Got it solved. The images I was reading were in the wrong format, which made the array lengths inconsistent, so my own code kept failing...

默嘫て 2022-09-12 02:52:37

Not sure whether I understand you correctly, but the line mnist = input_data.read_data_sets("/tmp/tensorflow/mnist/input_data", one_hot=True) is what reads the MNIST data. Replace it, use the TFRecord reading code to read the TFRecord data instead, and also replace every use of mnist in the training code below it, making sure the parameters of the convolution ops you use match the TFRecord data.
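
Putting that advice together, a sketch of the wiring (my own code; it assumes the MNIST records were written as in the conversion sketch above, with read_and_decode reshaping to [784]):

import numpy as np

img_batch, label_batch = read_and_decode('train.tfrecords', batch_size)

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    step = 1
    while step * batch_size < training_iters:
        # pull a batch from the TFRecord pipeline instead of mnist.train.next_batch
        batch_xs, batch_ys = sess.run([img_batch, label_batch])
        batch_ys = np.eye(n_classes)[batch_ys]  # one-hot encode the integer labels
        sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout})
        step += 1
    coord.request_stop()
    coord.join(threads)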
