
TensorFlow: dimension mismatch when computing the loss

I am running a simple CNN model on 64×64 images.
Layer 1: convolution with 8 filters of 5×5, then 5×5 pooling with stride 2;
Layer 2: convolution with 16 filters of 5×5, then 5×5 pooling with stride 2;
Layer 3: convolution with 32 filters of 1×1, then global pooling over the full 16×16 map.
That gives a 1×1×32 feature map, which is flattened and fed into the fully connected layer.
Layer 4: a fully connected layer with a 2-dimensional output.
So by my calculation the fully connected layer should receive 32 features and output 2 values.
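
For reference, this is how I worked out those shapes (a minimal standalone sketch that mirrors the tf.layers calls in my code; the placeholder and the fixed batch size of 20 are only for illustration):

import tensorflow as tf

x = tf.placeholder(tf.float32, [20, 64, 64, 1])           # what I think the input is
x = tf.layers.conv2d(x, 8, [5, 5], padding="same")
x = tf.layers.average_pooling2d(x, pool_size=5, strides=2, padding="same")
print(x.shape)        # (20, 32, 32, 8)
x = tf.layers.conv2d(x, 16, [5, 5], padding="same")
x = tf.layers.average_pooling2d(x, pool_size=5, strides=2, padding="same")
print(x.shape)        # (20, 16, 16, 16)
x = tf.layers.conv2d(x, 32, [1, 1], padding="same")
x = tf.layers.average_pooling2d(x, pool_size=16, strides=1, padding="valid")
print(x.shape)        # (20, 1, 1, 32)
logits = tf.layers.dense(tf.reshape(x, [-1, 32]), 2)
print(logits.shape)   # (20, 2) -- 32 features in, 2 logits out per image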

The batch_size is 20,
but when I compute the cross-entropy loss with sparse_softmax_cross_entropy I keep getting a dimension error:
logits and labels must have the same first dimension, got logits shape [1280,2] and labels shape [20]
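
As far as I understand, tf.losses.sparse_softmax_cross_entropy wants logits of shape [batch_size, num_classes] and integer labels of shape [batch_size], something like this (a minimal sketch with dummy values):

import tensorflow as tf

logits = tf.zeros([20, 2])                 # [batch_size, num_classes]
labels = tf.zeros([20], dtype=tf.int32)    # [batch_size], class indices in {0, 1}
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)   # fine

bad_logits = tf.zeros([1280, 2])           # first dimensions differ
# tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=bad_logits)    # raises the error above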

The code is as follows:

# -*- coding: utf-8 -*-

# Steganalysis with High-Level API 

# import dataset
import load_record

import os
import json

import tensorflow as tf
import numpy as np
import layer_module

flags = tf.app.flags

flags.DEFINE_integer('num_epochs', 10, 'Number of training epochs')
flags.DEFINE_integer('batch_size', 20, 'Batch size')
flags.DEFINE_float('learning_rate', 0.01, 'Learning rate')
flags.DEFINE_float('dropout_rate', 0.5, 'Dropout rate')

flags.DEFINE_string('train_dataset', './dataset/train512.tfrecords',
                    'Filename of training dataset')
flags.DEFINE_string('eval_dataset', './dataset/test512.tfrecords',
                    'Filename of evaluation dataset')
flags.DEFINE_string('test_dataset', './dataset/test512.tfrecords',
                    'Filename of testing dataset')
flags.DEFINE_string('model_dir', 'models/steganalysis_cnn_model',
                    'Directory to save the model')

FLAGS = flags.FLAGS

def stg_model_fn(features, labels, mode):
    # Input Layer
    x = tf.reshape(features, [-1, 64, 64, 1])
    # print(x)
    x = layer_module.conv_group(
        inputs = x,
        activation = "tanh",
        filters = 8,
        kernel_size = [5, 5],
        pool_size = 5,
        strides = 2,
        abs_layer = True,
        pool_padding = "same")
    print(x)

    x = layer_module.conv_group(
        inputs = x,
        filters = 16,
        activation = "tanh",
        kernel_size = [5, 5],
        pool_size = 5,
        strides = 2,
        abs_layer = False,
        pool_padding = "same")
    print(x)

    x = layer_module.conv_group(
        inputs = x,
        filters = 32,
        activation = "relu",
        kernel_size = [1, 1],
        pool_size = 16,
        strides = 1,
        abs_layer = False,
        pool_padding = "valid")
    print(x)


    x = tf.reshape(x, [-1, 32])
    x = tf.layers.dense(inputs = x, units = 2)
    
    # x = tf.contrib.layers.flatten(inputs = x)
    # Debug only: run a throwaway session to print the softmax output and the label shape
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(tf.nn.softmax(x, name="softmax_tensor").eval(), labels.shape)
    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=x, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.softmax(x, name="softmax_tensor")
        }
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    # onehot_labels is unused here: the sparse loss below takes integer labels directly
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=2)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=x)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

def parser(record):
    keys_to_features = {
        'img_raw': tf.FixedLenFeature((), tf.string),
        'label': tf.FixedLenFeature((), tf.int64)
    }
    parsed = tf.parse_single_example(record, keys_to_features)
    image = tf.decode_raw(parsed['img_raw'], tf.uint8)
    image = tf.cast(image, tf.float32)
    label = tf.cast(parsed['label'], tf.int32)
    return image, label


def save_hp_to_json():
    '''Save hyperparameters to a json file'''
    filename = os.path.join(FLAGS.model_dir, 'hparams.json')
    hparams = FLAGS.flag_values_dict()
    with open(filename, 'w') as f:
        json.dump(hparams, f, indent=4, sort_keys=True)

def main(unused_argv):

    def train_input_fn():
        train_dataset = tf.data.TFRecordDataset(FLAGS.train_dataset)
        train_dataset = train_dataset.map(parser)
        train_dataset = train_dataset.repeat(FLAGS.num_epochs)
        train_dataset = train_dataset.batch(FLAGS.batch_size)
        train_iterator = train_dataset.make_one_shot_iterator()

        features, labels = train_iterator.get_next()
        return features, labels

    def eval_input_fn():
        eval_dataset = tf.data.TFRecordDataset(FLAGS.eval_dataset)
        eval_dataset = eval_dataset.map(parser)
        # eval_dataset = eval_dataset.repeat(FLAGS.num_epochs)
        eval_dataset = eval_dataset.batch(FLAGS.batch_size)
        eval_iterator = eval_dataset.make_one_shot_iterator()
        features, labels = eval_iterator.get_next()
        return features, labels

    steg_classifier = tf.estimator.Estimator(
        model_fn=stg_model_fn, model_dir=FLAGS.model_dir)

    # Train
    steg_classifier.train(input_fn=train_input_fn)

    # Evaluation
    eval_results = steg_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)

    tf.logging.info('Saving hyperparameters ...')
    save_hp_to_json()


if __name__ == "__main__":
    tf.app.run()

I don't understand where that 1280 comes from; the batch_size is only 20.


Here is the layer_module code as well:

def conv_group(inputs, activation, filters, kernel_size, pool_size, strides, pool_padding, abs_layer):
    '''Convolution -> optional abs -> batch norm -> activation -> average pooling.'''
    x = tf.layers.conv2d(
        inputs = inputs,
        filters = filters,
        kernel_size = kernel_size,
        padding = "same")

    if (abs_layer):
        x = tf.abs(x)

    x = tf.layers.batch_normalization(inputs = x)

    if (activation == "relu"):
        x = tf.nn.relu(x)
    elif (activation == "tanh"):
        x = tf.nn.tanh(x)
    print(x)
    x = tf.layers.average_pooling2d(
        inputs = x,
        padding = pool_padding,
        pool_size = pool_size,
        strides = strides)
    print(x)
    return x
Answer
呆萌傻

Solved; it was a reshape problem.

The input images are 512×512×1. After (incorrectly) reshaping them to 64×64, the -1 in the first element of the shape lets the batch dimension grow so that the total number of elements stays the same. So the batch size becomes 20 × (512/64) × (512/64) = 20 × 8 × 8 = 1280.
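
So the fix is to make the reshape match the real image size; roughly (a sketch, and if the images really should be 64×64, resize them instead of reshaping):

# Keep the real image size, so 20 records stay 20 images:
x = tf.reshape(features, [-1, 512, 512, 1])

# If 64x64 input is actually wanted, resize rather than reshape,
# which leaves the batch dimension alone:
# x = tf.image.resize_images(x, [64, 64])

# An explicit batch dimension also makes this kind of mistake fail
# immediately instead of silently changing the batch size:
# x = tf.reshape(features, [FLAGS.batch_size, 512, 512, 1])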

October 25, 2017, 07:44