使用 tf.GradientTape 的变分自动编码器

发布于 2025-01-23 08:41:23 字数 3719 浏览 7 评论 0原文

这是 Keras 为典型的变分自动编码器（VAE）提供的 TensorFlow GradientTape 示例：

train_step 函数在模型内部实现，并通过 "model.fit()" 进行训练。该示例运行良好，没有任何问题。

但是，对于另一个应用程序，我需要在模型定义之外实现 train_step 函数。一开始，我以上述示例为起点，因为目标应用程序也是一种 VAE。于是我做了一些修改，并尝试训练相同的模型结构，完整代码见下文。然而，与原始代码相比，我得到的损失值非常奇怪，甚至只经过几次迭代，损失就变成了 nan。您能否告诉我错在哪里，以及为什么会发生这种情况？

提前致谢

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow import keras
import numpy as np
# Environment check: report whether TensorFlow sees a GPU, and which
# TensorFlow version is installed.
# NOTE(review): tf.test.is_gpu_available is deprecated in later TF 2.x
# releases — confirm against the TensorFlow version actually targeted.
print(tf.test.is_gpu_available()) # prints True
print(tf.__version__) # prints '2.0.0-beta1'

class Sampling(layers.Layer):
    """Reparameterisation layer: draws z from N(z_mean, exp(z_log_var))."""

    def call(self, inputs):
        """Return one latent sample per row of the ``(z_mean, z_log_var)`` pair.

        Both tensors are indexed here as rank-2 (rows, latent dimensions).
        """
        mean, log_var = inputs
        mean_shape = tf.shape(mean)
        noise = tf.keras.backend.random_normal(
            shape=(mean_shape[0], mean_shape[1]))
        # exp(0.5 * log_var) converts the log-variance to a standard deviation.
        std_dev = tf.exp(0.5 * log_var)
        return mean + std_dev * noise

# Number of units in each latent head (z_mean and z_log_var).
latent_dim = 2
# Encoder: two stride-2 convolutions, flattened into a small dense layer.
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)
# Two parallel linear heads give the latent mean and log-variance; Sampling
# draws z from them.
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
# Decoder: dense projection reshaped to 7x7x64, then two stride-2 transposed
# convolutions and a final sigmoid layer with a single output channel.
x = layers.Dense(7 * 7 * 64, activation="relu")(z)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)
# One end-to-end model returning the reconstruction and both latent heads.
# It is named "decoder" although it also contains the encoder layers.
model = keras.Model(encoder_inputs, [decoder_outputs, z_mean, z_log_var] , name="decoder")
model.summary()




optimizer = tf.keras.optimizers.Adam(lr=0.001)
# NOTE(review): `objective` is not referenced anywhere else in this script.
objective = tf.keras.losses.SparseCategoricalCrossentropy()

# Labels are discarded; x_test is loaded but not used below.
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
# NOTE(review): x_train is used exactly as loaded — it is never cast to float
# or divided by 255 before training.
num_samples = x_train.shape[0]
epochs=1
batch_size=128

@tf.function
def train_step(data):
  """Run one optimisation step of the VAE on one batch.

  Returns the tuple ``(total_loss, reconstruction_loss, kl_loss)``.
  """
  with tf.GradientTape() as tape:
    # NOTE(review): the model is called before the channel axis is added
    # below, and `data` is whatever the caller sliced from x_train, which this
    # script never rescales. Binary cross-entropy against a sigmoid output
    # presumably needs targets in [0, 1]; unscaled pixel values are the
    # likely cause of the huge / nan losses.
    reconstruction, z_mean, z_log_var = model(data, training=True)
    data = tf.expand_dims(data, axis=-1)
    # Reconstruction error: cross-entropy summed over axes 1 and 2 of each
    # image, then averaged over the batch.
    reconstruction_loss = tf.reduce_mean(
        tf.reduce_sum(keras.losses.binary_crossentropy(data, reconstruction), axis=(1, 2)))
    # Closed-form KL divergence between N(z_mean, exp(z_log_var)) and a
    # standard normal, summed over latent units and averaged over the batch.
    kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
    kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
    total_loss = (reconstruction_loss + kl_loss)
  # Gradients are taken outside the tape context and applied to every
  # trainable variable of the model.
  grads = tape.gradient(total_loss, model.trainable_variables)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))
  return total_loss, reconstruction_loss, kl_loss


# Training loop: everything below runs inside a tf.device('gpu:0') scope.
with tf.device('gpu:0'):
  for epoch in range (epochs):
    # Integer division: a trailing partial batch is never visited.
    for step in range(num_samples//batch_size):
      s = step*batch_size
      e = s+batch_size
      # The batch is sliced without a channel axis; train_step adds that axis
      # only after the forward pass.
      x_batch = x_train[s:e,:,:]
      total_loss, reconstruction_loss, kl_loss = train_step(x_batch)
      # Losses are printed after every single step.
      print("-----------------")
      print(f"epoch: {epoch} step: {step}")
      print(f"reconstruction_loss: {reconstruction_loss} ")
      print(f"kl_loss: {kl_loss} ")
      print(f"total_loss: {total_loss}")

原文

Here is an example of TensorFlow GradientTape provided by Keras for a typical variational autoencoder:

VAE-keras-example

The train_step function is implemented inside the model, which is trained with "model.fit()". The example works well, without any problems.

However, for another application, I need to implement the train_step function outside of the model definition. In the beginning, I started with the above-mentioned example, as the target application is also a kind of VAE. Accordingly, I applied some modifications and tried to train the same model structure; please find the whole code below. However, I get very weird numbers for the loss values compared to the original code; after just a couple of iterations the losses even become nan.
Could you please let me know what's the mistake and why this happens?

Thanks in advance

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow import keras
import numpy as np
# Environment check: report whether TensorFlow sees a GPU, and which
# TensorFlow version is installed.
# NOTE(review): tf.test.is_gpu_available is deprecated in later TF 2.x
# releases — confirm against the TensorFlow version actually targeted.
print(tf.test.is_gpu_available()) # prints True
print(tf.__version__) # prints '2.0.0-beta1'

class Sampling(layers.Layer):
    """Reparameterisation layer: draws z from N(z_mean, exp(z_log_var))."""

    def call(self, inputs):
        """Return one latent sample per row of the ``(z_mean, z_log_var)`` pair.

        Both tensors are indexed here as rank-2 (rows, latent dimensions).
        """
        mean, log_var = inputs
        mean_shape = tf.shape(mean)
        noise = tf.keras.backend.random_normal(
            shape=(mean_shape[0], mean_shape[1]))
        # exp(0.5 * log_var) converts the log-variance to a standard deviation.
        std_dev = tf.exp(0.5 * log_var)
        return mean + std_dev * noise

# Number of units in each latent head (z_mean and z_log_var).
latent_dim = 2
# Encoder: two stride-2 convolutions, flattened into a small dense layer.
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)
# Two parallel linear heads give the latent mean and log-variance; Sampling
# draws z from them.
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
# Decoder: dense projection reshaped to 7x7x64, then two stride-2 transposed
# convolutions and a final sigmoid layer with a single output channel.
x = layers.Dense(7 * 7 * 64, activation="relu")(z)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)
# One end-to-end model returning the reconstruction and both latent heads.
# It is named "decoder" although it also contains the encoder layers.
model = keras.Model(encoder_inputs, [decoder_outputs, z_mean, z_log_var] , name="decoder")
model.summary()




optimizer = tf.keras.optimizers.Adam(lr=0.001)
# NOTE(review): `objective` is not referenced anywhere else in this script.
objective = tf.keras.losses.SparseCategoricalCrossentropy()

# Labels are discarded; x_test is loaded but not used below.
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
# NOTE(review): x_train is used exactly as loaded — it is never cast to float
# or divided by 255 before training.
num_samples = x_train.shape[0]
epochs=1
batch_size=128

@tf.function
def train_step(data):
  """Run one optimisation step of the VAE on one batch.

  Returns the tuple ``(total_loss, reconstruction_loss, kl_loss)``.
  """
  with tf.GradientTape() as tape:
    # NOTE(review): the model is called before the channel axis is added
    # below, and `data` is whatever the caller sliced from x_train, which this
    # script never rescales. Binary cross-entropy against a sigmoid output
    # presumably needs targets in [0, 1]; unscaled pixel values are the
    # likely cause of the huge / nan losses.
    reconstruction, z_mean, z_log_var = model(data, training=True)
    data = tf.expand_dims(data, axis=-1)
    # Reconstruction error: cross-entropy summed over axes 1 and 2 of each
    # image, then averaged over the batch.
    reconstruction_loss = tf.reduce_mean(
        tf.reduce_sum(keras.losses.binary_crossentropy(data, reconstruction), axis=(1, 2)))
    # Closed-form KL divergence between N(z_mean, exp(z_log_var)) and a
    # standard normal, summed over latent units and averaged over the batch.
    kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
    kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
    total_loss = (reconstruction_loss + kl_loss)
  # Gradients are taken outside the tape context and applied to every
  # trainable variable of the model.
  grads = tape.gradient(total_loss, model.trainable_variables)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))
  return total_loss, reconstruction_loss, kl_loss


# Training loop: everything below runs inside a tf.device('gpu:0') scope.
with tf.device('gpu:0'):
  for epoch in range (epochs):
    # Integer division: a trailing partial batch is never visited.
    for step in range(num_samples//batch_size):
      s = step*batch_size
      e = s+batch_size
      # The batch is sliced without a channel axis; train_step adds that axis
      # only after the forward pass.
      x_batch = x_train[s:e,:,:]
      total_loss, reconstruction_loss, kl_loss = train_step(x_batch)
      # Losses are printed after every single step.
      print("-----------------")
      print(f"epoch: {epoch} step: {step}")
      print(f"reconstruction_loss: {reconstruction_loss} ")
      print(f"kl_loss: {kl_loss} ")
      print(f"total_loss: {total_loss}")

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

茶花眉 2025-01-30 08:41:23

我认为您忘了对数据进行归一化，而您所参考的教程中是这样做的：

# Load MNIST, then convert the training images to float32 and divide by 255.
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255

除此之外，您的代码似乎运行良好，损失也不再是 nan。以下代码供参考：

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow import keras
import numpy as np
# Environment check: report whether TensorFlow sees a GPU, and which
# TensorFlow version is installed.
# NOTE(review): tf.test.is_gpu_available is deprecated in later TF 2.x
# releases — confirm against the TensorFlow version actually targeted.
print(tf.test.is_gpu_available()) # prints True
print(tf.__version__) # prints '2.0.0-beta1'

class Sampling(layers.Layer):
    """Reparameterisation layer: draws z from N(z_mean, exp(z_log_var))."""

    def call(self, inputs):
        """Return one latent sample per row of the ``(z_mean, z_log_var)`` pair.

        Both tensors are indexed here as rank-2 (rows, latent dimensions).
        """
        mean, log_var = inputs
        mean_shape = tf.shape(mean)
        noise = tf.keras.backend.random_normal(
            shape=(mean_shape[0], mean_shape[1]))
        # exp(0.5 * log_var) converts the log-variance to a standard deviation.
        std_dev = tf.exp(0.5 * log_var)
        return mean + std_dev * noise

# Number of units in each latent head (z_mean and z_log_var).
latent_dim = 2

# Encoder: two stride-2 convolutions, flattened into a small dense layer.
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)

# Two parallel linear heads give the latent mean and log-variance; Sampling
# draws z from them.
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])

# Decoder: dense projection reshaped to 7x7x64, then two stride-2 transposed
# convolutions and a final sigmoid layer with a single output channel.
x = layers.Dense(7 * 7 * 64, activation="relu")(z)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)

# One end-to-end model returning the reconstruction and both latent heads.
# The name "decoder" is kept as-is even though the model also contains the
# encoder layers.
model = keras.Model(encoder_inputs, [decoder_outputs, z_mean, z_log_var], name="decoder")
model.summary()

# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
# NOTE(review): `objective` is not referenced anywhere else in this script;
# it is kept only so the module-level name still exists.
objective = tf.keras.losses.SparseCategoricalCrossentropy()

# Labels are discarded; x_test is loaded but not used below.
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
# Convert to float32 and divide by 255 so the images can serve as binary
# cross-entropy targets against the sigmoid output.
x_train = x_train.astype("float32") / 255
num_samples = x_train.shape[0]
epochs = 4
batch_size = 128

@tf.function
def train_step(data):
    """Run one VAE optimisation step on a batch and return its losses.

    Args:
        data: batch of images with a trailing channel axis. The values are
            used directly as binary cross-entropy targets, so the caller is
            expected to have scaled them to [0, 1].

    Returns:
        Tuple ``(total_loss, reconstruction_loss, kl_loss)``.
    """
    with tf.GradientTape() as tape:
        reconstruction, z_mean, z_log_var = model(data, training=True)

        # binary_crossentropy reduces the last (channel) axis; sum what is
        # left over axes 1 and 2 of each image, then average over the batch.
        per_pixel_bce = keras.losses.binary_crossentropy(data, reconstruction)
        reconstruction_loss = tf.reduce_mean(
            tf.reduce_sum(per_pixel_bce, axis=(1, 2)))

        # Closed-form KL divergence between N(z_mean, exp(z_log_var)) and a
        # standard normal, one value per latent unit.
        kl_per_unit = -0.5 * (
            1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        # reduce_mean already averages over the batch, so there is no further
        # division by batch_size: dividing again would shrink the KL term by
        # 1 / batch_size**2 and effectively switch the regulariser off.
        kl_loss = tf.reduce_mean(tf.reduce_sum(kl_per_unit, axis=1))

        total_loss = reconstruction_loss + kl_loss

    # Gradients are taken outside the tape context and applied to every
    # trainable variable of the model.
    trainable = model.trainable_variables
    grads = tape.gradient(total_loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))
    return total_loss, reconstruction_loss, kl_loss


# Training loop: walk x_train in consecutive full batches inside a
# tf.device('gpu:0') scope and print the losses after every step.
with tf.device('gpu:0'):
    for epoch in range(epochs):
        # Start offsets of every full batch; a trailing partial batch is
        # never visited.
        batch_starts = range(0, num_samples - batch_size + 1, batch_size)
        for step, s in enumerate(batch_starts):
            e = s + batch_size
            # Append the channel axis expected by the model input.
            x_batch = x_train[s:e, ..., tf.newaxis]
            print(x_batch.shape)
            total_loss, reconstruction_loss, kl_loss = train_step(x_batch)
            print("\n".join((
                "-----------------",
                f"epoch: {epoch} step: {step}",
                f"reconstruction_loss: {reconstruction_loss} ",
                f"kl_loss: {kl_loss} ",
                f"total_loss: {total_loss}",
            )))

I think you forgot to normalize your data as shown in the tutorial you are referring to:

# Load MNIST, then convert the training images to float32 and divide by 255.
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255

Otherwise, your code seems to be running fine and the loss is not nan. Here is the code for reference:

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow import keras
import numpy as np
# Environment check: report whether TensorFlow sees a GPU, and which
# TensorFlow version is installed.
# NOTE(review): tf.test.is_gpu_available is deprecated in later TF 2.x
# releases — confirm against the TensorFlow version actually targeted.
print(tf.test.is_gpu_available()) # prints True
print(tf.__version__) # prints '2.0.0-beta1'

class Sampling(layers.Layer):
    """Reparameterisation layer: draws z from N(z_mean, exp(z_log_var))."""

    def call(self, inputs):
        """Return one latent sample per row of the ``(z_mean, z_log_var)`` pair.

        Both tensors are indexed here as rank-2 (rows, latent dimensions).
        """
        mean, log_var = inputs
        mean_shape = tf.shape(mean)
        noise = tf.keras.backend.random_normal(
            shape=(mean_shape[0], mean_shape[1]))
        # exp(0.5 * log_var) converts the log-variance to a standard deviation.
        std_dev = tf.exp(0.5 * log_var)
        return mean + std_dev * noise

# Number of units in each latent head (z_mean and z_log_var).
latent_dim = 2

# Encoder: two stride-2 convolutions, flattened into a small dense layer.
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)

# Two parallel linear heads give the latent mean and log-variance; Sampling
# draws z from them.
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])

# Decoder: dense projection reshaped to 7x7x64, then two stride-2 transposed
# convolutions and a final sigmoid layer with a single output channel.
x = layers.Dense(7 * 7 * 64, activation="relu")(z)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)

# One end-to-end model returning the reconstruction and both latent heads.
# The name "decoder" is kept as-is even though the model also contains the
# encoder layers.
model = keras.Model(encoder_inputs, [decoder_outputs, z_mean, z_log_var], name="decoder")
model.summary()

# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
# NOTE(review): `objective` is not referenced anywhere else in this script;
# it is kept only so the module-level name still exists.
objective = tf.keras.losses.SparseCategoricalCrossentropy()

# Labels are discarded; x_test is loaded but not used below.
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
# Convert to float32 and divide by 255 so the images can serve as binary
# cross-entropy targets against the sigmoid output.
x_train = x_train.astype("float32") / 255
num_samples = x_train.shape[0]
epochs = 4
batch_size = 128

@tf.function
def train_step(data):
    """Run one VAE optimisation step on a batch and return its losses.

    Args:
        data: batch of images with a trailing channel axis. The values are
            used directly as binary cross-entropy targets, so the caller is
            expected to have scaled them to [0, 1].

    Returns:
        Tuple ``(total_loss, reconstruction_loss, kl_loss)``.
    """
    with tf.GradientTape() as tape:
        reconstruction, z_mean, z_log_var = model(data, training=True)

        # binary_crossentropy reduces the last (channel) axis; sum what is
        # left over axes 1 and 2 of each image, then average over the batch.
        per_pixel_bce = keras.losses.binary_crossentropy(data, reconstruction)
        reconstruction_loss = tf.reduce_mean(
            tf.reduce_sum(per_pixel_bce, axis=(1, 2)))

        # Closed-form KL divergence between N(z_mean, exp(z_log_var)) and a
        # standard normal, one value per latent unit.
        kl_per_unit = -0.5 * (
            1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        # reduce_mean already averages over the batch, so there is no further
        # division by batch_size: dividing again would shrink the KL term by
        # 1 / batch_size**2 and effectively switch the regulariser off.
        kl_loss = tf.reduce_mean(tf.reduce_sum(kl_per_unit, axis=1))

        total_loss = reconstruction_loss + kl_loss

    # Gradients are taken outside the tape context and applied to every
    # trainable variable of the model.
    trainable = model.trainable_variables
    grads = tape.gradient(total_loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))
    return total_loss, reconstruction_loss, kl_loss


# Training loop: walk x_train in consecutive full batches inside a
# tf.device('gpu:0') scope and print the losses after every step.
with tf.device('gpu:0'):
    for epoch in range(epochs):
        # Start offsets of every full batch; a trailing partial batch is
        # never visited.
        batch_starts = range(0, num_samples - batch_size + 1, batch_size)
        for step, s in enumerate(batch_starts):
            e = s + batch_size
            # Append the channel axis expected by the model input.
            x_batch = x_train[s:e, ..., tf.newaxis]
            print(x_batch.shape)
            total_loss, reconstruction_loss, kl_loss = train_step(x_batch)
            print("\n".join((
                "-----------------",
                f"epoch: {epoch} step: {step}",
                f"reconstruction_loss: {reconstruction_loss} ",
                f"kl_loss: {kl_loss} ",
                f"total_loss: {total_loss}",
            )))

回复收藏 0 原文

~没有更多了~