NaN loss when training a deep neural recommender model with TensorFlow
I am trying to follow the TensorFlow documentation and apply the same technique to a toy dataset.
During training, all of the losses come out as NaN. I tried to debug this with Debugger V2, and I can see that tf.keras.layers.GlobalAveragePooling1D
is producing NaN due to a division by 0, which then turns every value into NaN during backpropagation. What is not clear from the Debugger V2 GUI is why the sum is becoming 0. I did try reducing the number of features and the size of the dataset, but each of these attempts produced a new error (I shall probably start a separate question thread for each issue at a later point).
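To illustrate what I believe is happening, here is a minimal sketch of my own (not from the tutorial; the strings are made up): an empty or whitespace-only text feature is vectorized to nothing but padding tokens, mask_zero=True then masks every position, and GlobalAveragePooling1D divides a zero sum by a zero token count.
import tensorflow as tf

# Vocabulary adapted on made-up strings, mirroring the model's text towers.
vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
    max_tokens=100, output_sequence_length=4)
vectorizer.adapt(["some release title", "another artist name"])

pooled = tf.keras.Sequential([
    vectorizer,
    tf.keras.layers.Embedding(100, 8, mask_zero=True),
    tf.keras.layers.GlobalAveragePooling1D(),
])

# The empty string maps to [0, 0, 0, 0]; every position is masked, so the
# masked average divides by zero and the second row comes out as all NaN.
print(pooled(tf.constant(["some release title", ""])))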
Below is the full code for reference. I am providing the dataset as well here. I ran the code below on Google Colab.
import os
import pprint
import tempfile
from typing import Dict, Text
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds

tf.debugging.experimental.enable_dump_debug_info(
    "./tfdbg2_logdir",
    tensor_debug_mode="FULL_HEALTH",
    circular_buffer_size=-1)

!pip install -q tensorflow-recommenders
import tensorflow_recommenders as tfrs
Preparing Data
ds=pd.read_csv('train_recom.csv')
ds['year'].replace(0,1,inplace=True)
ds_song=ds.groupby(['song_id','title','release','artist_name','year']).size().reset_index().rename(columns={0:'count'})
ds_song.to_csv('songs_details.csv')
ds.to_csv('train_recom_transformed.csv')
Reading the data into TensorFlow datasets
ratings = tf.data.experimental.make_csv_dataset(
    "./train_recom_transformed.csv",
    batch_size=5,
    select_columns=['user_id', 'song_id', 'listen_count', 'title', 'release', 'artist_name',
                    'year'],
    header=True,
    num_epochs=1,
    ignore_errors=False,)
songs = tf.data.experimental.make_csv_dataset(
    "./songs_details.csv",
    batch_size=128,
    select_columns=['song_id','title','release','artist_name','year'],
    num_epochs=1,
    ignore_errors=True,)
ratings = ratings.unbatch().map(lambda x: {
    "song_id": x["song_id"],
    "user_id": x["user_id"],
    "release": x["release"],
    "artist_name": x["artist_name"],
    "title": x["title"],
    "year": x["year"],
    "listen_count": x["listen_count"]
})
songs = songs.unbatch().map(lambda x: x["song_id"])
Preparing the train and test datasets
tf.random.set_seed(42)
shuffled = ratings.shuffle(16000, seed=42, reshuffle_each_iteration=False)
train = shuffled.take(12000)
test = shuffled.skip(12000).take(4000)
cached_train = train.shuffle(100_000).batch(1200).cache()
cached_test = test.batch(400).cache()
title = songs.batch(1000)
user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])
unique_song_titles = np.unique(np.concatenate(list(title)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))
year_data=np.concatenate(list(ratings.map(lambda x: x['year']).batch(4000)))
User model class
class UserModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        max_tokens = 1_000_000
        embedding_dimension = 32
        self.user_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
        ])
        self.release_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
            max_tokens=max_tokens)
        self.release_text_embedding = tf.keras.Sequential([
            self.release_vectorizer,
            tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True, input_length=144),
            tf.keras.layers.GlobalAveragePooling1D(),
        ])
        self.release_vectorizer.adapt(np.concatenate(list(ratings.map(lambda x: x['release']).batch(4000))))
        self.artist_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
            max_tokens=max_tokens)
        self.artist_text_embedding = tf.keras.Sequential([
            self.artist_vectorizer,
            tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
            tf.keras.layers.GlobalAveragePooling1D(),
        ])
        self.artist_vectorizer.adapt(np.concatenate(list(ratings.map(lambda x: x['artist_name']).batch(4000))))
        self.title_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
            max_tokens=max_tokens)
        self.title_text_embedding = tf.keras.Sequential([
            self.title_vectorizer,
            tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
            tf.keras.layers.GlobalAveragePooling1D(),
        ])
        self.title_vectorizer.adapt(np.concatenate(list(ratings.map(lambda x: x['title']).batch(4000))))
        self.year_embedding = tf.keras.Sequential([
            tf.keras.layers.Embedding(len(year_data) + 1, 32),
        ])

    def call(self, inputs):
        return tf.concat([
            self.user_embedding(inputs['user_id']),
            self.release_text_embedding(inputs['release']),
            self.year_embedding(inputs['year']),
            self.artist_text_embedding(inputs['artist_name']),
            self.title_text_embedding(inputs['title']),
        ], axis=1)
Item model
class ItemModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        max_tokens = 10_000
        embedding_dimension = 32
        ## embed title from unique_song_titles
        self.title_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_song_titles, mask_token=None),
            tf.keras.layers.Embedding(len(unique_song_titles) + 1, embedding_dimension)
        ])

    def call(self, inputs):
        return self.title_embedding(inputs)
Query model: creating the deep model
class QueryModel(tf.keras.Model):
    """Model for encoding user queries."""

    def __init__(self, layer_sizes):
        """Model for encoding user queries.

        Args:
          layer_sizes:
            A list of integers where the i-th entry represents the number of units
            the i-th layer contains.
        """
        super().__init__()
        # We first use the user model for generating embeddings.
        self.embedding_model = UserModel()
        # Then construct the layers.
        self.dense_layers = tf.keras.Sequential()
        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))
        # No activation for the last layer.
        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size))

    def call(self, inputs):
        feature_embedding = self.embedding_model(inputs)
        return self.dense_layers(feature_embedding)
Creating the deep model for the item model
class CandidateModel(tf.keras.Model):
    """Model for encoding movies."""

    def __init__(self, layer_sizes):
        """Model for encoding movies.

        Args:
          layer_sizes:
            A list of integers where the i-th entry represents the number of units
            the i-th layer contains.
        """
        super().__init__()
        self.embedding_model = ItemModel()
        # Then construct the layers.
        self.dense_layers = tf.keras.Sequential()
        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))
        # No activation for the last layer.
        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size))

    def call(self, inputs):
        feature_embedding = self.embedding_model(inputs)
        return self.dense_layers(feature_embedding)
Combining the query and candidate models
class SongModel(tfrs.models.Model):
    def __init__(self, layer_sizes):
        super().__init__()
        self.query_model = QueryModel(layer_sizes)
        self.candidate_model = CandidateModel(layer_sizes)
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=songs.batch(128).map(self.candidate_model),
            ),
        )

    def compute_loss(self, features, training=False):
        print('type of feature ----', type(features))
        query_embeddings = self.query_model({
            "user_id": features["user_id"],
            "release": features["release"],
            "artist_name": features["artist_name"],
            "title": features["title"],
            "year": features["year"],
        })
        item_embeddings = self.candidate_model(features["song_id"])
        return self.task(query_embeddings, item_embeddings)
Training the model
model = SongModel([32])
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
model_hist = model.fit(cached_train, epochs=9)
Below is the output that I got:
WARNING:tensorflow:Failed to read source code from path: /content/<ipython-input-26-fdc864fc30cf>. Reason: Source path neither exists nor can be loaded as a .par file: /content/<ipython-input-26-fdc864fc30cf>
WARNING:tensorflow:Failed to read source code from path: /content/<ipython-input-25-e3009db55439>. Reason: Source path neither exists nor can be loaded as a .par file: /content/<ipython-input-25-e3009db55439>
Epoch 1/9
type of feature ---- <class 'dict'>
WARNING:tensorflow:Model was constructed with shape (None, None) for input KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.float32, name='embedding_10_input'), name='embedding_10_input', description="created by layer 'embedding_10_input'"), but it was called on an input with incompatible shape (None,).
type of feature ---- <class 'dict'>
WARNING:tensorflow:Model was constructed with shape (None, None) for input KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.float32, name='embedding_10_input'), name='embedding_10_input', description="created by layer 'embedding_10_input'"), but it was called on an input with incompatible shape (None,).
10/10 [==============================] - 63s 1s/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0022 - factorized_top_k/top_10_categorical_accuracy: 0.0033 - factorized_top_k/top_50_categorical_accuracy: 0.0073 - factorized_top_k/top_100_categorical_accuracy: 0.0103 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 2/9
10/10 [==============================] - 9s 945ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 3/9
10/10 [==============================] - 10s 953ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 4/9
10/10 [==============================] - 9s 948ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 5/9
10/10 [==============================] - 10s 966ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 6/9
10/10 [==============================] - 10s 955ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 7/9
10/10 [==============================] - 10s 955ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 8/9
10/10 [==============================] - 10s 958ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
Epoch 9/9
10/10 [==============================] - 10s 971ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_50_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_100_categorical_accuracy: 0.0000e+00 - loss: nan - regularization_loss: 0.0000e+00 - total_loss: nan
2 Answers
The problem was that, when we replaced the special characters with blank spaces, for one record the entire value (in the release field) became null. So the conclusion is that it was a data issue, not a code issue. We then added the two lines below to handle such cases:
ds2 = ds.replace(r'^\s*$', np.nan, regex=True)
ds2['release'] = ds2['release'].fillna('None')
A small demonstration follows, and after that the entire code with all the changes.
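Here is a minimal demonstration of those two lines on a toy frame (my own illustration; the values are made up):
import numpy as np
import pandas as pd

# One release value consists only of special characters; stripping them
# leaves an empty string, which later vectorizes to an all-masked sequence.
ds = pd.DataFrame({"release": ["Greatest Hits", "***", "Live!!!"]})
ds.release.replace({r'[^a-zA-Z0-9 ]+': ''}, regex=True, inplace=True)

# The fix: turn empty/whitespace-only cells into NaN, then fill a real token.
ds2 = ds.replace(r'^\s*$', np.nan, regex=True)
ds2['release'] = ds2['release'].fillna('None')
print(ds2)  # row 1 is now 'None' instead of an empty string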
import os
import pprint
import tempfile
from typing import Dict, Text
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
!pip install -q tensorflow-recommenders
import tensorflow_recommenders as tfrs
ds=pd.read_csv('train_recom.csv')
print(ds['release'].isnull().sum())
print(ds['title'].isnull().sum())
print(ds['artist_name'].isnull().sum())
print(ds['year'].isnull().sum())
print(ds.isna().any(axis=None))
print(any(ds[c].hasnans for c in ds))
for c in ds:
    if ds[c].hasnans:
        print(c)

ds['year'].replace(0,1,inplace=True)
ds.release.replace({r'[^a-zA-Z0-9 ]+':''}, regex=True, inplace=True)
ds.artist_name.replace({r'[^a-zA-Z0-9 ]+':''}, regex=True, inplace=True)
ds.title.replace({r'[^a-zA-Z0-9 ]+':''}, regex=True, inplace=True)
ds2 = ds.replace(r'^\s*$', np.nan, regex=True)
ds2['release'] = ds2['release'].fillna('None')
ds = ds2
ds_song=ds.groupby(['song_id','title','release','artist_name','year']).size().reset_index().rename(columns={0:'count'})
ds_song.to_csv('songs_details.csv')
ds.to_csv('train_recom_transformed.csv')
ratings = tf.data.experimental.make_csv_dataset(
    "./train_recom_transformed.csv",
    batch_size=5,
    select_columns=['user_id', 'song_id', 'listen_count', 'title', 'release', 'artist_name',
                    'year'],
    header=True,
    num_epochs=1,
    ignore_errors=False,)
songs = tf.data.experimental.make_csv_dataset(
    "./songs_details.csv",
    batch_size=128,
    select_columns=['song_id','title','release','artist_name','year'],
    num_epochs=1,
    ignore_errors=True,)
ratings = ratings.unbatch().map(lambda x: {
    "song_id": x["song_id"],
    "user_id": x["user_id"],
    "release": x["release"],
    "artist_name": x["artist_name"],
    "title": x["title"],
    "year": x["year"],
    "listen_count": x["listen_count"]
})
songs = songs.unbatch().map(lambda x: {
    "song_id": x["song_id"],
    "release": x["release"],
    "artist_name": x["artist_name"],
    "title": x["title"],
    "year": x["year"],
})
tf.random.set_seed(42)
shuffled = ratings.shuffle(16000, seed=42, reshuffle_each_iteration=False)
train = shuffled.take(12000)
test = shuffled.skip(12000).take(4000)
cached_train = train.shuffle(100_000).batch(1200).cache()
cached_test = test.batch(400).cache()
title = songs.batch(1000).map(lambda x: x["title"])
user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])
unique_song_titles = np.unique(np.concatenate(list(title)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))
year_data=list(songs.map(lambda x: x['year']))
class UserModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        max_tokens = 1_000_000
        embedding_dimension = 32
        self.user_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
        ])

    def call(self, inputs):
        return self.user_embedding(inputs['user_id'])
class ItemModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        max_tokens = 10_000_00
        embedding_dimension = 32
        ## embed title from unique_song_titles
        self.title_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_song_titles, mask_token=None),
            tf.keras.layers.Embedding(len(unique_song_titles) + 1, embedding_dimension)
        ])
        self.release_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
            max_tokens=max_tokens)
        self.release_text_embedding = tf.keras.Sequential([
            self.release_vectorizer,
            tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True, input_length=144),
            tf.keras.layers.GlobalAveragePooling1D(),
        ])
        self.release_vectorizer.adapt(songs.map(lambda x: x['release']))
        self.artist_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
            max_tokens=max_tokens)
        self.artist_text_embedding = tf.keras.Sequential([
            self.artist_vectorizer,
            tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
            tf.keras.layers.GlobalAveragePooling1D(),
        ])
        self.artist_vectorizer.adapt(songs.map(lambda x: x['artist_name']))
        self.title_vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
            max_tokens=max_tokens)
        self.title_text_embedding = tf.keras.Sequential([
            self.title_vectorizer,
            tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
            tf.keras.layers.GlobalAveragePooling1D(),
        ])
        self.title_vectorizer.adapt(songs.map(lambda x: x['title']))
        self.year_embedding = tf.keras.Sequential([
            tf.keras.layers.Embedding(len(year_data) + 1, 32),
            # tf.keras.layers.Embedding(2501, 32),
        ])

    def call(self, inputs):
        # return self.title_embedding(inputs['title'])
        return tf.concat([
            self.title_embedding(inputs['title']),
            self.release_text_embedding(inputs['release']),
            self.year_embedding(inputs['year']),
            self.artist_text_embedding(inputs['artist_name']),
            self.title_text_embedding(inputs['title']),
        ], axis=1)
class QueryModel(tf.keras.Model):
    """Model for encoding user queries."""

    def __init__(self, layer_sizes):
        """Model for encoding user queries.

        Args:
          layer_sizes:
            A list of integers where the i-th entry represents the number of units
            the i-th layer contains.
        """
        super().__init__()
        # We first use the user model for generating embeddings.
        self.embedding_model = UserModel()
        # Then construct the layers.
        self.dense_layers = tf.keras.Sequential()
        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))
        # No activation for the last layer.
        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size))

    def call(self, inputs):
        feature_embedding = self.embedding_model(inputs)
        return self.dense_layers(feature_embedding)
class CandidateModel(tf.keras.Model):
    """Model for encoding movies."""

    def __init__(self, layer_sizes):
        """Model for encoding movies.

        Args:
          layer_sizes:
            A list of integers where the i-th entry represents the number of units
            the i-th layer contains.
        """
        super().__init__()
        self.embedding_model = ItemModel()
        # Then construct the layers.
        self.dense_layers = tf.keras.Sequential()
        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))
        # No activation for the last layer.
        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size))

    def call(self, inputs):
        feature_embedding = self.embedding_model(inputs)
        return self.dense_layers(feature_embedding)
class SongModel(tfrs.models.Model):
    def __init__(self, layer_sizes):
        super().__init__()
        self.query_model = QueryModel(layer_sizes)
        self.candidate_model = CandidateModel(layer_sizes)
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=songs.batch(128).map(self.candidate_model),
            ),
        )

    def compute_loss(self, features, training=False):
        print('type of feature ----', type(features))
        query_embeddings = self.query_model({
            "user_id": features["user_id"],
        })
        item_embeddings = self.candidate_model({
            "song_id": features["song_id"],
            "title": features["title"],
            "release": features["release"],
            "artist_name": features["artist_name"],
            "year": features["year"],
        })
        return self.task(query_embeddings, item_embeddings)
model = SongModel([32])
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
model_hist = model.fit(cached_train, epochs=9)
I got a similar error when using TFRS on a custom dataset. It turned out that I had some non-printable characters and symbols in the data. I searched for and removed the symbols (manually, plus some regex), and I also limited the text columns in the dataframe to printable characters only, along the lines of the sketch below.
I hope it helps.
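A sketch of the printable-character filtering (my own reconstruction with made-up column data, not the exact code I used):
import re
import string
import pandas as pd

# Keep only characters from string.printable in the given text columns.
def keep_printable(df: pd.DataFrame, columns) -> pd.DataFrame:
    pattern = f"[^{re.escape(string.printable)}]"
    out = df.copy()
    for col in columns:
        out[col] = out[col].astype(str).str.replace(pattern, "", regex=True)
    return out

ds = pd.DataFrame({"title": ["Café del Mar", "OK Computer"]})
print(keep_printable(ds, ["title"]))  # 'é' is dropped -> 'Caf del Mar'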