TensorFlow Fit()中缺少目标数据()
因此,当我使用包含文本数据集的张量流使用 BERT 训练深度学习时遇到问题。所以我想 fit()
模型,但在训练时出现错误。我认为发生这种情况是因为 data_train 没有标签。但从我的研究来看,它也有同样的问题,就像这里的问题同样的问题。既然没有答案,这是一个错误吗?错误是这样的
ValueError: Target data is missing. Your model was compiled with loss=<keras.losses.CategoricalCrossentropy object at 0x7fa707d96fd0>, and therefore expects target data to be provided in `fit()`.
我的代码是这样的
X_input_ids = np.zeros((len(df), 256))
X_attn_masks = np.zeros((len(df), 256))
def generate_training_data(df, ids, masks, tokenizer):
for i, text in tqdm(enumerate(df['text'])):
tokenized_text = tokenizer.encode_plus(
text,
max_length=256,
truncation=True,
padding='max_length',
add_special_tokens=True,
return_tensors='tf'
)
ids[i, :] = tokenized_text.input_ids
masks[i, :] = tokenized_text.attention_mask
return ids, masks
X_input_ids, X_attn_masks = generate_training_data(df, X_input_ids, X_attn_masks, tokenizer)
labels = np.zeros((len(df), 3))
labels[np.arange(len(df)), df['label'].values] = 1
dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks, labels))
def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
return {
'input_ids': input_ids,
'attention_mask': attn_masks
},
dataset = dataset.map(SentimentDatasetMapFunction)
dataset = dataset.shuffle(2000).batch(6, drop_remainder=True)
p = 0.8
train_size = int((len(df)//16)*p)
train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)
model = TFBertModel.from_pretrained('cahya/bert-base-indonesian-522M')
input_ids = tf.keras.layers.Input(shape=(256,), name='input_ids', dtype='int32')
attn_masks = tf.keras.layers.Input(shape=(256,), name='attention_mask', dtype='int32')
bert_embds = model.bert(input_ids, attention_mask=attn_masks)[1]
intermediate_layer = tf.keras.layers.Dense(512, activation='relu', name='intermediate_layer')(bert_embds)
output_layer = tf.keras.layers.Dense(3, activation='softmax', name='output_layer')(intermediate_layer) # softmax -> calcs probs of classes
sentiment_model = tf.keras.Model(inputs=[input_ids, attn_masks], outputs=output_layer)
sentiment_model.summary()
optim = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
optim = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
sentiment_model.compile(optimizer=optim, loss=loss_func, metrics=[acc])
hist = sentiment_model.fit(
train_dataset,
validation_data=val_dataset,
epochs=2
)
So I have a problem when train deep learning with BERT with tensorflow which contain text dataset. So i want to fit()
the model but got an error when training. I think it happen because the data_train did't have the label. But from my research It also same problem like SO question in here Same problem. Since it didn't have a answer is this a bug? The error is like this
ValueError: Target data is missing. Your model was compiled with loss=<keras.losses.CategoricalCrossentropy object at 0x7fa707d96fd0>, and therefore expects target data to be provided in `fit()`.
My code like this
X_input_ids = np.zeros((len(df), 256))
X_attn_masks = np.zeros((len(df), 256))
def generate_training_data(df, ids, masks, tokenizer):
for i, text in tqdm(enumerate(df['text'])):
tokenized_text = tokenizer.encode_plus(
text,
max_length=256,
truncation=True,
padding='max_length',
add_special_tokens=True,
return_tensors='tf'
)
ids[i, :] = tokenized_text.input_ids
masks[i, :] = tokenized_text.attention_mask
return ids, masks
X_input_ids, X_attn_masks = generate_training_data(df, X_input_ids, X_attn_masks, tokenizer)
labels = np.zeros((len(df), 3))
labels[np.arange(len(df)), df['label'].values] = 1
dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks, labels))
def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
return {
'input_ids': input_ids,
'attention_mask': attn_masks
},
dataset = dataset.map(SentimentDatasetMapFunction)
dataset = dataset.shuffle(2000).batch(6, drop_remainder=True)
p = 0.8
train_size = int((len(df)//16)*p)
train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)
model = TFBertModel.from_pretrained('cahya/bert-base-indonesian-522M')
input_ids = tf.keras.layers.Input(shape=(256,), name='input_ids', dtype='int32')
attn_masks = tf.keras.layers.Input(shape=(256,), name='attention_mask', dtype='int32')
bert_embds = model.bert(input_ids, attention_mask=attn_masks)[1]
intermediate_layer = tf.keras.layers.Dense(512, activation='relu', name='intermediate_layer')(bert_embds)
output_layer = tf.keras.layers.Dense(3, activation='softmax', name='output_layer')(intermediate_layer) # softmax -> calcs probs of classes
sentiment_model = tf.keras.Model(inputs=[input_ids, attn_masks], outputs=output_layer)
sentiment_model.summary()
optim = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
optim = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
sentiment_model.compile(optimizer=optim, loss=loss_func, metrics=[acc])
hist = sentiment_model.fit(
train_dataset,
validation_data=val_dataset,
epochs=2
)
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
我花了一些时间寻找可以更新的东西,然后从网站下载模型。
[ 示例 ]:
[ 输出 ]:
I spend a bit of time finding something we can update and I download the model from the websites.
[ Sample ]:
[ Output ]: