TensorFlow Gradient Tape returns None
I'm trying to compute gradients using Gradient Tape in TensorFlow.

Description -

A - tf.constant
X - tf.Variable
Y - tf.Variable

Functions -

get_regularization_loss - computes the L1/L2 penalty
construct_loss_function - computes the loss
get_gradients - auto-diffs the loss and computes the gradients with respect to X & Y

Currently I'm getting None for both X and Y. Any suggestions on what might be wrong?
import tensorflow as tf

def get_regularization_loss(X, loss_info):
    penalty = loss_info['penalty_type']
    alpha = loss_info['alpha']
    # Extract sub-matrix bounds
    X_00, X_10, X_01, X_11 = loss_info['X_start_row'], loss_info['X_end_row'], loss_info['X_start_col'], loss_info['X_end_col']
    # get_L2_penalty / get_L1_penalty are helper functions defined elsewhere
    if penalty == 'L2':
        loss_regularization_X = get_L2_penalty(X[X_00:X_10, X_01:X_11], alpha)
    elif penalty == 'L1':
        loss_regularization_X = get_L1_penalty(X[X_00:X_10, X_01:X_11], alpha)
    else:
        loss_regularization_X = tf.Variable(0, dtype=tf.float64)
    return loss_regularization_X

def construct_loss_function(A, X, Y, loss_info):
    # Extract sub-matrix bounds
    A_00, A_10, A_01, A_11 = loss_info['A_start_row'], loss_info['A_end_row'], loss_info['A_start_col'], loss_info['A_end_col']
    X_00, X_10, X_01, X_11 = loss_info['X_start_row'], loss_info['X_end_row'], loss_info['X_start_col'], loss_info['X_end_col']
    Y_00, Y_10, Y_01, Y_11 = loss_info['Y_start_row'], loss_info['Y_end_row'], loss_info['Y_start_col'], loss_info['Y_end_col']
    loss_name = loss_info['loss']
    if loss_name == 'binary_crossentropy':
        exp_value = tf.math.exp(tf.matmul(X[X_00:X_10, X_01:X_11], Y[Y_00:Y_10, Y_01:Y_11]))
        log_odds = exp_value / (1 + exp_value)
        loss = tf.reduce_sum(tf.keras.losses.binary_crossentropy(A[A_00:A_10, A_01:A_11], log_odds))
    else:
        loss = tf.Variable(0, dtype=tf.float64)
    return loss

def get_gradients(A, X, Y, Z_loss_list, X_loss_list, Y_loss_list):
    Z_loss = tf.Variable(0, dtype=tf.float64)
    X_loss = tf.Variable(0, dtype=tf.float64)
    Y_loss = tf.Variable(0, dtype=tf.float64)
    with tf.GradientTape(persistent=True) as tape:
        tape.watch(X)
        tape.watch(Y)
        # Accumulate the partial losses into the Variables defined above
        for loss_info in Z_loss_list:
            Z_loss.assign(Z_loss + construct_loss_function(A, X, Y, loss_info))
        for loss_info in X_loss_list:
            X_loss.assign(X_loss + get_regularization_loss(X, loss_info))
        for loss_info in Y_loss_list:
            Y_loss.assign(Y_loss + get_regularization_loss(Y, loss_info))
        loss = X_loss + Y_loss + Z_loss
    return_dictionary = {
        'total_loss': loss,
        'Z_loss': Z_loss,
        'loss_regularization_X': X_loss,
        'loss_regularization_Y': Y_loss,
        'gradients': tape.gradient(loss, {'X': X, 'Y': Y})
    }
    return return_dictionary

print(get_gradients(A, X, Y, Z_loss_list, X_loss_list, Y_loss_list))
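For reference, below is a minimal, self-contained sketch of the same kind of computation with the partial losses accumulated as plain tensors rather than through Variable.assign. The shapes, the single binary-crossentropy term, and the alpha values are made up for illustration. GradientTape does not record variable assignments as differentiable operations, so a total loss that is read back out of assigned Variables reaches tape.gradient disconnected from X and Y; keeping the accumulation in ordinary tensors preserves the path and yields non-None gradients:

import tensorflow as tf

# Toy data; shapes and penalty weights are assumptions for illustration.
tf.random.set_seed(0)
A = tf.cast(tf.random.uniform((4, 3)) > 0.5, tf.float64)
X = tf.Variable(tf.random.normal((4, 2), dtype=tf.float64))
Y = tf.Variable(tf.random.normal((2, 3), dtype=tf.float64))

with tf.GradientTape() as tape:
    # Accumulate partial losses as plain tensors; every op stays on the tape.
    probs = tf.sigmoid(tf.matmul(X, Y))          # same as exp(z) / (1 + exp(z))
    z_loss = tf.reduce_sum(tf.keras.losses.binary_crossentropy(A, probs))
    x_loss = 0.01 * tf.reduce_sum(tf.abs(X))     # L1 penalty, assumed alpha = 0.01
    y_loss = 0.01 * tf.reduce_sum(tf.square(Y))  # L2 penalty, assumed alpha = 0.01
    loss = z_loss + x_loss + y_loss

grads = tape.gradient(loss, {'X': X, 'Y': Y})
print(grads['X'])  # dense tensors rather than None
print(grads['Y'])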
Comments (1)
Try using all the values of the X, Y tensors at those lines: instead of slicing X and Y, you can fill the other values with large negative numbers so that they can't affect the value of the loss, and then use the entire X variable.
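A rough sketch of that suggestion; the shape, the mask bounds, the -1e9 fill value, and the placeholder sigmoid loss are all assumptions made up for illustration. A boolean mask selects the sub-block, and tf.where blends the variable with the large negative constant, so the whole X Variable enters the computation and the tape stays connected to it:

import tensorflow as tf

X = tf.Variable(tf.random.normal((4, 2), dtype=tf.float64))

# Boolean mask marking the sub-block that would otherwise be sliced out
# (the row/column bounds here are made up for the sketch).
rows = tf.range(4)[:, None]
cols = tf.range(2)[None, :]
mask = (rows >= 0) & (rows < 3) & (cols >= 0) & (cols < 2)

with tf.GradientTape() as tape:
    # Keep the selected entries of X and pin the rest to a large negative
    # constant; the entire X Variable still takes part in the computation.
    X_masked = tf.where(mask, X, tf.constant(-1e9, dtype=tf.float64))
    loss = tf.reduce_sum(tf.sigmoid(X_masked))  # placeholder loss: sigmoid(-1e9) ~ 0

print(tape.gradient(loss, X))  # zeros outside the mask, real gradients inside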