Why does a Keras layer behave differently when copied and pasted into my script?
When I copy and paste TF's PReLU layer into my own script, it appears to build differently than the packaged version. Below I show a toy model in which the PReLU code copied from my installation (2.9.2 on Mac M1) fails to report any trainable parameters. In the same model, the packaged PReLU layer reports 5 trainable parameters and is summarized far more succinctly.
What am I missing?
To reproduce
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.python.framework import dtypes
from tensorflow.python.keras import backend
from tensorflow.python.keras import constraints
from tensorflow.python.keras import initializers
from tensorflow.python.keras import regularizers
from tensorflow.python.keras.engine.base_layer import Layer
from tensorflow.python.keras.engine.input_spec import InputSpec
from tensorflow.python.keras.utils import tf_utils
from tensorflow.python.ops import math_ops
from tensorflow.python.util.tf_export import keras_export


# copied and renamed from the tf install
@keras_export('keras.layers.PReLUcopy')
class PReLUcopy(Layer):
  """Parametric Rectified Linear Unit.

  It follows:

  ```
    f(x) = alpha * x for x < 0
    f(x) = x for x >= 0
  ```

  where `alpha` is a learned array with the same shape as x.

  Input shape:
    Arbitrary. Use the keyword argument `input_shape`
    (tuple of integers, does not include the samples axis)
    when using this layer as the first layer in a model.

  Output shape:
    Same shape as the input.

  Args:
    alpha_initializer: Initializer function for the weights.
    alpha_regularizer: Regularizer for the weights.
    alpha_constraint: Constraint for the weights.
    shared_axes: The axes along which to share learnable
      parameters for the activation function.
      For example, if the incoming feature maps
      are from a 2D convolution
      with output shape `(batch, height, width, channels)`,
      and you wish to share parameters across space
      so that each filter only has one set of parameters,
      set `shared_axes=[1, 2]`.
  """

  def __init__(self,
               alpha_initializer='zeros',
               alpha_regularizer=None,
               alpha_constraint=None,
               shared_axes=None,
               **kwargs):
    super(PReLUcopy, self).__init__(**kwargs)
    self.supports_masking = True
    self.alpha_initializer = initializers.get(alpha_initializer)
    self.alpha_regularizer = regularizers.get(alpha_regularizer)
    self.alpha_constraint = constraints.get(alpha_constraint)
    if shared_axes is None:
      self.shared_axes = None
    elif not isinstance(shared_axes, (list, tuple)):
      self.shared_axes = [shared_axes]
    else:
      self.shared_axes = list(shared_axes)

  @tf_utils.shape_type_conversion
  def build(self, input_shape):
    param_shape = list(input_shape[1:])
    if self.shared_axes is not None:
      for i in self.shared_axes:
        param_shape[i - 1] = 1
    self.alpha = self.add_weight(
        shape=param_shape,
        name='alpha',
        initializer=self.alpha_initializer,
        regularizer=self.alpha_regularizer,
        constraint=self.alpha_constraint)
    # Set input spec
    axes = {}
    if self.shared_axes:
      for i in range(1, len(input_shape)):
        if i not in self.shared_axes:
          axes[i] = input_shape[i]
    self.input_spec = InputSpec(ndim=len(input_shape), axes=axes)
    self.built = True

  def call(self, inputs):
    pos = backend.relu(inputs)
    neg = -self.alpha * backend.relu(-inputs)
    return pos + neg

  def get_config(self):
    config = {
        'alpha_initializer': initializers.serialize(self.alpha_initializer),
        'alpha_regularizer': regularizers.serialize(self.alpha_regularizer),
        'alpha_constraint': constraints.serialize(self.alpha_constraint),
        'shared_axes': self.shared_axes
    }
    base_config = super(PReLUcopy, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @tf_utils.shape_type_conversion
  def compute_output_shape(self, input_shape):
    return input_shape


def test1():
  A_in = Input(shape=(5,), name='A_in')
  out = PReLUcopy()(A_in)
  out2 = PReLU()(A_in)
  model = Model(inputs=[A_in], outputs=[out, out2])
  model.compile(optimizer='adam', loss='mean_squared_error')
  print(model.summary())


if __name__ == '__main__':
  test1()
Output
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to
==================================================================================================
 A_in (InputLayer)              [(None, 5)]          0           []
 tf.math.negative (TFOpLambda)  (None, 5)            0           ['A_in[0][0]']
 tf.nn.relu_1 (TFOpLambda)      (None, 5)            0           ['tf.math.negative[0][0]']
 tf.nn.relu (TFOpLambda)        (None, 5)            0           ['A_in[0][0]']
 tf.math.multiply (TFOpLambda)  (None, 5)            0           ['tf.nn.relu_1[0][0]']
 tf.__operators__.add (TFOpLamb (None, 5)            0           ['tf.nn.relu[0][0]',
 da)                                                              'tf.math.multiply[0][0]']
 p_re_lu (PReLU)                (None, 5)            5           ['A_in[0][0]']
==================================================================================================
Total params: 5
Trainable params: 5
Non-trainable params: 0
__________________________________________________________________________________________________
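The asymmetry also shows up if you inspect the weights directly. Below is a minimal check (assuming the model object built in test1() above): PReLUcopy never appears as a layer at all, only as weightless TFOpLambda nodes, while the packaged PReLU owns the single alpha variable.

for layer in model.layers:
  # The TFOpLambda nodes that PReLUcopy decomposed into report no
  # trainable weights; only the packaged p_re_lu layer should print
  # a (5,)-shaped alpha variable here.
  print(layer.name, [w.shape for w in layer.trainable_weights])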
Answer
I have a solution more than I have an answer. It looks like this is a package installation problem that might be Mac-specific. It looks like
pip3 install tensorflow_macos
somehow ends up installing keras in Homebrew. Even though tensorflow.python.keras exists, I am not supposed to use it. The code above works if you replace instances of
import tensorflow.python.keras
with
import keras
This is inconsistent with my experience with Linux installations of TF, but I will admit that I can't try it on a Linux machine right this moment. I would still be curious to know what the best practices are. Are TF installations on Mac M1 chips still experimental?
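As far as I can tell, the root cause is that tensorflow.python.keras is a stale legacy copy of Keras bundled inside TensorFlow, while tf.keras in 2.x resolves to the standalone keras package; a layer subclassing the legacy base Layer is not recognized by the current functional API, which is why its ops were wrapped as individual TFOpLambda nodes in the summary above. For concreteness, here is a minimal sketch of the substitution, assuming the Keras 2.9 package layout (keras.engine is internal and has moved in later releases); only the import block changes, the class body stays as posted, and the unused dtypes and math_ops imports can simply be dropped:

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
# swapped: pull Keras internals from the standalone keras package
# instead of the legacy tensorflow.python.keras copy
from keras import backend
from keras import constraints
from keras import initializers
from keras import regularizers
from keras.engine.base_layer import Layer
from keras.engine.input_spec import InputSpec
from keras.utils import tf_utils
# unchanged: keras_export does not live under tensorflow.python.keras
from tensorflow.python.util.tf_export import keras_export

With these imports, PReLUcopy subclasses the same base class as tf.keras.layers.PReLU, so the functional API tracks it as a real layer and model.summary() should show it as a single row with 5 trainable parameters, mirroring the packaged p_re_lu row above.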