caffe的测试结果比较奇怪，accuracy在0和接近1之间反复跳跃？

发布于 2022-09-02 13:22:37 字数 5392 浏览 50 评论 0

初次用caffe来训练CNN网络，得到的结果保存在log里面，我提取出来的到如下的结果图。对于其中Test Accuracy部分感觉很奇怪，一直在0和接近1附近跳跃，而且仿佛很有规律性。
一次训练的结果

下面是我的网络结构

#this net is a redesigned one, input data size is 64*64
name: "StanNet"
#----layer:data----
layer {
  name: "platedata"
  type: "ImageData"
  top: "data"
  top: "label"
  image_data_param {
    source: "G:/Samples/Plate/filelist_train.txt"
    batch_size: 64
  }
  transform_param {
    scale: 0.00390625
  }
  include: { phase: TRAIN }
}
layer {
  name: "platedata"
  type: "ImageData"
  top: "data"
  top: "label"
  image_data_param {
    source: "G:/Samples/Plate/filelist_test.txt"
    batch_size: 100
  }
  transform_param {
    scale: 0.00390625
  }
  include: { phase: TEST }
}
#----layer:1----
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  bottom: "conv1"
  top: "conv1"
  name: "bn_conv1"
  type: "BatchNorm"
  param {
    lr_mult: 0
    decay_mult: 0
  } 
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
}
layer {
  bottom: "conv1"
  top: "conv1"
  name: "scale_conv1"
  type: "Scale"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "relu_pool1"
  type: "ReLU"
  bottom: "pool1"
  top: "pool1"
}
#----layer:2----
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}

layer {
  bottom: "conv2"
  top: "conv2"
  name: "bn_conv2"
  type: "BatchNorm"
  param {
    lr_mult: 0
    decay_mult: 0
  } 
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
}
layer {
  bottom: "conv2"
  top: "conv2"
  name: "scale_conv2"
  type: "Scale"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "relu_pool2"
  type: "ReLU"
  bottom: "pool2"
  top: "pool2"
}
#----layer:3----
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 40
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  bottom: "conv3"
  top: "conv3"
  name: "bn_conv3"
  type: "BatchNorm"
  param {
    lr_mult: 0
    decay_mult: 0
  } 
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
}
layer {
  bottom: "conv3"
  top: "conv3"
  name: "scale_conv3"
  type: "Scale"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "relu_pool3"
  type: "ReLU"
  bottom: "pool3"
  top: "pool3"
}
#----layer:ip---
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool3"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 100
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "ip2"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  bottom: "label"
  top: "loss"
}

我的训练参数：

# The train/test net protocol buffer definition
net: "StanNetV0.2.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 5
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
#solver_type: ADAGRAD
weight_decay: 0.0005
#weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 40000
# snapshot intermediate results
snapshot: 2000
snapshot_prefix: "snapshot"
# solver mode: CPU or GPU
solver_mode: GPU

不知道是不是我的BatchSize选的有问题，我一共有两类样本，每类样本有2500个训练样本，500个测试样本。
训练的BatchSize为64，测试的BatchSize为100.

刚开始学习CNN，还有很多不懂的地方，希望各位大神帮忙解答。

分享到QQ

分享到微博