无法将 NumPy 数组转换为张量(不支持的对象类型 numpy.ndarray),这与我看到的答案不同

发布于 2025-01-20 13:59:22 字数 8915 浏览 1 评论 0 原文

我正在按照一个教程创建自定义目标检测模型来识别徽标,但已经卡住了一段时间。我尝试了在 Stack Overflow 上看到的针对该问题的所有解决方案,但遗憾的是都没有奏效。目前的问题是,我的 NumPy 数组是一个形状为 (145,) 的一维数组。我尝试过修复它的结构,但没有成功。我尝试了:

# Attempt 1: build the array with an explicit object dtype.
train_images = np.array(train_images,dtype = object)

# Attempt 2: convert with np.asarray; the result must be bound to the same
# name (train_images) for the later code to see it.
train_images = np.asarray(train_images)

和其他几个变体

库:

import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf
import pathlib
import pandas as pd
from PIL import IcnsImagePlugin
from PIL.ImageDraw import Draw
import glob
import pandas as pd
import xml.etree.ElementTree as ET
from tensorflow.keras.utils import to_categorical
from keras.preprocessing import image 
from sklearn.model_selection import train_test_split
from skimage.segmentation import mark_boundaries 

将我的 XML 文件(边界框标注)转换为 2 个 CSV 文件:一个用于训练图像,另一个用于验证图像。边界框是使用 LabelImg 创建的:

# When true, annotation files without any <object> element are ignored.
SKIP_NEGATIVES = True
# Class label written for files without any <object> element.
NEGATIVE_CLASS = "no_logo"


def _image_info(root):
    """Return (filename, width, height) read from an annotation root element."""
    size = root.find('size')
    # The first two children of <size> are read positionally as width, height.
    return (root.find('filename').text, int(size[0].text), int(size[1].text))


def xml_to_csv(path, skipNegatives):
    """Collect the bounding-box annotations of every ``*.xml`` file in *path*.

    Args:
        path: Directory that is searched (non-recursively) for ``*.xml`` files.
        skipNegatives: When false, a file without any ``<object>`` element
            contributes one row labelled ``NEGATIVE_CLASS`` with an all-zero
            box; when true, such files contribute nothing.

    Returns:
        A ``pandas.DataFrame`` with the columns ``filename``, ``width``,
        ``height``, ``class``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``, one
        row per kept annotation. Rows whose width or height is not positive
        are dropped. Box coordinates are rounded to integers.
    """
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        root = ET.parse(xml_file).getroot()
        # findall() returns a list, so its truthiness means "at least one
        # <object> exists". Truth-testing the Element returned by find()
        # instead reports whether that element has children and is deprecated.
        members = root.findall('object')
        if members:
            for member in members:
                bbx = member.find('bndbox')
                if bbx is None:
                    # An object without a bounding box cannot produce a row.
                    continue
                xmin = round(float(bbx.find('xmin').text))
                ymin = round(float(bbx.find('ymin').text))
                xmax = round(float(bbx.find('xmax').text))
                ymax = round(float(bbx.find('ymax').text))
                label = member.find('name').text
                filename, width, height = _image_info(root)
                value = (filename, width, height, label,
                         xmin, ymin, xmax, ymax)
                print(value)

                if width > 0 and height > 0:
                    xml_list.append(value)
                    print("Value appended", end=" ")
                    print(value)

        elif not skipNegatives:
            filename, width, height = _image_info(root)
            value = (filename, width, height, NEGATIVE_CLASS, 0, 0, 0, 0)
            print("Printing Value")
            print(value)

            if width > 0 and height > 0:
                xml_list.append(value)
                print("Value appended", end=" ")
                print(value)
            else:
                print("VALUE NOT APPENDED")

    column_name = ['filename', 'width', 'height',
                   'class', 'xmin', 'ymin', 'xmax', 'ymax']

    print("Printing XML_LIST: ")
    print(xml_list)
    xml_df = pd.DataFrame(xml_list, columns=column_name)

    print("Printing xml_df")
    print(xml_df)
    return xml_df


def main():
    """Convert the annotation XML files of each dataset folder into a CSV."""
    dataset_dirs = [
        '/content/drive/MyDrive/Logo_Model/train',
        '/content/drive/MyDrive/Logo_Model/validation',
    ]

    for dataset_dir in dataset_dirs:
        # os.path.join discards the earlier components when a later one is
        # absolute, so with the absolute entries above this is dataset_dir.
        xml_dir = os.path.join(os.getcwd(), 'Images', dataset_dir)
        annotations = xml_to_csv(xml_dir, SKIP_NEGATIVES)

        print(annotations)
        # The CSV name is derived from the dataset path: "/<dataset>_data.csv".
        csv_path = '/{}_data.csv'.format(dataset_dir)
        annotations.to_csv(csv_path, index=None)
        print('Successfully converted xml to csv.')


main()

问题就是从这里开始的。把 train_img_arr 追加到 train_images 之后,得到的 train_images 形状为 (145,)。验证数据的处理过程也相同。

# Label set: a label is stored as the index of its name in `classes`.
num_classes = 2
classes = ["logo","no_logo"]

# Annotation CSV and image directory for the training split.
TRAINING_CSV_FILE = '/content/drive/MyDrive/Logo_Model/train/logo_data.csv'
TRAINING_IMAGE_DIR = '/content/drive/MyDrive/Logo_Model/train/'
training_image_records = pd.read_csv(TRAINING_CSV_FILE)

# TRAINING_IMAGE_DIR is absolute, so os.path.join returns it unchanged.
train_image_path = os.path.join(os.getcwd(), TRAINING_IMAGE_DIR)

# Parallel lists: one image array, one box and one class index per CSV row.
train_images = []
train_targets = []
train_labels = []

for index, row in training_image_records.iterrows():
    
  # Positional unpacking: relies on the CSV having exactly these 8 columns
  # in this order.
  (filename, width, height, class_name, xmin, ymin, xmax, ymax) = row
  
  train_image_fullpath = os.path.join(train_image_path, filename)
  # The target size comes from this row's own height/width, so the loaded
  # arrays differ in shape whenever rows record different image sizes.
  train_img = tf.keras.preprocessing.image.load_img(train_image_fullpath, target_size=(height, width))
  train_img_arr = tf.keras.preprocessing.image.img_to_array(train_img)
  
  
  # Express the box corners as fractions of the image size, rounded to
  # two decimals.
  xmin = round(xmin/ width, 2)
  ymin = round(ymin/ height, 2)
  xmax = round(xmax/ width, 2)
  ymax = round(ymax/ height, 2)
  
  train_images.append(train_img_arr)
  train_targets.append((xmin, ymin, xmax, ymax))
  # Raises ValueError if class_name is not one of the entries in `classes`.
  train_labels.append(classes.index(class_name))

train_img_arr的示例输出:

(78, 323, 3)
(180, 235, 3)
(180, 166, 3)
(156, 311, 3)
(180, 342, 3)
(180, 197, 3)
(180, 315, 3)
(180, 297, 3)
(180, 156, 3)
(180, 190, 3)
(180, 325, 3)
(180, 227, 3)
(176, 192, 3)
(180, 235, 3)
(180, 138, 3)
(180, 222, 3)
(180, 213, 3)

转换为NP数组:

# Turn the collected Python lists into NumPy arrays.
# NOTE(review): np.array can only stack the images into a single
# (N, H, W, 3) block when every element of train_images has the same shape —
# TODO confirm that holds for this dataset.
train_images = np.array(train_images)
train_targets = np.array(train_targets)
train_labels = np.array(train_labels)

# Print the resulting array shapes.
print(train_images.shape)
print(train_targets.shape)
print(train_labels.shape)

输出:

(145,)
(145, 4)
(145,)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.

这是我的其余代码

# Annotation CSV and image directory for the validation split.
VALIDATION_CSV_FILE = '/content/drive/MyDrive/Logo_Model/validation/logo_data.csv'
VALIDATION_IMAGE_DIR = '/content/drive/MyDrive/Logo_Model/validation/'
validation_image_records = pd.read_csv(VALIDATION_CSV_FILE)

# VALIDATION_IMAGE_DIR is absolute, so os.path.join returns it unchanged.
validation_image_path = os.path.join(os.getcwd(), VALIDATION_IMAGE_DIR)

# Parallel lists: one image array, one box and one class index per CSV row.
validation_images = []
validation_targets = []
validation_labels = []

for index, row in validation_image_records.iterrows():
    
  # Positional unpacking: relies on the CSV having exactly these 8 columns
  # in this order.
  (filename, width, height, class_name, xmin, ymin, xmax, ymax) = row
  
  validation_image_fullpath = os.path.join(validation_image_path, filename)
  # The target size comes from this row's own height/width, so the loaded
  # arrays differ in shape whenever rows record different image sizes.
  validation_img = tf.keras.preprocessing.image.load_img(validation_image_fullpath, target_size=(height, width))
  validation_img_arr =tf.keras.preprocessing.image.img_to_array(validation_img)
  
  
  # Express the box corners as fractions of the image size, rounded to
  # two decimals.
  xmin = round(xmin/ width, 2)
  ymin = round(ymin/ height, 2)
  xmax = round(xmax/ width, 2)
  ymax = round(ymax/ height, 2)
  
  validation_images.append(validation_img_arr)
  validation_targets.append((xmin, ymin, xmax, ymax))
  validation_labels.append(classes.index(class_name))
# Turn the collected Python lists into NumPy arrays.
validation_images = np.array(validation_images)
validation_targets = np.array(validation_targets)
validation_labels = np.array(validation_labels)

print(validation_images.shape)
print(validation_targets.shape)
print(validation_labels.shape)
# `height` and `width` are the values left behind by the last iteration of
# the validation loop above, i.e. the size recorded in the final CSV row.
input_shape = (height,width,3)
input_layer = tf.keras.layers.Input(input_shape)

# Shared base: rescale pixels by 1/255, then three Conv2D + MaxPooling2D
# stages (16, 32, 64 filters), flattened for the two heads below.
base_layers = tf.keras.layers.experimental.preprocessing.Rescaling(1./255, name='bl_1')(input_layer)
base_layers = tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu', name='bl_2')(base_layers)
base_layers = tf.keras.layers.MaxPooling2D(name='bl_3')(base_layers)
base_layers = tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu', name='bl_4')(base_layers)
base_layers = tf.keras.layers.MaxPooling2D(name='bl_5')(base_layers)
base_layers = tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu', name='bl_6')(base_layers)
base_layers = tf.keras.layers.MaxPooling2D(name='bl_7')(base_layers)
base_layers = tf.keras.layers.Flatten(name='bl_8')(base_layers)

# Classifier branch: 'cl_head' has no activation, so it outputs raw scores
# (the loss below is configured with from_logits=True).
classifier_branch = tf.keras.layers.Dense(128, activation='relu', name='cl_1')(base_layers)
classifier_branch = tf.keras.layers.Dense(num_classes, name='cl_head')(classifier_branch)  

# Localiser branch: 'bb_head' emits 4 sigmoid outputs, one per box coordinate.
locator_branch = tf.keras.layers.Dense(128, activation='relu', name='bb_1')(base_layers)
locator_branch = tf.keras.layers.Dense(64, activation='relu', name='bb_2')(locator_branch)
locator_branch = tf.keras.layers.Dense(32, activation='relu', name='bb_3')(locator_branch)
locator_branch = tf.keras.layers.Dense(4, activation='sigmoid', name='bb_head')(locator_branch)

# One input, two outputs: class scores and box coordinates.
model = tf.keras.Model(input_layer,outputs=[classifier_branch,locator_branch])

model.summary()
# One loss per output, keyed by the name of the output layer.
losses ={"cl_head":tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), "bb_head":tf.keras.losses.MSE}
model.compile(loss=losses, optimizer='Adam', metrics=['accuracy'])
# Targets use the same output-layer names as keys.
trainTargets = {
    "cl_head": train_labels,
    "bb_head": train_targets
}
validationTargets = {
    "cl_head": validation_labels,
    "bb_head": validation_targets
}
history = model.fit(train_images, trainTargets,validation_data=(validation_images, validationTargets),batch_size=4,epochs=20,shuffle=True,verbose=1)

I am following a tutorial to create a custom object detection model that recognizes a logo, but I've been stuck for a while. I've tried all the solutions to this problem that I've seen on Stack Overflow, and unfortunately none of them has worked. My current problem is that my NumPy array is a 1D array with shape (145,). I've tried to fix its structure, but it hasn't worked out for me. I tried:

# Attempt 1: build the array with an explicit object dtype.
train_images=np.array(train_images,dtype=object)

# Attempt 2: convert with np.asarray.
train_images=np.asarray(train_images)

and several other variations

Libraries:

import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf
import pathlib
import pandas as pd
from PIL import IcnsImagePlugin
from PIL.ImageDraw import Draw
import glob
import pandas as pd
import xml.etree.ElementTree as ET
from tensorflow.keras.utils import to_categorical
from keras.preprocessing import image 
from sklearn.model_selection import train_test_split
from skimage.segmentation import mark_boundaries 

Converts my xml files [bounding box images] to 2 csv files. One for the training images, and one for the validation images. Used Labelimg to create the bounding boxes:

# When true, annotation files without any <object> element are ignored.
SKIP_NEGATIVES = True
# Class label written for files without any <object> element.
NEGATIVE_CLASS = "no_logo"


def _image_info(root):
    """Return (filename, width, height) read from an annotation root element."""
    size = root.find('size')
    # The first two children of <size> are read positionally as width, height.
    return (root.find('filename').text, int(size[0].text), int(size[1].text))


def xml_to_csv(path, skipNegatives):
    """Collect the bounding-box annotations of every ``*.xml`` file in *path*.

    Args:
        path: Directory that is searched (non-recursively) for ``*.xml`` files.
        skipNegatives: When false, a file without any ``<object>`` element
            contributes one row labelled ``NEGATIVE_CLASS`` with an all-zero
            box; when true, such files contribute nothing.

    Returns:
        A ``pandas.DataFrame`` with the columns ``filename``, ``width``,
        ``height``, ``class``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``, one
        row per kept annotation. Rows whose width or height is not positive
        are dropped. Box coordinates are rounded to integers.
    """
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        root = ET.parse(xml_file).getroot()
        # findall() returns a list, so its truthiness means "at least one
        # <object> exists". Truth-testing the Element returned by find()
        # instead reports whether that element has children and is deprecated.
        members = root.findall('object')
        if members:
            for member in members:
                bbx = member.find('bndbox')
                if bbx is None:
                    # An object without a bounding box cannot produce a row.
                    continue
                xmin = round(float(bbx.find('xmin').text))
                ymin = round(float(bbx.find('ymin').text))
                xmax = round(float(bbx.find('xmax').text))
                ymax = round(float(bbx.find('ymax').text))
                label = member.find('name').text
                filename, width, height = _image_info(root)
                value = (filename, width, height, label,
                         xmin, ymin, xmax, ymax)
                print(value)

                if width > 0 and height > 0:
                    xml_list.append(value)
                    print("Value appended", end=" ")
                    print(value)

        elif not skipNegatives:
            filename, width, height = _image_info(root)
            value = (filename, width, height, NEGATIVE_CLASS, 0, 0, 0, 0)
            print("Printing Value")
            print(value)

            if width > 0 and height > 0:
                xml_list.append(value)
                print("Value appended", end=" ")
                print(value)
            else:
                print("VALUE NOT APPENDED")

    column_name = ['filename', 'width', 'height',
                   'class', 'xmin', 'ymin', 'xmax', 'ymax']

    print("Printing XML_LIST: ")
    print(xml_list)
    xml_df = pd.DataFrame(xml_list, columns=column_name)

    print("Printing xml_df")
    print(xml_df)
    return xml_df


def main():
    """Convert the annotation XML files of each dataset folder into a CSV."""
    dataset_dirs = [
        '/content/drive/MyDrive/Logo_Model/train',
        '/content/drive/MyDrive/Logo_Model/validation',
    ]

    for dataset_dir in dataset_dirs:
        # os.path.join discards the earlier components when a later one is
        # absolute, so with the absolute entries above this is dataset_dir.
        xml_dir = os.path.join(os.getcwd(), 'Images', dataset_dir)
        annotations = xml_to_csv(xml_dir, SKIP_NEGATIVES)

        print(annotations)
        # The CSV name is derived from the dataset path: "/<dataset>_data.csv".
        csv_path = '/{}_data.csv'.format(dataset_dir)
        annotations.to_csv(csv_path, index=None)
        print('Successfully converted xml to csv.')


main()

Now this is where the problem begins. When train_img_arr is appended to train_images, I have a resulting shape of train_images of (145,). The same process is for the validation data.

# Label set: a label is stored as the index of its name in `classes`.
num_classes = 2
classes = ["logo","no_logo"]

# Annotation CSV and image directory for the training split.
TRAINING_CSV_FILE = '/content/drive/MyDrive/Logo_Model/train/logo_data.csv'
TRAINING_IMAGE_DIR = '/content/drive/MyDrive/Logo_Model/train/'
training_image_records = pd.read_csv(TRAINING_CSV_FILE)

# TRAINING_IMAGE_DIR is absolute, so os.path.join returns it unchanged.
train_image_path = os.path.join(os.getcwd(), TRAINING_IMAGE_DIR)

# Parallel lists: one image array, one box and one class index per CSV row.
train_images = []
train_targets = []
train_labels = []

for index, row in training_image_records.iterrows():
    
  # Positional unpacking: relies on the CSV having exactly these 8 columns
  # in this order.
  (filename, width, height, class_name, xmin, ymin, xmax, ymax) = row
  
  train_image_fullpath = os.path.join(train_image_path, filename)
  # The target size comes from this row's own height/width, so the loaded
  # arrays differ in shape whenever rows record different image sizes.
  train_img = tf.keras.preprocessing.image.load_img(train_image_fullpath, target_size=(height, width))
  train_img_arr = tf.keras.preprocessing.image.img_to_array(train_img)
  
  
  # Express the box corners as fractions of the image size, rounded to
  # two decimals.
  xmin = round(xmin/ width, 2)
  ymin = round(ymin/ height, 2)
  xmax = round(xmax/ width, 2)
  ymax = round(ymax/ height, 2)
  
  train_images.append(train_img_arr)
  train_targets.append((xmin, ymin, xmax, ymax))
  # Raises ValueError if class_name is not one of the entries in `classes`.
  train_labels.append(classes.index(class_name))

sample output of train_img_arr:

(78, 323, 3)
(180, 235, 3)
(180, 166, 3)
(156, 311, 3)
(180, 342, 3)
(180, 197, 3)
(180, 315, 3)
(180, 297, 3)
(180, 156, 3)
(180, 190, 3)
(180, 325, 3)
(180, 227, 3)
(176, 192, 3)
(180, 235, 3)
(180, 138, 3)
(180, 222, 3)
(180, 213, 3)

Convert to np arrays:

# Turn the collected Python lists into NumPy arrays.
# NOTE(review): np.array can only stack the images into a single
# (N, H, W, 3) block when every element of train_images has the same shape —
# TODO confirm that holds for this dataset.
train_images = np.array(train_images)
train_targets = np.array(train_targets)
train_labels = np.array(train_labels)

# Print the resulting array shapes.
print(train_images.shape)
print(train_targets.shape)
print(train_labels.shape)

Output:

(145,)
(145, 4)
(145,)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.

Here is the rest of my code

# Annotation CSV and image directory for the validation split.
VALIDATION_CSV_FILE = '/content/drive/MyDrive/Logo_Model/validation/logo_data.csv'
VALIDATION_IMAGE_DIR = '/content/drive/MyDrive/Logo_Model/validation/'
validation_image_records = pd.read_csv(VALIDATION_CSV_FILE)

# VALIDATION_IMAGE_DIR is absolute, so os.path.join returns it unchanged.
validation_image_path = os.path.join(os.getcwd(), VALIDATION_IMAGE_DIR)

# Parallel lists: one image array, one box and one class index per CSV row.
validation_images = []
validation_targets = []
validation_labels = []

for index, row in validation_image_records.iterrows():
    
  # Positional unpacking: relies on the CSV having exactly these 8 columns
  # in this order.
  (filename, width, height, class_name, xmin, ymin, xmax, ymax) = row
  
  validation_image_fullpath = os.path.join(validation_image_path, filename)
  # The target size comes from this row's own height/width, so the loaded
  # arrays differ in shape whenever rows record different image sizes.
  validation_img = tf.keras.preprocessing.image.load_img(validation_image_fullpath, target_size=(height, width))
  validation_img_arr =tf.keras.preprocessing.image.img_to_array(validation_img)
  
  
  # Express the box corners as fractions of the image size, rounded to
  # two decimals.
  xmin = round(xmin/ width, 2)
  ymin = round(ymin/ height, 2)
  xmax = round(xmax/ width, 2)
  ymax = round(ymax/ height, 2)
  
  validation_images.append(validation_img_arr)
  validation_targets.append((xmin, ymin, xmax, ymax))
  validation_labels.append(classes.index(class_name))
# Turn the collected Python lists into NumPy arrays.
validation_images = np.array(validation_images)
validation_targets = np.array(validation_targets)
validation_labels = np.array(validation_labels)

print(validation_images.shape)
print(validation_targets.shape)
print(validation_labels.shape)
# `height` and `width` are the values left behind by the last iteration of
# the validation loop above, i.e. the size recorded in the final CSV row.
input_shape = (height,width,3)
input_layer = tf.keras.layers.Input(input_shape)

# Shared base: rescale pixels by 1/255, then three Conv2D + MaxPooling2D
# stages (16, 32, 64 filters), flattened for the two heads below.
base_layers = tf.keras.layers.experimental.preprocessing.Rescaling(1./255, name='bl_1')(input_layer)
base_layers = tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu', name='bl_2')(base_layers)
base_layers = tf.keras.layers.MaxPooling2D(name='bl_3')(base_layers)
base_layers = tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu', name='bl_4')(base_layers)
base_layers = tf.keras.layers.MaxPooling2D(name='bl_5')(base_layers)
base_layers = tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu', name='bl_6')(base_layers)
base_layers = tf.keras.layers.MaxPooling2D(name='bl_7')(base_layers)
base_layers = tf.keras.layers.Flatten(name='bl_8')(base_layers)

# Classifier branch: 'cl_head' has no activation, so it outputs raw scores
# (the loss below is configured with from_logits=True).
classifier_branch = tf.keras.layers.Dense(128, activation='relu', name='cl_1')(base_layers)
classifier_branch = tf.keras.layers.Dense(num_classes, name='cl_head')(classifier_branch)  

# Localiser branch: 'bb_head' emits 4 sigmoid outputs, one per box coordinate.
locator_branch = tf.keras.layers.Dense(128, activation='relu', name='bb_1')(base_layers)
locator_branch = tf.keras.layers.Dense(64, activation='relu', name='bb_2')(locator_branch)
locator_branch = tf.keras.layers.Dense(32, activation='relu', name='bb_3')(locator_branch)
locator_branch = tf.keras.layers.Dense(4, activation='sigmoid', name='bb_head')(locator_branch)

# One input, two outputs: class scores and box coordinates.
model = tf.keras.Model(input_layer,outputs=[classifier_branch,locator_branch])

model.summary()
# One loss per output, keyed by the name of the output layer.
losses ={"cl_head":tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), "bb_head":tf.keras.losses.MSE}
model.compile(loss=losses, optimizer='Adam', metrics=['accuracy'])
# Targets use the same output-layer names as keys.
trainTargets = {
    "cl_head": train_labels,
    "bb_head": train_targets
}
validationTargets = {
    "cl_head": validation_labels,
    "bb_head": validation_targets
}
history = model.fit(train_images, trainTargets,validation_data=(validation_images, validationTargets),batch_size=4,epochs=20,shuffle=True,verbose=1)

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

染火枫林 2025-01-27 13:59:22

NumPy 数组无法得到预期的形状,因为各个图像的形状并不一致。

添加 tf.image.resize,把所有图像调整为固定的形状。

# Convert the loaded image to an array, then resize it to one common size.
# size1 and size2 are placeholders that are not defined in this snippet —
# presumably the target height and width; confirm against the
# tf.image.resize documentation.
train_img_arr = tf.keras.preprocessing.image.img_to_array(train_img)
train_img_arr = tf.image.resize(train_img_arr, (size1, size2))

如果您希望输入数据具有不同的形状,可以查看此链接。

The NumPy array does not get the expected shape because the images do not all have the same shape.

Add tf.image.resize so that every image is resized to the same fixed shape.

# Convert the loaded image to an array, then resize it to one common size.
# size1 and size2 are placeholders that are not defined in this snippet —
# presumably the target height and width; confirm against the
# tf.image.resize documentation.
train_img_arr = tf.keras.preprocessing.image.img_to_array(train_img)
train_img_arr = tf.image.resize(train_img_arr, (size1, size2))

If you want your input data to have different shapes, you can check this link.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文