Keras 使用自定义图像生成器进行增强

发布于 2025-01-20 08:50:10 字数 5909 浏览 3 评论 0原文

我正在使用自定义图像生成器，按此处所述批量从磁盘读取数据：https://keras.io/examples/vision/oxford_pets_image_segmentation/

确切的生成器如下所示：

from tensorflow import keras
import numpy as np
from tensorflow.keras.preprocessing.image import load_img
import tensorflow

class OxfordPets(keras.utils.Sequence):
    """Serve (image, mask) batches from disk as NumPy arrays.

    Files are only opened when a batch is requested, so the full dataset
    never has to fit in memory.
    """

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        # img_size is concatenated with tuples below, so it must be a tuple.
        self.batch_size, self.img_size = batch_size, img_size
        self.input_img_paths, self.target_img_paths = input_img_paths, target_img_paths

    def __len__(self):
        # Whole batches only: a trailing partial batch is never served.
        n_samples = len(self.target_img_paths)
        return n_samples // self.batch_size

    def __getitem__(self, idx):
        """Return the ``(images, masks)`` pair for batch number ``idx``.

        ``images`` is float32 with a trailing axis of 3, ``masks`` is uint8
        with a trailing axis of 1; both have ``batch_size`` as leading axis.
        """
        start = idx * self.batch_size
        window = slice(start, start + self.batch_size)
        image_paths = self.input_img_paths[window]
        mask_paths = self.target_img_paths[window]
        batch_shape = (self.batch_size,) + self.img_size

        images = np.zeros(batch_shape + (3,), dtype="float32")
        for slot, image_path in enumerate(image_paths):
            images[slot] = load_img(image_path, target_size=self.img_size)

        masks = np.zeros(batch_shape + (1,), dtype="uint8")
        for slot, mask_path in enumerate(mask_paths):
            mask = load_img(mask_path, target_size=self.img_size, color_mode="grayscale")
            masks[slot] = np.expand_dims(mask, 2)
            # Labels on disk are 1, 2, 3; shift them in place to 0, 1, 2.
            masks[slot] -= 1

        return images, masks

这非常有效，并且很有帮助，因为我的数据太大而无法读入内存。它需要两条路径，分别是输入图像 (batch_input_img_paths) 和蒙版 (batch_target_img_paths) 的路径。

我想修改这个生成器以使用数据增强。我正在尝试这个：

class OxfordPets(keras.utils.Sequence):
    """Helper to iterate over the data (as Numpy arrays).

    First attempt at adding augmentation to the generator.

    NOTE(review): the runtime behaviour is intentionally left as posted.
    This version fails with
    ``AttributeError: 'Image' object has no attribute 'shape'``;
    the notes in ``__data_augmentation`` and ``__getitem__`` explain why.
    """

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths

    def __len__(self):
        # Number of complete batches; a trailing partial batch is dropped.
        return len(self.target_img_paths) // self.batch_size

    def __data_augmentation(self, img):
        """Apply a chain of random shift/zoom/shear/rotation/flip ops to ``img``.

        NOTE(review): the ``random_*`` helpers expect a NumPy array. The
        callers below pass the PIL image returned by ``load_img``, which is
        the source of the AttributeError. Converting first (for example with
        ``np.asarray(img)``) would avoid that particular error.

        NOTE(review): each helper presumably draws its own random parameters
        on every call, so calling this once for an image and once for its
        mask would not give matching transforms -- confirm against the
        Keras documentation.
        """
        img = tensorflow.keras.preprocessing.image.random_shift(img, 0.2, 0.2)
        img = tensorflow.keras.preprocessing.image.random_zoom(img, 0.2)
        img = tensorflow.keras.preprocessing.image.random_shear(img, 0.2)
        img = tensorflow.keras.preprocessing.image.random_rotation(img, 40)
        img = tensorflow.image.random_flip_left_right(img)
        img = tensorflow.image.random_flip_up_down(img)
        return img

    def __getitem__(self, idx):
        """Returns tuple (input, target) correspond to batch #idx."""
        i = idx * self.batch_size
        batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
        batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]
        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        for j, path in enumerate(batch_input_img_paths):
            img = load_img(path, target_size=self.img_size)

            # Apply augmentation.
            # NOTE(review): ``img`` is still a PIL image here (see above).
            img = self.__data_augmentation(img)

            x[j] = img
        y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")
        for j, path in enumerate(batch_target_img_paths):
            img = load_img(path, target_size=self.img_size, color_mode="grayscale")
            y[j] = np.expand_dims(img, 2)

            # Apply augmentation.
            # NOTE(review): ``y[j]`` was already filled from the un-augmented
            # mask on the line above, and the augmented ``img`` is never
            # written back, so the mask batch would stay un-augmented even
            # if the call succeeded.
            img = self.__data_augmentation(img)

            # Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2:
            y[j] -= 1
        return x, y

但这会返回：

AttributeError: 'Image' object has no attribute 'shape'

我不确定的另一件事是，完全相同的增强是否会同时应用于数据和标签。也许为此需要随机种子？我想做的另一件事是，不仅将增强图像输入到最终网络中，而且通过增强来扩大训练集规模，以便同时输入一些真实图像和一些增强图像，例如仅对 30% 的输入图像进行增强。

编辑：

我认为我已经像这样让增强起作用了：

class OxfordPets(keras.utils.Sequence):
    """Helper to iterate over the data (as Numpy arrays), with augmentation.

    Every sample served by ``__getitem__`` is randomly augmented. The image
    and its mask receive the *same* random transform so they stay aligned.
    Random affine transforms in ``ImageDataGenerator`` require SciPy.
    """

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        super().__init__()
        self.batch_size = batch_size
        # img_size is concatenated with tuples below, so it must be a tuple.
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths

        # Augmentation settings shared by the image and the mask pipeline.
        augmentation = dict(
            rotation_range=40,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
        )
        image_module = tensorflow.keras.preprocessing.image
        self.augmentor = image_module.ImageDataGenerator(**augmentation)
        # Masks hold class ids, so they must be resampled with
        # nearest-neighbour (order 0); interpolating would blend labels
        # into values that are not valid classes.
        self.mask_augmentor = image_module.ImageDataGenerator(
            interpolation_order=0, **augmentation
        )

    def __len__(self):
        # Number of complete batches; a trailing partial batch is dropped.
        return len(self.target_img_paths) // self.batch_size

    def __getitem__(self, idx):
        """Returns tuple (input, target) correspond to batch #idx.

        ``input`` is float32 of shape ``(batch_size,) + img_size + (3,)``;
        ``target`` is uint8 of shape ``(batch_size,) + img_size + (1,)``.
        """
        i = idx * self.batch_size
        batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
        batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]
        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")

        paired_paths = zip(batch_input_img_paths, batch_target_img_paths)
        for j, (input_path, target_path) in enumerate(paired_paths):
            # Draw the random parameters once per sample and reuse them for
            # the image and the mask so both undergo the identical transform.
            params = self.augmentor.get_random_transform(self.img_size + (3,))

            # load_img returns a PIL image; the augmentor needs a NumPy array.
            image = np.asarray(
                load_img(input_path, target_size=self.img_size), dtype="float32"
            )
            x[j] = self.augmentor.apply_transform(image, params)

            mask = np.asarray(
                load_img(target_path, target_size=self.img_size, color_mode="grayscale")
            )
            y[j] = self.mask_augmentor.apply_transform(np.expand_dims(mask, 2), params)
            # Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2:
            y[j] -= 1
        return x, y

但这并不能解决我实际增加数据集大小的问题，我相信它现在只发送增强图像，而没有真正的数据。

原文

I am using a custom image generator to read my data off disk in batches as described here, https://keras.io/examples/vision/oxford_pets_image_segmentation/

The exact generator looks like this:

from tensorflow import keras
import numpy as np
from tensorflow.keras.preprocessing.image import load_img
import tensorflow

class OxfordPets(keras.utils.Sequence):
    """Serve (image, mask) batches from disk as NumPy arrays.

    Files are only opened when a batch is requested, so the full dataset
    never has to fit in memory.
    """

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        # img_size is concatenated with tuples below, so it must be a tuple.
        self.batch_size, self.img_size = batch_size, img_size
        self.input_img_paths, self.target_img_paths = input_img_paths, target_img_paths

    def __len__(self):
        # Whole batches only: a trailing partial batch is never served.
        n_samples = len(self.target_img_paths)
        return n_samples // self.batch_size

    def __getitem__(self, idx):
        """Return the ``(images, masks)`` pair for batch number ``idx``.

        ``images`` is float32 with a trailing axis of 3, ``masks`` is uint8
        with a trailing axis of 1; both have ``batch_size`` as leading axis.
        """
        start = idx * self.batch_size
        window = slice(start, start + self.batch_size)
        image_paths = self.input_img_paths[window]
        mask_paths = self.target_img_paths[window]
        batch_shape = (self.batch_size,) + self.img_size

        images = np.zeros(batch_shape + (3,), dtype="float32")
        for slot, image_path in enumerate(image_paths):
            images[slot] = load_img(image_path, target_size=self.img_size)

        masks = np.zeros(batch_shape + (1,), dtype="uint8")
        for slot, mask_path in enumerate(mask_paths):
            mask = load_img(mask_path, target_size=self.img_size, color_mode="grayscale")
            masks[slot] = np.expand_dims(mask, 2)
            # Labels on disk are 1, 2, 3; shift them in place to 0, 1, 2.
            masks[slot] -= 1

        return images, masks

This works great and is helpful as my data is too large to read into ram. It takes two paths which are pathways to the input images (batch_input_img_paths) and the masks (batch_target_img_paths).

I would like to modify this generator to use data augmentation. I am trying this:

class OxfordPets(keras.utils.Sequence):
    """Helper to iterate over the data (as Numpy arrays).

    First attempt at adding augmentation to the generator.

    NOTE(review): the runtime behaviour is intentionally left as posted.
    This version fails with
    ``AttributeError: 'Image' object has no attribute 'shape'``;
    the notes in ``__data_augmentation`` and ``__getitem__`` explain why.
    """

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths

    def __len__(self):
        # Number of complete batches; a trailing partial batch is dropped.
        return len(self.target_img_paths) // self.batch_size

    def __data_augmentation(self, img):
        """Apply a chain of random shift/zoom/shear/rotation/flip ops to ``img``.

        NOTE(review): the ``random_*`` helpers expect a NumPy array. The
        callers below pass the PIL image returned by ``load_img``, which is
        the source of the AttributeError. Converting first (for example with
        ``np.asarray(img)``) would avoid that particular error.

        NOTE(review): each helper presumably draws its own random parameters
        on every call, so calling this once for an image and once for its
        mask would not give matching transforms -- confirm against the
        Keras documentation.
        """
        img = tensorflow.keras.preprocessing.image.random_shift(img, 0.2, 0.2)
        img = tensorflow.keras.preprocessing.image.random_zoom(img, 0.2)
        img = tensorflow.keras.preprocessing.image.random_shear(img, 0.2)
        img = tensorflow.keras.preprocessing.image.random_rotation(img, 40)
        img = tensorflow.image.random_flip_left_right(img)
        img = tensorflow.image.random_flip_up_down(img)
        return img

    def __getitem__(self, idx):
        """Returns tuple (input, target) correspond to batch #idx."""
        i = idx * self.batch_size
        batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
        batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]
        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        for j, path in enumerate(batch_input_img_paths):
            img = load_img(path, target_size=self.img_size)

            # Apply augmentation.
            # NOTE(review): ``img`` is still a PIL image here (see above).
            img = self.__data_augmentation(img)

            x[j] = img
        y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")
        for j, path in enumerate(batch_target_img_paths):
            img = load_img(path, target_size=self.img_size, color_mode="grayscale")
            y[j] = np.expand_dims(img, 2)

            # Apply augmentation.
            # NOTE(review): ``y[j]`` was already filled from the un-augmented
            # mask on the line above, and the augmented ``img`` is never
            # written back, so the mask batch would stay un-augmented even
            # if the call succeeded.
            img = self.__data_augmentation(img)

            # Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2:
            y[j] -= 1
        return x, y

but this returns:

AttributeError: 'Image' object has no attribute 'shape'

Another thing I am not positive about is if the exact same augmentation will be applied to the data and to the label. Perhaps a random seed is needed for this? Another thing I would like to do is not only feed in augmented images into the final network but to increase the training size with the augmentation, so that some real images and some augmented images are being fed in, for instance only augment 30% of the input images.

EDIT:

I think I got the augmentation working like this:

class OxfordPets(keras.utils.Sequence):
    """Helper to iterate over the data (as Numpy arrays), with augmentation.

    Every sample served by ``__getitem__`` is randomly augmented. The image
    and its mask receive the *same* random transform so they stay aligned.
    Random affine transforms in ``ImageDataGenerator`` require SciPy.
    """

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        super().__init__()
        self.batch_size = batch_size
        # img_size is concatenated with tuples below, so it must be a tuple.
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths

        # Augmentation settings shared by the image and the mask pipeline.
        augmentation = dict(
            rotation_range=40,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
        )
        image_module = tensorflow.keras.preprocessing.image
        self.augmentor = image_module.ImageDataGenerator(**augmentation)
        # Masks hold class ids, so they must be resampled with
        # nearest-neighbour (order 0); interpolating would blend labels
        # into values that are not valid classes.
        self.mask_augmentor = image_module.ImageDataGenerator(
            interpolation_order=0, **augmentation
        )

    def __len__(self):
        # Number of complete batches; a trailing partial batch is dropped.
        return len(self.target_img_paths) // self.batch_size

    def __getitem__(self, idx):
        """Returns tuple (input, target) correspond to batch #idx.

        ``input`` is float32 of shape ``(batch_size,) + img_size + (3,)``;
        ``target`` is uint8 of shape ``(batch_size,) + img_size + (1,)``.
        """
        i = idx * self.batch_size
        batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
        batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]
        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")

        paired_paths = zip(batch_input_img_paths, batch_target_img_paths)
        for j, (input_path, target_path) in enumerate(paired_paths):
            # Draw the random parameters once per sample and reuse them for
            # the image and the mask so both undergo the identical transform.
            params = self.augmentor.get_random_transform(self.img_size + (3,))

            # load_img returns a PIL image; the augmentor needs a NumPy array.
            image = np.asarray(
                load_img(input_path, target_size=self.img_size), dtype="float32"
            )
            x[j] = self.augmentor.apply_transform(image, params)

            mask = np.asarray(
                load_img(target_path, target_size=self.img_size, color_mode="grayscale")
            )
            y[j] = self.mask_augmentor.apply_transform(np.expand_dims(mask, 2), params)
            # Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2:
            y[j] -= 1
        return x, y

but this does not solve my problem of actually increasing the dataset size; I believe it is now sending only augmented images, and no real data.

分享到QQ

分享到微博