Memory error while preprocessing the HMDB51 dataset

Posted 2025-02-08 07:59:12


I am working on action recognition on HMDB51. Here is my code below.

This part is for declaring some constants and directories:

import os
import cv2
import numpy as np

# Specify the height and width to which each video frame will be resized in our dataset.
IMAGE_HEIGHT, IMAGE_WIDTH = 64, 64

# Specify the number of frames of a video that will be fed to the model as one sequence.
SEQUENCE_LENGTH = 20

# Specify the directory containing the HMDB51 dataset.
DATASET_DIR = r"\HMDB51"

# Specify the list containing the names of the classes used for training. Feel free to choose any set of classes.
CLASSES_LIST = ["brush_hair", "cartwheel", "catch", "chew", "clap", "climb", "climb_stairs", "dive",
            "draw_sword", "dribble", "drink", "eat", "fall_floor", "fencing", "flic_flac", "golf",
            "handstand", "hit", "hug", "jump", "kick", "kick_ball", "kiss", "laugh", 
            "pick", "pour", "pullup", "punch", "push", "pushup", "ride_bike", "ride_horse", 
            "run", "shake_hands", "shoot_ball", "shoot_bow", "shoot_gun", "sit", "situp", "smile", 
            "smoke", "somersault", "stand","swing_baseball", "sword", "sword_exercise", "talk", "throw", "turn", 
            "walk", "wave"]

This part is for extracting frames from each video:

def frames_extraction(video_path):
    # Declare a list to store video frames.
    frames_list = []

    # Read the video file using the VideoCapture object.
    video_reader = cv2.VideoCapture(video_path)

    # Get the total number of frames in the video.
    video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

    # Calculate the interval after which frames will be added to the list.
    skip_frames_window = max(int(video_frames_count / SEQUENCE_LENGTH), 1)

    # Iterate through the video frames.
    for frame_counter in range(SEQUENCE_LENGTH):

        # Set the current frame position of the video.
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)

        # Read the frame from the video.
        success, frame = video_reader.read()

        # If the frame was not read successfully, break out of the loop.
        if not success:
            break

        # Resize the frame to a fixed size (note: cv2.resize expects (width, height)).
        resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))

        # Normalize the resized frame by dividing it by 255 so that each pixel value lies between 0 and 1.
        normalized_frame = resized_frame / 255

        # Append the normalized frame to the frames list.
        frames_list.append(normalized_frame)

    # Release the VideoCapture object.
    video_reader.release()

    # Return the frames list.
    return frames_list
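One thing I noticed while writing this: cv2 decodes frames as uint8 (1 byte per value), and resized_frame / 255 silently promotes them to float64 (8 bytes per value), so every stored frame takes eight times the memory of the raw pixels. A tiny self-contained check (the random array is just a stand-in for a decoded frame):

import numpy as np

# A stand-in for a decoded BGR frame: uint8, 1 byte per value.
frame = (np.random.rand(64, 64, 3) * 255).astype(np.uint8)

print((frame / 255).dtype)                       # float64 -> 8 bytes per value
print((frame.astype(np.float32) / 255).dtype)    # float32 -> 4 bytes per value, half the memory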

This part is for creating the training feature and label lists:

def create_dataset():
    '''
    This function will extract the data of the selected classes and create the required dataset.
    Returns:
        features:          A list containing the extracted frames of the videos.
        labels:            A list containing the indexes of the classes associated with the videos.
        video_files_paths: A list containing the paths of the videos in the disk.
    '''

    # Declared Empty Lists to store the features, labels and video file path values.
    features = []
    labels = []
    video_files_paths = []
    
    # Iterating through all the classes mentioned in the classes list
    for class_index, class_name in enumerate(CLASSES_LIST):
        
        # Display the name of the class whose data is being extracted.
        print(f'Extracting Data of Class: {class_name}')
        
        # Get the list of video files present in the specific class name directory.
        files_list = os.listdir(os.path.join(DATASET_DIR, class_name))
        
        # Iterate through all the files present in the files list.
        for file_name in files_list:
            
            # Get the complete video path.
            video_file_path = os.path.join(DATASET_DIR, class_name, file_name)

            # Extract the frames of the video file.
            frames = frames_extraction(video_file_path)

            # Check if the number of extracted frames equals the SEQUENCE_LENGTH specified above,
            # and ignore videos that have fewer frames than SEQUENCE_LENGTH.
            if len(frames) == SEQUENCE_LENGTH:

                # Append the data to their respective lists.
                features.append(frames)
                labels.append(class_index)
                video_files_paths.append(video_file_path)

    # Convert the lists to NumPy arrays.
    features = np.asarray(features)
    labels = np.array(labels)  
    
    # Return the features, labels, and video file paths.
    return features, labels, video_files_paths

So, when I tried to create the dataset as below:

   # Create the dataset.
   features, labels, video_files_paths = create_dataset()

I am getting the error below:

Screenshot of the error: https://i.sstatic.net/ss5f2.png

How can I fix this? I think I need to preprocess the dataset in batches while training the model, but how can I do that? When I worked on images I used keras.utils.image_dataset_from_directory, but should I now build my own data loader?
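To give a sense of scale: create_dataset() materializes every clip as float64 before training even begins. A rough back-of-the-envelope estimate (assuming something close to the full HMDB51 set of about 6,800 clips; the exact number kept depends on the SEQUENCE_LENGTH filter) shows why this overflows RAM:

# Rough size of the fully materialized features array (illustrative numbers).
frames_per_clip = 20                    # SEQUENCE_LENGTH
bytes_per_frame = 64 * 64 * 3 * 8       # float64 = 8 bytes per value
num_clips = 6800                        # assumed; HMDB51 has 6,766 clips in total
total_gib = num_clips * frames_per_clip * bytes_per_frame / 2**30
print(f"~{total_gib:.1f} GiB")          # about 12.5 GiB before the model even loads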


Comments (1)

最偏执的依靠 2025-02-15 07:59:12


You can also do the task without building your own custom data loader.
Install the necessary dependencies: tensorflow and keras (glob ships with the Python standard library).

  • then install: !pip install keras-video-generators==1.0.11 (the package is imported as keras_video)

Here is a full example below:

import tensorflow as tf
import glob
import keras
from keras_video import VideoFrameGenerator
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, GRU, Dense, Flatten
from tensorflow.keras.optimizers import Adam



glob_pattern='/content/HMDB51/{classname}/*.mp4' 


# Derive the class names from the directory names
# (assumes the '/content/HMDB51/<classname>' layout, hence index 3 of the split).
classes = [i.split('/')[3] for i in glob.glob('/content/HMDB51/*')]
classes.sort()


SIZE = (128, 128)   # frame height and width fed to the model
CHANNELS = 3        # RGB channels
NBFRAME = 20        # frames per sequence
BS = 5              # batch size


data_aug = keras.preprocessing.image.ImageDataGenerator(
        dtype = 'float16',
        rescale = 1./255,
        zoom_range = .1,
        horizontal_flip = True,
        rotation_range = 15
    )

train = VideoFrameGenerator(
    classes=classes, 
    glob_pattern=glob_pattern,
    nb_frames=NBFRAME,
    split_val=.20,
    shuffle=True,
    batch_size=BS,
    target_shape=SIZE,
    nb_channel=CHANNELS,
    transformation=data_aug,
    use_frame_cache=True)

valid = train.get_validation_generator()

def build_vgg(shape=(128, 128, 3), nbout=51):
    vgg_model = VGG16(include_top=False, input_shape=shape, weights='imagenet')  

    for layer in vgg_model.layers:
        layer.trainable = False

    return vgg_model

def action_model(shape=(20, 128, 128, 3), nbout=51):
    convnet = build_vgg(shape[1:])
    model = Sequential()    
    model.add(TimeDistributed(convnet, input_shape=shape))
    model.add(Dense(256, activation='relu'))
    model.add(TimeDistributed(Flatten()))
    model.add(GRU(256, dropout=0.20)) 
    model.add(Dense(128, activation='relu'))
    model.add(Dense(nbout, activation='softmax'))
    model.summary()
    return model

INSHAPE=(NBFRAME,) + SIZE + (CHANNELS,)
model = action_model(INSHAPE, len(classes))
optimizer = Adam(learning_rate=1e-4)  # 'lr' was renamed to 'learning_rate' in TF 2.x
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
EPOCHS = 100

callbacks = []  # add e.g. ModelCheckpoint or EarlyStopping here as needed

# In TF 2.x, model.fit accepts generators directly (fit_generator is deprecated).
history = model.fit(
    train,
    validation_data=valid,
    verbose=1,
    epochs=EPOCHS,
    callbacks=callbacks
)
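And if you do end up wanting your own lazy loader around the frames_extraction function from the question, a minimal sketch with tf.data.Dataset.from_generator could look like the following. It reuses your frames_extraction, SEQUENCE_LENGTH, IMAGE_HEIGHT and IMAGE_WIDTH, and the batch size is illustrative; since labels come out as integer indices, you would compile with loss='sparse_categorical_crossentropy':

import os
import numpy as np
import tensorflow as tf

def make_dataset(dataset_dir, classes, batch_size=5):
    # Collect (path, label) pairs the same way create_dataset() walks the tree.
    paths, labels = [], []
    for idx, name in enumerate(classes):
        for file_name in os.listdir(os.path.join(dataset_dir, name)):
            paths.append(os.path.join(dataset_dir, name, file_name))
            labels.append(idx)

    def gen():
        for path, label in zip(paths, labels):
            frames = frames_extraction(path)  # the function defined in the question
            if len(frames) == SEQUENCE_LENGTH:
                yield np.asarray(frames, dtype=np.float32), label

    # Frames are decoded on the fly; only the prefetched batches sit in memory.
    return tf.data.Dataset.from_generator(
        gen,
        output_signature=(
            tf.TensorSpec(shape=(SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3),
                          dtype=tf.float32),
            tf.TensorSpec(shape=(), dtype=tf.int32),
        ),
    ).batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Usage, e.g.: train_ds = make_dataset(DATASET_DIR, CLASSES_LIST)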
