如何正确使用预训练的 CNN 对文件夹中的图像进行预测

发布于 2025-01-20 10:15:28 字数 4246 浏览 4 评论 0原文

我正在尝试构建一个 CNN 模型，并将其用于 2833 张图像，看看它是否可以预测表格数据集中（我自己选定的）三个特征以及流行度得分。到目前为止，我的代码如下所示：

import os

import cv2
import argparse
import numpy as np
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image as image_utils
from keras.applications.imagenet_utils import preprocess_input, decode_predictions

# Purpose: classify every file listed in img_path with VGG16 pre-trained on ImageNet,
# print the decoded predictions and show the top label drawn on the image.

# Construct the argument parser.
# NOTE(review): parse_args() is never called in the code shown, so the option defined below is not read.
argument_parser = argparse.ArgumentParser()

# First two arguments specifies our only argument "image" with both short-/longhand versions where either 
# can be used
# This is a required argument, noted by required=True, the help gives additional info in the terminal
# if needed
argument_parser.add_argument("-i", "--image", required=True, help="path to the input image")

# Set path to files
img_path = "images/"
# os.listdir returns the bare entry names (e.g. '100.png'), without the img_path prefix
files = os.listdir(img_path)
print("[INFO] loading and processing images...")

# Loop through images
for filename in files:
    # Load original via OpenCV, so we can draw on it and display it on our screen
    # NOTE(review): filename is passed without img_path, so it is looked up relative to the current
    # working directory rather than inside img_path (compare the "can't open/read file" warning for '100.png')
    original = cv2.imread(filename)

    # Load image while resizing to 224x224 pixels, then convert to a NumPy array because load_img returns 
    # Pillow format
    # NOTE(review): same bare filename as above; this is the call named in the FileNotFoundError traceback
    image = image_utils.load_img(filename, target_size=(224, 224))
    image = image_utils.img_to_array(image)

    """
    PRE-PROCESS
    The image is now a NumPy array of shape (224, 224, 3). 224 pixels tall, 224 pixels wide, 3 channels = 
    Red, Green, Blue. We need to expand to (1, 3, 224, 224) because when classifying images using Deep
    Learning and Convolutional Neural Networks, we often send several images (instead of one) through
    the network in “batches” for efficiency. We also subtract the mean RGB pixel intensity from the
    ImageNet dataset.
    """
    # NOTE(review): expand_dims with axis=0 on a (224, 224, 3) array gives (1, 224, 224, 3),
    # not (1, 3, 224, 224) as the text above states
    image = np.expand_dims(image, axis=0)
    image = preprocess_input(image)

    # Load Keras and classify the image
    # NOTE(review): the network is constructed inside the loop, i.e. once per file
    print("[INFO] loading network...")
    model = VGG16(weights="imagenet")    # Load the VGG16 network pre-trained on the ImageNet dataset

    print("[INFO] classifying image...")
    predictions = model.predict(image)   # Classify the image (NumPy array with 1000 entries)
    P = decode_predictions(predictions)  # Get the ImageNet Unique ID of the label, along with human-readable label
    print(P)

    # Loop over the decoded predictions for this image (P[0]) and print rank, label and probability
    # to the terminal; these are ranked predictions, not training epochs
    for (i, (imagenetID, label, prob)) in enumerate(P[0]):
        print("{}. {}: {:.2f}%".format(i + 1, label, prob * 100))

    # NOTE(review): the line after the next comment looks like the wrapped tail of that comment;
    # as written it is not valid Python
    # Load the image via OpenCV, draw the top prediction on the image, and display the 
    image to our screen
    original = cv2.imread(filename)
    # P[0][0] is the first (top-ranked) entry for the single image in the batch
    (imagenetID, label, prob) = P[0][0]
    cv2.putText(original, "Label: {}, {:.2f}%".format(label, prob * 100), (10, 30), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    cv2.imshow("Classification", original)
    # Wait for a key press before moving on to the next file
    cv2.waitKey(0)

我按照这篇文章介绍的做法进行操作，它对单张图像有效。但是当我尝试将代码放入循环中时，收到以下错误消息：

[ WARN:[email protected]] global D:\a\opencv-python\opencv-python\opencv\modules\imgcodecs\src\loadsave.cpp (239) cv::findDecoder imread_('100.png'): can't open/read file: check file path/integrity
Traceback (most recent call last):
  File "C:\PATH\test_imagenet.py", line 28, in <module>
    image = image_utils.load_img(filename, target_size=(224, 224))
  File "C:\PATH\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\preprocessing\image.py", line 313, in load_img
    return image.load_img(path, grayscale=grayscale, color_mode=color_mode,
  File "C:\PATH\AppData\Local\Programs\Python\Python39\lib\site-packages\keras_preprocessing\image\utils.py", line 113, in load_img
    with open(path, 'rb') as f:
FileNotFoundError: [Errno 2] No such file or directory: '100.png'

如您所见，我的项目中有该文件，所以我不知道为什么没有找到它。如何对一个文件夹中的所有图像（而不是仅对一张图像）正确执行此操作？

原文

I am trying to build a CNN model and use it on 2833 images to see if it can predict a selection (of my own choice) of three features and the popularity score from a tabular dataset. So far my code looks like this:

import os

import cv2
import argparse
import numpy as np
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image as image_utils
from keras.applications.imagenet_utils import preprocess_input, decode_predictions

# Purpose: classify every file listed in img_path with VGG16 pre-trained on ImageNet,
# print the decoded predictions and show the top label drawn on the image.

# Construct the argument parser.
# NOTE(review): parse_args() is never called in the code shown, so the option defined below is not read.
argument_parser = argparse.ArgumentParser()

# First two arguments specifies our only argument "image" with both short-/longhand versions where either 
# can be used
# This is a required argument, noted by required=True, the help gives additional info in the terminal
# if needed
argument_parser.add_argument("-i", "--image", required=True, help="path to the input image")

# Set path to files
img_path = "images/"
# os.listdir returns the bare entry names (e.g. '100.png'), without the img_path prefix
files = os.listdir(img_path)
print("[INFO] loading and processing images...")

# Loop through images
for filename in files:
    # Load original via OpenCV, so we can draw on it and display it on our screen
    # NOTE(review): filename is passed without img_path, so it is looked up relative to the current
    # working directory rather than inside img_path (compare the "can't open/read file" warning for '100.png')
    original = cv2.imread(filename)

    # Load image while resizing to 224x224 pixels, then convert to a NumPy array because load_img returns 
    # Pillow format
    # NOTE(review): same bare filename as above; this is the call named in the FileNotFoundError traceback
    image = image_utils.load_img(filename, target_size=(224, 224))
    image = image_utils.img_to_array(image)

    """
    PRE-PROCESS
    The image is now a NumPy array of shape (224, 224, 3). 224 pixels tall, 224 pixels wide, 3 channels = 
    Red, Green, Blue. We need to expand to (1, 3, 224, 224) because when classifying images using Deep
    Learning and Convolutional Neural Networks, we often send several images (instead of one) through
    the network in “batches” for efficiency. We also subtract the mean RGB pixel intensity from the
    ImageNet dataset.
    """
    # NOTE(review): expand_dims with axis=0 on a (224, 224, 3) array gives (1, 224, 224, 3),
    # not (1, 3, 224, 224) as the text above states
    image = np.expand_dims(image, axis=0)
    image = preprocess_input(image)

    # Load Keras and classify the image
    # NOTE(review): the network is constructed inside the loop, i.e. once per file
    print("[INFO] loading network...")
    model = VGG16(weights="imagenet")    # Load the VGG16 network pre-trained on the ImageNet dataset

    print("[INFO] classifying image...")
    predictions = model.predict(image)   # Classify the image (NumPy array with 1000 entries)
    P = decode_predictions(predictions)  # Get the ImageNet Unique ID of the label, along with human-readable label
    print(P)

    # Loop over the decoded predictions for this image (P[0]) and print rank, label and probability
    # to the terminal; these are ranked predictions, not training epochs
    for (i, (imagenetID, label, prob)) in enumerate(P[0]):
        print("{}. {}: {:.2f}%".format(i + 1, label, prob * 100))

    # NOTE(review): the line after the next comment looks like the wrapped tail of that comment;
    # as written it is not valid Python
    # Load the image via OpenCV, draw the top prediction on the image, and display the 
    image to our screen
    original = cv2.imread(filename)
    # P[0][0] is the first (top-ranked) entry for the single image in the batch
    (imagenetID, label, prob) = P[0][0]
    cv2.putText(original, "Label: {}, {:.2f}%".format(label, prob * 100), (10, 30), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    cv2.imshow("Classification", original)
    # Wait for a key press before moving on to the next file
    cv2.waitKey(0)

I followed this article on how to do it, and it worked on one image. But when I tried to put the code inside a loop, I get this error message:

[ WARN:[email protected]] global D:\a\opencv-python\opencv-python\opencv\modules\imgcodecs\src\loadsave.cpp (239) cv::findDecoder imread_('100.png'): can't open/read file: check file path/integrity
Traceback (most recent call last):
  File "C:\PATH\test_imagenet.py", line 28, in <module>
    image = image_utils.load_img(filename, target_size=(224, 224))
  File "C:\PATH\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\preprocessing\image.py", line 313, in load_img
    return image.load_img(path, grayscale=grayscale, color_mode=color_mode,
  File "C:\PATH\AppData\Local\Programs\Python\Python39\lib\site-packages\keras_preprocessing\image\utils.py", line 113, in load_img
    with open(path, 'rb') as f:
FileNotFoundError: [Errno 2] No such file or directory: '100.png'

As you can see, I have the file in the project, so I don't know why it doesn't find it. How do I do this correctly for a folder of images, instead of for one image only?

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

∝单色的世界 2025-01-27 10:15:28

请参考下面可以正常运行的代码：

import os

import cv2
import argparse
import numpy as np
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image as image_utils
from keras.applications.imagenet_utils import preprocess_input, decode_predictions

# Classify every image in a directory with VGG16 pre-trained on ImageNet, print the
# decoded predictions for each one and display the image with its top label drawn on it.

# Construct the argument parser. The "--image" option is kept from the single-image
# version of the script; parse_args() is not called because a whole directory is
# processed instead, so the option is never read.
argument_parser = argparse.ArgumentParser()
argument_parser.add_argument("-i", "--image", required=True, help="path to the input image")

# Directory that holds the images to classify
img_path = "/content/train/"
files = os.listdir(img_path)
print("[INFO] loading and processing images...")

# Build the network once, before the loop: it does not depend on the image, and
# constructing it again for every file only repeats the same work.
print("[INFO] loading network...")
model = VGG16(weights="imagenet")    # VGG16 network pre-trained on the ImageNet dataset

for filename in files:

    # os.listdir() returns bare entry names, so join each one with the directory;
    # passing the bare name makes the loaders look in the current working directory.
    file = os.path.join(img_path, filename)

    # Load the original via OpenCV so the label can be drawn on it and displayed.
    # cv2.imread returns None when the file cannot be read or decoded (for example a
    # sub-directory or a non-image file), so skip such entries instead of crashing.
    original = cv2.imread(file)
    if original is None:
        print("[WARN] skipping unreadable file: {}".format(file))
        continue

    # Load the image resized to 224x224 and convert it from Pillow format to a NumPy array
    image = image_utils.load_img(file, target_size=(224, 224))
    image = image_utils.img_to_array(image)

    # Add a leading batch axis (axis=0), then apply the ImageNet preprocessing
    image = np.expand_dims(image, axis=0)
    image = preprocess_input(image)

    print("[INFO] classifying image...")
    predictions = model.predict(image)   # Class scores for the single image in the batch
    P = decode_predictions(predictions)  # (ImageNet ID, human-readable label, probability) tuples
    print(P)

    # Print the ranked predictions for this image together with their probabilities
    for (i, (imagenetID, label, prob)) in enumerate(P[0]):
        print("{}. {}: {:.2f}%".format(i + 1, label, prob * 100))

    # Draw the top prediction on the image loaded above and display it
    (imagenetID, label, prob) = P[0][0]
    cv2.putText(original, "Label: {}, {:.2f}%".format(label, prob * 100), (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    # cv2.imshow requires a window name as its first argument.
    # NOTE(review): in a notebook environment without GUI support (the "/content/" path
    # suggests Google Colab) cv2.imshow may be unavailable; a notebook-specific display
    # helper would be needed there. Confirm the target environment.
    cv2.imshow("Classification", original)
    cv2.waitKey(0)

输出如下：

如果问题仍然存在，请告诉我们。谢谢！

Please find the working code below:

import os

import cv2
import argparse
import numpy as np
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image as image_utils
from keras.applications.imagenet_utils import preprocess_input, decode_predictions

# Classify every image in a directory with VGG16 pre-trained on ImageNet, print the
# decoded predictions for each one and display the image with its top label drawn on it.

# Construct the argument parser. The "--image" option is kept from the single-image
# version of the script; parse_args() is not called because a whole directory is
# processed instead, so the option is never read.
argument_parser = argparse.ArgumentParser()
argument_parser.add_argument("-i", "--image", required=True, help="path to the input image")

# Directory that holds the images to classify
img_path = "/content/train/"
files = os.listdir(img_path)
print("[INFO] loading and processing images...")

# Build the network once, before the loop: it does not depend on the image, and
# constructing it again for every file only repeats the same work.
print("[INFO] loading network...")
model = VGG16(weights="imagenet")    # VGG16 network pre-trained on the ImageNet dataset

for filename in files:

    # os.listdir() returns bare entry names, so join each one with the directory;
    # passing the bare name makes the loaders look in the current working directory.
    file = os.path.join(img_path, filename)

    # Load the original via OpenCV so the label can be drawn on it and displayed.
    # cv2.imread returns None when the file cannot be read or decoded (for example a
    # sub-directory or a non-image file), so skip such entries instead of crashing.
    original = cv2.imread(file)
    if original is None:
        print("[WARN] skipping unreadable file: {}".format(file))
        continue

    # Load the image resized to 224x224 and convert it from Pillow format to a NumPy array
    image = image_utils.load_img(file, target_size=(224, 224))
    image = image_utils.img_to_array(image)

    # Add a leading batch axis (axis=0), then apply the ImageNet preprocessing
    image = np.expand_dims(image, axis=0)
    image = preprocess_input(image)

    print("[INFO] classifying image...")
    predictions = model.predict(image)   # Class scores for the single image in the batch
    P = decode_predictions(predictions)  # (ImageNet ID, human-readable label, probability) tuples
    print(P)

    # Print the ranked predictions for this image together with their probabilities
    for (i, (imagenetID, label, prob)) in enumerate(P[0]):
        print("{}. {}: {:.2f}%".format(i + 1, label, prob * 100))

    # Draw the top prediction on the image loaded above and display it
    (imagenetID, label, prob) = P[0][0]
    cv2.putText(original, "Label: {}, {:.2f}%".format(label, prob * 100), (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    # cv2.imshow requires a window name as its first argument.
    # NOTE(review): in a notebook environment without GUI support (the "/content/" path
    # suggests Google Colab) cv2.imshow may be unavailable; a notebook-specific display
    # helper would be needed there. Confirm the target environment.
    cv2.imshow("Classification", original)
    cv2.waitKey(0)

Output is as follows: