pytorch fasterrcnn没有输出

发布于 2025-02-05 17:25:18 字数 4966 浏览 2 评论 0原文



import torch
import torchvision
from config import device
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, fasterrcnn_resnet50_fpn_v2
from torchvision.transforms import transforms as T
import torchvision.transforms.functional as TF

CLASSES = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

class detector():
  def __init__(self, model_path,threshold=0.3):
    self.threshold = threshold
    self.model = self.load_model(model_path)
  def load_model(self, model_path=None):
    Loads a pretrained model and state_dict if desired 

    Todo: implement channels for IR image data
    implement channels polarimetric data"""

    print("Loading model...")
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT)
    print("Loading model...done")

    # get the number of input features 
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # # define a new head for the detector with required number of classes
    num_classes = len(CLASSES)
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 

    if model_path is not None:
      print("Loading model from:", model_path)
      model.load_state_dict(torch.load(model_path), map_location=device)

    return model

  def train(self, dataloader, optimizer, scheduler, num_epochs):
    # loads a custom dataset and trains the model with it

    "Not implemented"
    return None

  def detect(self, image):
    image: tensor

              T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))

    norm_tensor = transform(image.type(torch.FloatTensor)) #normalize the image
    image_tensor = (norm_tensor - torch.min(norm_tensor))/(torch.max(norm_tensor) - torch.min(norm_tensor))*(1 - 0) + 0 # min max scaling from 0 to 1

    if len(image_tensor.shape) == 3:   #if input is single image, wrap in a batch
      image_tensor = image_tensor.unsqueeze(0)
    #else if input is a batch, do nothing

    image_tensor =

    with torch.no_grad():
      outputs = self.model(image_tensor)

    outputs = [{k:'cpu') for k, v in t.items()} for t in outputs] #move outputs to cpu

    boxes = outputs[0]['boxes']
    scores = outputs[0]['scores']
    labels = outputs[0]['labels']

    # filter out boxes according to threshold
    conf_mask = scores > self.threshold
    boxes = boxes[conf_mask]
    labels = labels[conf_mask]

    # get all the predicited class names
    pred_classes = [CLASSES[i] for i in labels.cpu()]

    return boxes, pred_classes

def image_visualizer(detector,img_path, tensorboard=False):
    print('Loading image...')

    img_tensor = read_image(img_path) # C H W
    boxes, pred_cls = detector.detect(img_tensor)
    if boxes.shape[0] == 0:
        print("No objects detected")

    bbox_img = draw_bounding_boxes(img_tensor, boxes, labels=pred_cls)

    if tensorboard:
        print("Saving output to tensorboard...")
        writer = SummaryWriter()
        writer.add_image('image', bbox_img)
        print("Image saved")
        write_png(bbox_img, 'detection_img.png')

from detector import detector
from visualizer import image_visualizer

image_path = "sailboat.jpg"
predictor = detector(threshold = .3, model_path = None)
image_visualizer(predictor, image_path,  tensorboard=opts.tensorboard)

这是 FasterRCNN的火炬文档指出输入值应在[0,1]之间。如果我不扩展到[0,1],但是[-1,1]我会得到预测,但是它是100个完全虚假的,低信心的预测。归一化似乎没有影响。在不修改IN_Features或Box_predictor的情况下加载模型也具有相同的效果,在我得到预测的情况下,但它是100个虚假的预测。我不确定该怎么办。请帮忙。

I am trying to build a simple object detector using the torchvision pretrained model FasterRCNN. I want to be able to input an image, predict the objects, and output that image with the bounding boxes. I have loaded the torchvision.models.detection.fasterrcnn_resnet50_fpn_v2 and its weights, input an image, but it gives me no predictions at all.

Here is my detector class,

import torch
import torchvision
from config import device
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, fasterrcnn_resnet50_fpn_v2
from torchvision.transforms import transforms as T
import torchvision.transforms.functional as TF

CLASSES = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

class detector():
  def __init__(self, model_path,threshold=0.3):
    self.threshold = threshold
    self.model = self.load_model(model_path)
  def load_model(self, model_path=None):
    Loads a pretrained model and state_dict if desired 

    Todo: implement channels for IR image data
    implement channels polarimetric data"""

    print("Loading model...")
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT)
    print("Loading model...done")

    # get the number of input features 
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # # define a new head for the detector with required number of classes
    num_classes = len(CLASSES)
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 

    if model_path is not None:
      print("Loading model from:", model_path)
      model.load_state_dict(torch.load(model_path), map_location=device)

    return model

  def train(self, dataloader, optimizer, scheduler, num_epochs):
    # loads a custom dataset and trains the model with it

    "Not implemented"
    return None

  def detect(self, image):
    image: tensor

              T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))

    norm_tensor = transform(image.type(torch.FloatTensor)) #normalize the image
    image_tensor = (norm_tensor - torch.min(norm_tensor))/(torch.max(norm_tensor) - torch.min(norm_tensor))*(1 - 0) + 0 # min max scaling from 0 to 1

    if len(image_tensor.shape) == 3:   #if input is single image, wrap in a batch
      image_tensor = image_tensor.unsqueeze(0)
    #else if input is a batch, do nothing

    image_tensor =

    with torch.no_grad():
      outputs = self.model(image_tensor)

    outputs = [{k:'cpu') for k, v in t.items()} for t in outputs] #move outputs to cpu

    boxes = outputs[0]['boxes']
    scores = outputs[0]['scores']
    labels = outputs[0]['labels']

    # filter out boxes according to threshold
    conf_mask = scores > self.threshold
    boxes = boxes[conf_mask]
    labels = labels[conf_mask]

    # get all the predicited class names
    pred_classes = [CLASSES[i] for i in labels.cpu()]

    return boxes, pred_classes

Here is my visualizer,

def image_visualizer(detector,img_path, tensorboard=False):
    print('Loading image...')

    img_tensor = read_image(img_path) # C H W
    boxes, pred_cls = detector.detect(img_tensor)
    if boxes.shape[0] == 0:
        print("No objects detected")

    bbox_img = draw_bounding_boxes(img_tensor, boxes, labels=pred_cls)

    if tensorboard:
        print("Saving output to tensorboard...")
        writer = SummaryWriter()
        writer.add_image('image', bbox_img)
        print("Image saved")
        write_png(bbox_img, 'detection_img.png')

And the usage is:

from detector import detector
from visualizer import image_visualizer

image_path = "sailboat.jpg"
predictor = detector(threshold = .3, model_path = None)
image_visualizer(predictor, image_path,  tensorboard=opts.tensorboard)

I played around with the normalization and scaling. The torch documentation for fasterrcnn states the input values should be between [0,1]. If I don't scale to [0,1] but to [-1,1] I get predictions but it's 100 completely bogus, low confidence predictions. Normalization seems to have no effect. Loading the model without modifying the in_features or box_predictor also has the same effect where I get predictions, but it's 100 bogus predictions. I'm not sure what to do. Please help.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。



需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。


夏花。依旧 2025-02-12 17:25:18


    # get the number of input features 
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # # define a new head for the detector with required number of classes
    num_classes = len(CLASSES)
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


I found out my problem, this section should be removed:

    # get the number of input features 
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # # define a new head for the detector with required number of classes
    num_classes = len(CLASSES)
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

I don't understand why it should or shouldn't be in there, I followed a few tutorials suggestions to do so, but removing it seemed to fix the problem.

我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。