pytorch fasterrcnn没有输出
我正在尝试使用porchvision预测的模型来构建一个简单的对象检测器。我希望能够输入图像,预测对象并使用边界框输出该图像。我已经加载了torchvision.models.detection.fasterrcnn_resnet50_fpn_v2及其权重,输入图像,但它根本没有任何预测。
我的检测器类,检测
import torch
import torchvision
from config import device
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, fasterrcnn_resnet50_fpn_v2
from torchvision.transforms import transforms as T
import torchvision.transforms.functional as TF
CLASSES = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
class detector():
def __init__(self, model_path,threshold=0.3):
self.threshold = threshold
self.model = self.load_model(model_path)
def load_model(self, model_path=None):
"""
Loads a pretrained model and state_dict if desired
Todo: implement channels for IR image data
implement channels polarimetric data"""
print("Loading model...")
model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT)
print("Loading model...done")
# get the number of input features
in_features = model.roi_heads.box_predictor.cls_score.in_features
# # define a new head for the detector with required number of classes
num_classes = len(CLASSES)
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
if model_path is not None:
print("Loading model from:", model_path)
model.load_state_dict(torch.load(model_path), map_location=device)
return model
def train(self, dataloader, optimizer, scheduler, num_epochs):
# loads a custom dataset and trains the model with it
"Not implemented"
return None
def detect(self, image):
"""
image: tensor
"""
transform=T.Compose([
T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
norm_tensor = transform(image.type(torch.FloatTensor)) #normalize the image
image_tensor = (norm_tensor - torch.min(norm_tensor))/(torch.max(norm_tensor) - torch.min(norm_tensor))*(1 - 0) + 0 # min max scaling from 0 to 1
if len(image_tensor.shape) == 3: #if input is single image, wrap in a batch
image_tensor = image_tensor.unsqueeze(0)
#else if input is a batch, do nothing
image_tensor = image_tensor.to(device)
with torch.no_grad():
self.model.to(device)
self.model.eval()
outputs = self.model(image_tensor)
outputs = [{k: v.to('cpu') for k, v in t.items()} for t in outputs] #move outputs to cpu
boxes = outputs[0]['boxes']
scores = outputs[0]['scores']
labels = outputs[0]['labels']
# filter out boxes according to threshold
conf_mask = scores > self.threshold
boxes = boxes[conf_mask]
labels = labels[conf_mask]
# get all the predicited class names
pred_classes = [CLASSES[i] for i in labels.cpu()]
return boxes, pred_classes
器
def image_visualizer(detector,img_path, tensorboard=False):
print('Loading image...')
img_tensor = read_image(img_path) # C H W
boxes, pred_cls = detector.detect(img_tensor)
if boxes.shape[0] == 0:
print("No objects detected")
return
bbox_img = draw_bounding_boxes(img_tensor, boxes, labels=pred_cls)
if tensorboard:
print("Saving output to tensorboard...")
writer = SummaryWriter()
writer.add_image('image', bbox_img)
writer.close()
print("Image saved")
else:
write_png(bbox_img, 'detection_img.png')
。
from detector import detector
from visualizer import image_visualizer
image_path = "sailboat.jpg"
predictor = detector(threshold = .3, model_path = None)
image_visualizer(predictor, image_path, tensorboard=opts.tensorboard)
这是 FasterRCNN的火炬文档指出输入值应在[0,1]之间。如果我不扩展到[0,1],但是[-1,1]我会得到预测,但是它是100个完全虚假的,低信心的预测。归一化似乎没有影响。在不修改IN_Features或Box_predictor的情况下加载模型也具有相同的效果,在我得到预测的情况下,但它是100个虚假的预测。我不确定该怎么办。请帮忙。
I am trying to build a simple object detector using the torchvision pretrained model FasterRCNN. I want to be able to input an image, predict the objects, and output that image with the bounding boxes. I have loaded the torchvision.models.detection.fasterrcnn_resnet50_fpn_v2 and its weights, input an image, but it gives me no predictions at all.
Here is my detector class, detector.py:
import torch
import torchvision
from config import device
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, fasterrcnn_resnet50_fpn_v2
from torchvision.transforms import transforms as T
import torchvision.transforms.functional as TF
CLASSES = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
class detector():
def __init__(self, model_path,threshold=0.3):
self.threshold = threshold
self.model = self.load_model(model_path)
def load_model(self, model_path=None):
"""
Loads a pretrained model and state_dict if desired
Todo: implement channels for IR image data
implement channels polarimetric data"""
print("Loading model...")
model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT)
print("Loading model...done")
# get the number of input features
in_features = model.roi_heads.box_predictor.cls_score.in_features
# # define a new head for the detector with required number of classes
num_classes = len(CLASSES)
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
if model_path is not None:
print("Loading model from:", model_path)
model.load_state_dict(torch.load(model_path), map_location=device)
return model
def train(self, dataloader, optimizer, scheduler, num_epochs):
# loads a custom dataset and trains the model with it
"Not implemented"
return None
def detect(self, image):
"""
image: tensor
"""
transform=T.Compose([
T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
norm_tensor = transform(image.type(torch.FloatTensor)) #normalize the image
image_tensor = (norm_tensor - torch.min(norm_tensor))/(torch.max(norm_tensor) - torch.min(norm_tensor))*(1 - 0) + 0 # min max scaling from 0 to 1
if len(image_tensor.shape) == 3: #if input is single image, wrap in a batch
image_tensor = image_tensor.unsqueeze(0)
#else if input is a batch, do nothing
image_tensor = image_tensor.to(device)
with torch.no_grad():
self.model.to(device)
self.model.eval()
outputs = self.model(image_tensor)
outputs = [{k: v.to('cpu') for k, v in t.items()} for t in outputs] #move outputs to cpu
boxes = outputs[0]['boxes']
scores = outputs[0]['scores']
labels = outputs[0]['labels']
# filter out boxes according to threshold
conf_mask = scores > self.threshold
boxes = boxes[conf_mask]
labels = labels[conf_mask]
# get all the predicited class names
pred_classes = [CLASSES[i] for i in labels.cpu()]
return boxes, pred_classes
Here is my visualizer, visualizer.py:
def image_visualizer(detector,img_path, tensorboard=False):
print('Loading image...')
img_tensor = read_image(img_path) # C H W
boxes, pred_cls = detector.detect(img_tensor)
if boxes.shape[0] == 0:
print("No objects detected")
return
bbox_img = draw_bounding_boxes(img_tensor, boxes, labels=pred_cls)
if tensorboard:
print("Saving output to tensorboard...")
writer = SummaryWriter()
writer.add_image('image', bbox_img)
writer.close()
print("Image saved")
else:
write_png(bbox_img, 'detection_img.png')
And the usage is:
from detector import detector
from visualizer import image_visualizer
image_path = "sailboat.jpg"
predictor = detector(threshold = .3, model_path = None)
image_visualizer(predictor, image_path, tensorboard=opts.tensorboard)
I played around with the normalization and scaling. The torch documentation for fasterrcnn states the input values should be between [0,1]. If I don't scale to [0,1] but to [-1,1] I get predictions but it's 100 completely bogus, low confidence predictions. Normalization seems to have no effect. Loading the model without modifying the in_features or box_predictor also has the same effect where I get predictions, but it's 100 bogus predictions. I'm not sure what to do. Please help.
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
我发现我的问题,应该删除本节:
我不明白为什么应该或不应该在那里,我遵循了一些教程建议,但要删除它似乎可以解决问题。
I found out my problem, this section should be removed:
I don't understand why it should or shouldn't be in there, I followed a few tutorials suggestions to do so, but removing it seemed to fix the problem.