暗网 yolov4 上的空检测变量
我关注了 Youtube 上关于如何使用 Darknet 运行 Yolov4 的视频。
问题是,当我使用 .exe 文件运行该程序时,它可以完美运行并具有出色的 FPS。 现在我必须更改代码的某些部分来执行我想要的操作,这意味着我必须使用其 python 代码运行代码,而不再使用 .exe 文件。 但在进行任何更改之前,我测试了在不进行任何更改的情况下运行它,但我只以低 FPS 运行视频并且没有显示边界框。
经过一番研究,我发现“检测”变量为空,问题出在“检测图像”函数本身上。问题是我不明白为什么它是空的,即使我知道他应该在上面检测到一些东西。
这是来自 darknet_video.py 的代码
from ctypes import *
import random
import os
import cv2
import time
import darknet
import argparse
from threading import Thread, enumerate
from queue import Queue
def parser():
parser = argparse.ArgumentParser(description="YOLO Object Detection")
parser.add_argument("--input", type=str, default=0,
help="video source. If empty, uses webcam 0 stream")
parser.add_argument("--out_filename", type=str, default="",
help="inference video name. Not saved if empty")
parser.add_argument("--weights", default="yolov4.weights",
help="yolo weights path")
parser.add_argument("--dont_show", action='store_true',
help="windown inference display. For headless systems")
parser.add_argument("--ext_output", action='store_true',
help="display bbox coordinates of detected objects")
parser.add_argument("--config_file", default="./cfg/yolov4.cfg",
help="path to config file")
parser.add_argument("--data_file", default="./cfg/coco.data",
help="path to data file")
parser.add_argument("--thresh", type=float, default=.25,
help="remove detections with confidence below this value")
return parser.parse_args()
def str2int(video_path):
"""
argparse returns and string althout webcam uses int (0, 1 ...)
Cast to int if needed
"""
try:
return int(video_path)
except ValueError:
return video_path
def check_arguments_errors(args):
assert 0 < args.thresh < 1, "Threshold should be a float between zero and one (non-inclusive)"
if not os.path.exists(args.config_file):
raise(ValueError("Invalid config path {}".format(os.path.abspath(args.config_file))))
if not os.path.exists(args.weights):
raise(ValueError("Invalid weight path {}".format(os.path.abspath(args.weights))))
if not os.path.exists(args.data_file):
raise(ValueError("Invalid data file path {}".format(os.path.abspath(args.data_file))))
if str2int(args.input) == str and not os.path.exists(args.input):
raise(ValueError("Invalid video path {}".format(os.path.abspath(args.input))))
def set_saved_video(input_video, output_video, size):
fourcc = cv2.VideoWriter_fourcc(*"MJPG")
fps = int(input_video.get(cv2.CAP_PROP_FPS))
video = cv2.VideoWriter(output_video, fourcc, fps, size)
return video
def convert2relative(bbox):
"""
YOLO format use relative coordinates for annotation
"""
x, y, w, h = bbox
_height = darknet_height
_width = darknet_width
return x/_width, y/_height, w/_width, h/_height
def convert2original(image, bbox):
x, y, w, h = convert2relative(bbox)
image_h, image_w, __ = image.shape
orig_x = int(x * image_w)
orig_y = int(y * image_h)
orig_width = int(w * image_w)
orig_height = int(h * image_h)
bbox_converted = (orig_x, orig_y, orig_width, orig_height)
return bbox_converted
def convert4cropping(image, bbox):
x, y, w, h = convert2relative(bbox)
image_h, image_w, __ = image.shape
orig_left = int((x - w / 2.) * image_w)
orig_right = int((x + w / 2.) * image_w)
orig_top = int((y - h / 2.) * image_h)
orig_bottom = int((y + h / 2.) * image_h)
if (orig_left < 0): orig_left = 0
if (orig_right > image_w - 1): orig_right = image_w - 1
if (orig_top < 0): orig_top = 0
if (orig_bottom > image_h - 1): orig_bottom = image_h - 1
bbox_cropping = (orig_left, orig_top, orig_right, orig_bottom)
return bbox_cropping
def video_capture(frame_queue, darknet_image_queue):
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_resized = cv2.resize(frame_rgb, (darknet_width, darknet_height),
interpolation=cv2.INTER_LINEAR)
frame_queue.put(frame)
img_for_detect = darknet.make_image(darknet_width, darknet_height, 3)
darknet.copy_image_from_bytes(img_for_detect, frame_resized.tobytes())
darknet_image_queue.put(img_for_detect)
cap.release()
def inference(darknet_image_queue, detections_queue, fps_queue):
while cap.isOpened():
darknet_image = darknet_image_queue.get()
prev_time = time.time()
print("Network: ", network)
detections = darknet.detect_image(network, class_names, darknet_image, thresh=args.thresh)
detections_queue.put(detections)
fps = int(1/(time.time() - prev_time))
fps_queue.put(fps)
print("FPS: {}".format(fps))
darknet.print_detections(detections, args.ext_output)
darknet.free_image(darknet_image)
cap.release()
def drawing(frame_queue, detections_queue, fps_queue):
random.seed(3) # deterministic bbox colors
video = set_saved_video(cap, args.out_filename, (video_width, video_height))
while cap.isOpened():
frame = frame_queue.get()
detections = detections_queue.get()
fps = fps_queue.get()
detections_adjusted = []
if frame is not None:
for label, confidence, bbox in detections:
bbox_adjusted = convert2original(frame, bbox)
detections_adjusted.append((str(label), confidence, bbox_adjusted))
image = darknet.draw_boxes(detections_adjusted, frame, class_colors)
if len(detections):
name = []
count = []
for detect in detections:
name_tag = detect[0].decode()
if name_tag in name:
count[name.index(name_tag)] += 1
else:
name.append(name_tag)
count.append(1)
for index in len(name):
cv2.putText(image, name[index] + ": " + str(count[index]), (10, 30 + 30 * index), cv2.FONT_HERSHEY_SIMPLEX,0.5, (0, 0, 255), 2)
if not args.dont_show:
cv2.imshow('Inference', image)
if args.out_filename is not None:
video.write(image)
if cv2.waitKey(fps) == 27:
break
cap.release()
video.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
frame_queue = Queue()
darknet_image_queue = Queue(maxsize=1)
detections_queue = Queue(maxsize=1)
fps_queue = Queue(maxsize=1)
args = parser()
check_arguments_errors(args)
network, class_names, class_colors = darknet.load_network(
args.config_file,
args.data_file,
args.weights,
batch_size=1
)
darknet_width = darknet.network_width(network)
darknet_height = darknet.network_height(network)
input_path = str2int(args.input)
cap = cv2.VideoCapture(input_path)
video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
Thread(target=video_capture, args=(frame_queue, darknet_image_queue)).start()
Thread(target=inference, args=(darknet_image_queue, detections_queue, fps_queue)).start()
Thread(target=drawing, args=(frame_queue, detections_queue, fps_queue)).start()
谢谢您的帮助
I followed a video from Youtube about how to run Yolov4 using Darknet.
The thing is that when I run the program using the .exe file, it run perfectly with great FPS.
And now I have to change some part of the code to do the things I want, meaning that I have to run the code using its python code and not using the .exe file anymore.
But before any change, I tested to run it with no change but I only have the video running with LOW FPS and no bounding box shown up.
After some research I found that the "detections" variable is empty, and the problem comes on the "detect_image" function it self. The thing is that I can't figure out why it is empty even I know that there is something he should detect on it.
Here are the code from darknet_video.py
from ctypes import *
import random
import os
import cv2
import time
import darknet
import argparse
from threading import Thread, enumerate
from queue import Queue
def parser():
parser = argparse.ArgumentParser(description="YOLO Object Detection")
parser.add_argument("--input", type=str, default=0,
help="video source. If empty, uses webcam 0 stream")
parser.add_argument("--out_filename", type=str, default="",
help="inference video name. Not saved if empty")
parser.add_argument("--weights", default="yolov4.weights",
help="yolo weights path")
parser.add_argument("--dont_show", action='store_true',
help="windown inference display. For headless systems")
parser.add_argument("--ext_output", action='store_true',
help="display bbox coordinates of detected objects")
parser.add_argument("--config_file", default="./cfg/yolov4.cfg",
help="path to config file")
parser.add_argument("--data_file", default="./cfg/coco.data",
help="path to data file")
parser.add_argument("--thresh", type=float, default=.25,
help="remove detections with confidence below this value")
return parser.parse_args()
def str2int(video_path):
"""
argparse returns and string althout webcam uses int (0, 1 ...)
Cast to int if needed
"""
try:
return int(video_path)
except ValueError:
return video_path
def check_arguments_errors(args):
assert 0 < args.thresh < 1, "Threshold should be a float between zero and one (non-inclusive)"
if not os.path.exists(args.config_file):
raise(ValueError("Invalid config path {}".format(os.path.abspath(args.config_file))))
if not os.path.exists(args.weights):
raise(ValueError("Invalid weight path {}".format(os.path.abspath(args.weights))))
if not os.path.exists(args.data_file):
raise(ValueError("Invalid data file path {}".format(os.path.abspath(args.data_file))))
if str2int(args.input) == str and not os.path.exists(args.input):
raise(ValueError("Invalid video path {}".format(os.path.abspath(args.input))))
def set_saved_video(input_video, output_video, size):
fourcc = cv2.VideoWriter_fourcc(*"MJPG")
fps = int(input_video.get(cv2.CAP_PROP_FPS))
video = cv2.VideoWriter(output_video, fourcc, fps, size)
return video
def convert2relative(bbox):
"""
YOLO format use relative coordinates for annotation
"""
x, y, w, h = bbox
_height = darknet_height
_width = darknet_width
return x/_width, y/_height, w/_width, h/_height
def convert2original(image, bbox):
x, y, w, h = convert2relative(bbox)
image_h, image_w, __ = image.shape
orig_x = int(x * image_w)
orig_y = int(y * image_h)
orig_width = int(w * image_w)
orig_height = int(h * image_h)
bbox_converted = (orig_x, orig_y, orig_width, orig_height)
return bbox_converted
def convert4cropping(image, bbox):
x, y, w, h = convert2relative(bbox)
image_h, image_w, __ = image.shape
orig_left = int((x - w / 2.) * image_w)
orig_right = int((x + w / 2.) * image_w)
orig_top = int((y - h / 2.) * image_h)
orig_bottom = int((y + h / 2.) * image_h)
if (orig_left < 0): orig_left = 0
if (orig_right > image_w - 1): orig_right = image_w - 1
if (orig_top < 0): orig_top = 0
if (orig_bottom > image_h - 1): orig_bottom = image_h - 1
bbox_cropping = (orig_left, orig_top, orig_right, orig_bottom)
return bbox_cropping
def video_capture(frame_queue, darknet_image_queue):
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_resized = cv2.resize(frame_rgb, (darknet_width, darknet_height),
interpolation=cv2.INTER_LINEAR)
frame_queue.put(frame)
img_for_detect = darknet.make_image(darknet_width, darknet_height, 3)
darknet.copy_image_from_bytes(img_for_detect, frame_resized.tobytes())
darknet_image_queue.put(img_for_detect)
cap.release()
def inference(darknet_image_queue, detections_queue, fps_queue):
while cap.isOpened():
darknet_image = darknet_image_queue.get()
prev_time = time.time()
print("Network: ", network)
detections = darknet.detect_image(network, class_names, darknet_image, thresh=args.thresh)
detections_queue.put(detections)
fps = int(1/(time.time() - prev_time))
fps_queue.put(fps)
print("FPS: {}".format(fps))
darknet.print_detections(detections, args.ext_output)
darknet.free_image(darknet_image)
cap.release()
def drawing(frame_queue, detections_queue, fps_queue):
random.seed(3) # deterministic bbox colors
video = set_saved_video(cap, args.out_filename, (video_width, video_height))
while cap.isOpened():
frame = frame_queue.get()
detections = detections_queue.get()
fps = fps_queue.get()
detections_adjusted = []
if frame is not None:
for label, confidence, bbox in detections:
bbox_adjusted = convert2original(frame, bbox)
detections_adjusted.append((str(label), confidence, bbox_adjusted))
image = darknet.draw_boxes(detections_adjusted, frame, class_colors)
if len(detections):
name = []
count = []
for detect in detections:
name_tag = detect[0].decode()
if name_tag in name:
count[name.index(name_tag)] += 1
else:
name.append(name_tag)
count.append(1)
for index in len(name):
cv2.putText(image, name[index] + ": " + str(count[index]), (10, 30 + 30 * index), cv2.FONT_HERSHEY_SIMPLEX,0.5, (0, 0, 255), 2)
if not args.dont_show:
cv2.imshow('Inference', image)
if args.out_filename is not None:
video.write(image)
if cv2.waitKey(fps) == 27:
break
cap.release()
video.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
frame_queue = Queue()
darknet_image_queue = Queue(maxsize=1)
detections_queue = Queue(maxsize=1)
fps_queue = Queue(maxsize=1)
args = parser()
check_arguments_errors(args)
network, class_names, class_colors = darknet.load_network(
args.config_file,
args.data_file,
args.weights,
batch_size=1
)
darknet_width = darknet.network_width(network)
darknet_height = darknet.network_height(network)
input_path = str2int(args.input)
cap = cv2.VideoCapture(input_path)
video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
Thread(target=video_capture, args=(frame_queue, darknet_image_queue)).start()
Thread(target=inference, args=(darknet_image_queue, detections_queue, fps_queue)).start()
Thread(target=drawing, args=(frame_queue, detections_queue, fps_queue)).start()
Thank you for helping
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论