弗兰恩(Flann)时如何提高检测效率?
我正在尝试提高对象检测的速度。我正在使用opencv
和orb
brute Force Kepoint匹配。
我有30秒的键>键入
运行,目前需要23秒。目前有74张图像,这很好,但是这个数字可能会增加。
到目前为止我
- 将我的搜索区域降低至绝对最小的
- 74张图像,以至于
我尝试进行预处理以删除匹配中的一些混乱的绝对最小尺寸,但这影响了我显着找到成功匹配的能力。
我也尝试过;
- SIF,Surf,MatchTemplate,Canny和其他一些方法。 Orb Brute是我最好的距离。
这是我的确切工作代码,而不是伪代码,而不是MVP,因为我认为我想加快的代码的剪辑版本是没有意义的。
有什么方法可以提高我的代码效率吗?
import cv2 as cv
import os
import glob
import pyautogui as py
from time import sleep
from windowcapture import WindowCapture
from vision import Vision
# Change the working directory to the folder this script is in.
os.chdir(r'C:\test')
avoid = glob.glob(r"C:\Users\test\*.png")
def loadImages(directory):
# Intialise empty array
image_list = []
# Add images to array
for i in directory:
img = cv.imread(i, cv.IMREAD_UNCHANGED)
image_list.append((img, i))
return image_list
# initialize the WindowCapture class
wincap = WindowCapture()
def keypoint_detection(image_list):
counter = 0
for i in image_list:
counter += 1
needle_img = i[0]
# load image to find
objectToFind = Vision(needle_img)
# get an updated image of the screen
keypoint_haystack = wincap.get_haystack()
# crop the image
x, w, y, h = [600,700,20,50]
keypoint_haystack = keypoint_haystack[y:y+h, x:x+w]
kp1, kp2, matches, match_points = objectToFind.match_keypoints(keypoint_haystack, sliced_name, min_match_count=30)
match_image = cv.drawMatches(objectToFind.needle_img, kp1, keypoint_haystack, kp2, matches, None)
if match_points:
# find the center point of all the matched features
center_point = objectToFind.centeroid(match_points)
# account for the width of the needle image that appears on the left
center_point[0] += objectToFind.needle_w
# drawn the found center point on the output image
match_image = objectToFind.draw_crosshairs(match_image, [center_point])
sleep(3)
break
while(True):
ships_to_avoid = loadImages(avoid)
keypoint_detection(ships_to_avoid)
窗capture班级
import numpy as np
import win32gui, win32ui, win32con
class WindowCapture:
# properties
w = 0
h = 0
hwnd = None
cropped_x = 0
cropped_y = 0
offset_x = 0
offset_y = 0
# constructor
def __init__(self, window_name=None):
# find the handle for the window we want to capture.
# if no window name is given, capture the entire screen
if window_name is None:
self.hwnd = win32gui.GetDesktopWindow()
else:
self.hwnd = win32gui.FindWindow(None, window_name)
if not self.hwnd:
raise Exception('Window not found: {}'.format(window_name))
# get the window size
window_rect = win32gui.GetWindowRect(self.hwnd)
self.w = window_rect[2] - window_rect[0]
self.h = window_rect[3] - window_rect[1]
# account for the window border and titlebar and cut them off
border_pixels = 0
titlebar_pixels = 5
self.w = self.w - border_pixels
self.h = self.h - titlebar_pixels - border_pixels
self.cropped_x = border_pixels
self.cropped_y = titlebar_pixels
# set the cropped coordinates offset so we can translate screenshot
# images into actual screen positions
self.offset_x = window_rect[0] + self.cropped_x
self.offset_y = window_rect[1] + self.cropped_y
def get_haystack(self):
# get the window image data
wDC = win32gui.GetWindowDC(self.hwnd)
dcObj = win32ui.CreateDCFromHandle(wDC)
cDC = dcObj.CreateCompatibleDC()
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
cDC.SelectObject(dataBitMap)
cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (self.cropped_x, self.cropped_y), win32con.SRCCOPY)
# convert the raw data into a format opencv can read
# dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
signedIntsArray = dataBitMap.GetBitmapBits(True)
img = np.fromstring(signedIntsArray, dtype='uint8')
img.shape = (self.h, self.w, 4)
# free resources
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(self.hwnd, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
img = img[...,:3]
img = np.ascontiguousarray(img)
return img
@staticmethod
def list_window_names():
def winEnumHandler(hwnd, ctx):
if win32gui.IsWindowVisible(hwnd):
print(hex(hwnd), win32gui.GetWindowText(hwnd))
win32gui.EnumWindows(winEnumHandler, None)
# translate a pixel position on a screenshot image to a pixel position on the screen.
# pos = (x, y)
def get_screen_position(self, pos):
return (pos[0] + self.offset_x, pos[1] + self.offset_y)
视觉课
import cv2 as cv
import numpy as np
class Vision:
# properties
needle_img = None
needle_w = 0
needle_h = 0
# constructor
def __init__(self, needle_img_path):
self.needle_img = needle_img_path
# Save the dimensions of the needle image
self.needle_w = self.needle_img.shape[1]
self.needle_h = self.needle_img.shape[0]
def match_keypoints(self, haystack_screenshot, name, min_match_count, patch_size=32):
orb = cv.ORB_create(edgeThreshold=0, patchSize=patch_size)
keypoints_needle, descriptors_needle = orb.detectAndCompute(self.needle_img, None)
orb2 = cv.ORB_create(edgeThreshold=0, patchSize=patch_size, nfeatures=2000)
keypoints_haystack, descriptors_haystack = orb2.detectAndCompute(haystack_screenshot, None)
FLANN_INDEX_LSH = 6
index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
search_params = dict(checks=50)
try:
flann = cv.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
except cv.error:
return None, None, [], []
# store all the good matches as per Lowe's ratio test.
good = []
points = []
for pair in matches:
if len(pair) == 2:
if pair[0].distance < 0.7*pair[1].distance:
good.append(pair[0])
if len(good) > min_match_count:
print(str(name) + ' - ' + '%03d keypoints matched - %03d' % (len(good), len(keypoints_needle)))
for match in good:
points.append(keypoints_haystack[match.trainIdx].pt)
return keypoints_needle, keypoints_haystack, good, points
I'm trying to improve the speed of my object detection. I'm using OpenCV
and ORB
brute force keypoint matching.
I've got 30 seconds for keypoint_detection
to run and at the moment it's taking about 23 seconds. This is fine for now with 74 images, but that number is likely to increase.
So far I've;
- Reduced my search area down to the absolute minimum
- Refined my 74 images down to the absolute minimum size possible
I tried pre-processing to remove some of the clutter from the matching but it impacted my ability to find a successful match dramatically.
I've also tried;
- SIF, SURF, MatchTemplate, Canny and a few other methods. ORB brute force is my best match by a significant margin.
This is my exact working code, not pseudo-code and not an MVP as I didn't think it would make sense presenting a cut down version of the code I'm looking to speed up.
Is there any way to improve the efficiency of my code?
import cv2 as cv
import os
import glob
import pyautogui as py
from time import sleep
from windowcapture import WindowCapture
from vision import Vision
# Change the working directory to the folder this script is in.
os.chdir(r'C:\test')
avoid = glob.glob(r"C:\Users\test\*.png")
def loadImages(directory):
# Intialise empty array
image_list = []
# Add images to array
for i in directory:
img = cv.imread(i, cv.IMREAD_UNCHANGED)
image_list.append((img, i))
return image_list
# initialize the WindowCapture class
wincap = WindowCapture()
def keypoint_detection(image_list):
counter = 0
for i in image_list:
counter += 1
needle_img = i[0]
# load image to find
objectToFind = Vision(needle_img)
# get an updated image of the screen
keypoint_haystack = wincap.get_haystack()
# crop the image
x, w, y, h = [600,700,20,50]
keypoint_haystack = keypoint_haystack[y:y+h, x:x+w]
kp1, kp2, matches, match_points = objectToFind.match_keypoints(keypoint_haystack, sliced_name, min_match_count=30)
match_image = cv.drawMatches(objectToFind.needle_img, kp1, keypoint_haystack, kp2, matches, None)
if match_points:
# find the center point of all the matched features
center_point = objectToFind.centeroid(match_points)
# account for the width of the needle image that appears on the left
center_point[0] += objectToFind.needle_w
# drawn the found center point on the output image
match_image = objectToFind.draw_crosshairs(match_image, [center_point])
sleep(3)
break
while(True):
ships_to_avoid = loadImages(avoid)
keypoint_detection(ships_to_avoid)
WindowCapture Class
import numpy as np
import win32gui, win32ui, win32con
class WindowCapture:
# properties
w = 0
h = 0
hwnd = None
cropped_x = 0
cropped_y = 0
offset_x = 0
offset_y = 0
# constructor
def __init__(self, window_name=None):
# find the handle for the window we want to capture.
# if no window name is given, capture the entire screen
if window_name is None:
self.hwnd = win32gui.GetDesktopWindow()
else:
self.hwnd = win32gui.FindWindow(None, window_name)
if not self.hwnd:
raise Exception('Window not found: {}'.format(window_name))
# get the window size
window_rect = win32gui.GetWindowRect(self.hwnd)
self.w = window_rect[2] - window_rect[0]
self.h = window_rect[3] - window_rect[1]
# account for the window border and titlebar and cut them off
border_pixels = 0
titlebar_pixels = 5
self.w = self.w - border_pixels
self.h = self.h - titlebar_pixels - border_pixels
self.cropped_x = border_pixels
self.cropped_y = titlebar_pixels
# set the cropped coordinates offset so we can translate screenshot
# images into actual screen positions
self.offset_x = window_rect[0] + self.cropped_x
self.offset_y = window_rect[1] + self.cropped_y
def get_haystack(self):
# get the window image data
wDC = win32gui.GetWindowDC(self.hwnd)
dcObj = win32ui.CreateDCFromHandle(wDC)
cDC = dcObj.CreateCompatibleDC()
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
cDC.SelectObject(dataBitMap)
cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (self.cropped_x, self.cropped_y), win32con.SRCCOPY)
# convert the raw data into a format opencv can read
# dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
signedIntsArray = dataBitMap.GetBitmapBits(True)
img = np.fromstring(signedIntsArray, dtype='uint8')
img.shape = (self.h, self.w, 4)
# free resources
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(self.hwnd, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
img = img[...,:3]
img = np.ascontiguousarray(img)
return img
@staticmethod
def list_window_names():
def winEnumHandler(hwnd, ctx):
if win32gui.IsWindowVisible(hwnd):
print(hex(hwnd), win32gui.GetWindowText(hwnd))
win32gui.EnumWindows(winEnumHandler, None)
# translate a pixel position on a screenshot image to a pixel position on the screen.
# pos = (x, y)
def get_screen_position(self, pos):
return (pos[0] + self.offset_x, pos[1] + self.offset_y)
Vision Class
import cv2 as cv
import numpy as np
class Vision:
# properties
needle_img = None
needle_w = 0
needle_h = 0
# constructor
def __init__(self, needle_img_path):
self.needle_img = needle_img_path
# Save the dimensions of the needle image
self.needle_w = self.needle_img.shape[1]
self.needle_h = self.needle_img.shape[0]
def match_keypoints(self, haystack_screenshot, name, min_match_count, patch_size=32):
orb = cv.ORB_create(edgeThreshold=0, patchSize=patch_size)
keypoints_needle, descriptors_needle = orb.detectAndCompute(self.needle_img, None)
orb2 = cv.ORB_create(edgeThreshold=0, patchSize=patch_size, nfeatures=2000)
keypoints_haystack, descriptors_haystack = orb2.detectAndCompute(haystack_screenshot, None)
FLANN_INDEX_LSH = 6
index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
search_params = dict(checks=50)
try:
flann = cv.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
except cv.error:
return None, None, [], []
# store all the good matches as per Lowe's ratio test.
good = []
points = []
for pair in matches:
if len(pair) == 2:
if pair[0].distance < 0.7*pair[1].distance:
good.append(pair[0])
if len(good) > min_match_count:
print(str(name) + ' - ' + '%03d keypoints matched - %03d' % (len(good), len(keypoints_needle)))
for match in good:
points.append(keypoints_haystack[match.trainIdx].pt)
return keypoints_needle, keypoints_haystack, good, points
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论