How can I improve detection efficiency when using FLANN?

I'm trying to improve the speed of my object detection. I'm using OpenCV and ORB brute force keypoint matching.

I've got a 30-second window for keypoint_detection to run, and at the moment it's taking about 23 seconds. This is fine for now with 74 images, but that number is likely to increase.
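
A quick way to see where those 23 seconds go is to time the capture and match stages separately. A minimal sketch using time.perf_counter, reusing the wincap, objectToFind and needle_name names from the main script below:

  from time import perf_counter

  t0 = perf_counter()
  keypoint_haystack = wincap.get_haystack()   # capture stage
  t1 = perf_counter()
  kp1, kp2, matches, match_points = objectToFind.match_keypoints(
      keypoint_haystack, needle_name, min_match_count=30)   # detect + match stage
  t2 = perf_counter()
  print('capture %.3fs, detect+match %.3fs' % (t1 - t0, t2 - t1))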

So far I've:

  • Reduced my search area down to the absolute minimum
  • Refined my 74 images down to the absolute minimum size possible

I tried pre-processing to remove some of the clutter before matching, but it dramatically reduced my ability to find a successful match.

I've also tried:

  • SIFT, SURF, matchTemplate, Canny and a few other methods. ORB brute force is my best match by a significant margin.

This is my exact working code, not pseudo-code and not an MVP, as I didn't think it would make sense to present a cut-down version of the code I'm looking to speed up.

Is there any way to improve the efficiency of my code?

  import cv2 as cv
  import os
  import glob
  import pyautogui as py
  from time import sleep
  from windowcapture import WindowCapture
  from vision import Vision

  # Change the working directory to a fixed folder.
  os.chdir(r'C:\test')

  avoid = glob.glob(r"C:\Users\test\*.png")

  def loadImages(directory):
    # Initialise empty list
    image_list = []
    # Add images to array
    for i in directory:
        img = cv.imread(i, cv.IMREAD_UNCHANGED)
        image_list.append((img, i))
    return image_list

  # initialize the WindowCapture class
  wincap = WindowCapture()

  def keypoint_detection(image_list):
      for needle_img, needle_name in image_list:

          # load image to find
          objectToFind = Vision(needle_img)
          # get an updated image of the screen
          keypoint_haystack = wincap.get_haystack()
          # crop the image
          x, w, y, h = [600,700,20,50]
          keypoint_haystack = keypoint_haystack[y:y+h, x:x+w]

          kp1, kp2, matches, match_points = objectToFind.match_keypoints(keypoint_haystack, needle_name, min_match_count=30)
          match_image = cv.drawMatches(objectToFind.needle_img, kp1, keypoint_haystack, kp2, matches, None)

          if match_points:
              # find the center point of all the matched features
              center_point = objectToFind.centeroid(match_points)
              # account for the width of the needle image that appears on the left
              center_point[0] += objectToFind.needle_w
              # draw the found center point on the output image
              match_image = objectToFind.draw_crosshairs(match_image, [center_point])
            
              sleep(3)
              break

  while True:
      ships_to_avoid = loadImages(avoid)
      keypoint_detection(ships_to_avoid)
    
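The driver loop above also redoes work that could be hoisted: loadImages re-reads all 74 needle files from disk on every pass of the while loop, and the screen is captured and cropped once per needle instead of once per pass. Here's a minimal sketch of that restructuring, assuming the same Vision and WindowCapture classes shown below (the needles and visions names are illustrative):

  import cv2 as cv
  import glob
  from windowcapture import WindowCapture
  from vision import Vision

  wincap = WindowCapture()

  # Read each needle image from disk once, up front.
  avoid = glob.glob(r"C:\Users\test\*.png")
  needles = [(cv.imread(path, cv.IMREAD_UNCHANGED), path) for path in avoid]

  # Wrap each needle in a Vision object once, so any needle-side ORB work
  # can be cached rather than redone on every pass.
  visions = [(Vision(img), path) for img, path in needles]

  while True:
      # Capture and crop the screen once per pass, not once per needle.
      haystack = wincap.get_haystack()
      x, w, y, h = [600, 700, 20, 50]
      haystack = haystack[y:y+h, x:x+w]

      for objectToFind, name in visions:
          kp1, kp2, matches, match_points = objectToFind.match_keypoints(
              haystack, name, min_match_count=30)
          if match_points:
              break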

WindowCapture Class

import numpy as np
import win32gui, win32ui, win32con

class WindowCapture:

    # properties
    w = 0
    h = 0
    hwnd = None
    cropped_x = 0
    cropped_y = 0
    offset_x = 0
    offset_y = 0

    # constructor
    def __init__(self, window_name=None):
        # find the handle for the window we want to capture.
        # if no window name is given, capture the entire screen
        if window_name is None:
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            self.hwnd = win32gui.FindWindow(None, window_name)
            if not self.hwnd:
                raise Exception('Window not found: {}'.format(window_name))

        # get the window size
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # account for the window border and titlebar and cut them off
        border_pixels = 0
        titlebar_pixels = 5
        self.w = self.w - border_pixels
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

        # set the cropped coordinates offset so we can translate screenshot
        # images into actual screen positions
        self.offset_x = window_rect[0] + self.cropped_x
        self.offset_y = window_rect[1] + self.cropped_y

    def get_haystack(self):

        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        # convert the raw data into a format opencv can read
        # dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
        signedIntsArray = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(signedIntsArray, dtype='uint8')
        img.shape = (self.h, self.w, 4)

        # free resources
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())
        img = img[...,:3]
        img = np.ascontiguousarray(img)
        return img

    @staticmethod
    def list_window_names():
        def winEnumHandler(hwnd, ctx):
            if win32gui.IsWindowVisible(hwnd):
                print(hex(hwnd), win32gui.GetWindowText(hwnd))
        win32gui.EnumWindows(winEnumHandler, None)

    # translate a pixel position on a screenshot image to a pixel position on the screen.
    # pos = (x, y)
    def get_screen_position(self, pos):
        return (pos[0] + self.offset_x, pos[1] + self.offset_y)
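
One subtlety worth noting with get_screen_position: it only adds the window offsets, so the crop applied in keypoint_detection (x=600, y=20) has to be added back before translating a match point into screen coordinates. A hypothetical usage, assuming (mx, my) is a point found on the cropped haystack:

crop_x, crop_y = 600, 20
screen_x, screen_y = wincap.get_screen_position((mx + crop_x, my + crop_y))
py.moveTo(screen_x, screen_y)  # pyautogui, imported as py in the main script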

Vision Class

import cv2 as cv
import numpy as np


class Vision:
    # properties
    needle_img = None
    needle_w = 0
    needle_h = 0

    # constructor
    def __init__(self, needle_img):
        self.needle_img = needle_img

        # Save the dimensions of the needle image
        self.needle_w = self.needle_img.shape[1]
        self.needle_h = self.needle_img.shape[0]
                                 
    def match_keypoints(self, haystack_screenshot, name, min_match_count, patch_size=32):

        orb = cv.ORB_create(edgeThreshold=0, patchSize=patch_size)
        keypoints_needle, descriptors_needle = orb.detectAndCompute(self.needle_img, None)
        orb2 = cv.ORB_create(edgeThreshold=0, patchSize=patch_size, nfeatures=2000)
        keypoints_haystack, descriptors_haystack = orb2.detectAndCompute(haystack_screenshot, None)

        FLANN_INDEX_LSH = 6
        index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
        search_params = dict(checks=50)

        try:
            flann = cv.FlannBasedMatcher(index_params, search_params)
            matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
        except cv.error:
            return None, None, [], []
            
        # store all the good matches as per Lowe's ratio test.
        good = []
        points = []

        for pair in matches:
            if len(pair) == 2:
                if pair[0].distance < 0.7*pair[1].distance:
                    good.append(pair[0])

        if len(good) > min_match_count:
            print(str(name) + ' - ' + '%03d keypoints matched - %03d' % (len(good), len(keypoints_needle)))
           
            for match in good:
                points.append(keypoints_haystack[match.trainIdx].pt)
        
        return keypoints_needle, keypoints_haystack, good, points
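
Since match_keypoints runs for every needle on every pass, the needle-side detectAndCompute is repeated work: the needle image never changes, so its keypoints and descriptors could be computed once in the constructor. A minimal sketch of that change (CachedVision is a hypothetical name; the ORB and FLANN parameters are copied from the original):

import cv2 as cv


class CachedVision:
    # Variant of Vision that does the needle-side ORB work once.
    def __init__(self, needle_img, patch_size=32):
        self.needle_img = needle_img
        self.needle_h, self.needle_w = needle_img.shape[:2]

        # Needle keypoints/descriptors are computed once here,
        # not on every call to match_keypoints.
        orb_needle = cv.ORB_create(edgeThreshold=0, patchSize=patch_size)
        self.kp_needle, self.des_needle = orb_needle.detectAndCompute(needle_img, None)

        # The haystack detector and FLANN matcher are built once and reused.
        self.orb_haystack = cv.ORB_create(edgeThreshold=0, patchSize=patch_size, nfeatures=2000)
        FLANN_INDEX_LSH = 6
        index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6,
                            key_size=12, multi_probe_level=1)
        self.flann = cv.FlannBasedMatcher(index_params, dict(checks=50))

    def match_keypoints(self, haystack, name, min_match_count=30):
        # Only the haystack side is recomputed per frame.
        kp_hay, des_hay = self.orb_haystack.detectAndCompute(haystack, None)
        if self.des_needle is None or des_hay is None:
            return self.kp_needle, kp_hay, [], []

        try:
            matches = self.flann.knnMatch(self.des_needle, des_hay, k=2)
        except cv.error:
            return self.kp_needle, kp_hay, [], []

        # Lowe's ratio test, as in the original.
        good = [pair[0] for pair in matches
                if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance]

        points = []
        if len(good) > min_match_count:
            print(str(name) + ' - ' + '%03d keypoints matched - %03d' % (len(good), len(self.kp_needle)))
            points = [kp_hay[m.trainIdx].pt for m in good]
        return self.kp_needle, kp_hay, good, points

Note the FLANN index is still rebuilt inside every knnMatch call, because the haystack descriptors change each frame, so the saving here is the needle-side detectAndCompute.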
