How do I correctly read text with EasyOCR?

Posted on 2025-02-04 09:24:51

I am trying to read images from a camera module, and so far I process each frame with adaptive thresholding as shown below. I also do a fair amount of manipulation to crop the ROI and read the text. However, EasyOCR reads the number but not the units next to it, which are comparatively small. How do I solve this problem?

import easyocr
import cv2
import numpy as np
import urllib.request

url = 'http://192.168.137.108/cam-hi.jpg'
reader = easyocr.Reader(['en'])   # create the OCR reader once, outside the loop

while True:
    # fetch and decode the latest frame from the camera module
    img_resp = urllib.request.urlopen(url)
    imgnp = np.array(bytearray(img_resp.read()), dtype=np.uint8)
    image = cv2.imdecode(imgnp, -1)

    # denoise, convert to grayscale, and apply an adaptive (Gaussian) threshold
    image = cv2.medianBlur(image, 7)
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    th3 = cv2.adaptiveThreshold(gray_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY, 11, 2)
    kernel = np.ones((5, 5), np.uint8)
    opening = cv2.morphologyEx(th3, cv2.MORPH_OPEN, kernel)

    # find contours and keep the large rectangular one as the ROI
    cnts = cv2.findContours(opening, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    font = cv2.FONT_HERSHEY_SIMPLEX
    org = (50, 50)
    fontScale = 1
    color = (0, 0, 0)
    thickness = 2

    for c in cnts:
        approx = cv2.approxPolyDP(c, 0.01 * cv2.arcLength(c, True), True)
        area = cv2.contourArea(c)
        if len(approx) == 4 and area > 100000:   # manually tuned area value to pick the rectangular ROI
            cv2.drawContours(image, [c], 0, (0, 255, 0), 3)
            (x, y, w, h) = cv2.boundingRect(c)
            old_img = opening[y:y + h, x:x + w]           # select the ROI
            height, width = old_img.shape                 # shape is (rows, cols)
            cropped_img = old_img[50:int(height / 2), 0:width]  # keep the upper half to focus on the number

            new = reader.readtext(cropped_img)            # run EasyOCR on the cropped ROI
            if not new:
                text = 'none'
            else:
                text = new
                print(text)
                if text[0][2] > 0.5:                      # confidence check
                    cv2.putText(cropped_img, text[0][1], org, font, fontScale,
                                color, thickness, cv2.LINE_AA)
            cv2.imshow('frame1', cropped_img)

    key = cv2.waitKey(5)
    if key == 27:   # Esc to quit
        break

cv2.destroyAllWindows()

[original image]

[output image]

2 Answers

迷雾森÷林ヴ 2025-02-11 09:24:51

This is the best I could get. The Greek symbol 'mu' is identified as 'p'. I also tried searching for a Greek language model for easyocr but could not find one.

[image]

Here is what I did:

  • Performed an Otsu threshold on the entire image
  • Selected the contour with the largest area and cropped it
  • Converted the cropped image to LAB color space
  • Manually performed a binary threshold on the A-channel

I got the following:

[image]

Passed this image as input to easyocr:

import cv2
from easyocr import Reader

reader = Reader(['en'])

# crop_img is the ROI cropped from the largest contour in the earlier step
results = reader.readtext(crop_img)

# convert to LAB space
lab = cv2.cvtColor(crop_img, cv2.COLOR_BGR2LAB)

# threshold on the A-channel
r, th = cv2.threshold(lab[:, :, 1], 125, 255, cv2.THRESH_BINARY_INV)

# create a copy of the cropped image to draw on
crop_img2 = crop_img.copy()

# draw only the first 5 results for clarity
# borrowed from: https://pyimagesearch.com/2020/09/14/getting-started-with-easyocr-for-optical-character-recognition/
for (bbox, text, prob) in results[:5]:
    (tl, tr, br, bl) = bbox
    tl = (int(tl[0]), int(tl[1]))
    tr = (int(tr[0]), int(tr[1]))
    br = (int(br[0]), int(br[1]))
    bl = (int(bl[0]), int(bl[1]))
    crop_img2 = cv2.rectangle(crop_img2, tl, br, (0, 0, 255), 3)
    crop_img2 = cv2.putText(crop_img2, text, (tl[0], tl[1] - 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.1, (0, 0, 0), 5)
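
For reference, here is a minimal sketch of the Otsu-threshold and largest-contour-crop steps described above. It is an assumed reconstruction, not code from the original answer; `image` (the BGR frame) and `crop_img` are placeholder names:

import cv2

# image: the original BGR frame (assumed input)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Otsu threshold on the entire image
_, otsu = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# find contours and keep the one with the largest area
cnts = cv2.findContours(otsu, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
largest = max(cnts, key=cv2.contourArea)

# crop the bounding box of the largest contour from the original frame
x, y, w, h = cv2.boundingRect(largest)
crop_img = image[y:y + h, x:x + w]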

雾里花 2025-02-11 09:24:51

If you clean up the image and pass its file path to the method below, it should work:

import cv2
import easyocr

def text_extraction(image, lang_code='en'):
    reader = easyocr.Reader([lang_code], gpu=False)
    roi = cv2.imread(image)  # optionally crop, e.g. [85:731, 265:1275]
    output = reader.readtext(roi)
    # returns a list of tuples: ([box corner coordinates], text, confidence)
    return output
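
A possible usage, with a placeholder file name:

results = text_extraction('meter_reading.jpg')
for bbox, text, confidence in results:
    if confidence > 0.5:
        print(text, confidence)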
