Inaccurate alignment between real-world images and a 3D model


Suppose that I have a fixed physical camera and a static scene. I have a point-cloud scan of the physical world, so I can build a simple reconstruction of the real world out of basic surfaces and cubes.
Simple reconstruction in Unity
Point-cloud scan

The next step is to compute the real-world camera pose using a checkerboard and PnP. After the calculation, I used the resulting Tvec, Rvec, and cv2.projectPoints to draw a virtual cube in world units; it shows up perfectly, which indicates that the camera pose is valid within the OpenCV framework.

Verify camera pose after PnP

However, when I put the resulting camera transform into Unity, the camera translation seems to be off by about half a metre compared to the physical-world estimate. Ideally, what I would like to achieve is pixel-perfect alignment between a real-world image and the Unity camera-view image (which is a digital twin of the physical world). I suspect the OpenCV-to-Unity coordinate conversion; a sketch of what I mean follows the screenshot below.

Tvec Rvec
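
For reference, here is a minimal sketch of the kind of conversion I believe is needed on the Unity side. It assumes the Unity world origin and axes coincide with the checkerboard frame, and that the only difference is handedness (OpenCV uses a right-handed frame with y down, Unity a left-handed frame with y up); the function name opencv_pose_to_unity and this particular axis flip are my assumptions, not verified code.

import numpy as np
import cv2
from scipy.spatial.transform import Rotation

def opencv_pose_to_unity(rvec, tvec):
    # ASSUMPTION: Unity world frame == checkerboard frame, except that
    # Unity is left-handed with y up while OpenCV is right-handed with y down.
    R, _ = cv2.Rodrigues(rvec)
    # Camera centre in the checkerboard (world) frame: C = -R^T * t
    cam_pos = (-R.T @ tvec).flatten()
    # Flip the Y axis to convert y-down to y-up
    pos_unity = np.array([cam_pos[0], -cam_pos[1], cam_pos[2]])
    # Camera-to-world rotation as a quaternion (SciPy order: x, y, z, w)
    qx, qy, qz, qw = Rotation.from_matrix(R.T).as_quat()
    # Conjugating by the y-axis reflection negates the x and z components
    quat_unity = np.array([-qx, qy, -qz, qw])
    return pos_unity, quat_unity

With the rvecs and tvecs printed by the script below, this would be called as pos_unity, quat_unity = opencv_pose_to_unity(rvecs, tvecs), and the result assigned to the Unity camera's position and rotation (Unity's Quaternion constructor also takes x, y, z, w).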

Thank you in advance for your insights!

Below is the code for calculatePnP.

import numpy as np
import cv2
import glob
import os
from scipy.spatial.transform import Rotation
from math import atan2, asin  # needed for the Euler-angle extraction below

# Used to draw standard axis
# https://numpy.org/doc/stable/reference/generated/numpy.ravel.html
def draw(img, corners, imgPoints):
    # Origin of the axes is the first detected chessboard corner
    corner = tuple(int(c) for c in corners[0].ravel())
    ends = [tuple(int(c) for c in p.ravel()) for p in imgPoints]
    print("axis origin: {0}\naxis endpoints: {1}".format(corner, ends))
    img = cv2.line(img, corner, ends[0], (255, 0, 0), 5)  # X axis, blue (BGR)
    img = cv2.line(img, corner, ends[1], (0, 255, 0), 5)  # Y axis, green
    img = cv2.line(img, corner, ends[2], (0, 0, 255), 5)  # Z axis, red
    return img

# Used to draw a cube from its eight projected corner points
# (adapted from the official OpenCV pose-estimation tutorial)
def drawCube(img, corners, imgpts):
    imgpts = np.int32(imgpts).reshape(-1,2)
    # draw ground floor in green
    img = cv2.drawContours(img, [imgpts[:4]],-1,(0,255,0),-3)
    # draw pillars in blue color
    for i,j in zip(range(4),range(4,8)):
        img = cv2.line(img, tuple(imgpts[i]), tuple(imgpts[j]), (255, 0, 0), 3)
    # draw top layer in red color
    img = cv2.drawContours(img, [imgpts[4:]],-1,(0,0,255),3)
    return img

# Load the camera calibration data
os.chdir('C:/Users/')
with np.load('opencvcalib.npz') as calibData:
    mtx, dist, rvecs, tvecs = [calibData[i] for i in ('mtx', 'dist', 'rvecs', 'tvecs')]

print("Previously calibrated dist:\n {0}".format(dist))
print("mtx:\n {0}".format(mtx))
# Define the chess board rows and columns
rows = 9
cols = 6

# Set the termination criteria for the corner sub-pixel algorithm
criteria = (cv2.TERM_CRITERIA_MAX_ITER + cv2.TERM_CRITERIA_EPS, 30, 0.001)

# Prepare the object points: (0,0,0), (1,0,0), (2,0,0), ..., (8,5,0). They are the same for all images
objectPoints = np.zeros((rows * cols,3), np.float32)
objectPoints[:,  :2] = np.mgrid[0:rows, 0:cols].T.reshape(-1, 2)
print("objpts before divide:\n {0}".format(objectPoints))
# Scale object points to real-world metres: one checkerboard square is 0.034 m
# (equivalently, divide by 1/0.034 = 29.41176470588235 squares per metre)
objectPoints = objectPoints * 0.034
print("divided objpts:\n {0}".format(objectPoints))

# Create the axis points, unit is meters, here shortest X axis is 10cm.
axisPoints = np.float32([[0.1, 0, 0], [0, 0.2, 0], [0, 0, -0.3]]).reshape(-1, 3)
#this unit is per checkerboard square
#axisPoints = np.float32([[1, 0, 0], [0, 2, 0], [0, 0, -5]]).reshape(-1, 3)

# Load the extrinsics image (a single frame, so no loop is needed here)
os.chdir('C:/Users/Calibration/extrinsics')
img = cv2.imread("ext3.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Find the chess board corners, and visualize
ret, corners = cv2.findChessboardCorners(gray, (rows, cols), None)
imgchkbrd = cv2.drawChessboardCorners(img, (rows, cols), corners, ret)

cv2.imwrite('corners.jpg', imgchkbrd)


# Make sure the chess board pattern was found in the image
if ret:
    # Refine the corner position
    corners = cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)

    # Find the rotation and translation vectors
    val, rvecs, tvecs, inliers = cv2.solvePnPRansac(objectPoints, corners, mtx, dist)
    #success, rvecs, tvecs = cv2.solvePnP(objectPoints, corners, mtx, dist,flags=cv2.SOLVEPNP_ITERATIVE)
    #print("objpts:\n {0}".format(objectPoints))
    print("corners:\n {0}".format(corners))
    print ("Rotation Vector:\n {0}".format(rvecs))
    print ("Translation Vector:\n {0}".format(tvecs))

    # https://stackoverflow.com/questions/16265714/camera-pose-estimation-opencv-pnp
    # cv2.Rodrigues returns a (matrix, jacobian) tuple, so unpack it
    R, _ = cv2.Rodrigues(rvecs)
    print("RotationMatrix:\n {0}".format(R))

    # Position of the camera expressed in the world frame: C = -R^T * t
    pos = -R.T @ tvecs
    roll = atan2(-R[2][1], R[2][2])
    pitch = asin(R[2][0])
    yaw = atan2(-R[1][0], R[0][0])
    print("pos of camera:\n {0}".format(pos))

    # Quaternion form of the same rotation (SciPy returns x, y, z, w order)
    r = Rotation.from_rotvec(rvecs.flatten())
    quaternion = r.as_quat()
    print("Quaternion1:\n {0}".format(quaternion))

    # Project the 3D axis points to the image plane
    axisImgPoints, jac = cv2.projectPoints(axisPoints, rvecs, tvecs, mtx, dist)

    # Draw the axis lines
    img = draw(img, corners, axisImgPoints)

    # Render a cube with 0.034 m edges (one checkerboard square)
    CubeAxis = np.float32([[0,0,0], [0,0.034,0], [0.034,0.034,0], [0.034,0,0],
                           [0,0,-0.034], [0,0.034,-0.034], [0.034,0.034,-0.034], [0.034,0,-0.034]])
    axisImgPoints, jac = cv2.projectPoints(CubeAxis, rvecs, tvecs, mtx, dist)
    img2 = drawCube(img, corners, axisImgPoints)
    
    # Display the image
    cv2.imshow('chess board', img2)
    cv2.imwrite('checkerboardpnp3.png', img2)
    cv2.waitKey(0)

cv2.destroyAllWindows()
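
As an extra diagnostic (my own addition, not part of the original script; the 0.5 m test offsets are arbitrary), one could reproject a few world points half a metre from the board origin with the recovered pose and check where they land in the image. If they line up with the real scene, the OpenCV pose is sound and the half-metre error must come from the Unity-side conversion.

# Hypothetical sanity check: reproject points 0.5 m from the board origin
# along each axis and compare the pixel locations against the real image.
checkPts = np.float32([[0, 0, 0], [0.5, 0, 0], [0, 0.5, 0], [0, 0, -0.5]])
checkPx, _ = cv2.projectPoints(checkPts, rvecs, tvecs, mtx, dist)
print("reprojected 0.5 m check points (pixels):\n {0}".format(checkPx.reshape(-1, 2)))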

