cv.solvePnP估计人脸姿态时,对输入数据不响应?
from __future__ import print_function
import os
import cv2
import sys
import numpy as np
import math
import json
# 3D model points.
MODEL_POINTS_68 = np.array([
[-53.30021857, -15.27635071, 60.43970857],
[-52.85842643, -1.810875, 59.18390214],
[-51.25744357, 11.67454214, 58.42188571],
[-48.626315, 24.63487214, 57.20655286],
[-43.58355, 36.41352214, 53.98811286],
[-35.42554, 46.354015, 48.35620143],
[-25.24848857, 54.04395214, 39.57465214],
[-13.64442429, 59.62245214, 28.96935357],
[ -0.80573857, 61.05416214, 25.76168714],
[ 11.60732, 59.40927786, 30.09232929],
[ 22.44441643, 53.81683, 40.08901643],
[ 32.24683929, 46.23280714, 48.37045 ],
[ 40.083355, 36.30976214, 53.26906357],
[ 45.11549929, 24.19315786, 55.59597786],
[ 47.84661071, 11.00381857, 56.59250643],
[ 49.47115857, -2.32260857, 57.92187643],
[ 50.10645643, -15.67903571, 60.06320429],
[-44.53299214, -29.24790071, 36.57320143],
[-37.510265, -35.96803857, 31.583005 ],
[-27.87970214, -38.27258143, 26.70722214],
[-18.03538357, -37.58489857, 21.67254214],
[ -9.18749714, -34.48015571, 17.84333 ],
[ 7.73560929, -34.98320857, 17.78346857],
[ 17.05676643, -38.20546071, 21.16640143],
[ 26.50843214, -38.83083714, 25.66079071],
[ 35.68873143, -36.34843286, 30.21249643],
[ 42.01933429, -29.83979571, 34.68843286],
[ -0.40917357, -24.128035, 16.83373214],
[ -0.30141, -16.12798571, 11.28371929],
[ -0.16755643, -8.23575357, 5.47430286],
[ 0., 0., -0. ],
[-10.55176571, 1.85589643, -14.38000214],
[ -5.12874214, 3.39399214, -16.81191714],
[ 0.39708571, 4.68778571, -18.53174857],
[ 5.90892786, 3.32928929, -16.92552929],
[ 10.86739357, 1.88789, -14.89868357],
[-32.89092143, -26.76529357, 5.027135 ],
[-26.91049143, -30.52179286, 2.15801214],
[-19.91704, -30.50822643, 0.96687786],
[-14.03447714, -26.25338714, -0.07934857],
[-20.194975, -25.09606643, -0.105195 ],
[-27.20172714, -24.94217357, 1.05472286],
[ 13.76133429, -26.45164714, -0.47553286],
[ 19.92442214, -30.95888929, 0.1769 ],
[ 26.74109214, -30.79344429, 1.21173929],
[ 32.26486071, -27.20465357, 3.49583071],
[ 27.28318143, -25.38001714, 0.202115 ],
[ 20.54642071, -25.34592071, -0.837625 ],
[-20.65447643, 20.43765429, -1.60022143],
[-12.52371, 15.83727643, -11.38166786],
[ -4.77470714, 13.59217929, -16.15096786],
[ 0.27214357, 14.80079857, -16.96316929],
[ 5.98245929, 13.59675714, -16.22999643],
[ 13.48329857, 15.99579214, -11.150485 ],
[ 20.56743714, 20.05708857, -2.29813786],
[ 13.61255286, 25.92732, -10.70571214],
[ 6.39741071, 28.31041071, -16.110175 ],
[ 0.272535, 28.85403357, -16.85116143],
[ -5.30635357, 28.454575, -16.00436143],
[-12.97188143, 26.19849929, -10.80136214],
[-17.41249286, 20.48412214, -3.41834571],
[ -4.92688071, 18.19712571, -14.92410143],
[ 0.24333071, 18.58162071, -15.87177071],
[ 6.03194429, 18.09014143, -15.01822857],
[ 17.48176643, 20.23072, -4.08055429],
[ 6.03511857, 21.85444, -14.76534929],
[ 0.14665857, 22.43481286, -15.64547857],
[ -5.14161857, 22.03205429, -14.52001571]], dtype=float)
class PoseEstimator:
"""Estimate head pose according to the facial landmarks"""
def __init__(self, img_size=(192, 256)):
self.size = img_size
self.landmarks_general_68 = MODEL_POINTS_68
self.focal_length = 962
# self.focal_length = self.size[1]
self.camera_center = (self.size[1] / 2, self.size[0] / 2)
self.camera_matrix = np.array(
[[self.focal_length, 0, self.camera_center[0]],
[0, self.focal_length, self.camera_center[1]],
[0, 0, 1]], dtype=float)
# Assuming no lens distortion
self.dist_coeefs = np.zeros((4, 1))
# Rotation vector and translation vector; 原代码给它设置了初值,不知道怎么来的?
"""self.r_vec = np.array([[0], [0], [0]], dtype=float)
self.t_vec = np.array([[0], [0], [0]], dtype=float)"""
self.r_vec = np.array([[0.01891013], [0.08560084], [-3.14392813]])
self.t_vec = np.array([[-14.97821226], [-10.62040383], [-2053.03596872]])
def get_euler_angle(self, rotation_vector):
# calc rotation angles
theta = cv2.norm(rotation_vector, cv2.NORM_L2)
# transform to quaterniond
w = math.cos(theta / 2)
x = math.sin(theta / 2)*rotation_vector[0][0] / theta
y = math.sin(theta / 2)*rotation_vector[1][0] / theta
z = math.sin(theta / 2)*rotation_vector[2][0] / theta
# pitch (x-axis rotation)
t0 = 2.0 * (w*x + y*z)
t1 = 1.0 - 2.0*(x**2 + y**2)
pitch = math.atan2(t0, t1)
# yaw (y-axis rotation)
t2 = 2.0 * (w*y - z*x)
if t2 > 1.0:
t2 = 1.0
if t2 < -1.0:
t2 = -1.0
yaw = math.asin(t2)
# roll (z-axis rotation)
t3 = 2.0 * (w*z + x*y)
t4 = 1.0 - 2.0*(y**2 + z**2)
roll = math.atan2(t3, t4)
return pitch, yaw, roll
def solve_pose_by_68_points(self, image_points):
print(self.landmarks_general_68.shape)
print(image_points.shape)
success, rotation_vector, translation_vector = cv2.solvePnP(
self.landmarks_general_68,
image_points,
self.camera_matrix,
self.dist_coeefs,
rvec=self.r_vec,
tvec=self.t_vec,
flags=cv2.SOLVEPNP_ITERATIVE,
useExtrinsicGuess=True)
#flags=SOLVEPNP_EPNP)
print(success)
print(self.r_vec)
return rotation_vector, translation_vector
if __name__ == "__main__":
img = cv2.imread(IMAGE_PATH)
img = cv2.resize(img, (192, 256))
landmarks_load = json.load(open(LANDMARKS_ROOT, 'r'))
landmarks_98 = get_landmarks(IMAGE_PATH, landmarks_load)
landmarks_98 = np.array(landmarks_98, dtype=float)*4
landmarks_68 = get_68from98(landmarks_98)
print(landmarks_68)
pose_estimator = PoseEstimator(img_size=img.shape)
rotation_vector, translation_vector = pose_estimator.solve_pose_by_68_points(landmarks_68)
pitch, yaw, roll = pose_estimator.get_euler_angle(rotation_vector)
def _radian2angle(r):
return (r/math.pi)*180
Y, X, Z = map(_radian2angle, [pitch, yaw, roll])
line = 'Y:{:.1f}\nX:{:.1f}\nZ:{:.1f}'.format(Y,X,Z)
print('{},{}'.format(os.path.basename(IMAGE_PATH), line.replace('\n',',')))
print('yaw:{}, pitch:{}, roll:{}'.format(yaw, pitch, roll))
print('Translation vector:{}'.format(translation_vector))
参照https://blog.csdn.net/ChuiGeD...,进行人脸姿态估计,使用cv.solvePnP函数将68点标准3D人脸模型拟合图片上预标注的68点2D关键点。
我的代码修改了参考代码中r_vec与t_vec的初始值;将68个3D点进行了坐标轴方向变换(x轴左负右正、y轴上负下正、z轴近负远正),原点变换(鼻尖,点[30]),尺度缩放(近似图片中人脸的平均尺寸)。根据KDEF数据集的相机参数推算了焦距,其余相机内参维持了原始代码的值。
经测试发现,计算得到的角度完全不符合实际,且采用不同姿态的图片数据输入,获取的输出差别很细微。
而当我大幅度修改焦距、或者将r_vec、t_vec还原为参考代码的初值时,结果会发生显著变化且仍错误,且仍不对图片输入的变化起反应。
r_vec与t_vec根据函数描述似乎是用来存储输出结果的容器,不理解为什么它们初值的变化会影响输出结果?感觉是函数没有完成预期的计算效果?
人脸姿态估计这一步我也不需要做得很精确,但是对我项目的后续工作不可或缺,在网上查了一圈基本是用的这个cv函数模块。请问最好有使用过这个函数或者完成过类似姿态估计任务的大佬,分析一下问题或者分享一下经验?谢谢!!~~
后续我会贴出测试用的正面脸、侧面脸、3D点截取前两维的3D点数据,作为测试输入。这样的话,2D点的输入处理函数就可以忽略。
正面人脸:
LANDMARKS_68_S = np.array([
[43.0, 126.66666412353516],
[42.0, 133.33333587646484],
[46.0, 148.0],
[47.0, 162.66666412353516],
[50.0, 177.33333587646484],
[57.0, 189.3333282470703],
[68.0, 198.6666717529297],
[83.0, 211.1111094156901],
[97.0, 217.3333282470703],
[109.0, 210.66666412353516],
[120.0, 201.3333282470703],
[128.0, 192.0],
[138.0, 181.33333587646484],
[142.0, 168.0],
[146.0, 156.0],
[83.0, 211.1111094156901],
[143.0, 121.33333587646484],
[45.0, 121.33333587646484],
[54.0, 114.66666793823242],
[66.0, 116.0],
[73.0, 117.33333587646484],
[82.0, 118.66666412353516],
[106.0, 118.66666412353516],
[114.0, 118.66666793823242],
[121.0, 118.66666793823242],
[129.0, 112.0],
[137.0, 118.66666412353516],
[93.0, 129.3333282470703],
[93.0, 134.6666717529297],
[95.0, 150.6666717529297],
[93.0, 158.6666717529297],
[81.0, 161.3333282470703],
[89.0, 164.0],
[95.0, 169.3333282470703],
[103.0, 164.0],
[109.0, 158.6666717529297],
[57.0, 132.0],
[61.0, 124.0],
[75.0, 126.66666412353516],
[81.0, 129.3333282470703],
[75.0, 137.3333282470703],
[65.0, 137.3333282470703],
[107.0, 132.0],
[111.0, 126.66666412353516],
[121.0, 124.0],
[131.0, 124.0],
[127.0, 129.3333282470703],
[113.0, 132.0],
[69.0, 172.0],
[83.0, 169.3333282470703],
[91.0, 169.3333282470703],
[93.0, 169.3333282470703],
[101.0, 166.6666717529297],
[109.0, 169.3333282470703],
[121.0, 169.3333282470703],
[115.0, 182.6666717529297],
[105.0, 188.0],
[97.0, 188.0],
[85.0, 190.6666717529297],
[79.0, 182.6666717529297],
[73.0, 172.0],
[85.0, 172.0],
[93.0, 172.0],
[105.0, 174.6666717529297],
[117.0, 172.0],
[109.0, 182.6666717529297],
[97.0, 185.3333282470703],
[83.0, 185.3333282470703]], dtype=float)
右转45度人脸:
LANDMARKS_68_HR = np.array([
[65.0, 121.33333587646484],
[66.0, 133.33333587646484],
[67.0, 148.0],
[68.0, 172.0],
[73.0, 181.3333282470703],
[84.0, 196.0],
[99.0, 202.6666717529297],
[117.66666666666667, 211.1111094156901],
[135.0, 209.3333282470703],
[139.0, 201.33333587646484],
[143.0, 192.0],
[147.0, 178.6666717529297],
[150.0, 169.33333587646484],
[155.0, 158.66666412353516],
[159.0, 153.33333587646484],
[117.66666666666667, 211.1111094156901],
[157.0, 126.66666412353516],
[95.0, 121.33333587646484],
[103.0, 120.0],
[115.0, 116.0],
[122.0, 116.0],
[131.0, 121.33333206176758],
[145.0, 121.33333206176758],
[149.0, 122.66666793823242],
[154.0, 118.66666793823242],
[156.0, 117.33333587646484],
[159.0, 113.33333587646484],
[143.0, 132.0],
[143.0, 140.0],
[147.0, 153.3333282470703],
[149.0, 156.0],
[129.0, 161.3333282470703],
[133.0, 164.0],
[143.0, 164.0],
[145.0, 161.3333282470703],
[151.0, 161.3333282470703],
[101.0, 132.0],
[109.0, 124.0],
[123.0, 124.0],
[127.0, 132.0],
[121.0, 137.3333282470703],
[111.0, 134.6666717529297],
[145.0, 132.0],
[145.0, 126.66666412353516],
[153.0, 124.0],
[153.0, 124.0],
[153.0, 129.3333282470703],
[145.0, 129.3333282470703],
[111.0, 172.0],
[121.0, 169.3333282470703],
[139.0, 174.6666717529297],
[143.0, 172.0],
[143.0, 174.6666717529297],
[145.0, 172.0],
[145.0, 172.0],
[141.0, 182.6666717529297],
[143.0, 188.0],
[139.0, 190.6666717529297],
[127.0, 185.3333282470703],
[119.0, 185.3333282470703],
[111.0, 172.0],
[127.0, 172.0],
[137.0, 172.0],
[143.0, 172.0],
[141.0, 172.0],
[143.0, 180.0],
[139.0, 185.3333282470703],
[121.0, 180.0]], dtype=float)
标准人脸:
LANDMARKS_68 = np.array([
[42.69978143, 112.72364929],
[43.14157357, 126.189125],
[44.74255643, 139.67454214],
[47.373685, 152.63487214],
[52.41645, 164.41352214],
[60.57446, 174.354015],
[70.75151143, 182.04395214],
[82.35557571, 187.62245214],
[95.19426143, 189.05416214000002],
[107.60732, 187.40927786],
[118.44441643, 181.81683],
[128.24683929, 174.23280714],
[136.08335499999998, 164.30976214],
[141.11549929, 152.19315785999999],
[143.84661071, 139.00381857],
[145.47115857, 125.67739143],
[146.10645643, 112.32096429],
[51.46700786, 98.75209929],
[58.489735, 92.03196143],
[68.12029786, 89.72741857],
[77.96461643, 90.41510142999999],
[86.81250286, 93.51984429000001],
[103.73560929, 93.01679143],
[113.05676643, 89.79453929],
[122.50843214, 89.16916286],
[131.68873143, 91.65156714],
[138.01933429000002, 98.16020429],
[95.59082643, 103.871965],
[95.69859, 111.87201429],
[95.83244357, 119.76424643],
[96.0, 128.0],
[85.44823429, 129.85589643],
[90.87125786, 131.39399214],
[96.39708571, 132.68778571],
[101.90892786, 131.32928929],
[106.86739357, 129.88789],
[63.10907857, 101.23470643],
[69.08950856999999, 97.47820714],
[76.08296, 97.49177356999999],
[81.96552286, 101.74661286],
[75.805025, 102.90393356999999],
[68.79827286, 103.05782643],
[109.76133429000001, 101.54835286],
[115.92442214, 97.04111071],
[122.74109214, 97.20655571],
[128.26486071, 100.79534643],
[123.28318143, 102.61998286],
[116.54642071, 102.65407929],
[75.34552357, 148.43765429],
[83.47629, 143.83727643],
[91.22529286, 141.59217929],
[96.27214357, 142.80079857],
[101.98245929, 141.59675714],
[109.48329857, 143.99579214],
[116.56743714, 148.05708857],
[109.61255286, 153.92732],
[102.39741071, 156.31041070999999],
[96.272535, 156.85403357],
[90.69364643, 156.454575],
[83.02811857, 154.19849929],
[78.58750714, 148.48412214],
[91.07311929, 146.19712571],
[96.24333071, 146.58162071],
[102.03194429, 146.09014143],
[113.48176643, 148.23072],
[102.03511857, 149.85444],
[96.14665857, 150.43481286],
[90.85838143, 150.03205429]], dtype=float)
目前的估计效果,绿点为预标注的2D关键点;人脸图片仅为展示,不参与估计运算。(正面图上传失败,试了几次了;稍后再试试。。)
正面脸:Y:10.9 X:-2.7 Z:-179.2
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论