手把手写深度学习(27)：如何获得相机位姿的plücker embedding？以RealEstate10K为例

手把手写深度学习(0)：专栏文章导航

前言：用plücker embedding表示相机的位姿是一种非常常用的方法，这篇博客以RealEstate10K数据集为例子，详细讲解如何从相机的轨迹坐标中获得plücker embedding，用于下一步模型的学习。

初始化相机参数

计算相机的相对位姿

读取轨迹文件中的相机位姿信息

将位姿信息解析为Camera对象列表

计算目标图像与原始位姿图像的宽高比

构建相机内参矩阵

根据轨迹文件获取Plücker嵌入

完整代码

示例文件

初始化相机参数

class Camera(object):
    """Intrinsics and extrinsics of one camera frame parsed from a pose entry.

    Attributes set by ``__init__``:
        fx, fy, cx, cy: intrinsics taken verbatim from ``entry[1:5]``.
        w2c_mat: 4x4 world-to-camera matrix built from ``entry[7:]``.
        c2w_mat: 4x4 camera-to-world matrix, the inverse of ``w2c_mat``.
    """

    def __init__(self, entry):
        """Parse one pose entry.

        Args:
            entry: sequence of numbers. ``entry[0]`` is not read here,
                ``entry[1:5]`` holds ``fx, fy, cx, cy``, ``entry[5:7]`` is not
                read here, and ``entry[7:]`` holds the 12 values of a
                row-major 3x4 world-to-camera matrix.

        Raises:
            ValueError: if the intrinsics or the 3x4 matrix cannot be
                extracted because the entry has the wrong number of values.
            numpy.linalg.LinAlgError: if the resulting 4x4 matrix is singular.
        """
        intrinsics = entry[1:5]
        if len(intrinsics) != 4:
            raise ValueError(
                "pose entry is too short to contain fx, fy, cx, cy "
                "(got %d values in total)" % len(entry)
            )
        self.fx, self.fy, self.cx, self.cy = intrinsics

        extrinsic_values = np.array(entry[7:])
        if extrinsic_values.size != 12:
            raise ValueError(
                "pose entry must end with 12 values forming a 3x4 matrix, "
                "got %d" % extrinsic_values.size
            )
        w2c_mat = extrinsic_values.reshape(3, 4)

        # Promote the 3x4 [R | t] block to a homogeneous 4x4 matrix by adding
        # the row [0, 0, 0, 1]; this makes the transform invertible below.
        w2c_mat_4x4 = np.eye(4)
        w2c_mat_4x4[:3, :] = w2c_mat

        # World-to-camera transform as stored in the entry.
        self.w2c_mat = w2c_mat_4x4
        # Camera-to-world transform is its inverse.
        self.c2w_mat = np.linalg.inv(w2c_mat_4x4)

计算相机的相对位姿

def get_relative_pose(cam_params, zero_first_frame_scale):
    """Express every camera pose relative to the first camera of the sequence.

    The first camera is mapped to a canonical pose: identity rotation with
    translation ``(0, -d, 0)``. ``d`` is 0 when ``zero_first_frame_scale`` is
    true; otherwise it is the norm of the first camera's camera-to-world
    translation. All other cameras are re-expressed in that frame.

    Args:
        cam_params: sequence of objects exposing 4x4 ``w2c_mat`` and
            ``c2w_mat`` attributes.
        zero_first_frame_scale: if true, place the first camera exactly at the
            origin; if false, keep its distance to the world origin as an
            offset along -y.

    Returns:
        ``numpy.float32`` array holding one 4x4 camera-to-world matrix per
        entry of ``cam_params``; element 0 is the canonical pose itself.

    Raises:
        IndexError: if ``cam_params`` is empty.
    """
    first_c2w = cam_params[0].c2w_mat
    first_w2c = cam_params[0].w2c_mat

    # Offset of the canonical first camera from the origin.
    if zero_first_frame_scale:
        cam_to_origin = 0
    else:
        cam_to_origin = np.linalg.norm(first_c2w[:3, 3])

    canonical_c2w = np.array([
        [1, 0, 0, 0],
        [0, 1, 0, -cam_to_origin],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ])

    # Maps absolute world coordinates into the frame anchored at camera 0.
    abs2rel = canonical_c2w @ first_w2c

    relative = [canonical_c2w]
    for later_cam in cam_params[1:]:
        relative.append(abs2rel @ later_cam.c2w_mat)

    return np.array(relative, dtype=np.float32)

读取轨迹文件中的相机位姿信息

    # Read the whole trajectory file; the context manager closes it promptly.
    with open(trajectory_file, 'r') as f:
        lines = f.readlines()
    # The first line is a header (a video URL in the sample file) and is
    # skipped. Split on any run of whitespace and drop blank lines so trailing
    # newlines or repeated spaces do not produce empty fields.
    poses = [line.split() for line in lines[1:] if line.strip()]

将位姿信息解析为Camera对象列表

    # Convert every field of every pose line from text to float ...
    cam_params = [[float(x) for x in pose] for pose in poses]
    # ... then wrap each numeric row in a Camera, which extracts the
    # intrinsics and builds the w2c / c2w matrices.
    cam_params = [Camera(cam_param) for cam_param in cam_params]

计算目标图像与原始位姿图像的宽高比

    # Compare the aspect ratio of the target images with that of the frames
    # the poses were estimated on, and rescale one focal length accordingly.
    sample_wh_ratio = image_width / image_height
    pose_wh_ratio = original_pose_width / original_pose_height
    if pose_wh_ratio > sample_wh_ratio:
        # Pose frames are relatively wider than the target: compute the width
        # the original frame would have at the target height and rescale fx.
        # NOTE(review): presumably matches a resize-then-center-crop of the
        # width in the data pipeline -- confirm against the image loader.
        resized_ori_w = image_height * pose_wh_ratio
        for cam_param in cam_params:
            cam_param.fx = resized_ori_w * cam_param.fx / image_width
    else:
        # Pose frames are relatively taller (or equal): compute the height the
        # original frame would have at the target width and rescale fy.
        resized_ori_h = image_width / pose_wh_ratio
        for cam_param in cam_params:
            cam_param.fy = resized_ori_h * cam_param.fy / image_height

构建相机内参矩阵

    # One row [fx, fy, cx, cy] per frame. The stored intrinsics are multiplied
    # by the target image size here (the sample file stores cx = cy = 0.5,
    # i.e. values relative to the image size).
    intrinsic = np.asarray(
        [
            [
                cam_param.fx * image_width,
                cam_param.fy * image_height,
                cam_param.cx * image_width,
                cam_param.cy * image_height,
            ]
            for cam_param in cam_params
        ],
        dtype=np.float32,
    )
    # Add a leading batch axis.
    K = torch.as_tensor(intrinsic)[None]  # [1, n_frame, 4]

根据轨迹文件获取Plücker嵌入

def get_plucker_embedding(trajectory_file, image_width, image_height, original_pose_width, original_pose_height, device='cpu'):
    """Compute Plücker embeddings for every camera listed in a trajectory file.

    Args:
        trajectory_file: path to a text file whose first line is a header
            (skipped) and whose remaining lines each hold one
            whitespace-separated pose entry accepted by ``Camera``.
        image_width: width of the target images.
        image_height: height of the target images.
        original_pose_width: width of the frames the poses refer to.
        original_pose_height: height of the frames the poses refer to.
        device: torch device used for the computation and the result.

    Returns:
        The tensor returned by ``ray_condition`` with its last axis moved to
        position 2, made contiguous and placed on ``device``.
        NOTE(review): ``ray_condition`` is not shown alongside this function;
        it is expected to return a ``[b, f, h, w, 6]`` tensor, which would
        make the result ``[b, f, 6, h, w]`` -- confirm.

    Raises:
        ValueError: if a field of a pose line cannot be parsed as a float.
    """
    with open(trajectory_file, 'r') as f:
        lines = f.readlines()
    # Skip the header line. Split on any whitespace and ignore blank lines so
    # a trailing empty line or repeated spaces do not break float parsing.
    poses = [line.split() for line in lines[1:] if line.strip()]

    cam_params = [Camera([float(x) for x in pose]) for pose in poses]

    # Rescale one focal length when the target aspect ratio differs from the
    # aspect ratio of the frames the poses were estimated on.
    sample_wh_ratio = image_width / image_height
    pose_wh_ratio = original_pose_width / original_pose_height
    if pose_wh_ratio > sample_wh_ratio:
        resized_ori_w = image_height * pose_wh_ratio
        for cam_param in cam_params:
            cam_param.fx = resized_ori_w * cam_param.fx / image_width
    else:
        resized_ori_h = image_width / pose_wh_ratio
        for cam_param in cam_params:
            cam_param.fy = resized_ori_h * cam_param.fy / image_height

    # One row [fx, fy, cx, cy] per frame, scaled by the target image size.
    intrinsic = np.asarray(
        [
            [
                cam_param.fx * image_width,
                cam_param.fy * image_height,
                cam_param.cx * image_width,
                cam_param.cy * image_height,
            ]
            for cam_param in cam_params
        ],
        dtype=np.float32,
    )
    # Keep the inputs on the same device that ray_condition is told to use, so
    # a non-CPU ``device`` does not end up mixing tensors across devices.
    K = torch.as_tensor(intrinsic)[None].to(device)  # [1, n_frame, 4]

    # Camera-to-world poses relative to the first frame.
    c2ws = get_relative_pose(cam_params, zero_first_frame_scale=True)
    c2ws = torch.as_tensor(c2ws)[None].to(device)  # [1, n_frame, 4, 4]

    plucker_embedding = ray_condition(K, c2ws, image_height, image_width, device=device)  # b f h w 6
    # Move the 6 Plücker channels in front of the spatial axes.
    plucker_embedding = plucker_embedding.permute(0, 1, 4, 2, 3).contiguous().to(device=device)
    return plucker_embedding

完整代码

import torch
import numpy as np
from packaging import version as pver


class Camera(object):
    """Intrinsics and extrinsics of one camera frame parsed from a pose entry.

    Attributes set by ``__init__``:
        fx, fy, cx, cy: intrinsics taken verbatim from ``entry[1:5]``.
        w2c_mat: 4x4 world-to-camera matrix built from ``entry[7:]``.
        c2w_mat: 4x4 camera-to-world matrix, the inverse of ``w2c_mat``.
    """

    def __init__(self, entry):
        """Parse one pose entry.

        Args:
            entry: sequence of numbers. ``entry[1:5]`` holds
                ``fx, fy, cx, cy`` and ``entry[7:]`` holds the 12 values of a
                row-major 3x4 world-to-camera matrix; the other positions are
                not read here.

        Raises:
            ValueError: if the entry has the wrong number of values.
        """
        intrinsics = entry[1:5]
        if len(intrinsics) != 4:
            raise ValueError(
                "pose entry is too short to contain fx, fy, cx, cy "
                "(got %d values in total)" % len(entry)
            )
        self.fx, self.fy, self.cx, self.cy = intrinsics

        extrinsic_values = np.array(entry[7:])
        if extrinsic_values.size != 12:
            raise ValueError(
                "pose entry must end with 12 values forming a 3x4 matrix, "
                "got %d" % extrinsic_values.size
            )
        w2c_mat = extrinsic_values.reshape(3, 4)

        # Promote [R | t] to a homogeneous 4x4 matrix so it can be inverted.
        w2c_mat_4x4 = np.eye(4)
        w2c_mat_4x4[:3, :] = w2c_mat
        self.w2c_mat = w2c_mat_4x4
        self.c2w_mat = np.linalg.inv(w2c_mat_4x4)


def custom_meshgrid(*args):
    """Call ``torch.meshgrid`` with 'ij' indexing on every torch version.

    ``indexing`` is only passed for torch >= 1.10; older versions are called
    without it.
    """
    if pver.parse(torch.__version__) < pver.parse('1.10'):
        return torch.meshgrid(*args)
    else:
        return torch.meshgrid(*args, indexing='ij')


def ray_condition(K, c2w, H, W, device):
    """Compute per-pixel Plücker coordinates of the camera rays.

    Args:
        K: tensor ``[B, V, 4]`` with ``fx, fy, cx, cy`` in pixels per frame.
        c2w: tensor ``[B, V, 4, 4]`` of camera-to-world matrices.
        H: image height in pixels.
        W: image width in pixels.
        device: torch device on which the pixel grid is created; ``K`` and
            ``c2w`` must already live on it.

    Returns:
        Tensor ``[B, V, H, W, 6]``: for each pixel the ray moment
        ``origin x direction`` followed by the unit ray direction, both in
        world coordinates.
    """
    B, V = K.shape[:2]

    # Row (j) and column (i) index of every pixel, in 'ij' order.
    j, i = custom_meshgrid(
        torch.linspace(0, H - 1, H, device=device, dtype=c2w.dtype),
        torch.linspace(0, W - 1, W, device=device, dtype=c2w.dtype),
    )
    # +0.5 samples the ray through the pixel centre.
    i = i.reshape([1, 1, H * W]).expand([B, V, H * W]) + 0.5  # [B, V, HW]
    j = j.reshape([1, 1, H * W]).expand([B, V, H * W]) + 0.5  # [B, V, HW]

    fx, fy, cx, cy = K.chunk(4, dim=-1)  # each [B, V, 1]

    # Back-project pixels to camera-space directions on the z = 1 plane.
    zs = torch.ones_like(i)
    xs = (i - cx) / fx * zs
    ys = (j - cy) / fy * zs

    directions = torch.stack((xs, ys, zs), dim=-1)  # [B, V, HW, 3]
    directions = directions / directions.norm(dim=-1, keepdim=True)

    # Rotate directions into world space; the ray origin is the camera centre.
    rays_d = directions @ c2w[..., :3, :3].transpose(-1, -2)  # [B, V, HW, 3]
    rays_o = c2w[..., :3, 3]  # [B, V, 3]
    rays_o = rays_o[:, :, None].expand_as(rays_d)  # [B, V, HW, 3]

    # Plücker coordinates: (moment, direction) with moment = o x d.
    rays_dxo = torch.cross(rays_o, rays_d, dim=-1)
    plucker = torch.cat([rays_dxo, rays_d], dim=-1)
    plucker = plucker.reshape(B, V, H, W, 6)
    return plucker


def get_relative_pose(cam_params, zero_first_frame_scale):
    """Express every camera pose relative to the first camera of the sequence.

    Args:
        cam_params: sequence of objects exposing 4x4 ``w2c_mat`` and
            ``c2w_mat`` attributes.
        zero_first_frame_scale: if true, the first camera is placed exactly at
            the origin; otherwise it is offset along -y by the norm of its
            camera-to-world translation.

    Returns:
        ``numpy.float32`` array with one 4x4 camera-to-world matrix per
        camera; element 0 is the canonical first-camera pose.

    Raises:
        IndexError: if ``cam_params`` is empty.
    """
    abs_w2cs = [cam_param.w2c_mat for cam_param in cam_params]
    abs_c2ws = [cam_param.c2w_mat for cam_param in cam_params]
    source_cam_c2w = abs_c2ws[0]
    if zero_first_frame_scale:
        cam_to_origin = 0
    else:
        cam_to_origin = np.linalg.norm(source_cam_c2w[:3, 3])
    # Canonical pose assigned to the first camera.
    target_cam_c2w = np.array([
        [1, 0, 0, 0],
        [0, 1, 0, -cam_to_origin],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ])
    # Maps absolute world coordinates into the frame anchored at camera 0.
    abs2rel = target_cam_c2w @ abs_w2cs[0]
    ret_poses = [target_cam_c2w, ] + [abs2rel @ abs_c2w for abs_c2w in abs_c2ws[1:]]
    ret_poses = np.array(ret_poses, dtype=np.float32)
    return ret_poses


def get_plucker_embedding(trajectory_file, image_width, image_height, original_pose_width, original_pose_height, device='cpu'):
    """Compute Plücker embeddings for every camera listed in a trajectory file.

    Args:
        trajectory_file: path to a text file whose first line is a header
            (skipped) and whose remaining lines each hold one
            whitespace-separated pose entry accepted by ``Camera``.
        image_width: width of the target images.
        image_height: height of the target images.
        original_pose_width: width of the frames the poses refer to.
        original_pose_height: height of the frames the poses refer to.
        device: torch device used for the computation and the result.

    Returns:
        Tensor ``[1, n_frame, 6, image_height, image_width]`` on ``device``.

    Raises:
        ValueError: if a field of a pose line cannot be parsed as a float or
            a pose line has the wrong number of values.
    """
    with open(trajectory_file, 'r') as f:
        lines = f.readlines()
    # Skip the header line. Split on any whitespace and ignore blank lines so
    # a trailing empty line or repeated spaces do not break float parsing.
    poses = [line.split() for line in lines[1:] if line.strip()]

    cam_params = [Camera([float(x) for x in pose]) for pose in poses]

    # Rescale one focal length when the target aspect ratio differs from the
    # aspect ratio of the frames the poses were estimated on.
    sample_wh_ratio = image_width / image_height
    pose_wh_ratio = original_pose_width / original_pose_height
    if pose_wh_ratio > sample_wh_ratio:
        resized_ori_w = image_height * pose_wh_ratio
        for cam_param in cam_params:
            cam_param.fx = resized_ori_w * cam_param.fx / image_width
    else:
        resized_ori_h = image_width / pose_wh_ratio
        for cam_param in cam_params:
            cam_param.fy = resized_ori_h * cam_param.fy / image_height

    # One row [fx, fy, cx, cy] per frame, scaled to target-image pixels.
    intrinsic = np.asarray(
        [
            [
                cam_param.fx * image_width,
                cam_param.fy * image_height,
                cam_param.cx * image_width,
                cam_param.cy * image_height,
            ]
            for cam_param in cam_params
        ],
        dtype=np.float32,
    )
    # Inputs must live on the device ray_condition builds its pixel grid on.
    K = torch.as_tensor(intrinsic)[None].to(device)  # [1, n_frame, 4]

    c2ws = get_relative_pose(cam_params, zero_first_frame_scale=True)
    c2ws = torch.as_tensor(c2ws)[None].to(device)  # [1, n_frame, 4, 4]

    plucker_embedding = ray_condition(K, c2ws, image_height, image_width, device=device)  # b f h w 6
    # Move the 6 Plücker channels in front of the spatial axes.
    plucker_embedding = plucker_embedding.permute(0, 1, 4, 2, 3).contiguous().to(device=device)
    return plucker_embedding


if __name__ == "__main__":
    trajectory_file = "assets/pose_files/0f47577ab3441480_svd.txt"
    image_width = 576
    image_height = 320
    original_pose_width = 1280
    original_pose_height = 720
    plucker_embedding = get_plucker_embedding(trajectory_file, image_width, image_height, original_pose_width, original_pose_height)
    print("plucker_embedding: ", plucker_embedding.size())

示例文件

assets/pose_files/0f47577ab3441480_svd.txt 文件：

https://www.youtube.com/watch?v=in69BD2eZqg
196429567 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.998043656 -0.008898078 0.061884791 0.025798442 0.009229627 0.999944508 -0.005073697 0.590754668 -0.061836209 0.005634944 0.998070419 -0.233247137
196696500 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.997390270 -0.009296595 0.071597412 0.013903395 0.009683816 0.999940276 -0.005063088 0.639742116 -0.071546070 0.005743211 0.997420788 -0.192511620
196996800 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.996610820 -0.009389835 0.081723645 0.001640113 0.009820240 0.999939919 -0.004866215 0.671455284 -0.081673041 0.005652268 0.996643126 -0.133688656
197263733 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.996082366 -0.010138805 0.087847337 -0.016342035 0.010684925 0.999926388 -0.005748684 0.672634560 -0.087782584 0.006664804 0.996117353 -0.063974549
197564033 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.995849669 -0.009882330 0.090475440 -0.039230446 0.010261126 0.999940395 -0.003722523 0.652124926 -0.090433262 0.004635453 0.995891750 0.029309661
197830967 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.995805681 -0.010101050 0.090933874 -0.063966908 0.010504867 0.999936998 -0.003963242 0.612482840 -0.090888113 0.004901868 0.995849073 0.150119615
198131267 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.995871365 -0.010241830 0.090195827 -0.078531843 0.010570447 0.999939144 -0.003166437 0.555611063 -0.090157904 0.004106774 0.995918989 0.288647339
198398200 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.996041775 -0.010097718 0.088310808 -0.082279252 0.010292084 0.999945521 -0.001745854 0.514557838 -0.088288367 0.002647846 0.996091425 0.410873608
198698500 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.997259557 -0.009423457 0.073379643 -0.066031947 0.009896892 0.999932468 -0.006090916 0.457751923 -0.073317289 0.006800454 0.997285485 0.567670483
198965433 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.998894870 -0.008021683 0.046310849 -0.031951124 0.008127374 0.999964774 -0.002094371 0.405029944 -0.046292417 0.002468442 0.998924911 0.707543520
199265733 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.999987304 -0.004264246 -0.002686474 -0.003940793 0.004252980 0.999982178 -0.004185534 0.350251174 0.002704274 0.004174056 0.999987602 0.860228697
199532667 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.998511136 0.001477750 -0.054528367 0.001275705 -0.001786051 0.999982715 -0.005605645 0.289542627 0.054519139 0.005694689 0.998496473 1.006409043
199832967 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.991945446 0.009338647 -0.126321211 -0.042639243 -0.010065611 0.999936223 -0.005117786 0.198872046 0.126265362 0.006348064 0.991976202 1.178174631
200133267 0.507650910 0.902490531 0.500000000 0.500000000 0.000000000 0.000000000 0.973961055 0.020193946 -0.225814342 -0.167000046 -0.021393530 0.999767005 -0.002866175 0.129793413 0.225703865 0.007622509 0.974166155 1.328984423