I. Introduction
This post follows the virtual-point generation method from VirConv (Virtual Sparse Convolution for Multimodal 3D Object Detection): https://github.com/hailanyi/VirConv/tree/master/tools/PENet. It uses a few tricks to bypass evaluation entirely and generate virtual points directly from pretrained weights. This article explains how to use a variety of depth completion methods to generate virtual points that assist 3D object detection.
II. Modifying the Inference Script
Every depth completion method requires different code changes, and some are genuinely hard to modify, so unfortunately one fix does not transfer to all of them. The overall routine, however, is always the same. The key observation is that if we only want to generate virtual points, then the gt labels, the evaluation functions and so on no longer matter and can be deleted outright. What really matters is (1) how to read our own dataset and (2) how to accurately project the depth map into the 3D LiDAR coordinate frame.
Below we take LRRU, the current state of the art, as an example. The concrete steps are as follows:
1. Rewriting the DataLoader
We need to write our own DataLoader for the KITTI 3D object detection dataset to replace the original one that reads KITTI DC (the depth completion split). The main changes concern how the data is read: the calib files, the LiDAR point clouds, and the images. (Not that the original loader is bad; it is just wrapped too tightly, so I switched to one I had already written.)
The call site:
```python
if self.arg.dataset == 'kitti':  # our newly added my_loader
    rgb, dep = self.my_loader[index]
    gt = None
    K = None
    paths = None
else:  # the original loading path
    dep, gt, K, rgb, paths = self.__getraw__(index)
```
Below is the complete code, adapted following VirConv's approach:
```python
class KittiDepthSemi(data.Dataset):
    """A data loader for the KITTI dataset."""

    def __init__(self, split, args, arg):
        self.args = args
        self.split = split
        self.arg = arg
        if arg.dataset == 'kitti-dc':
            # original KITTI depth completion split
            self.paths = get_kittipaths(split, args)
        elif arg.dataset == 'kitti':
            # our newly added loader for the KITTI 3D detection split
            self.my_loader = MyLoader(arg.detpath)
        self.transforms = kittitransforms
        self.ipfill = fill_in_fast

    def __getraw__(self, index):
        dep = read_depth(self.paths['dep'][index]) if \
            (self.paths['dep'][index] is not None) else None
        gt = read_depth(self.paths['gt'][index]) if \
            self.paths['gt'][index] is not None else None
        rgb = read_rgb(self.paths['rgb'][index]) if \
            (self.paths['rgb'][index] is not None) else None
        if self.paths['K'][index] is not None:
            if self.split == 'train' or (self.split == 'val' and self.args.val == 'full'):
                calib = read_calib_file(self.paths['K'][index])
                K_cam = None
                if 'image_02' in self.paths['rgb'][index]:
                    K_cam = np.reshape(calib['P_rect_02'], (3, 4))
                elif 'image_03' in self.paths['rgb'][index]:
                    K_cam = np.reshape(calib['P_rect_03'], (3, 4))
                K = [K_cam[0, 0], K_cam[1, 1], K_cam[0, 2], K_cam[1, 2]]
            else:
                f_calib = open(self.paths['K'][index], 'r')
                K_cam = f_calib.readline().split(' ')
                f_calib.close()
                K = [float(K_cam[0]), float(K_cam[4]), float(K_cam[2]),
                     float(K_cam[5])]
        else:
            K = None
        return dep, gt, K, rgb, self.paths['dep'][index]

    def __getitem__(self, index):
        if self.arg.dataset == 'kitti':
            # our newly added my_loader; no gt/K/paths on the detection split
            rgb, dep = self.my_loader[index]
            gt = None
            K = None
            paths = None
        else:
            dep, gt, K, rgb, paths = self.__getraw__(index)
        dep, gt, K, rgb = self.transforms(self.split, self.args, dep, gt, K, rgb)

        dep_np = dep.numpy().squeeze(0)
        dep_clear, _ = outlier_removal(dep_np)
        dep_clear = np.expand_dims(dep_clear, 0)
        dep_clear_torch = torch.from_numpy(dep_clear)

        # ip_basic fill
        dep_np = dep.numpy().squeeze(0)
        dep_np_ip = np.copy(dep_np)
        dep_ip = self.ipfill(dep_np_ip, max_depth=100.0,
                             extrapolate=True, blur_type='gaussian')
        dep_ip_torch = torch.from_numpy(dep_ip)
        dep_ip_torch = dep_ip_torch.to(dtype=torch.float32)

        # gt = dep_clear_torch if gt == None else gt
        candidates = {'dep': dep, 'dep_clear': dep_clear_torch, 'gt': gt,
                      'rgb': rgb, 'ip': dep_ip_torch}
        items = {key: val
                 for key, val in candidates.items() if val is not None}

        if (self.args.debug_dp or self.args.test) and self.arg.dataset == 'kitti-dc':
            items['d_path'] = paths

        return items

    def __len__(self):
        if self.args.toy_test:
            return self.args.toy_test_number
        else:
            try:
                return len(self.paths['gt'])
            except:
                return len(self.my_loader)
```
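For reference, here is a minimal sketch of how this dataset might be driven. The `args`/`arg` namespaces and `arg.detpath` follow the code above; the `batch_size=1` choice and the key names are assumptions for illustration:

```python
from torch.utils.data import DataLoader

# Minimal usage sketch: iterate the detection split ('kitti') variant.
dataset = KittiDepthSemi('test', args, arg)   # assumes arg.dataset == 'kitti'
loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=4)

for items in loader:
    rgb, dep = items['rgb'], items['dep']     # 'gt' is absent on this split
    # feed (rgb, dep) to the depth completion network here
```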
MyLoader:
```python
class MyLoader():
    def __init__(self, root_path=''):
        self.root_path = root_path
        self.file_list = self.include_all_files()

    def include_all_files(self):
        velo_path = os.path.join(self.root_path, 'velodyne')
        all_files = os.listdir(velo_path)
        all_files.sort()
        all_files = [x[0:6] for x in all_files]
        return all_files

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, item):
        file_idx = self.file_list[item]
        # Basic KITTI file locations
        file_image_path = os.path.join(self.root_path, 'image_2', file_idx + '.png')
        file_velo_path = os.path.join(self.root_path, 'velodyne', file_idx + '.bin')
        file_calib = os.path.join(self.root_path, 'calib', file_idx + '.txt')
        # Returns a Calibration object initialized with P2, P3, Tr_velo2cam and R0
        calib = calibration_kitti.Calibration(file_calib)
        # Load the point cloud and the image
        points = np.fromfile(str(file_velo_path), dtype=np.float32).reshape(-1, 4)
        # points = layer_point_discard(points, 0.9)
        image = np.array(io.imread(file_image_path), dtype=np.int32)
        image = image[:352, :1216]
        # Project the point cloud into the image to build the sparse depth input
        rgb, depth = load_depth_input(calib, image, points)
        return rgb, depth  # the call site above expects (rgb, dep)
```
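`load_depth_input` is not shown above. Conceptually it projects the LiDAR points into the image plane to build the sparse depth input the network expects. A minimal sketch, assuming the OpenPCDet-style `Calibration` methods `lidar_to_rect()` and `rect_to_img()` used elsewhere in this post (the real helper may additionally normalize the inputs or convert them to tensors):

```python
import numpy as np

def load_depth_input(calib, image, points):
    """Sketch: splat LiDAR points into a sparse depth image (assumed helper)."""
    h, w = image.shape[:2]
    pts_rect = calib.lidar_to_rect(points[:, 0:3])    # LiDAR -> rectified camera
    pts_img, pts_depth = calib.rect_to_img(pts_rect)  # camera -> pixel coordinates
    u = np.round(pts_img[:, 0]).astype(np.int32)
    v = np.round(pts_img[:, 1]).astype(np.int32)
    valid = (u >= 0) & (u < w) & (v >= 0) & (v < h) & (pts_depth > 0)
    depth = np.zeros((h, w), dtype=np.float32)
    depth[v[valid], u[valid]] = pts_depth[valid]      # sparse depth, zeros elsewhere
    return image, depth
```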
2. Projecting and Saving Virtual Points
This part is actually quite simple. We just need to find where the predicted depth map is produced, which is usually obvious, for example:
```python
outputf = net(samplef)
```
`outputf` holds the predicted depth map. All that remains is to project it into 3D space using the calib we loaded earlier.
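In LRRU's test script this amounts to calling a save function right after the forward pass. A rough sketch (the variable names and the structure of `outputf` are assumptions; adapt them to the repository at hand):

```python
import torch

# Sketch of the modified test loop (names are illustrative).
net.eval()
with torch.no_grad():
    for idx, samplef in enumerate(loader):
        samplef = {k: v.cuda() for k, v in samplef.items() if torch.is_tensor(v)}
        outputf = net(samplef)
        # Some models return a dict; extract the (1, 1, H, W) depth tensor first,
        # e.g. outputf = outputf['pred'] (the key name varies per repository).
        save_depth_as_points(outputf, idx, arg.detpath, args)
```

The save function itself: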
```python
def save_depth_as_points(depth, idx, root_path, args):
    file_idx = str(idx).zfill(6)
    # Paths to the image, LiDAR bin and calib files
    file_image_path = os.path.join(root_path, 'image_2', file_idx + '.png')
    file_velo_path = os.path.join(root_path, 'velodyne', file_idx + '.bin')
    file_calib = os.path.join(root_path, 'calib', file_idx + '.txt')
    calib = calibration_kitti.Calibration(file_calib)

    # ########## Change 1 (the single most important fix!): correct the calib ##########
    # Without this correction the final projection is simply wrong.
    calib.cv -= args.top_crop

    # Load the LiDAR points
    lidar = np.fromfile(str(file_velo_path), dtype=np.float32).reshape(-1, 4)
    # Load the image
    image = np.array(io.imread(file_image_path), dtype=np.int32)
    # ########## Change 2: the image crop ##########
    # Do NOT change this crop. Cropping to [100:352] or [:252] instead makes the
    # image projection and the depth projection no longer overlap exactly.
    image = image[:352, :1216]

    # ########## Optionally save the rgb crop ##########
    # paths_rgb = os.path.join(root_path, 'velodyne_depth')
    # out_path_img = os.path.join(paths_rgb, file_idx + '_rgb.png')
    # cv2.imwrite(out_path_img, image)

    # Transform the LiDAR points into the rectified camera frame
    pts_rect = calib.lidar_to_rect(lidar[:, 0:3])
    # Keep only the points inside the camera FOV
    fov_flag = get_fov_flag(pts_rect, image.shape, calib)
    lidar = lidar[fov_flag]
    # lidar = layer_point_discard(lidar, 0.7)

    # Output directory for the completed-depth points
    paths = os.path.join(root_path, 'velodyne_depth_lrru')
    if not os.path.exists(paths):
        os.makedirs(paths)
    out_path = os.path.join(paths, file_idx + '.npy')

    # (H, W, 1) numpy depth map; keep its crop consistent with the image above
    depth = depth.squeeze(0).permute(1, 2, 0).cpu().detach().numpy()

    # ########## Optionally save the raw predicted depth map ##########
    # out_path_img = os.path.join(paths, file_idx + '.png')
    # img = np.squeeze(depth)
    # img = (img * 256).astype('uint16')
    # cv2.imwrite(out_path_img, img)

    # ########## Save the final depth completion points merged with the gt information ##########
    # calib.cv -= args.top_crop
    # calib.cu -= args.top_crop
    # calib.fv -= args.top_crop
    # Back-project the depth map into LiDAR-frame points (with rgb attached)
    final_points, new_lidar, new_p = depth2pointsrgbp(depth, image, calib, lidar)
    # Store as float16 to save disk space
    final_points = final_points.astype(np.float16)
    new_lidar = new_lidar.astype(np.float16)
    # vision(final_points)
    # vision(new_lidar)
    # Save as npy
    np.save(out_path, final_points)
    return len(final_points)
```
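`depth2pointsrgbp` and `get_fov_flag` come from VirConv's PENet tools. The core of `depth2pointsrgbp` is back-projecting every valid depth pixel into the LiDAR frame, with the pixel's color attached. A minimal sketch of that step, assuming the OpenPCDet-style `Calibration` used above (VirConv's real helper additionally merges the raw LiDAR points and returns extra per-point attributes):

```python
import numpy as np

def depth_to_lidar_points(depth, image, calib):
    """Sketch of the depth -> LiDAR back-projection inside depth2pointsrgbp."""
    depth = np.squeeze(depth)                          # (H, W), meters
    v, u = np.nonzero(depth > 0)                       # pixel coords of valid depth
    d = depth[v, u]
    pts_rect = calib.img_to_rect(u.astype(np.float32),
                                 v.astype(np.float32), d)  # pixels -> camera frame
    pts_lidar = calib.rect_to_lidar(pts_rect)              # camera -> LiDAR frame
    rgb = image[v, u] / 255.0                              # per-point color
    return np.concatenate([pts_lidar, rgb], axis=1)        # (N, 6): x, y, z, r, g, b
```

This is also why the `calib.cv -= args.top_crop` correction above is critical: the network predicts depth on an image whose top rows were cropped away, so the principal point of the projection must be shifted by the same amount; otherwise every back-projected point ends up vertically offset.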
Finally, visualize the saved virtual points (the commented-out `vision()` calls above) to check that they line up with the original LiDAR points.
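The `vision()` helper is not shown here; a minimal sketch using Open3D (an assumption; any point cloud viewer works):

```python
import numpy as np
import open3d as o3d  # assumption: Open3D is one of several viewer options

def vision(points):
    """Minimal sketch of a viewer for the saved (N, >=3) point arrays."""
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points[:, :3].astype(np.float64))
    o3d.visualization.draw_geometries([pcd])

# e.g. vision(np.load('velodyne_depth_lrru/000000.npy').astype(np.float32))
```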
3. Full Code
The complete virtual-point generation code for LRRU, TWISE, DySPN, PENet, CompletionFormer and other depth completion methods will be open-sourced once my paper is accepted.