https://arxiv.org/abs/1911.11236
搭建训练环境
git clone https://github.com/QingyongHu/RandLA-Net.git
- 搭建 python 环境 , 这里我用的
3.9
conda create -n randlanet python=3.9 source activate randlanet pip install tensorflow==2.15.0 -i https://pypi.tuna.tsinghua.edu.cn/simple --timeout=120 pip install -r helper_requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple pip install Cython -i https://pypi.tuna.tsinghua.edu.cn/simple conda install -c conda-forge scikit-learn
- cd
utils/cpp_wrappers/cpp_subsampling/
, 执行python setup.py build_ext --inplace
, 输出grid_subsampling.cpython-39-x86_64-linux-gnu.so
- cd
nearest_neighbors
, 执行python setup.py build_ext --inplace
, 输出nearest_neighbors.cpython-39-x86_64-linux-gnu.so
制作数据集
- 这里我用
CloudCompare
标注的数据集 , 具体标注方法,上网找找. - 创建
make_train_dataset.py
, 开始生成训练数据集# 写这段代码的时候,只有上帝和我知道它是干嘛的 # 现在,只有上帝知道 # @File : make_cloud_train_datasets.py # @Author : J. # @desc : 生成 RandLanet 训练数据集from sklearn.neighbors import KDTree from os.path import join, exists, dirname, abspath import numpy as np import os, glob, pickle import sysBASE_DIR = dirname(abspath(__file__)) ROOT_DIR = dirname(BASE_DIR) sys.path.append(BASE_DIR) sys.path.append(ROOT_DIR) from helper_ply import write_ply from helper_tool import DataProcessing as DPgrid_size = 0.01 dataset_path = './data/sample/original_data' original_pc_folder = join(dirname(dataset_path), 'original_ply') sub_pc_folder = join(dirname(dataset_path), 'input_{:.3f}'.format(grid_size)) os.mkdir(original_pc_folder) if not exists(original_pc_folder) else None os.mkdir(sub_pc_folder) if not exists(sub_pc_folder) else Nonerailway_cnt = 0 backgroud_cnt = 0 for pc_path in glob.glob(join(dataset_path, '*.txt')):file_name = os.path.basename(pc_path)[:-4]if exists(join(sub_pc_folder, file_name + '_KDTree.pkl')):continuepc = np.loadtxt(pc_path)labels = pc[:, -1].astype(np.uint8)values , counts = np.unique(labels,return_counts = True)for i in range(len(values)):# 我标注2个类别(包含背景类别)# 统计每个类别点的数量if values[i] == 0:backgroud_cnt = backgroud_cnt + counts[i]elif values[i] == 1:railway_cnt = railway_cnt + counts[i]full_ply_path = join(original_pc_folder, file_name + '.ply')# Subsample to save spacesub_points, sub_colors, sub_labels = DP.grid_sub_sampling(pc[:, :3].astype(np.float32),pc[:, 3:6].astype(np.uint8), labels, 0.01)sub_labels = np.squeeze(sub_labels)write_ply(full_ply_path, (sub_points, sub_colors, sub_labels), ['x', 'y', 'z', 'red', 'green', 'blue', 'class'])# save sub_cloud and KDTree filesub_xyz, sub_colors, sub_labels = DP.grid_sub_sampling(sub_points, sub_colors, sub_labels, grid_size)sub_colors = sub_colors / 255.0sub_labels = np.squeeze(sub_labels)sub_ply_file = join(sub_pc_folder, file_name + '.ply')write_ply(sub_ply_file, [sub_xyz, sub_colors, sub_labels], ['x', 'y', 'z', 'red', 'green', 'blue', 'class'])search_tree = KDTree(sub_xyz, leaf_size=50)kd_tree_file = join(sub_pc_folder, file_name + '_KDTree.pkl')with open(kd_tree_file, 'wb') as f:pickle.dump(search_tree, f)proj_idx = np.squeeze(search_tree.query(sub_points, return_distance=False))proj_idx = proj_idx.astype(np.int32)proj_save = join(sub_pc_folder, file_name + '_proj.pkl')with open(proj_save, 'wb') as f:pickle.dump([proj_idx, labels], f) # 统计每个类别个数 print("----> backgroud_cnt: " + str(backgroud_cnt)) print("----> railway_cnt: " + str(railway_cnt))
- 修改
helper_tools.py
#import cpp_wrappers.cpp_subsampling.grid_subsampling as cpp_subsampling#import nearest_neighbors.lib.python.nearest_neighbors as nearest_neighbors# 修改成import utils.cpp_wrappers.cpp_subsampling.grid_subsampling as cpp_subsamplingimport utils.nearest_neighbors.nearest_neighbors as nearest_neighbors... # 复制一个 起个自己名字 class ConfigSample:k_n = 16 # KNNnum_layers = 5 # Number of layersnum_points = 16000 # Number of input points# 包含背景类别,如果想排除背景类别, 修改 ignored_labelsnum_classes = 2 # Number of valid classes sub_grid_size = 0.01 # preprocess_parameter # Todobatch_size = 4 # batch_size during trainingval_batch_size = 2 # batch_size during validation and testtrain_steps = 500 # Number of steps per epochsval_steps = 3 # Number of validation steps per epochsub_sampling_ratio = [4, 4, 4, 4, 2] # sampling ratio of random sampling at each layerd_out = [16, 64, 128, 256, 512] # feature dimensionnoise_init = 3.5 # noise initial parametermax_epoch = 100 # maximum epoch during traininglearning_rate = 1e-2 # initial learning ratelr_decays = {i: 0.95 for i in range(0, 500)} # decay rate of learning ratetrain_sum_dir = 'train_log'saving = Truesaving_path = Noneaugment_scale_anisotropic = Trueaugment_symmetries = [True, False, False]augment_rotation = 'vertical'augment_scale_min = 0.8augment_scale_max = 1.2augment_noise = 0.001augment_occlusion = 'none'augment_color = 0.8@staticmethoddef get_class_weights(dataset_name):# pre-calculate the number of points in each categorynum_per_class = []if dataset_name is 'S3DIS':num_per_class = np.array([3370714, 2856755, 4919229, 318158, 375640, 478001, 974733,650464, 791496, 88727, 1284130, 229758, 2272837], dtype=np.int32)elif dataset_name is 'Semantic3D':num_per_class = np.array([5181602, 5012952, 6830086, 1311528, 10476365, 946982, 334860, 269353],dtype=np.int32)elif dataset_name is 'SemanticKITTI':num_per_class = np.array([55437630, 320797, 541736, 2578735, 3274484, 552662, 184064, 78858,240942562, 17294618, 170599734, 6369672, 230413074, 101130274, 476491114,9833174, 129609852, 4506626, 1168181])# TODO 增加一个自己的类别elif dataset_name is 'Sample':# 每一个类别点的数量num_per_class = np.array([4401119, 148313])weight = num_per_class / float(sum(num_per_class))ce_label_weight = 1 / (weight + 0.02)return np.expand_dims(ce_label_weight, axis=0) ...
训练
- main_Sample.py (拷贝 main_S3DIS.py)
from os.path import join, exists
from RandLANet import Network
from tester_Railway import ModelTester
from helper_ply import read_ply
from helper_tool import Plot
from helper_tool import DataProcessing as DP
from helper_tool import ConfigRailway as cfg
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import numpy as np
import pickle, argparse, osclass Railway:def __init__(self):self.name = 'Sample'# 最好给绝对路径self.path = '/home/ab/workspace/train/randla-net-tf2-main/data/sample'self.label_to_names = {0: 'background', 1: 'sample'}self.num_classes = len(self.label_to_names)self.label_values = np.sort([k for k, v in self.label_to_names.items()])self.label_to_idx = {l: i for i, l in enumerate(self.label_values)}# 如果想忽略背景类别 np.sort([0])#self.ignored_labels = np.sort([0]) # TODOself.ignored_labels = np.sort([]) # TODOself.original_folder = join(self.path, 'original_data')self.full_pc_folder = join(self.path, 'original_ply')self.sub_pc_folder = join(self.path, 'input_{:.3f}'.format(cfg.sub_grid_size))#训练、验证、测试数据都在original_data数据集中,需要做划分self.val_split = ["20240430205457370","20240430205527591"] self.test_split= ["20240430205530638"]# Initial training-validation-testing filesself.train_files = []self.val_files = []self.test_files = []cloud_names = [file_name[:-4] for file_name in os.listdir(self.original_folder) if file_name[-4:] == '.txt']#根据文件名划分训练、验证、测试数据集for pc_name in cloud_names:pc_file=join(self.sub_pc_folder, pc_name + '.ply')if pc_name in self.val_split:self.val_files.append(pc_file)elif pc_name in self.test_split:self.test_files.append(pc_file)else:self.train_files.append(pc_file)# Initiate containersself.val_proj = []self.val_labels = []self.test_proj = []self.test_labels = []self.possibility = {}self.min_possibility = {}self.class_weight = {}self.input_trees = {'training': [], 'validation': [], 'test': []}self.input_colors = {'training': [], 'validation': [], 'test': []}self.input_labels = {'training': [], 'validation': []}# Ascii files dict for testingself.ascii_files = {'20240430205530638.ply': '20240430205530638-reduced.labels'}self.load_sub_sampled_clouds(cfg.sub_grid_size)def load_sub_sampled_clouds(self, sub_grid_size):tree_path = join(self.path, 'input_{:.3f}'.format(sub_grid_size))files = np.hstack((self.train_files, self.val_files, self.test_files))for i, file_path in enumerate(files):cloud_name = file_path.split('/')[-1][:-4]print('Load_pc_' + str(i) + ': ' + cloud_name)if file_path in self.val_files:cloud_split = 'validation'elif file_path in self.train_files:cloud_split = 'training'else:cloud_split = 'test'# Name of the input fileskd_tree_file = join(tree_path, '{:s}_KDTree.pkl'.format(cloud_name))sub_ply_file = join(tree_path, '{:s}.ply'.format(cloud_name))# read ply with datadata = read_ply(sub_ply_file)sub_colors = np.vstack((data['red'], data['green'], data['blue'])).Tif cloud_split == 'test':sub_labels = Noneelse:sub_labels = data['class']# Read pkl with search treewith open(kd_tree_file, 'rb') as f:search_tree = pickle.load(f)self.input_trees[cloud_split] += [search_tree]self.input_colors[cloud_split] += [sub_colors]if cloud_split in ['training', 'validation']:self.input_labels[cloud_split] += [sub_labels]# Get validation and test re_projection indicesprint('\nPreparing reprojection indices for validation and test')for i, file_path in enumerate(files):# get cloud name and splitcloud_name = file_path.split('/')[-1][:-4]# Validation projection and labelsif file_path in self.val_files:proj_file = join(tree_path, '{:s}_proj.pkl'.format(cloud_name))with open(proj_file, 'rb') as f:proj_idx, labels = pickle.load(f)self.val_proj += [proj_idx]self.val_labels += [labels]# Test projectionif file_path in self.test_files:proj_file = join(tree_path, '{:s}_proj.pkl'.format(cloud_name))with open(proj_file, 'rb') as f:proj_idx, labels = pickle.load(f)self.test_proj += [proj_idx]self.test_labels += [labels]print('finished')return# Generate the input data flowdef get_batch_gen(self, split):if split == 'training':num_per_epoch = cfg.train_steps * cfg.batch_sizeelif split == 'validation':num_per_epoch = cfg.val_steps * cfg.val_batch_sizeelif split == 'test':num_per_epoch = cfg.val_steps * cfg.val_batch_size# Reset possibilityself.possibility[split] = []self.min_possibility[split] = []self.class_weight[split] = []# Random initializefor i, tree in enumerate(self.input_trees[split]):self.possibility[split] += [np.random.rand(tree.data.shape[0]) * 1e-3]self.min_possibility[split] += [float(np.min(self.possibility[split][-1]))]if split != 'test':_, num_class_total = np.unique(np.hstack(self.input_labels[split]), return_counts=True)self.class_weight[split] += [np.squeeze([num_class_total / np.sum(num_class_total)], axis=0)]def spatially_regular_gen():# Generator loopfor i in range(num_per_epoch): # num_per_epoch# Choose the cloud with the lowest probabilitycloud_idx = int(np.argmin(self.min_possibility[split]))# choose the point with the minimum of possibility in the cloud as query pointpoint_ind = np.argmin(self.possibility[split][cloud_idx])# Get all points within the cloud from tree structurepoints = np.array(self.input_trees[split][cloud_idx].data, copy=False)# print("points........." + str(points.shape))# Center point of input regioncenter_point = points[point_ind, :].reshape(1, -1)# Add noise to the center pointnoise = np.random.normal(scale=cfg.noise_init / 10, size=center_point.shape)pick_point = center_point + noise.astype(center_point.dtype)query_idx = self.input_trees[split][cloud_idx].query(pick_point, k=cfg.num_points)[1][0]# Shuffle indexquery_idx = DP.shuffle_idx(query_idx)# Get corresponding points and colors based on the indexqueried_pc_xyz = points[query_idx]queried_pc_xyz[:, 0:2] = queried_pc_xyz[:, 0:2] - pick_point[:, 0:2]queried_pc_colors = self.input_colors[split][cloud_idx][query_idx]if split == 'test':queried_pc_labels = np.zeros(queried_pc_xyz.shape[0])queried_pt_weight = 1else:queried_pc_labels = self.input_labels[split][cloud_idx][query_idx]queried_pc_labels = np.array([self.label_to_idx[l] for l in queried_pc_labels])queried_pt_weight = np.array([self.class_weight[split][0][n] for n in queried_pc_labels])# Update the possibility of the selected pointsdists = np.sum(np.square((points[query_idx] - pick_point).astype(np.float32)), axis=1)delta = np.square(1 - dists / np.max(dists)) * queried_pt_weightself.possibility[split][cloud_idx][query_idx] += deltaself.min_possibility[split][cloud_idx] = float(np.min(self.possibility[split][cloud_idx]))if True:yield (queried_pc_xyz,queried_pc_colors.astype(np.float32),queried_pc_labels,query_idx.astype(np.int32),np.array([cloud_idx], dtype=np.int32))gen_func = spatially_regular_gengen_types = (tf.float32, tf.float32, tf.int32, tf.int32, tf.int32)gen_shapes = ([None, 3], [None, 3], [None], [None], [None])return gen_func, gen_types, gen_shapesdef get_tf_mapping(self):# Collect flat inputsdef tf_map(batch_xyz, batch_features, batch_labels, batch_pc_idx, batch_cloud_idx):batch_features = tf.map_fn(self.tf_augment_input, [batch_xyz, batch_features], dtype=tf.float32)input_points = []input_neighbors = []input_pools = []input_up_samples = []for i in range(cfg.num_layers):neigh_idx = tf.py_func(DP.knn_search, [batch_xyz, batch_xyz, cfg.k_n], tf.int32)sub_points = batch_xyz[:, :tf.shape(batch_xyz)[1] // cfg.sub_sampling_ratio[i], :]pool_i = neigh_idx[:, :tf.shape(batch_xyz)[1] // cfg.sub_sampling_ratio[i], :]up_i = tf.py_func(DP.knn_search, [sub_points, batch_xyz, 1], tf.int32)input_points.append(batch_xyz)input_neighbors.append(neigh_idx)input_pools.append(pool_i)input_up_samples.append(up_i)batch_xyz = sub_pointsinput_list = input_points + input_neighbors + input_pools + input_up_samplesinput_list += [batch_features, batch_labels, batch_pc_idx, batch_cloud_idx]return input_listreturn tf_map# data augmentation@staticmethoddef tf_augment_input(inputs):xyz = inputs[0]features = inputs[1]theta = tf.random_uniform((1,), minval=0, maxval=2 * np.pi)# Rotation matricesc, s = tf.cos(theta), tf.sin(theta)cs0 = tf.zeros_like(c)cs1 = tf.ones_like(c)R = tf.stack([c, -s, cs0, s, c, cs0, cs0, cs0, cs1], axis=1)stacked_rots = tf.reshape(R, (3, 3))# Apply rotationstransformed_xyz = tf.reshape(tf.matmul(xyz, stacked_rots), [-1, 3])# Choose random scales for each examplemin_s = cfg.augment_scale_minmax_s = cfg.augment_scale_maxif cfg.augment_scale_anisotropic:s = tf.random_uniform((1, 3), minval=min_s, maxval=max_s)else:s = tf.random_uniform((1, 1), minval=min_s, maxval=max_s)symmetries = []for i in range(3):if cfg.augment_symmetries[i]:symmetries.append(tf.round(tf.random_uniform((1, 1))) * 2 - 1)else:symmetries.append(tf.ones([1, 1], dtype=tf.float32))s *= tf.concat(symmetries, 1)# Create N x 3 vector of scales to multiply with stacked_pointsstacked_scales = tf.tile(s, [tf.shape(transformed_xyz)[0], 1])# Apply scalestransformed_xyz = transformed_xyz * stacked_scalesnoise = tf.random_normal(tf.shape(transformed_xyz), stddev=cfg.augment_noise)transformed_xyz = transformed_xyz + noise# rgb = features[:, :3]# stacked_features = tf.concat([transformed_xyz, rgb], axis=-1)return transformed_xyzdef init_input_pipeline(self):print('Initiating input pipelines')cfg.ignored_label_inds = [self.label_to_idx[ign_label] for ign_label in self.ignored_labels]gen_function, gen_types, gen_shapes = self.get_batch_gen('training')gen_function_val, _, _ = self.get_batch_gen('validation')gen_function_test, _, _ = self.get_batch_gen('test')self.train_data = tf.data.Dataset.from_generator(gen_function, gen_types, gen_shapes)self.val_data = tf.data.Dataset.from_generator(gen_function_val, gen_types, gen_shapes)self.test_data = tf.data.Dataset.from_generator(gen_function_test, gen_types, gen_shapes)self.batch_train_data = self.train_data.batch(cfg.batch_size)self.batch_val_data = self.val_data.batch(cfg.val_batch_size)self.batch_test_data = self.test_data.batch(cfg.val_batch_size)map_func = self.get_tf_mapping()self.batch_train_data = self.batch_train_data.map(map_func=map_func)self.batch_val_data = self.batch_val_data.map(map_func=map_func)self.batch_test_data = self.batch_test_data.map(map_func=map_func)self.batch_train_data = self.batch_train_data.prefetch(cfg.batch_size)self.batch_val_data = self.batch_val_data.prefetch(cfg.val_batch_size)self.batch_test_data = self.batch_test_data.prefetch(cfg.val_batch_size)iter = tf.data.Iterator.from_structure(self.batch_train_data.output_types, self.batch_train_data.output_shapes)self.flat_inputs = iter.get_next()self.train_init_op = iter.make_initializer(self.batch_train_data)self.val_init_op = iter.make_initializer(self.batch_val_data)self.test_init_op = iter.make_initializer(self.batch_test_data)if __name__ == '__main__':parser = argparse.ArgumentParser()parser.add_argument('--gpu', type=int, default=0, help='the number of GPUs to use [default: 0]')parser.add_argument('--mode', type=str, default='train', help='options: train, test, vis')parser.add_argument('--model_path', type=str, default='None', help='pretrained model path')FLAGS = parser.parse_args()GPU_ID = FLAGS.gpuos.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"os.environ['CUDA_VISIBLE_DEVICES'] = str(GPU_ID)os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'Mode = FLAGS.modedataset = Railway()dataset.init_input_pipeline()if Mode == 'train':model = Network(dataset, cfg)model.train(dataset)elif Mode == 'test':cfg.saving = Falsemodel = Network(dataset, cfg)if FLAGS.model_path is not 'None':chosen_snap = FLAGS.model_pathelse:chosen_snapshot = -1logs = np.sort([os.path.join('results', f) for f in os.listdir('results') if f.startswith('Log')])chosen_folder = logs[-1]snap_path = join(chosen_folder, 'snapshots')snap_steps = [int(f[:-5].split('-')[-1]) for f in os.listdir(snap_path) if f[-5:] == '.meta']chosen_step = np.sort(snap_steps)[-1]chosen_snap = os.path.join(snap_path, 'snap-{:d}'.format(chosen_step))print(".............. chosen_snap:" + chosen_snap)tester = ModelTester(model, dataset, restore_snap=chosen_snap)tester.test(model, dataset)else:################### Visualize data ###################with tf.Session() as sess:sess.run(tf.global_variables_initializer())sess.run(dataset.train_init_op)while True:flat_inputs = sess.run(dataset.flat_inputs)pc_xyz = flat_inputs[0]sub_pc_xyz = flat_inputs[1]labels = flat_inputs[21]Plot.draw_pc_sem_ins(pc_xyz[0, :, :], labels[0, :])Plot.draw_pc_sem_ins(sub_pc_xyz[0, :, :], labels[0, 0:np.shape(sub_pc_xyz)[1]])
- 开始训练
python main_Sample.py --mode train --gpu 0
参考
- https://github.com/QingyongHu/RandLA-Net
- https://blog.csdn.net/weixin_40653140/article/details/130285289