import copy
import os
from functools import partial
import numpy as np
import torch
from horizon_plugin_pytorch.quantization import March
from hat.data.collates.collates import collate_lidar3d
from hat.utils.config import ConfigVersion
from hat.visualize.lidar_det import lidar_det_visualize
# Global experiment settings for the CenterPoint-PointPillars config.
VERSION = ConfigVersion.v2
training_step = os.environ.get("HAT_TRAINING_STEP", "float")

task_name = "centerpoint_pointpillar_nuscenes"
batch_size_per_gpu = 4
device_ids = [0]
ckpt_dir = f"/open_explorer/tmp_models/{task_name}"
# datadir settings
data_rootdir = "./tmp_data/nuscenes/lidar_seg/v1.0-trainval"
meta_rootdir = "./tmp_data/nuscenes/meta"
gt_data_root = "./tmp_nuscenes/lidar"
log_loss_show = 200
cudnn_benchmark = True
# Fix: `NULL` is not a Python name; use None (no fixed random seed).
seed = None
log_rank_zero_only = True
march = March.BAYES
# Fix: `NULL` is not a Python name; None means the modules fall back to
# their default batch-norm kwargs.
norm_cfg = None
qat_mode = "fuse_bn"
convert_mode = "fx"
# Voxelization cfg
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
voxel_size = [0.2, 0.2, 8]
max_num_points = 20
max_voxels = (30000, 40000)
# Detection classes; one single-class head ("task") per category.
class_names = [
    "car",
    "truck",
    "pedestrian",
]
tasks = [
    dict(num_class=1, class_names=["car"]),
    dict(num_class=1, class_names=["truck"]),
    dict(num_class=1, class_names=["pedestrian"]),
]
# Per-branch (out_channels, num_conv) for the CenterPoint head.
common_heads = dict(
    reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)
)
# Idiom: membership test on the dict directly; `.keys()` is redundant.
with_velocity = "vel" in common_heads
def get_feature_map_size(point_cloud_range, voxel_size):
    """Compute the BEV grid size from the point-cloud range and voxel size.

    Returns an int64 numpy array of round((max - min) / voxel_size)
    per spatial dimension.
    """
    pc_range = np.asarray(point_cloud_range, dtype=np.float32)
    vox = np.asarray(voxel_size, dtype=np.float32)
    extent = pc_range[3:] - pc_range[:3]
    return np.round(extent / vox).astype(np.int64)
# model settings
# Full CenterPoint (PointPillars variant) detector: voxelization ->
# pillar feature extraction -> BEV scatter -> SECOND FPN neck ->
# multi-task center heads, plus target/loss config and post-processing.
model = dict(
    type="CenterPointDetector",
    feature_map_shape=get_feature_map_size(point_cloud_range, voxel_size),
    # Voxelize raw points and normalize dims 0-3 into norm_range.
    pre_process=dict(
        type="CenterPointPreProcess",
        pc_range=point_cloud_range,
        voxel_size=voxel_size,
        max_voxels_num=max_voxels,
        max_points_in_voxel=max_num_points,
        norm_range=[-51.2, -51.2, -5.0, 0.0, 51.2, 51.2, 3.0, 255.0],
        norm_dims=[0, 1, 2, 3],
    ),
    # Per-pillar feature extractor; conv-based, quantization-enabled variant.
    reader=dict(
        type="PillarFeatureNet",
        num_input_features=5,
        num_filters=(64,),
        with_distance=False,
        pool_size=(max_num_points, 1),
        voxel_size=voxel_size,
        pc_range=point_cloud_range,
        bn_kwargs=norm_cfg,
        quantize=True,
        use_4dim=True,
        use_conv=True,
        hw_reverse=True,
    ),
    # Scatter pillar features back onto the dense BEV canvas.
    backbone=dict(
        type="PointPillarScatter",
        num_input_features=64,
        use_horizon_pillar_scatter=True,
        quantize=True,
    ),
    # SECOND-style down/up FPN; three up branches of 128 channels each.
    neck=dict(
        type="SECONDNeck",
        in_feature_channel=64,
        down_layer_nums=[3, 5, 5],
        down_layer_strides=[2, 2, 2],
        down_layer_channels=[64, 128, 256],
        up_layer_strides=[0.5, 1, 2],
        up_layer_channels=[128, 128, 128],
        bn_kwargs=norm_cfg,
        quantize=True,
        use_relu6=False,
    ),
    head=dict(
        type="CenterPointHead",
        in_channels=sum([128, 128, 128]),
        tasks=tasks,
        share_conv_channels=64,
        share_conv_num=1,
        common_heads=common_heads,
        head_conv_channels=64,
        init_bias=-2.19,
        final_kernel=3,
    ),
    # Training-target generation (center heatmaps + box regression targets).
    targets=dict(
        type="CenterPointLidarTarget",
        grid_size=[512, 512, 1],
        voxel_size=voxel_size,
        point_cloud_range=point_cloud_range,
        tasks=tasks,
        dense_reg=1,
        max_objs=500,
        gaussian_overlap=0.1,
        min_radius=2,
        out_size_factor=4,
        norm_bbox=True,
        with_velocity=with_velocity,
    ),
    loss=dict(
        type="CenterPointLoss",
        loss_cls=dict(type="GaussianFocalLoss", loss_weight=1.0),
        loss_bbox=dict(
            type="L1Loss",
            reduction="mean",
            loss_weight=0.25,
        ),
        with_velocity=with_velocity,
        # Last two weights (velocity terms) are down-weighted to 0.2.
        code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
    ),
    # Decode head outputs into boxes and run rotated NMS.
    postprocess=dict(
        type="CenterPointPostProcess",
        tasks=tasks,
        norm_bbox=True,
        bbox_coder=dict(
            type="CenterPointBBoxCoder",
            pc_range=point_cloud_range[:2],
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=100,
            score_threshold=0.1,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
        ),
        # test_cfg
        max_pool_nms=False,
        score_threshold=0.1,
        post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
        min_radius=[4, 12, 10, 1, 0.85, 0.175],
        out_size_factor=4,
        nms_type="rotate",
        pre_max_size=1000,
        post_max_size=83,
        nms_thr=0.2,
        box_size=9,
    ),
)
# model settings
# Deploy-time model: same reader/backbone/neck/head as `model`, but with
# is_deploy=True and without pre_process/targets/loss/postprocess — it
# consumes already-voxelized features (see deploy_inputs below).
deploy_model = dict(
    type="CenterPointDetector",
    feature_map_shape=get_feature_map_size(point_cloud_range, voxel_size),
    is_deploy=True,
    reader=dict(
        type="PillarFeatureNet",
        num_input_features=5,
        num_filters=(64,),
        with_distance=False,
        pool_size=(max_num_points, 1),
        voxel_size=voxel_size,
        pc_range=point_cloud_range,
        bn_kwargs=norm_cfg,
        quantize=True,
        use_4dim=True,
        use_conv=True,
        hw_reverse=True,
    ),
    backbone=dict(
        type="PointPillarScatter",
        num_input_features=64,
        use_horizon_pillar_scatter=True,
        quantize=True,
    ),
    neck=dict(
        type="SECONDNeck",
        in_feature_channel=64,
        down_layer_nums=[3, 5, 5],
        down_layer_strides=[2, 2, 2],
        down_layer_channels=[64, 128, 256],
        up_layer_strides=[0.5, 1, 2],
        up_layer_channels=[128, 128, 128],
        bn_kwargs=norm_cfg,
        quantize=True,
        use_relu6=False,
    ),
    head=dict(
        type="CenterPointHead",
        in_channels=sum([128, 128, 128]),
        tasks=tasks,
        share_conv_channels=64,
        share_conv_num=1,
        common_heads=common_heads,
        head_conv_channels=64,
        init_bias=-2.19,
        final_kernel=3,
    ),
)
# Fixed-shape example inputs for tracing/compilation:
# features is (1, 5, max_points_in_voxel, max_voxels); coors is (N, 4) int.
deploy_inputs = dict(
    features=torch.randn((1, 5, 20, 40000), dtype=torch.float32),
    coors=torch.zeros([40000, 4]).int(),
)
# deploy_inputs = dict(
#     points=[
#         torch.randn(150000, 4),
#     ],
# )
# GT-database sampler: copy-paste augmentation that inserts sampled
# ground-truth objects from a pre-built KITTI database into training scenes.
db_sampler = dict(
    type="DataBaseSampler",
    enable=True,
    root_path="/open_explorer/tmp_data/kitti3d/",
    db_info_path="/open_explorer/tmp_data/kitti3d/kitti3d_dbinfos_train.pkl",
    # Number of objects to sample per class for each scene.
    sample_groups=[
        dict(car=2),
        dict(truck=3),
        dict(pedestrian=2),
    ],
    db_prep_steps=[
        dict(
            type="DBFilterByDifficulty",
            filter_by_difficulty=[-1],
        ),
        # Drop database entries with too few lidar points per class.
        dict(
            type="DBFilterByMinNumPoint",
            filter_by_min_num_points=dict(
                car=5,
                truck=5,
                pedestrian=5,
            ),
        ),
    ],
    global_random_rotation_range_per_object=[0, 0],
    rate=1.0,
)
# train_dataset = dict(
#     type="NuscenesLidarDataset",
#     num_sweeps=9,
#     data_path=os.path.join(data_rootdir, "train_lmdb"),
#     info_path=os.path.join(gt_data_root, "nuscenes_infos_train.pkl"),
#     load_dim=5,
#     use_dim=[0, 1, 2, 3, 4],
#     pad_empty_sweeps=True,
#     remove_close=True,
#     use_valid_flag=True,
#     classes=class_names,
#     transforms=[
#         dict(
#             type="PointCloudPreprocess",
#             mode="train",
#             current_mode="train",
#             class_names=class_names,
#             shuffle_points=True,
#             min_points_in_gt=-1,
#             flip_both=True,
#             global_rot_noise=[-0.3925, 0.3925],
#             global_scale_noise=[0.95, 1.05],
#             db_sampler=db_sampler,
#         ),
#         dict(
#             type="ObjectRangeFilter",
#             point_cloud_range=point_cloud_range,
#         ),
#         dict(type="LidarReformat", with_gt=True),
#     ],
# )
# Training data loader: KITTI-3D LMDB dataset with the standard lidar
# augmentation pipeline (GT paste, per-object noise, random flip,
# global rotation/scaling, point shuffle, range filter).
data_loader = dict(
    type=torch.utils.data.DataLoader,
    dataset=dict(
        type="Kitti3D",
        data_path="/open_explorer/tmp_data/kitti3d/train_lmdb",
        transforms=[
            dict(
                type="ObjectSample",
                class_names=class_names,
                remove_points_after_sample=False,
                db_sampler=db_sampler,
            ),
            dict(
                type="ObjectNoise",
                gt_rotation_noise=[-0.15707963267, 0.15707963267],
                gt_loc_noise_std=[0.25, 0.25, 0.25],
                global_random_rot_range=[0, 0],
                num_try=100,
            ),
            dict(
                type="PointRandomFlip",
                probability=0.5,
            ),
            dict(
                type="PointGlobalRotation",
                rotation=[-0.78539816, 0.78539816],
            ),
            dict(
                type="PointGlobalScaling",
                min_scale=0.95,
                max_scale=1.05,
            ),
            dict(
                type="ShufflePoints",
                shuffle=True,
            ),
            dict(
                type="ObjectRangeFilter",
                # point_cloud_range=pc_range,
                point_cloud_range=point_cloud_range,
            ),
            dict(type="LidarReformat"),
        ],
    ),
    # shuffle=False because the DistributedSampler handles shuffling.
    sampler=dict(type=torch.utils.data.DistributedSampler),
    batch_size=batch_size_per_gpu,
    shuffle=False,
    num_workers=1,
    pin_memory=False,
    collate_fn=collate_lidar3d,
)
# val_dataset = dict(
#     type="NuscenesLidarDataset",
#     test_mode=True,
#     num_sweeps=9,
#     data_path=os.path.join(data_rootdir, "val_lmdb"),
#     load_dim=5,
#     use_dim=[0, 1, 2, 3, 4],
#     pad_empty_sweeps=True,
#     remove_close=True,
#     classes=class_names,
#     transforms=[
#         dict(type="LidarReformat", with_gt=False),
#     ],
# )
# Validation data loader: no augmentation, only reformatting.
val_data_loader = dict(
    type=torch.utils.data.DataLoader,
    dataset=dict(
        type="Kitti3D",
        data_path="/open_explorer/tmp_data/kitti3d/val_lmdb",
        transforms=[
            dict(type="LidarReformat"),
        ],
    ),
    batch_size=batch_size_per_gpu,
    shuffle=False,
    num_workers=1,
    pin_memory=False,
    collate_fn=collate_lidar3d,
)
def loss_collector(outputs: dict):
    """Collect all loss tensors from a model-output dict into a list.

    The loss names (keys) are irrelevant here; only the values are used
    by the batch processor for the backward pass.
    """
    # Idiom: dict.values() replaces the manual key/value loop + append.
    return list(outputs.values())
# Training batch processor: forward + backward, gathering losses
# via `loss_collector`.
batch_processor = dict(
    type="MultiBatchProcessor",
    need_grad_update=True,
    loss_collector=loss_collector,
)
# Validation batch processor: forward only, no loss collection.
# Fix: `NULL` is not a Python name; use None.
val_batch_processor = dict(
    type="MultiBatchProcessor",
    need_grad_update=False,
    loss_collector=None,
)
def update_metric(metrics, batch, model_outs):
    """Feed the current batch and model outputs to every validation metric."""
    for m in metrics:
        m.update(batch, model_outs)
def update_loss(metrics, batch, model_outs):
    """Feed only the model outputs (losses) to each loss-display metric.

    `batch` is accepted for interface parity with `update_metric` but unused.
    """
    for m in metrics:
        m.update(model_outs)
# Metric updater used during validation runs.
val_metric_updater = dict(
    type="MetricUpdater",
    metric_update_func=update_metric,
    step_log_freq=10000,
    epoch_log_freq=1,
    log_prefix="Validation " + task_name,
)
# Metric updater that surfaces the training losses in the log.
loss_show_update = dict(
    type="MetricUpdater",
    metric_update_func=update_loss,
    step_log_freq=log_loss_show,
    epoch_log_freq=1,
    log_prefix="loss_" + task_name,
)
stat_callback = dict(
    type="StatsMonitor",
    log_freq=log_loss_show,
)
# Checkpoint saver; mode=None disables best-metric selection.
# Fix: `NULL` is not a Python name; use None.
ckpt_callback = dict(
    type="Checkpoint",
    save_dir=ckpt_dir,
    name_prefix=training_step + "-",
    strict_match=True,
    save_interval=1,
    # mode="max",
    mode=None,
)
# Periodic validation during training.
# Fix: `NULL` is not a Python name; val_model=None.
val_callback = dict(
    type="Validation",
    data_loader=val_data_loader,
    batch_processor=val_batch_processor,
    callbacks=[val_metric_updater],
    val_model=None,
    val_on_train_end=True,
    val_interval=30,
    log_interval=200,
)
# Save a traced model using the fixed-shape deploy inputs.
trace_callback = dict(
    type="SaveTraced",
    save_dir=ckpt_dir,
    trace_inputs=deploy_inputs,
)
# Gradient scaling / clipping (L2 norm, max 35).
grad_callback = dict(
    type="GradScale",
    module_and_scale=[],
    clip_grad_norm=35,
    clip_norm_type=2,
)
# val_nuscenes_metric = dict(
#     type="NuscenesMetric",
#     data_root=meta_rootdir,
#     version="v1.0-trainval",
#     use_lidar=True,
#     classes=class_names,
#     save_prefix="./WORKSPACE/results" + task_name,
# )
# Float (fp32) training stage.
# Fix: `NULL` is not a Python name; device=None.
float_trainer = dict(
    type="distributed_data_parallel_trainer",
    model=model,
    data_loader=data_loader,
    optimizer=dict(
        type=torch.optim.AdamW,
        betas=(0.95, 0.99),
        lr=2e-4,
        weight_decay=0.01,
    ),
    batch_processor=batch_processor,
    num_epochs=20,
    device=None,
    callbacks=[
        stat_callback,
        loss_show_update,
        dict(
            type="CyclicLrUpdater",
            target_ratio=(10, 1e-4),
            cyclic_times=1,
            step_ratio_up=0.4,
            step_log_interval=200,
        ),
        grad_callback,
        val_callback,
        ckpt_callback,
    ],
    sync_bn=True,
    train_metrics=dict(
        type="LossShow",
    ),
    # val_metrics=[val_nuscenes_metric],
    val_metrics=dict(
        type="Kitti3DMetricDet",
        compute_aos=True,
        current_classes=class_names,
        difficultys=[0, 1, 2],
    ),
)
# Calibration stage: reuse the float data pipeline (without the DDP
# sampler) and run post-training calibration from the float checkpoint.
calibration_data_loader = copy.deepcopy(data_loader)
calibration_data_loader.pop("sampler")  # Calibration does not support DDP or DP
calibration_batch_processor = copy.deepcopy(val_batch_processor)
calibration_trainer = dict(
    type="Calibrator",
    model=model,
    model_convert_pipeline=dict(
        type="ModelConvertPipeline",
        qat_mode="fuse_bn",
        converters=[
            dict(
                type="LoadCheckpoint",
                checkpoint_path=os.path.join(
                    ckpt_dir, "float-checkpoint-last.pth.tar"
                ),
                allow_miss=True,
                verbose=True,
            ),
            dict(type="Float2Calibration", convert_mode=convert_mode),
        ],
    ),
    data_loader=calibration_data_loader,
    batch_processor=calibration_batch_processor,
    num_steps=100,
    # Fix: `NULL` is not a Python name; use None.
    device=None,
    callbacks=[
        stat_callback,
        val_callback,
        ckpt_callback,
    ],
    # val_metrics=[val_nuscenes_metric],
    val_metrics=dict(
        type="Kitti3DMetricDet",
        compute_aos=True,
        current_classes=class_names,
        difficultys=[0, 1, 2],
    ),
    log_interval=20,
)
# QAT training stage: converts to fake-quant modules, loads the
# calibration checkpoint, and fine-tunes with SGD.
# Fix: `NULL` is not a Python name; device=None.
qat_trainer = dict(
    type="distributed_data_parallel_trainer",
    model=model,
    model_convert_pipeline=dict(
        type="ModelConvertPipeline",
        qat_mode="fuse_bn",
        # Freeze activation/weight observers during QAT fine-tuning.
        qconfig_params=dict(
            activation_qat_qkwargs=dict(
                averaging_constant=0,
            ),
            weight_qat_qkwargs=dict(
                averaging_constant=1,
            ),
        ),
        converters=[
            dict(type="Float2QAT", convert_mode=convert_mode),
            dict(
                type="LoadCheckpoint",
                checkpoint_path=os.path.join(
                    ckpt_dir, "calibration-checkpoint-last.pth.tar"
                ),
            ),
        ],
    ),
    data_loader=data_loader,
    optimizer=dict(
        type=torch.optim.SGD,
        weight_decay=0.0,
        lr=2e-4,
        momentum=0.9,
    ),
    batch_processor=batch_processor,
    num_epochs=10,
    device=None,
    callbacks=[
        stat_callback,
        loss_show_update,
        dict(
            type="CyclicLrUpdater",
            target_ratio=(10, 1e-4),
            cyclic_times=1,
            step_ratio_up=0.4,
            step_log_interval=200,
        ),
        grad_callback,
        val_callback,
        ckpt_callback,
    ],
    train_metrics=dict(
        type="LossShow",
    ),
    # val_metrics=[val_nuscenes_metric],
    val_metrics=dict(
        type="Kitti3DMetricDet",
        compute_aos=True,
        current_classes=class_names,
        difficultys=[0, 1, 2],
    ),
)
# just for saving int_infer pth and pt
# Dummy trainer (num_epochs=0): converts QAT weights to the quantized
# deploy model and saves/traces it via the configured callbacks.
# Fix: `NULL` is not a Python name; use None for the unused fields.
int_infer_trainer = dict(
    type="Trainer",
    model=deploy_model,
    model_convert_pipeline=dict(
        type="ModelConvertPipeline",
        qat_mode="fuse_bn",
        converters=[
            dict(type="Float2QAT", convert_mode=convert_mode),
            dict(
                type="LoadCheckpoint",
                checkpoint_path=os.path.join(
                    ckpt_dir, "qat-checkpoint-last.pth.tar"
                ),
            ),
            dict(type="QAT2Quantize", convert_mode=convert_mode),
        ],
    ),
    data_loader=None,
    optimizer=None,
    batch_processor=None,
    num_epochs=0,
    device=None,
    callbacks=[ckpt_callback, trace_callback],
)
compile_dir = os.path.join(ckpt_dir, "compile")
# Compilation settings for the BPU toolchain (HBM artifact).
compile_cfg = dict(
    march=march,
    name=task_name,
    out_dir=compile_dir,
    hbm=os.path.join(compile_dir, "model.hbm"),
    layer_details=True,
    input_source=["ddr"],
    opt="O3",
    output_layout="NHWC",
)
# predictor
# Evaluate the float model on the validation set.
# Fix: `NULL` is not a Python name; device=None.
float_predictor = dict(
    type="Predictor",
    model=model,
    model_convert_pipeline=dict(
        type="ModelConvertPipeline",
        converters=[
            dict(
                type="LoadCheckpoint",
                checkpoint_path=os.path.join(
                    ckpt_dir, "float-checkpoint-last.pth.tar"
                ),
            ),
        ],
    ),
    data_loader=[val_data_loader],
    batch_processor=val_batch_processor,
    device=None,
    # metrics=[val_nuscenes_metric],
    metrics=dict(
        type="Kitti3DMetricDet",
        compute_aos=True,
        current_classes=class_names,
        difficultys=[0, 1, 2],
    ),
    callbacks=[
        val_metric_updater,
    ],
    log_interval=100,
)
# Evaluate the calibrated (fake-quant) model on the validation set.
# Fix: `NULL` is not a Python name; device=None.
calibration_predictor = dict(
    type="Predictor",
    model=model,
    model_convert_pipeline=dict(
        type="ModelConvertPipeline",
        qat_mode="fuse_bn",
        converters=[
            dict(type="Float2QAT", convert_mode=convert_mode),
            dict(
                type="LoadCheckpoint",
                checkpoint_path=os.path.join(
                    ckpt_dir, "calibration-checkpoint-last.pth.tar"
                ),
            ),
        ],
    ),
    data_loader=[val_data_loader],
    batch_processor=val_batch_processor,
    device=None,
    # metrics=[val_nuscenes_metric],
    metrics=dict(
        type="Kitti3DMetricDet",
        compute_aos=True,
        current_classes=class_names,
        difficultys=[0, 1, 2],
    ),
    callbacks=[
        val_metric_updater,
    ],
    log_interval=100,
)
# Evaluate the QAT model on the validation set.
# Fix: `NULL` is not a Python name; device=None.
qat_predictor = dict(
    type="Predictor",
    model=model,
    model_convert_pipeline=dict(
        type="ModelConvertPipeline",
        qat_mode="fuse_bn",
        converters=[
            dict(type="Float2QAT", convert_mode=convert_mode),
            dict(
                type="LoadCheckpoint",
                checkpoint_path=os.path.join(
                    ckpt_dir, "qat-checkpoint-last.pth.tar"
                ),
            ),
        ],
    ),
    data_loader=[val_data_loader],
    batch_processor=val_batch_processor,
    device=None,
    # metrics=[val_nuscenes_metric],
    metrics=dict(
        type="Kitti3DMetricDet",
        compute_aos=True,
        current_classes=class_names,
        difficultys=[0, 1, 2],
    ),
    callbacks=[
        val_metric_updater,
    ],
    log_interval=100,
)
# Evaluate the fully quantized (int) model on the validation set.
# Fix: `NULL` is not a Python name; device=None.
int_infer_predictor = dict(
    type="Predictor",
    model=model,
    model_convert_pipeline=dict(
        type="ModelConvertPipeline",
        qat_mode="fuse_bn",
        converters=[
            dict(type="Float2QAT", convert_mode=convert_mode),
            dict(
                type="LoadCheckpoint",
                checkpoint_path=os.path.join(
                    ckpt_dir, "qat-checkpoint-last.pth.tar"
                ),
            ),
            dict(type="QAT2Quantize", convert_mode=convert_mode),
        ],
    ),
    data_loader=[val_data_loader],
    batch_processor=val_batch_processor,
    device=None,
    # metrics=[val_nuscenes_metric],
    metrics=dict(
        type="Kitti3DMetricDet",
        compute_aos=True,
        current_classes=class_names,
        difficultys=[0, 1, 2],
    ),
    callbacks=[
        val_metric_updater,
    ],
    log_interval=100,
)
# Reuse the QAT checkpoint path configured in int_infer_trainer
# (its converters[1] is the LoadCheckpoint step).
infer_ckpt = int_infer_trainer["model_convert_pipeline"]["converters"][1][
    "checkpoint_path"
]
# BPU-alignment predictor: runs the quantized model sample-by-sample
# (note the non-list data_loader and log_interval=1) for numeric
# comparison against on-device results.
align_bpu_predictor = dict(
    type="Predictor",
    model=model,
    model_convert_pipeline=dict(
        type="ModelConvertPipeline",
        qat_mode="fuse_bn",
        converters=[
            dict(type="Float2QAT", convert_mode=convert_mode),
            dict(
                type="LoadCheckpoint",
                checkpoint_path=infer_ckpt,
            ),
            dict(type="QAT2Quantize", convert_mode=convert_mode),
        ],
    ),
    data_loader=val_data_loader,
    # metrics=[val_nuscenes_metric],
    metrics=dict(
        type="Kitti3DMetricDet",
        compute_aos=True,
        current_classes=class_names,
        difficultys=[0, 1, 2],
    ),
    callbacks=[
        val_metric_updater,
    ],
    log_interval=1,
)
def process_inputs(infer_inputs, transforms=None):
    """Load a raw point-cloud file and build the model input dict.

    Args:
        infer_inputs: dict with key "input_points" giving the path to a
            raw float32 ``.bin`` file holding 5 values per point.
        transforms: optional callable applied to the model input dict.
            Fix: the default was the undefined name ``NULL``; use None.

    Returns:
        Tuple of (model_input dict, points tensor of shape (N, 5)).
    """
    points = np.fromfile(
        infer_inputs["input_points"], dtype=np.float32
    ).reshape((-1, 5))
    points = torch.from_numpy(points)
    model_input = {
        "points": [points],
    }
    if transforms is not None:
        model_input = transforms(model_input)
    return model_input, points
def process_outputs(model_outs, viz_func, vis_inputs):
    """Visualize the first prediction set; this inference hook returns None.

    Fix: the original returned the undefined name ``NULL``.
    """
    preds = model_outs[0]
    viz_func(vis_inputs, preds)
    return None
# Single-sample inference config: loads one KITTI velodyne .bin file,
# runs the quantized model, and visualizes detections above score 0.4.
infer_cfg = dict(
    model=model,
    infer_inputs=dict(
        input_points="/open_explorer/tmp_orig_data/kitti3d/training/velodyne/000000.bin",
    ),
    process_inputs=process_inputs,
    viz_func=partial(
        lidar_det_visualize, score_thresh=0.4, is_plot=True, reverse=True
    ),
    process_outputs=process_outputs,
    model_convert_pipeline=dict(
        type="ModelConvertPipeline",
        qat_mode="fuse_bn",
        converters=[
            dict(type="Float2QAT", convert_mode=convert_mode),
            dict(
                type="LoadCheckpoint",
                checkpoint_path=infer_ckpt,
            ),
            dict(type="QAT2Quantize", convert_mode=convert_mode),
        ],
    ),
)
# ONNX export config: exports the deploy model at the QAT stage using the
# fixed-shape deploy_inputs.
onnx_cfg = dict(
    model=deploy_model,
    stage="qat",
    inputs=deploy_inputs,
    model_convert_pipeline=dict(
        type="ModelConvertPipeline",
        qat_mode="fuse_bn",
        converters=[
            dict(type="Float2QAT"),
            dict(
                type="LoadCheckpoint",
                checkpoint_path=os.path.join(
                    ckpt_dir, "qat-checkpoint-last.pth.tar"
                ),
            ),
        ],
    ),
)