|
|
# 49.6 custom_av_dataset.yaml
# Dataset-level settings for CustomAvDataset: data location, point-cloud
# range, class-name mapping, split names, info files and per-point features.
DATASET: 'CustomAvDataset'
DATA_PATH: '../data/custom_av'

# NOTE(review): presumably [x_min, y_min, z_min, x_max, y_max, z_max] — confirm.
POINT_CLOUD_RANGE: [-74.88, -74.88, -2.0, 74.88, 74.88, 4.0]

MAP_CLASS_TO_KITTI:
    Vehicle: 'Car'
    Pedestrian: 'Pedestrian'
    Cyclist: 'Cyclist'

DATA_SPLIT:
    train: train
    test: val

INFO_PATH:
    train: [custom_av_infos_train.pkl]
    test: [custom_av_infos_val.pkl]

POINT_FEATURE_ENCODING:
    encoding_type: absolute_coordinates_encoding
    # 'intensity' is listed as a source feature but is not in the used list.
    used_feature_list: ['x', 'y', 'z']
    src_feature_list: ['x', 'y', 'z', 'intensity']
# Augmentation settings. Each AUG_CONFIG_LIST entry is one augmentor,
# identified by NAME, followed by that augmentor's own parameters.
DATA_AUGMENTOR:
    DISABLE_AUG_LIST: ['placeholder']
    AUG_CONFIG_LIST:
        - NAME: gt_sampling
          USE_ROAD_PLANE: False
          DB_INFO_PATH:
              - custom_av_dbinfos_train.pkl
          PREPARE:
              filter_by_min_points: ['Vehicle:5', 'Pedestrian:5', 'Cyclist:5']
          SAMPLE_GROUPS: ['Vehicle:10', 'Pedestrian:16', 'Cyclist:14']
          NUM_POINT_FEATURES: 4
          DATABASE_WITH_FAKELIDAR: False
          REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0]
          LIMIT_WHOLE_SCENE: True

        - NAME: random_world_flip
          ALONG_AXIS_LIST: ['x', 'y']

        - NAME: random_world_rotation
          # 0.78539816 rad is pi/4, i.e. ±45° (the disabled variant below is ±22.5°).
          WORLD_ROT_ANGLE: [-0.78539816, 0.78539816]

        - NAME: random_world_scaling
          WORLD_SCALE_RANGE: [0.95, 1.05]

        # Weaker-augmentation alternative (disabled):
        # - NAME: random_world_rotation
        #   WORLD_ROT_ANGLE: [-0.39269908, 0.39269908]  # reduced to ±22.5°
        # - NAME: random_world_scaling
        #   WORLD_SCALE_RANGE: [0.98, 1.02]

        - NAME: random_world_translation  # augmentation technique that was added
          NOISE_TRANSLATE_STD: [0.2, 0.2, 0.2]

        - NAME: random_points_dropout
          DROP_RATE: 0.15
          ENSURE_MIN_KEEP: 90000
          PROB: 0.4

        - NAME: line_downsample
          TARGET_LINES: 64
          LINE_BINS: 128
          TOLERANCE: 6
          MIN_BIN_OCCUPANCY: 120
          ELEVATION_FROM: 'auto'
          PROB: 0.5
# Point-cloud processing steps: one list entry per step, identified by NAME,
# followed by that step's own parameters.
DATA_PROCESSOR:
    - NAME: mask_points_and_boxes_outside_range
      REMOVE_OUTSIDE_BOXES: True

    - NAME: shuffle_points
      SHUFFLE_ENABLED:
          train: True
          test: True

    - NAME: transform_points_to_voxels
      VOXEL_SIZE: [0.06, 0.06, 0.15]
      MAX_POINTS_PER_VOXEL: 5
      MAX_NUMBER_OF_VOXELS:
          train: 200000
          test: 300000
# voxel_rcnn_custom_49.6.yaml
# Detection class names and the base dataset config referenced by this model.
CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist']

DATA_CONFIG:
    _BASE_CONFIG_: cfgs/dataset_configs/custom_av_dataset.yaml
# Voxel R-CNN model definition: VFE, 3D backbone, BEV mapping, 2D backbone,
# CenterHead dense head, VoxelRCNNHead RoI head and model-level post-processing.
MODEL:
    NAME: VoxelRCNN

    VFE:
        NAME: DynMeanVFE

    BACKBONE_3D:
        NAME: VoxelBackBone8x

    MAP_TO_BEV:
        NAME: HeightCompression
        NUM_BEV_FEATURES: 256

    BACKBONE_2D:
        NAME: BaseBEVBackbone
        LAYER_NUMS: [5, 5]
        LAYER_STRIDES: [1, 2]
        NUM_FILTERS: [128, 256]
        UPSAMPLE_STRIDES: [1, 2]
        NUM_UPSAMPLE_FILTERS: [256, 256]

    DENSE_HEAD:
        NAME: CenterHead
        CLASS_AGNOSTIC: False
        CLASS_NAMES_EACH_HEAD: [['Vehicle', 'Pedestrian', 'Cyclist']]
        SHARED_CONV_CHANNEL: 64
        USE_BIAS_BEFORE_NORM: True
        NUM_HM_CONV: 3  # 2

        SEPARATE_HEAD_CFG:
            HEAD_ORDER: ['center', 'center_z', 'dim', 'rot']
            HEAD_DICT:
                center: {out_channels: 2, num_conv: 2}
                center_z: {out_channels: 1, num_conv: 2}
                dim: {out_channels: 3, num_conv: 2}
                rot: {out_channels: 2, num_conv: 2}

        TARGET_ASSIGNER_CONFIG:
            FEATURE_MAP_STRIDE: 8
            NUM_MAX_OBJS: 500
            GAUSSIAN_OVERLAP: 0.15
            MIN_RADIUS: 2

        LOSS_CONFIG:
            LOSS_WEIGHTS:
                cls_weight: 2.3  # 1.0 2.0
                loc_weight: 2.3  # 2.0
                code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
            # NOTE(review): the original nesting of the two per-class weight maps
            # was lost; they are placed under LOSS_CONFIG next to LOSS_WEIGHTS —
            # confirm against the code that reads them.
            CLASS_WEIGHTS:  # for the heatmap classification loss
                Vehicle: 1.0
                Pedestrian: 2.5
                Cyclist: 2.7
            LOC_CLASS_WEIGHTS:
                Vehicle: 1.0
                Pedestrian: 1.2
                Cyclist: 1.5

        POST_PROCESSING:
            SCORE_THRESH: 0.1  # 0.1
            POST_CENTER_LIMIT_RANGE: [-75.0, -75.0, -2.0, 75.0, 75.0, 4.0]
            MAX_OBJ_PER_SAMPLE: 500  # 500
            NMS_CONFIG:
                NMS_TYPE: nms_gpu
                NMS_THRESH: 0.7  # 0.7
                NMS_PRE_MAXSIZE: 4096
                NMS_POST_MAXSIZE: 400

        # NOTE(review): the nesting level of this custom flag was lost; it is
        # placed directly on DENSE_HEAD — confirm where the head code reads it.
        DEBUG_LABEL_HIST: True

    ROI_HEAD:
        NAME: VoxelRCNNHead
        CLASS_AGNOSTIC: True  # True

        SHARED_FC: [256, 256]
        CLS_FC: [256, 256]
        REG_FC: [256, 256]
        DP_RATIO: 0.3

        NMS_CONFIG:
            TRAIN:
                NMS_TYPE: nms_gpu
                MULTI_CLASSES_NMS: False
                NMS_PRE_MAXSIZE: 9000
                NMS_POST_MAXSIZE: 512
                NMS_THRESH: 0.65  # 0.8
            TEST:
                NMS_TYPE: nms_gpu
                MULTI_CLASSES_NMS: False
                NMS_PRE_MAXSIZE: 4096  # 1024
                NMS_POST_MAXSIZE: 512  # 100
                NMS_THRESH: 0.6  # 0.7

        ROI_GRID_POOL:
            FEATURES_SOURCE: ['x_conv2', 'x_conv3', 'x_conv4']
            PRE_MLP: True
            GRID_SIZE: 8
            # One pooling layer per entry in FEATURES_SOURCE.
            POOL_LAYERS:
                x_conv2:
                    MLPS: [[64, 64]]
                    QUERY_RANGES: [[3, 3, 2]]
                    POOL_RADIUS: [0.4]
                    NSAMPLE: [16]
                    POOL_METHOD: max_pool
                x_conv3:
                    MLPS: [[64, 64]]
                    QUERY_RANGES: [[3, 3, 2]]
                    POOL_RADIUS: [0.8]
                    NSAMPLE: [16]
                    POOL_METHOD: max_pool
                x_conv4:
                    MLPS: [[64, 64]]
                    QUERY_RANGES: [[3, 3, 2]]
                    POOL_RADIUS: [1.6]
                    NSAMPLE: [16]
                    POOL_METHOD: max_pool

        TARGET_CONFIG:
            BOX_CODER: ResidualCoder
            ROI_PER_IMAGE: 192
            FG_RATIO: 0.5
            CLS_SCORE_TYPE: roi_iou
            CLS_FG_THRESH: 0.70
            CLS_BG_THRESH: 0.25
            CLS_BG_THRESH_LO: 0.1
            HARD_BG_RATIO: 0.8
            REG_FG_THRESH: 0.55

        LOSS_CONFIG:
            CLS_LOSS: BinaryCrossEntropy
            REG_LOSS: smooth-l1
            CORNER_LOSS_REGULARIZATION: True
            LOSS_WEIGHTS:
                rcnn_cls_weight: 1.0
                rcnn_reg_weight: 1.0
                rcnn_corner_weight: 1.0
                code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

    POST_PROCESSING:
        RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
        SCORE_THRESH: 0.14  # 0.1 #0.2 #0.12
        OUTPUT_RAW_SCORE: False
        EVAL_METRIC: waymo
        NMS_CONFIG:
            MULTI_CLASSES_NMS: False  # False
            NMS_TYPE: nms_gpu
            NMS_THRESH: 0.3  # 0.7
            NMS_PRE_MAXSIZE: 4096
            NMS_POST_MAXSIZE: 500
# Training settings for the Voxel R-CNN run: batch size, epoch count,
# optimizer choice and learning-rate schedule parameters.
OPTIMIZATION:
    BATCH_SIZE_PER_GPU: 2
    NUM_EPOCHS: 80

    OPTIMIZER: adam_onecycle
    LR: 0.003
    WEIGHT_DECAY: 0.001
    MOMENTUM: 0.9

    MOMS: [0.95, 0.85]
    PCT_START: 0.35
    DIV_FACTOR: 10
    DECAY_STEP_LIST: [35, 45]
    LR_DECAY: 0.1
    LR_CLIP: 0.0000001

    LR_WARMUP: False
    WARMUP_EPOCH: 1

    GRAD_NORM_CLIP: 10
# voxelNext_custom_.yaml
# Detection class names and the base dataset config referenced by this model.
CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist']

DATA_CONFIG:
    _BASE_CONFIG_: cfgs/dataset_configs/custom_av_dataset.yaml
# VoxelNeXt model definition: VFE, sparse 3D backbone, VoxelNeXtHead dense
# head (with IoU branch enabled) and model-level post-processing.
MODEL:
    NAME: VoxelNeXt

    VFE:
        NAME: MeanVFE

    BACKBONE_3D:
        NAME: VoxelResBackBone8xVoxelNeXt
        SPCONV_KERNEL_SIZES: [5, 5, 3, 3]
        OUT_CHANNEL: 256  # 256
        CHANNELS: [32, 64, 128, 256, 256]  # last entry is 256

    DENSE_HEAD:
        NAME: VoxelNeXtHead
        IOU_BRANCH: True  # True
        CLASS_AGNOSTIC: False
        INPUT_FEATURES: 256
        CLASS_NAMES_EACH_HEAD: [['Vehicle', 'Pedestrian', 'Cyclist']]
        SHARED_CONV_CHANNEL: 256  # 256
        USE_BIAS_BEFORE_NORM: True
        NUM_HM_CONV: 2  # 2; changing to 3x3 in VoxelNext-K3 raised the score by about 2%

        SEPARATE_HEAD_CFG:
            HEAD_ORDER: ['center', 'center_z', 'dim', 'rot']
            HEAD_DICT:
                center: {out_channels: 2, num_conv: 2}
                center_z: {out_channels: 1, num_conv: 2}
                dim: {out_channels: 3, num_conv: 2}
                rot: {out_channels: 2, num_conv: 2}
                iou: {out_channels: 1, num_conv: 2}

        # NOTE(review): three values, presumably one per class in
        # CLASS_NAMES order — confirm.
        RECTIFIER: [0.68, 0.71, 0.65]

        TARGET_ASSIGNER_CONFIG:
            FEATURE_MAP_STRIDE: 8
            NUM_MAX_OBJS: 500
            GAUSSIAN_OVERLAP: 0.2  # 0.1 -> 0.2 -> 0.1
            MIN_RADIUS: 2  # 2 -> 3 -> 2

        LOSS_CONFIG:
            CLASS_LOSS_WEIGHTS: [1.5, 3.0, 2.5]  # 2.0 1.7
            LOSS_WEIGHTS:
                cls_weight: 1.3
                loc_weight: 2.0
                iou_weight: 1.5
                code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

        POST_PROCESSING:
            SCORE_THRESH: 0.1  # 0.1 -> 0.2~0.3
            POST_CENTER_LIMIT_RANGE: [-75.0, -75.0, -2.0, 75.0, 75.0, 4.0]
            MAX_OBJ_PER_SAMPLE: 500
            NMS_CONFIG:
                NMS_TYPE: nms_gpu
                MULTI_CLASSES_NMS: False
                NMS_THRESH: [0.65, 0.7, 0.7]  # 0.7
                NMS_PRE_MAXSIZE: [2048, 1024, 1024]  # 4096 #[4096]
                NMS_POST_MAXSIZE: [200, 150, 150]  # 500

    POST_PROCESSING:
        RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
        EVAL_METRIC: waymo
# Training settings for the VoxelNeXt run: batch size, epoch count,
# optimizer choice and learning-rate schedule parameters.
OPTIMIZATION:
    BATCH_SIZE_PER_GPU: 1
    NUM_EPOCHS: 40

    OPTIMIZER: adam_onecycle
    LR: 0.0015  # 0.003
    WEIGHT_DECAY: 0.01
    MOMENTUM: 0.9

    MOMS: [0.95, 0.85]
    PCT_START: 0.35
    DIV_FACTOR: 10
    DECAY_STEP_LIST: [15, 25]
    LR_DECAY: 0.1
    LR_CLIP: 0.0000001

    LR_WARMUP: True
    WARMUP_EPOCH: 1

    GRAD_NORM_CLIP: 10
# voxelNext_custom_av_dataset.yaml
# Detection class names and the base dataset config referenced by this model.
CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist']

DATA_CONFIG:
    _BASE_CONFIG_: cfgs/dataset_configs/custom_av_dataset.yaml
# VoxelNeXt model definition: VFE, sparse 3D backbone, VoxelNeXtHead dense
# head (with IoU branch enabled) and model-level post-processing.
MODEL:
    NAME: VoxelNeXt

    VFE:
        NAME: MeanVFE

    BACKBONE_3D:
        NAME: VoxelResBackBone8xVoxelNeXt
        SPCONV_KERNEL_SIZES: [5, 5, 3, 3]
        OUT_CHANNEL: 256  # 256
        CHANNELS: [32, 64, 128, 256, 256]  # last entry is 256

    DENSE_HEAD:
        NAME: VoxelNeXtHead
        IOU_BRANCH: True  # True
        CLASS_AGNOSTIC: False
        INPUT_FEATURES: 256
        CLASS_NAMES_EACH_HEAD: [['Vehicle', 'Pedestrian', 'Cyclist']]
        SHARED_CONV_CHANNEL: 256  # 256
        USE_BIAS_BEFORE_NORM: True
        NUM_HM_CONV: 2  # 2; changing to 3x3 in VoxelNext-K3 raised the score by about 2%

        SEPARATE_HEAD_CFG:
            HEAD_ORDER: ['center', 'center_z', 'dim', 'rot']
            HEAD_DICT:
                center: {out_channels: 2, num_conv: 2}
                center_z: {out_channels: 1, num_conv: 2}
                dim: {out_channels: 3, num_conv: 2}
                rot: {out_channels: 2, num_conv: 2}
                iou: {out_channels: 1, num_conv: 2}

        # NOTE(review): three values, presumably one per class in
        # CLASS_NAMES order — confirm.
        RECTIFIER: [0.68, 0.71, 0.65]

        TARGET_ASSIGNER_CONFIG:
            FEATURE_MAP_STRIDE: 8
            NUM_MAX_OBJS: 500
            GAUSSIAN_OVERLAP: 0.2  # 0.1 -> 0.2 -> 0.1
            MIN_RADIUS: 2  # 2 -> 3 -> 2

        LOSS_CONFIG:
            CLASS_LOSS_WEIGHTS: [1.5, 3.0, 2.5]  # 2.0 1.7
            # - Long-tail correction for Pedestrian/Cyclist: larger weight on
            #   the classification heatmap loss.
            # - Vehicle appears often enough, so it is kept at 1.0.
            #   NOTE(review): if the list above follows CLASS_NAMES order, the
            #   active Vehicle weight is 1.5, so this remark may be stale.
            # FOCAL_ALPHA: 0.25  # mitigates class imbalance (focal-loss paper default)
            # FOCAL_GAMMA: 2.0   # focuses more on hard examples
            LOSS_WEIGHTS:
                cls_weight: 1.3
                loc_weight: 2.0
                iou_weight: 1.5
                code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

        # Alternative loss weighting (disabled):
        # LOSS_CONFIG:
        #     LOSS_WEIGHTS:
        #         cls_weight: 1.5  # 1.0 -> 1.5: strengthen the classification signal on the coarse grid
        #         loc_weight: 1.5  # 2.0 -> 1.5: relax the excessive regression weight (less instability)
        #         # assumes [cx, cy, cz, w, l, h, rot_sin, rot_cos]
        #         code_weights: [0.75, 0.75, 1.0, 1.2, 1.2, 1.2, 1.1, 1.1]
        #         # - center xy: lowered slightly to 0.75 (accounts for quantization limits)
        #         # - dim / rot: raised (directly tied to IoU improvement; protects AP/APH)

        POST_PROCESSING:
            SCORE_THRESH: 0.1  # 0.1 -> 0.2~0.3
            POST_CENTER_LIMIT_RANGE: [-75.0, -75.0, -2.0, 75.0, 75.0, 4.0]
            MAX_OBJ_PER_SAMPLE: 500
            NMS_CONFIG:
                NMS_TYPE: nms_gpu
                MULTI_CLASSES_NMS: False
                NMS_THRESH: [0.65, 0.7, 0.7]  # 0.7
                NMS_PRE_MAXSIZE: [2048, 1024, 1024]  # 4096 #[4096]
                NMS_POST_MAXSIZE: [200, 150, 150]  # 500

    POST_PROCESSING:
        RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
        EVAL_METRIC: waymo
# Training settings for the VoxelNeXt run: batch size, epoch count,
# optimizer choice and learning-rate schedule parameters.
OPTIMIZATION:
    BATCH_SIZE_PER_GPU: 1
    NUM_EPOCHS: 40

    OPTIMIZER: adam_onecycle
    LR: 0.0015  # 0.003
    WEIGHT_DECAY: 0.01
    MOMENTUM: 0.9

    MOMS: [0.95, 0.85]
    PCT_START: 0.35
    DIV_FACTOR: 10
    DECAY_STEP_LIST: [15, 25]
    LR_DECAY: 0.1
    LR_CLIP: 0.0000001

    LR_WARMUP: True
    WARMUP_EPOCH: 1

    GRAD_NORM_CLIP: 10
제출한 voxelNext 모델은 IOU_BRANCH를 False로 설정하여 학습하였다. 남은 시간이 촉박하여 모델을 경량화한 상태로 성능을 확인하기 위함이었으며, 현재 학습 중인 voxelNext 모델은 IOU_BRANCH를 True로 설정하여 제대로 학습을 진행하고 있다.
#49.2 제출결과
#49.6 제출결과
#45점 voxelNext 제출결과
| 성능지표 (2024) | 베이스라인 | voxelNext | Voxel_RCNN_60epoch | Voxel_RCNN_75epoch | 49점 | 49.6 | voxelNext 46점 |
| --- | --- | --- | --- | --- | --- | --- | --- |
| VEHICLE_AP/L1 | 0.8911 | 0.8255 | 0.8492 | 0.8560 | 0.8893 | 0.8964 | 0.8624 |
| VEHICLE_AP/L2 | 0.8801 | 0.8069 | 0.8366 | 0.8389 | 0.8777 | 0.8802 | 0.8498 |
| PEDESTRIAN_AP/L1 | 0.9023 | 0.6777 | 0.7389 | 0.7501 | 0.8203 | 0.8267 | 0.7520 |
| PEDESTRIAN_AP/L2 | 0.8920 | 0.6562 | 0.7231 | 0.7357 | 0.8056 | 0.8106 | 0.7416 |
| CYCLIST_AP/L1 | 0.8962 | 0.7923 | 0.8173 | 0.8231 | 0.9059 | 0.9103 | 0.8214 |
| CYCLIST_AP/L2 | 0.8829 | 0.7732 | 0.8042 | 0.8146 | 0.8914 | 0.8984 | 0.8110 |
| 추론시간(4070SUPERTi) | 47ms | 115ms | 48ms | 48ms | 65ms | 77ms | 180ms |

| 성능지표 (2025) | 베이스라인 | voxelNext | Voxel_RCNN_60epoch | Voxel_RCNN_75epoch | 49점 | 49.6 | voxelNext 46점 |
| --- | --- | --- | --- | --- | --- | --- | --- |
| VEHICLE_AP/L1 | 0.8611 | 0.7980 | 0.8250 | 0.8290 | 0.8642 | 0.8534 | 0.8156 |
| VEHICLE_AP/L2 | 0.8433 | 0.7763 | 0.8051 | 0.8091 | 0.8456 | 0.8346 | 0.7986 |
| PEDESTRIAN_AP/L1 | 0.8396 | 0.6245 | 0.6391 | 0.6551 | 0.7438 | 0.7264 | 0.6742 |
| PEDESTRIAN_AP/L2 | 0.8185 | 0.6113 | 0.6145 | 0.6298 | 0.7189 | 0.7014 | 0.6523 |
| CYCLIST_AP/L1 | 0.8784 | 0.7743 | 0.8108 | 0.8145 | 0.8828 | 0.8830 | 0.8677 |
| CYCLIST_AP/L2 | 0.8600 | 0.7642 | 0.7911 | 0.7945 | 0.8627 | 0.8622 | 0.8513 |
| 추론시간(4070SUPERTi) | 47ms | 115ms | 48ms | 48ms | 65ms | 77ms | 180ms |
