[자율주행 AI 챌린지] test_dataset 분석 및 모델 훈련 보고서

# test_dataset 시각화 코드 ----------------------------------------------------------------------------------------------------------------------------------------import open3d as o3dimport numpy as npimport osimport timeimport pickleimport argparse try:    import yaml    _HAS_YAML = Trueexcept Exception:    _HAS_YAML = False DATA_PATH       = "./data/custom_av"FRAME_LIST_FILE = os.path.join(DATA_PATH, "ImageSets", "test.txt")POINTS_FOLDER   = os.path.join(DATA_PATH, "points")RESULT_PKL      = "./voxel_rcnn_result.pkl"     VIEW_FILE       = os.path.join(DATA_PATH, "view.json") CLASS_COLOR = {    "Vehicle":    [1.0, 0.0, 0.0],    "Pedestrian": [0.0, 1.0, 0.0],    "Cyclist":    [0.0, 0.0, 1.0],}LABEL_COLOR = {      1: [1.0, 0.0, 0.0],    2: [0.0, 1.0, 0.0],    3: [0.0, 0.0, 1.0],} def read_thresholds_from_cfg(cfg_path):    """    cfg YAML에서 (score_thresh, nms_iou) 추출.    - MODEL.POST_PROCESSING.SCORE_THRESH    - MODEL.POST_PROCESSING.NMS_CONFIG.NMS_THRESH    클래스별 리스트일 경우 평균값 사용.    """    score = None    nms_iou = None    if not (_HAS_YAML and cfg_path and os.path.exists(cfg_path)):        return score, nms_iou    try:        with open(cfg_path, 'r') as f:            y = yaml.safe_load(f) or {}        pp = ((y.get('MODEL') or {}).get('POST_PROCESSING') or {})        s = pp.get('SCORE_THRESH', None) or pp.get('SCORE_THRESH_LIST', None)        if isinstance(s, (list, tuple)):            score = float(np.mean(s))        elif s is not None:            score = float(s)        nc = pp.get('NMS_CONFIG', {}) or {}        nt = nc.get('NMS_THRESH', None) or nc.get('NMS_THRESH_TEST', None) or nc.get('NMS_THRESH_LIST', None)        if isinstance(nt, (list, tuple)):            nms_iou = float(np.mean(nt))        elif nt is not None:            nms_iou = float(nt)    except Exception as e:        print(f"[WARN] cfg read failed: {e}")    return score, nms_iou def parse_args_and_apply_cfg():    global SCORE_THR, NMS_IOU_THR, RESULT_PKL, DATA_PATH, FRAME_LIST_FILE, POINTS_FOLDER, 
VIEW_FILE    ap = argparse.ArgumentParser()    ap.add_argument("--cfg_file", type=str, default=None, help="OpenPCDet cfg yaml")    ap.add_argument("--score_thr", type=float, default=None, help="override score threshold")    ap.add_argument("--nms_iou", type=float, default=None, help="override NMS IoU threshold (BEV AABB)")    ap.add_argument("--result_pkl", type=str, default=RESULT_PKL)    ap.add_argument("--data_path", type=str, default=DATA_PATH)    ap.add_argument("--frame_list", type=str, default=None, help="path to test.txt (override)")    args, _ = ap.parse_known_args()     s, n = read_thresholds_from_cfg(args.cfg_file)    if s is not None and args.score_thr is None:        SCORE_THR = s    if n is not None and args.nms_iou is None:        NMS_IOU_THR = n     if args.score_thr is not None:        SCORE_THR = args.score_thr    if args.nms_iou is not None:        NMS_IOU_THR = args.nms_iou     RESULT_PKL = args.result_pkl    DATA_PATH = args.data_path    POINTS_FOLDER = os.path.join(DATA_PATH, "points")    VIEW_FILE = os.path.join(DATA_PATH, "view.json")     global frame_ids    frame_list = args.frame_list or os.path.join(DATA_PATH, "ImageSets", "test.txt")    with open(frame_list, 'r') as f:        frame_ids = [line.strip() for line in f.readlines()]     print(f"[INFO] SCORE_THR={SCORE_THR:.3f}, NMS_IOU_THR={NMS_IOU_THR:.3f}")    print(f"[INFO] RESULT_PKL={RESULT_PKL}")    print(f"[INFO] DATA_PATH={DATA_PATH}, POINTS_FOLDER={POINTS_FOLDER}")    return args #포인트 로더 def load_npy_pointcloud(file_path):    points = np.load(file_path)      xyz = points[:, :3]    pcd = o3d.geometry.PointCloud()    pcd.points = o3d.utility.Vector3dVector(xyz)    pcd.paint_uniform_color([1.0, 1.0, 1.0])      return pcd, xyz.shape[0] def create_bbox(center, size, yaw, color):    R = o3d.geometry.get_rotation_matrix_from_axis_angle([0, 0, yaw])    box = o3d.geometry.OrientedBoundingBox(center, R, size)    box.color = color    return box def load_predictions(pkl_path):    with 
open(pkl_path, 'rb') as f:        det_annos = pickle.load(f)     return {str(anno['frame_id']): anno for anno in det_annos} def boxes_lidar_to_bev_aabb_xyxy(boxes_lidar):    """    boxes_lidar: (N,7) [x,y,z,dx,dy,dz,yaw]    → (N,4) [x1,y1,x2,y2]  (회전 박스의 AABB)    """    out = np.zeros((len(boxes_lidar), 4), dtype=np.float32)    for i, (x, y, z, dx, dy, dz, yaw) in enumerate(boxes_lidar):        hx, hy = dx / 2.0, dy / 2.0        corners = np.array([[ hx,  hy],                            [ hx, -hy],                            [-hx,  hy],                            [-hx, -hy]], dtype=np.float32)        c, s = np.cos(yaw), np.sin(yaw)        R = np.array([[c, -s],                      [s,  c]], dtype=np.float32)        pts = (corners @ R.T) + np.array([x, y], dtype=np.float32)        x1, y1 = pts[:, 0].min(), pts[:, 1].min()        x2, y2 = pts[:, 0].max(), pts[:, 1].max()        out[i] = [x1, y1, x2, y2]    return out def iou_xyxy(a, b):    """    a: (4,), b: (M,4)  → IoU: (M,)    """    xx1 = np.maximum(a[0], b[:, 0])    yy1 = np.maximum(a[1], b[:, 1])    xx2 = np.minimum(a[2], b[:, 2])    yy2 = np.minimum(a[3], b[:, 3])    w = np.maximum(0.0, xx2 - xx1)    h = np.maximum(0.0, yy2 - yy1)    inter = w * h    area_a = (a[2] - a[0]) * (a[3] - a[1]) + 1e-6    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) + 1e-6    return inter / (area_a + area_b - inter + 1e-6) def nms_bev_aabb(boxes_xyxy, scores, iou_thr):    if len(boxes_xyxy) == 0:        return []    order = np.argsort(scores)[::-1]    keep = []    while order.size > 0:        i = order[0]        keep.append(i)        if order.size == 1:            break        ious = iou_xyxy(boxes_xyxy[i], boxes_xyxy[order[1:]])        remain = np.where(ious <= iou_thr)[0]        order = order[1:][remain]    return keep def visualize_frames():    args = parse_args_and_apply_cfg()     vis = o3d.visualization.VisualizerWithKeyCallback()    if not vis.create_window(window_name='LiDAR Viewer'):        print("[ERROR] Failed to 
create Open3D window.")        return    opt = vis.get_render_option()    opt.point_size = 1.0    opt.background_color = np.array([0.0, 0.0, 0.0])     # 예측 결과 로드    try:        preds_by_frame = load_predictions(RESULT_PKL)    except Exception as e:        print(f"[ERROR] Failed to read {RESULT_PKL}: {e}")        return     frame_idx = 0    last_key_time = 0    key_delay = 0.1  # seconds    coord = o3d.geometry.TriangleMesh.create_coordinate_frame(size=1.0, origin=[0, 0, 0])     def debounce():        nonlocal last_key_time        now = time.time()        if now - last_key_time >= key_delay:            last_key_time = now            return True        return False    def update_scene():        vis.clear_geometries()         frame_id = frame_ids[frame_idx]        pc_path = os.path.join(POINTS_FOLDER, f"{frame_id}.npy")         # 포인트        if not os.path.exists(pc_path):            print(f"[WARN] point file not found: {pc_path}")            num_pts = 0        else:            pcd, num_pts = load_npy_pointcloud(pc_path)            vis.add_geometry(pcd)         # 예측        anno = preds_by_frame.get(str(frame_id))        if anno is None:            alt = str(frame_id).lstrip('0') or "0"            anno = preds_by_frame.get(alt)         num_boxes_raw = 0        num_boxes_after_score = 0        num_boxes_after_nms = 0         if anno is None:            print(f"[WARN] No prediction found for frame_id={frame_id}")        else:            boxes  = np.asarray(anno.get('boxes_lidar', []), dtype=np.float32)  # (N,7)            names  = np.asarray(anno.get('name', []))            scores = np.asarray(anno.get('score', []), dtype=np.float32)            labels = np.asarray(anno.get('pred_labels', []), dtype=np.int32)             num_boxes_raw = len(boxes)             # 1) score threshold            keep_score = np.arange(num_boxes_raw)            if scores.size:                keep_score = np.where(scores >= float(SCORE_THR))[0]            boxes_f  = boxes[keep_score]            
scores_f = scores[keep_score] if scores.size else np.ones((len(keep_score),), dtype=np.float32)            names_f  = names[keep_score]  if names.size  else []            labels_f = labels[keep_score] if labels.size else []            num_boxes_after_score = len(boxes_f)                    keep_nms = np.arange(len(boxes_f))            if len(boxes_f) and NMS_IOU_THR is not None and NMS_IOU_THR > 0:                aabbs = boxes_lidar_to_bev_aabb_xyxy(boxes_f)                keep_nms = nms_bev_aabb(aabbs, scores_f, float(NMS_IOU_THR))            boxes_v  = boxes_f[keep_nms]            scores_v = scores_f[keep_nms]            names_v  = names_f[keep_nms]  if len(names_f)  else []            labels_v = labels_f[keep_nms] if len(labels_f) else []            num_boxes_after_nms = len(boxes_v)             #시각화            for i in range(len(boxes_v)):                x, y, z, dx, dy, dz, yaw = boxes_v[i].tolist()                if len(names_v):                    color = CLASS_COLOR.get(str(names_v[i]), [1, 0, 0])                elif len(labels_v):                    color = LABEL_COLOR.get(int(labels_v[i]), [1, 0, 0])                else:                    color = [1, 0, 0]                vis.add_geometry(create_bbox([x, y, z], [dx, dy, dz], yaw, color))                vis.add_geometry(create_heading_arrow([x, y, z], yaw))         vis.add_geometry(coord)        vis.poll_events()        vis.update_renderer()         if os.path.exists(VIEW_FILE):            try:                params = o3d.io.read_pinhole_camera_parameters(VIEW_FILE)                vis.get_view_control().convert_from_pinhole_camera_parameters(params)            except Exception:                pass         # --- 콘솔 출력: 프레임/포인트/박스 통계 ---        print(f"[Frame {frame_idx+1}/{len(frame_ids)}] id={frame_id} | points={num_pts} "              f"| boxes raw={num_boxes_raw}, after_score={num_boxes_after_score}, after_nms={num_boxes_after_nms}")     def next_frame(_):        nonlocal frame_idx        if debounce():  
          frame_idx = (frame_idx + 1) % len(frame_ids)            update_scene()        return False     def prev_frame(_):        nonlocal frame_idx        if debounce():            frame_idx = (frame_idx - 1 + len(frame_ids)) % len(frame_ids)            update_scene()        return False     def save_view(_):        params = vis.get_view_control().convert_to_pinhole_camera_parameters()        o3d.io.write_pinhole_camera_parameters(VIEW_FILE, params)        print(f"Viewpoint saved to {VIEW_FILE}")        return False     def quit_viewer(_):        print("Quitting viewer.")        vis.close()        return False     vis.register_key_callback(ord("D"), next_frame)    vis.register_key_callback(ord("A"), prev_frame)    vis.register_key_callback(ord("F"), save_view)    vis.register_key_callback(ord("Q"), quit_viewer)     update_scene()    vis.run() if __name__ == "__main__":    visualize_frames() ----------------------------------------------------------------------------------------------------------------------------------------<div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/ccb264033b01e767ad04c6e2823913fa0c0284aa_re_1758530276921" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/ccb264033b01e767ad04c6e2823913fa0c0284aa_re_1758530276921" data-origin-width="1912" data-origin-height="1044"><div class="figcaption">test_data1</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/a0b192c521a75ea9c7ce8650c13ef3b8d2257f96_re_1758530276922" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/a0b192c521a75ea9c7ce8650c13ef3b8d2257f96_re_1758530276922" data-origin-width="1906" data-origin-height="1051"><div class="figcaption">test_data2</div></div><div class="figure-img" data-ke-type="image" 
data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/27a913c2d2e85d1f0a2842055b27a74a4475bf82_re_1758530276921" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/27a913c2d2e85d1f0a2842055b27a74a4475bf82_re_1758530276921" data-origin-width="1913" data-origin-height="1074"><div class="figcaption">test_data3</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/6072b4d45efd106b7a0a0d6d5f895a673eb72686_re_1758530276920" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/6072b4d45efd106b7a0a0d6d5f895a673eb72686_re_1758530276920" data-origin-width="1907" data-origin-height="984"><div class="figcaption">test_data4</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/0248e4bca177cff214bc2d6f9a9b7937b3446098_re_1758530276920" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/0248e4bca177cff214bc2d6f9a9b7937b3446098_re_1758530276920" data-origin-width="1903" data-origin-height="1044"><div class="figcaption">test_data5</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/a5d4ec7172d14bbbbb3a3e2b250ffa41412a4a9c_re_1758530276918" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/a5d4ec7172d14bbbbb3a3e2b250ffa41412a4a9c_re_1758530276918" data-origin-width="1902" data-origin-height="1051"><div class="figcaption">test_data6</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/fb326340a982e1d062bfbc99f5030f36f34f2832_re_1758530276918" class="txc-image" 
data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/fb326340a982e1d062bfbc99f5030f36f34f2832_re_1758530276918" data-origin-width="1880" data-origin-height="1050"><div class="figcaption">test_data7</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/08971249faeffbde0b56647583cb03ac0ca40d7c_re_1758530276918" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/08971249faeffbde0b56647583cb03ac0ca40d7c_re_1758530276918" data-origin-width="927" data-origin-height="743"></div>boxes_raw = 해당 프레임에서 모델이 출력한 원래 예측 상자의 개수, after_score = 점수 임계값으로 걸러낸 뒤 남은 상자의 개수, after_NMS = 점수 필터 후 남은 상자들에 NMS를 적용한 뒤 남은 최종 상자 수이다. raw->after_score의 감소폭이 큰 것으로 보아 모델이 예측 점수가 낮은 상자를 많이 생성하는 것으로 분석할 수 있다. 따라서 score_thresh를 조정하여 recall 값을 올리는 방법을 생각하고 있다. 위 그림에서 볼 수 있듯이, 테스트 데이터셋은 주로 도심지 거리 환경에서 수집된 것으로 추측할 수 있다. 교차로와 건물로 추정되는 포인트 클라우드가 보이고, 객체는 대부분 차량이며 보행자와 자전거 등도 종종 출현한다. 또한 test 데이터셋은 waymo 데이터셋과 비슷하게 프레임이 연속적으로 이어지다가 수십 개의 프레임 이후 다른 장면으로 전환되는 것을 확인하였다. 따라서 한 장면 안에서 프레임이 여러 개 지나가는 동안 이전에 정답으로 인식한 객체들을 계속 정답으로 예측해야 점수가 높게 나올 것으로 보인다. 현재 Voxel_rcnn으로 테스트 데이터셋에 대한 예측 결과를 시각화한 결과, 프레임이 여러 개 지나가면 이전에 정답으로 인식했던 객체들을 잘 찾지 못하는 것을 확인했다. 또한 소형 객체(보행자, 자전거)에 대해서는 중복 박스가 많아 NMS_THRESH를 조정하면 점수를 올릴 수 있을 것으로 보인다. 
<div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/c49c980145935233c986669ba6cc7e3c08c417fa_re_1758530276921" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/c49c980145935233c986669ba6cc7e3c08c417fa_re_1758530276921" data-origin-width="1906" data-origin-height="1050"><div class="figcaption">train_data1</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/73fa531143f79396d6552ba7457a9ba877608876_re_1758530276921" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/73fa531143f79396d6552ba7457a9ba877608876_re_1758530276921" data-origin-width="1915" data-origin-height="1071"><div class="figcaption">train_data2</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/c9a19d6feb27c9c4ef5f18085d687c388a2aca68_re_1758530276921" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/c9a19d6feb27c9c4ef5f18085d687c388a2aca68_re_1758530276921" data-origin-width="1910" data-origin-height="1049"><div class="figcaption">train_data3</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/fc0f4b5a6e7bbe40a5a4e15dc7b83687b2edf412_re_1758530276921" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/fc0f4b5a6e7bbe40a5a4e15dc7b83687b2edf412_re_1758530276921" data-origin-width="1902" data-origin-height="1041"><div class="figcaption">train_data4</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/616847979933e86b74f938ebd3edd2cfe996c8bc_re_1758530276921" 
class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/616847979933e86b74f938ebd3edd2cfe996c8bc_re_1758530276921" data-origin-width="1907" data-origin-height="1041"><div class="figcaption">train_data5</div></div> <div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/08ae638870d737b38d2e4686521a49c99d88e6d1_re_1758530276922" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/08ae638870d737b38d2e4686521a49c99d88e6d1_re_1758530276922" data-origin-width="713" data-origin-height="465"><div class="figcaption">pointcloud분포도</div></div>train 데이터셋은 위 사진에서 볼 수 있듯이 64ch,128ch 두 라이다가 섞여 있는 데이터로 시각화 하여 정답(GT)라벨에 대해 바운딩 박스를 시각화 한 결과를 담고 있다. 데이터를 들여다 보면 위의 EDA결과 그래프와 같이 포인트 개수가 상이하여 학습 단계에서 test데이터셋(64ch로 생각)에 대해 어떻게 학습을 할 것인지가 중요한 key point같다.  모의 테스트 데이터셋에 사용된 라이다<div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/03a9feba697473007d76b694af13cc07e5a2739d" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/03a9feba697473007d76b694af13cc07e5a2739d" data-origin-width="768" data-origin-height="525"><div class="figcaption">사양표</div></div>채널:64ch 측정 범위/정확도: 0.3~200(m),정확도+-5cm/+-cm(1-200)FOV: 수평 360도, 수직40도(-25~15)각 해상도(수평/수직): 수평 0.2도(10 Hz), 0.4도(20 Hz) 수직은 채널에 따라 비균일주요 구간 0.167°(Ch 6~54)주기:10Hz 리턴모드: 싱글리턴    이번에 추가한 증강기법<div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/a191a98f6d0778084a0c7cabec4bdbfb5893bc0d" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/a191a98f6d0778084a0c7cabec4bdbfb5893bc0d" data-origin-width="452" data-origin-height="339"><div class="figcaption">dropout기법 적용이유</div></div> random_points_dropout: 일정 비율의 포인트를 임의로 제거하여 저밀도 데이터(64 ch) 를 모사하기 위해서 추가함현재 훈련데이터셋과 테스트데이터셋의 
포인트 클라우드의 개수가 상이하여 포인트 밀도의 차이가 큰 것을 확인한 상황이다. 이 문제점을 극복하기 위하여 선택한 기법이 random_points_dropout이다. 이 증강기법은 domain gap을 줄여 64ch를 모방할 수 있는 환경으로 만들어 준다. <div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/9b321017a86cde54d4b6bec6818aed63d5e3c4ad" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/9b321017a86cde54d4b6bec6818aed63d5e3c4ad" data-origin-width="366" data-origin-height="462"><div class="figcaption">resampling을 해야하는 이유</div></div> line_downsample: 포인트 클라우드에서 수직 스캔 라인을 일정 비율로 제거하여 128 ch 데이터를 64 ch 수준으로 근사화 하기 위한 증강기법이다. 128ch의 라이다 데이터는 동일한 범위에서 더 많은 수직 스캔 라인을 포함한다. 단순한 points_dropout으로는 실제 64ch의 라이다의 구조적 특징을 재현하기 어렵기 때문에 채택을 하였다. 위 첨부사진을 보면 센서마다 레이저 층 수가 다르고 다른 레이저 층을 갖는 센서 사이의 성능 손실을 줄이기 위해 포인트 클라우드를 resampling을 해야한다 라고 주장하고 있다. 즉 domain gap 상황을 해결하기 위한 방법을 제시한 논문이다. <div class="figure-file" data-ke-type="file" data-file-src="https://t1.daumcdn.net/cafeattach/1RgNt/a39a6cf18919dc2e0d0e0cd7374ada349b9924d4?download" data-file-name="2311.10845v2.pdf" data-file-size="7189428" data-mimetype="application/pdf" data-ke-align="alignCenter"><div class="image"></div><div class="desc"><div class="filename">2311.10845v2.pdf</div><div class="size">6.86MB</div></div></div> Voxel_RCNN 훈련시 변경사항 VOXEL_SIZE [0.08, 0.08, 0.15] 조정 GAUSSIAN_OVERLAP 0.2, MIN_RADIUS 1LOSS_WEIGHTS cls_weight=2.0, loc_weight=2.0 클래스 불균형 해결방법으로 cls_weight를 조정SCORE_THRESH 0.15, NMS_THRESH 0.6 (DenseHead) / 0.65 (ROI Head) NMS와 score를 조정(시각화 하여 계속 조절할 예정) ROI_GRID_POOL.GRID_SIZE 8 ,RoI Pooling 해상도 증가:(PV-RCNN++논문에서는 6->8은 큰 이득은 없다고 하였지만 현재 소형 객체에 대한 recall값이 절실한 상황이라 8로 조정ROI_PER_IMAGE 192, CLS_FG_THRESH 0.70: ROI의 샘플수를 늘리고 foreground IoU임계값을 0.7로 낮춰 보행자와 자전거 클래스에 대한 긍정 샘플을 더 확보하기 위해 조정----------------------------------------------------------------------------------------------------------------------------------------<div class="figure-img" data-ke-type="image" 
data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/0fabfd4f29d6f2ccd2f5500370b618dd3ccf8b2b_re_1758530276918" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/0fabfd4f29d6f2ccd2f5500370b618dd3ccf8b2b_re_1758530276918" data-origin-width="795" data-origin-height="833"><div class="figcaption">2025_voxelNext loss</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/01f6f633f2e7ea03a2659103dab8df9b67f18cc2_re_1758530276918" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/01f6f633f2e7ea03a2659103dab8df9b67f18cc2_re_1758530276918" data-origin-width="1160" data-origin-height="565"><div class="figcaption">2025_val데이터셋으로 검증 결과 voxelNext</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/7ec125ae3101018dad6ab7ffcce1705a0987b6ee_re_1758530276918" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/7ec125ae3101018dad6ab7ffcce1705a0987b6ee_re_1758530276918" data-origin-width="1154" data-origin-height="546"><div class="figcaption">2025_baseline_model 검증결과</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/4ea79c23a4585d1192339c5009e97db25a1ac9af" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/4ea79c23a4585d1192339c5009e97db25a1ac9af" data-origin-width="480" data-origin-height="480"><div class="figcaption">voxel_rcnn_60epoch 모의데이터셋 검증결과</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/eccbb202ee8a99ca90e22045140842372d7673cf" class="txc-image" 
data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/eccbb202ee8a99ca90e22045140842372d7673cf" data-origin-width="1090" data-origin-height="504"><div class="figcaption">2025_voxel_rcnn_60epoch검증 결과</div></div> <div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/e8ebabbf3164f5a65eeabe3b589a878b8069f798_re_1758530276918" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/e8ebabbf3164f5a65eeabe3b589a878b8069f798_re_1758530276918" data-origin-width="796" data-origin-height="1085"><div class="figcaption">2025_voxel_rcnn loss</div></div>#voxel_rcnn-custom.yaml ------------------------------------------------------------------------------------------------------------------------CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist'] DATA_CONFIG:    _BASE_CONFIG_: cfgs/dataset_configs/custom_av_dataset.yaml    DATA_PROCESSOR:        -   NAME: mask_points_and_boxes_outside_range            REMOVE_OUTSIDE_BOXES: True            STRICT_MASK: True         -   NAME: shuffle_points            SHUFFLE_ENABLED: {                'train': True,                'test': True            }         -   NAME: transform_points_to_voxels_placeholder            VOXEL_SIZE: [ 0.08, 0.08, 0.15 ] #0.1 0.15 MODEL:    NAME: VoxelRCNN     VFE:        NAME: DynMeanVFE     BACKBONE_3D:        NAME: VoxelBackBone8x     MAP_TO_BEV:        NAME: HeightCompression        NUM_BEV_FEATURES: 256     BACKBONE_2D:        NAME: BaseBEVBackbone         LAYER_NUMS: [5, 5]        LAYER_STRIDES: [1, 2]        NUM_FILTERS: [128, 256]        UPSAMPLE_STRIDES: [1, 2]        NUM_UPSAMPLE_FILTERS: [256, 256]     DENSE_HEAD:        NAME: CenterHead        CLASS_AGNOSTIC: False         CLASS_NAMES_EACH_HEAD: [             [ 'Vehicle', 'Pedestrian', 'Cyclist' ]        ]         SHARED_CONV_CHANNEL: 64        USE_BIAS_BEFORE_NORM: True        NUM_HM_CONV: 2        SEPARATE_HEAD_CFG:            
HEAD_ORDER: [ 'center', 'center_z', 'dim', 'rot' ]            HEAD_DICT: {                'center': { 'out_channels': 2, 'num_conv': 2 },                'center_z': { 'out_channels': 1, 'num_conv': 2 },                'dim': { 'out_channels': 3, 'num_conv': 2 },                'rot': { 'out_channels': 2, 'num_conv': 2 },            }         TARGET_ASSIGNER_CONFIG:            FEATURE_MAP_STRIDE: 8            NUM_MAX_OBJS: 500            GAUSSIAN_OVERLAP: 0.2             MIN_RADIUS: 1          LOSS_CONFIG:            LOSS_WEIGHTS: {                'cls_weight': 2.0,                 'loc_weight': 2.0,                 'code_weights': [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ]            }         POST_PROCESSING:            SCORE_THRESH: 0.15             POST_CENTER_LIMIT_RANGE: [ -75.2, -75.2, -2, 75.2, 75.2, 4 ]            MAX_OBJ_PER_SAMPLE: 800             NMS_CONFIG:                NMS_TYPE: nms_gpu                NMS_THRESH: 0.6                 NMS_PRE_MAXSIZE: 4096                NMS_POST_MAXSIZE: 500         DEBUG_LABEL_HIST: True    ROI_HEAD:        NAME: VoxelRCNNHead        CLASS_AGNOSTIC: True          SHARED_FC: [256, 256]        CLS_FC: [256, 256]        REG_FC: [256, 256]        DP_RATIO: 0.3         NMS_CONFIG:            TRAIN:                NMS_TYPE: nms_gpu                MULTI_CLASSES_NMS: False                NMS_PRE_MAXSIZE: 9000                NMS_POST_MAXSIZE: 512                NMS_THRESH: 0.7             TEST:                NMS_TYPE: nms_gpu                MULTI_CLASSES_NMS: False                NMS_PRE_MAXSIZE: 4096                 NMS_POST_MAXSIZE: 512                 NMS_THRESH: 0.65                         ROI_GRID_POOL:            FEATURES_SOURCE: ['x_conv2', 'x_conv3', 'x_conv4']            PRE_MLP: True            GRID_SIZE: 8            POOL_LAYERS:                x_conv2:                    MLPS: [ [ 64, 64 ] ]                    QUERY_RANGES: [ [ 3, 3, 2 ] ]                    POOL_RADIUS: [ 0.4 ]                    NSAMPLE: [ 16 
]                    POOL_METHOD: max_pool                x_conv3:                    MLPS: [ [ 64, 64 ] ]                    QUERY_RANGES: [ [ 3, 3, 2 ] ]                    POOL_RADIUS: [ 0.8 ]                    NSAMPLE: [ 16 ]                    POOL_METHOD: max_pool                x_conv4:                    MLPS: [ [ 64, 64 ] ]                    QUERY_RANGES: [ [ 3, 3, 2 ] ]                    POOL_RADIUS: [ 1.6 ]                    NSAMPLE: [ 16 ]                    POOL_METHOD: max_pool         TARGET_CONFIG:            BOX_CODER: ResidualCoder            ROI_PER_IMAGE: 192            FG_RATIO: 0.5                       CLS_SCORE_TYPE: roi_iou             CLS_FG_THRESH: 0.70             CLS_BG_THRESH: 0.25            CLS_BG_THRESH_LO: 0.1            HARD_BG_RATIO: 0.8             REG_FG_THRESH: 0.55          LOSS_CONFIG:            CLS_LOSS: BinaryCrossEntropy            REG_LOSS: smooth-l1            CORNER_LOSS_REGULARIZATION: True            LOSS_WEIGHTS: {                'rcnn_cls_weight': 1.0,                'rcnn_reg_weight': 1.0,                'rcnn_corner_weight': 1.0,                'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]            }     POST_PROCESSING:        RECALL_THRESH_LIST: [0.3, 0.5, 0.7]        SCORE_THRESH: 0.15 #0.1 #0.2        OUTPUT_RAW_SCORE: False         EVAL_METRIC: waymo         NMS_CONFIG:            MULTI_CLASSES_NMS: False  #False            NMS_TYPE: nms_gpu            NMS_THRESH: 0.65 #0.7            NMS_PRE_MAXSIZE: 4096            NMS_POST_MAXSIZE: 500 OPTIMIZATION:    BATCH_SIZE_PER_GPU: 2    NUM_EPOCHS: 60     OPTIMIZER: adam_onecycle    LR: 0.008     WEIGHT_DECAY: 0.001    MOMENTUM: 0.9     MOMS: [0.95, 0.85]    PCT_START: 0.35 #0.4    DIV_FACTOR: 10    DECAY_STEP_LIST: [35, 45]    LR_DECAY: 0.1    LR_CLIP: 0.0000001     LR_WARMUP: False    WARMUP_EPOCH: 1     GRAD_NORM_CLIP: 10------------------------------------------------------------------------------------------------------------------------ <div 
class="table-wrap"><table data-ke-type="table" data-ke-align="alignLeft" style="width: 100.252%; height: 145px;" border="1"><tbody><tr style="height: 18px;"><td style="width: 23.8174%; height: 18px;">성능지표 (2024) </td><td style="width: 27.3804%; height: 18px;">베이스라인  </td><td style="width: 22.2874%; height: 18px;">voxelNext</td><td style="width: 26.9587%; height: 18px;">Voxel_RCNN_60epoch</td></tr><tr style="height: 18px;"><td style="width: 23.8174%; height: 18px;">VEHICLE_AP/L1 </td><td style="width: 27.3804%; height: 18px;">0.8911</td><td style="width: 22.2874%; height: 18px;">0.8255</td><td style="width: 26.9587%; height: 18px;">0.8492</td></tr><tr style="height: 18px;"><td style="width: 23.8174%; height: 18px;">VEHICLE_AP/L2</td><td style="width: 27.3804%; height: 18px;">0.8801</td><td style="width: 22.2874%; height: 18px;"> 0.8069</td><td style="width: 26.9587%; height: 18px;">0.8366</td></tr><tr style="height: 18px;"><td style="width: 23.8174%; height: 18px;">PEDESTRIAN_AP/L1</td><td style="width: 27.3804%; height: 18px;">0.9023</td><td style="width: 22.2874%; height: 18px;">0.6777</td><td style="width: 26.9587%; height: 18px;">0.7389</td></tr><tr style="height: 19px;"><td style="width: 23.8174%; height: 19px;">PEDESTRIAN_AP/L2</td><td style="width: 27.3804%; height: 19px;">0.8920</td><td style="width: 22.2874%; height: 19px;">0.6562</td><td style="width: 26.9587%; height: 19px;">0.7231</td></tr><tr style="height: 18px;"><td style="width: 23.8174%; height: 18px;">CYCLIST_AP/L1</td><td style="width: 27.3804%; height: 18px;">0.8962</td><td style="width: 22.2874%; height: 18px;">0.7923</td><td style="width: 26.9587%; height: 18px;">0.8173</td></tr><tr style="height: 18px;"><td style="width: 23.8174%; height: 18px;">CYCLIST_AP/L2</td><td style="width: 27.3804%; height: 18px;">0.8829</td><td style="width: 22.2874%; height: 18px;">0.7732</td><td style="width: 26.9587%; height: 18px;">0.8042</td></tr><tr style="height: 18px;"><td style="width: 23.8174%; height: 
18px;">추론시간(4070SUPERTi)</td><td style="width: 27.3804%; height: 18px;">47ms</td><td style="width: 22.2874%; height: 18px;">연산중 오류 발생</td><td style="width: 26.9587%; height: 18px;">48ms</td></tr></tbody></table></div> <div class="table-wrap"><table data-ke-type="table" data-ke-align="alignLeft" style="width: 100%; height: 144px;" border="1"><tbody><tr style="height: 18px;"><td style="width: 22.3203%; height: 18px;">성능지표  (2025)</td><td style="width: 26.1034%; height: 18px;">베이스라인  </td><td style="width: 21.1854%; height: 18px;">voxelNext</td><td style="width: 25.599%; height: 18px;">Voxel_RCNN_60epoch</td></tr><tr style="height: 18px;"><td style="width: 22.3203%; height: 18px;">VEHICLE_AP/L1 </td><td style="width: 26.1034%; height: 18px;">0.8611</td><td style="width: 21.1854%; height: 18px;">0.7980</td><td style="width: 25.599%; height: 18px;">0.8250</td></tr><tr style="height: 18px;"><td style="width: 22.3203%; height: 18px;">VEHICLE_AP/L2</td><td style="width: 26.1034%; height: 18px;">0.8433</td><td style="width: 21.1854%; height: 18px;"> 0.7763</td><td style="width: 25.599%; height: 18px;">0.8051</td></tr><tr style="height: 18px;"><td style="width: 22.3203%; height: 18px;">PEDESTRIAN_AP/L1</td><td style="width: 26.1034%; height: 18px;">0.8396</td><td style="width: 21.1854%; height: 18px;">0.6245</td><td style="width: 25.599%; height: 18px;">0.6391</td></tr><tr style="height: 18px;"><td style="width: 22.3203%; height: 18px;">PEDESTRIAN_AP/L2</td><td style="width: 26.1034%; height: 18px;">0.8185</td><td style="width: 21.1854%; height: 18px;">0.6113</td><td style="width: 25.599%; height: 18px;">0.6145</td></tr><tr style="height: 18px;"><td style="width: 22.3203%; height: 18px;">CYCLIST_AP/L1</td><td style="width: 26.1034%; height: 18px;">0.8784</td><td style="width: 21.1854%; height: 18px;">0.7743</td><td style="width: 25.599%; height: 18px;">0.8108</td></tr><tr style="height: 18px;"><td style="width: 22.3203%; height: 18px;">CYCLIST_AP/L2</td><td style="width: 
26.1034%; height: 18px;">0.8600</td><td style="width: 21.1854%; height: 18px;">0.7642</td><td style="width: 25.599%; height: 18px;">0.7911</td></tr><tr style="height: 18px;"><td style="width: 22.3203%; height: 18px;">추론시간(4070SUPERTi)</td><td style="width: 26.1034%; height: 18px;">47ms</td><td style="width: 21.1854%; height: 18px;">연산중 오류 발생</td><td style="width: 25.599%; height: 18px;">48ms</td></tr></tbody></table></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/f02c0b629cfbd30769769d903efcba5283acd6a7" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/f02c0b629cfbd30769769d903efcba5283acd6a7" data-origin-width="1167" data-origin-height="445"><div class="figcaption">voxel_rcnn추론결과</div></div><div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/1RgNt/43649c0531e113b5e779be71db06c09dc2f7fdf3" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/1RgNt/43649c0531e113b5e779be71db06c09dc2f7fdf3" data-origin-width="1116" data-origin-height="539"><div class="figcaption">baseline추론시간</div></div>VoxelNext는 추론 결과 GFLOPs가 2.21로 나오는데, thop 패키지가 spconv 연산을 skip or deny 하는 것으로 보인다. 따라서 VoxelNext 모델은 과감하게 배제하였다. 현재 학습이 완료되었으며 loss가 지속적으로 하락하고 있는 추세이다. voxel_rcnn은 baseline 모델의 검증 결과와 비교하여 점수가 낮다. 학습이 끝나고도 점수 차이가 여전히 심하면 모델을 pv_rcnn++ 모델로 교체하고, 이전에 추가한 증강 기법과 head 부분의 소형 객체 탐지 향상 기법을 추가하여 학습을 돌릴 것이다.

카페정보

인공지능로봇연구실

실버 (공개)
카페지기 Sungryul Le..
회원수 78
방문수33
카페앱수3

카페 전체 메뉴

▲

카페 게시글

과제게시판 [자율주행 AI 챌린지] test_dataset 분석 및 모델 훈련 보고서

이승현 추천 0 조회 184 25.09.22 23:12 댓글 1

게시글 본문내용

다음검색

저작자 표시 컨텐츠변경 비영리

Sungryul Lee
25.09.23 11:16

첫댓글 1. 모의 테스트 데이터셋의 사양은 자세히 적을 것: 출처, 센서 사양(모델, 채널), 개수, 통계 그래프(포인트 개수 분포)
2. 표에 있는 3개의 모델이 동일한 모의 테스트 데이터셋을 이용해서 평가한 것인지?
3. 로스 그래프의 가로축은 에폭(epoch) 단위로 수정할 것
4. 아직 훈련이 진행 중이라는 건지?

검색 옵션 선택상자

댓글내용선택됨 옵션 더 보기

댓글내용

댓글 작성자

최신목록

인공지능로봇연구실

https://cafe.daum.net/SmartRobot

카페 전체 메뉴