Enhance support for YOLOv8 pose estimation models, accommodating multiple object classes and varying numbers of keypoints per class
zhulei2016 · Jun 18, 2024 · 5e98e03 · 5e98e03
1 parent 2f43a8a
commit 5e98e03
Show file tree

Hide file tree

Showing 9 changed files with 112 additions and 85 deletions.
diff --git a/anylabeling/configs/auto_labeling/yolov6lite_l_face.yaml b/anylabeling/configs/auto_labeling/yolov6lite_l_face.yaml
@@ -5,10 +5,9 @@ model_path: https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/y
 nms_threshold: 0.45
 confidence_threshold: 0.4
 classes:
-  - face
-five_key_points_classes:
-  - left_eye
-  - right_eye
-  - nost_tip
-  - left_mouth_corner
-  - right_mouth_corner
+  face:
+    - left_eye
+    - right_eye
+    - nost_tip
+    - left_mouth_corner
+    - right_mouth_corner
diff --git a/anylabeling/configs/auto_labeling/yolov6lite_m_face.yaml b/anylabeling/configs/auto_labeling/yolov6lite_m_face.yaml
@@ -5,10 +5,9 @@ model_path: https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/y
 nms_threshold: 0.45
 confidence_threshold: 0.4
 classes:
-  - face
-five_key_points_classes:
-  - left_eye
-  - right_eye
-  - nost_tip
-  - left_mouth_corner
-  - right_mouth_corner
+  face:
+    - left_eye
+    - right_eye
+    - nost_tip
+    - left_mouth_corner
+    - right_mouth_corner
diff --git a/anylabeling/configs/auto_labeling/yolov6lite_s_face.yaml b/anylabeling/configs/auto_labeling/yolov6lite_s_face.yaml
@@ -3,12 +3,11 @@ name: yolov6lite_s_face-r20230520
 display_name: YOLOv6Lite_s-Face MeiTuan
 model_path: https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov6lite_s_face.onnx
 nms_threshold: 0.45
-confidence_threshold: 0.4
+confidence_threshold: 0.1
 classes:
-  - face
-five_key_points_classes:
-  - left_eye
-  - right_eye
-  - nost_tip
-  - left_mouth_corner
-  - right_mouth_corner
+  face:
+    - left_eye
+    - right_eye
+    - nost_tip
+    - left_mouth_corner
+    - right_mouth_corner
diff --git a/anylabeling/configs/auto_labeling/yolov8n_pose.yaml b/anylabeling/configs/auto_labeling/yolov8n_pose.yaml
@@ -4,24 +4,24 @@ display_name: YOLOv8n-Pose Ultralytics
 model_path: https://github.com/CVHub520/X-AnyLabeling/releases/download/v1.0.0/yolov8n-pose.onnx
 confidence_threshold: 0.5
 nms_threshold: 0.6
-show_boxes: True
+kpt_threshold: 0.1
+has_visible: true
 classes:
-  - person
-keypoints:
-  - nose
-  - left_eye
-  - right_eye
-  - left_ear
-  - right_ear
-  - left_shoulder
-  - right_shoulder
-  - left_elbow
-  - right_elbow
-  - left_wrist
-  - right_wrist
-  - left_hip
-  - right_hip
-  - left_knee
-  - right_knee
-  - left_ankle
-  - right_ankle
+  person:
+    - nose
+    - left_eye
+    - right_eye
+    - left_ear
+    - right_ear
+    - left_shoulder
+    - right_shoulder
+    - left_elbow
+    - right_elbow
+    - left_wrist
+    - right_wrist
+    - left_hip
+    - right_hip
+    - left_knee
+    - right_knee
+    - left_ankle
+    - right_ankle
diff --git a/anylabeling/configs/auto_labeling/yolov8x_pose_p6.yaml b/anylabeling/configs/auto_labeling/yolov8x_pose_p6.yaml
@@ -4,24 +4,24 @@ display_name: YOLOv8x-Pose-P6 Ultralytics
 model_path: https://github.com/CVHub520/X-AnyLabeling/releases/download/v1.0.0/yolov8x-pose-p6.onnx
 confidence_threshold: 0.25
 nms_threshold: 0.6
-show_boxes: True
+kpt_threshold: 0.1
+has_visible: true
 classes:
-  - person
-keypoints:
-  - nose
-  - left_eye
-  - right_eye
-  - left_ear
-  - right_ear
-  - left_shoulder
-  - right_shoulder
-  - left_elbow
-  - right_elbow
-  - left_wrist
-  - right_wrist
-  - left_hip
-  - right_hip
-  - left_knee
-  - right_knee
-  - left_ankle
-  - right_ankle
+  person:
+    - nose
+    - left_eye
+    - right_eye
+    - left_ear
+    - right_ear
+    - left_shoulder
+    - right_shoulder
+    - left_elbow
+    - right_elbow
+    - left_wrist
+    - right_wrist
+    - left_hip
+    - right_hip
+    - left_knee
+    - right_knee
+    - left_ankle
+    - right_ankle
diff --git a/anylabeling/services/auto_labeling/__base__/yolo.py b/anylabeling/services/auto_labeling/__base__/yolo.py
@@ -81,11 +81,9 @@ def __init__(self, model_config, on_message) -> None:
             if not isinstance(self.input_height, int):
                 self.input_height = self.config.get("input_height", -1)
 
-        self.model_type = self.config["type"]
         self.replace = True
+        self.model_type = self.config["type"]
         self.classes = self.config.get("classes", [])
-        if isinstance(self.classes, dict):
-            self.classes = list(self.classes.values())
         self.stride = self.config.get("stride", 32)
         self.anchors = self.config.get("anchors", None)
         self.agnostic = self.config.get("agnostic", False)
@@ -124,12 +122,6 @@ def __init__(self, model_config, on_message) -> None:
         else:
             self.tracker = None
 
-        """Keypoints"""
-        self.keypoints = self.config.get("keypoints", [])
-        self.five_key_points_classes = self.config.get(
-            "five_key_points_classes", []
-        )
-
         if self.model_type in [
             "yolov5",
             "yolov6",
@@ -152,6 +144,21 @@ def __init__(self, model_config, on_message) -> None:
             "yolov8_obb",
         ]:
             self.task = "obb"
+        elif self.model_type in [
+            "yolov6_face",
+            "yolov8_pose",
+        ]:
+            self.task = "pose"
+            self.keypoints = {}
+            self.has_visible = self.config.get("has_visible", True)
+            self.kpt_threshold = self.config.get("kpt_threshold", 0.1)
+            for class_name, keypoints in self.classes.items():
+                self.keypoints[class_name] = keypoints
+            self.classes = list(self.classes.keys())
+            self.multi_label = True if len(self.classes) > 1 else False
+
+        if isinstance(self.classes, dict):
+            self.classes = list(self.classes.values())
 
     def set_auto_labeling_conf(self, value):
         """ set auto labeling confidence threshold """

diff --git a/anylabeling/services/auto_labeling/utils/points_conversion.py b/anylabeling/services/auto_labeling/utils/points_conversion.py
@@ -319,7 +319,7 @@ def rescale_box_and_landmark(input_shape, boxes, lmdks, image_shape):
     return np.round(boxes), np.round(lmdks)
 
 
-def rescale_tlwh(input_shape, boxes, image_shape, kpts=False):
+def rescale_tlwh(input_shape, boxes, image_shape, kpts=False, has_visible=True, multi_label=False):
     """Rescale the output to the original image shape"""
     ratio = min(
         input_shape[0] / image_shape[0],
@@ -337,10 +337,15 @@ def rescale_tlwh(input_shape, boxes, image_shape, kpts=False):
     boxes[:, 2] = np.clip((boxes[:, 0] + boxes[:, 2]), 0, image_shape[1])  # x2
     boxes[:, 3] = np.clip((boxes[:, 1] + boxes[:, 3]), 0, image_shape[0])  # y2
     if kpts:
-        num_kpts = boxes.shape[1] // 3
-        for i in range(2, num_kpts + 1):
-            boxes[:, i * 3 - 1] = (boxes[:, i * 3 - 1] - padding[0]) / ratio
-            boxes[:, i * 3] = (boxes[:, i * 3] - padding[1]) / ratio
+        start_index = 6 if multi_label else 5
+        num_kpts = boxes.shape[1] - start_index
+        interval = 3 if has_visible else 2
+        for i in range(0, num_kpts, interval):
+            boxes[:, start_index + i] = (boxes[:, start_index + i] - padding[0]) / ratio
+            boxes[:, start_index + i + 1] = (boxes[:, start_index + i + 1] - padding[1]) / ratio
+            # skip visible flag
+            if has_visible and (i + 1) % interval == 0:
+                continue
     return boxes
 
 

diff --git a/anylabeling/services/auto_labeling/yolov6_face.py b/anylabeling/services/auto_labeling/yolov6_face.py
@@ -163,16 +163,19 @@ def predict_shapes(self, image, image_path=None):
             x1, y1, x2, y2 = list(map(int, xyxy))
             lmdks = list(map(int, lmdks))
             label = str(self.classes[int(cls_id)])
-            rectangle_shape = Shape(label=label, shape_type="rectangle")
+            rectangle_shape = Shape(
+                label=label, shape_type="rectangle", group_id=int(i)
+            )
             rectangle_shape.add_point(QtCore.QPointF(x1, y1))
             rectangle_shape.add_point(QtCore.QPointF(x2, y1))
             rectangle_shape.add_point(QtCore.QPointF(x2, y2))
             rectangle_shape.add_point(QtCore.QPointF(x1, y2))
             shapes.append(rectangle_shape)
+            kpt_names = self.keypoints[label]
             for j in range(0, len(lmdks), 2):
                 x, y = lmdks[j], lmdks[j + 1]
                 point_shape = Shape(
-                    label=self.five_key_points_classes[j // 2],
+                    label=kpt_names[j // 2],
                     shape_type="point",
                     group_id=int(i),
                 )

diff --git a/anylabeling/services/auto_labeling/yolov8_pose.py b/anylabeling/services/auto_labeling/yolov8_pose.py
@@ -61,15 +61,24 @@ def predict_shapes(self, image, image_path=None):
         if len(results) == 0:
             return AutoLabelingResult([], replace=True)
         results = rescale_tlwh(
-            self.input_shape, results, image.shape, kpts=True
+            self.input_shape, results,
+            image.shape, kpts=True,
+            has_visible=self.has_visible,
+            multi_label=self.multi_label
         )
 
         shapes = []
         for group_id, r in enumerate(reversed(results)):
-            xyxy, _, kpts = r[:4], r[4], r[5:]
+            if self.multi_label:
+                xyxy, score, class_ids, kpts = r[:4], r[4], r[5], r[6:]
+            else:
+                class_ids = 0
+                xyxy, score, kpts = r[:4], r[4], r[5:]
             xmin, ymin, xmax, ymax = xyxy
+            class_name = str(self.classes[int(class_ids)])
             rectangle_shape = Shape(
-                label=str(self.classes[0]),
+                label=class_name,
+                score=score,
                 shape_type="rectangle",
                 group_id=group_id,
             )
@@ -79,12 +88,18 @@ def predict_shapes(self, image, image_path=None):
             rectangle_shape.add_point(QtCore.QPointF(xmin, ymax))
             shapes.append(rectangle_shape)
 
-            interval = 3
+            interval = 3 if self.has_visible else 2
             for i in range(0, len(kpts), interval):
-                x, y, kpt_score = kpts[i : i + 3]
+                if self.has_visible:
+                    x, y, kpt_score = kpts[i : i + 3]
+                else:
+                    x, y =  kpts[i : i + 2]
+                    kpt_score = 1.0
+                if x == 0 and y == 0:
+                    continue
                 inside_flag = point_in_bbox((x, y), xyxy)
-                if (kpt_score > self.conf_thres) and inside_flag:
-                    label = self.keypoints[int(i // interval)]
+                if (kpt_score > self.kpt_threshold) and inside_flag:
+                    label = self.keypoints[class_name][int(i // interval)]
                     point_shape = Shape(
                         label=label, shape_type="point", group_id=group_id
                     )