roboflow · grzegorz-roboflow · Jul 2, 2024 · Jul 2, 2024 · Jul 2, 2024 · Jul 2, 2024
@@ -8,14 +8,16 @@
 
 from inference.core.managers.base import ModelManager
 from inference.core.registries.roboflow import RoboflowModelRegistry
+from inference.core.workflows.core_steps.common.entities import StepExecutionMode
 from inference.core.workflows.core_steps.transformations.dynamic_zones import (
     OUTPUT_KEY as DYNAMIC_ZONES_OUTPUT_KEY,
 )
 from inference.core.workflows.core_steps.transformations.dynamic_zones import (
     TYPE as DYNAMIC_ZONES_TYPE,
 )
 from inference.core.workflows.core_steps.transformations.perspective_correction import (
-    OUTPUT_KEY as PERSPECTIVE_CORRECTION_OUTPUT_KEY,
+    OUTPUT_DETECTIONS_KEY as PERSPECTIVE_CORRECTION_OUTPUT_DETECTIONS_KEY,
+    OUTPUT_IMAGE_KEY as PERSPECTIVE_CORRECTION_OUTPUT_IMAGE_KEY,
 )
 from inference.core.workflows.core_steps.transformations.perspective_correction import (
     TYPE as PERSPECTIVE_CORRECTION_TYPE,
@@ -68,6 +70,7 @@ def parse_args() -> argparse.Namespace:
         default="",
         action=MustBeValidSVPosition,
     )
+    parser.add_argument("--warp-image", required=False, action="store_true")
     return parser.parse_args()
 
 
@@ -89,6 +92,7 @@ def parse_args() -> argparse.Namespace:
                 "type": "InferenceParameter",
                 "name": "extend_perspective_polygon_by_detections_anchor",
             },
+            {"type": "InferenceParameter", "name": "warp_image"},
         ],
         "steps": [
             {
@@ -183,10 +187,12 @@ def parse_args() -> argparse.Namespace:
                 "type": PERSPECTIVE_CORRECTION_TYPE,
                 "name": "coordinates_transformer",
                 "predictions": "$steps.not_zone_class_filter.predictions",
+                "images": "$inputs.image",
                 "perspective_polygons": f"$steps.dynamic_zones.{DYNAMIC_ZONES_OUTPUT_KEY}",
                 "transformed_rect_width": "$inputs.transformed_rect_width",
                 "transformed_rect_height": "$inputs.transformed_rect_height",
                 "extend_perspective_polygon_by_detections_anchor": "$inputs.extend_perspective_polygon_by_detections_anchor",
+                "warp_image": "$inputs.warp_image",
             },
         ],
         "outputs": [
@@ -202,8 +208,13 @@ def parse_args() -> argparse.Namespace:
             },
             {
                 "type": "JsonField",
-                "name": PERSPECTIVE_CORRECTION_OUTPUT_KEY,
-                "selector": f"$steps.coordinates_transformer.{PERSPECTIVE_CORRECTION_OUTPUT_KEY}",
+                "name": PERSPECTIVE_CORRECTION_OUTPUT_DETECTIONS_KEY,
+                "selector": f"$steps.coordinates_transformer.{PERSPECTIVE_CORRECTION_OUTPUT_DETECTIONS_KEY}",
+            },
+            {
+                "type": "JsonField",
+                "name": PERSPECTIVE_CORRECTION_OUTPUT_IMAGE_KEY,
+                "selector": f"$steps.coordinates_transformer.{PERSPECTIVE_CORRECTION_OUTPUT_IMAGE_KEY}",
             },
         ],
     }
@@ -216,6 +227,7 @@ def parse_args() -> argparse.Namespace:
         init_parameters={
             "workflows_core.model_manager": model_manager,
             "workflows_core.api_key": os.getenv("ROBOFLOW_API_KEY"),
+            "workflows_core.step_execution_mode": StepExecutionMode.LOCAL,
         },
     )
 
@@ -233,24 +245,25 @@ def parse_args() -> argparse.Namespace:
             "transformed_rect_width": args.transformed_rect_width,
             "transformed_rect_height": args.transformed_rect_height,
             "extend_perspective_polygon_by_detections_anchor": args.extend_perspective_polygon_by_detections_anchor,
+            "warp_image": args.warp_image,
         }
     )
 
-    if not result.get(DYNAMIC_ZONES_OUTPUT_KEY):
+    if not result[0].get(DYNAMIC_ZONES_OUTPUT_KEY):
         print("Nothing detected, script terminated.")
         exit(0)
 
     img = cv.imread(args.source_path)
 
     polygon_annotator = sv.PolygonAnnotator(thickness=10)
-    detections = result.get("predictions")[0]
+    detections = result[0].get("predictions")
     original_polygons = polygon_annotator.annotate(
         scene=img.copy(),
         detections=detections[detections["class_name"] != args.zones_from_class_name],
     )
     cv.drawContours(
         original_polygons,
-        [np.array(result.get(DYNAMIC_ZONES_OUTPUT_KEY)[0][0], dtype=int)],
+        [np.array(result[0].get(DYNAMIC_ZONES_OUTPUT_KEY), dtype=int)],
         -1,
         (0, 0, 255),
         10,
@@ -261,8 +274,12 @@ def parse_args() -> argparse.Namespace:
+    # NumPy image arrays are laid out (rows, cols, channels), so the canvas
+    # shape must list height before width.
     blank_image = np.ones(
-        (args.transformed_rect_width, args.transformed_rect_height, 3), np.uint8
+        (args.transformed_rect_height, args.transformed_rect_width, 3), np.uint8
     )
-    h, w, _ = blank_image.shape
-    corrected_detections = result.get(PERSPECTIVE_CORRECTION_OUTPUT_KEY)[0]
+    corrected_detections = result[0].get(PERSPECTIVE_CORRECTION_OUTPUT_DETECTIONS_KEY)
+    # With --warp-image the step's image output replaces the blank canvas.
+    if args.warp_image:
+        blank_image = result[0].get(PERSPECTIVE_CORRECTION_OUTPUT_IMAGE_KEY)
     annotated_frame = polygon_annotator.annotate(
         blank_image,
         detections=corrected_detections[

@@ -4,18 +4,26 @@
 import cv2 as cv
 import numpy as np
 import supervision as sv
-from pydantic import ConfigDict, Field
+from pydantic import AliasChoices, ConfigDict, Field
 
 from inference.core.logger import logger
 from inference.core.workflows.constants import KEYPOINTS_XY_KEY_IN_SV_DETECTIONS
-from inference.core.workflows.entities.base import Batch, OutputDefinition
+from inference.core.workflows.entities.base import (
+    Batch,
+    OutputDefinition,
+    WorkflowImageData,
+)
 from inference.core.workflows.entities.types import (
+    BATCH_OF_IMAGES_KIND,
     BATCH_OF_INSTANCE_SEGMENTATION_PREDICTION_KIND,
     BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND,
+    BOOLEAN_KIND,
     INTEGER_KIND,
     LIST_OF_VALUES_KIND,
     STRING_KIND,
+    StepOutputImageSelector,
     StepOutputSelector,
+    WorkflowImageSelector,
     WorkflowParameterSelector,
 )
 from inference.core.workflows.prototypes.block import (
@@ -24,7 +32,8 @@
     WorkflowBlockManifest,
 )
 
-OUTPUT_KEY: str = "corrected_coordinates"
+OUTPUT_DETECTIONS_KEY: str = "corrected_coordinates"
+OUTPUT_IMAGE_KEY: str = "warped_image"
 TYPE: str = "PerspectiveCorrection"
 SHORT_DESCRIPTION = (
     "Correct coordinates of detections from plane defined by given polygon "
@@ -57,6 +66,12 @@ class PerspectiveCorrectionManifest(WorkflowBlockManifest):
         description="Predictions",
         examples=["$steps.object_detection_model.predictions"],
     )
+    images: Union[WorkflowImageSelector, StepOutputImageSelector] = Field(
+        title="Input Image",
+        description="The input image for this step.",
+        examples=["$inputs.image", "$steps.cropping.crops"],
+        validation_alias=AliasChoices("images", "image"),  # also accept "image"
+    )
     perspective_polygons: Union[list, StepOutputSelector(kind=[LIST_OF_VALUES_KIND])] = Field(  # type: ignore
         description="Perspective polygons (for each batch at least one must be consisting of 4 vertices)",
     )
@@ -72,6 +87,10 @@ class PerspectiveCorrectionManifest(WorkflowBlockManifest):
         description=f"If set, perspective polygons will be extended to contain all bounding boxes. Allowed values: {', '.join(sv.Position.list())}",
         default="",
     )
+    warp_image: Union[bool, WorkflowParameterSelector(kind=[BOOLEAN_KIND])] = Field(  # type: ignore
+        description="If set to True, image will be warped into transformed rect",
+        default=False,
+    )
 
     @classmethod
     def accepts_batch_input(cls) -> bool:
@@ -81,12 +100,18 @@ def accepts_batch_input(cls) -> bool:
     def describe_outputs(cls) -> List[OutputDefinition]:
         return [
             OutputDefinition(
-                name=OUTPUT_KEY,
+                name=OUTPUT_DETECTIONS_KEY,  # corrected detections (or None)
                 kind=[
                     BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND,
                     BATCH_OF_INSTANCE_SEGMENTATION_PREDICTION_KIND,
                 ],
             ),
+            OutputDefinition(
+                name=OUTPUT_IMAGE_KEY,  # input image, warped when `warp_image` is set
+                kind=[
+                    BATCH_OF_IMAGES_KIND,
+                ],
+            ),
         ]
 
 
@@ -307,11 +332,13 @@ def get_manifest(cls) -> Type[WorkflowBlockManifest]:
 
     async def run(
         self,
+        images: Batch[WorkflowImageData],
         predictions: Batch[sv.Detections],
         perspective_polygons: Batch[List[np.ndarray]],
         transformed_rect_width: int,
         transformed_rect_height: int,
         extend_perspective_polygon_by_detections_anchor: Optional[str],
+        warp_image: Optional[bool],
     ) -> BlockResult:
         if not self.perspective_transformers:
             try:
@@ -337,15 +364,39 @@ async def run(
                 )
 
         result = []
-        for detections, perspective_transformer in zip(
-            predictions, self.perspective_transformers
+        for detections, perspective_transformer, image in zip(
+            predictions, self.perspective_transformers, images
         ):
+            # Pass the input image through unchanged unless it is warped below.
+            result_image = image
+            # Warp only when a transformer exists for this element: the guard
+            # further down shows it may be None, which is not a valid matrix
+            # for cv.warpPerspective.
+            if warp_image and perspective_transformer is not None:
+                # https://docs.opencv.org/4.9.0/da/d54/group__imgproc__transform.html#gaf73673a7e8e18ec6963e3774e6a94b87
+                # NOTE(review): this replaces the `image` object with the raw
+                # warpPerspective result - confirm consumers accept both types.
+                result_image = cv.warpPerspective(
+                    src=image.numpy_image,
+                    M=perspective_transformer,
+                    dsize=(transformed_rect_width, transformed_rect_height),
+                )
+
             if detections is None or perspective_transformer is None:
-                result.append({OUTPUT_KEY: None})
+                result.append(
+                    {OUTPUT_DETECTIONS_KEY: None, OUTPUT_IMAGE_KEY: result_image}
+                )
                 continue
+
             corrected_detections = correct_detections(
                 detections=detections,
                 perspective_transformer=perspective_transformer,
             )
-            result.append({OUTPUT_KEY: corrected_detections})
+
+            result.append(
+                {
+                    OUTPUT_DETECTIONS_KEY: corrected_detections,
+                    OUTPUT_IMAGE_KEY: result_image,
+                }
+            )
         return result