From fe1d3b08cca9b0c86aa20a759bae883ddc0defb7 Mon Sep 17 00:00:00 2001
From: Chandler Supple
Date: Thu, 30 Jan 2025 10:50:11 -0800
Subject: [PATCH 1/2] Added auto mode

---
 .../stitch_ocr_detections/v1.py               | 28 ++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/inference/core/workflows/core_steps/transformations/stitch_ocr_detections/v1.py b/inference/core/workflows/core_steps/transformations/stitch_ocr_detections/v1.py
index 6aaed08497..bd8bfc7779 100644
--- a/inference/core/workflows/core_steps/transformations/stitch_ocr_detections/v1.py
+++ b/inference/core/workflows/core_steps/transformations/stitch_ocr_detections/v1.py
@@ -50,6 +50,8 @@
 
 * **"vertical_bottom_to_top"**: Vertical reading from bottom to top ⬆️
 
+* **"auto"**: Automatically detects the reading direction based on the spatial arrangement of text elements.
+
 #### Why Use This Transformation?
 
 This is especially useful for:
@@ -109,10 +111,11 @@ class BlockManifest(WorkflowBlockManifest):
         "right_to_left",
         "vertical_top_to_bottom",
         "vertical_bottom_to_top",
+        "auto",
     ] = Field(
         title="Reading Direction",
         description="The direction of the text in the image.",
-        examples=["right_to_left"],
+        examples=["right_to_left", "auto"],
         json_schema_extra={
             "values_metadata": {
                 "left_to_right": {
@@ -131,6 +134,10 @@ class BlockManifest(WorkflowBlockManifest):
                     "name": "Bottom To Top (Vertical)",
                     "description": "Vertical reading from bottom to top",
                 },
+                "auto": {
+                    "name": "Auto",
+                    "description": "Automatically detect the reading direction based on text arrangement.",
+                },
             }
         },
     )
@@ -167,6 +174,23 @@ def get_execution_engine_compatibility(cls) -> Optional[str]:
         return ">=1.0.0,<2.0.0"
 
 
+def detect_reading_direction(detections: sv.Detections) -> str:
+    if len(detections) == 0:
+        return "left_to_right"
+
+    xyxy = detections.xyxy
+    widths = xyxy[:, 2] - xyxy[:, 0]
+    heights = xyxy[:, 3] - xyxy[:, 1]
+
+    avg_width = np.mean(widths)
+    avg_height = np.mean(heights)
+
+    if avg_width > avg_height:
+        return "left_to_right"
+    else:
+        return "vertical_top_to_bottom"
+
+
 class StitchOCRDetectionsBlockV1(WorkflowBlock):
     @classmethod
     def get_manifest(cls) -> Type[WorkflowBlockManifest]:
@@ -178,6 +202,8 @@ def run(
         reading_direction: str,
         tolerance: int,
     ) -> BlockResult:
+        if reading_direction == "auto":
+            reading_direction = detect_reading_direction(predictions[0])
         return [
             stitch_ocr_detections(
                 detections=detections,

From 4da3187e39bd01266f2ac6025ed320c8cc9ef0ee Mon Sep 17 00:00:00 2001
From: Chandler Supple
Date: Thu, 30 Jan 2025 10:52:51 -0800
Subject: [PATCH 2/2] Removed example

---
 .../core_steps/transformations/stitch_ocr_detections/v1.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inference/core/workflows/core_steps/transformations/stitch_ocr_detections/v1.py b/inference/core/workflows/core_steps/transformations/stitch_ocr_detections/v1.py
index bd8bfc7779..8201f60418 100644
--- a/inference/core/workflows/core_steps/transformations/stitch_ocr_detections/v1.py
+++ b/inference/core/workflows/core_steps/transformations/stitch_ocr_detections/v1.py
@@ -115,7 +115,7 @@ class BlockManifest(WorkflowBlockManifest):
     ] = Field(
         title="Reading Direction",
         description="The direction of the text in the image.",
-        examples=["right_to_left", "auto"],
+        examples=["right_to_left"],
         json_schema_extra={
             "values_metadata": {
                 "left_to_right": {
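
For anyone reviewing the new `auto` mode, here is a minimal standalone sketch of the heuristic this patch adds. The `detect_reading_direction` body is copied from the patch; the hand-made boxes and the bare `sv.Detections(xyxy=...)` construction are illustrative assumptions, not taken from the repository's tests.

```python
import numpy as np
import supervision as sv


def detect_reading_direction(detections: sv.Detections) -> str:
    # Same heuristic as the patch: compare the average box width against
    # the average box height to guess whether the text runs horizontally
    # or vertically. Empty input falls back to "left_to_right".
    if len(detections) == 0:
        return "left_to_right"
    xyxy = detections.xyxy
    widths = xyxy[:, 2] - xyxy[:, 0]
    heights = xyxy[:, 3] - xyxy[:, 1]
    if np.mean(widths) > np.mean(heights):
        return "left_to_right"
    return "vertical_top_to_bottom"


# Hypothetical wide, short boxes, as produced by horizontal text.
horizontal = sv.Detections(xyxy=np.array([[0.0, 0.0, 100.0, 20.0],
                                          [0.0, 30.0, 90.0, 50.0]]))
print(detect_reading_direction(horizontal))  # -> "left_to_right"

# Hypothetical tall, narrow boxes, as produced by vertical text.
vertical = sv.Detections(xyxy=np.array([[0.0, 0.0, 20.0, 100.0],
                                        [30.0, 0.0, 50.0, 90.0]]))
print(detect_reading_direction(vertical))  # -> "vertical_top_to_bottom"
```

Note that the heuristic only ever chooses between `left_to_right` and `vertical_top_to_bottom`; `right_to_left` and `vertical_bottom_to_top` still have to be selected explicitly, and in `run` the direction is detected from `predictions[0]` alone, so it is applied uniformly across the whole batch.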