From a4024ff2c6529443b6ba701daae6592a4608e3d9 Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Thu, 21 Nov 2024 13:39:00 +0000
Subject: [PATCH 01/15] add dataclasses

---
 mapreader/spot_text/dataclasses.py        |  16 +
 mapreader/spot_text/deepsolo_runner.py    |   4 +-
 mapreader/spot_text/dptext_detr_runner.py |  66 +---
 mapreader/spot_text/maptext_runner.py     |   4 +-
 mapreader/spot_text/rec_runner_base.py    | 346 -----------------
 mapreader/spot_text/runner_base.py        | 438 ++++++++++++++++++++--
 6 files changed, 426 insertions(+), 448 deletions(-)
 create mode 100644 mapreader/spot_text/dataclasses.py
 delete mode 100644 mapreader/spot_text/rec_runner_base.py

diff --git a/mapreader/spot_text/dataclasses.py b/mapreader/spot_text/dataclasses.py
new file mode 100644
index 00000000..4b155f40
--- /dev/null
+++ b/mapreader/spot_text/dataclasses.py
@@ -0,0 +1,16 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from shapely.geometry import Polygon
+
+# Detection only
+
+
+@dataclass
+class Prediction:
+    geometry: Polygon  # outline of the detected text instance
+    score: float  # model confidence; NOTE(review): runners pass f"{score:.2f}" strings - confirm type
+    text: str | None = None  # recognised text; left as None by detection-only runners
+    patch_id: str | None = None  # source patch ID, set when converted to parent level
+    crs: str | None = None  # coordinate reference system, set on georeferenced predictions
diff --git a/mapreader/spot_text/deepsolo_runner.py b/mapreader/spot_text/deepsolo_runner.py
index ffa61cbc..e4ac9e35 100644
--- a/mapreader/spot_text/deepsolo_runner.py
+++ b/mapreader/spot_text/deepsolo_runner.py
@@ -20,10 +20,10 @@
 import torch
 from deepsolo.config import get_cfg
 
-from .rec_runner_base import RecRunner
+from .runner_base import DetRecRunner
 
 
-class DeepSoloRunner(RecRunner):
+class DeepSoloRunner(DetRecRunner):
     def __init__(
         self,
         patch_df: pd.DataFrame | gpd.GeoDataFrame | str | pathlib.Path,
diff --git a/mapreader/spot_text/dptext_detr_runner.py b/mapreader/spot_text/dptext_detr_runner.py
index d5badcd7..9b17f1c5 100644
--- a/mapreader/spot_text/dptext_detr_runner.py
+++ b/mapreader/spot_text/dptext_detr_runner.py
@@ -21,10 +21,11 @@
 from dptext_detr.config import get_cfg
 from shapely import MultiPolygon, Polygon
 
-from .runner_base import Runner
+from .dataclasses import Prediction
+from .runner_base import DetRunner
 
 
-class DPTextDETRRunner(Runner):
+class DPTextDETRRunner(DetRunner):
     def __init__(
         self,
         patch_df: pd.DataFrame | gpd.GeoDataFrame | str | pathlib.Path,
@@ -71,7 +72,7 @@ def __init__(
         # setup the predictor
         self.predictor = DefaultPredictor(cfg)
 
-    def get_patch_predictions(
+    def _get_patch_predictions(
         self,
         outputs: dict,
         return_dataframe: bool = False,
@@ -107,7 +108,7 @@ def get_patch_predictions(
         self._deduplicate(image_id, min_ioa=min_ioa)
 
         if return_dataframe:
-            return self._dict_to_dataframe(self.patch_predictions, geo=False)
+            return self._dict_to_dataframe(self.patch_predictions)
         return self.patch_predictions
 
     def _post_process(self, image_id, scores, pred_classes, bd_pnts):
@@ -122,59 +123,6 @@ def _post_process(self, image_id, scores, pred_classes, bd_pnts):
 
             score = f"{score:.2f}"
 
-            self.patch_predictions[image_id].append([polygon, score])
-
-    @staticmethod
-    def _dict_to_dataframe(
-        preds: dict,
-        geo: bool = False,
-        parent: bool = False,
-    ) -> pd.DataFrame:
-        """Convert the predictions dictionary to a pandas DataFrame.
-
-        Parameters
-        ----------
-        preds : dict
-            A dictionary of predictions.
-        geo : bool, optional
-            Whether the dictionary is georeferenced coords (or pixel bounds), by default True
-        parent : bool, optional
-            Whether the dictionary is at the parent level, by default False
-
-        Returns
-        -------
-        pd.DataFrame
-            A pandas DataFrame containing the predictions.
-        """
-        if geo:
-            columns = ["geometry", "crs", "score"]
-        else:
-            columns = ["geometry", "score"]
-
-        if parent:
-            columns.append("patch_id")
-
-        preds_df = pd.concat(
-            pd.DataFrame(
-                preds[k],
-                index=np.full(len(preds[k]), k),
-                columns=columns,
+            self.patch_predictions[image_id].append(
+                Prediction(geometry=polygon, score=score)
             )
-            for k in preds.keys()
-        )
-
-        if geo:
-            # get the crs (should be the same for all)
-            if not preds_df["crs"].nunique() == 1:
-                raise ValueError("[ERROR] Multiple crs found in the predictions.")
-            crs = preds_df["crs"].unique()[0]
-
-            preds_df = gpd.GeoDataFrame(
-                preds_df,
-                geometry="geometry",
-                crs=crs,
-            )
-
-        preds_df.index.name = "image_id"
-        preds_df.reset_index(inplace=True)
-        return preds_df
diff --git a/mapreader/spot_text/maptext_runner.py b/mapreader/spot_text/maptext_runner.py
index 0291496b..b27a67dd 100644
--- a/mapreader/spot_text/maptext_runner.py
+++ b/mapreader/spot_text/maptext_runner.py
@@ -20,10 +20,10 @@
 import torch
 from maptextpipeline.config import get_cfg
 
-from .rec_runner_base import RecRunner
+from .runner_base import DetRecRunner
 
 
-class MapTextRunner(RecRunner):
+class MapTextRunner(DetRecRunner):
     def __init__(
         self,
         patch_df: pd.DataFrame | gpd.GeoDataFrame | str | pathlib.Path,
diff --git a/mapreader/spot_text/rec_runner_base.py b/mapreader/spot_text/rec_runner_base.py
deleted file mode 100644
index 8078cd9e..00000000
--- a/mapreader/spot_text/rec_runner_base.py
+++ /dev/null
@@ -1,346 +0,0 @@
-from __future__ import annotations
-
-import pathlib
-import re
-
-import geopandas as gpd
-import matplotlib.patches as patches
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import xyzservices as xyz
-from PIL import Image
-from shapely import LineString, MultiPolygon, Polygon
-
-from .runner_base import Runner
-
-
-class RecRunner(Runner):
-    def get_patch_predictions(
-        self,
-        outputs: dict,
-        return_dataframe: bool = False,
-        min_ioa: float = 0.7,
-    ) -> dict | pd.DataFrame:
-        """Post process the model outputs to get patch predictions.
-
-        Parameters
-        ----------
-        outputs : dict
-            The outputs from the model.
-        return_dataframe : bool, optional
-            Whether to return the predictions as a pandas DataFrame, by default False
-        min_ioa : float, optional
-            The minimum intersection over area to consider two polygons the same, by default 0.7
-
-        Returns
-        -------
-        dict or pd.DataFrame
-            A dictionary containing the patch predictions or a DataFrame if `as_dataframe` is True.
-        """
-        # key for predictions
-        image_id = outputs["image_id"]
-        self.patch_predictions[image_id] = []
-
-        # get instances
-        instances = outputs["instances"].to("cpu")
-        ctrl_pnts = instances.ctrl_points.numpy()
-        scores = instances.scores.tolist()
-        recs = instances.recs
-        bd_pts = np.asarray(instances.bd)
-
-        self._post_process(image_id, ctrl_pnts, scores, recs, bd_pts)
-        self._deduplicate(image_id, min_ioa=min_ioa)
-
-        if return_dataframe:
-            return self._dict_to_dataframe(self.patch_predictions, geo=False)
-        return self.patch_predictions
-
-    def _process_ctrl_pnt(self, pnt):
-        points = pnt.reshape(-1, 2)
-        return points
-
-    def _post_process(self, image_id, ctrl_pnts, scores, recs, bd_pnts, alpha=0.4):
-        for ctrl_pnt, score, rec, bd in zip(ctrl_pnts, scores, recs, bd_pnts):
-            # draw polygons
-            if bd is not None:
-                bd = np.hsplit(bd, 2)
-                bd = np.vstack([bd[0], bd[1][::-1]])
-                polygon = Polygon(bd).buffer(0)
-
-                if isinstance(polygon, MultiPolygon):
-                    polygon = polygon.convex_hull
-
-            # draw center lines
-            line = self._process_ctrl_pnt(ctrl_pnt)
-            line = LineString(line)
-
-            # draw text
-            text = self._ctc_decode_recognition(rec)
-            if self.voc_size == 37:
-                text = text.upper()
-            # text = "{:.2f}: {}".format(score, text)
-            text = f"{text}"
-            score = f"{score:.2f}"
-
-            self.patch_predictions[image_id].append([polygon, text, score])
-
-    @staticmethod
-    def _dict_to_dataframe(
-        preds: dict,
-        geo: bool = False,
-        parent: bool = False,
-    ) -> pd.DataFrame:
-        """Convert the predictions dictionary to a pandas DataFrame.
-
-        Parameters
-        ----------
-        preds : dict
-            A dictionary of predictions.
-        geo : bool, optional
-            Whether the dictionary is georeferenced coords (or pixel bounds), by default True
-        parent : bool, optional
-            Whether the dictionary is at parent level, by default False
-
-        Returns
-        -------
-        pd.DataFrame
-            A pandas DataFrame containing the predictions.
-        """
-        if geo:
-            columns = ["geometry", "crs", "text", "score"]
-        else:
-            columns = ["geometry", "text", "score"]
-
-        if parent:
-            columns.append("patch_id")
-
-        if len(preds.keys()):
-            preds_df = pd.concat(
-                pd.DataFrame(
-                    preds[k],
-                    index=np.full(len(preds[k]), k),
-                    columns=columns,
-                )
-                for k in preds.keys()
-            )
-        else:
-            preds_df = pd.DataFrame(columns=columns)  # empty dataframe
-
-        if geo:
-            # get the crs (should be the same for all)
-            if not preds_df["crs"].nunique() == 1:
-                raise ValueError("[ERROR] Multiple crs found in the predictions.")
-            crs = preds_df["crs"].unique()[0]
-
-            preds_df = gpd.GeoDataFrame(
-                preds_df,
-                geometry="geometry",
-                crs=crs,
-            )
-
-        preds_df.index.name = "image_id"
-        preds_df.reset_index(inplace=True)  # reset index to get image_id as a column
-        return preds_df
-
-    def search_preds(
-        self, search_text: str, ignore_case: bool = True, return_dataframe: bool = False
-    ) -> dict | pd.DataFrame:
-        """Search the predictions for specific text. Accepts regex.
-
-        Parameters
-        ----------
-        search_text : str
-            The text to search for. Can be a regex pattern.
-        ignore_case : bool, optional
-            Whether to ignore case when searching, by default True.
-        return_dataframe : bool, optional
-            Whether to return the results as a pandas DataFrame, by default False.
-
-        Returns
-        -------
-        dict | pd.DataFrame
-            A dictionary containing the search results or a DataFrame if `return_dataframe` is True.
-
-        Raises
-        ------
-        ValueError
-            If no parent predictions are found.
-        """
-        # reset the search results
-        self.search_results = {}
-
-        # whether to ignore case
-        kwargs = {"flags": re.IGNORECASE} if ignore_case else {}
-
-        if self.parent_predictions == {}:
-            raise ValueError(
-                "[ERROR] No parent predictions found. You may need to run `convert_to_parent_pixel_bounds()`."
-            )
-
-        for image_id, preds in self.parent_predictions.items():
-            for instance in preds:
-                # ["geometry", "text", "score"]
-                if re.search(search_text, instance[1], **kwargs):
-                    if image_id in self.search_results:
-                        self.search_results[image_id].append(instance)
-                    else:
-                        self.search_results[image_id] = [instance]
-
-        if return_dataframe:
-            return self._dict_to_dataframe(self.search_results, parent=True)
-        return self.search_results
-
-    def show_search_results(
-        self,
-        parent_id: str,
-        figsize: tuple | None = (10, 10),
-        border_color: str | None = "r",
-        text_color: str | None = "b",
-    ) -> None:
-        """Show the search results on an image.
-
-        Parameters
-        ----------
-        parent_id : str
-            The image ID to show the predictions on (must be parent level).
-        figsize : tuple | None, optional
-            The size of the figure, by default (10, 10)
-        border_color : str | None, optional
-            The color of the border of the polygons, by default "r"
-        text_color : str | None, optional
-            The color of the text, by default "b".
-
-        Raises
-        ------
-        ValueError
-            If the image ID is not found in the patch or parent predictions.
-        """
-        if parent_id in self.parent_predictions.keys():
-            image_path = self.parent_df.loc[parent_id, "image_path"]
-        else:
-            raise ValueError(f"[ERROR] {parent_id} not found in parent predictions.")
-
-        img = Image.open(image_path)
-
-        fig, ax = plt.subplots(figsize=figsize)
-        ax.axis("off")
-
-        # check if grayscale
-        if len(img.getbands()) == 1:
-            ax.imshow(img, cmap="gray", vmin=0, vmax=255, zorder=1)
-        else:
-            ax.imshow(img, zorder=1)
-        ax.set_title(parent_id)
-
-        preds = self.search_results
-
-        for instance in preds[parent_id]:
-            # Instance is:
-            # - [geometry, text, score] for det/rec
-            polygon = np.array(instance[0].exterior.coords.xy)
-            center = instance[0].centroid.coords.xy
-            patch = patches.Polygon(polygon.T, edgecolor=border_color, facecolor="none")
-            ax.add_patch(patch)
-            ax.text(
-                center[0][0], center[1][0], instance[1], fontsize=8, color=text_color
-            )
-
-        fig.show()
-
-    def _get_geo_search_results(self):
-        """Convert search results to georeferenced search results.
-
-        Returns
-        -------
-        dict
-            Dictionary containing georeferenced search results.
-        """
-        self.check_georeferencing()
-        if not self.georeferenced:
-            raise ValueError(
-                "[ERROR] Cannot convert to coordinates as parent_df does not have 'coordinates' column."
-            )
-
-        geo_search_results = {}
-
-        for parent_id, prediction in self.search_results.items():
-            if parent_id not in geo_search_results.keys():
-                geo_search_results[parent_id] = []
-
-                for instance in prediction:
-                    polygon = instance[0]
-
-                    xx, yy = (np.array(i) for i in polygon.exterior.xy)
-                    xx = (
-                        xx * self.parent_df.loc[parent_id, "dlon"]
-                        + self.parent_df.loc[parent_id, "coordinates"][0]
-                    )
-                    yy = (
-                        self.parent_df.loc[parent_id, "coordinates"][3]
-                        - yy * self.parent_df.loc[parent_id, "dlat"]
-                    )
-
-                    crs = self.parent_df.loc[parent_id, "crs"]
-
-                    parent_polygon_geo = Polygon(zip(xx, yy)).buffer(0)
-                    geo_search_results[parent_id].append(
-                        [parent_polygon_geo, crs, *instance[1:]]
-                    )
-
-        return geo_search_results
-
-    def explore_search_results(
-        self,
-        parent_id: str,
-        xyz_url: str | None = None,
-        style_kwargs: dict | None = None,
-    ):
-        self.check_georeferencing()
-        if not self.georeferenced:
-            raise ValueError(
-                "[ERROR] This method only works for georeferenced results. Please ensure parent_df has 'coordinates' column and run `convert_to_coords` first."
-            )
-
-        if parent_id not in self.geo_predictions.keys():
-            raise ValueError(f"[ERROR] {parent_id} not found in geo predictions.")
-
-        if style_kwargs is None:
-            style_kwargs = {"fillOpacity": 0.2}
-
-        if xyz_url:
-            tiles = xyz.TileProvider(name=xyz_url, url=xyz_url, attribution=xyz_url)
-        else:
-            tiles = xyz.providers.OpenStreetMap.Mapnik
-
-        geo_search_results = self._get_geo_search_results()
-        geo_df = self._dict_to_dataframe(geo_search_results, geo=True, parent=True)
-
-        return geo_df[geo_df["image_id"] == parent_id].explore(
-            tiles=tiles,
-            style_kwds=style_kwargs,
-        )
-
-    def save_search_results_to_geojson(
-        self,
-        save_path: str | pathlib.Path,
-    ) -> None:
-        """Convert the search results to georeferenced search results and save them to a GeoJSON file.
-
-        Parameters
-        ----------
-        save_path : str | pathlib.Path
-            The path to save the GeoJSON file.
-
-        Raises
-        ------
-        ValueError
-            If no search results are found.
-        """
-        if self.search_results == {}:
-            raise ValueError("[ERROR] No results to save!")
-
-        geo_search_results = self._get_geo_search_results()
-
-        geo_df = self._dict_to_dataframe(geo_search_results, geo=True, parent=True)
-        geo_df.to_file(save_path, driver="GeoJSON", engine="pyogrio")
diff --git a/mapreader/spot_text/runner_base.py b/mapreader/spot_text/runner_base.py
index 24d94e03..7ca8ff91 100644
--- a/mapreader/spot_text/runner_base.py
+++ b/mapreader/spot_text/runner_base.py
@@ -12,16 +12,18 @@
 import pandas as pd
 import xyzservices as xyz
 from PIL import Image
-from shapely import Polygon
+from shapely import LineString, MultiPolygon, Polygon
 from tqdm.auto import tqdm
 
 from mapreader import MapImages
 from mapreader.utils.load_frames import load_from_csv, load_from_geojson
 
+from .dataclasses import Prediction
 
-class Runner:
+
+class DetRunner:
     def __init__() -> None:
-        """Initialise the Runner class."""
+        """Initialise the DetRunner class."""
         # empty in the base class
 
     def _load_df(
@@ -110,6 +112,52 @@ def _add_coord_increments(self):
         parent_df, _ = maps.convert_images()
         self.parent_df = parent_df
 
+    @staticmethod
+    def _dict_to_dataframe(
+        preds: dict,
+    ) -> pd.DataFrame:
+        """Convert the predictions dictionary to a pandas DataFrame.
+
+        Parameters
+        ----------
+        preds : dict
+            A dictionary mapping image IDs to lists of predictions.
+
+        Returns
+        -------
+        pd.DataFrame
+            A DataFrame of predictions (a GeoDataFrame if a "crs" column is present).
+        """
+
+        if len(preds):
+            preds_df = pd.concat(
+                pd.DataFrame(
+                    preds[k],
+                    index=np.full(len(preds[k]), k),
+                )
+                for k in preds.keys()
+            )
+            # drop only fully-empty cols (default how="any" drops cols with a single missing value)
+            preds_df.dropna(inplace=True, axis=1, how="all")
+
+            if "crs" in preds_df.columns:
+                # get the crs (should be the same for all)
+                if not preds_df["crs"].nunique() == 1:
+                    raise ValueError("[ERROR] Multiple crs found in the predictions.")
+                crs = preds_df["crs"].unique()[0]
+
+                preds_df = gpd.GeoDataFrame(
+                    preds_df,
+                    geometry="geometry",
+                    crs=crs,
+                )
+        else:
+            preds_df = pd.DataFrame()  # empty dataframe
+
+        preds_df.index.name = "image_id"
+        preds_df.reset_index(inplace=True)  # reset index to get image_id as a column
+        return preds_df
+
     def run_all(
         self,
         return_dataframe: bool = False,
@@ -166,9 +214,7 @@ def run_on_images(
             _ = self.run_on_image(img_path, return_outputs=False, min_ioa=min_ioa)
 
         if return_dataframe:
-            return self._dict_to_dataframe(
-                self.patch_predictions, geo=False, parent=False
-            )
+            return self._dict_to_dataframe(self.patch_predictions)
         return self.patch_predictions
 
     def run_on_image(
@@ -208,16 +254,13 @@ def run_on_image(
         if return_outputs:
             return outputs
 
-        self.get_patch_predictions(outputs, min_ioa=min_ioa)
-
-        if return_dataframe:
-            return self._dict_to_dataframe(
-                self.patch_predictions, geo=False, parent=False
-            )
-        return self.patch_predictions
+        patch_predictions = self._get_patch_predictions(
+            outputs, return_dataframe=return_dataframe, min_ioa=min_ioa
+        )
+        return patch_predictions
 
     def _deduplicate(self, image_id, min_ioa=0.7):
-        polygons = [instance[0] for instance in self.patch_predictions[image_id]]
+        polygons = [instance.geometry for instance in self.patch_predictions[image_id]]
 
         def calc_ioa(polygons, i, j):
             return polygons[i].intersection(polygons[j]).area / polygons[i].area
@@ -272,6 +315,7 @@ def convert_to_parent_pixel_bounds(
         dict or pd.DataFrame
             A dictionary of predictions for each parent image or a DataFrame if `return_dataframe` is True.
         """
+        self.parent_predictions = {}  # reset parent predictions
 
         for image_id, prediction in self.patch_predictions.items():
             parent_id = self.patch_df.loc[image_id, "parent_id"]
@@ -279,7 +323,7 @@ def convert_to_parent_pixel_bounds(
                 self.parent_predictions[parent_id] = []
 
             for instance in prediction:
-                polygon = instance[0]
+                polygon = instance.geometry
 
                 xx, yy = (np.array(i) for i in polygon.exterior.xy)
                 xx = xx + self.patch_df.loc[image_id, "pixel_bounds"][0]  # add min_x
@@ -287,7 +331,12 @@ def convert_to_parent_pixel_bounds(
 
                 parent_polygon = Polygon(zip(xx, yy)).buffer(0)
                 self.parent_predictions[parent_id].append(
-                    [parent_polygon, *instance[1:], image_id]
+                    Prediction(
+                        geometry=parent_polygon,
+                        score=instance.score,
+                        text=instance.text,
+                        patch_id=image_id,
+                    )
                 )
 
         if deduplicate:
@@ -295,16 +344,12 @@ def convert_to_parent_pixel_bounds(
                 self._deduplicate_parent_level(parent_id, min_ioa=min_ioa)
 
         if return_dataframe:
-            return self._dict_to_dataframe(
-                self.parent_predictions, geo=False, parent=True
-            )
+            return self._dict_to_dataframe(self.parent_predictions)
         return self.parent_predictions
 
     def _deduplicate_parent_level(self, image_id, min_ioa=0.7):
         # get parent predictions for selected parent image
-        parent_preds = np.array(self.parent_predictions[image_id])
-
-        all_patches = parent_preds[:, -1]
+        all_patches = [pred.patch_id for pred in self.parent_predictions[image_id]]
         patches = np.unique(all_patches).tolist()
 
         for patch_i, patch_j in combinations(patches, 2):
@@ -322,11 +367,14 @@ def _deduplicate_parent_level(self, image_id, min_ioa=0.7):
 
                 # get polygons that overlap with the patch intersection
                 polygons = []
-                for i, pred in enumerate(parent_preds):
-                    if pred[-1] in [patch_i, patch_j] and pred[0].intersects(
-                        intersection
-                    ):
-                        polygons.append([i, pred[0]])
+                for i, pred in enumerate(np.array(self.parent_predictions[image_id])):
+                    if pred is None:
+                        continue
+                    elif pred.patch_id in [
+                        patch_i,
+                        patch_j,
+                    ] and pred.geometry.intersects(intersection):
+                        polygons.append([i, pred.geometry])
 
                 def calc_ioa(polygons, i, j):
                     return (
@@ -389,12 +437,14 @@ def convert_to_coords(
             print("[INFO] Converting patch pixel bounds to parent pixel bounds.")
             _ = self.convert_to_parent_pixel_bounds()
 
+        self.geo_predictions = {}  # reset geo predictions
+
         for parent_id, prediction in self.parent_predictions.items():
             if parent_id not in self.geo_predictions.keys():
                 self.geo_predictions[parent_id] = []
 
                 for instance in prediction:
-                    polygon = instance[0]
+                    polygon = instance.geometry
 
                     xx, yy = (np.array(i) for i in polygon.exterior.xy)
                     xx = (
@@ -410,16 +460,23 @@ def convert_to_coords(
 
                     parent_polygon_geo = Polygon(zip(xx, yy)).buffer(0)
                     self.geo_predictions[parent_id].append(
-                        [parent_polygon_geo, crs, *instance[1:]]
+                        Prediction(
+                            geometry=parent_polygon_geo,
+                            score=instance.score,
+                            text=instance.text,
+                            patch_id=instance.patch_id,
+                            crs=crs,
+                        )
                     )
 
         if return_dataframe:
-            return self._dict_to_dataframe(self.geo_predictions, geo=True, parent=True)
+            return self._dict_to_dataframe(self.geo_predictions)
         return self.geo_predictions
 
     def save_to_geojson(
         self,
         save_path: str | pathlib.Path,
+        centroid: bool = False,
     ) -> None:
         """Save the georeferenced predictions to a GeoJSON file.
 
@@ -427,15 +484,26 @@ def save_to_geojson(
         ----------
         save_path : str | pathlib.Path, optional
             Path to save the GeoJSON file
+        centroid : bool, optional
+            Whether to save the centroid of the polygons as the geometry column, by default False.
+            Note: The original polygon will still be saved as a separate column.
         """
         if self.geo_predictions == {}:
             raise ValueError(
                 "[ERROR] No georeferenced predictions found. Run `convert_to_coords` first."
             )
 
-        geo_df = self._dict_to_dataframe(self.geo_predictions, geo=True, parent=True)
+        geo_df = self._dict_to_dataframe(self.geo_predictions)
+
+        if centroid:
+            geo_df["polygon"] = geo_df["geometry"].to_wkt()
+            geo_df["geometry"] = geo_df["geometry"].apply(self._polygon_to_centroid)
+
         geo_df.to_file(save_path, driver="GeoJSON", engine="pyogrio")
 
+    def _polygon_to_centroid(self, polygon):
+        return polygon.centroid
+
     def show_predictions(
         self,
         image_id: str,
@@ -488,17 +556,17 @@ def show_predictions(
         ax.set_title(image_id)
 
         for instance in preds[image_id]:
-            # Instance is:
-            # - [geometry, text, score] for det/rec
-            # - [geometry, score] for det only
-            polygon = np.array(instance[0].exterior.coords.xy)
-            center = instance[0].centroid.coords.xy
+            polygon = np.array(instance.geometry.exterior.coords.xy)
+            center = instance.geometry.centroid.coords.xy
             patch = patches.Polygon(polygon.T, edgecolor=border_color, facecolor="none")
             ax.add_patch(patch)
             ax.text(
-                center[0][0], center[1][0], instance[1], fontsize=8, color=text_color
+                x=center[0][0],
+                y=center[1][0],
+                s=instance.text if instance.text is not None else instance.score,
+                fontsize=8,
+                color=text_color,
             )
-
         fig.show()
 
     def explore_predictions(
@@ -526,9 +594,301 @@ def explore_predictions(
         else:
             tiles = xyz.providers.OpenStreetMap.Mapnik
 
-        preds_df = self._dict_to_dataframe(self.geo_predictions, geo=True, parent=True)
+        preds_df = self._dict_to_dataframe(self.geo_predictions)
 
         return preds_df[preds_df["image_id"] == parent_id].explore(
             tiles=tiles,
             style_kwds=style_kwargs,
         )
+
+
+class DetRecRunner(DetRunner):
+    def _get_patch_predictions(
+        self,
+        outputs: dict,
+        return_dataframe: bool = False,
+        min_ioa: float = 0.7,
+    ) -> dict | pd.DataFrame:
+        """Post process the model outputs to get patch predictions.
+
+        Parameters
+        ----------
+        outputs : dict
+            The outputs from the model.
+        return_dataframe : bool, optional
+            Whether to return the predictions as a pandas DataFrame, by default False
+        min_ioa : float, optional
+            The minimum intersection over area to consider two polygons the same, by default 0.7
+
+        Returns
+        -------
+        dict or pd.DataFrame
+            A dictionary containing the patch predictions or a DataFrame if `return_dataframe` is True.
+        """
+        # key for predictions
+        image_id = outputs["image_id"]
+        self.patch_predictions[image_id] = []
+
+        # get instances
+        instances = outputs["instances"].to("cpu")
+        ctrl_pnts = instances.ctrl_points.numpy()
+        scores = instances.scores.tolist()
+        recs = instances.recs
+        bd_pts = np.asarray(instances.bd)
+
+        self._post_process(image_id, ctrl_pnts, scores, recs, bd_pts)
+        self._deduplicate(image_id, min_ioa=min_ioa)
+
+        if return_dataframe:
+            return self._dict_to_dataframe(self.patch_predictions)
+        return self.patch_predictions
+
+    def _process_ctrl_pnt(self, pnt):
+        points = pnt.reshape(-1, 2)
+        return points
+
+    def _post_process(self, image_id, ctrl_pnts, scores, recs, bd_pnts):
+        for ctrl_pnt, score, rec, bd in zip(ctrl_pnts, scores, recs, bd_pnts):
+            # draw polygons
+            if bd is not None:
+                bd = np.hsplit(bd, 2)
+                bd = np.vstack([bd[0], bd[1][::-1]])
+                polygon = Polygon(bd).buffer(0)
+
+                if isinstance(polygon, MultiPolygon):
+                    polygon = polygon.convex_hull
+
+            # draw center lines
+            line = self._process_ctrl_pnt(ctrl_pnt)
+            line = LineString(line)
+
+            # draw text
+            text = self._ctc_decode_recognition(rec)
+            if self.voc_size == 37:
+                text = text.upper()
+            # text = "{:.2f}: {}".format(score, text)
+            text = f"{text}"
+            score = f"{score:.2f}"
+
+            self.patch_predictions[image_id].append(
+                Prediction(geometry=polygon, score=score, text=text)
+            )
+
+    def search_preds(
+        self, search_text: str, ignore_case: bool = True, return_dataframe: bool = False
+    ) -> dict | pd.DataFrame:
+        """Search the predictions for specific text. Accepts regex.
+
+        Parameters
+        ----------
+        search_text : str
+            The text to search for. Can be a regex pattern.
+        ignore_case : bool, optional
+            Whether to ignore case when searching, by default True.
+        return_dataframe : bool, optional
+            Whether to return the results as a pandas DataFrame, by default False.
+
+        Returns
+        -------
+        dict | pd.DataFrame
+            A dictionary containing the search results or a DataFrame if `return_dataframe` is True.
+
+        Raises
+        ------
+        ValueError
+            If no parent predictions are found.
+        """
+        # reset the search results
+        self.search_results = {}
+
+        # whether to ignore case
+        kwargs = {"flags": re.IGNORECASE} if ignore_case else {}
+
+        if self.parent_predictions == {}:
+            raise ValueError(
+                "[ERROR] No parent predictions found. You may need to run `convert_to_parent_pixel_bounds()`."
+            )
+
+        for image_id, preds in self.parent_predictions.items():
+            for instance in preds:
+                if re.search(search_text, instance.text, **kwargs):
+                    if image_id in self.search_results:
+                        self.search_results[image_id].append(instance)
+                    else:
+                        self.search_results[image_id] = [instance]
+
+        if return_dataframe:
+            return self._dict_to_dataframe(self.search_results)
+        return self.search_results
+
+    def show_search_results(
+        self,
+        parent_id: str,
+        figsize: tuple | None = (10, 10),
+        border_color: str | None = "r",
+        text_color: str | None = "b",
+    ) -> None:
+        """Show the search results on an image.
+
+        Parameters
+        ----------
+        parent_id : str
+            The image ID to show the predictions on (must be parent level).
+        figsize : tuple | None, optional
+            The size of the figure, by default (10, 10)
+        border_color : str | None, optional
+            The color of the border of the polygons, by default "r"
+        text_color : str | None, optional
+            The color of the text, by default "b".
+
+        Raises
+        ------
+        ValueError
+            If the parent ID is not found in the parent predictions.
+        """
+        if parent_id in self.parent_predictions.keys():
+            image_path = self.parent_df.loc[parent_id, "image_path"]
+        else:
+            raise ValueError(f"[ERROR] {parent_id} not found in parent predictions.")
+
+        img = Image.open(image_path)
+
+        fig, ax = plt.subplots(figsize=figsize)
+        ax.axis("off")
+
+        # check if grayscale
+        if len(img.getbands()) == 1:
+            ax.imshow(img, cmap="gray", vmin=0, vmax=255, zorder=1)
+        else:
+            ax.imshow(img, zorder=1)
+        ax.set_title(parent_id)
+
+        preds = self.search_results
+
+        for instance in preds[parent_id]:
+            # Instance is:
+            # - [geometry, text, score] for det/rec
+            polygon = np.array(instance.geometry.exterior.coords.xy)
+            center = instance.geometry.centroid.coords.xy
+            patch = patches.Polygon(polygon.T, edgecolor=border_color, facecolor="none")
+            ax.add_patch(patch)
+            ax.text(
+                x=center[0][0],
+                y=center[1][0],
+                s=instance.text,
+                fontsize=8,
+                color=text_color,
+            )
+
+        fig.show()
+
+    def _get_geo_search_results(self):
+        """Convert search results to georeferenced search results.
+
+        Returns
+        -------
+        dict
+            Dictionary containing georeferenced search results.
+        """
+        self.check_georeferencing()
+        if not self.georeferenced:
+            raise ValueError(
+                "[ERROR] Cannot convert to coordinates as parent_df does not have 'coordinates' column."
+            )
+
+        geo_search_results = {}
+
+        for parent_id, prediction in self.search_results.items():
+            if parent_id not in geo_search_results.keys():
+                geo_search_results[parent_id] = []
+
+                for instance in prediction:
+                    polygon = instance.geometry
+
+                    xx, yy = (np.array(i) for i in polygon.exterior.xy)
+                    xx = (
+                        xx * self.parent_df.loc[parent_id, "dlon"]
+                        + self.parent_df.loc[parent_id, "coordinates"][0]
+                    )
+                    yy = (
+                        self.parent_df.loc[parent_id, "coordinates"][3]
+                        - yy * self.parent_df.loc[parent_id, "dlat"]
+                    )
+
+                    crs = self.parent_df.loc[parent_id, "crs"]
+
+                    parent_polygon_geo = Polygon(zip(xx, yy)).buffer(0)
+                    geo_search_results[parent_id].append(
+                        Prediction(
+                            geometry=parent_polygon_geo,
+                            score=instance.score,
+                            text=instance.score,
+                            patch_id=instance.patch_id,
+                            crs=crs,
+                        )
+                    )
+
+        return geo_search_results
+
+    def explore_search_results(
+        self,
+        parent_id: str,
+        xyz_url: str | None = None,
+        style_kwargs: dict | None = None,
+    ):
+        self.check_georeferencing()
+        if not self.georeferenced:
+            raise ValueError(
+                "[ERROR] This method only works for georeferenced results. Please ensure parent_df has 'coordinates' column and run `convert_to_coords` first."
+            )
+
+        if parent_id not in self.geo_predictions.keys():
+            raise ValueError(f"[ERROR] {parent_id} not found in geo predictions.")
+
+        if style_kwargs is None:
+            style_kwargs = {"fillOpacity": 0.2}
+
+        if xyz_url:
+            tiles = xyz.TileProvider(name=xyz_url, url=xyz_url, attribution=xyz_url)
+        else:
+            tiles = xyz.providers.OpenStreetMap.Mapnik
+
+        geo_search_results = self._get_geo_search_results()
+        geo_df = self._dict_to_dataframe(geo_search_results)
+
+        return geo_df[geo_df["image_id"] == parent_id].explore(
+            tiles=tiles,
+            style_kwds=style_kwargs,
+        )
+
+    def save_search_results_to_geojson(
+        self,
+        save_path: str | pathlib.Path,
+        centroid: bool = False,
+    ) -> None:
+        """Convert the search results to georeferenced search results and save them to a GeoJSON file.
+
+        Parameters
+        ----------
+        save_path : str | pathlib.Path
+            The path to save the GeoJSON file.
+        centroid : bool, optional
+            Whether to save the centroid of the polygons as the geometry column, by default False.
+            Note: The original polygon will still be saved as a separate column.
+
+        Raises
+        ------
+        ValueError
+            If no search results are found.
+        """
+        if self.search_results == {}:
+            raise ValueError("[ERROR] No results to save!")
+
+        geo_search_results = self._get_geo_search_results()
+        geo_df = self._dict_to_dataframe(geo_search_results)
+
+        if centroid:
+            geo_df["polygon"] = geo_df["geometry"].to_wkt()
+            geo_df["geometry"] = geo_df["geometry"].apply(self._polygon_to_centroid)
+
+        geo_df.to_file(save_path, driver="GeoJSON", engine="pyogrio")

From a7c3e84b8c43e0c4907c79684b5625707b9e6514 Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Thu, 21 Nov 2024 13:39:11 +0000
Subject: [PATCH 02/15] update tests

---
 tests/test_text_spotting/test_deepsolo_runner.py | 9 ++++++---
 tests/test_text_spotting/test_dptext_runner.py   | 6 +++---
 tests/test_text_spotting/test_maptext_runner.py  | 6 +++---
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/tests/test_text_spotting/test_deepsolo_runner.py b/tests/test_text_spotting/test_deepsolo_runner.py
index 22a9713d..9247c46c 100644
--- a/tests/test_text_spotting/test_deepsolo_runner.py
+++ b/tests/test_text_spotting/test_deepsolo_runner.py
@@ -13,6 +13,7 @@
 
 from mapreader import DeepSoloRunner
 from mapreader.load import MapImages
+from mapreader.spot_text.dataclasses import Prediction
 
 # use cloned DeepSolo path if running in github actions
 DEEPSOLO_PATH = (
@@ -142,8 +143,9 @@ def test_deepsolo_run_all(init_runner, mock_response):
     assert isinstance(out, dict)
     assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys()
     assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list)
+    assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"][0], Prediction)
     # dataframe
-    out = runner._dict_to_dataframe(runner.patch_predictions, geo=False, parent=False)
+    out = runner._dict_to_dataframe(runner.patch_predictions)
     assert isinstance(out, pd.DataFrame)
     assert set(out.columns) == set(["image_id", "geometry", "text", "score"])
     assert "patch-0-0-800-40-#mapreader_text.png#.png" in out["image_id"].values
@@ -156,8 +158,9 @@ def test_deepsolo_convert_to_parent(runner_run_all, mock_response):
     assert isinstance(out, dict)
     assert "mapreader_text.png" in out.keys()
     assert isinstance(out["mapreader_text.png"], list)
+    assert isinstance(out["mapreader_text.png"][0], Prediction)
     # dataframe
-    out = runner._dict_to_dataframe(runner.parent_predictions, geo=False, parent=True)
+    out = runner._dict_to_dataframe(runner.parent_predictions)
     assert isinstance(out, pd.DataFrame)
     assert set(out.columns) == set(
         ["image_id", "patch_id", "geometry", "text", "score"]
@@ -173,7 +176,7 @@ def test_deepsolo_convert_to_parent_coords(runner_run_all, mock_response):
     assert "mapreader_text.png" in out.keys()
     assert isinstance(out["mapreader_text.png"], list)
     # dataframe
-    out = runner._dict_to_dataframe(runner.geo_predictions, geo=True, parent=True)
+    out = runner._dict_to_dataframe(runner.geo_predictions)
     assert isinstance(out, gpd.GeoDataFrame)
     assert set(out.columns) == set(
         ["image_id", "patch_id", "geometry", "crs", "text", "score"]
diff --git a/tests/test_text_spotting/test_dptext_runner.py b/tests/test_text_spotting/test_dptext_runner.py
index cf62baa8..d8e4199b 100644
--- a/tests/test_text_spotting/test_dptext_runner.py
+++ b/tests/test_text_spotting/test_dptext_runner.py
@@ -144,7 +144,7 @@ def test_dptext_run_all(init_runner, mock_response):
     assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys()
     assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list)
     # dataframe
-    out = runner._dict_to_dataframe(runner.patch_predictions, geo=False, parent=False)
+    out = runner._dict_to_dataframe(runner.patch_predictions)
     assert isinstance(out, pd.DataFrame)
     assert set(out.columns) == set(["image_id", "geometry", "score"])
     assert "patch-0-0-800-40-#mapreader_text.png#.png" in out["image_id"].values
@@ -158,7 +158,7 @@ def test_dptext_convert_to_parent(runner_run_all, mock_response):
     assert "mapreader_text.png" in out.keys()
     assert isinstance(out["mapreader_text.png"], list)
     # dataframe
-    out = runner._dict_to_dataframe(runner.parent_predictions, geo=False, parent=True)
+    out = runner._dict_to_dataframe(runner.parent_predictions)
     assert isinstance(out, pd.DataFrame)
     assert set(out.columns) == set(["image_id", "patch_id", "geometry", "score"])
     assert "mapreader_text.png" in out["image_id"].values
@@ -172,7 +172,7 @@ def test_dptext_convert_to_parent_coords(runner_run_all, mock_response):
     assert "mapreader_text.png" in out.keys()
     assert isinstance(out["mapreader_text.png"], list)
     # dataframe
-    out = runner._dict_to_dataframe(runner.geo_predictions, geo=True, parent=True)
+    out = runner._dict_to_dataframe(runner.geo_predictions)
     assert isinstance(out, gpd.GeoDataFrame)
     assert set(out.columns) == set(["image_id", "patch_id", "geometry", "crs", "score"])
     assert "mapreader_text.png" in out["image_id"].values
diff --git a/tests/test_text_spotting/test_maptext_runner.py b/tests/test_text_spotting/test_maptext_runner.py
index 2952013c..9c593872 100644
--- a/tests/test_text_spotting/test_maptext_runner.py
+++ b/tests/test_text_spotting/test_maptext_runner.py
@@ -144,7 +144,7 @@ def test_maptext_run_all(init_runner, mock_response):
     assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys()
     assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list)
     # dataframe
-    out = runner._dict_to_dataframe(runner.patch_predictions, geo=False, parent=False)
+    out = runner._dict_to_dataframe(runner.patch_predictions)
     assert isinstance(out, pd.DataFrame)
     assert set(out.columns) == set(["image_id", "geometry", "text", "score"])
     assert "patch-0-0-800-40-#mapreader_text.png#.png" in out["image_id"].values
@@ -158,7 +158,7 @@ def test_maptext_convert_to_parent(runner_run_all, mock_response):
     assert "mapreader_text.png" in out.keys()
     assert isinstance(out["mapreader_text.png"], list)
     # dataframe
-    out = runner._dict_to_dataframe(runner.parent_predictions, geo=False, parent=True)
+    out = runner._dict_to_dataframe(runner.parent_predictions)
     assert isinstance(out, pd.DataFrame)
     assert set(out.columns) == set(
         ["image_id", "patch_id", "geometry", "text", "score"]
@@ -174,7 +174,7 @@ def test_maptext_convert_to_parent_coords(runner_run_all, mock_response):
     assert "mapreader_text.png" in out.keys()
     assert isinstance(out["mapreader_text.png"], list)
     # dataframe
-    out = runner._dict_to_dataframe(runner.geo_predictions, geo=True, parent=True)
+    out = runner._dict_to_dataframe(runner.geo_predictions)
     assert isinstance(out, gpd.GeoDataFrame)
     assert set(out.columns) == set(
         ["image_id", "patch_id", "geometry", "crs", "text", "score"]

From 821f455cc2096ea8f56ca2d6febe60a55713f153 Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Fri, 22 Nov 2024 15:51:58 +0000
Subject: [PATCH 03/15] update dataclasses, add load_predictions method for
 loading from geojson

---
 mapreader/spot_text/dataclasses.py |  26 ++++--
 mapreader/spot_text/runner_base.py | 131 ++++++++++++++++++++++++-----
 2 files changed, 129 insertions(+), 28 deletions(-)

diff --git a/mapreader/spot_text/dataclasses.py b/mapreader/spot_text/dataclasses.py
index 4b155f40..97098e35 100644
--- a/mapreader/spot_text/dataclasses.py
+++ b/mapreader/spot_text/dataclasses.py
@@ -4,13 +4,27 @@
 
 from shapely.geometry import Polygon
 
-# Detection only
 
+@dataclass(frozen=True)
+class PatchPrediction:
+    pixel_geometry: Polygon
+    score: float
+    text: str = None
 
-@dataclass
-class Prediction:
-    geometry: Polygon
+
+@dataclass(frozen=True)
+class ParentPrediction:
+    pixel_geometry: Polygon
     score: float
+    patch_id: str
+    text: str = None
+
+
+@dataclass(frozen=True)
+class GeoPrediction:
+    pixel_geometry: Polygon
+    score: float
+    patch_id: str
+    geometry: Polygon
+    crs: str
     text: str = None
-    patch_id: str | None = None
-    crs: str | None = None
diff --git a/mapreader/spot_text/runner_base.py b/mapreader/spot_text/runner_base.py
index 7ca8ff91..77c1ee71 100644
--- a/mapreader/spot_text/runner_base.py
+++ b/mapreader/spot_text/runner_base.py
@@ -12,13 +12,13 @@
 import pandas as pd
 import xyzservices as xyz
 from PIL import Image
-from shapely import LineString, MultiPolygon, Polygon
+from shapely import LineString, MultiPolygon, Polygon, from_wkt
 from tqdm.auto import tqdm
 
 from mapreader import MapImages
 from mapreader.utils.load_frames import load_from_csv, load_from_geojson
 
-from .dataclasses import Prediction
+from .dataclasses import GeoPrediction, ParentPrediction, PatchPrediction
 
 
 class DetRunner:
@@ -260,7 +260,9 @@ def run_on_image(
         return patch_predictions
 
     def _deduplicate(self, image_id, min_ioa=0.7):
-        polygons = [instance.geometry for instance in self.patch_predictions[image_id]]
+        polygons = [
+            instance.pixel_geometry for instance in self.patch_predictions[image_id]
+        ]
 
         def calc_ioa(polygons, i, j):
             return polygons[i].intersection(polygons[j]).area / polygons[i].area
@@ -323,7 +325,7 @@ def convert_to_parent_pixel_bounds(
                 self.parent_predictions[parent_id] = []
 
             for instance in prediction:
-                polygon = instance.geometry
+                polygon = instance.pixel_geometry
 
                 xx, yy = (np.array(i) for i in polygon.exterior.xy)
                 xx = xx + self.patch_df.loc[image_id, "pixel_bounds"][0]  # add min_x
@@ -331,8 +333,8 @@ def convert_to_parent_pixel_bounds(
 
                 parent_polygon = Polygon(zip(xx, yy)).buffer(0)
                 self.parent_predictions[parent_id].append(
-                    Prediction(
-                        geometry=parent_polygon,
+                    ParentPrediction(
+                        pixel_geometry=parent_polygon,
                         score=instance.score,
                         text=instance.text,
                         patch_id=image_id,
@@ -373,8 +375,8 @@ def _deduplicate_parent_level(self, image_id, min_ioa=0.7):
                     elif pred.patch_id in [
                         patch_i,
                         patch_j,
-                    ] and pred.geometry.intersects(intersection):
-                        polygons.append([i, pred.geometry])
+                    ] and pred.pixel_geometry.intersects(intersection):
+                        polygons.append([i, pred.pixel_geometry])
 
                 def calc_ioa(polygons, i, j):
                     return (
@@ -444,7 +446,7 @@ def convert_to_coords(
                 self.geo_predictions[parent_id] = []
 
                 for instance in prediction:
-                    polygon = instance.geometry
+                    polygon = instance.pixel_geometry
 
                     xx, yy = (np.array(i) for i in polygon.exterior.xy)
                     xx = (
@@ -456,15 +458,16 @@ def convert_to_coords(
                         - yy * self.parent_df.loc[parent_id, "dlat"]
                     )
 
+                    parent_polygon_geo = Polygon(zip(xx, yy)).buffer(0)
                     crs = self.parent_df.loc[parent_id, "crs"]
 
-                    parent_polygon_geo = Polygon(zip(xx, yy)).buffer(0)
                     self.geo_predictions[parent_id].append(
-                        Prediction(
-                            geometry=parent_polygon_geo,
+                        GeoPrediction(
+                            pixel_geometry=instance.pixel_geometry,
                             score=instance.score,
                             text=instance.text,
                             patch_id=instance.patch_id,
+                            geometry=parent_polygon_geo,
                             crs=crs,
                         )
                     )
@@ -556,8 +559,8 @@ def show_predictions(
         ax.set_title(image_id)
 
         for instance in preds[image_id]:
-            polygon = np.array(instance.geometry.exterior.coords.xy)
-            center = instance.geometry.centroid.coords.xy
+            polygon = np.array(instance.pixel_geometry.exterior.coords.xy)
+            center = instance.pixel_geometry.centroid.coords.xy
             patch = patches.Polygon(polygon.T, edgecolor=border_color, facecolor="none")
             ax.add_patch(patch)
             ax.text(
@@ -601,6 +604,91 @@ def explore_predictions(
             style_kwds=style_kwargs,
         )
 
+    def load_predictions(
+        self,
+        path_save: str | pathlib.Path,
+    ):
+        """Load georeferenced text predictions from a GeoJSON file.
+
+        Parameters
+        ----------
+        path_save : str | pathlib.Path
+            The path to the GeoJSON file.
+
+        Raises
+        ------
+        ValueError
+            If the path does not point to a GeoJSON file.
+
+        Note
+        ----
+        This will overwrite any existing predictions!
+        """
+        if re.search(r"\..*?json$", str(path_save)):
+            preds_df = load_from_geojson(path_save, engine="pyogrio")
+        else:
+            raise ValueError("[ERROR] ``path_save`` must be a path to a geojson file.")
+
+        # convert pixel_geometry to shapely geometry
+        preds_df["pixel_geometry"] = preds_df["pixel_geometry"].apply(
+            lambda x: from_wkt(x)
+        )
+
+        self.geo_predictions = {}
+        self.parent_predictions = {}
+
+        for image_id in preds_df.index.unique():
+            if image_id not in self.geo_predictions.keys():
+                self.geo_predictions[image_id] = []
+            if image_id not in self.parent_predictions.keys():
+                self.parent_predictions[image_id] = []
+
+            for _, v in preds_df[preds_df.index == image_id].iterrows():
+                self.geo_predictions[image_id].append(
+                    GeoPrediction(
+                        pixel_geometry=v.pixel_geometry,
+                        score=v.score,
+                        text=v.text,
+                        patch_id=v.patch_id,
+                        geometry=v.geometry,
+                        crs=v.crs,
+                    )
+                )
+                self.parent_predictions[image_id].append(
+                    ParentPrediction(
+                        pixel_geometry=v.pixel_geometry,
+                        score=v.score,
+                        text=v.text,
+                        patch_id=v.patch_id,
+                    )
+                )
+
+        self.patch_predictions = {}  # reset patch predictions
+
+        for _, prediction in self.parent_predictions.items():
+            for instance in prediction:
+                if instance.patch_id not in self.patch_predictions.keys():
+                    self.patch_predictions[instance.patch_id] = []
+
+                polygon = instance.pixel_geometry
+
+                xx, yy = (np.array(i) for i in polygon.exterior.xy)
+                xx = (
+                    xx - self.patch_df.loc[instance.patch_id, "pixel_bounds"][0]
+                )  # subtract min_x
+                yy = (
+                    yy - self.patch_df.loc[instance.patch_id, "pixel_bounds"][1]
+                )  # subtract min_y
+
+                patch_polygon = Polygon(zip(xx, yy)).buffer(0)
+                self.patch_predictions[instance.patch_id].append(
+                    PatchPrediction(
+                        pixel_geometry=patch_polygon,
+                        score=instance.score,
+                        text=instance.text,
+                    )
+                )
+
 
 class DetRecRunner(DetRunner):
     def _get_patch_predictions(
@@ -671,7 +759,7 @@ def _post_process(self, image_id, ctrl_pnts, scores, recs, bd_pnts):
             score = f"{score:.2f}"
 
             self.patch_predictions[image_id].append(
-                Prediction(geometry=polygon, score=score, text=text)
+                PatchPrediction(pixel_geometry=polygon, score=score, text=text)
             )
 
     def search_preds(
@@ -766,10 +854,8 @@ def show_search_results(
         preds = self.search_results
 
         for instance in preds[parent_id]:
-            # Instance is:
-            # - [geometry, text, score] for det/rec
-            polygon = np.array(instance.geometry.exterior.coords.xy)
-            center = instance.geometry.centroid.coords.xy
+            polygon = np.array(instance.pixel_geometry.exterior.coords.xy)
+            center = instance.pixel_geometry.centroid.coords.xy
             patch = patches.Polygon(polygon.T, edgecolor=border_color, facecolor="none")
             ax.add_patch(patch)
             ax.text(
@@ -803,7 +889,7 @@ def _get_geo_search_results(self):
                 geo_search_results[parent_id] = []
 
                 for instance in prediction:
-                    polygon = instance.geometry
+                    polygon = instance.pixel_geometry
 
                     xx, yy = (np.array(i) for i in polygon.exterior.xy)
                     xx = (
@@ -819,11 +905,12 @@ def _get_geo_search_results(self):
 
                     parent_polygon_geo = Polygon(zip(xx, yy)).buffer(0)
                     geo_search_results[parent_id].append(
-                        Prediction(
-                            geometry=parent_polygon_geo,
+                        GeoPrediction(
+                            pixel_geometry=instance.pixel_geometry,
                             score=instance.score,
                             text=instance.score,
                             patch_id=instance.patch_id,
+                            geometry=parent_polygon_geo,
                             crs=crs,
                         )
                     )

From a4127b768f2cae275e3a13af917806538204094f Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Wed, 27 Nov 2024 13:36:28 +0000
Subject: [PATCH 04/15] fix imports

---
 mapreader/spot_text/dptext_detr_runner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mapreader/spot_text/dptext_detr_runner.py b/mapreader/spot_text/dptext_detr_runner.py
index 9b17f1c5..8ed24b6a 100644
--- a/mapreader/spot_text/dptext_detr_runner.py
+++ b/mapreader/spot_text/dptext_detr_runner.py
@@ -21,7 +21,7 @@
 from dptext_detr.config import get_cfg
 from shapely import MultiPolygon, Polygon
 
-from .dataclasses import Prediction
+from .dataclasses import PatchPrediction
 from .runner_base import DetRunner
 
 
@@ -124,5 +124,5 @@ def _post_process(self, image_id, scores, pred_classes, bd_pnts):
             score = f"{score:.2f}"
 
             self.patch_predictions[image_id].append(
-                Prediction(geometry=polygon, score=score)
+                PatchPrediction(geometry=polygon, score=score)
             )

From b7e3b0b46ceb10459221362c9f88adaa94880d38 Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Wed, 27 Nov 2024 13:50:30 +0000
Subject: [PATCH 05/15] fix deepsolo tests

---
 .../test_deepsolo_runner.py                   | 27 ++++++++++++-------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/tests/test_text_spotting/test_deepsolo_runner.py b/tests/test_text_spotting/test_deepsolo_runner.py
index 9247c46c..d935ec0b 100644
--- a/tests/test_text_spotting/test_deepsolo_runner.py
+++ b/tests/test_text_spotting/test_deepsolo_runner.py
@@ -13,7 +13,7 @@
 
 from mapreader import DeepSoloRunner
 from mapreader.load import MapImages
-from mapreader.spot_text.dataclasses import Prediction
+from mapreader.spot_text.dataclasses import ParentPrediction, PatchPrediction
 
 # use cloned DeepSolo path if running in github actions
 DEEPSOLO_PATH = (
@@ -143,11 +143,20 @@ def test_deepsolo_run_all(init_runner, mock_response):
     assert isinstance(out, dict)
     assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys()
     assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list)
-    assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"][0], Prediction)
+    assert isinstance(
+        out["patch-0-0-800-40-#mapreader_text.png#.png"][0], PatchPrediction
+    )
     # dataframe
     out = runner._dict_to_dataframe(runner.patch_predictions)
     assert isinstance(out, pd.DataFrame)
-    assert set(out.columns) == set(["image_id", "geometry", "text", "score"])
+    assert set(out.columns) == set(
+        [
+            "image_id",
+            "pixel_geometry",
+            "text",
+            "score",
+        ]
+    )
     assert "patch-0-0-800-40-#mapreader_text.png#.png" in out["image_id"].values
 
 
@@ -158,12 +167,12 @@ def test_deepsolo_convert_to_parent(runner_run_all, mock_response):
     assert isinstance(out, dict)
     assert "mapreader_text.png" in out.keys()
     assert isinstance(out["mapreader_text.png"], list)
-    assert isinstance(out["mapreader_text.png"][0], Prediction)
+    assert isinstance(out["mapreader_text.png"][0], ParentPrediction)
     # dataframe
     out = runner._dict_to_dataframe(runner.parent_predictions)
     assert isinstance(out, pd.DataFrame)
     assert set(out.columns) == set(
-        ["image_id", "patch_id", "geometry", "text", "score"]
+        ["image_id", "patch_id", "pixel_geometry", "text", "score"]
     )
     assert "mapreader_text.png" in out["image_id"].values
 
@@ -179,7 +188,7 @@ def test_deepsolo_convert_to_parent_coords(runner_run_all, mock_response):
     out = runner._dict_to_dataframe(runner.geo_predictions)
     assert isinstance(out, gpd.GeoDataFrame)
     assert set(out.columns) == set(
-        ["image_id", "patch_id", "geometry", "crs", "text", "score"]
+        ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "text", "score"]
     )
     assert "mapreader_text.png" in out["image_id"].values
     assert out.crs == runner.parent_df.crs
@@ -230,7 +239,7 @@ def test_deepsolo_save_to_geojson(runner_run_all, tmp_path, mock_response):
     gdf = gpd.read_file(f"{tmp_path}/text.geojson")
     assert isinstance(gdf, gpd.GeoDataFrame)
     assert set(gdf.columns) == set(
-        ["image_id", "patch_id", "geometry", "crs", "text", "score"]
+        ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "text", "score"]
     )
 
 
@@ -244,7 +253,7 @@ def test_deepsolo_search_preds(runner_run_all, mock_response):
     out = runner.search_preds("map", ignore_case=True, return_dataframe=True)
     assert isinstance(out, pd.DataFrame)
     assert set(out.columns) == set(
-        ["image_id", "patch_id", "geometry", "text", "score"]
+        ["image_id", "patch_id", "pixel_geometry", "text", "score"]
     )
     assert "mapreader_text.png" in out["image_id"].values
     out = runner.search_preds("somethingelse", ignore_case=True, return_dataframe=True)
@@ -267,7 +276,7 @@ def test_deepsolo_save_search_results(runner_run_all, tmp_path, mock_response):
     gdf = gpd.read_file(f"{tmp_path}/search_results.geojson")
     assert isinstance(gdf, gpd.GeoDataFrame)
     assert set(gdf.columns) == set(
-        ["image_id", "patch_id", "geometry", "crs", "text", "score"]
+        ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "text", "score"]
     )
     assert "mapreader_text.png" in gdf["image_id"].values
 

From db3c5c507bd89574fab33e532640013c190c3f47 Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Wed, 27 Nov 2024 14:02:31 +0000
Subject: [PATCH 06/15] add/update tests

---
 .../test_deepsolo_runner.py                   |  7 +++++-
 .../test_text_spotting/test_dptext_runner.py  | 22 +++++++++++++++----
 .../test_text_spotting/test_maptext_runner.py | 22 ++++++++++++++-----
 3 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/tests/test_text_spotting/test_deepsolo_runner.py b/tests/test_text_spotting/test_deepsolo_runner.py
index d935ec0b..eb22c8ed 100644
--- a/tests/test_text_spotting/test_deepsolo_runner.py
+++ b/tests/test_text_spotting/test_deepsolo_runner.py
@@ -13,7 +13,11 @@
 
 from mapreader import DeepSoloRunner
 from mapreader.load import MapImages
-from mapreader.spot_text.dataclasses import ParentPrediction, PatchPrediction
+from mapreader.spot_text.dataclasses import (
+    GeoPrediction,
+    ParentPrediction,
+    PatchPrediction,
+)
 
 # use cloned DeepSolo path if running in github actions
 DEEPSOLO_PATH = (
@@ -184,6 +188,7 @@ def test_deepsolo_convert_to_parent_coords(runner_run_all, mock_response):
     assert isinstance(out, dict)
     assert "mapreader_text.png" in out.keys()
     assert isinstance(out["mapreader_text.png"], list)
+    assert isinstance(out["mapreader_text.png"][0], GeoPrediction)
     # dataframe
     out = runner._dict_to_dataframe(runner.geo_predictions)
     assert isinstance(out, gpd.GeoDataFrame)
diff --git a/tests/test_text_spotting/test_dptext_runner.py b/tests/test_text_spotting/test_dptext_runner.py
index d8e4199b..0c42fea0 100644
--- a/tests/test_text_spotting/test_dptext_runner.py
+++ b/tests/test_text_spotting/test_dptext_runner.py
@@ -13,6 +13,11 @@
 
 from mapreader import DPTextDETRRunner
 from mapreader.load import MapImages
+from mapreader.spot_text.dataclasses import (
+    GeoPrediction,
+    ParentPrediction,
+    PatchPrediction,
+)
 
 # use cloned DPText-DETR path if running in github actions
 DPTEXT_DETR_PATH = (
@@ -143,10 +148,13 @@ def test_dptext_run_all(init_runner, mock_response):
     assert isinstance(out, dict)
     assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys()
     assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list)
+    assert isinstance(
+        out["patch-0-0-800-40-#mapreader_text.png#.png"][0], PatchPrediction
+    )
     # dataframe
     out = runner._dict_to_dataframe(runner.patch_predictions)
     assert isinstance(out, pd.DataFrame)
-    assert set(out.columns) == set(["image_id", "geometry", "score"])
+    assert set(out.columns) == set(["image_id", "pixel_geometry", "score"])
     assert "patch-0-0-800-40-#mapreader_text.png#.png" in out["image_id"].values
 
 
@@ -157,10 +165,11 @@ def test_dptext_convert_to_parent(runner_run_all, mock_response):
     assert isinstance(out, dict)
     assert "mapreader_text.png" in out.keys()
     assert isinstance(out["mapreader_text.png"], list)
+    assert isinstance(out["mapreader_text.png"][0], ParentPrediction)
     # dataframe
     out = runner._dict_to_dataframe(runner.parent_predictions)
     assert isinstance(out, pd.DataFrame)
-    assert set(out.columns) == set(["image_id", "patch_id", "geometry", "score"])
+    assert set(out.columns) == set(["image_id", "patch_id", "pixel_geometry", "score"])
     assert "mapreader_text.png" in out["image_id"].values
 
 
@@ -171,10 +180,13 @@ def test_dptext_convert_to_parent_coords(runner_run_all, mock_response):
     assert isinstance(out, dict)
     assert "mapreader_text.png" in out.keys()
     assert isinstance(out["mapreader_text.png"], list)
+    assert isinstance(out["mapreader_text.png"][0], GeoPrediction)
     # dataframe
     out = runner._dict_to_dataframe(runner.geo_predictions)
     assert isinstance(out, gpd.GeoDataFrame)
-    assert set(out.columns) == set(["image_id", "patch_id", "geometry", "crs", "score"])
+    assert set(out.columns) == set(
+        ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "score"]
+    )
     assert "mapreader_text.png" in out["image_id"].values
     assert out.crs == runner.parent_df.crs
 
@@ -223,4 +235,6 @@ def test_dptext_save_to_geojson(runner_run_all, tmp_path, mock_response):
     assert os.path.exists(f"{tmp_path}/text.geojson")
     gdf = gpd.read_file(f"{tmp_path}/text.geojson")
     assert isinstance(gdf, gpd.GeoDataFrame)
-    assert set(gdf.columns) == set(["image_id", "patch_id", "geometry", "crs", "score"])
+    assert set(gdf.columns) == set(
+        ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "score"]
+    )
diff --git a/tests/test_text_spotting/test_maptext_runner.py b/tests/test_text_spotting/test_maptext_runner.py
index 9c593872..6bd20b08 100644
--- a/tests/test_text_spotting/test_maptext_runner.py
+++ b/tests/test_text_spotting/test_maptext_runner.py
@@ -13,6 +13,11 @@
 
 from mapreader import MapTextRunner
 from mapreader.load import MapImages
+from mapreader.spot_text.dataclasses import (
+    GeoPrediction,
+    ParentPrediction,
+    PatchPrediction,
+)
 
 # use cloned MapTextPipeline path if running in github actions
 MAPTEXTPIPELINE_PATH = (
@@ -143,10 +148,13 @@ def test_maptext_run_all(init_runner, mock_response):
     assert isinstance(out, dict)
     assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys()
     assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list)
+    assert isinstance(
+        out["patch-0-0-800-40-#mapreader_text.png#.png"][0], PatchPrediction
+    )
     # dataframe
     out = runner._dict_to_dataframe(runner.patch_predictions)
     assert isinstance(out, pd.DataFrame)
-    assert set(out.columns) == set(["image_id", "geometry", "text", "score"])
+    assert set(out.columns) == set(["image_id", "pixel_geometry", "text", "score"])
     assert "patch-0-0-800-40-#mapreader_text.png#.png" in out["image_id"].values
 
 
@@ -157,11 +165,12 @@ def test_maptext_convert_to_parent(runner_run_all, mock_response):
     assert isinstance(out, dict)
     assert "mapreader_text.png" in out.keys()
     assert isinstance(out["mapreader_text.png"], list)
+    assert isinstance(out["mapreader_text.png"][0], ParentPrediction)
     # dataframe
     out = runner._dict_to_dataframe(runner.parent_predictions)
     assert isinstance(out, pd.DataFrame)
     assert set(out.columns) == set(
-        ["image_id", "patch_id", "geometry", "text", "score"]
+        ["image_id", "patch_id", "pixel_geometry", "text", "score"]
     )
     assert "mapreader_text.png" in out["image_id"].values
 
@@ -173,11 +182,12 @@ def test_maptext_convert_to_parent_coords(runner_run_all, mock_response):
     assert isinstance(out, dict)
     assert "mapreader_text.png" in out.keys()
     assert isinstance(out["mapreader_text.png"], list)
+    assert isinstance(out["mapreader_text.png"][0], GeoPrediction)
     # dataframe
     out = runner._dict_to_dataframe(runner.geo_predictions)
     assert isinstance(out, gpd.GeoDataFrame)
     assert set(out.columns) == set(
-        ["image_id", "patch_id", "geometry", "crs", "text", "score"]
+        ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "text", "score"]
     )
     assert "mapreader_text.png" in out["image_id"].values
     assert out.crs == runner.parent_df.crs
@@ -228,7 +238,7 @@ def test_maptext_save_to_geojson(runner_run_all, tmp_path, mock_response):
     gdf = gpd.read_file(f"{tmp_path}/text.geojson")
     assert isinstance(gdf, gpd.GeoDataFrame)
     assert set(gdf.columns) == set(
-        ["image_id", "patch_id", "geometry", "crs", "text", "score"]
+        ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "text", "score"]
     )
 
 
@@ -242,7 +252,7 @@ def test_maptext_search_preds(runner_run_all, mock_response):
     out = runner.search_preds("map", ignore_case=True, return_dataframe=True)
     assert isinstance(out, pd.DataFrame)
     assert set(out.columns) == set(
-        ["image_id", "patch_id", "geometry", "text", "score"]
+        ["image_id", "patch_id", "pixel_geometry", "text", "score"]
     )
     assert "mapreader_text.png" in out["image_id"].values
     out = runner.search_preds("somethingelse", ignore_case=True, return_dataframe=True)
@@ -265,7 +275,7 @@ def test_maptext_save_search_results(runner_run_all, tmp_path, mock_response):
     gdf = gpd.read_file(f"{tmp_path}/search_results.geojson")
     assert isinstance(gdf, gpd.GeoDataFrame)
     assert set(gdf.columns) == set(
-        ["image_id", "patch_id", "geometry", "crs", "text", "score"]
+        ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "text", "score"]
     )
     assert "mapreader_text.png" in gdf["image_id"].values
 

From 3af480b8d7bab9559aa3ef6dd171c9d99ef79bfe Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Fri, 29 Nov 2024 09:36:14 +0000
Subject: [PATCH 07/15] fix naming error

---
 mapreader/spot_text/dptext_detr_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mapreader/spot_text/dptext_detr_runner.py b/mapreader/spot_text/dptext_detr_runner.py
index 8ed24b6a..c8bbb16b 100644
--- a/mapreader/spot_text/dptext_detr_runner.py
+++ b/mapreader/spot_text/dptext_detr_runner.py
@@ -124,5 +124,5 @@ def _post_process(self, image_id, scores, pred_classes, bd_pnts):
             score = f"{score:.2f}"
 
             self.patch_predictions[image_id].append(
-                PatchPrediction(geometry=polygon, score=score)
+                PatchPrediction(pixel_geometry=polygon, score=score)
             )

From 807e715984385e16ac640922c6317b5c3a456e27 Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Wed, 4 Dec 2024 13:31:52 +0000
Subject: [PATCH 08/15] add save to /load from csv

---
 mapreader/spot_text/runner_base.py | 134 ++++++++++++++++++++++++-----
 1 file changed, 113 insertions(+), 21 deletions(-)

diff --git a/mapreader/spot_text/runner_base.py b/mapreader/spot_text/runner_base.py
index 77c1ee71..352c7378 100644
--- a/mapreader/spot_text/runner_base.py
+++ b/mapreader/spot_text/runner_base.py
@@ -16,7 +16,7 @@
 from tqdm.auto import tqdm
 
 from mapreader import MapImages
-from mapreader.utils.load_frames import load_from_csv, load_from_geojson
+from mapreader.utils.load_frames import eval_dataframe, load_from_csv, load_from_geojson
 
 from .dataclasses import GeoPrediction, ParentPrediction, PatchPrediction
 
@@ -478,18 +478,18 @@ def convert_to_coords(
 
     def save_to_geojson(
         self,
-        save_path: str | pathlib.Path,
+        path_save: str | pathlib.Path,
         centroid: bool = False,
     ) -> None:
         """Save the georeferenced predictions to a GeoJSON file.
 
         Parameters
         ----------
-        save_path : str | pathlib.Path, optional
+        path_save : str | pathlib.Path
             Path to save the GeoJSON file
         centroid : bool, optional
-            Whether to save the centroid of the polygons as the geometry column, by default False.
-            Note: The original polygon will stil be saved as a separate column.
+            Whether to convert the polygons to centroids, by default False.
+            NOTE: The original polygon will still be saved as a separate column
         """
         if self.geo_predictions == {}:
             raise ValueError(
@@ -500,12 +500,61 @@ def save_to_geojson(
 
         if centroid:
             geo_df["polygon"] = geo_df["geometry"].to_wkt()
-            geo_df["geometry"] = geo_df["geometry"].apply(self._polygon_to_centroid)
+            geo_df["geometry"] = geo_df["geometry"].centroid
 
-        geo_df.to_file(save_path, driver="GeoJSON", engine="pyogrio")
+        geo_df.to_file(path_save, driver="GeoJSON", engine="pyogrio")
 
-    def _polygon_to_centroid(self, polygon):
-        return polygon.centroid
+    def save_to_csv(
+        self,
+        path_save: str | pathlib.Path,
+        centroid: bool = False,
+    ) -> None:
+        """Saves the patch, parent and georeferenced predictions to CSV files.
+
+        Parameters
+        ----------
+        path_save : str | pathlib.Path
+            The path to save the CSV files. Files will be saved as `patch_predictions.csv`, `parent_predictions.csv` and `geo_predictions.csv`.
+        centroid : bool, optional
+            Whether to convert polygons to centroids, by default False.
+            NOTE: The original polygon will still be saved as a separate column.
+
+        Note
+        ----
+        Use the `save_to_geojson` method to save georeferenced predictions to a GeoJSON file.
+        """
+        if self.patch_predictions == {}:  # implies no parent or geo predictions
+            raise ValueError("[ERROR] No patch predictions found.")
+
+        if not os.path.exists(path_save):
+            os.makedirs(path_save)
+
+        print("[INFO] Saving patch predictions.")
+        patch_df = self._dict_to_dataframe(self.patch_predictions)
+        if centroid:
+            patch_df["polygon"] = patch_df["pixel_geometry"]
+            patch_df["pixel_geometry"] = patch_df["pixel_geometry"].apply(
+                lambda x: x.centroid
+            )
+        patch_df.to_csv(f"{path_save}/patch_predictions.csv")
+
+        if self.parent_predictions != {}:
+            print("[INFO] Saving parent predictions.")
+            parent_df = self._dict_to_dataframe(self.parent_predictions)
+            if centroid:
+                parent_df["polygon"] = parent_df["pixel_geometry"]
+                parent_df["pixel_geometry"] = parent_df["pixel_geometry"].apply(
+                    lambda x: x.centroid
+                )
+            parent_df.to_csv(f"{path_save}/parent_predictions.csv")
+
+        if self.geo_predictions != {}:
+            print("[INFO] Saving geo predictions.")
+            geo_df = self._dict_to_dataframe(self.geo_predictions)
+            if centroid:
+                geo_df["polygon"] = geo_df["geometry"]
+                geo_df["geometry"] = geo_df["geometry"].centroid
+            geo_df.to_csv(f"{path_save}/geo_predictions.csv")
 
     def show_predictions(
         self,
@@ -604,15 +653,15 @@ def explore_predictions(
             style_kwds=style_kwargs,
         )
 
-    def load_predictions(
+    def load_geo_predictions(
         self,
-        path_save: str | pathlib.Path,
+        load_path: str | pathlib.Path,
     ):
         """Load georeferenced text predictions from a GeoJSON file.
 
         Parameters
         ----------
-        path_save : str | pathlib.Path
+        load_path : str | pathlib.Path
             The path to the GeoJSON file.
 
         Raises
@@ -624,10 +673,10 @@ def load_predictions(
         ----
         This will overwrite any existing predictions!
         """
-        if re.search(r"\..*?json$", str(path_save)):
-            preds_df = load_from_geojson(path_save, engine="pyogrio")
+        if re.search(r"\..*?json$", str(load_path)):
+            preds_df = load_from_geojson(load_path, engine="pyogrio")
         else:
-            raise ValueError("[ERROR] ``path_save`` must be a path to a geojson file.")
+            raise ValueError("[ERROR] ``load_path`` must be a path to a geojson file.")
 
         # convert pixel_geometry to shapely geometry
         preds_df["pixel_geometry"] = preds_df["pixel_geometry"].apply(
@@ -648,7 +697,7 @@ def load_predictions(
                     GeoPrediction(
                         pixel_geometry=v.pixel_geometry,
                         score=v.score,
-                        text=v.text,
+                        text=v.text if "text" in v.index else None,
                         patch_id=v.patch_id,
                         geometry=v.geometry,
                         crs=v.crs,
@@ -658,7 +707,7 @@ def load_predictions(
                     ParentPrediction(
                         pixel_geometry=v.pixel_geometry,
                         score=v.score,
-                        text=v.text,
+                        text=v.text if "text" in v.index else None,
                         patch_id=v.patch_id,
                     )
                 )
@@ -689,6 +738,49 @@ def load_predictions(
                     )
                 )
 
+    def load_patch_predictions(
+        self,
+        patch_preds: str | pathlib.Path | pd.DataFrame,
+    ) -> None:
+        if not isinstance(patch_preds, pd.DataFrame):
+            if re.search(r"\..*?csv$", str(patch_preds)):
+                patch_preds = pd.read_csv(patch_preds, index_col=0)
+                patch_preds = eval_dataframe(patch_preds)
+            else:
+                raise ValueError(
+                    "[ERROR] ``patch_preds`` must be a pandas DataFrame or path to a CSV file."
+                )
+
+        # if we have a polygon column, this implies the pixel_geometry column is the centroid
+        if "polygon" in patch_preds.columns:
+            patch_preds["pixel_geometry"] = patch_preds["polygon"]
+            patch_preds.drop(columns=["polygon"], inplace=True)
+
+        # convert pixel_geometry to shapely geometry
+        patch_preds["pixel_geometry"] = patch_preds["pixel_geometry"].apply(
+            lambda x: from_wkt(x)
+        )
+
+        self.patch_predictions = {}  # reset patch predictions
+
+        for image_id in patch_preds["image_id"].unique():
+            if image_id not in self.patch_predictions.keys():
+                self.patch_predictions[image_id] = []
+
+            for _, v in patch_preds[patch_preds["image_id"] == image_id].iterrows():
+                self.patch_predictions[image_id].append(
+                    PatchPrediction(
+                        pixel_geometry=v.pixel_geometry,
+                        score=v.score,
+                        text=v.text if "text" in v.index else None,
+                    )
+                )
+
+        self.geo_predictions = {}
+        self.parent_predictions = {}
+
+        self.convert_to_parent_pixel_bounds()
+
 
 class DetRecRunner(DetRunner):
     def _get_patch_predictions(
@@ -950,14 +1042,14 @@ def explore_search_results(
 
     def save_search_results_to_geojson(
         self,
-        save_path: str | pathlib.Path,
+        path_save: str | pathlib.Path,
         centroid: bool = False,
     ) -> None:
         """Convert the search results to georeferenced search results and save them to a GeoJSON file.
 
         Parameters
         ----------
-        save_path : str | pathlib.Path
+        path_save : str | pathlib.Path
             The path to save the GeoJSON file.
         centroid : bool, optional
             Whether to save the centroid of the polygons as the geometry column, by default False.
@@ -976,6 +1068,6 @@ def save_search_results_to_geojson(
 
         if centroid:
             geo_df["polygon"] = geo_df["geometry"].to_wkt()
-            geo_df["geometry"] = geo_df["geometry"].apply(self._polygon_to_centroid)
+            geo_df["geometry"] = geo_df["geometry"].centroid
 
-        geo_df.to_file(save_path, driver="GeoJSON", engine="pyogrio")
+        geo_df.to_file(path_save, driver="GeoJSON", engine="pyogrio")

From a46f4810b66a4fdb8e5a4f0a7f6a60cfe05642ad Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Wed, 4 Dec 2024 13:44:02 +0000
Subject: [PATCH 09/15] update changelog

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7325e09b..7a1a5d5b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,11 @@ The following table shows which versions of MapReader are compatible with which
 
 _Add new changes here_
 
+### Added
+
+- Added ability to save and reload text predictions ([#536](https://github.com/maps-as-data/MapReader/pull/536))
+- Added minimal dataclasses for text predictions ([#536](https://github.com/maps-as-data/MapReader/pull/536))
+
 ## [v1.6.1](https://github.com/Living-with-machines/MapReader/releases/tag/v1.6.1) (2024-11-18)
 
 ### Added

From 13ba2fca4eface2d9585755f93b7c90cad8935db Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Wed, 4 Dec 2024 17:09:09 +0000
Subject: [PATCH 10/15] add tests deepsolo

---
 mapreader/spot_text/runner_base.py            |  28 ++-
 .../test_deepsolo_runner.py                   | 222 ++++++++++++++++++
 2 files changed, 239 insertions(+), 11 deletions(-)

diff --git a/mapreader/spot_text/runner_base.py b/mapreader/spot_text/runner_base.py
index 352c7378..ccc299cf 100644
--- a/mapreader/spot_text/runner_base.py
+++ b/mapreader/spot_text/runner_base.py
@@ -500,7 +500,9 @@ def save_to_geojson(
 
         if centroid:
             geo_df["polygon"] = geo_df["geometry"].to_wkt()
-            geo_df["geometry"] = geo_df["geometry"].centroid
+            geo_df["geometry"] = (
+                geo_df["geometry"].to_crs("27700").centroid.to_crs(geo_df.crs)
+            )
 
         geo_df.to_file(path_save, driver="GeoJSON", engine="pyogrio")
 
@@ -553,7 +555,9 @@ def save_to_csv(
             geo_df = self._dict_to_dataframe(self.geo_predictions)
             if centroid:
                 geo_df["polygon"] = geo_df["geometry"]
-                geo_df["geometry"] = geo_df["geometry"].centroid
+                geo_df["geometry"] = (
+                    geo_df["geometry"].to_crs("27700").centroid.to_crs(geo_df.crs)
+                )
             geo_df.to_csv(f"{path_save}/geo_predictions.csv")
 
     def show_predictions(
@@ -751,15 +755,15 @@ def load_patch_predictions(
                     "[ERROR] ``patch_preds`` must be a pandas DataFrame or path to a CSV file."
                 )
 
-        # if we have a polygon column, this implies the pixel_geometry column is the centroid
-        if "polygon" in patch_preds.columns:
-            patch_preds["pixel_geometry"] = patch_preds["polygon"]
-            patch_preds.drop(columns=["polygon"], inplace=True)
+            # if we have a polygon column, this implies the pixel_geometry column is the centroid
+            if "polygon" in patch_preds.columns:
+                patch_preds["pixel_geometry"] = patch_preds["polygon"]
+                patch_preds.drop(columns=["polygon"], inplace=True)
 
-        # convert pixel_geometry to shapely geometry
-        patch_preds["pixel_geometry"] = patch_preds["pixel_geometry"].apply(
-            lambda x: from_wkt(x)
-        )
+            # convert pixel_geometry to shapely geometry
+            patch_preds["pixel_geometry"] = patch_preds["pixel_geometry"].apply(
+                lambda x: from_wkt(x)
+            )
 
         self.patch_predictions = {}  # reset patch predictions
 
@@ -1068,6 +1072,8 @@ def save_search_results_to_geojson(
 
         if centroid:
             geo_df["polygon"] = geo_df["geometry"].to_wkt()
-            geo_df["geometry"] = geo_df["geometry"].centroid
+            geo_df["geometry"] = (
+                geo_df["geometry"].to_crs("27700").centroid.to_crs(geo_df.crs)
+            )
 
         geo_df.to_file(path_save, driver="GeoJSON", engine="pyogrio")
diff --git a/tests/test_text_spotting/test_deepsolo_runner.py b/tests/test_text_spotting/test_deepsolo_runner.py
index eb22c8ed..ea9ed6c2 100644
--- a/tests/test_text_spotting/test_deepsolo_runner.py
+++ b/tests/test_text_spotting/test_deepsolo_runner.py
@@ -5,11 +5,13 @@
 import pickle
 
 import geopandas as gpd
+import numpy as np
 import pandas as pd
 import pytest
 from deepsolo.config import get_cfg
 from detectron2.engine import DefaultPredictor
 from detectron2.structures.instances import Instances
+from shapely import Polygon
 
 from mapreader import DeepSoloRunner
 from mapreader.load import MapImages
@@ -44,6 +46,7 @@ def init_dataframes(sample_dir, tmp_path):
     maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv")
     maps.patchify_all(patch_size=800, path_save=tmp_path)
     maps.check_georeferencing()
+    assert maps.georeferenced
     parent_df, patch_df = maps.convert_images()
     return parent_df, patch_df
 
@@ -140,6 +143,80 @@ def test_deepsolo_init_tsv(init_dataframes, tmp_path):
     assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple)
 
 
+def test_deepsolo_init_geojson(init_dataframes, tmp_path, mock_response):
+    parent_df, patch_df = init_dataframes
+    parent_df.to_file(f"{tmp_path}/parent_df.geojson", driver="GeoJSON")
+    patch_df.to_file(f"{tmp_path}/patch_df.geojson", driver="GeoJSON")
+    runner = DeepSoloRunner(
+        f"{tmp_path}/patch_df.geojson",
+        parent_df=f"{tmp_path}/parent_df.geojson",
+        cfg_file=f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml",
+    )
+    assert isinstance(runner, DeepSoloRunner)
+    assert isinstance(runner.predictor, DefaultPredictor)
+    assert isinstance(runner.parent_df.iloc[0]["geometry"], Polygon)
+    out = runner.run_all()
+    assert isinstance(out, dict)
+    assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys()
+    assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list)
+    assert isinstance(
+        out["patch-0-0-800-40-#mapreader_text.png#.png"][0], PatchPrediction
+    )
+
+
+def test_deepsolo_init_errors(init_dataframes):
+    parent_df, patch_df = init_dataframes
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        DeepSoloRunner(
+            patch_df="fake_file.txt",
+            parent_df=parent_df,
+            cfg_file=f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml",
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        DeepSoloRunner(
+            patch_df=patch_df,
+            parent_df="fake_file.txt",
+            cfg_file=f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml",
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        DeepSoloRunner(
+            patch_df=np.array([1, 2, 3]),
+            parent_df=parent_df,
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        DeepSoloRunner(
+            patch_df=patch_df,
+            parent_df=np.array([1, 2, 3]),
+        )
+
+
+def test_check_georeferencing(init_dataframes):
+    parent_df, patch_df = init_dataframes
+    runner = DeepSoloRunner(
+        patch_df,
+        parent_df=parent_df,
+        cfg_file=f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml",
+    )
+    runner.check_georeferencing()
+    assert runner.georeferenced
+
+    runner = DeepSoloRunner(
+        patch_df,
+        parent_df=parent_df.drop(columns=["dlat", "dlon"]),
+        cfg_file=f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml",
+    )
+    runner.check_georeferencing()
+    assert runner.georeferenced
+
+    runner = DeepSoloRunner(
+        patch_df,
+        parent_df=parent_df.drop(columns=["coordinates"]),
+        cfg_file=f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml",
+    )
+    runner.check_georeferencing()
+    assert not runner.georeferenced
+
+
 def test_deepsolo_run_all(init_runner, mock_response):
     runner = init_runner
     # dict
@@ -246,6 +323,151 @@ def test_deepsolo_save_to_geojson(runner_run_all, tmp_path, mock_response):
     assert set(gdf.columns) == set(
         ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "text", "score"]
     )
+    runner.save_to_geojson(f"{tmp_path}/text_centroid.geojson", centroid=True)
+    assert os.path.exists(f"{tmp_path}/text_centroid.geojson")
+    gdf_centroid = gpd.read_file(f"{tmp_path}/text_centroid.geojson")
+    assert isinstance(gdf_centroid, gpd.GeoDataFrame)
+    assert set(gdf_centroid.columns) == set(
+        [
+            "image_id",
+            "patch_id",
+            "pixel_geometry",
+            "geometry",
+            "crs",
+            "text",
+            "score",
+            "polygon",
+        ]
+    )
+
+
+def test_deepsolo_load_geo_predictions(runner_run_all, tmp_path):
+    runner = runner_run_all
+    _ = runner.convert_to_coords()
+    runner.save_to_geojson(f"{tmp_path}/text.geojson")
+    runner.geo_predictions = {}
+    runner.load_geo_predictions(f"{tmp_path}/text.geojson")
+    assert len(runner.geo_predictions)
+    assert "mapreader_text.png" in runner.geo_predictions.keys()
+    assert isinstance(runner.geo_predictions["mapreader_text.png"], list)
+    assert isinstance(runner.geo_predictions["mapreader_text.png"][0], GeoPrediction)
+
+
+def test_deepsolo_load_geo_predictions_errors(runner_run_all, tmp_path):
+    runner = runner_run_all
+    with pytest.raises(ValueError, match="must be a path to a geojson file"):
+        runner.load_geo_predictions("fakefile.csv")
+
+
+def test_deepsolo_save_to_csv_polygon(runner_run_all, tmp_path, mock_response):
+    runner = runner_run_all
+    # patch
+    runner.save_to_csv(tmp_path)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    # parent
+    _ = runner.convert_to_parent_pixel_bounds()
+    runner.save_to_csv(tmp_path)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
+    # geo
+    _ = runner.convert_to_coords()
+    runner.save_to_csv(tmp_path)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/geo_predictions.csv")
+
+
+def test_deepsolo_save_to_csv_centroid(runner_run_all, tmp_path, mock_response):
+    runner = runner_run_all
+    # patch
+    runner.save_to_csv(tmp_path, centroid=True)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    # parent
+    _ = runner.convert_to_parent_pixel_bounds()
+    runner.save_to_csv(tmp_path, centroid=True)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
+    # geo
+    _ = runner.convert_to_coords()
+    runner.save_to_csv(tmp_path, centroid=True)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/geo_predictions.csv")
+
+
+def test_deepsolo_save_to_csv_errors(runner_run_all, tmp_path, mock_response):
+    runner = runner_run_all
+    runner.patch_predictions = {}
+    with pytest.raises(ValueError, match="No patch predictions found"):
+        runner.save_to_csv(tmp_path)
+
+
+def test_deepsolo_load_patch_predictions(runner_run_all, tmp_path):
+    runner = runner_run_all
+    _ = runner.convert_to_coords()
+    assert len(runner.geo_predictions)  # this will be empty after reloading
+    runner.save_to_csv(tmp_path)
+    runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv")
+    assert len(runner.patch_predictions)
+    assert len(runner.geo_predictions) == 0
+    assert (
+        "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys()
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0],
+        PatchPrediction,
+    )
+
+
+def test_deepsolo_load_patch_predictions_dataframe(runner_run_all):
+    runner = runner_run_all
+    patch_preds = runner._dict_to_dataframe(runner.patch_predictions)
+    _ = runner.convert_to_coords()
+    assert len(runner.geo_predictions)  # this will be empty after reloading
+    runner.load_patch_predictions(patch_preds)
+    assert len(runner.patch_predictions)
+    assert len(runner.geo_predictions) == 0
+    assert (
+        "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys()
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0],
+        PatchPrediction,
+    )
+
+
+def test_deepsolo_load_patch_predictions_centroid(runner_run_all, tmp_path):
+    runner = runner_run_all
+    _ = runner.convert_to_coords()
+    assert len(runner.geo_predictions)
+    runner.save_to_csv(tmp_path, centroid=True)
+    runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv")
+    assert len(runner.patch_predictions)
+    assert len(runner.geo_predictions) == 0
+    assert (
+        "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys()
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0],
+        PatchPrediction,
+    )
+
+
+def test_deepsolo_load_patch_predictions_errors(runner_run_all, tmp_path):
+    runner = runner_run_all
+    with pytest.raises(
+        ValueError, match="must be a pandas DataFrame or path to a CSV file"
+    ):
+        runner.load_patch_predictions("fake_file.geojson")
 
 
 def test_deepsolo_search_preds(runner_run_all, mock_response):

From 31e02c17e86ced5303597436eb75c4fd370d8e64 Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Thu, 5 Dec 2024 09:04:48 +0000
Subject: [PATCH 11/15] add test for saving search results to geojson

---
 .../test_deepsolo_runner.py                   | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tests/test_text_spotting/test_deepsolo_runner.py b/tests/test_text_spotting/test_deepsolo_runner.py
index ea9ed6c2..b08f20a1 100644
--- a/tests/test_text_spotting/test_deepsolo_runner.py
+++ b/tests/test_text_spotting/test_deepsolo_runner.py
@@ -323,6 +323,11 @@ def test_deepsolo_save_to_geojson(runner_run_all, tmp_path, mock_response):
     assert set(gdf.columns) == set(
         ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "text", "score"]
     )
+
+
+def test_deepsolo_save_to_geojson_centroid(runner_run_all, tmp_path, mock_response):
+    runner = runner_run_all
+    _ = runner.convert_to_coords()
     runner.save_to_geojson(f"{tmp_path}/text_centroid.geojson", centroid=True)
     assert os.path.exists(f"{tmp_path}/text_centroid.geojson")
     gdf_centroid = gpd.read_file(f"{tmp_path}/text_centroid.geojson")
@@ -508,6 +513,32 @@ def test_deepsolo_save_search_results(runner_run_all, tmp_path, mock_response):
     assert "mapreader_text.png" in gdf["image_id"].values
 
 
+def test_deepsolo_save_search_results_centroid(runner_run_all, tmp_path, mock_response):
+    runner = runner_run_all
+    _ = runner.convert_to_parent_pixel_bounds()
+    out = runner.search_preds("map", ignore_case=True)
+    assert isinstance(out, dict)
+    runner.save_search_results_to_geojson(
+        f"{tmp_path}/search_results_centroid.geojson", centroid=True
+    )
+    assert os.path.exists(f"{tmp_path}/search_results_centroid.geojson")
+    gdf = gpd.read_file(f"{tmp_path}/search_results_centroid.geojson")
+    assert isinstance(gdf, gpd.GeoDataFrame)
+    assert set(gdf.columns) == set(
+        [
+            "image_id",
+            "patch_id",
+            "pixel_geometry",
+            "geometry",
+            "crs",
+            "text",
+            "score",
+            "polygon",
+        ]
+    )
+    assert "mapreader_text.png" in gdf["image_id"].values
+
+
 def test_deepsolo_save_search_results_errors(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     with pytest.raises(ValueError, match="No results to save"):

From c9b9529e419e98cf3ceaf445c8303ccc2c94e743 Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Thu, 5 Dec 2024 11:38:25 +0000
Subject: [PATCH 12/15] update maptext and dptext detr tests

---
 .../test_text_spotting/test_dptext_runner.py  | 226 ++++++++++++++++++
 .../test_text_spotting/test_maptext_runner.py |  76 ++++++
 2 files changed, 302 insertions(+)

diff --git a/tests/test_text_spotting/test_dptext_runner.py b/tests/test_text_spotting/test_dptext_runner.py
index 0c42fea0..71327ab5 100644
--- a/tests/test_text_spotting/test_dptext_runner.py
+++ b/tests/test_text_spotting/test_dptext_runner.py
@@ -5,11 +5,13 @@
 import pickle
 
 import geopandas as gpd
+import numpy as np
 import pandas as pd
 import pytest
 from detectron2.engine import DefaultPredictor
 from detectron2.structures.instances import Instances
 from dptext_detr.config import get_cfg
+from shapely import Polygon
 
 from mapreader import DPTextDETRRunner
 from mapreader.load import MapImages
@@ -44,6 +46,7 @@ def init_dataframes(sample_dir, tmp_path):
     maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv")
     maps.patchify_all(patch_size=800, path_save=tmp_path)
     maps.check_georeferencing()
+    assert maps.georeferenced
     parent_df, patch_df = maps.convert_images()
     return parent_df, patch_df
 
@@ -141,6 +144,80 @@ def test_dptext_init_tsv(init_dataframes, tmp_path):
     assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple)
 
 
+def test_dptext_init_geojson(init_dataframes, tmp_path, mock_response):
+    parent_df, patch_df = init_dataframes
+    parent_df.to_file(f"{tmp_path}/parent_df.geojson", driver="GeoJSON")
+    patch_df.to_file(f"{tmp_path}/patch_df.geojson", driver="GeoJSON")
+    runner = DPTextDETRRunner(
+        f"{tmp_path}/patch_df.geojson",
+        parent_df=f"{tmp_path}/parent_df.geojson",
+        cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml",
+    )
+    assert isinstance(runner, DPTextDETRRunner)
+    assert isinstance(runner.predictor, DefaultPredictor)
+    assert isinstance(runner.parent_df.iloc[0]["geometry"], Polygon)
+    out = runner.run_all()
+    assert isinstance(out, dict)
+    assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys()
+    assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list)
+    assert isinstance(
+        out["patch-0-0-800-40-#mapreader_text.png#.png"][0], PatchPrediction
+    )
+
+
+def test_dptext_init_errors(init_dataframes):
+    parent_df, patch_df = init_dataframes
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        DPTextDETRRunner(
+            patch_df="fake_file.txt",
+            parent_df=parent_df,
+            cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml",
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        DPTextDETRRunner(
+            patch_df=patch_df,
+            parent_df="fake_file.txt",
+            cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml",
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        DPTextDETRRunner(
+            patch_df=np.array([1, 2, 3]),
+            parent_df=parent_df,
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        DPTextDETRRunner(
+            patch_df=patch_df,
+            parent_df=np.array([1, 2, 3]),
+        )
+
+
+def test_dptext_check_georeferencing(init_dataframes):
+    parent_df, patch_df = init_dataframes
+    runner = DPTextDETRRunner(
+        patch_df,
+        parent_df=parent_df,
+        cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml",
+    )
+    runner.check_georeferencing()
+    assert runner.georeferenced
+
+    runner = DPTextDETRRunner(
+        patch_df,
+        parent_df=parent_df.drop(columns=["dlat", "dlon"]),
+        cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml",
+    )
+    runner.check_georeferencing()
+    assert runner.georeferenced
+
+    runner = DPTextDETRRunner(
+        patch_df,
+        parent_df=parent_df.drop(columns=["coordinates"]),
+        cfg_file=f"{DPTEXT_DETR_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml",
+    )
+    runner.check_georeferencing()
+    assert not runner.georeferenced
+
+
 def test_dptext_run_all(init_runner, mock_response):
     runner = init_runner
     # dict
@@ -238,3 +315,152 @@ def test_dptext_save_to_geojson(runner_run_all, tmp_path, mock_response):
     assert set(gdf.columns) == set(
         ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "score"]
     )
+
+
+def test_dptext_save_to_geojson_centroid(runner_run_all, tmp_path, mock_response):
+    runner = runner_run_all
+    _ = runner.convert_to_coords()
+    runner.save_to_geojson(f"{tmp_path}/text_centroid.geojson", centroid=True)
+    assert os.path.exists(f"{tmp_path}/text_centroid.geojson")
+    gdf_centroid = gpd.read_file(f"{tmp_path}/text_centroid.geojson")
+    assert isinstance(gdf_centroid, gpd.GeoDataFrame)
+    assert set(gdf_centroid.columns) == set(
+        [
+            "image_id",
+            "patch_id",
+            "pixel_geometry",
+            "geometry",
+            "crs",
+            "score",
+            "polygon",
+        ]
+    )
+
+
+def test_dptext_load_geo_predictions(runner_run_all, tmp_path):
+    runner = runner_run_all
+    _ = runner.convert_to_coords()
+    runner.save_to_geojson(f"{tmp_path}/text.geojson")
+    runner.geo_predictions = {}
+    runner.load_geo_predictions(f"{tmp_path}/text.geojson")
+    assert len(runner.geo_predictions)
+    assert "mapreader_text.png" in runner.geo_predictions.keys()
+    assert isinstance(runner.geo_predictions["mapreader_text.png"], list)
+    assert isinstance(runner.geo_predictions["mapreader_text.png"][0], GeoPrediction)
+
+
+def test_dptext_load_geo_predictions_errors(runner_run_all, tmp_path):
+    runner = runner_run_all
+    with pytest.raises(ValueError, match="must be a path to a geojson file"):
+        runner.load_geo_predictions("fakefile.csv")
+
+
+def test_dptext_save_to_csv_polygon(runner_run_all, tmp_path, mock_response):
+    runner = runner_run_all
+    # patch
+    runner.save_to_csv(tmp_path)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    # parent
+    _ = runner.convert_to_parent_pixel_bounds()
+    runner.save_to_csv(tmp_path)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
+    # geo
+    _ = runner.convert_to_coords()
+    runner.save_to_csv(tmp_path)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/geo_predictions.csv")
+
+
+def test_dptext_save_to_csv_centroid(runner_run_all, tmp_path, mock_response):
+    runner = runner_run_all
+    # patch
+    runner.save_to_csv(tmp_path, centroid=True)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    # parent
+    _ = runner.convert_to_parent_pixel_bounds()
+    runner.save_to_csv(tmp_path, centroid=True)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
+    # geo
+    _ = runner.convert_to_coords()
+    runner.save_to_csv(tmp_path, centroid=True)
+    assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
+    assert os.path.exists(f"{tmp_path}/geo_predictions.csv")
+
+
+def test_dptext_save_to_csv_errors(runner_run_all, tmp_path, mock_response):
+    runner = runner_run_all
+    runner.patch_predictions = {}
+    with pytest.raises(ValueError, match="No patch predictions found"):
+        runner.save_to_csv(tmp_path)
+
+
+def test_dptext_load_patch_predictions(runner_run_all, tmp_path):
+    runner = runner_run_all
+    _ = runner.convert_to_coords()
+    assert len(runner.geo_predictions)  # this will be empty after reloading
+    runner.save_to_csv(tmp_path)
+    runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv")
+    assert len(runner.patch_predictions)
+    assert len(runner.geo_predictions) == 0
+    assert (
+        "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys()
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0],
+        PatchPrediction,
+    )
+
+
+def test_dptext_load_patch_predictions_dataframe(runner_run_all):
+    runner = runner_run_all
+    patch_preds = runner._dict_to_dataframe(runner.patch_predictions)
+    _ = runner.convert_to_coords()
+    assert len(runner.geo_predictions)  # this will be empty after reloading
+    runner.load_patch_predictions(patch_preds)
+    assert len(runner.patch_predictions)
+    assert len(runner.geo_predictions) == 0
+    assert (
+        "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys()
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0],
+        PatchPrediction,
+    )
+
+
+def test_dptext_load_patch_predictions_centroid(runner_run_all, tmp_path):
+    runner = runner_run_all
+    _ = runner.convert_to_coords()
+    assert len(runner.geo_predictions)
+    runner.save_to_csv(tmp_path, centroid=True)
+    runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv")
+    assert len(runner.patch_predictions)
+    assert len(runner.geo_predictions) == 0
+    assert (
+        "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys()
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list
+    )
+    assert isinstance(
+        runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0],
+        PatchPrediction,
+    )
+
+
+def test_dptext_load_patch_predictions_errors(runner_run_all, tmp_path):
+    runner = runner_run_all
+    with pytest.raises(
+        ValueError, match="must be a pandas DataFrame or path to a CSV file"
+    ):
+        runner.load_patch_predictions("fake_file.geojson")
diff --git a/tests/test_text_spotting/test_maptext_runner.py b/tests/test_text_spotting/test_maptext_runner.py
index 6bd20b08..0a4ff5bc 100644
--- a/tests/test_text_spotting/test_maptext_runner.py
+++ b/tests/test_text_spotting/test_maptext_runner.py
@@ -5,11 +5,13 @@
 import pickle
 
 import geopandas as gpd
+import numpy as np
 import pandas as pd
 import pytest
 from detectron2.engine import DefaultPredictor
 from detectron2.structures.instances import Instances
 from maptextpipeline.config import get_cfg
+from shapely import Polygon
 
 from mapreader import MapTextRunner
 from mapreader.load import MapImages
@@ -141,6 +143,80 @@ def test_maptext_init_tsv(init_dataframes, tmp_path):
     assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple)
 
 
+def test_maptext_init_geojson(init_dataframes, tmp_path, mock_response):
+    parent_df, patch_df = init_dataframes
+    parent_df.to_file(f"{tmp_path}/parent_df.geojson", driver="GeoJSON")
+    patch_df.to_file(f"{tmp_path}/patch_df.geojson", driver="GeoJSON")
+    runner = MapTextRunner(
+        f"{tmp_path}/patch_df.geojson",
+        parent_df=f"{tmp_path}/parent_df.geojson",
+        cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml",
+    )
+    assert isinstance(runner, MapTextRunner)
+    assert isinstance(runner.predictor, DefaultPredictor)
+    assert isinstance(runner.parent_df.iloc[0]["geometry"], Polygon)
+    out = runner.run_all()
+    assert isinstance(out, dict)
+    assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys()
+    assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list)
+    assert isinstance(
+        out["patch-0-0-800-40-#mapreader_text.png#.png"][0], PatchPrediction
+    )
+
+
+def test_maptext_init_errors(init_dataframes):
+    parent_df, patch_df = init_dataframes
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        MapTextRunner(
+            patch_df="fake_file.txt",
+            parent_df=parent_df,
+            cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml",
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        MapTextRunner(
+            patch_df=patch_df,
+            parent_df="fake_file.txt",
+            cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml",
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        MapTextRunner(
+            patch_df=np.array([1, 2, 3]),
+            parent_df=parent_df,
+        )
+    with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"):
+        MapTextRunner(
+            patch_df=patch_df,
+            parent_df=np.array([1, 2, 3]),
+        )
+
+
+def test_maptext_check_georeferencing(init_dataframes):
+    parent_df, patch_df = init_dataframes
+    runner = MapTextRunner(
+        patch_df,
+        parent_df=parent_df,
+        cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml",
+    )
+    runner.check_georeferencing()
+    assert runner.georeferenced
+
+    runner = MapTextRunner(
+        patch_df,
+        parent_df=parent_df.drop(columns=["dlat", "dlon"]),
+        cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml",
+    )
+    runner.check_georeferencing()
+    assert runner.georeferenced
+
+    runner = MapTextRunner(
+        patch_df,
+        parent_df=parent_df.drop(columns=["coordinates"]),
+        cfg_file=f"{MAPTEXTPIPELINE_PATH}/configs/ViTAEv2_S/rumsey/test.yaml",
+    )
+    runner.check_georeferencing()
+    assert not runner.georeferenced
+
+
 def test_maptext_run_all(init_runner, mock_response):
     runner = init_runner
     # dict

From aaf8f7c8c86a3e68963f8533aa237adc6354a245 Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Fri, 6 Dec 2024 10:29:14 +0000
Subject: [PATCH 13/15] update docs

---
 .../step-by-step-guide/6-spot-text.rst        | 73 ++++++++++++++++---
 1 file changed, 63 insertions(+), 10 deletions(-)

diff --git a/docs/source/using-mapreader/step-by-step-guide/6-spot-text.rst b/docs/source/using-mapreader/step-by-step-guide/6-spot-text.rst
index bdca45a2..84c5730e 100644
--- a/docs/source/using-mapreader/step-by-step-guide/6-spot-text.rst
+++ b/docs/source/using-mapreader/step-by-step-guide/6-spot-text.rst
@@ -223,7 +223,7 @@ You can do this by setting the ``deduplicate`` argument and passing a ``min_ioa`
 
 This will help resolve any issues with predictions being cut-off at the edges of patches since the overlap should help find the full piece of text.
 
-Again, to view the predictions, you can use the ``show`` method.
+Again, to view the predictions, you can use the ``show_predictions`` method.
 You should pass a parent image ID as the ``image_id`` argument:
 
 .. code-block:: python
@@ -244,11 +244,6 @@ As above, use the ``border_color``, ``text_color`` and ``figsize`` arguments to
         figsize = (20, 20),
     )
 
-You can save your predictions to a csv file using the pandas ``to_csv`` method:
-
-.. code-block:: python
-
-    parent_preds_df.to_csv("text_preds.csv")
 
 Geo-reference
 -------------
@@ -282,7 +277,11 @@ Or, if your maps are taken from a tilelayer, you can specify the URL of the tile
 You can also pass in a dictionary of ``style_kwargs`` to customize the appearance of the map.
 Refer to the `geopandas explore documentation <https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoDataFrame.explore.html>`__ for more information on the available options.
 
-Again, you can save your georeferenced predictions to a csv file (as shown above), or, you can save them to a geojson file for loading into GIS software:
+
+Saving
+------
+
+You can save your georeferenced predictions to a geojson file for loading into GIS software using the ``save_to_geojson`` method:
 
 .. code-block:: python
 
@@ -290,6 +289,58 @@ Again, you can save your georeferenced predictions to a csv file (as shown above
 
 This will save the predictions to a geojson file, with each text prediction as a separate feature.
 
+By default, the geometry column will contain the polygon representing the bounding box of your text.
+If instead you would like to save just the centroid of this polygon, you can set the ``centroid`` argument:
+
+.. code-block:: python
+
+    my_runner.save_to_geojson("text_preds.geojson", centroid=True)
+
+This will save the centroid of the bounding box as the geometry column and create a "polygon" column containing the original polygon.
+
+At any point, you can also save your patch, parent and georeferenced predictions to CSV files using the ``save_to_csv`` method:
+
+.. code-block:: python
+
+    my_runner.save_to_csv("my_preds/")
+
+This will create a folder called "my_preds" and save the patch, parent and georeferenced predictions to CSV files within it.
+
+As above, you can use the ``centroid`` argument to save the centroid of the bounding box instead of the full polygon.
+
+
+Loading
+-------
+
+If you have saved your predictions and want to reload them into a runner, you can use either the ``load_geo_predictions`` or ``load_patch_predictions`` method.
+
+.. note:: These methods will overwrite any existing predictions in the runner. So if you want to keep your existing predictions, you should save them to a file first!
+
+The ``load_geo_predictions`` method is used to load georeferenced predictions from a geojson file:
+
+.. code-block:: python
+
+    my_runner.load_geo_predictions("text_preds.geojson")
+
+Loading this fill will populate the patch, parent and georeferenced predictions in the runner.
+
+The ``load_patch_predictions`` method is used to load patch predictions from a CSV file or pandas DataFrame.
+To load a CSV file, you can use:
+
+.. code-block:: python
+
+    my_runner.load_patch_predictions("my_preds/patch_predictions.csv")
+
+Or, to load a pandas DataFrame, you can use:
+
+.. code-block:: python
+
+    my_runner.load_patch_predictions(patch_preds_df)
+
+This will populate the patch and parent predictions in the runner but not the georeferenced predictions (incase you do not have georefencing information).
+If you do want to convert these to georeferenced predictions, you can use the ``convert_to_coords`` method as shown above.
+
+
 Search predictions
 ------------------
 
@@ -364,8 +415,10 @@ If your maps are georeferenced, you can also save your search results using the
 
     my_runner.save_search_results_to_geojson("search_results.geojson")
 
-This will save the search results to a geojson file, with each search result as a separate feature.
+This will save the search results to a geojson file, with each search result as a separate feature which can be loaded into GIS software for further analysis/exploration.
 
-These can then be loaded into GIS software for further analysis/exploration.
+If, however, your maps are not georeferenced, you will need to save the search results to a csv file using the pandas ``to_csv`` method:
+
+.. code-block:: python
 
-If your maps are not georeferenced, you can save the search results to a csv file using the pandas ``to_csv`` method (as shown above).
+    search_results_df.to_csv("search_results.csv")

From 5ca580ab36c3414e7a7587ca426deb7d7df5fe63 Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Thu, 19 Dec 2024 09:22:38 +0000
Subject: [PATCH 14/15] Minor fixes/address review comments

---
 CHANGELOG.md                                  |   2 +-
 .../step-by-step-guide/6-spot-text.rst        |  24 ++--
 mapreader/spot_text/runner_base.py            | 114 +++++++++++++-----
 .../test_deepsolo_runner.py                   |  50 ++++----
 .../test_text_spotting/test_dptext_runner.py  |  34 +++---
 .../test_text_spotting/test_maptext_runner.py |  12 +-
 6 files changed, 142 insertions(+), 94 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7a1a5d5b..cabd1d60 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,7 +19,7 @@ _Add new changes here_
 
 ## Added
 
-- Added ablity to save and reload text predictions ([#536](https://github.com/maps-as-data/MapReader/pull/536)
+- Added ability to save and reload text predictions ([#536](https://github.com/maps-as-data/MapReader/pull/536))
 - Added minimal dataclasses for text predictions ([#536](https://github.com/maps-as-data/MapReader/pull/536)
 
 ## [v1.6.1](https://github.com/Living-with-machines/MapReader/releases/tag/v1.6.1) (2024-11-18)
diff --git a/docs/source/using-mapreader/step-by-step-guide/6-spot-text.rst b/docs/source/using-mapreader/step-by-step-guide/6-spot-text.rst
index 84c5730e..2b417d63 100644
--- a/docs/source/using-mapreader/step-by-step-guide/6-spot-text.rst
+++ b/docs/source/using-mapreader/step-by-step-guide/6-spot-text.rst
@@ -248,7 +248,7 @@ As above, use the ``border_color``, ``text_color`` and ``figsize`` arguments to
 Geo-reference
 -------------
 
-If you maps are georeferenced in your ``parent_df``, you can also convert the pixel bounds to georeferenced coordinates using the ``convert_to_coords`` method:
+If your maps are georeferenced in your ``parent_df``, you can also convert the pixel coordinates to georeferenced coordinates using the ``convert_to_coords`` method:
 
 .. code-block:: python
 
@@ -281,11 +281,11 @@ Refer to the `geopandas explore documentation <https://geopandas.org/en/stable/d
 Saving
 ------
 
-You can save your georeferenced predictions to a geojson file for loading into GIS software using the ``save_to_geojson`` method:
+You can save your georeferenced predictions to a geojson file for loading into GIS software using the ``to_geojson`` method:
 
 .. code-block:: python
 
-    my_runner.save_to_geojson("text_preds.geojson")
+    my_runner.to_geojson("text_preds.geojson")
 
 This will save the predictions to a geojson file, with each text prediction as a separate feature.
 
@@ -294,19 +294,19 @@ If instead you would like to save just the centroid of this polygon, you can set
 
 .. code-block:: python
 
-    my_runner.save_to_geojson("text_preds.geojson", centroid=True)
+    my_runner.to_geojson("text_preds.geojson", centroid=True)
 
 This will save the centroid of the bounding box as the geometry column and create a "polygon" column containing the original polygon.
 
-At any point, you can also save your patch, parent and georeferenced predictions to CSV files using the ``save_to_csv`` method:
+At any point, you can also save your patch, parent and georeferenced predictions to CSV files using the ``to_csv`` method:
 
 .. code-block:: python
 
-    my_runner.save_to_csv("my_preds/")
+    my_runner.to_csv("my_preds/")
 
 This will create a folder called "my_preds" and save the patch, parent and georeferenced predictions to CSV files within it.
 
-As above, you can use the ``centroid`` argument to save the centroid of the bounding box instead of the full polygon.
+As above, you can use the ``centroid=True`` argument to save the centroid of the bounding box instead of the full polygon.
 
 
 Loading
@@ -322,7 +322,7 @@ The ``load_geo_predictions`` method is used to load georeferenced predictions fr
 
     my_runner.load_geo_predictions("text_preds.geojson")
 
-Loading this fill will populate the patch, parent and georeferenced predictions in the runner.
+Loading this will populate the patch, parent and georeferenced predictions in the runner.
 
 The ``load_patch_predictions`` method is used to load patch predictions from a CSV file or pandas DataFrame.
 To load a CSV file, you can use:
@@ -337,8 +337,8 @@ Or, to load a pandas DataFrame, you can use:
 
     my_runner.load_patch_predictions(patch_preds_df)
 
-This will populate the patch and parent predictions in the runner but not the georeferenced predictions (incase you do not have georefencing information).
-If you do want to convert these to georeferenced predictions, you can use the ``convert_to_coords`` method as shown above.
+This will populate the patch and parent predictions in the runner but not the georeferenced predictions (in case you do not have georeferencing information).
+If you do want to convert your text predictions from pixel coordinates to geospatial coordinates, you can use the ``convert_to_coords`` method as shown above.
 
 
 Search predictions
@@ -409,11 +409,11 @@ You can also pass in a dictionary of ``style_kwargs`` to customize the appearanc
 Save search results
 ~~~~~~~~~~~~~~~~~~~
 
-If your maps are georeferenced, you can also save your search results using the ``save_search_results_to_geojson`` method:
+If your maps are georeferenced, you can also save your search results using the ``search_results_to_geojson`` method:
 
 .. code-block:: python
 
-    my_runner.save_search_results_to_geojson("search_results.geojson")
+    my_runner.search_results_to_geojson("search_results.geojson")
 
 This will save the search results to a geojson file, with each search result as a separate feature which can be loaded into GIS software for further analysis/exploration.
 
diff --git a/mapreader/spot_text/runner_base.py b/mapreader/spot_text/runner_base.py
index ccc299cf..8207812f 100644
--- a/mapreader/spot_text/runner_base.py
+++ b/mapreader/spot_text/runner_base.py
@@ -480,6 +480,27 @@ def save_to_geojson(
         self,
         path_save: str | pathlib.Path,
         centroid: bool = False,
+    ) -> None:
+        """
+        Save the georeferenced predictions to a GeoJSON file.
+
+        Parameters
+        ----------
+        path_save : str | pathlib.Path
+            Path to save the GeoJSON file
+        centroid : bool, optional
+            Whether to convert the polygons to centroids, by default False.
+            NOTE: The original polygon will still be saved as a separate column
+        """
+        print(
+            "[WARNING] This method is deprecated and will soon be removed. Use `to_geojson` instead."
+        )
+        self.to_geojson(path_save, centroid)
+
+    def to_geojson(
+        self,
+        path_save: str | pathlib.Path,
+        centroid: bool = False,
     ) -> None:
         """Save the georeferenced predictions to a GeoJSON file.
 
@@ -506,7 +527,7 @@ def save_to_geojson(
 
         geo_df.to_file(path_save, driver="GeoJSON", engine="pyogrio")
 
-    def save_to_csv(
+    def to_csv(
         self,
         path_save: str | pathlib.Path,
         centroid: bool = False,
@@ -858,7 +879,7 @@ def _post_process(self, image_id, ctrl_pnts, scores, recs, bd_pnts):
                 PatchPrediction(pixel_geometry=polygon, score=score, text=text)
             )
 
-    def search_preds(
+    def search_predictions(
         self, search_text: str, ignore_case: bool = True, return_dataframe: bool = False
     ) -> dict | pd.DataFrame:
         """Search the predictions for specific text. Accepts regex.
@@ -1044,36 +1065,63 @@ def explore_search_results(
             style_kwds=style_kwargs,
         )
 
-    def save_search_results_to_geojson(
-        self,
-        path_save: str | pathlib.Path,
-        centroid: bool = False,
-    ) -> None:
-        """Convert the search results to georeferenced search results and save them to a GeoJSON file.
-
-        Parameters
-        ----------
-        path_save : str | pathlib.Path
-            The path to save the GeoJSON file.
-        centroid : bool, optional
-            Whether to save the centroid of the polygons as the geometry column, by default False.
-            Note: The original polygon will stil be saved as a separate column.
-
-        Raises
-        ------
-        ValueError
-            If no search results are found.
-        """
-        if self.search_results == {}:
-            raise ValueError("[ERROR] No results to save!")
-
-        geo_search_results = self._get_geo_search_results()
-        geo_df = self._dict_to_dataframe(geo_search_results)
 
-        if centroid:
-            geo_df["polygon"] = geo_df["geometry"].to_wkt()
-            geo_df["geometry"] = (
-                geo_df["geometry"].to_crs("27700").centroid.to_crs(geo_df.crs)
-            )
+def save_search_results_to_geojson(
+    self,
+    path_save: str | pathlib.Path,
+    centroid: bool = False,
+) -> None:
+    """Convert the search results to georeferenced search results and save them to a GeoJSON file.
+
+    Parameters
+    ----------
+    path_save : str | pathlib.Path
+        The path to save the GeoJSON file.
+    centroid : bool, optional
+        Whether to save the centroid of the polygons as the geometry column, by default False.
+        Note: The original polygon will stil be saved as a separate column.
+
+    Raises
+    ------
+    ValueError
+        If no search results are found.
+    """
+    print(
+        "[WARNING] This method is deprecated and will soon be removed. Use `search_results_to_geojson` instead."
+    )
+    self.search_results_to_geojson(path_save, centroid)
+
+
+def search_results_to_geojson(
+    self,
+    path_save: str | pathlib.Path,
+    centroid: bool = False,
+) -> None:
+    """Convert the search results to georeferenced search results and save them to a GeoJSON file.
+
+    Parameters
+    ----------
+    path_save : str | pathlib.Path
+        The path to save the GeoJSON file.
+    centroid : bool, optional
+        Whether to save the centroid of the polygons as the geometry column, by default False.
+        Note: The original polygon will stil be saved as a separate column.
+
+    Raises
+    ------
+    ValueError
+        If no search results are found.
+    """
+    if self.search_results == {}:
+        raise ValueError("[ERROR] No results to save!")
+
+    geo_search_results = self._get_geo_search_results()
+    geo_df = self._dict_to_dataframe(geo_search_results)
+
+    if centroid:
+        geo_df["polygon"] = geo_df["geometry"].to_wkt()
+        geo_df["geometry"] = (
+            geo_df["geometry"].to_crs("27700").centroid.to_crs(geo_df.crs)
+        )
 
-        geo_df.to_file(path_save, driver="GeoJSON", engine="pyogrio")
+    geo_df.to_file(path_save, driver="GeoJSON", engine="pyogrio")
diff --git a/tests/test_text_spotting/test_deepsolo_runner.py b/tests/test_text_spotting/test_deepsolo_runner.py
index b08f20a1..01a24c8a 100644
--- a/tests/test_text_spotting/test_deepsolo_runner.py
+++ b/tests/test_text_spotting/test_deepsolo_runner.py
@@ -44,7 +44,7 @@ def init_dataframes(sample_dir, tmp_path):
     """
     maps = MapImages(f"{sample_dir}/mapreader_text.png")
     maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv")
-    maps.patchify_all(patch_size=800, path_save=tmp_path)
+    maps.patchify_all(patch_size=800, path_=tmp_path)
     maps.check_georeferencing()
     assert maps.georeferenced
     parent_df, patch_df = maps.convert_images()
@@ -279,7 +279,7 @@ def test_deepsolo_convert_to_parent_coords(runner_run_all, mock_response):
 def test_deepsolo_deduplicate(sample_dir, tmp_path, mock_response):
     maps = MapImages(f"{sample_dir}/mapreader_text.png")
     maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv")
-    maps.patchify_all(patch_size=800, path_save=tmp_path, overlap=0.5)
+    maps.patchify_all(patch_size=800, path_=tmp_path, overlap=0.5)
     maps.check_georeferencing()
     parent_df, patch_df = maps.convert_images()
     runner = DeepSoloRunner(
@@ -313,10 +313,10 @@ def test_deepsolo_run_on_image(init_runner, mock_response):
     assert isinstance(out["instances"], Instances)
 
 
-def test_deepsolo_save_to_geojson(runner_run_all, tmp_path, mock_response):
+def test_deepsolo_to_geojson(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     _ = runner.convert_to_coords()
-    runner.save_to_geojson(f"{tmp_path}/text.geojson")
+    runner.to_geojson(f"{tmp_path}/text.geojson")
     assert os.path.exists(f"{tmp_path}/text.geojson")
     gdf = gpd.read_file(f"{tmp_path}/text.geojson")
     assert isinstance(gdf, gpd.GeoDataFrame)
@@ -325,10 +325,10 @@ def test_deepsolo_save_to_geojson(runner_run_all, tmp_path, mock_response):
     )
 
 
-def test_deepsolo_save_to_geojson_centroid(runner_run_all, tmp_path, mock_response):
+def test_deepsolo_to_geojson_centroid(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     _ = runner.convert_to_coords()
-    runner.save_to_geojson(f"{tmp_path}/text_centroid.geojson", centroid=True)
+    runner.to_geojson(f"{tmp_path}/text_centroid.geojson", centroid=True)
     assert os.path.exists(f"{tmp_path}/text_centroid.geojson")
     gdf_centroid = gpd.read_file(f"{tmp_path}/text_centroid.geojson")
     assert isinstance(gdf_centroid, gpd.GeoDataFrame)
@@ -349,7 +349,7 @@ def test_deepsolo_save_to_geojson_centroid(runner_run_all, tmp_path, mock_respon
 def test_deepsolo_load_geo_predictions(runner_run_all, tmp_path):
     runner = runner_run_all
     _ = runner.convert_to_coords()
-    runner.save_to_geojson(f"{tmp_path}/text.geojson")
+    runner.to_geojson(f"{tmp_path}/text.geojson")
     runner.geo_predictions = {}
     runner.load_geo_predictions(f"{tmp_path}/text.geojson")
     assert len(runner.geo_predictions)
@@ -364,54 +364,54 @@ def test_deepsolo_load_geo_predictions_errors(runner_run_all, tmp_path):
         runner.load_geo_predictions("fakefile.csv")
 
 
-def test_deepsolo_save_to_csv_polygon(runner_run_all, tmp_path, mock_response):
+def test_deepsolo_to_csv_polygon(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     # patch
-    runner.save_to_csv(tmp_path)
+    runner.to_csv(tmp_path)
     assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
     # parent
     _ = runner.convert_to_parent_pixel_bounds()
-    runner.save_to_csv(tmp_path)
+    runner.to_csv(tmp_path)
     assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
     assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
     # geo
     _ = runner.convert_to_coords()
-    runner.save_to_csv(tmp_path)
+    runner.to_csv(tmp_path)
     assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
     assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
     assert os.path.exists(f"{tmp_path}/geo_predictions.csv")
 
 
-def test_deepsolo_save_to_csv_centroid(runner_run_all, tmp_path, mock_response):
+def test_deepsolo_to_csv_centroid(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     # patch
-    runner.save_to_csv(tmp_path, centroid=True)
+    runner.to_csv(tmp_path, centroid=True)
     assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
     # parent
     _ = runner.convert_to_parent_pixel_bounds()
-    runner.save_to_csv(tmp_path, centroid=True)
+    runner.to_csv(tmp_path, centroid=True)
     assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
     assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
     # geo
     _ = runner.convert_to_coords()
-    runner.save_to_csv(tmp_path, centroid=True)
+    runner.to_csv(tmp_path, centroid=True)
     assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
     assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
     assert os.path.exists(f"{tmp_path}/geo_predictions.csv")
 
 
-def test_deepsolo_save_to_csv_errors(runner_run_all, tmp_path, mock_response):
+def test_deepsolo_to_csv_errors(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     runner.patch_predictions = {}
     with pytest.raises(ValueError, match="No patch predictions found"):
-        runner.save_to_csv(tmp_path)
+        runner.to_csv(tmp_path)
 
 
 def test_deepsolo_load_patch_predictions(runner_run_all, tmp_path):
     runner = runner_run_all
     _ = runner.convert_to_coords()
     assert len(runner.geo_predictions)  # this will be empty after reloading
-    runner.save_to_csv(tmp_path)
+    runner.to_csv(tmp_path)
     runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv")
     assert len(runner.patch_predictions)
     assert len(runner.geo_predictions) == 0
@@ -451,7 +451,7 @@ def test_deepsolo_load_patch_predictions_centroid(runner_run_all, tmp_path):
     runner = runner_run_all
     _ = runner.convert_to_coords()
     assert len(runner.geo_predictions)
-    runner.save_to_csv(tmp_path, centroid=True)
+    runner.to_csv(tmp_path, centroid=True)
     runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv")
     assert len(runner.patch_predictions)
     assert len(runner.geo_predictions) == 0
@@ -498,12 +498,12 @@ def test_deepsolo_search_preds_errors(runner_run_all, mock_response):
         runner.search_preds("maps", ignore_case=True)
 
 
-def test_deepsolo_save_search_results(runner_run_all, tmp_path, mock_response):
+def test_deepsolo_search_results(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     _ = runner.convert_to_parent_pixel_bounds()
     out = runner.search_preds("map", ignore_case=True)
     assert isinstance(out, dict)
-    runner.save_search_results_to_geojson(f"{tmp_path}/search_results.geojson")
+    runner.search_results_to_geojson(f"{tmp_path}/search_results.geojson")
     assert os.path.exists(f"{tmp_path}/search_results.geojson")
     gdf = gpd.read_file(f"{tmp_path}/search_results.geojson")
     assert isinstance(gdf, gpd.GeoDataFrame)
@@ -513,12 +513,12 @@ def test_deepsolo_save_search_results(runner_run_all, tmp_path, mock_response):
     assert "mapreader_text.png" in gdf["image_id"].values
 
 
-def test_deepsolo_save_search_results_centroid(runner_run_all, tmp_path, mock_response):
+def test_deepsolo_search_results_centroid(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     _ = runner.convert_to_parent_pixel_bounds()
     out = runner.search_preds("map", ignore_case=True)
     assert isinstance(out, dict)
-    runner.save_search_results_to_geojson(
+    runner.search_results_to_geojson(
         f"{tmp_path}/search_results_centroid.geojson", centroid=True
     )
     assert os.path.exists(f"{tmp_path}/search_results_centroid.geojson")
@@ -539,7 +539,7 @@ def test_deepsolo_save_search_results_centroid(runner_run_all, tmp_path, mock_re
     assert "mapreader_text.png" in gdf["image_id"].values
 
 
-def test_deepsolo_save_search_results_errors(runner_run_all, tmp_path, mock_response):
+def test_deepsolo_search_results_errors(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     with pytest.raises(ValueError, match="No results to save"):
-        runner.save_search_results_to_geojson(f"{tmp_path}/test.geojson")
+        runner.search_results_to_geojson(f"{tmp_path}/test.geojson")
diff --git a/tests/test_text_spotting/test_dptext_runner.py b/tests/test_text_spotting/test_dptext_runner.py
index 71327ab5..ed1a231d 100644
--- a/tests/test_text_spotting/test_dptext_runner.py
+++ b/tests/test_text_spotting/test_dptext_runner.py
@@ -305,10 +305,10 @@ def test_dptext_run_on_image(init_runner, mock_response):
     assert isinstance(out["instances"], Instances)
 
 
-def test_dptext_save_to_geojson(runner_run_all, tmp_path, mock_response):
+def test_dptext_to_geojson(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     _ = runner.convert_to_coords()
-    runner.save_to_geojson(f"{tmp_path}/text.geojson")
+    runner.to_geojson(f"{tmp_path}/text.geojson")
     assert os.path.exists(f"{tmp_path}/text.geojson")
     gdf = gpd.read_file(f"{tmp_path}/text.geojson")
     assert isinstance(gdf, gpd.GeoDataFrame)
@@ -317,10 +317,10 @@ def test_dptext_save_to_geojson(runner_run_all, tmp_path, mock_response):
     )
 
 
-def test_dptext_save_to_geojson_centroid(runner_run_all, tmp_path, mock_response):
+def test_dptext_to_geojson_centroid(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     _ = runner.convert_to_coords()
-    runner.save_to_geojson(f"{tmp_path}/text_centroid.geojson", centroid=True)
+    runner.to_geojson(f"{tmp_path}/text_centroid.geojson", centroid=True)
     assert os.path.exists(f"{tmp_path}/text_centroid.geojson")
     gdf_centroid = gpd.read_file(f"{tmp_path}/text_centroid.geojson")
     assert isinstance(gdf_centroid, gpd.GeoDataFrame)
@@ -340,7 +340,7 @@ def test_dptext_save_to_geojson_centroid(runner_run_all, tmp_path, mock_response
 def test_dptext_load_geo_predictions(runner_run_all, tmp_path):
     runner = runner_run_all
     _ = runner.convert_to_coords()
-    runner.save_to_geojson(f"{tmp_path}/text.geojson")
+    runner.to_geojson(f"{tmp_path}/text.geojson")
     runner.geo_predictions = {}
     runner.load_geo_predictions(f"{tmp_path}/text.geojson")
     assert len(runner.geo_predictions)
@@ -355,54 +355,54 @@ def test_dptext_load_geo_predictions_errors(runner_run_all, tmp_path):
         runner.load_geo_predictions("fakefile.csv")
 
 
-def test_dptext_save_to_csv_polygon(runner_run_all, tmp_path, mock_response):
+def test_dptext_to_csv_polygon(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     # patch
-    runner.save_to_csv(tmp_path)
+    runner.to_csv(tmp_path)
     assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
     # parent
     _ = runner.convert_to_parent_pixel_bounds()
-    runner.save_to_csv(tmp_path)
+    runner.to_csv(tmp_path)
     assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
     assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
     # geo
     _ = runner.convert_to_coords()
-    runner.save_to_csv(tmp_path)
+    runner.to_csv(tmp_path)
     assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
     assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
     assert os.path.exists(f"{tmp_path}/geo_predictions.csv")
 
 
-def test_dptext_save_to_csv_centroid(runner_run_all, tmp_path, mock_response):
+def test_dptext_to_csv_centroid(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     # patch
-    runner.save_to_csv(tmp_path, centroid=True)
+    runner.to_csv(tmp_path, centroid=True)
     assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
     # parent
     _ = runner.convert_to_parent_pixel_bounds()
-    runner.save_to_csv(tmp_path, centroid=True)
+    runner.to_csv(tmp_path, centroid=True)
     assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
     assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
     # geo
     _ = runner.convert_to_coords()
-    runner.save_to_csv(tmp_path, centroid=True)
+    runner.to_csv(tmp_path, centroid=True)
     assert os.path.exists(f"{tmp_path}/patch_predictions.csv")
     assert os.path.exists(f"{tmp_path}/parent_predictions.csv")
     assert os.path.exists(f"{tmp_path}/geo_predictions.csv")
 
 
-def test_dptext_save_to_csv_errors(runner_run_all, tmp_path, mock_response):
+def test_dptext_to_csv_errors(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     runner.patch_predictions = {}
     with pytest.raises(ValueError, match="No patch predictions found"):
-        runner.save_to_csv(tmp_path)
+        runner.to_csv(tmp_path)
 
 
 def test_dptext_load_patch_predictions(runner_run_all, tmp_path):
     runner = runner_run_all
     _ = runner.convert_to_coords()
     assert len(runner.geo_predictions)  # this will be empty after reloading
-    runner.save_to_csv(tmp_path)
+    runner.to_csv(tmp_path)
     runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv")
     assert len(runner.patch_predictions)
     assert len(runner.geo_predictions) == 0
@@ -442,7 +442,7 @@ def test_dptext_load_patch_predictions_centroid(runner_run_all, tmp_path):
     runner = runner_run_all
     _ = runner.convert_to_coords()
     assert len(runner.geo_predictions)
-    runner.save_to_csv(tmp_path, centroid=True)
+    runner.to_csv(tmp_path, centroid=True)
     runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv")
     assert len(runner.patch_predictions)
     assert len(runner.geo_predictions) == 0
diff --git a/tests/test_text_spotting/test_maptext_runner.py b/tests/test_text_spotting/test_maptext_runner.py
index 0a4ff5bc..2f21a162 100644
--- a/tests/test_text_spotting/test_maptext_runner.py
+++ b/tests/test_text_spotting/test_maptext_runner.py
@@ -306,10 +306,10 @@ def test_maptext_run_on_image(init_runner, mock_response):
     assert isinstance(out["instances"], Instances)
 
 
-def test_maptext_save_to_geojson(runner_run_all, tmp_path, mock_response):
+def test_maptext_to_geojson(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     _ = runner.convert_to_coords()
-    runner.save_to_geojson(f"{tmp_path}/text.geojson")
+    runner.to_geojson(f"{tmp_path}/text.geojson")
     assert os.path.exists(f"{tmp_path}/text.geojson")
     gdf = gpd.read_file(f"{tmp_path}/text.geojson")
     assert isinstance(gdf, gpd.GeoDataFrame)
@@ -341,12 +341,12 @@ def test_maptext_search_preds_errors(runner_run_all, mock_response):
         runner.search_preds("maps", ignore_case=True)
 
 
-def test_maptext_save_search_results(runner_run_all, tmp_path, mock_response):
+def test_maptext_search_results(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     _ = runner.convert_to_parent_pixel_bounds()
     out = runner.search_preds("map", ignore_case=True)
     assert isinstance(out, dict)
-    runner.save_search_results_to_geojson(f"{tmp_path}/search_results.geojson")
+    runner.search_results_to_geojson(f"{tmp_path}/search_results.geojson")
     assert os.path.exists(f"{tmp_path}/search_results.geojson")
     gdf = gpd.read_file(f"{tmp_path}/search_results.geojson")
     assert isinstance(gdf, gpd.GeoDataFrame)
@@ -356,7 +356,7 @@ def test_maptext_save_search_results(runner_run_all, tmp_path, mock_response):
     assert "mapreader_text.png" in gdf["image_id"].values
 
 
-def test_maptext_save_search_results_errors(runner_run_all, tmp_path, mock_response):
+def test_maptext_search_results_errors(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     with pytest.raises(ValueError, match="No results to save"):
-        runner.save_search_results_to_geojson(f"{tmp_path}/test.geojson")
+        runner.search_results_to_geojson(f"{tmp_path}/test.geojson")

From 484b28333c7b499ffc894a95b5eb6e501c55e588 Mon Sep 17 00:00:00 2001
From: Rosie Wood <rwood@turing.ac.uk>
Date: Thu, 19 Dec 2024 11:54:07 +0000
Subject: [PATCH 15/15] fix typo

---
 mapreader/spot_text/runner_base.py            | 112 +++++++++---------
 .../test_deepsolo_runner.py                   |  22 ++--
 .../test_text_spotting/test_maptext_runner.py |  16 +--
 3 files changed, 76 insertions(+), 74 deletions(-)

diff --git a/mapreader/spot_text/runner_base.py b/mapreader/spot_text/runner_base.py
index 8207812f..3221a3df 100644
--- a/mapreader/spot_text/runner_base.py
+++ b/mapreader/spot_text/runner_base.py
@@ -1065,63 +1065,61 @@ def explore_search_results(
             style_kwds=style_kwargs,
         )
 
+    def save_search_results_to_geojson(
+        self,
+        path_save: str | pathlib.Path,
+        centroid: bool = False,
+    ) -> None:
+        """Convert the search results to georeferenced search results and save them to a GeoJSON file.
+
+        Parameters
+        ----------
+        path_save : str | pathlib.Path
+            The path to save the GeoJSON file.
+        centroid : bool, optional
+            Whether to save the centroid of the polygons as the geometry column, by default False.
+            Note: The original polygon will still be saved as a separate column.
 
-def save_search_results_to_geojson(
-    self,
-    path_save: str | pathlib.Path,
-    centroid: bool = False,
-) -> None:
-    """Convert the search results to georeferenced search results and save them to a GeoJSON file.
-
-    Parameters
-    ----------
-    path_save : str | pathlib.Path
-        The path to save the GeoJSON file.
-    centroid : bool, optional
-        Whether to save the centroid of the polygons as the geometry column, by default False.
-        Note: The original polygon will stil be saved as a separate column.
-
-    Raises
-    ------
-    ValueError
-        If no search results are found.
-    """
-    print(
-        "[WARNING] This method is deprecated and will soon be removed. Use `search_results_to_geojson` instead."
-    )
-    self.search_results_to_geojson(path_save, centroid)
-
-
-def search_results_to_geojson(
-    self,
-    path_save: str | pathlib.Path,
-    centroid: bool = False,
-) -> None:
-    """Convert the search results to georeferenced search results and save them to a GeoJSON file.
-
-    Parameters
-    ----------
-    path_save : str | pathlib.Path
-        The path to save the GeoJSON file.
-    centroid : bool, optional
-        Whether to save the centroid of the polygons as the geometry column, by default False.
-        Note: The original polygon will stil be saved as a separate column.
-
-    Raises
-    ------
-    ValueError
-        If no search results are found.
-    """
-    if self.search_results == {}:
-        raise ValueError("[ERROR] No results to save!")
-
-    geo_search_results = self._get_geo_search_results()
-    geo_df = self._dict_to_dataframe(geo_search_results)
-
-    if centroid:
-        geo_df["polygon"] = geo_df["geometry"].to_wkt()
-        geo_df["geometry"] = (
-            geo_df["geometry"].to_crs("27700").centroid.to_crs(geo_df.crs)
+        Raises
+        ------
+        ValueError
+            If no search results are found.
+        """
+        print(
+            "[WARNING] This method is deprecated and will soon be removed. Use `search_results_to_geojson` instead."
         )
+        self.search_results_to_geojson(path_save, centroid)
+
+    def search_results_to_geojson(
+        self,
+        path_save: str | pathlib.Path,
+        centroid: bool = False,
+    ) -> None:
+        """Convert the search results to georeferenced search results and save them to a GeoJSON file.
 
-    geo_df.to_file(path_save, driver="GeoJSON", engine="pyogrio")
+        Parameters
+        ----------
+        path_save : str | pathlib.Path
+            The path to save the GeoJSON file.
+        centroid : bool, optional
+            Whether to save the centroid of the polygons as the geometry column, by default False.
+            Note: The original polygon will still be saved as a separate column.
+
+        Raises
+        ------
+        ValueError
+            If no search results are found.
+        """
+        if self.search_results == {}:
+            raise ValueError("[ERROR] No results to save!")
+
+        geo_search_results = self._get_geo_search_results()
+        geo_df = self._dict_to_dataframe(geo_search_results)
+
+        if centroid:
+            geo_df["polygon"] = geo_df["geometry"].to_wkt()
+            geo_df["geometry"] = (
+                geo_df["geometry"].to_crs("27700").centroid.to_crs(geo_df.crs)
+            )
+
+        geo_df.to_file(path_save, driver="GeoJSON", engine="pyogrio")
diff --git a/tests/test_text_spotting/test_deepsolo_runner.py b/tests/test_text_spotting/test_deepsolo_runner.py
index 01a24c8a..da0bd882 100644
--- a/tests/test_text_spotting/test_deepsolo_runner.py
+++ b/tests/test_text_spotting/test_deepsolo_runner.py
@@ -44,7 +44,7 @@ def init_dataframes(sample_dir, tmp_path):
     """
     maps = MapImages(f"{sample_dir}/mapreader_text.png")
     maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv")
-    maps.patchify_all(patch_size=800, path_=tmp_path)
+    maps.patchify_all(patch_size=800, path_save=tmp_path)
     maps.check_georeferencing()
     assert maps.georeferenced
     parent_df, patch_df = maps.convert_images()
@@ -279,7 +279,7 @@ def test_deepsolo_convert_to_parent_coords(runner_run_all, mock_response):
 def test_deepsolo_deduplicate(sample_dir, tmp_path, mock_response):
     maps = MapImages(f"{sample_dir}/mapreader_text.png")
     maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv")
-    maps.patchify_all(patch_size=800, path_=tmp_path, overlap=0.5)
+    maps.patchify_all(patch_size=800, path_save=tmp_path, overlap=0.5)
     maps.check_georeferencing()
     parent_df, patch_df = maps.convert_images()
     runner = DeepSoloRunner(
@@ -475,33 +475,35 @@ def test_deepsolo_load_patch_predictions_errors(runner_run_all, tmp_path):
         runner.load_patch_predictions("fake_file.geojson")
 
 
-def test_deepsolo_search_preds(runner_run_all, mock_response):
+def test_deepsolo_search_predictions(runner_run_all, mock_response):
     runner = runner_run_all
     _ = runner.convert_to_parent_pixel_bounds()
-    out = runner.search_preds("map", ignore_case=True)
+    out = runner.search_predictions("map", ignore_case=True)
     assert isinstance(out, dict)
     assert "mapreader_text.png" in out.keys()
     # test dataframe
-    out = runner.search_preds("map", ignore_case=True, return_dataframe=True)
+    out = runner.search_predictions("map", ignore_case=True, return_dataframe=True)
     assert isinstance(out, pd.DataFrame)
     assert set(out.columns) == set(
         ["image_id", "patch_id", "pixel_geometry", "text", "score"]
     )
     assert "mapreader_text.png" in out["image_id"].values
-    out = runner.search_preds("somethingelse", ignore_case=True, return_dataframe=True)
+    out = runner.search_predictions(
+        "somethingelse", ignore_case=True, return_dataframe=True
+    )
     assert len(out) == 0
 
 
-def test_deepsolo_search_preds_errors(runner_run_all, mock_response):
+def test_deepsolo_search_predictions_errors(runner_run_all, mock_response):
     runner = runner_run_all
     with pytest.raises(ValueError, match="No parent predictions found"):
-        runner.search_preds("maps", ignore_case=True)
+        runner.search_predictions("maps", ignore_case=True)
 
 
 def test_deepsolo_search_results(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     _ = runner.convert_to_parent_pixel_bounds()
-    out = runner.search_preds("map", ignore_case=True)
+    out = runner.search_predictions("map", ignore_case=True)
     assert isinstance(out, dict)
     runner.search_results_to_geojson(f"{tmp_path}/search_results.geojson")
     assert os.path.exists(f"{tmp_path}/search_results.geojson")
@@ -516,7 +518,7 @@ def test_deepsolo_search_results(runner_run_all, tmp_path, mock_response):
 def test_deepsolo_search_results_centroid(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     _ = runner.convert_to_parent_pixel_bounds()
-    out = runner.search_preds("map", ignore_case=True)
+    out = runner.search_predictions("map", ignore_case=True)
     assert isinstance(out, dict)
     runner.search_results_to_geojson(
         f"{tmp_path}/search_results_centroid.geojson", centroid=True
diff --git a/tests/test_text_spotting/test_maptext_runner.py b/tests/test_text_spotting/test_maptext_runner.py
index 2f21a162..cbb9d8dc 100644
--- a/tests/test_text_spotting/test_maptext_runner.py
+++ b/tests/test_text_spotting/test_maptext_runner.py
@@ -318,33 +318,35 @@ def test_maptext_to_geojson(runner_run_all, tmp_path, mock_response):
     )
 
 
-def test_maptext_search_preds(runner_run_all, mock_response):
+def test_maptext_search_predictions(runner_run_all, mock_response):
     runner = runner_run_all
     _ = runner.convert_to_parent_pixel_bounds()
-    out = runner.search_preds("map", ignore_case=True)
+    out = runner.search_predictions("map", ignore_case=True)
     assert isinstance(out, dict)
     assert "mapreader_text.png" in out.keys()
     # test dataframe
-    out = runner.search_preds("map", ignore_case=True, return_dataframe=True)
+    out = runner.search_predictions("map", ignore_case=True, return_dataframe=True)
     assert isinstance(out, pd.DataFrame)
     assert set(out.columns) == set(
         ["image_id", "patch_id", "pixel_geometry", "text", "score"]
     )
     assert "mapreader_text.png" in out["image_id"].values
-    out = runner.search_preds("somethingelse", ignore_case=True, return_dataframe=True)
+    out = runner.search_predictions(
+        "somethingelse", ignore_case=True, return_dataframe=True
+    )
     assert len(out) == 0
 
 
-def test_maptext_search_preds_errors(runner_run_all, mock_response):
+def test_maptext_search_predictions_errors(runner_run_all, mock_response):
     runner = runner_run_all
     with pytest.raises(ValueError, match="No parent predictions found"):
-        runner.search_preds("maps", ignore_case=True)
+        runner.search_predictions("maps", ignore_case=True)
 
 
 def test_maptext_search_results(runner_run_all, tmp_path, mock_response):
     runner = runner_run_all
     _ = runner.convert_to_parent_pixel_bounds()
-    out = runner.search_preds("map", ignore_case=True)
+    out = runner.search_predictions("map", ignore_case=True)
     assert isinstance(out, dict)
     runner.search_results_to_geojson(f"{tmp_path}/search_results.geojson")
     assert os.path.exists(f"{tmp_path}/search_results.geojson")