diff --git a/.gitignore b/.gitignore index 23ab4c2..007bfb8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +# maps and patches +worked_examples/geospatial/maps/ +worked_examples/geospatial/patches_*_*/ + # classification one inch worked_examples/geospatial/*classification_one_inch_maps/* !worked_examples/geospatial/*classification_one_inch_maps/Pipeline.ipynb @@ -5,7 +9,7 @@ worked_examples/geospatial/*classification_one_inch_maps/* # text spotting one inch worked_examples/geospatial/text_spotting_one_inch_maps/*/* -!worked_examples/geospatial/text_spotting_one_inch_maps/*/*/Pipeline.ipynb +!worked_examples/geospatial/text_spotting_one_inch_maps/*/Pipeline.ipynb # classification mnist worked_examples/non-geospatial/classification_mnist/* diff --git a/worked_examples/geospatial/classification_one_inch_maps/Pipeline.ipynb b/worked_examples/geospatial/classification_one_inch_maps/Pipeline.ipynb index 8e0ca90..2e3fe37 100644 --- a/worked_examples/geospatial/classification_one_inch_maps/Pipeline.ipynb +++ b/worked_examples/geospatial/classification_one_inch_maps/Pipeline.ipynb @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -109,7 +109,7 @@ "outputs": [], "source": [ "my_ts.get_grid_bb(zoom_level=14)\n", - "my_ts.download_map_sheets_by_queries(force=True)" + "my_ts.download_map_sheets_by_queries(path_save=\"../maps\", force=True)" ] }, { @@ -132,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -145,7 +145,7 @@ "metadata": {}, "outputs": [], "source": [ - "my_files = loader(\"./maps/*png\")" + "my_files = loader(\"../maps/*png\")" ] }, { @@ -173,7 +173,7 @@ "metadata": {}, "outputs": [], "source": [ - "my_files.add_metadata(\"./maps/metadata.csv\")" + "my_files.add_metadata(\"../maps/metadata.csv\")" ] }, { @@ -188,7 +188,7 @@ }, { "cell_type": "code", - "execution_count": 16, + 
"execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -211,7 +211,7 @@ }, "outputs": [], "source": [ - "my_files.patchify_all(patch_size=100) # in pixels" + "my_files.patchify_all(patch_size=100, path_save=\"../patches_100_pixel\") # in pixels" ] }, { @@ -343,7 +343,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -359,9 +359,9 @@ "outputs": [], "source": [ "annotator = Annotator(\n", - " patch_paths=\"./patches_100_pixel/*png\",\n", - " parent_paths=\"./maps/*png\",\n", - " metadata_path=\"./maps/metadata.csv\",\n", + " patch_paths=\"../patches_100_pixel/*png\",\n", + " parent_paths=\"../maps/*png\",\n", + " metadata_path=\"../maps/metadata.csv\",\n", " annotations_dir=\"./annotations_one_inch\",\n", " labels=[\"no\", \"railspace\"],\n", " username=\"rw\",\n", @@ -399,7 +399,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -416,7 +416,7 @@ "source": [ "annotated_images = AnnotationsLoader()\n", "\n", - "annotated_images.load(\"./annotations_one_inch/rail_space_#rw#.csv\", images_dir=\"./patches_100_pixel\")" + "annotated_images.load(\"./annotations_one_inch/rail_space_#rw#.csv\", images_dir=\"../patches_100_pixel\")" ] }, { @@ -517,7 +517,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -532,7 +532,7 @@ }, "outputs": [], "source": [ - "my_classifier = ClassifierContainer(model =\"resnet18\",\n", + "my_classifier = ClassifierContainer(model=\"resnet18\",\n", " labels_map={0: 'No', 1: 'railspace'},\n", " dataloaders=dataloaders,\n", " )" @@ -577,7 +577,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -588,7 +588,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -597,7 
+597,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -693,7 +693,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -709,16 +709,16 @@ "outputs": [], "source": [ "my_maps = load_patches(\n", - " \"./patches_100_pixel/*74488689*png\", parent_paths=\"./maps/map_74488689.png\"\n", + " \"../patches_100_pixel/*74488689*png\", parent_paths=\"../maps/map_74488689.png\"\n", ")\n", "\n", - "my_maps.add_metadata(\"./maps/metadata.csv\", ignore_mismatch=True)\n", + "my_maps.add_metadata(\"../maps/metadata.csv\", ignore_mismatch=True)\n", "my_maps.add_metadata(\"patch_df.csv\", tree_level=\"patch\", ignore_mismatch=True)" ] }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ @@ -735,7 +735,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -744,7 +744,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 54, "metadata": {}, "outputs": [], "source": [ @@ -755,34 +755,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Load dataset in to new Classifier (or use existing one if continuing with notebook)" + "## Load dataset into Classifier" ] }, { "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [], - "source": [ - "from mapreader import ClassifierContainer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "my_classifier = ClassifierContainer(\n", - " model=None,\n", - " labels_map=None,\n", - " load_path=\"./models_tutorial/checkpoint_10.pkl\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 59, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ @@ -851,7 +829,7 @@ }, { "cell_type": "code", - "execution_count": 64, +
"execution_count": 60, "metadata": {}, "outputs": [], "source": [ @@ -870,7 +848,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 61, "metadata": {}, "outputs": [], "source": [ diff --git a/worked_examples/geospatial/context_classification_one_inch_maps/Pipeline.ipynb b/worked_examples/geospatial/context_classification_one_inch_maps/Pipeline.ipynb index 3e50d5f..205ba6f 100644 --- a/worked_examples/geospatial/context_classification_one_inch_maps/Pipeline.ipynb +++ b/worked_examples/geospatial/context_classification_one_inch_maps/Pipeline.ipynb @@ -109,7 +109,7 @@ "outputs": [], "source": [ "my_ts.get_grid_bb(zoom_level=14)\n", - "my_ts.download_map_sheets_by_queries(force=True)" + "my_ts.download_map_sheets_by_queries(path_save=\"../maps\", force=True)" ] }, { @@ -145,7 +145,7 @@ "metadata": {}, "outputs": [], "source": [ - "my_files = loader(\"./maps/*png\")" + "my_files = loader(\"../maps/*png\")" ] }, { @@ -173,7 +173,7 @@ "metadata": {}, "outputs": [], "source": [ - "my_files.add_metadata(\"./maps/metadata.csv\")" + "my_files.add_metadata(\"../maps/metadata.csv\")" ] }, { @@ -208,7 +208,7 @@ "metadata": {}, "outputs": [], "source": [ - "my_files.patchify_all(patch_size=100) # in pixels" + "my_files.patchify_all(patch_size=100, path_save=\"../patches_100_pixel\") # in pixels" ] }, { @@ -358,9 +358,9 @@ "outputs": [], "source": [ "annotator = Annotator(\n", - " patch_paths=\"./patches_100_pixel/*png\",\n", - " parent_paths=\"./maps/*png\",\n", - " metadata_path=\"./maps/metadata.csv\",\n", + " patch_paths=\"../patches_100_pixel/*png\",\n", + " parent_paths=\"../maps/*png\",\n", + " metadata_path=\"../maps/metadata.csv\",\n", " annotations_dir=\"./annotations_one_inch\",\n", " labels=[\"no\", \"railspace\"],\n", " username=\"rw\",\n", @@ -415,7 +415,7 @@ "source": [ "annotated_images = AnnotationsLoader()\n", "\n", - "annotated_images.load(\"./annotations_one_inch/rail_space_#rw#.csv\", images_dir=\"./patches_100_pixel\")" + 
"annotated_images.load(\"./annotations_one_inch/rail_space_#rw#.csv\", images_dir=\"../patches_100_pixel\")" ] }, { @@ -537,7 +537,7 @@ }, "outputs": [], "source": [ - "my_classifier = ClassifierContainer(model =\"resnet18\",\n", + "my_classifier = ClassifierContainer(model=\"resnet18\",\n", " labels_map={0: 'No', 1: 'railspace'},\n", " dataloaders=dataloaders,\n", " )" @@ -582,7 +582,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -593,7 +593,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -602,7 +602,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -692,7 +692,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -706,16 +706,16 @@ "outputs": [], "source": [ "my_maps = load_patches(\n", - " \"./patches_100_pixel/*74488689*png\", parent_paths=\"./maps/map_74488689.png\"\n", + " \"../patches_100_pixel/*74488689*png\", parent_paths=\"../maps/map_74488689.png\"\n", ")\n", "\n", - "my_maps.add_metadata(\"./maps/metadata.csv\", ignore_mismatch=True)\n", + "my_maps.add_metadata(\"../maps/metadata.csv\", ignore_mismatch=True)\n", "my_maps.add_metadata(\"patch_df.csv\", tree_level=\"patch\", ignore_mismatch=True)" ] }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -731,7 +731,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 54, "metadata": {}, "outputs": [], "source": [ @@ -740,7 +740,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ @@ -758,7 +758,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ diff --git 
a/worked_examples/geospatial/text_spotting_one_inch_maps/deepsolo/Pipeline.ipynb b/worked_examples/geospatial/text_spotting_one_inch_maps/deepsolo/Pipeline.ipynb new file mode 100644 index 0000000..cc75018 --- /dev/null +++ b/worked_examples/geospatial/text_spotting_one_inch_maps/deepsolo/Pipeline.ipynb @@ -0,0 +1,511 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download\n", + "\n", + "https://mapreader.readthedocs.io/en/latest/User-guide/Download.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mapreader import SheetDownloader\n", + "\n", + "my_ts = SheetDownloader(\n", + " metadata_path=\"../../NLS_metadata/metadata_OS_One_Inch_GB_WFS_light.json\",\n", + " download_url=\"https://mapseries-tilesets.s3.amazonaws.com/1inch_2nd_ed/{z}/{x}/{y}.png\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.extract_wfs_id_nos()\n", + "my_ts.plot_all_metadata_on_map(map_extent=\"uk\", add_id=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.get_minmax_latlon()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.extract_published_dates()\n", + "my_ts.metadata[\"published_date\"].hist(bins=30, edgecolor=\"k\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Query maps using a list of lats/lons" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.query_map_sheets_by_coordinates((-4.33, 55.90))\n", + "my_ts.query_map_sheets_by_coordinates((-3.25, 51.93), append=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.print_found_queries()" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.plot_queries_on_map(map_extent=\"uk\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download map tiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.get_grid_bb(zoom_level=14)\n", + "my_ts.download_map_sheets_by_queries(path_save=\"../../maps\", force=True)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load\n", + "\n", + "https://mapreader.readthedocs.io/en/latest/User-guide/Load.html" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load in downloaded maps" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from mapreader import loader" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_files = loader(\"../../maps/*png\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# len() shows the total number of images currently read (or sliced, see below)\n", + "print(f\"Number of images: {len(my_files)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(my_files)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_files.add_metadata(\"../../maps/metadata.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "parent_list = my_files.list_parents()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Patchify maps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": 
false + }, + "outputs": [], + "source": [ + "my_files.patchify_all(patch_size=1000, path_save=\"../../patches_1000_pixel\") # in pixels" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_files.show_sample(num_samples=6, tree_level=\"patch\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_files.show_patches(\n", + " parent_id=parent_list[0],\n", + " figsize=(15, 15)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_files.explore_patches(\n", + " parent_id=parent_list[0],\n", + " xyz_url=\"https://mapseries-tilesets.s3.amazonaws.com/1inch_2nd_ed/{z}/{x}/{y}.png\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add coordinate increments for text spotting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_files.add_coord_increments()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "parent_df, patch_df = my_files.convert_images(save=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "parent_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "patch_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "patch_list = my_files.list_patches()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Detect text\n", + "\n", + "Here, we show how to load an already trained/fine-tuned text detection model and run the model inference on your patches. 
DeepSolo is a text detection and recognition framework so it produces bounding boxes, OCR text outputs and scores." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up the DeepSoloRunner" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "from mapreader import DeepSoloRunner" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "# change these to your own paths, see the README for more details on how to get these\n", + "cfg_file = \"/Users/rwood/projects/DataCulture/ocr_test/detectron2_etc/DeepSolo/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml\"\n", + "weights_file = \"/Users/rwood/projects/DataCulture/ocr_test/detectron2_etc/ic15_res50_finetune_synth-tt-mlt-13-15-textocr.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "my_runner = DeepSoloRunner(\n", + " patch_df[:10], # only the first 10 to save time\n", + " parent_df,\n", + " cfg_file = cfg_file,\n", + " weights_file = weights_file,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run on all patches in the patch dataframe\n", + "\n", + "Note: we've only loaded the first 10 rows of our patch dataframe to save time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "patch_predictions = my_runner.run_all(return_dataframe=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "patch_predictions.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_runner.show_predictions(\n", + " patch_list[0],\n", + " figsize=(15, 15),\n", + " border_color=\"r\",\n", + " text_color=\"b\",\n", + " )" + ] + }, + { +
"cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scale up to parent images" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "parent_predictions = my_runner.convert_to_parent_pixel_bounds(return_dataframe=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "parent_predictions.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_runner.show_predictions(\n", + " parent_list[0], \n", + " figsize=(15, 15),\n", + " border_color=\"r\",\n", + " text_color=\"b\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Convert pixel bounds to coordinates" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "geo_predictions = my_runner.convert_to_coords(return_dataframe=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can view these overlaid on the tile layer as an interactive plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_runner.explore_predictions(\n", + " parent_list[0],\n", + " xyz_url=\"https://mapseries-tilesets.s3.amazonaws.com/1inch_2nd_ed/{z}/{x}/{y}.png\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Saving these outputs will give you a GeoJSON file you can load into GIS software."
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "my_runner.save_to_geojson(\"./example_outputs.geojson\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mr_worked_examples", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/worked_examples/geospatial/text_spotting_one_inch_maps/dptext-detr/Pipeline.ipynb b/worked_examples/geospatial/text_spotting_one_inch_maps/dptext-detr/Pipeline.ipynb new file mode 100644 index 0000000..8c0570c --- /dev/null +++ b/worked_examples/geospatial/text_spotting_one_inch_maps/dptext-detr/Pipeline.ipynb @@ -0,0 +1,475 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download\n", + "\n", + "https://mapreader.readthedocs.io/en/latest/User-guide/Download.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mapreader import SheetDownloader\n", + "\n", + "my_ts = SheetDownloader(\n", + " metadata_path=\"../../NLS_metadata/metadata_OS_One_Inch_GB_WFS_light.json\",\n", + " download_url=\"https://mapseries-tilesets.s3.amazonaws.com/1inch_2nd_ed/{z}/{x}/{y}.png\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.extract_wfs_id_nos()\n", + "my_ts.plot_all_metadata_on_map(map_extent=\"uk\", add_id=False)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.get_minmax_latlon()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.extract_published_dates()\n", + "my_ts.metadata[\"published_date\"].hist(bins=30, edgecolor=\"k\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Query maps using a list of lats/lons" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.query_map_sheets_by_coordinates((-4.33, 55.90))\n", + "my_ts.query_map_sheets_by_coordinates((-3.25, 51.93), append=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.print_found_queries()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.plot_queries_on_map(map_extent=\"uk\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download map tiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_ts.get_grid_bb(zoom_level=14)\n", + "my_ts.download_map_sheets_by_queries(path_save=\"../../maps\", force=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load\n", + "\n", + "https://mapreader.readthedocs.io/en/latest/User-guide/Load.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load in downloaded maps" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from mapreader import loader" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_files = loader(\"../../maps/*png\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"# len() shows the total number of images currently read (or sliced, see below)\n", + "print(f\"Number of images: {len(my_files)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(my_files)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_files.add_metadata(\"../../maps/metadata.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "parent_list = my_files.list_parents()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Patchify maps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_files.patchify_all(patch_size=1000, path_save=\"../../patches_1000_pixel\") # in pixels" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_files.show_sample(num_samples=6, tree_level=\"patch\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_files.show_patches(\n", + " parent_id=parent_list[0],\n", + " figsize=(15, 15)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_files.explore_patches(\n", + " parent_id=parent_list[0],\n", + " xyz_url=\"https://mapseries-tilesets.s3.amazonaws.com/1inch_2nd_ed/{z}/{x}/{y}.png\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add coordinate increments for text spotting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_files.add_coord_increments()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "parent_df, patch_df = my_files.convert_images(save=True)" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "parent_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "patch_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "patch_list = my_files.list_patches()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Detect text\n", + "\n", + "Here, we show how to load an already trained/fine-tuned text detection model and run the model inference on your patches. DPText-DETR is a text detection framework so it produces bounding boxes and scores only (but not OCR text outputs)." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up the DPText-DETR runner" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "from mapreader import DPTextDETRRunner" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "# change these to your own paths, see the README for more details on how to get these\n", + "cfg_file = \"/Users/rwood/projects/DataCulture/ocr_test/detectron2_etc/DPText-DETR/configs/DPText_DETR/ArT/R_50_poly.yaml\"\n", + "weights_file = \"/Users/rwood/projects/DataCulture/ocr_test/detectron2_etc/art_final.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "my_runner = DPTextDETRRunner(\n", + " patch_df[:10], # only the first 10 to save time\n", + " parent_df,\n", + " cfg_file = cfg_file,\n", + " weights_file = weights_file,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run on all patches in the patch dataframe\n", + "\n", + "Note: we've only loaded the first 10 rows of our patch 
dataframe to save time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "patch_predictions = my_runner.run_all(return_dataframe=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "patch_predictions.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_runner.show_predictions(\n", + " patch_list[0],\n", + " figsize=(15, 15),\n", + " border_color=\"r\",\n", + " text_color=\"b\",\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scale up to parent images" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "parent_predictions = my_runner.convert_to_parent_pixel_bounds(return_dataframe=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "parent_predictions.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_runner.show_predictions(\n", + " parent_list[0],\n", + " figsize=(15, 15),\n", + " border_color=\"r\",\n", + " text_color=\"b\",\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Convert pixel bounds to coordinates" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "geo_predictions = my_runner.convert_to_coords(return_dataframe=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Saving these outputs will give you a GeoJSON file you can load into GIS software."
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "my_runner.save_to_geojson(\"./example_outputs.geojson\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mr_worked_examples", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}