Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add geo_to_h3 function #4

Merged
merged 1 commit into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions examples/EOI00002H.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,21 @@
]
}
},
"producedBy_samplingSite_location_h3_0": "8029fffffffffff",
"producedBy_samplingSite_location_h3_1": "8128fffffffffff",
"producedBy_samplingSite_location_h3_10": "8a28ed791baffff",
"producedBy_samplingSite_location_h3_11": "8b28ed791ba9fff",
"producedBy_samplingSite_location_h3_12": "8c28ed791ba97ff",
"producedBy_samplingSite_location_h3_13": "8d28ed791ba967f",
"producedBy_samplingSite_location_h3_14": "8e28ed791ba9647",
"producedBy_samplingSite_location_h3_2": "8228effffffffff",
"producedBy_samplingSite_location_h3_3": "8328edfffffffff",
"producedBy_samplingSite_location_h3_4": "8428ed7ffffffff",
"producedBy_samplingSite_location_h3_5": "8528ed7bfffffff",
"producedBy_samplingSite_location_h3_6": "8628ed797ffffff",
"producedBy_samplingSite_location_h3_7": "8728ed791ffffff",
"producedBy_samplingSite_location_h3_8": "8828ed791bfffff",
"producedBy_samplingSite_location_h3_9": "8928ed791bbffff",
"registrant": "Andra Bobbitt",
"relatedResource": [],
"sampleidentifier": "igsn:10.58052/EOI00002H",
Expand Down
15 changes: 15 additions & 0 deletions examples/IEDUT103B.json
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,21 @@
]
}
},
"producedBy_samplingSite_location_h3_0": "8067fffffffffff",
"producedBy_samplingSite_location_h3_1": "8145bffffffffff",
"producedBy_samplingSite_location_h3_10": "8a45b29884e7fff",
"producedBy_samplingSite_location_h3_11": "8b45b29884e1fff",
"producedBy_samplingSite_location_h3_12": "8c45b29884e15ff",
"producedBy_samplingSite_location_h3_13": "8d45b29884e147f",
"producedBy_samplingSite_location_h3_14": "8e45b29884e1457",
"producedBy_samplingSite_location_h3_2": "8245b7fffffffff",
"producedBy_samplingSite_location_h3_3": "8345b2fffffffff",
"producedBy_samplingSite_location_h3_4": "8445b29ffffffff",
"producedBy_samplingSite_location_h3_5": "8545b29bfffffff",
"producedBy_samplingSite_location_h3_6": "8645b298fffffff",
"producedBy_samplingSite_location_h3_7": "8745b2988ffffff",
"producedBy_samplingSite_location_h3_8": "8845b29885fffff",
"producedBy_samplingSite_location_h3_9": "8945b29884fffff",
"registrant": "Andrea Dutton",
"relatedResource": [],
"sampleidentifier": "igsn:10.58052/IEDUT103B",
Expand Down
15 changes: 15 additions & 0 deletions examples/IEEJR000M.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,21 @@
]
}
},
"producedBy_samplingSite_location_h3_0": "8029fffffffffff",
"producedBy_samplingSite_location_h3_1": "8129bffffffffff",
"producedBy_samplingSite_location_h3_10": "8a29aa8f66dffff",
"producedBy_samplingSite_location_h3_11": "8b29aa8f66dbfff",
"producedBy_samplingSite_location_h3_12": "8c29aa8f66dbbff",
"producedBy_samplingSite_location_h3_13": "8d29aa8f66dbaff",
"producedBy_samplingSite_location_h3_14": "8e29aa8f66dbaef",
"producedBy_samplingSite_location_h3_2": "8229affffffffff",
"producedBy_samplingSite_location_h3_3": "8329aafffffffff",
"producedBy_samplingSite_location_h3_4": "8429aa9ffffffff",
"producedBy_samplingSite_location_h3_5": "8529aa8ffffffff",
"producedBy_samplingSite_location_h3_6": "8629aa8f7ffffff",
"producedBy_samplingSite_location_h3_7": "8729aa8f2ffffff",
"producedBy_samplingSite_location_h3_8": "8829aa8f2dfffff",
"producedBy_samplingSite_location_h3_9": "8929aa8f2d7ffff",
"registrant": "Evan Ramos",
"relatedResource": [],
"sampleidentifier": "igsn:10.58052/IEEJR000M",
Expand Down
15 changes: 15 additions & 0 deletions examples/IEJEN0040.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,21 @@
]
}
},
"producedBy_samplingSite_location_h3_0": "800dfffffffffff",
"producedBy_samplingSite_location_h3_1": "810c7ffffffffff",
"producedBy_samplingSite_location_h3_10": "8a0c42d0e837fff",
"producedBy_samplingSite_location_h3_11": "8b0c42d0e832fff",
"producedBy_samplingSite_location_h3_12": "8c0c42d0e8327ff",
"producedBy_samplingSite_location_h3_13": "8d0c42d0e8326ff",
"producedBy_samplingSite_location_h3_14": "8e0c42d0e8326e7",
"producedBy_samplingSite_location_h3_2": "820c47fffffffff",
"producedBy_samplingSite_location_h3_3": "830c42fffffffff",
"producedBy_samplingSite_location_h3_4": "840c42dffffffff",
"producedBy_samplingSite_location_h3_5": "850c42d3fffffff",
"producedBy_samplingSite_location_h3_6": "860c42d0fffffff",
"producedBy_samplingSite_location_h3_7": "870c42d0effffff",
"producedBy_samplingSite_location_h3_8": "880c42d0e9fffff",
"producedBy_samplingSite_location_h3_9": "890c42d0e83ffff",
"registrant": "Jonathan Nichols",
"relatedResource": [],
"sampleidentifier": "igsn:10.58052/IEJEN0040",
Expand Down
15 changes: 15 additions & 0 deletions examples/IERVTL1I7.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,21 @@
"placeName": []
}
},
"producedBy_samplingSite_location_h3_0": "8027fffffffffff",
"producedBy_samplingSite_location_h3_1": "8126bffffffffff",
"producedBy_samplingSite_location_h3_10": "8a26b56364a7fff",
"producedBy_samplingSite_location_h3_11": "8b26b56364a3fff",
"producedBy_samplingSite_location_h3_12": "8c26b56364a31ff",
"producedBy_samplingSite_location_h3_13": "8d26b56364a30ff",
"producedBy_samplingSite_location_h3_14": "8e26b56364a30c7",
"producedBy_samplingSite_location_h3_2": "8226b7fffffffff",
"producedBy_samplingSite_location_h3_3": "8326b5fffffffff",
"producedBy_samplingSite_location_h3_4": "8426b57ffffffff",
"producedBy_samplingSite_location_h3_5": "8526b57bfffffff",
"producedBy_samplingSite_location_h3_6": "8626b5637ffffff",
"producedBy_samplingSite_location_h3_7": "8726b5636ffffff",
"producedBy_samplingSite_location_h3_8": "8826b56365fffff",
"producedBy_samplingSite_location_h3_9": "8926b56364bffff",
"registrant": "SLAC SFA",
"relatedResource": [],
"sampleidentifier": "igsn:10.58052/IERVTL1I7",
Expand Down
15 changes: 15 additions & 0 deletions examples/ODP02Q1IZ.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,21 @@
"placeName": []
}
},
"producedBy_samplingSite_location_h3_0": "80dffffffffffff",
"producedBy_samplingSite_location_h3_1": "81df3ffffffffff",
"producedBy_samplingSite_location_h3_10": "8adf35928cf7fff",
"producedBy_samplingSite_location_h3_11": "8bdf35928cf0fff",
"producedBy_samplingSite_location_h3_12": "8cdf35928cf05ff",
"producedBy_samplingSite_location_h3_13": "8ddf35928cf0cff",
"producedBy_samplingSite_location_h3_14": "8edf35928cf0cdf",
"producedBy_samplingSite_location_h3_2": "82df37fffffffff",
"producedBy_samplingSite_location_h3_3": "83e89bfffffffff",
"producedBy_samplingSite_location_h3_4": "84df359ffffffff",
"producedBy_samplingSite_location_h3_5": "85df3593fffffff",
"producedBy_samplingSite_location_h3_6": "86df3592fffffff",
"producedBy_samplingSite_location_h3_7": "87df35928ffffff",
"producedBy_samplingSite_location_h3_8": "88df35928dfffff",
"producedBy_samplingSite_location_h3_9": "89df35928cfffff",
"registrant": "Integrated Ocean Drilling Program (TAMU)",
"relatedResource": [],
"sampleidentifier": "igsn:10.60471/ODP02Q1IZ",
Expand Down
21 changes: 21 additions & 0 deletions isamples_sesar/sesar_transformer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import typing
from typing import Optional
import logging
import h3
from .sample import Sample

from .mapper import (
Expand All @@ -19,6 +20,8 @@ class Transformer():

FEET_PER_METER = 3.28084

DEFAULT_H3_RESOLUTION = 15

def __init__(self, sample: Sample):
self.sample = sample
self._material_prediction_results: Optional[list] = None
Expand Down Expand Up @@ -79,6 +82,10 @@ def transform(self) -> typing.Dict:
"authorizedBy": self.authorized_by(),
"compliesWith": self.complies_with(),
}
for index in range(0, 15):
h3_at_resolution = self.h3_function()(self.sample.latitude, self.sample.longitude, index)
field_name = f"producedBy_samplingSite_location_h3_{index}"
transformed_record[field_name] = h3_at_resolution
return transformed_record

def has_context_categories(self) -> typing.List[str]:
Expand Down Expand Up @@ -406,6 +413,9 @@ def curation_responsibility(self) -> list[dict]:
responsibility.append(metadata_publisher)
return responsibility

def h3_function(self) -> typing.Callable:
    """Return the callable used to turn a latitude/longitude pair into an H3 cell.

    The returned object is the module-level ``geo_to_h3`` function; callers
    invoke it as ``self.h3_function()(latitude, longitude, resolution)``.
    NOTE(review): presumably a method so the converter can be swapped per
    transformer -- confirm against other transformers in the project.
    """
    converter: typing.Callable = geo_to_h3
    return converter


class MaterialCategoryMetaMapper(AbstractCategoryMetaMapper):
_endsWithRockMapper = StringEndsWithCategoryMapper("Rock", "Rock")
Expand Down Expand Up @@ -627,3 +637,14 @@ def categories_mappers(cls) -> typing.List[AbstractCategoryMapper]:
cls._floodplainAquiferMapper,
cls._creekBankMapper,
]


def geo_to_h3(
    latitude: typing.Optional[float],
    longitude: typing.Optional[float],
    resolution: int = Transformer.DEFAULT_H3_RESOLUTION
) -> typing.Optional[str]:
    """Convert a latitude/longitude pair to an H3 cell at the given resolution.

    Args:
        latitude: Latitude of the point, or None when unknown.
        longitude: Longitude of the point, or None when unknown.
        resolution: H3 resolution to index at; defaults to
            ``Transformer.DEFAULT_H3_RESOLUTION``.

    Returns:
        The result of ``h3.latlng_to_cell`` for the point, or None when either
        coordinate is missing.
    """
    # Without both coordinates there is no point to index.
    if latitude is None or longitude is None:
        return None
    return h3.latlng_to_cell(latitude, longitude, resolution)
6 changes: 3 additions & 3 deletions scripts/sesar_things.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from isamples_sesar.sesar_adapter import SESARItem
from isamples_sesar.sqlmodel_database import SQLModelDAO as SESAR_SQLModelDAO, get_sample_rows
from isamples_sesar.sesar_transformer import Transformer
from isamples_sesar.sesar_transformer import Transformer, geo_to_h3
from isb_web.sqlmodel_database import SQLModelDAO as iSB_SQLModelDAO, all_thing_primary_keys, DatabaseBulkUpdater # type: ignore

BATCH_SIZE = 10000
Expand Down Expand Up @@ -34,9 +34,9 @@ def load_sesar_entries(sesar_db_session, isb_db_session, start_from=None):
num_newer += 1
thing_id = f"igsn:{sample.igsn}"
resolved_url = f"doi.org/{sample.igsn}"
# h3 = Transformer.geo_to_h3(current_record)
h3 = geo_to_h3(sample.latitude, sample.longitude)
t_created = sample.registration_date
bulk_updater.add_thing(current_record, thing_id, resolved_url, 200, "h3", t_created)
bulk_updater.add_thing(current_record, thing_id, resolved_url, 200, h3, t_created)
offset += BATCH_SIZE
bulk_updater.finish()
print(f"Num newer={num_newer}\n\n")
Expand Down
19 changes: 19 additions & 0 deletions tests/test_isamples_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def test_example(sesar_session: Session, igsn):
check_related_resource(transformed_test_data, expected_data)
check_authorized_by(transformed_test_data, expected_data)
check_complies_with(transformed_test_data, expected_data)
check_geo_to_h3(transformed_test_data, expected_data)


def check_id(test_data, expected_data):
Expand Down Expand Up @@ -188,3 +189,21 @@ def check_authorized_by(test_data, expected_data):

def check_complies_with(test_data, expected_data):
    """Assert that the transformed record's ``compliesWith`` matches the expected one."""
    actual = test_data["compliesWith"]
    expected = expected_data["compliesWith"]
    assert actual == expected


def check_geo_to_h3(test_data, expected_data):
    """Assert that every per-resolution H3 field matches the expected record.

    Compares ``producedBy_samplingSite_location_h3_<n>`` for resolutions
    0 through 14 (inclusive), in ascending order.

    Args:
        test_data: The transformed record under test (mapping of field name to value).
        expected_data: The expected record loaded from the example file.

    Raises:
        AssertionError: If any resolution's value differs.
        KeyError: If either mapping lacks one of the fields.
    """
    # One loop instead of fifteen copy-pasted assertions: same keys, same
    # order, but a failure now names the offending field.
    for resolution in range(15):
        field_name = f"producedBy_samplingSite_location_h3_{resolution}"
        assert test_data[field_name] == expected_data[field_name], (
            f"{field_name} does not match the expected value"
        )