From caad5c50772cc0ec10e5c3806f5844600c37e4a8 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 21 Jan 2025 04:24:36 +0000 Subject: [PATCH 1/6] NPI-3689 reworked functions for iau2000 download --- gnssanalysis/gn_download.py | 213 ++++++++++++++++++++++++++++-------- 1 file changed, 165 insertions(+), 48 deletions(-) diff --git a/gnssanalysis/gn_download.py b/gnssanalysis/gn_download.py index 5cec397..f5f39b3 100644 --- a/gnssanalysis/gn_download.py +++ b/gnssanalysis/gn_download.py @@ -28,7 +28,7 @@ import ftplib as _ftplib from ftplib import FTP_TLS as _FTP_TLS from pathlib import Path as _Path -from typing import Any, Generator, List, Optional, Tuple, Union +from typing import Any, Generator, List, Literal, Optional, Tuple, Union from urllib import request as _request from urllib.error import HTTPError as _HTTPError @@ -37,7 +37,7 @@ import pandas as _pd from boto3.s3.transfer import TransferConfig -from .gn_datetime import GPSDate, dt2gpswk, gpswkD2dt +from .gn_datetime import GPSDate, gpswkD2dt from .gn_utils import ensure_folders MB = 1024 * 1024 @@ -150,7 +150,16 @@ def request_metadata(url: str, max_retries: int = 5, metadata_header: str = "x-a return None -def download_url(url: str, destfile: Union[str, _os.PathLike], max_retries: int = 5) -> Optional[_Path]: +def download_url( + url: str, destfile: Union[str, _os.PathLike], max_retries: int = 5, raise_on_failure: bool = False +) -> Optional[_Path]: + """ + TODO finish docstring + :param bool raise_on_failure: Raise exceptions on errors, rather than returning None. + :raises Exception: On download failure. If original exception was an _HTTPError, raised Exception will wrap it. + :return Optional[_Path]: On success: local path to downloaded file. On errors: None, unless raise_on_failure is + set to True. 
+ """ logging.info(f'requesting "{url}"') for retry in range(1, max_retries + 1): try: @@ -163,13 +172,19 @@ def download_url(url: str, destfile: Union[str, _os.PathLike], max_retries: int except _HTTPError as err: logging.error(f" HTTP Error {err.code}: {err.reason}") if err.code == 404: + if raise_on_failure: + raise Exception(f"Download failed for URL '{url}'", err) return None # File Not Found on the server so no point in retrying t_seconds = 2**retry logging.error(f"Retry No. {retry} in {t_seconds} seconds") _time.sleep(t_seconds) if retry >= max_retries: logging.error(f"Maximum number of retries reached: {max_retries}. File not downloaded") + if raise_on_failure: + raise Exception(f"Download failed for URL '{url}' after {max_retries} retries.", err) return None + if raise_on_failure: + raise Exception(f"Fell out of download block for URL '{url}' after {max_retries} retries.") logging.error("Maximum retries exceeded in download_url with no clear outcome, returning None") return None @@ -490,7 +505,8 @@ def attempt_ftps_download( :param str filename: Filename to assign for the downloaded file :param str type_of_file: How to label the file for STDOUT messages, defaults to None :param str if_file_present: What to do if file already present: "replace", "dont_replace", defaults to "prompt_user" - :return _Path or None: The pathlib.Path of the downloaded file if successful, otherwise returns None + :return _Path or None: The pathlib.Path of the downloaded file if successful, or None if the download was skipped + (based on if_file_present). 
""" "" logging.info(f"Attempting FTPS Download of {type_of_file} file - {filename} to {download_dir}") @@ -506,7 +522,12 @@ def attempt_ftps_download( def attempt_url_download( - download_dir: _Path, url: str, filename: str = None, type_of_file: str = None, if_file_present: str = "prompt_user" + download_dir: _Path, + url: str, + filename: str = None, + type_of_file: str = None, + if_file_present: str = "prompt_user", + raise_on_failure: bool = False, ) -> Union[_Path, None]: """Attempt download of file given URL (url) to chosen location (download_dir) @@ -515,6 +536,7 @@ def attempt_url_download( :param str filename: Filename to assign for the downloaded file, defaults to None :param str type_of_file: How to label the file for STDOUT messages, defaults to None :param str if_file_present: What to do if file already present: "replace", "dont_replace", defaults to "prompt_user" + TODO docstring for this param :return _Path or None: The pathlib.Path of the downloaded file if successful, otherwise returns None """ # If the download_filename is not provided, use the filename from the URL @@ -526,7 +548,7 @@ def attempt_url_download( filename=filename, download_dir=download_dir, if_file_present=if_file_present ) if download_filepath: - download_filepath = download_url(url, download_filepath) + download_filepath = download_url(url, download_filepath, raise_on_failure=raise_on_failure) return download_filepath @@ -729,13 +751,13 @@ def download_file_from_cddis( :param str if_file_present: What to do if file already present: "replace", "dont_replace", defaults to "prompt_user" :param str note_filetype: How to label the file for STDOUT messages, defaults to None :raises e: Raise any error that is run into by ftplib - :return _Path or None: The pathlib.Path of the downloaded file if successful, otherwise returns None + :return _Path or None: The pathlib.Path of the downloaded file (or decompressed output of it). Returns None if the + file already existed and was skipped. 
""" with ftp_tls(CDDIS_FTP) as ftps: ftps.cwd(ftp_folder) retries = 0 - download_done = False - while not download_done and retries <= max_retries: + while retries <= max_retries: try: download_filepath = attempt_ftps_download( download_dir=output_folder, @@ -744,17 +766,20 @@ def download_file_from_cddis( type_of_file=note_filetype, if_file_present=if_file_present, ) - if decompress and download_filepath: - download_filepath = decompress_file( - input_filepath=download_filepath, delete_after_decompression=True - ) - download_done = True - if download_filepath: - logging.info(f"Downloaded {download_filepath.name}") + if not download_filepath: # File already existed and was skipped + return None + # File was downloaded + logging.info(f"Downloaded {download_filepath.name}") + if decompress: # Does it need unpacking? + # Decompress, and return the path of the resultant file + logging.info(f"Decompressing downloaded file {download_filepath.name}") + return decompress_file(input_filepath=download_filepath, delete_after_decompression=True) + # File doesn't need unpacking, return downloaded path + return download_filepath except _ftplib.all_errors as e: retries += 1 if retries > max_retries: - logging.error(f"Failed to download {filename} and reached maximum retry count ({max_retries}).") + logging.error(f"Failed to download {filename} and reached maximum retry count ({max_retries}).", e) if (output_folder / filename).is_file(): (output_folder / filename).unlink() raise e @@ -762,7 +787,10 @@ def download_file_from_cddis( logging.debug(f"Received an error ({e}) while try to download {filename}, retrying({retries}).") # Add some backoff time (exponential random as it appears to be contention based?) _time.sleep(_random.uniform(0.0, 2.0**retries)) - return download_filepath + + # Fell out of loop and context manager without returning a result or raising an exception. + # Shouldn't be possible, raise exception if it somehow happens. 
+ raise Exception("Failed to download file or raise exception. Some logic is broken.") def download_multiple_files_from_cddis(files: List[str], ftp_folder: str, output_folder: _Path) -> None: @@ -903,59 +931,148 @@ def download_product_from_cddis( return download_filepaths -def download_iau2000_file( - download_dir: _Path, start_epoch: _datetime, if_file_present: str = "prompt_user" +def download_iau2000_varient( + download_dir: _Path, + iau2000_file_varient: Literal["standard", "daily"], + if_file_present: str = "prompt_user", ) -> Union[_Path, None]: - """Download relevant IAU2000 file from CDDIS or IERS based on start_epoch of data + """ + Downloads IAU2000 file based on the varient requested ("daily" or "standard" file). + Added in approximately version 0.0.58 - :param _Path download_dir: Where to download files (local directory) - :param _datetime start_epoch: Start epoch of data in file + :param _Path download_dir: Where to download file (local directory). + :param Literal["standard", "daily"] iau2000_file_varient: name of file varient to download. Specifies whether to + download the recent "daily" file, or the historical "standard" file. :param str if_file_present: What to do if file already present: "replace", "dont_replace", defaults to "prompt_user" - :return _Path or None: The pathlib.Path of the downloaded file if successful, otherwise returns None + :raises Exception: On download failures. In some cases the underlying exception will be wrapped in the one raised. + :return Union[_Path, None]: _Path to the downloaded file, or None if not downloaded (based on if_file_present + setting). """ ensure_folders([download_dir]) - # Download most recent daily IAU2000 file if running for a session within the past week (data is within 3 months) - if _datetime.datetime.now() - start_epoch < _datetime.timedelta(weeks=1): + filetype = "EOP IAU2000" + + if iau2000_file_varient == "daily": + # Daily (recent) file spanning the last three months. 
Updated daily (as the name suggests). url_dir = "daily/" iau2000_filename = "finals2000A.daily" download_filename = "finals.daily.iau2000.txt" - logging.info("Attempting Download of finals2000A.daily file") - # Otherwise download the IAU2000 file dating back to 1992 - else: + # logging.info("Attempting Download of finals2000A.daily file") + elif iau2000_file_varient == "standard": + # Standard (historic) IAU2000 file dating back to 1992: url_dir = "standard/" iau2000_filename = "finals2000A.data" download_filename = "finals.data.iau2000.txt" - logging.info("Attempting Download of finals2000A.data file") - filetype = "EOP IAU2000" + # logging.info("Attempting Download of finals2000A.data file") + else: + raise ValueError(f"Unrecognised IAU2000 file varient requested: {iau2000_file_varient}") + # Can we skip this download, based on existing file and value of if_file_present? if not check_whether_to_download( filename=download_filename, download_dir=download_dir, if_file_present=if_file_present ): - return None + return None # No output path for this varient given we didn't download it. - # Attempt download from the CDDIS website first, if that fails try IERS - # Eugene: should try IERS first and then CDDIS? + # Default source IERS, then fall back to CDDIS (based on Eugene's comment) try: - logging.info("Downloading IAU2000 file from CDDIS") - download_filepath = download_file_from_cddis( - filename=iau2000_filename, - ftp_folder="products/iers/", - output_folder=download_dir, - decompress=False, - if_file_present=if_file_present, - note_filetype=filetype - ) - download_filepath = download_filepath.rename(download_dir / download_filename) - except: - logging.info("Failed CDDIS download - Downloading IAU2000 file from IERS") - download_filepath = attempt_url_download( + logging.info("Downloading IAU2000 file from IERS") + # We set raise_on_failure, so a return of None indicates the file did not need downloading (bit redundant). 
+ return attempt_url_download( download_dir=download_dir, url="https://datacenter.iers.org/products/eop/rapid/" + url_dir + iau2000_filename, filename=download_filename, type_of_file=filetype, if_file_present=if_file_present, + raise_on_failure=True, ) - return download_filepath + except Exception as ex: + logging.error("Failed to download IAU2000 file from IERS.", ex) + try: + logging.info("Downloading IAU2000 file from CDDIS") + download_filepath_or_none = download_file_from_cddis( + filename=iau2000_filename, + ftp_folder="products/iers/", + output_folder=download_dir, + decompress=False, + if_file_present=if_file_present, + note_filetype=filetype, + # Already raises on failure by default + ) + if download_filepath_or_none is None: # File already existed and was not re-downloaded. + return None + # Download succeeded. Rename to standard filename, and return the path + return download_filepath_or_none.rename(download_dir / download_filename) + except Exception as ex: + logging.error("Failed to download IAU2000 file from CDDIS.", ex) + + +def get_iau2000_file_varients_for_dates( + start_epoch: Union[_datetime.datetime, None] = None, + end_epoch: Union[_datetime.datetime, None] = None, +) -> set[Literal["standard", "daily"]]: + """ + Works out which varient(s) of IAU2000 files are needed, based on the given start and or end epoch. + The returned varient(s) can be iterated over and passed to the download_iau2000_varient() function to fetch the + file(s). + Added in approximately version 0.0.58 + + Specifications for the file formats: + finals.data https://datacenter.iers.org/versionMetadata.php?filename=latestVersionMeta/10_FINALS.DATA_IAU2000_V2013_0110.txt + finals.daily https://datacenter.iers.org/versionMetadata.php?filename=latestVersionMeta/13_FINALS.DAILY_IAU2000_V2013_0113.txt + + :param Union[_datetime.datetime, None] start_epoch: Start of date range. Optional if end_epoch is provided. + :param Union[_datetime.datetime, None] end_epoch: End of date range. 
Optional if start_epoch is provided. + :return set[Literal["standard", "daily"]]: Set of IAU2000 file varient names needed for your date / date range + """ + if not (start_epoch or end_epoch): + raise ValueError("start_epoch, end_epoch or both, must be provided") + + needed_varients: set[Literal["standard", "daily"]] = set() + now = _datetime.datetime.now() + # TODO double check if they do it by calendar months, or days. If it's calendar months we need to be more conservative; 80 days perhaps. + three_months_ago = now - _datetime.timedelta(days=80) + one_week_ago = now - _datetime.timedelta(weeks=1) + eight_days_ago = now - _datetime.timedelta(days=8) + + # To be safe, until we confirm that data makes it into the 'standard' file on day 7 + one_week_ago = eight_days_ago + + # If the time range overlaps with less than a week ago, you need the daily file (the historical one won't have + # new enough data). + # Otherwise, the historical file will be more complete. + # TODO but do you actually benefit from the historical file if you're not going more than 3 months back? + + # Keeping it simple + # If your timerange includes dates as recent as last week, you need the daily file + # If your timerange includes dates older than three months ago, you need the standard file + # NOTE: you may need both! 
+ + if (start_epoch and start_epoch >= one_week_ago) or (end_epoch and end_epoch >= one_week_ago): + needed_varients.add("daily") + + if (start_epoch and start_epoch < three_months_ago) or (end_epoch and end_epoch < three_months_ago): + needed_varients.add("standard") + + return needed_varients + + +# TODO DEPRECATED +def download_iau2000_file( + download_dir: _Path, + start_epoch: _datetime.datetime, + if_file_present: str = "prompt_user", +) -> Union[_Path, None]: + """ + Compatibility wrapper around new functions + DEPRECATED since approximately version 0.0.58 + """ + varients = get_iau2000_file_varients_for_dates(start_epoch=start_epoch) + if len(varients) != 1: + raise NotImplementedError( + "Legacy wrapper for IAU2000 file download failed. Exactly one file varient should be returned based on " + f"a single date ({start_epoch})." + ) + varient = varients.pop() + download_iau2000_varient(download_dir=download_dir, iau2000_file_varient=varient, if_file_present=if_file_present) def download_atx( From bfe6dced0103ae60d78d796e85017e22836d5cef Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 21 Jan 2025 07:54:10 +0000 Subject: [PATCH 2/6] NPI-3689 fix spelling, update variant selection logic, add preferred_variant option to use when dates can be accommodated by multiple variants --- gnssanalysis/gn_download.py | 84 ++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/gnssanalysis/gn_download.py b/gnssanalysis/gn_download.py index f5f39b3..06aad6a 100644 --- a/gnssanalysis/gn_download.py +++ b/gnssanalysis/gn_download.py @@ -931,17 +931,17 @@ def download_product_from_cddis( return download_filepaths -def download_iau2000_varient( +def download_iau2000_variant( download_dir: _Path, - iau2000_file_varient: Literal["standard", "daily"], + iau2000_file_variant: Literal["standard", "daily"], if_file_present: str = "prompt_user", ) -> Union[_Path, None]: """ - Downloads IAU2000
file based on the varient requested ("daily" or "standard" file). + Downloads IAU2000 file based on the variant requested ("daily" or "standard" file). Added in approximately version 0.0.58 :param _Path download_dir: Where to download file (local directory). - :param Literal["standard", "daily"] iau2000_file_varient: name of file varient to download. Specifies whether to + :param Literal["standard", "daily"] iau2000_file_variant: name of file variant to download. Specifies whether to download the recent "daily" file, or the historical "standard" file. :param str if_file_present: What to do if file already present: "replace", "dont_replace", defaults to "prompt_user" :raises Exception: On download failures. In some cases the underlying exception will be wrapped in the one raised. @@ -951,26 +951,26 @@ def download_iau2000_varient( ensure_folders([download_dir]) filetype = "EOP IAU2000" - if iau2000_file_varient == "daily": + if iau2000_file_variant == "daily": # Daily (recent) file spanning the last three months. Updated daily (as the name suggests). url_dir = "daily/" iau2000_filename = "finals2000A.daily" download_filename = "finals.daily.iau2000.txt" # logging.info("Attempting Download of finals2000A.daily file") - elif iau2000_file_varient == "standard": + elif iau2000_file_variant == "standard": # Standard (historic) IAU2000 file dating back to 1992: url_dir = "standard/" iau2000_filename = "finals2000A.data" download_filename = "finals.data.iau2000.txt" # logging.info("Attempting Download of finals2000A.data file") else: - raise ValueError(f"Unrecognised IAU2000 file varient requested: {iau2000_file_varient}") + raise ValueError(f"Unrecognised IAU2000 file variant requested: {iau2000_file_variant}") # Can we skip this download, based on existing file and value of if_file_present? 
if not check_whether_to_download( filename=download_filename, download_dir=download_dir, if_file_present=if_file_present ): - return None # No output path for this varient given we didn't download it. + return None # No output path for this variant given we didn't download it. # Default source IERS, then fall back to CDDIS (based on Eugene's comment) try: @@ -1005,13 +1005,16 @@ def download_iau2000_varient( logging.error("Failed to download IAU2000 file from CDDIS.", ex) -def get_iau2000_file_varients_for_dates( +def get_iau2000_file_variants_for_dates( start_epoch: Union[_datetime.datetime, None] = None, end_epoch: Union[_datetime.datetime, None] = None, + preferred_variant: Literal["standard", "daily"] = "daily", ) -> set[Literal["standard", "daily"]]: """ - Works out which varient(s) of IAU2000 files are needed, based on the given start and or end epoch. - The returned varient(s) can be iterated over and passed to the download_iau2000_varient() function to fetch the + Works out which variant(s) of IAU2000 files are needed, based on the given start and or end epoch. If the epoch(s) + entered can be accomodated by both IAU2000 variants, the preferred_variant will be returned (by default this + is 'daily'). + The returned variant(s) can be iterated over and passed to the download_iau2000_variant() function to fetch the file(s). Added in approximately version 0.0.58 @@ -1021,38 +1024,51 @@ def get_iau2000_file_varients_for_dates( :param Union[_datetime.datetime, None] start_epoch: Start of date range. Optional if end_epoch is provided. :param Union[_datetime.datetime, None] end_epoch: End of date range. Optional if start_epoch is provided. - :return set[Literal["standard", "daily"]]: Set of IAU2000 file varient names needed for your date / date range + :param set[Literal["standard", "daily"] preferred_variant: For date ranges where either file variant would work, + which one should be selected. Defaults to the 'daily' file. 
+ :return set[Literal["standard", "daily"]]: Set of IAU2000 file variant names needed for your date / date range """ if not (start_epoch or end_epoch): raise ValueError("start_epoch, end_epoch or both, must be provided") - needed_varients: set[Literal["standard", "daily"]] = set() + needed_variants: set[Literal["standard", "daily"]] = set() now = _datetime.datetime.now() - # TODO double check if they do it by calendar months, or days. If it's calendar months we need to be more conservative; 80 days perhaps. - three_months_ago = now - _datetime.timedelta(days=80) - one_week_ago = now - _datetime.timedelta(weeks=1) - eight_days_ago = now - _datetime.timedelta(days=8) - # To be safe, until we confirm that data makes it into the 'standard' file on day 7 - one_week_ago = eight_days_ago + date_89_days_ago = now - _datetime.timedelta(days=89) + date_8_days_ago = now - _datetime.timedelta(days=8) + # Notes on file variant choice logic: # If the time range overlaps with less than a week ago, you need the daily file (the historical one won't have # new enough data). # Otherwise, the historical file will be more complete. - # TODO but do you actually benefit from the historical file if you're not going more than 3 months back? - # Keeping it simple - # If your timerange includes dates as recent as last week, you need the daily file - # If your timerange includes dates older than three months ago, you need the standard file - # NOTE: you may need both! + # Fundamentally: + # If your timerange includes dates as recent as last week, you need the daily file. + # If your timerange includes dates older than three months ago, you need the standard (historic) file. + # Note that you may need both. 
- if (start_epoch and start_epoch >= one_week_ago) or (end_epoch and end_epoch >= one_week_ago): - needed_varients.add("daily") + # If start or end epoch within (>=) the date 8 days ago, standard file may not have the data yet, use daily + if (start_epoch and start_epoch >= date_8_days_ago) or (end_epoch and end_epoch >= date_8_days_ago): + needed_variants.add("daily") + # If start or end epoch older (<) the date three months ago, daily file won't have data, use standard file + if (start_epoch and start_epoch <= date_89_days_ago) or (end_epoch and end_epoch <= date_89_days_ago): + needed_variants.add("standard") - if (start_epoch and start_epoch < three_months_ago) or (end_epoch and end_epoch < three_months_ago): - needed_varients.add("standard") + # Default to preferred variant if provided dates weren't within a range that *required* use of one file or the other + if len(needed_variants) == 0: + needed_variants.add(preferred_variant) - return needed_varients + # Is there ambiguity in the date range (start or end not specified)? + + # Start of range was unspecified, we have to assume it may be older than 3 months + if not start_epoch: + needed_variants.add("standard") + + # End of range was unspecified, we have to assume it may be newer than a week + if not end_epoch: + needed_variants.add("daily") + + return needed_variants # TODO DEPRECATED @@ -1065,14 +1081,14 @@ def download_iau2000_file( Compatibility wrapper around new functions DEPRECATED since approximately version 0.0.58 """ - varients = get_iau2000_file_varients_for_dates(start_epoch=start_epoch) - if len(varients) != 1: + variants = get_iau2000_file_variants_for_dates(start_epoch=start_epoch) + if len(variants) != 1: raise NotImplementedError( - "Legacy wrapper for IAU2000 file download failed. Exactly one file varient should be returned based on " + "Legacy wrapper for IAU2000 file download failed. Exactly one file variant should be returned based on " f"a single date ({start_epoch})." 
) - varient = varients.pop() - download_iau2000_varient(download_dir=download_dir, iau2000_file_varient=varient, if_file_present=if_file_present) + variant = variants.pop() + download_iau2000_variant(download_dir=download_dir, iau2000_file_variant=variant, if_file_present=if_file_present) def download_atx( From 531cf09defe2e2df9759997ed1696f6e664f9627 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 23 Jan 2025 05:43:15 +0000 Subject: [PATCH 3/6] NPI-3689 add initial unit tests for IAU2000 variant selection logic --- tests/test_download.py | 162 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 tests/test_download.py diff --git a/tests/test_download.py b/tests/test_download.py new file mode 100644 index 0000000..c094fa7 --- /dev/null +++ b/tests/test_download.py @@ -0,0 +1,162 @@ +# Tests for gn_download.py + +import datetime +from pathlib import Path +from pyfakefs.fake_filesystem_unittest import TestCase +from datetime import datetime, timedelta + +from gnssanalysis.gn_download import get_iau2000_file_variants_for_dates, download_iau2000_variant + + +class TestIAU2000Selection(TestCase): + + def test_iau2000_variant_selection(self) -> None: + + # variants = Literal["standard", "daily"] + now = datetime.now() + + date_100_days_ago: datetime = now - timedelta(days=100) # Must use standard file + date_90_days_ago: datetime = now - timedelta(days=90) # Must use standard file (boundary) + date_89_days_ago: datetime = now - timedelta(days=89) # Must use standard file (safety boundary) + date_8_weeks_ago: datetime = now - timedelta(weeks=8) # Can use either file (preference dictates) + date_2_weeks_ago: datetime = now - timedelta(weeks=2) # Can use either file (preference dictates) + date_8_days_ago: datetime = now - timedelta(days=8) # Must use daily file (safety boundary) + date_7_days_ago: datetime = now - timedelta(days=7) # Must use daily file (boundary) + date_3_days_ago: 
datetime = now - timedelta(days=3) # Must use daily file + date_1_day_ago: datetime = now - timedelta(days=1) # Probably ok, must use daily file + date_today: datetime = now # Edge case, probably should raise an exception + date_1_day_in_future: datetime = now + timedelta(days=1) # Should raise exception + + # --- Tests with both start and end date specified --- + + self.assertEqual( + get_iau2000_file_variants_for_dates(start_epoch=date_100_days_ago, end_epoch=date_90_days_ago), + set(["standard"]), + "Start and end both dated older than 90 days, should use standard file", + ) + + self.assertEqual( + get_iau2000_file_variants_for_dates(start_epoch=date_100_days_ago, end_epoch=date_89_days_ago), + set(["standard"]), + "Range with border, should choose conservative option (89 day boundary should choose standard file)", + ) + + self.assertEqual( + get_iau2000_file_variants_for_dates(start_epoch=date_89_days_ago, end_epoch=date_89_days_ago), + set(["standard"]), + "0-length range on 89 days should should choose standard file", + ) + + self.assertEqual( + get_iau2000_file_variants_for_dates(start_epoch=date_89_days_ago, end_epoch=date_3_days_ago), + set(["standard", "daily"]), + "Range on 89 days should conservatively include standard file (even if other date causes selection of daily file as well)", + ) + + # Option 1: no preference + self.assertEqual( + get_iau2000_file_variants_for_dates(start_epoch=date_100_days_ago, end_epoch=date_7_days_ago), + set(["standard", "daily"]), + "Range touching 7 days ago should conservatively include daily file, regardless of preference (default / not stated) and other date in range", + ) + + # Option 2: prefer standard + self.assertEqual( + get_iau2000_file_variants_for_dates( + start_epoch=date_100_days_ago, end_epoch=date_7_days_ago, preferred_variant="standard" + ), + set(["standard", "daily"]), + "Range touching 7 days ago should conservatively include daily file, regardless of preference (standard) and other date in range", + ) 
+ + # Option 3: prefer daily (default at time of writing) + self.assertEqual( + get_iau2000_file_variants_for_dates( + start_epoch=date_100_days_ago, end_epoch=date_7_days_ago, preferred_variant="daily" + ), + set(["standard", "daily"]), + "Range touching 7 days ago should conservatively include daily file, regardless of preference (daily) and other date in range", + ) + + self.assertEqual( + get_iau2000_file_variants_for_dates(start_epoch=date_8_days_ago, end_epoch=date_3_days_ago), + set(["daily"]), + "Recent range should always pick daily file when on boundary, regardless of preference (not specified)", + ) + + self.assertEqual( + get_iau2000_file_variants_for_dates( + start_epoch=date_8_days_ago, end_epoch=date_3_days_ago, preferred_variant="standard" + ), + set(["daily"]), + "Recent range should always pick daily file when on boundary, regardless of preference (standard)", + ) + + self.assertEqual( + get_iau2000_file_variants_for_dates( + start_epoch=date_8_days_ago, end_epoch=date_3_days_ago, preferred_variant="daily" + ), + set(["daily"]), + "Recent range should always pick daily file when on boundary, regardless of preference (daily)", + ) + + # --- Tests leveraging variant preference overrride --- + + self.assertEqual( + get_iau2000_file_variants_for_dates( + start_epoch=date_8_weeks_ago, end_epoch=date_2_weeks_ago, preferred_variant="daily" + ), + set(["daily"]), + "Date ranges which are version agnostic should fall back on the preference specified (daily)", + ) + + self.assertEqual( + get_iau2000_file_variants_for_dates( + start_epoch=date_8_weeks_ago, end_epoch=date_2_weeks_ago, preferred_variant="standard" + ), + set(["standard"]), + "Date ranges which are version agnostic should fall back on the preference specified (standard)", + ) + + # --- Tests focussing on boundary dates (not all of them) --- + self.assertTrue( + "daily" + in get_iau2000_file_variants_for_dates( + start_epoch=date_2_weeks_ago, end_epoch=date_1_day_ago, 
preferred_variant="standard" + ), + "Date range ending at yesterday should be allowable, but must utilise daily file", + ) + + # --- Tests with start or end date only --- + + self.assertTrue( + "daily" in get_iau2000_file_variants_for_dates(start_epoch=date_8_weeks_ago, preferred_variant="standard"), + "Open ended range should conservatively assume the range may extend past a boundary", + ) + + self.assertTrue( + "standard" in get_iau2000_file_variants_for_dates(end_epoch=date_3_days_ago, preferred_variant="daily"), + "Open ended range should conservatively assume the range may extend past a boundary", + ) + + + # --- Tests for invalid values --- + # Invalid argument + with self.assertRaises(Exception): + get_iau2000_file_variants_for_dates() + # Start or end date must be provided. + + # Invalid dates + # Dates must be before today / can't be in the future. We allow about a day for the data source to update. + with self.assertRaises(ValueError): + get_iau2000_file_variants_for_dates(start_epoch=now) + + with self.assertRaises(ValueError): + get_iau2000_file_variants_for_dates(end_epoch=now) + + with self.assertRaises(ValueError): + get_iau2000_file_variants_for_dates(start_epoch=date_1_day_in_future) + + with self.assertRaises(ValueError): + get_iau2000_file_variants_for_dates(end_epoch=date_1_day_in_future) + From 0f97f59716812fe7530271eff8bb2c599c1cee9a Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 23 Jan 2025 05:55:38 +0000 Subject: [PATCH 4/6] NPI-3689 small updates to iau2000 selection logic --- gnssanalysis/gn_download.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/gnssanalysis/gn_download.py b/gnssanalysis/gn_download.py index 06aad6a..9766ec5 100644 --- a/gnssanalysis/gn_download.py +++ b/gnssanalysis/gn_download.py @@ -1011,9 +1011,9 @@ def get_iau2000_file_variants_for_dates( preferred_variant: Literal["standard", "daily"] = "daily", ) -> set[Literal["standard",
"daily"]]: """ - Works out which variant(s) of IAU2000 files are needed, based on the given start and or end epoch. If the epoch(s) - entered can be accomodated by both IAU2000 variants, the preferred_variant will be returned (by default this - is 'daily'). + Works out which variant(s) of IAU2000 files are needed, based on the given start and or end epoch. If no part of + the date range entered *necessitates* using a specific IAU2000 variant, the preferred_variant will be returned + as a tie-breaker (by default this is 'daily'). The returned variant(s) can be iterated over and passed to the download_iau2000_variant() function to fetch the file(s). Added in approximately version 0.0.58 @@ -1024,15 +1024,21 @@ def get_iau2000_file_variants_for_dates( :param Union[_datetime.datetime, None] start_epoch: Start of date range. Optional if end_epoch is provided. :param Union[_datetime.datetime, None] end_epoch: End of date range. Optional if start_epoch is provided. - :param set[Literal["standard", "daily"] preferred_variant: For date ranges where either file variant would work, - which one should be selected. Defaults to the 'daily' file. + :param set[Literal["standard", "daily"] preferred_variant: For date ranges that don't require us to use a specific + variant, which variant should we fall back on as a tie-breaker. Defaults to the 'daily' file. :return set[Literal["standard", "daily"]]: Set of IAU2000 file variant names needed for your date / date range """ if not (start_epoch or end_epoch): raise ValueError("start_epoch, end_epoch or both, must be provided") needed_variants: set[Literal["standard", "daily"]] = set() - now = _datetime.datetime.now() + date_24_hours_ago = now - timedelta(days=1) + + # Dates can't be within the last 24 hours, or in the future + if (start_epoch and start_epoch > date_24_hours_ago) or (end_epoch and end_epoch > date_24_hours_ago): + raise ValueError( + "All dates provided must be 24h old or older. 
We can't assume data newer than a day old will be present" + ) date_89_days_ago = now - _datetime.timedelta(days=89) date_8_days_ago = now - _datetime.timedelta(days=8) From 852975c1fc56c2fc7c133fe68ed72d42d4c8f904 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 30 Jan 2025 09:40:18 +0000 Subject: [PATCH 5/6] NPI-3689 small fixes for changes incorrectly pulled across from further work --- gnssanalysis/gn_download.py | 4 +++- tests/test_download.py | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/gnssanalysis/gn_download.py b/gnssanalysis/gn_download.py index 9766ec5..17794f1 100644 --- a/gnssanalysis/gn_download.py +++ b/gnssanalysis/gn_download.py @@ -1032,7 +1032,9 @@ def get_iau2000_file_variants_for_dates( raise ValueError("start_epoch, end_epoch or both, must be provided") needed_variants: set[Literal["standard", "daily"]] = set() - date_24_hours_ago = now - timedelta(days=1) + now = _datetime.datetime.now() + + date_24_hours_ago = now - _datetime.timedelta(days=1) # Dates can't be within the last 24 hours, or in the future if (start_epoch and start_epoch > date_24_hours_ago) or (end_epoch and end_epoch > date_24_hours_ago): diff --git a/tests/test_download.py b/tests/test_download.py index c094fa7..df052f5 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -139,7 +139,6 @@ def test_iau2000_variant_selection(self) -> None: "Open ended range should conservatively assume the range may extend past a boundary", ) - # --- Tests for invalid values --- # Invalid argument with self.assertRaises(Exception): From 93e687911a2cd5b09e3866c231149c99829628f9 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 3 Feb 2025 06:53:02 +0000 Subject: [PATCH 6/6] NPI-3689 add missing logic to IAU2000 variant picker to support legacy mode, and update tests for both this and edge cases relating to sub-ms time differences during tests --- 
gnssanalysis/gn_download.py | 24 ++++++++++- tests/test_download.py | 79 +++++++++++++++++++++++++++++++++---- 2 files changed, 94 insertions(+), 9 deletions(-) diff --git a/gnssanalysis/gn_download.py b/gnssanalysis/gn_download.py index 17794f1..0483dcb 100644 --- a/gnssanalysis/gn_download.py +++ b/gnssanalysis/gn_download.py @@ -1009,6 +1009,7 @@ def get_iau2000_file_variants_for_dates( start_epoch: Union[_datetime.datetime, None] = None, end_epoch: Union[_datetime.datetime, None] = None, preferred_variant: Literal["standard", "daily"] = "daily", + legacy_mode: bool = False, # TODO remove once wrapper function (download_iau2000_file()) is removed. ) -> set[Literal["standard", "daily"]]: """ Works out which variant(s) of IAU2000 files are needed, based on the given start and or end epoch. If no part of @@ -1026,11 +1027,24 @@ def get_iau2000_file_variants_for_dates( :param Union[_datetime.datetime, None] end_epoch: End of date range. Optional if start_epoch is provided. :param set[Literal["standard", "daily"] preferred_variant: For date ranges that don't require us to use a specific variant, which variant should we fall back on as a tie-breaker. Defaults to the 'daily' file. + :param bool legacy_mode: (Deprecated) for backwards compatibility only: limit to only the variant we definitely + need, when we have an unbounded range (missing start or end epoch). By default this is not enabled. Can only be + used with a start_epoch OR end_epoch, not both. :return set[Literal["standard", "daily"]]: Set of IAU2000 file variant names needed for your date / date range + :raises ValueError: If invalid combination of parameters is given. Note that a start and or end epoch *must* be + given. In legacy mode, only a start OR end epoch can be specified, and preferred_variant *must* be set + to 'standard'. 
""" if not (start_epoch or end_epoch): raise ValueError("start_epoch, end_epoch or both, must be provided") + # Validate legacy_mode related restrictions + if legacy_mode: + if preferred_variant != "standard": # This is what has historically been used + raise ValueError("In legacy mode, preferred_variant must be set to 'standard'") + if start_epoch and end_epoch: + raise ValueError("In legacy_mode, only a start_epoch OR end_epoch can be specified (not both)") + needed_variants: set[Literal["standard", "daily"]] = set() now = _datetime.datetime.now() @@ -1068,6 +1082,11 @@ def get_iau2000_file_variants_for_dates( # Is there ambiguity in the date range (start or end not specified)? + # In legacy mode, don't proceed to consider this ambiguity. This should result in a single variant only, as we + # have already enforced that only a start OR end epoch is used in legacy mode. + if legacy_mode: + return needed_variants + # Start of range was unspecified, we have to assume it may be older than 3 months if not start_epoch: needed_variants.add("standard") @@ -1089,7 +1108,10 @@ def download_iau2000_file( Compatibility wrapper around new functions DEPRECATED since approximately version 0.0.58 """ - variants = get_iau2000_file_variants_for_dates(start_epoch=start_epoch) + # Run variant picker with legacy configuration options + variants = get_iau2000_file_variants_for_dates( + start_epoch=start_epoch, legacy_mode=True, preferred_variant="standard" + ) if len(variants) != 1: raise NotImplementedError( "Legacy wrapper for IAU2000 file download failed. 
Exactly one file variant should be returned based on " diff --git a/tests/test_download.py b/tests/test_download.py index df052f5..122cf5b 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -66,7 +66,7 @@ def test_iau2000_variant_selection(self) -> None: start_epoch=date_100_days_ago, end_epoch=date_7_days_ago, preferred_variant="standard" ), set(["standard", "daily"]), - "Range touching 7 days ago should conservatively include daily file, regardless of preference (standard) and other date in range", + "Range touching 7 days ago should conservatively include daily file, regardless of preference (case: standard) and other date in range", ) # Option 3: prefer daily (default at time of writing) @@ -75,29 +75,59 @@ def test_iau2000_variant_selection(self) -> None: start_epoch=date_100_days_ago, end_epoch=date_7_days_ago, preferred_variant="daily" ), set(["standard", "daily"]), - "Range touching 7 days ago should conservatively include daily file, regardless of preference (daily) and other date in range", + "Range touching 7 days ago should conservatively include daily file, regardless of preference (case: daily) and other date in range", ) self.assertEqual( - get_iau2000_file_variants_for_dates(start_epoch=date_8_days_ago, end_epoch=date_3_days_ago), + get_iau2000_file_variants_for_dates( + start_epoch=date_8_days_ago - timedelta(seconds=1), end_epoch=date_3_days_ago + ), + set(["daily"]), + "Recent range should always pick daily file when around (case: just over) boundary, regardless of preference (not specified)", + ) + + self.assertEqual( + get_iau2000_file_variants_for_dates( + start_epoch=date_8_days_ago + timedelta(seconds=1), end_epoch=date_3_days_ago + ), + set(["daily"]), + "Recent range should always pick daily file when around (case: just under) boundary, regardless of preference (not specified)", + ) + + self.assertEqual( + get_iau2000_file_variants_for_dates( + start_epoch=date_8_days_ago - timedelta(seconds=1), + 
end_epoch=date_3_days_ago, + preferred_variant="standard", + ), + set(["daily"]), + "Recent range should always pick daily file when around (case: just over) boundary, regardless of preference (case: standard)", + ) + + self.assertEqual( + get_iau2000_file_variants_for_dates( + start_epoch=date_8_days_ago + timedelta(seconds=1), + end_epoch=date_3_days_ago, + preferred_variant="standard", + ), set(["daily"]), - "Recent range should always pick daily file when on boundary, regardless of preference (not specified)", + "Recent range should always pick daily file when around (case: just under) boundary, regardless of preference (case: standard)", ) self.assertEqual( get_iau2000_file_variants_for_dates( - start_epoch=date_8_days_ago, end_epoch=date_3_days_ago, preferred_variant="standard" + start_epoch=date_8_days_ago - timedelta(seconds=1), end_epoch=date_3_days_ago, preferred_variant="daily" ), set(["daily"]), - "Recent range should always pick daily file when on boundary, regardless of preference (standard)", + "Recent range should always pick daily file when around (case: just over) boundary, regardless of preference (case: daily)", ) self.assertEqual( get_iau2000_file_variants_for_dates( - start_epoch=date_8_days_ago, end_epoch=date_3_days_ago, preferred_variant="daily" + start_epoch=date_8_days_ago + timedelta(seconds=1), end_epoch=date_3_days_ago, preferred_variant="daily" ), set(["daily"]), - "Recent range should always pick daily file when on boundary, regardless of preference (daily)", + "Recent range should always pick daily file when around (case: just under) boundary, regardless of preference (case: daily)", ) # --- Tests leveraging variant preference overrride --- @@ -159,3 +189,36 @@ def test_iau2000_variant_selection(self) -> None: with self.assertRaises(ValueError): get_iau2000_file_variants_for_dates(end_epoch=date_1_day_in_future) + # Test legacy mode + self.assertEqual( + get_iau2000_file_variants_for_dates( + start_epoch=date_89_days_ago, + 
legacy_mode=True, + preferred_variant="standard", + ), + set(["standard"]), + "In legacy mode expect *only* 'standard' file for an 89 day old start_epoch", + ) + self.assertEqual( + get_iau2000_file_variants_for_dates( + start_epoch=date_2_weeks_ago, + legacy_mode=True, + preferred_variant="standard", + ), + set(["standard"]), + "In legacy mode expect *only* 'standard' file for a two week old start_epoch", + ) + + self.assertEqual( + # As this epoch is right on the border line, adjust so that the sub-ms time elapsed between defining this + # variable at the top of this test case, and using it below, doesn't lead to a fresh calculation + # of (now - 8 days) within the function, being greater than this more 'stale' value. + get_iau2000_file_variants_for_dates( + start_epoch=date_8_days_ago + timedelta(seconds=1), + legacy_mode=True, + preferred_variant="standard", + ), + set(["daily"]), + "In legacy mode expect 'daily' file for an ~8 day old start_epoch (note: original implementation " + "would return 'standard' for anything over 7 days)", + )