From 9a6f05e3b898d520e2634f1481948be24467f995 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 23 Dec 2024 05:38:23 +0000 Subject: [PATCH 1/8] NPI-3669 add sp3 transformation and trimming functionality (from other codebase), add more trimming options, introduce utility script for making minimal SP3 files, primarily intended for unit tests --- gnssanalysis/gn_io/sp3.py | 103 +++++++++++++++++++++++- gnssanalysis/test_file_creation_util.py | 71 ++++++++++++++++ 2 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 gnssanalysis/test_file_creation_util.py diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 32bba76..f031c96 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -1,3 +1,4 @@ +from datetime import timedelta import logging import io as _io import os as _os @@ -226,6 +227,65 @@ def remove_offline_sats(sp3_df: _pd.DataFrame, df_friendly_name: str = ""): return sp3_df +def filter_by_svs( + sp3_df: _pd.DataFrame, + filter_by_count: Optional[int], + filter_by_name: Optional[list[str]], + filter_to_sat_letter: Optional[str], +) -> _pd.DataFrame: + """ + Utility function to trim an SP3 DataFrame down, intended for creating small sample SP3 files for + unit testing (but could be used for other purposes). + Can filter to a specific number of SVs, to specific SV names, and to a specific constellation. + + These filters can be used together (though filter by name and filter by sat letter i.e. constellation, does + not make sense). + E.g. you may filter sats to a set of possible SV names, and also to a maximum of n sats. Or you might filter to + a specific constellation, then cap at a max of n sats. + + :param _pd.DataFrame sp3_df: input SP3 DataFrame to perform filtering on + :param Optional[int] filter_by_count: max number of sats to return + :param Optional[list[str]] filter_by_name: names of sats to constrain to + :param Optional[str] filter_to_sat_letter: name of constellation (single letter) to constrain to + :return _pd.DataFrame: new SP3 DataFrame after filtering + """ + + # Get all SV names + all_sv_names = sp3_df.index.get_level_values(1).unique().array + total_svs = len(all_sv_names) + logger.info(f"Total SVs: {total_svs}") + + # Drop SVs which don't match given names + if filter_by_name: + # Make set of every SV name to drop (exclude everything besides what we want to keep) + exclusion_list: list[str] = list(set(all_sv_names) - set(filter_by_name)) + sp3_df = sp3_df.drop(exclusion_list, level=1) + + # Drop SVs which don't match a given constellation letter (i.e. 'G', 'E', 'R', 'C') + if filter_to_sat_letter: + if len(filter_to_sat_letter) != 1: + raise ValueError( + "Name of sat constellation to filter to, must be a single char. E.g. you cannot enter 'GR'" + ) + # Make set of every SV name to drop (exclude everything besides what we want to keep) + other_constellation_sats = [sv for sv in all_sv_names if not filter_to_sat_letter.upper() in sv] + sp3_df = sp3_df.drop(other_constellation_sats, level=1) + + # Drop SVs beyond n (i.e. keep only the first n SVs) + if filter_by_count: + if filter_by_count < 0: + raise ValueError("Cannot filter to a negative number of SVs!") + if total_svs <= filter_by_count: + raise ValueError( + f"Cannot filter to max of {filter_by_count} sats, as there are only {total_svs} sats total!" + ) + # Exclusion list built by taking all sats *beyond* the amount we want to keep. 
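+        # E.g. for 34 SVs with filter_by_count=2, the slice below lists the 32 SVs after G01 and G02, which are then dropped.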
+ exclusion_list = all_sv_names[filter_by_count:] + sp3_df = sp3_df.drop(exclusion_list, level=1) + + return sp3_df + + def mapparm(old: Tuple[float, float], new: Tuple[float, float]) -> Tuple[float, float]: """ Evaluate the offset and scale factor needed to map values from the old range to the new range. @@ -873,7 +933,48 @@ def sp3merge( return merged_sp3 -def sp3_hlm_trans(a: _pd.DataFrame, b: _pd.DataFrame) -> tuple[_pd.DataFrame, list]: +def transform_sp3(src_sp3: str, dest_sp3: str, transform_fn, *args, **kwargs): + """ + Apply a transformation to an sp3 file + """ + logger.info(f"Reading file: " + str(src_sp3)) + sp3_df = read_sp3(src_sp3) + transformed_df = transform_fn(sp3_df, *args, **kwargs) + write_sp3(transformed_df, dest_sp3) + + +def trim_df( + sp3_df: _pd.DataFrame, + trim_start: timedelta = timedelta(), + trim_end: timedelta = timedelta(), + keep_first_delta_amount: Optional[timedelta] = None, +): + """ + Trim data from the start and end of an sp3 dataframe + """ + time_axis = sp3_df.index.get_level_values(0) + # Work out the new time range that we care about + first_time = min(time_axis) + first_keep_time = first_time + trim_start.total_seconds() + last_time = max(time_axis) + last_keep_time = last_time - trim_end.total_seconds() + + # Operating in mode of trimming from start, to start + x amount of time in. As opposed to trimming a delta from each end. + if keep_first_delta_amount: + first_keep_time = first_time + last_keep_time = first_time + keep_first_delta_amount.total_seconds() + + # Slice to the subset that we actually care about + trimmed_df = sp3_df.loc[first_keep_time:last_keep_time] + trimmed_df.index = trimmed_df.index.remove_unused_levels() + # trimmed_df.attrs["HEADER"].HEAD.ORB_TYPE = "FIT" + return trimmed_df + + +def sp3_hlm_trans( + a: _pd.DataFrame, + b: _pd.DataFrame, +) -> tuple[_pd.DataFrame, list]: """ Rotates sp3_b into sp3_a. diff --git a/gnssanalysis/test_file_creation_util.py b/gnssanalysis/test_file_creation_util.py new file mode 100644 index 0000000..c8c962e --- /dev/null +++ b/gnssanalysis/test_file_creation_util.py @@ -0,0 +1,71 @@ +from datetime import timedelta +from typing import Optional +from gnssanalysis.filenames import convert_nominal_span, determine_properties_from_filename +from gnssanalysis.gn_io.sp3 import filter_by_svs, read_sp3, trim_df, write_sp3, remove_offline_sats +import logging + +logger = logging.getLogger(__name__) + + +#### Configuration #### + +src_path = "IGS0DEMULT_20243181800_02D_05M_ORB.SP3" +dest_path = "IGS0DEMULT_20243181800_02D_05M_ORB.SP3-trimmed" + +# Constrain to x SVs, specific SV names, both, or neither +trim_to_sv_names: Optional[list[str]] = ["G02", "G03", "G19"] +trim_to_sv_count: Optional[int] = None # 1 +trim_to_sat_letter: Optional[str] = None # "E" + +# How many epochs to include in the trimmed file (offset from start) +trim_to_num_epochs: int = 3 + +drop_offline_sats: bool = False + +#### + + +filename = src_path.rsplit("/")[-1] +print(f"Filename is: {filename}") + +# Raw data would be: determine_sp3_name_props() - that retrieves in seconds. But we want to be more generally applicable, so not just SP3 here ideally. 
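+# E.g. for "IGS0DEMULT_20243181800_02D_05M_ORB.SP3", the "05M" component should yield a sample_rate of timedelta(minutes=5).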
+sample_rate: timedelta = convert_nominal_span(determine_properties_from_filename(filename)["sampling_rate"]) +print(f"sample_rate is: {sample_rate}") + + +# time_start_offset: timedelta = timedelta(0) +time_offset_from_start: timedelta = sample_rate * (trim_to_num_epochs - 1) + + +# Load +print("Loading SP3 into DataFrame...") +sp3_df = read_sp3(src_path) + +# Trim to first x epochs +print(f"Trimming to first {trim_to_num_epochs} epochs (timedelta from start: {time_offset_from_start})") +sp3_df = trim_df(sp3_df, keep_first_delta_amount=time_offset_from_start) + +# Filter to chosen SVs or number of SVs... +print( + f"Applying SV filters (max count: {trim_to_sv_count}, limit to names: {trim_to_sv_names}, limit to constellation: {trim_to_sat_letter})..." +) +sp3_df = filter_by_svs( + sp3_df, filter_by_count=trim_to_sv_count, filter_by_name=trim_to_sv_names, filter_to_sat_letter=trim_to_sat_letter +) + +# Drop offline sats if requested +if drop_offline_sats: + print(f"Dropping offline sats...") + sp3_df = remove_offline_sats(sp3_df) + +# Write out +print( + "Writing out new SP3 file... " + 'CAUTION: at the time of writing the header is based on stale metadata in .attrs["HEADER"], not the contents ' + "of the dataframe. It will need to be manually updated." +) +write_sp3(sp3_df, dest_path) + +# Test if we can successfully read that file... +print("Testing re-read of the output file...") +re_read = read_sp3(dest_path) From 7a8ae97e3557af59bf9323c55caf9e0ee99f0dfb Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 23 Dec 2024 22:14:12 +0000 Subject: [PATCH 2/8] NPI-3669 update filter_by_svs() to make all filtering args properly optional --- gnssanalysis/gn_io/sp3.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index f031c96..0b471a8 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -229,9 +229,9 @@ def remove_offline_sats(sp3_df: _pd.DataFrame, df_friendly_name: str = ""): def filter_by_svs( sp3_df: _pd.DataFrame, - filter_by_count: Optional[int], - filter_by_name: Optional[list[str]], - filter_to_sat_letter: Optional[str], + filter_by_count: Optional[int] = None, + filter_by_name: Optional[list[str]] = None, + filter_to_sat_letter: Optional[str] = None, ) -> _pd.DataFrame: """ Utility function to trim an SP3 DataFrame down, intended for creating small sample SP3 files for From 8dd48a26cad41667388bef7e3adfe167a5d4e82a Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 24 Dec 2024 00:29:56 +0000 Subject: [PATCH 3/8] NPI-3669 more unit tests for sp3 processing, filter_by_svs(), trim_df() including new functionality to take first n epochs --- tests/test_sp3.py | 107 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) diff --git a/tests/test_sp3.py b/tests/test_sp3.py index 960279f..e5bd5fc 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -1,3 +1,4 @@ +from datetime import timedelta import unittest from unittest.mock import patch, mock_open from pyfakefs.fake_filesystem_unittest import TestCase @@ -5,6 +6,7 @@ import numpy as np import pandas as pd +from gnssanalysis.filenames import convert_nominal_span, determine_properties_from_filename import gnssanalysis.gn_io.sp3 as sp3 from test_datasets.sp3_test_data import ( @@ -185,18 +187,119 @@ def test_velinterpolation(self, mock_file): def test_sp3_offline_sat_removal(self, mock_file): sp3_df = 
sp3.read_sp3("mock_path", pOnly=False) self.assertEqual( - sp3_df.index.get_level_values(1).unique().array, + sp3_df.index.get_level_values(1).unique().array.tolist(), ["G02", "G03", "G19"], "Should be three SVs in test file before removing offline ones", ) sp3_df = sp3.remove_offline_sats(sp3_df) self.assertEqual( - sp3_df.index.get_level_values(1).unique().array, + sp3_df.index.get_level_values(1).unique().array.tolist(), ["G02", "G03"], "Should be two SVs after removing offline ones", ) + # sp3_test_data_truncated_cod_final is input_data2 + @patch("builtins.open", new_callable=mock_open, read_data=input_data2) + def test_filter_by_svs(self, mock_file): + sp3_df = sp3.read_sp3("mock_path", pOnly=False) + self.assertEqual( + len(sp3_df.index.get_level_values(1).unique().array), + 34, + "Should be 34 unique SVs in test file before filtering", + ) + + sp3_df_filtered_by_count = sp3.filter_by_svs(sp3_df, filter_by_count=2) + self.assertEqual( + sp3_df_filtered_by_count.index.get_level_values(1).unique().array.tolist(), + ["G01", "G02"], + "Should be two SVs after trimming to max 2", + ) + + sp3_df_filtered_by_constellation = sp3.filter_by_svs(sp3_df, filter_to_sat_letter="R") + self.assertEqual( + sp3_df_filtered_by_constellation.index.get_level_values(1).unique().array.tolist(), + ["R01", "R02"], + "Should have only Glonass sats after filtering to constellation R", + ) + + sp3_df_filtered_by_name = sp3.filter_by_svs(sp3_df, filter_by_name=["G19", "G03"]) + self.assertEqual( + sp3_df_filtered_by_name.index.get_level_values(1).unique().array.tolist(), + ["G03", "G19"], + "Should have only specific sats after filtering by name", + ) + + @patch("builtins.open", new_callable=mock_open, read_data=offline_sat_test_data) + def test_trim_df(self, mock_file): + sp3_df = sp3.read_sp3("mock_path", pOnly=False) + # offline_sat_test_data is based on the following file, but 3 epochs, not 2 days: + filename = "IGS0DEMULT_20243181800_02D_05M_ORB.SP3" + # Expected starting set of epochs, in j2000 seconds + expected_initial_epochs = [784792800, 784793100, 784793400] + # Those epochs as datetimes are: + # ['2024-11-13T18:00:00', '2024-11-13T18:05:00', '2024-11-13T18:10:00'], dtype='datetime64[s]' + # Our sample rate is 5 mins, so indexing from here on, is in timedeltas in multiples of 5 mins + self.assertEqual( + sp3_df.index.get_level_values(0).unique().array.tolist(), + expected_initial_epochs, + "Should be 3 epochs in test file before trimming", + ) + + # Trimming 5 mins from end should result in first two epochs only + sp3_df_start_trim = sp3.trim_df(sp3_df=sp3_df, trim_start=timedelta(0), trim_end=timedelta(minutes=5)) + self.assertEqual(sp3_df_start_trim.index.get_level_values(0).unique().array.tolist(), [784792800, 784793100]) + + # After trimming end by 3 epochs, expect no data + sp3_df_start_trim = sp3.trim_df(sp3_df=sp3_df, trim_start=timedelta(0), trim_end=timedelta(minutes=15)) + self.assertEqual(sp3_df_start_trim.index.get_level_values(0).unique().array.tolist(), []) + + # Expected resulting epochs after trimming start by 1 epoch + sp3_df_start_trim = sp3.trim_df(sp3_df=sp3_df, trim_start=timedelta(minutes=5), trim_end=timedelta(0)) + self.assertEqual(sp3_df_start_trim.index.get_level_values(0).unique().array.tolist(), [784793100, 784793400]) + + # Expected resulting epochs after trimming start by 3 epochs (no data) + sp3_df_start_trim = sp3.trim_df(sp3_df=sp3_df, trim_start=timedelta(minutes=15), trim_end=timedelta(0)) + 
self.assertEqual(sp3_df_start_trim.index.get_level_values(0).unique().array.tolist(), []) + + # Trim start and end by one epoch (test you can do both at once) + sp3_df_start_trim = sp3.trim_df(sp3_df=sp3_df, trim_start=timedelta(minutes=5), trim_end=timedelta(minutes=5)) + self.assertEqual(sp3_df_start_trim.index.get_level_values(0).unique().array.tolist(), [784793100]) + + # Test trimming by epoch count + trim_to_num_epochs = 2 + sample_rate = convert_nominal_span(determine_properties_from_filename(filename=filename)["sampling_rate"]) + self.assertEqual( + sample_rate, timedelta(minutes=5), "Sample rate should've been parsed as 5 minutes, from filename" + ) + + sp3_df_trimmed = sp3.trim_to_epoch_count(sp3_df, epoch_count=2, sp3_sample_rate=sample_rate) + self.assertEqual( + sp3_df_trimmed.index.get_level_values(0).unique().array.tolist(), + [784792800, 784793100], + "Should be first two epochs after trimming with trim_to_epoch_count() using sample_rate", + ) + + sp3_df_trimmed = sp3.trim_to_epoch_count(sp3_df, epoch_count=2, sp3_filename=filename) + self.assertEqual( + sp3_df_trimmed.index.get_level_values(0).unique().array.tolist(), + [784792800, 784793100], + "Should be first two epochs after trimming with trim_to_epoch_count() using filename to derive sample_rate", + ) + + # Test the keep_first_delta_amount parameter of trim_df(), used above + trim_to_num_epochs = 2 + sample_rate = timedelta(minutes=5) + time_offset_from_start: timedelta = sample_rate * (trim_to_num_epochs - 1) + self.assertEqual(time_offset_from_start, timedelta(minutes=5)) + # Now the actual test + sp3_df_trimmed = sp3.trim_df(sp3_df, keep_first_delta_amount=time_offset_from_start) + self.assertEqual( + sp3_df_trimmed.index.get_level_values(0).unique().array.tolist(), + [784792800, 784793100], + "Should be two epochs after trimming with keep_first_delta_amount parameter", + ) + class TestMergeSP3(TestCase): def setUp(self): From 48c370152b0fb7142edaf573374dfeac00fc3612 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 24 Dec 2024 00:34:55 +0000 Subject: [PATCH 4/8] NPI-3669 break out trim_to_first_n_epochs() as a utility function, update test_file_creation_util.py --- gnssanalysis/gn_io/sp3.py | 23 ++++++++++++++++++++++- gnssanalysis/test_file_creation_util.py | 11 +++-------- tests/test_sp3.py | 4 ++-- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 0b471a8..29251ef 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -10,6 +10,7 @@ import pandas as _pd from scipy import interpolate as _interpolate +from .. import filenames from .. import gn_aux as _gn_aux from .. import gn_const as _gn_const from .. 
import gn_datetime as _gn_datetime @@ -967,10 +968,30 @@ def trim_df( # Slice to the subset that we actually care about trimmed_df = sp3_df.loc[first_keep_time:last_keep_time] trimmed_df.index = trimmed_df.index.remove_unused_levels() - # trimmed_df.attrs["HEADER"].HEAD.ORB_TYPE = "FIT" return trimmed_df +def trim_to_first_n_epochs( + sp3_df: _pd.DataFrame, + epoch_count: int, + sp3_filename: Optional[str] = None, + sp3_sample_rate: Optional[timedelta] = None, +) -> _pd.DataFrame: + """ + Utility function to trim an SP3 dataframe to the first n epochs, given either the filename, or sample rate + """ + sample_rate = sp3_sample_rate + if not sample_rate: + if not sp3_filename: + raise ValueError("Either sp3_sample_rate or sp3_filename must be provided") + sample_rate = filenames.convert_nominal_span( + filenames.determine_properties_from_filename(sp3_filename)["sampling_rate"] + ) + + time_offset_from_start: timedelta = sample_rate * (epoch_count - 1) + return trim_df(sp3_df, keep_first_delta_amount=time_offset_from_start) + + def sp3_hlm_trans( a: _pd.DataFrame, b: _pd.DataFrame, diff --git a/gnssanalysis/test_file_creation_util.py b/gnssanalysis/test_file_creation_util.py index c8c962e..6736dfd 100644 --- a/gnssanalysis/test_file_creation_util.py +++ b/gnssanalysis/test_file_creation_util.py @@ -1,7 +1,7 @@ from datetime import timedelta from typing import Optional from gnssanalysis.filenames import convert_nominal_span, determine_properties_from_filename -from gnssanalysis.gn_io.sp3 import filter_by_svs, read_sp3, trim_df, write_sp3, remove_offline_sats +from gnssanalysis.gn_io.sp3 import filter_by_svs, read_sp3, trim_to_first_n_epochs, write_sp3, remove_offline_sats import logging logger = logging.getLogger(__name__) @@ -32,18 +32,13 @@ sample_rate: timedelta = convert_nominal_span(determine_properties_from_filename(filename)["sampling_rate"]) print(f"sample_rate is: {sample_rate}") - -# time_start_offset: timedelta = timedelta(0) -time_offset_from_start: timedelta = sample_rate * (trim_to_num_epochs - 1) - - # Load print("Loading SP3 into DataFrame...") sp3_df = read_sp3(src_path) # Trim to first x epochs -print(f"Trimming to first {trim_to_num_epochs} epochs (timedelta from start: {time_offset_from_start})") -sp3_df = trim_df(sp3_df, keep_first_delta_amount=time_offset_from_start) +print(f"Trimming to first {trim_to_num_epochs} epochs") +sp3_df = trim_to_first_n_epochs(sp3_df=sp3_df, epoch_count=trim_to_num_epochs, sp3_filename=filename) # Filter to chosen SVs or number of SVs... 
print( diff --git a/tests/test_sp3.py b/tests/test_sp3.py index e5bd5fc..8f16230 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -273,14 +273,14 @@ def test_trim_df(self, mock_file): sample_rate, timedelta(minutes=5), "Sample rate should've been parsed as 5 minutes, from filename" ) - sp3_df_trimmed = sp3.trim_to_epoch_count(sp3_df, epoch_count=2, sp3_sample_rate=sample_rate) + sp3_df_trimmed = sp3.trim_to_first_n_epochs(sp3_df, epoch_count=2, sp3_sample_rate=sample_rate) self.assertEqual( sp3_df_trimmed.index.get_level_values(0).unique().array.tolist(), [784792800, 784793100], "Should be first two epochs after trimming with trim_to_epoch_count() using sample_rate", ) - sp3_df_trimmed = sp3.trim_to_epoch_count(sp3_df, epoch_count=2, sp3_filename=filename) + sp3_df_trimmed = sp3.trim_to_first_n_epochs(sp3_df, epoch_count=2, sp3_filename=filename) self.assertEqual( sp3_df_trimmed.index.get_level_values(0).unique().array.tolist(), [784792800, 784793100], From 8f45be99dcb8088769f88358b65e22ffbc757fd7 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 24 Dec 2024 01:35:28 +0000 Subject: [PATCH 5/8] NPI-3669 add notes on important unit tests which remain to be added to test_sp3.py --- tests/test_sp3.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_sp3.py b/tests/test_sp3.py index 8f16230..1906cb4 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -136,6 +136,10 @@ def test_read_sp3_header_svs_detailed(self): end_line2_acc, 18, msg="Last ACC on test line 2 (pos 30) should be 18" ) + # TODO Add test(s) for correctly reading header fundamentals (ACC, ORB_TYPE, etc.) + # TODO add tests for correctly reading the actual content of the SP3 in addition to the header. + # TODO add tests for correctly generating sp3 output content with gen_sp3_content() and gen_sp3_header() + def test_sp3_clock_nodata_to_nan(self): sp3_df = pd.DataFrame( {("EST", "CLK"): [999999.999999, 123456.789, 999999.999999, 987654.321]} @@ -176,6 +180,7 @@ def test_velinterpolation(self, mock_file): """ Checking if the velocity interpolation works, right now there is no data to validate, the only thing done is to check if the function runs without errors + TODO: update that to check actual expected values """ result = sp3.read_sp3("mock_path", pOnly=True) r = sp3.getVelSpline(result) @@ -300,6 +305,11 @@ def test_trim_df(self, mock_file): "Should be two epochs after trimming with keep_first_delta_amount parameter", ) + # Don't bother trying to unit test transform_df(). 
It is a wrapper which basically does: + # - read (parse) + # - apply function + # - write (generate content) + # TODO notes added above to implement those bits separately class TestMergeSP3(TestCase): def setUp(self): From 7241486cc9f85d47afb3d7281a71943bb360a1a8 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 7 Jan 2025 06:30:19 +0000 Subject: [PATCH 6/8] NPI-3669 added docstrings based on PR comments, added more notes for future work, added check for incompatible arg combination to trim_df() --- gnssanalysis/gn_io/sp3.py | 35 ++++++++++++++++++++++--- gnssanalysis/test_file_creation_util.py | 2 +- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index bc3542e..3c116eb 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -639,6 +639,8 @@ def getVelPoly(sp3Df: _pd.DataFrame, deg: int = 35) -> _pd.DataFrame: def gen_sp3_header(sp3_df: _pd.DataFrame) -> str: """ Generate the header for an SP3 file based on the given DataFrame. + NOTE: much of the header information is drawn from the DataFrame attrs structure. If this has not been + updated as the DataFrame has been transformed, the header will not reflect the data. :param pandas.DataFrame sp3_df: The DataFrame containing the SP3 data. :return str: The generated SP3 header as a string. @@ -724,6 +726,8 @@ def gen_sp3_content( # Rather than: # PG01... X Y Z CLK ... VX VY VZ ... # ? + # TODO raise warnings if VEL columns are still present, and drop them before writing out, to ensure we remain + # compliant with the spec. out_buf = buf if buf is not None else _io.StringIO() if sort_outputs: @@ -953,7 +957,7 @@ def sp3merge( :param Union[List[str], None] clkpaths: The list of paths to the clk files, or None if no clk files are provided. :param bool nodata_to_nan: Flag indicating whether to convert nodata values to NaN. - :return pd.DataFrame: The merged sp3 DataFrame. + :return DataFrame: The merged sp3 DataFrame. """ sp3_dfs = [read_sp3(sp3_file, nodata_to_nan=nodata_to_nan) for sp3_file in sp3paths] # Create a new attrs dictionary to be used for the output DataFrame @@ -971,7 +975,13 @@ def sp3merge( def transform_sp3(src_sp3: str, dest_sp3: str, transform_fn, *args, **kwargs): """ - Apply a transformation to an sp3 file + Apply a transformation to an sp3 file, by reading the file from the given path, applying the supplied + transformation function and args, and writing out a new file to the path given. + + :param str src_sp3: Path of the source SP3 file to read in. + :param str dest_sp3: Path to write out the new SP3 file to. + :param callable transform_fn: The transformation function to apply to the SP3 data once loaded. *args + and **kwargs following, are passed to this function. """ logger.info(f"Reading file: " + str(src_sp3)) sp3_df = read_sp3(src_sp3) @@ -987,6 +997,14 @@ def trim_df( ): """ Trim data from the start and end of an sp3 dataframe + + :param DataFrame sp3_df: The input SP3 DataFrame. + :param timedelta trim_start: Amount of time to trim off the start of the dataframe. + :param timedelta trim_end: Amount of time to trim off the end of the dataframe. + :param Optional[timedelta] keep_first_delta_amount: If supplied, trim the dataframe to this length. Not + compatible with trim_start and trim_end. 
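+        Providing a non-zero trim_start or trim_end together with this option raises a ValueError.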
+ :return DataFrame: Dataframe trimmed to the requested time range, or requested initial amount + """ time_axis = sp3_df.index.get_level_values(0) # Work out the new time range that we care about @@ -999,6 +1017,8 @@ def trim_df( if keep_first_delta_amount: first_keep_time = first_time last_keep_time = first_time + keep_first_delta_amount.total_seconds() + if trim_start.total_seconds() != 0 or trim_end.total_seconds() != 0: + raise ValueError("keep_first_delta_amount option is not compatible with start/end time options") # Slice to the subset that we actually care about trimmed_df = sp3_df.loc[first_keep_time:last_keep_time] @@ -1014,6 +1034,13 @@ def trim_to_first_n_epochs( ) -> _pd.DataFrame: """ Utility function to trim an SP3 dataframe to the first n epochs, given either the filename, or sample rate + + :param DataFrame sp3_df: The input SP3 DataFrame. + :param int epoch_count: Trim to this many epochs from start of SP3 data (i.e. first n epochs). + :param Optional[str] sp3_filename: Name of SP3 file, just used to derive sample_rate. + :param Optional[timedelta] sp3_sample_rate: Sample rate of the SP3 data. Alternatively this can be + derived from a filename. + :return DataFrame: Dataframe trimmed to the requested number of epochs. """ sample_rate = sp3_sample_rate if not sample_rate: @@ -1037,7 +1064,7 @@ def sp3_hlm_trans( :param DataFrame a: The sp3_a DataFrame. :param DataFrame b : The sp3_b DataFrame. - :returntuple[pandas.DataFrame, list]: A tuple containing the updated sp3_b DataFrame and the HLM array with applied computed parameters and residuals. + :return tuple[pandas.DataFrame, list]: A tuple containing the updated sp3_b DataFrame and the HLM array with applied computed parameters and residuals. """ hlm = _gn_transform.get_helmert7(pt1=a.EST[["X", "Y", "Z"]].values, pt2=b.EST[["X", "Y", "Z"]].values) b.iloc[:, :3] = _gn_transform.transform7(xyz_in=b.EST[["X", "Y", "Z"]].values, hlm_params=hlm[0]) @@ -1064,7 +1091,7 @@ def diff_sp3_rac( :param bool use_offline_sat_removal: Flag indicating whether to remove satellites which are offline / have some nodata position values. Caution: ensure you turn this on if using cubic spline interpolation with data which may have holes in it (nodata). - :return: The DataFrame containing the difference in RAC coordinates. + :return DataFrame: The DataFrame containing the difference in RAC coordinates. """ hlm_modes = [None, "ECF", "ECI"] if hlm_mode not in hlm_modes: diff --git a/gnssanalysis/test_file_creation_util.py b/gnssanalysis/test_file_creation_util.py index 6736dfd..e7744db 100644 --- a/gnssanalysis/test_file_creation_util.py +++ b/gnssanalysis/test_file_creation_util.py @@ -22,7 +22,7 @@ drop_offline_sats: bool = False -#### +#### End configuration #### filename = src_path.rsplit("/")[-1] From a15cd428c897c33e64b36e3fcf635c34ac814963 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 7 Jan 2025 06:36:03 +0000 Subject: [PATCH 7/8] NPI-3669 cleanup of redundent comment --- tests/test_sp3.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/test_sp3.py b/tests/test_sp3.py index e357519..e15897c 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -288,12 +288,6 @@ def test_trim_df(self, mock_file): "Should be two epochs after trimming with keep_first_delta_amount parameter", ) - # Don't bother trying to unit test transform_df(). 
It is a wrapper which basically does: - # - read (parse) - # - apply function - # - write (generate content) - # TODO notes added above to implement those bits separately - class TestMergeSP3(TestCase): def setUp(self): From 0945e2655ea90845dd332205bda9d56f318af3bc Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Wed, 8 Jan 2025 03:28:20 +0000 Subject: [PATCH 8/8] NPI-3669 update docstrings in sp3.py for consistency, in response to PR comments - similar fixes to parallel pr --- gnssanalysis/gn_io/sp3.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 3c116eb..9d678cf 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -577,8 +577,8 @@ def parse_sp3_header(header: bytes, warn_on_negative_sv_acc_values: bool = True) def getVelSpline(sp3Df: _pd.DataFrame) -> _pd.DataFrame: """Returns the velocity spline of the input dataframe. - :param DataFrame sp3Df: The input dataframe containing position data. - :return DataFrame: The dataframe containing the velocity spline. + :param _pd.DataFrame sp3Df: The input dataframe containing position data. + :return _pd.DataFrame: The dataframe containing the velocity spline. :caution :This function cannot handle *any* NaN / nodata / non-finite position values. By contrast, getVelPoly() is more forgiving, but accuracy of results, particulary in the presence of NaNs, has not been assessed. @@ -599,9 +599,9 @@ def getVelPoly(sp3Df: _pd.DataFrame, deg: int = 35) -> _pd.DataFrame: """ Interpolates the positions for -1s and +1s in the sp3_df DataFrame and outputs velocities. - :param DataFrame sp3Df: A pandas DataFrame containing the sp3 data. + :param _pd.DataFrame sp3Df: A pandas DataFrame containing the sp3 data. :param int deg: Degree of the polynomial fit. Default is 35. - :return DataFrame: A pandas DataFrame with the interpolated velocities added as a new column. + :return _pd.DataFrame: A pandas DataFrame with the interpolated velocities added as a new column. """ est = sp3Df.unstack(1).EST[["X", "Y", "Z"]] @@ -957,7 +957,7 @@ def sp3merge( :param Union[List[str], None] clkpaths: The list of paths to the clk files, or None if no clk files are provided. :param bool nodata_to_nan: Flag indicating whether to convert nodata values to NaN. - :return DataFrame: The merged sp3 DataFrame. + :return _pd.DataFrame: The merged sp3 DataFrame. """ sp3_dfs = [read_sp3(sp3_file, nodata_to_nan=nodata_to_nan) for sp3_file in sp3paths] # Create a new attrs dictionary to be used for the output DataFrame @@ -998,12 +998,12 @@ def trim_df( """ Trim data from the start and end of an sp3 dataframe - :param DataFrame sp3_df: The input SP3 DataFrame. + :param _pd.DataFrame sp3_df: The input SP3 DataFrame. :param timedelta trim_start: Amount of time to trim off the start of the dataframe. :param timedelta trim_end: Amount of time to trim off the end of the dataframe. :param Optional[timedelta] keep_first_delta_amount: If supplied, trim the dataframe to this length. Not compatible with trim_start and trim_end. 
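         Providing a non-zero trim_start or trim_end together with this option raises a ValueError.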
- :return DataFrame: Dataframe trimmed to the requested time range, or requested initial amount + :return _pd.DataFrame: Dataframe trimmed to the requested time range, or requested initial amount """ time_axis = sp3_df.index.get_level_values(0) @@ -1035,12 +1035,12 @@ def trim_to_first_n_epochs( """ Utility function to trim an SP3 dataframe to the first n epochs, given either the filename, or sample rate - :param DataFrame sp3_df: The input SP3 DataFrame. + :param _pd.DataFrame sp3_df: The input SP3 DataFrame. :param int epoch_count: Trim to this many epochs from start of SP3 data (i.e. first n epochs). :param Optional[str] sp3_filename: Name of SP3 file, just used to derive sample_rate. :param Optional[timedelta] sp3_sample_rate: Sample rate of the SP3 data. Alternatively this can be derived from a filename. - :return DataFrame: Dataframe trimmed to the requested number of epochs. + :return _pd.DataFrame: Dataframe trimmed to the requested number of epochs. """ sample_rate = sp3_sample_rate if not sample_rate: @@ -1061,8 +1061,8 @@ def sp3_hlm_trans( """ Rotates sp3_b into sp3_a. - :param DataFrame a: The sp3_a DataFrame. - :param DataFrame b : The sp3_b DataFrame. + :param _pd.DataFrame a: The sp3_a DataFrame. + :param _pd.DataFrame b: The sp3_b DataFrame. :return tuple[pandas.DataFrame, list]: A tuple containing the updated sp3_b DataFrame and the HLM array with applied computed parameters and residuals. """ @@ -1082,8 +1082,8 @@ def diff_sp3_rac( """ Computes the difference between the two sp3 files in the radial, along-track and cross-track coordinates. - :param DataFrame sp3_baseline: The baseline sp3 DataFrame. - :param DataFrame sp3_test: The test sp3 DataFrame. + :param _pd.DataFrame sp3_baseline: The baseline sp3 DataFrame. + :param _pd.DataFrame sp3_test: The test sp3 DataFrame. :param string hlm_mode: The mode for HLM transformation. Can be None, "ECF", or "ECI". :param bool use_cubic_spline: Flag indicating whether to use cubic spline for velocity computation. Caution: cubic spline interpolation does not tolerate NaN / nodata values. Consider enabling use_offline_sat_removal if @@ -1091,7 +1091,7 @@ def diff_sp3_rac( :param bool use_offline_sat_removal: Flag indicating whether to remove satellites which are offline / have some nodata position values. Caution: ensure you turn this on if using cubic spline interpolation with data which may have holes in it (nodata). - :return DataFrame: The DataFrame containing the difference in RAC coordinates. + :return _pd.DataFrame: The DataFrame containing the difference in RAC coordinates. """ hlm_modes = [None, "ECF", "ECI"] if hlm_mode not in hlm_modes: