From 9a6f05e3b898d520e2634f1481948be24467f995 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 23 Dec 2024 05:38:23 +0000 Subject: [PATCH 1/8] NPI-3669 add sp3 transformation and trimming functionality (from other codebase), add more trimming options, introduce utility script for making minimal SP3 files, primarily intended for unit tests --- gnssanalysis/gn_io/sp3.py | 103 +++++++++++++++++++++++- gnssanalysis/test_file_creation_util.py | 71 ++++++++++++++++ 2 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 gnssanalysis/test_file_creation_util.py diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 32bba76..f031c96 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -1,3 +1,4 @@ +from datetime import timedelta import logging import io as _io import os as _os @@ -226,6 +227,65 @@ def remove_offline_sats(sp3_df: _pd.DataFrame, df_friendly_name: str = ""): return sp3_df +def filter_by_svs( + sp3_df: _pd.DataFrame, + filter_by_count: Optional[int], + filter_by_name: Optional[list[str]], + filter_to_sat_letter: Optional[str], +) -> _pd.DataFrame: + """ + Utility function to trim an SP3 DataFrame down, intended for creating small sample SP3 files for + unit testing (but could be used for other purposes). + Can filter to a specific number of SVs, to specific SV names, and to a specific constellation. + + These filters can be used together (though filter by name and filter by sat letter i.e. constellation, does + not make sense). + E.g. you may filter sats to a set of possible SV names, and also to a maximum of n sats. Or you might filter to + a specific constellation, then cap at a max of n sats. + + :param _pd.DataFrame sp3_df: input SP3 DataFrame to perform filtering on + :param Optional[int] filter_by_count: max number of sats to return + :param Optional[list[str]] filter_by_name: names of sats to constrain to + :param Optional[str] filter_to_sat_letter: name of constellation (single letter) to constrain to + :return _pd.DataFrame: new SP3 DataFrame after filtering + """ + + # Get all SV names + all_sv_names = sp3_df.index.get_level_values(1).unique().array + total_svs = len(all_sv_names) + logger.info(f"Total SVs: {total_svs}") + + # Drop SVs which don't match given names + if filter_by_name: + # Make set of every SV name to drop (exclude everything besides what we want to keep) + exclusion_list: list[str] = list(set(all_sv_names) - set(filter_by_name)) + sp3_df = sp3_df.drop(exclusion_list, level=1) + + # Drop SVs which don't match a given constellation letter (i.e. 'G', 'E', 'R', 'C') + if filter_to_sat_letter: + if len(filter_to_sat_letter) != 1: + raise ValueError( + "Name of sat constellation to filter to, must be a single char. E.g. you cannot enter 'GR'" + ) + # Make set of every SV name to drop (exclude everything besides what we want to keep) + other_constellation_sats = [sv for sv in all_sv_names if not filter_to_sat_letter.upper() in sv] + sp3_df = sp3_df.drop(other_constellation_sats, level=1) + + # Drop SVs beyond n (i.e. keep only the first n SVs) + if filter_by_count: + if filter_by_count < 0: + raise ValueError("Cannot filter to a negative number of SVs!") + if total_svs <= filter_by_count: + raise ValueError( + f"Cannot filter to max of {filter_by_count} sats, as there are only {total_svs} sats total!" + ) + # Exclusion list built by taking all sats *beyond* the amount we want to keep. 
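+        # E.g. for 34 SVs with filter_by_count=2, the slice below lists the 32 SVs after G01 and G02, which are then dropped.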
+ exclusion_list = all_sv_names[filter_by_count:] + sp3_df = sp3_df.drop(exclusion_list, level=1) + + return sp3_df + + def mapparm(old: Tuple[float, float], new: Tuple[float, float]) -> Tuple[float, float]: """ Evaluate the offset and scale factor needed to map values from the old range to the new range. @@ -873,7 +933,48 @@ def sp3merge( return merged_sp3 -def sp3_hlm_trans(a: _pd.DataFrame, b: _pd.DataFrame) -> tuple[_pd.DataFrame, list]: +def transform_sp3(src_sp3: str, dest_sp3: str, transform_fn, *args, **kwargs): + """ + Apply a transformation to an sp3 file + """ + logger.info(f"Reading file: " + str(src_sp3)) + sp3_df = read_sp3(src_sp3) + transformed_df = transform_fn(sp3_df, *args, **kwargs) + write_sp3(transformed_df, dest_sp3) + + +def trim_df( + sp3_df: _pd.DataFrame, + trim_start: timedelta = timedelta(), + trim_end: timedelta = timedelta(), + keep_first_delta_amount: Optional[timedelta] = None, +): + """ + Trim data from the start and end of an sp3 dataframe + """ + time_axis = sp3_df.index.get_level_values(0) + # Work out the new time range that we care about + first_time = min(time_axis) + first_keep_time = first_time + trim_start.total_seconds() + last_time = max(time_axis) + last_keep_time = last_time - trim_end.total_seconds() + + # Operating in mode of trimming from start, to start + x amount of time in. As opposed to trimming a delta from each end. + if keep_first_delta_amount: + first_keep_time = first_time + last_keep_time = first_time + keep_first_delta_amount.total_seconds() + + # Slice to the subset that we actually care about + trimmed_df = sp3_df.loc[first_keep_time:last_keep_time] + trimmed_df.index = trimmed_df.index.remove_unused_levels() + # trimmed_df.attrs["HEADER"].HEAD.ORB_TYPE = "FIT" + return trimmed_df + + +def sp3_hlm_trans( + a: _pd.DataFrame, + b: _pd.DataFrame, +) -> tuple[_pd.DataFrame, list]: """ Rotates sp3_b into sp3_a. diff --git a/gnssanalysis/test_file_creation_util.py b/gnssanalysis/test_file_creation_util.py new file mode 100644 index 0000000..c8c962e --- /dev/null +++ b/gnssanalysis/test_file_creation_util.py @@ -0,0 +1,71 @@ +from datetime import timedelta +from typing import Optional +from gnssanalysis.filenames import convert_nominal_span, determine_properties_from_filename +from gnssanalysis.gn_io.sp3 import filter_by_svs, read_sp3, trim_df, write_sp3, remove_offline_sats +import logging + +logger = logging.getLogger(__name__) + + +#### Configuration #### + +src_path = "IGS0DEMULT_20243181800_02D_05M_ORB.SP3" +dest_path = "IGS0DEMULT_20243181800_02D_05M_ORB.SP3-trimmed" + +# Constrain to x SVs, specific SV names, both, or neither +trim_to_sv_names: Optional[list[str]] = ["G02", "G03", "G19"] +trim_to_sv_count: Optional[int] = None # 1 +trim_to_sat_letter: Optional[str] = None # "E" + +# How many epochs to include in the trimmed file (offset from start) +trim_to_num_epochs: int = 3 + +drop_offline_sats: bool = False + +#### + + +filename = src_path.rsplit("/")[-1] +print(f"Filename is: {filename}") + +# Raw data would be: determine_sp3_name_props() - that retrieves in seconds. But we want to be more generally applicable, so not just SP3 here ideally. 
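+# E.g. for "IGS0DEMULT_20243181800_02D_05M_ORB.SP3", the "05M" component should yield a sample_rate of timedelta(minutes=5).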
+sample_rate: timedelta = convert_nominal_span(determine_properties_from_filename(filename)["sampling_rate"]) +print(f"sample_rate is: {sample_rate}") + + +# time_start_offset: timedelta = timedelta(0) +time_offset_from_start: timedelta = sample_rate * (trim_to_num_epochs - 1) + + +# Load +print("Loading SP3 into DataFrame...") +sp3_df = read_sp3(src_path) + +# Trim to first x epochs +print(f"Trimming to first {trim_to_num_epochs} epochs (timedelta from start: {time_offset_from_start})") +sp3_df = trim_df(sp3_df, keep_first_delta_amount=time_offset_from_start) + +# Filter to chosen SVs or number of SVs... +print( + f"Applying SV filters (max count: {trim_to_sv_count}, limit to names: {trim_to_sv_names}, limit to constellation: {trim_to_sat_letter})..." +) +sp3_df = filter_by_svs( + sp3_df, filter_by_count=trim_to_sv_count, filter_by_name=trim_to_sv_names, filter_to_sat_letter=trim_to_sat_letter +) + +# Drop offline sats if requested +if drop_offline_sats: + print(f"Dropping offline sats...") + sp3_df = remove_offline_sats(sp3_df) + +# Write out +print( + "Writing out new SP3 file... " + 'CAUTION: at the time of writing the header is based on stale metadata in .attrs["HEADER"], not the contents ' + "of the dataframe. It will need to be manually updated." +) +write_sp3(sp3_df, dest_path) + +# Test if we can successfully read that file... +print("Testing re-read of the output file...") +re_read = read_sp3(dest_path) From 7a8ae97e3557af59bf9323c55caf9e0ee99f0dfb Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Mon, 23 Dec 2024 22:14:12 +0000 Subject: [PATCH 2/8] NPI-3669 update filter_by_svs() to make all filtering args properly optional --- gnssanalysis/gn_io/sp3.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index f031c96..0b471a8 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -229,9 +229,9 @@ def remove_offline_sats(sp3_df: _pd.DataFrame, df_friendly_name: str = ""): def filter_by_svs( sp3_df: _pd.DataFrame, - filter_by_count: Optional[int], - filter_by_name: Optional[list[str]], - filter_to_sat_letter: Optional[str], + filter_by_count: Optional[int] = None, + filter_by_name: Optional[list[str]] = None, + filter_to_sat_letter: Optional[str] = None, ) -> _pd.DataFrame: """ Utility function to trim an SP3 DataFrame down, intended for creating small sample SP3 files for From 8dd48a26cad41667388bef7e3adfe167a5d4e82a Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 24 Dec 2024 00:29:56 +0000 Subject: [PATCH 3/8] NPI-3669 more unit tests for sp3 processing, filter_by_svs(), trim_df() including new functionality to take first n epochs --- tests/test_sp3.py | 107 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) diff --git a/tests/test_sp3.py b/tests/test_sp3.py index 960279f..e5bd5fc 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -1,3 +1,4 @@ +from datetime import timedelta import unittest from unittest.mock import patch, mock_open from pyfakefs.fake_filesystem_unittest import TestCase @@ -5,6 +6,7 @@ import numpy as np import pandas as pd +from gnssanalysis.filenames import convert_nominal_span, determine_properties_from_filename import gnssanalysis.gn_io.sp3 as sp3 from test_datasets.sp3_test_data import ( @@ -185,18 +187,119 @@ def test_velinterpolation(self, mock_file): def test_sp3_offline_sat_removal(self, mock_file): sp3_df = 
sp3.read_sp3("mock_path", pOnly=False) self.assertEqual( - sp3_df.index.get_level_values(1).unique().array, + sp3_df.index.get_level_values(1).unique().array.tolist(), ["G02", "G03", "G19"], "Should be three SVs in test file before removing offline ones", ) sp3_df = sp3.remove_offline_sats(sp3_df) self.assertEqual( - sp3_df.index.get_level_values(1).unique().array, + sp3_df.index.get_level_values(1).unique().array.tolist(), ["G02", "G03"], "Should be two SVs after removing offline ones", ) + # sp3_test_data_truncated_cod_final is input_data2 + @patch("builtins.open", new_callable=mock_open, read_data=input_data2) + def test_filter_by_svs(self, mock_file): + sp3_df = sp3.read_sp3("mock_path", pOnly=False) + self.assertEqual( + len(sp3_df.index.get_level_values(1).unique().array), + 34, + "Should be 34 unique SVs in test file before filtering", + ) + + sp3_df_filtered_by_count = sp3.filter_by_svs(sp3_df, filter_by_count=2) + self.assertEqual( + sp3_df_filtered_by_count.index.get_level_values(1).unique().array.tolist(), + ["G01", "G02"], + "Should be two SVs after trimming to max 2", + ) + + sp3_df_filtered_by_constellation = sp3.filter_by_svs(sp3_df, filter_to_sat_letter="R") + self.assertEqual( + sp3_df_filtered_by_constellation.index.get_level_values(1).unique().array.tolist(), + ["R01", "R02"], + "Should have only Glonass sats after filtering to constellation R", + ) + + sp3_df_filtered_by_name = sp3.filter_by_svs(sp3_df, filter_by_name=["G19", "G03"]) + self.assertEqual( + sp3_df_filtered_by_name.index.get_level_values(1).unique().array.tolist(), + ["G03", "G19"], + "Should have only specific sats after filtering by name", + ) + + @patch("builtins.open", new_callable=mock_open, read_data=offline_sat_test_data) + def test_trim_df(self, mock_file): + sp3_df = sp3.read_sp3("mock_path", pOnly=False) + # offline_sat_test_data is based on the following file, but 3 epochs, not 2 days: + filename = "IGS0DEMULT_20243181800_02D_05M_ORB.SP3" + # Expected starting set of epochs, in j2000 seconds + expected_initial_epochs = [784792800, 784793100, 784793400] + # Those epochs as datetimes are: + # ['2024-11-13T18:00:00', '2024-11-13T18:05:00', '2024-11-13T18:10:00'], dtype='datetime64[s]' + # Our sample rate is 5 mins, so indexing from here on, is in timedeltas in multiples of 5 mins + self.assertEqual( + sp3_df.index.get_level_values(0).unique().array.tolist(), + expected_initial_epochs, + "Should be 3 epochs in test file before trimming", + ) + + # Trimming 5 mins from end should result in first two epochs only + sp3_df_start_trim = sp3.trim_df(sp3_df=sp3_df, trim_start=timedelta(0), trim_end=timedelta(minutes=5)) + self.assertEqual(sp3_df_start_trim.index.get_level_values(0).unique().array.tolist(), [784792800, 784793100]) + + # After trimming end by 3 epochs, expect no data + sp3_df_start_trim = sp3.trim_df(sp3_df=sp3_df, trim_start=timedelta(0), trim_end=timedelta(minutes=15)) + self.assertEqual(sp3_df_start_trim.index.get_level_values(0).unique().array.tolist(), []) + + # Expected resulting epochs after trimming start by 1 epoch + sp3_df_start_trim = sp3.trim_df(sp3_df=sp3_df, trim_start=timedelta(minutes=5), trim_end=timedelta(0)) + self.assertEqual(sp3_df_start_trim.index.get_level_values(0).unique().array.tolist(), [784793100, 784793400]) + + # Expected resulting epochs after trimming start by 3 epochs (no data) + sp3_df_start_trim = sp3.trim_df(sp3_df=sp3_df, trim_start=timedelta(minutes=15), trim_end=timedelta(0)) + 
self.assertEqual(sp3_df_start_trim.index.get_level_values(0).unique().array.tolist(), []) + + # Trim start and end by one epoch (test you can do both at once) + sp3_df_start_trim = sp3.trim_df(sp3_df=sp3_df, trim_start=timedelta(minutes=5), trim_end=timedelta(minutes=5)) + self.assertEqual(sp3_df_start_trim.index.get_level_values(0).unique().array.tolist(), [784793100]) + + # Test trimming by epoch count + trim_to_num_epochs = 2 + sample_rate = convert_nominal_span(determine_properties_from_filename(filename=filename)["sampling_rate"]) + self.assertEqual( + sample_rate, timedelta(minutes=5), "Sample rate should've been parsed as 5 minutes, from filename" + ) + + sp3_df_trimmed = sp3.trim_to_epoch_count(sp3_df, epoch_count=2, sp3_sample_rate=sample_rate) + self.assertEqual( + sp3_df_trimmed.index.get_level_values(0).unique().array.tolist(), + [784792800, 784793100], + "Should be first two epochs after trimming with trim_to_epoch_count() using sample_rate", + ) + + sp3_df_trimmed = sp3.trim_to_epoch_count(sp3_df, epoch_count=2, sp3_filename=filename) + self.assertEqual( + sp3_df_trimmed.index.get_level_values(0).unique().array.tolist(), + [784792800, 784793100], + "Should be first two epochs after trimming with trim_to_epoch_count() using filename to derive sample_rate", + ) + + # Test the keep_first_delta_amount parameter of trim_df(), used above + trim_to_num_epochs = 2 + sample_rate = timedelta(minutes=5) + time_offset_from_start: timedelta = sample_rate * (trim_to_num_epochs - 1) + self.assertEqual(time_offset_from_start, timedelta(minutes=5)) + # Now the actual test + sp3_df_trimmed = sp3.trim_df(sp3_df, keep_first_delta_amount=time_offset_from_start) + self.assertEqual( + sp3_df_trimmed.index.get_level_values(0).unique().array.tolist(), + [784792800, 784793100], + "Should be two epochs after trimming with keep_first_delta_amount parameter", + ) + class TestMergeSP3(TestCase): def setUp(self): From 48c370152b0fb7142edaf573374dfeac00fc3612 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 24 Dec 2024 00:34:55 +0000 Subject: [PATCH 4/8] NPI-3669 break out trim_to_first_n_epochs() as a utility function, update test_file_creation_util.py --- gnssanalysis/gn_io/sp3.py | 23 ++++++++++++++++++++++- gnssanalysis/test_file_creation_util.py | 11 +++-------- tests/test_sp3.py | 4 ++-- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 0b471a8..29251ef 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -10,6 +10,7 @@ import pandas as _pd from scipy import interpolate as _interpolate +from .. import filenames from .. import gn_aux as _gn_aux from .. import gn_const as _gn_const from .. 
import gn_datetime as _gn_datetime @@ -967,10 +968,30 @@ def trim_df( # Slice to the subset that we actually care about trimmed_df = sp3_df.loc[first_keep_time:last_keep_time] trimmed_df.index = trimmed_df.index.remove_unused_levels() - # trimmed_df.attrs["HEADER"].HEAD.ORB_TYPE = "FIT" return trimmed_df +def trim_to_first_n_epochs( + sp3_df: _pd.DataFrame, + epoch_count: int, + sp3_filename: Optional[str] = None, + sp3_sample_rate: Optional[timedelta] = None, +) -> _pd.DataFrame: + """ + Utility function to trim an SP3 dataframe to the first n epochs, given either the filename, or sample rate + """ + sample_rate = sp3_sample_rate + if not sample_rate: + if not sp3_filename: + raise ValueError("Either sp3_sample_rate or sp3_filename must be provided") + sample_rate = filenames.convert_nominal_span( + filenames.determine_properties_from_filename(sp3_filename)["sampling_rate"] + ) + + time_offset_from_start: timedelta = sample_rate * (epoch_count - 1) + return trim_df(sp3_df, keep_first_delta_amount=time_offset_from_start) + + def sp3_hlm_trans( a: _pd.DataFrame, b: _pd.DataFrame, diff --git a/gnssanalysis/test_file_creation_util.py b/gnssanalysis/test_file_creation_util.py index c8c962e..6736dfd 100644 --- a/gnssanalysis/test_file_creation_util.py +++ b/gnssanalysis/test_file_creation_util.py @@ -1,7 +1,7 @@ from datetime import timedelta from typing import Optional from gnssanalysis.filenames import convert_nominal_span, determine_properties_from_filename -from gnssanalysis.gn_io.sp3 import filter_by_svs, read_sp3, trim_df, write_sp3, remove_offline_sats +from gnssanalysis.gn_io.sp3 import filter_by_svs, read_sp3, trim_to_first_n_epochs, write_sp3, remove_offline_sats import logging logger = logging.getLogger(__name__) @@ -32,18 +32,13 @@ sample_rate: timedelta = convert_nominal_span(determine_properties_from_filename(filename)["sampling_rate"]) print(f"sample_rate is: {sample_rate}") - -# time_start_offset: timedelta = timedelta(0) -time_offset_from_start: timedelta = sample_rate * (trim_to_num_epochs - 1) - - # Load print("Loading SP3 into DataFrame...") sp3_df = read_sp3(src_path) # Trim to first x epochs -print(f"Trimming to first {trim_to_num_epochs} epochs (timedelta from start: {time_offset_from_start})") -sp3_df = trim_df(sp3_df, keep_first_delta_amount=time_offset_from_start) +print(f"Trimming to first {trim_to_num_epochs} epochs") +sp3_df = trim_to_first_n_epochs(sp3_df=sp3_df, epoch_count=trim_to_num_epochs, sp3_filename=filename) # Filter to chosen SVs or number of SVs... 
print( diff --git a/tests/test_sp3.py b/tests/test_sp3.py index e5bd5fc..8f16230 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -273,14 +273,14 @@ def test_trim_df(self, mock_file): sample_rate, timedelta(minutes=5), "Sample rate should've been parsed as 5 minutes, from filename" ) - sp3_df_trimmed = sp3.trim_to_epoch_count(sp3_df, epoch_count=2, sp3_sample_rate=sample_rate) + sp3_df_trimmed = sp3.trim_to_first_n_epochs(sp3_df, epoch_count=2, sp3_sample_rate=sample_rate) self.assertEqual( sp3_df_trimmed.index.get_level_values(0).unique().array.tolist(), [784792800, 784793100], "Should be first two epochs after trimming with trim_to_epoch_count() using sample_rate", ) - sp3_df_trimmed = sp3.trim_to_epoch_count(sp3_df, epoch_count=2, sp3_filename=filename) + sp3_df_trimmed = sp3.trim_to_first_n_epochs(sp3_df, epoch_count=2, sp3_filename=filename) self.assertEqual( sp3_df_trimmed.index.get_level_values(0).unique().array.tolist(), [784792800, 784793100], From 8f45be99dcb8088769f88358b65e22ffbc757fd7 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 24 Dec 2024 01:35:28 +0000 Subject: [PATCH 5/8] NPI-3669 add notes on important unit tests which remain to be added to test_sp3.py --- tests/test_sp3.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_sp3.py b/tests/test_sp3.py index 8f16230..1906cb4 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -136,6 +136,10 @@ def test_read_sp3_header_svs_detailed(self): end_line2_acc, 18, msg="Last ACC on test line 2 (pos 30) should be 18" ) + # TODO Add test(s) for correctly reading header fundamentals (ACC, ORB_TYPE, etc.) + # TODO add tests for correctly reading the actual content of the SP3 in addition to the header. + # TODO add tests for correctly generating sp3 output content with gen_sp3_content() and gen_sp3_header() + def test_sp3_clock_nodata_to_nan(self): sp3_df = pd.DataFrame( {("EST", "CLK"): [999999.999999, 123456.789, 999999.999999, 987654.321]} @@ -176,6 +180,7 @@ def test_velinterpolation(self, mock_file): """ Checking if the velocity interpolation works, right now there is no data to validate, the only thing done is to check if the function runs without errors + TODO: update that to check actual expected values """ result = sp3.read_sp3("mock_path", pOnly=True) r = sp3.getVelSpline(result) @@ -300,6 +305,11 @@ def test_trim_df(self, mock_file): "Should be two epochs after trimming with keep_first_delta_amount parameter", ) + # Don't bother trying to unit test transform_df(). 
It is a wrapper which basically does: + # - read (parse) + # - apply function + # - write (generate content) + # TODO notes added above to implement those bits separately class TestMergeSP3(TestCase): def setUp(self): From 7241486cc9f85d47afb3d7281a71943bb360a1a8 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 7 Jan 2025 06:30:19 +0000 Subject: [PATCH 6/8] NPI-3669 added docstrings based on PR comments, added more notes for future work, added check for incompatible arg combination to trim_df() --- gnssanalysis/gn_io/sp3.py | 35 ++++++++++++++++++++++--- gnssanalysis/test_file_creation_util.py | 2 +- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index bc3542e..3c116eb 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -639,6 +639,8 @@ def getVelPoly(sp3Df: _pd.DataFrame, deg: int = 35) -> _pd.DataFrame: def gen_sp3_header(sp3_df: _pd.DataFrame) -> str: """ Generate the header for an SP3 file based on the given DataFrame. + NOTE: much of the header information is drawn from the DataFrame attrs structure. If this has not been + updated as the DataFrame has been transformed, the header will not reflect the data. :param pandas.DataFrame sp3_df: The DataFrame containing the SP3 data. :return str: The generated SP3 header as a string. @@ -724,6 +726,8 @@ def gen_sp3_content( # Rather than: # PG01... X Y Z CLK ... VX VY VZ ... # ? + # TODO raise warnings if VEL columns are still present, and drop them before writing out, to ensure we remain + # compliant with the spec. out_buf = buf if buf is not None else _io.StringIO() if sort_outputs: @@ -953,7 +957,7 @@ def sp3merge( :param Union[List[str], None] clkpaths: The list of paths to the clk files, or None if no clk files are provided. :param bool nodata_to_nan: Flag indicating whether to convert nodata values to NaN. - :return pd.DataFrame: The merged sp3 DataFrame. + :return DataFrame: The merged sp3 DataFrame. """ sp3_dfs = [read_sp3(sp3_file, nodata_to_nan=nodata_to_nan) for sp3_file in sp3paths] # Create a new attrs dictionary to be used for the output DataFrame @@ -971,7 +975,13 @@ def sp3merge( def transform_sp3(src_sp3: str, dest_sp3: str, transform_fn, *args, **kwargs): """ - Apply a transformation to an sp3 file + Apply a transformation to an sp3 file, by reading the file from the given path, applying the supplied + transformation function and args, and writing out a new file to the path given. + + :param str src_sp3: Path of the source SP3 file to read in. + :param str dest_sp3: Path to write out the new SP3 file to. + :param callable transform_fn: The transformation function to apply to the SP3 data once loaded. *args + and **kwargs following, are passed to this function. """ logger.info(f"Reading file: " + str(src_sp3)) sp3_df = read_sp3(src_sp3) @@ -987,6 +997,14 @@ def trim_df( ): """ Trim data from the start and end of an sp3 dataframe + + :param DataFrame sp3_df: The input SP3 DataFrame. + :param timedelta trim_start: Amount of time to trim off the start of the dataframe. + :param timedelta trim_end: Amount of time to trim off the end of the dataframe. + :param Optional[timedelta] keep_first_delta_amount: If supplied, trim the dataframe to this length. Not + compatible with trim_start and trim_end. 
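+        Providing a non-zero trim_start or trim_end together with this option raises a ValueError.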
+ :return DataFrame: Dataframe trimmed to the requested time range, or requested initial amount + """ time_axis = sp3_df.index.get_level_values(0) # Work out the new time range that we care about @@ -999,6 +1017,8 @@ def trim_df( if keep_first_delta_amount: first_keep_time = first_time last_keep_time = first_time + keep_first_delta_amount.total_seconds() + if trim_start.total_seconds() != 0 or trim_end.total_seconds() != 0: + raise ValueError("keep_first_delta_amount option is not compatible with start/end time options") # Slice to the subset that we actually care about trimmed_df = sp3_df.loc[first_keep_time:last_keep_time] @@ -1014,6 +1034,13 @@ def trim_to_first_n_epochs( ) -> _pd.DataFrame: """ Utility function to trim an SP3 dataframe to the first n epochs, given either the filename, or sample rate + + :param DataFrame sp3_df: The input SP3 DataFrame. + :param int epoch_count: Trim to this many epochs from start of SP3 data (i.e. first n epochs). + :param Optional[str] sp3_filename: Name of SP3 file, just used to derive sample_rate. + :param Optional[timedelta] sp3_sample_rate: Sample rate of the SP3 data. Alternatively this can be + derived from a filename. + :return DataFrame: Dataframe trimmed to the requested number of epochs. """ sample_rate = sp3_sample_rate if not sample_rate: @@ -1037,7 +1064,7 @@ def sp3_hlm_trans( :param DataFrame a: The sp3_a DataFrame. :param DataFrame b : The sp3_b DataFrame. - :returntuple[pandas.DataFrame, list]: A tuple containing the updated sp3_b DataFrame and the HLM array with applied computed parameters and residuals. + :return tuple[pandas.DataFrame, list]: A tuple containing the updated sp3_b DataFrame and the HLM array with applied computed parameters and residuals. """ hlm = _gn_transform.get_helmert7(pt1=a.EST[["X", "Y", "Z"]].values, pt2=b.EST[["X", "Y", "Z"]].values) b.iloc[:, :3] = _gn_transform.transform7(xyz_in=b.EST[["X", "Y", "Z"]].values, hlm_params=hlm[0]) @@ -1064,7 +1091,7 @@ def diff_sp3_rac( :param bool use_offline_sat_removal: Flag indicating whether to remove satellites which are offline / have some nodata position values. Caution: ensure you turn this on if using cubic spline interpolation with data which may have holes in it (nodata). - :return: The DataFrame containing the difference in RAC coordinates. + :return DataFrame: The DataFrame containing the difference in RAC coordinates. """ hlm_modes = [None, "ECF", "ECI"] if hlm_mode not in hlm_modes: diff --git a/gnssanalysis/test_file_creation_util.py b/gnssanalysis/test_file_creation_util.py index 6736dfd..e7744db 100644 --- a/gnssanalysis/test_file_creation_util.py +++ b/gnssanalysis/test_file_creation_util.py @@ -22,7 +22,7 @@ drop_offline_sats: bool = False -#### +#### End configuration #### filename = src_path.rsplit("/")[-1] From a15cd428c897c33e64b36e3fcf635c34ac814963 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 7 Jan 2025 06:36:03 +0000 Subject: [PATCH 7/8] NPI-3669 cleanup of redundent comment --- tests/test_sp3.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/test_sp3.py b/tests/test_sp3.py index e357519..e15897c 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -288,12 +288,6 @@ def test_trim_df(self, mock_file): "Should be two epochs after trimming with keep_first_delta_amount parameter", ) - # Don't bother trying to unit test transform_df(). 
It is a wrapper which basically does: - # - read (parse) - # - apply function - # - write (generate content) - # TODO notes added above to implement those bits separately - class TestMergeSP3(TestCase): def setUp(self): From 0945e2655ea90845dd332205bda9d56f318af3bc Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Wed, 8 Jan 2025 03:28:20 +0000 Subject: [PATCH 8/8] NPI-3669 update docstrings in sp3.py for consistency, in response to PR comments - similar fixes to parallel pr --- gnssanalysis/gn_io/sp3.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 3c116eb..9d678cf 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -577,8 +577,8 @@ def parse_sp3_header(header: bytes, warn_on_negative_sv_acc_values: bool = True) def getVelSpline(sp3Df: _pd.DataFrame) -> _pd.DataFrame: """Returns the velocity spline of the input dataframe. - :param DataFrame sp3Df: The input dataframe containing position data. - :return DataFrame: The dataframe containing the velocity spline. + :param _pd.DataFrame sp3Df: The input dataframe containing position data. + :return _pd.DataFrame: The dataframe containing the velocity spline. :caution :This function cannot handle *any* NaN / nodata / non-finite position values. By contrast, getVelPoly() is more forgiving, but accuracy of results, particulary in the presence of NaNs, has not been assessed. @@ -599,9 +599,9 @@ def getVelPoly(sp3Df: _pd.DataFrame, deg: int = 35) -> _pd.DataFrame: """ Interpolates the positions for -1s and +1s in the sp3_df DataFrame and outputs velocities. - :param DataFrame sp3Df: A pandas DataFrame containing the sp3 data. + :param _pd.DataFrame sp3Df: A pandas DataFrame containing the sp3 data. :param int deg: Degree of the polynomial fit. Default is 35. - :return DataFrame: A pandas DataFrame with the interpolated velocities added as a new column. + :return _pd.DataFrame: A pandas DataFrame with the interpolated velocities added as a new column. """ est = sp3Df.unstack(1).EST[["X", "Y", "Z"]] @@ -957,7 +957,7 @@ def sp3merge( :param Union[List[str], None] clkpaths: The list of paths to the clk files, or None if no clk files are provided. :param bool nodata_to_nan: Flag indicating whether to convert nodata values to NaN. - :return DataFrame: The merged sp3 DataFrame. + :return _pd.DataFrame: The merged sp3 DataFrame. """ sp3_dfs = [read_sp3(sp3_file, nodata_to_nan=nodata_to_nan) for sp3_file in sp3paths] # Create a new attrs dictionary to be used for the output DataFrame @@ -998,12 +998,12 @@ def trim_df( """ Trim data from the start and end of an sp3 dataframe - :param DataFrame sp3_df: The input SP3 DataFrame. + :param _pd.DataFrame sp3_df: The input SP3 DataFrame. :param timedelta trim_start: Amount of time to trim off the start of the dataframe. :param timedelta trim_end: Amount of time to trim off the end of the dataframe. :param Optional[timedelta] keep_first_delta_amount: If supplied, trim the dataframe to this length. Not compatible with trim_start and trim_end. 
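         Providing a non-zero trim_start or trim_end together with this option raises a ValueError.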
- :return DataFrame: Dataframe trimmed to the requested time range, or requested initial amount + :return _pd.DataFrame: Dataframe trimmed to the requested time range, or requested initial amount """ time_axis = sp3_df.index.get_level_values(0) @@ -1035,12 +1035,12 @@ def trim_to_first_n_epochs( """ Utility function to trim an SP3 dataframe to the first n epochs, given either the filename, or sample rate - :param DataFrame sp3_df: The input SP3 DataFrame. + :param _pd.DataFrame sp3_df: The input SP3 DataFrame. :param int epoch_count: Trim to this many epochs from start of SP3 data (i.e. first n epochs). :param Optional[str] sp3_filename: Name of SP3 file, just used to derive sample_rate. :param Optional[timedelta] sp3_sample_rate: Sample rate of the SP3 data. Alternatively this can be derived from a filename. - :return DataFrame: Dataframe trimmed to the requested number of epochs. + :return _pd.DataFrame: Dataframe trimmed to the requested number of epochs. """ sample_rate = sp3_sample_rate if not sample_rate: @@ -1061,8 +1061,8 @@ def sp3_hlm_trans( """ Rotates sp3_b into sp3_a. - :param DataFrame a: The sp3_a DataFrame. - :param DataFrame b : The sp3_b DataFrame. + :param _pd.DataFrame a: The sp3_a DataFrame. + :param _pd.DataFrame b: The sp3_b DataFrame. :return tuple[pandas.DataFrame, list]: A tuple containing the updated sp3_b DataFrame and the HLM array with applied computed parameters and residuals. """ @@ -1082,8 +1082,8 @@ def diff_sp3_rac( """ Computes the difference between the two sp3 files in the radial, along-track and cross-track coordinates. - :param DataFrame sp3_baseline: The baseline sp3 DataFrame. - :param DataFrame sp3_test: The test sp3 DataFrame. + :param _pd.DataFrame sp3_baseline: The baseline sp3 DataFrame. + :param _pd.DataFrame sp3_test: The test sp3 DataFrame. :param string hlm_mode: The mode for HLM transformation. Can be None, "ECF", or "ECI". :param bool use_cubic_spline: Flag indicating whether to use cubic spline for velocity computation. Caution: cubic spline interpolation does not tolerate NaN / nodata values. Consider enabling use_offline_sat_removal if @@ -1091,7 +1091,7 @@ def diff_sp3_rac( :param bool use_offline_sat_removal: Flag indicating whether to remove satellites which are offline / have some nodata position values. Caution: ensure you turn this on if using cubic spline interpolation with data which may have holes in it (nodata). - :return DataFrame: The DataFrame containing the difference in RAC coordinates. + :return _pd.DataFrame: The DataFrame containing the difference in RAC coordinates. """ hlm_modes = [None, "ECF", "ECI"] if hlm_mode not in hlm_modes: