From de7f6a6e66cd81df2e646452fe01ec1d5945d885 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:59:31 +0000 Subject: [PATCH 01/19] SP3 comment preservation and output --- gnssanalysis/gn_io/sp3.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 12e07e9..a48b5ae 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -29,6 +29,15 @@ _re.VERBOSE, ) +# SP3 comment line requirements, from SP3d spec: +# There are now an unlimited number of comment records allowed in the header, and the maximum length of each comment +# record has been increased from 60 characters to 80 characters. +# And elsewhere: +# For backwards compatibility, there will always be at least 4 comment lines. + +# TODO: checks for adding a comment which is too long / reflow it. +# TODO: option for appending comments. + _RE_SP3_COMMENT_STRIP = _re.compile(rb"^(\/\*.*$\n)", _re.MULTILINE) # Regex to extract Satellite Vehicle (SV) names (E.g. G02). In SP3-d (2016) up to 999 satellites can be included). # Regex options/flags: multiline, findall. Updated to extract expected SV count too. @@ -497,9 +506,11 @@ def read_sp3( content = _gn_io.common.path2bytes(sp3_path_or_bytes) # Will raise EOFError if file empty # Match comment lines, including the trailing newline (so that it gets removed in a second too): ^(\/\*.*$\n) - comments: list = _RE_SP3_COMMENT_STRIP.findall(content) - for comment in comments: + comment_lines_bytes: list[bytes] = _RE_SP3_COMMENT_STRIP.findall(content) + for comment in comment_lines_bytes: content = content.replace(comment, b"") # Not in place?? Really? 
+ # These will be written to DataFrame.attrs["COMMENTS"] for easy access + comment_lines: list[str] = [line.decode("utf-8", errors="ignore") for line in comment_lines_bytes] # Judging by the spec for SP3-d (2016), there should only be 2 '%i' lines in the file, and they should be # immediately followed by the mandatory 4+ comment lines. # It is unclear from the specification whether comment lines can appear anywhere else. For robustness we @@ -613,6 +624,7 @@ def read_sp3( sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")] # Write header data to dataframe attributes: sp3_df.attrs["HEADER"] = parsed_header + sp3_df.attrs["COMMENTS"] = comment_lines sp3_df.attrs["path"] = sp3_path_or_bytes if type(sp3_path_or_bytes) in (str, Path) else "" return sp3_df @@ -843,15 +855,19 @@ def get_unique_epochs(sp3_df: _pd.DataFrame) -> _pd.Index: return sp3_df.index.get_level_values(0).unique() -def gen_sp3_header(sp3_df: _pd.DataFrame) -> str: +def gen_sp3_header(sp3_df: _pd.DataFrame, output_comments: bool = False) -> str: """ Generate the header for an SP3 file based on the given DataFrame. NOTE: much of the header information is drawn from the DataFrame attrs structure. If this has not been updated as the DataFrame has been transformed, the header will not reflect the data. :param _pd.DataFrame sp3_df: The DataFrame containing the SP3 data. + :param bool output_comment: Write the SP3 comment lines stored with the DataFrame, into the output. Off by default. :return str: The generated SP3 header as a string. """ + if output_comments and not "COMMENTS" in sp3_df.attrs: + raise IndexError("SP3 comment output requested, but comment data was not found in DataFrame.attrs") + sp3_j2000 = sp3_df.index.levels[0].values sp3_j2000_begin = sp3_j2000[0] @@ -924,8 +940,14 @@ def gen_sp3_header(sp3_df: _pd.DataFrame) -> str: + ["%i 0 0 0 0 0 0 0 0 0\n"] + ["%i 0 0 0 0 0 0 0 0 0\n"] ) - + # Default comments, which meet the specification requirement for >= 4 lines. 
def trim_line_ends(content: str) -> str:
    """
    Remove trailing whitespace from every line of the given string.

    Useful because, for example, the SP3 spec doesn't stipulate whether lines may carry trailing
    whitespace, and implementations vary in practice.

    :param str content: multi-line input string
    :return str: the same string with trailing (only, not leading) whitespace removed from each line
    """
    # Note: split("\n") rather than splitlines() so a trailing newline on the input survives the re-join.
    return "\n".join(line.rstrip() for line in content.split("\n"))
4028.826042 19602.838740 463.954491 +""" + -# Deliberately broken versions of the above +# Deliberately broken versions of the above full file. # Issue: Inconsistent number of SVs (first epoch correct, then adds one) # Use filename: COD0OPSFIN_20242010000_10M_05M_ORB.SP3 diff --git a/tests/test_sp3.py b/tests/test_sp3.py index 0daa581..5e0ff3d 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -9,6 +9,7 @@ from gnssanalysis.filenames import convert_nominal_span, determine_properties_from_filename import gnssanalysis.gn_io.sp3 as sp3 +from gnssanalysis.gn_utils import trim_line_ends from test_datasets.sp3_test_data import ( # first dataset is part of the IGS benchmark (modified to include non null data on clock): sp3_test_data_igs_benchmark_null_clock as input_data, @@ -21,6 +22,10 @@ sp3_test_data_partially_offline_sat as offline_sat_test_data, # For header vs content validation tests: sp3_test_data_cod_broken_missing_sv_in_content, + # For testing generate_sp3_header() and generate_sp3_content() + sp3_test_data_short_cod_final, # For use as input data + sp3_test_data_short_cod_final_content, # For validating content output + sp3_test_data_short_cod_final_header, # For validating header output ) @@ -140,6 +145,29 @@ def test_read_sp3_validation_sv_count_mismatch_header_vs_content(self, mock_file ) # TODO Add test(s) for correctly reading header fundamentals (ACC, ORB_TYPE, etc.) # TODO add tests for correctly reading the actual content of the SP3 in addition to the header. + + def test_gen_sp3_fundamentals(self): + """ + Tests that the SP3 header and content generation functions produce output that (apart from trailing + whitespace), match a known good value. + NOTE: leverages read_sp3() to pull in sample data, so is prone to errors in that function. + """ + # Note this is suboptimal from a testing standpoint, but for now is a lot easier than manually constructing + # the DataFrame. 
+ sp3_df = sp3.read_sp3(bytes(sp3_test_data_short_cod_final)) + + generated_sp3_header = sp3.gen_sp3_header(sp3_df, output_comments=True) + generated_sp3_content = sp3.gen_sp3_content(sp3_df) + + self.assertTrue( + trim_line_ends(generated_sp3_header) == trim_line_ends(sp3_test_data_short_cod_final_header), + "SP3 header should match baseline, including retention of original comments", + ) + self.assertTrue( + trim_line_ends(generated_sp3_content) == trim_line_ends(sp3_test_data_short_cod_final_content), + "SP3 content should match baseline", + ) + # TODO add tests for correctly generating sp3 output content with gen_sp3_content() and gen_sp3_header() # These tests should include: # - Correct alignment of POS, CLK, STDPOS STDCLK, (not velocity yet), FLAGS From 642f62dc5e83b9fc9dc00112737e2a159408cd74 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Wed, 15 Jan 2025 07:29:04 +0000 Subject: [PATCH 03/19] NPI-3688 improve sp3 generation function unit test to check output line by line and display diffs of the incorrect lines --- tests/test_sp3.py | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/tests/test_sp3.py b/tests/test_sp3.py index 5e0ff3d..b4423bd 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -152,6 +152,11 @@ def test_gen_sp3_fundamentals(self): whitespace), match a known good value. NOTE: leverages read_sp3() to pull in sample data, so is prone to errors in that function. """ + + # Prep the baseline data to test against, including stripping each line of trailing whitespace. + baseline_header_lines = trim_line_ends(sp3_test_data_short_cod_final_header).splitlines() + baseline_content_lines = trim_line_ends(sp3_test_data_short_cod_final_content).splitlines() + # Note this is suboptimal from a testing standpoint, but for now is a lot easier than manually constructing # the DataFrame. 
sp3_df = sp3.read_sp3(bytes(sp3_test_data_short_cod_final)) @@ -159,15 +164,38 @@ def test_gen_sp3_fundamentals(self): generated_sp3_header = sp3.gen_sp3_header(sp3_df, output_comments=True) generated_sp3_content = sp3.gen_sp3_content(sp3_df) - self.assertTrue( - trim_line_ends(generated_sp3_header) == trim_line_ends(sp3_test_data_short_cod_final_header), - "SP3 header should match baseline, including retention of original comments", + # As with the baseline data, prep the data under test, for comparison. + test_header_lines = trim_line_ends(generated_sp3_header).splitlines() + test_content_lines = trim_line_ends(generated_sp3_content).splitlines() + + # TODO maybe we don't want to split the content, just the header + + self.assertEqual( + len(baseline_header_lines), + len(test_header_lines), + "Baseline and test header should have same number of lines", ) - self.assertTrue( - trim_line_ends(generated_sp3_content) == trim_line_ends(sp3_test_data_short_cod_final_content), - "SP3 content should match baseline", + self.assertEqual( + len(baseline_content_lines), + len(test_content_lines), + "Baseline and test content should have same number of lines", ) + # As we know the two arrays are equal length, we can iterate as one + # Header first + for i in range(0, len(baseline_header_lines) - 1): + self.assertEqual( + baseline_header_lines[i], + test_header_lines[i], + f"Header line {i} didn't match", + ) + # Same for content (maybe don't do this?) 
+ for i in range(0, len(baseline_content_lines) - 1): + self.assertEqual( + baseline_content_lines[i], + test_content_lines[i], + f"Content line {i} didn't match", + ) # TODO add tests for correctly generating sp3 output content with gen_sp3_content() and gen_sp3_header() # These tests should include: # - Correct alignment of POS, CLK, STDPOS STDCLK, (not velocity yet), FLAGS From 291838744b3865a53a5ce3bcfb9845060d65d047 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 16 Jan 2025 06:33:29 +0000 Subject: [PATCH 04/19] NPI-3688 improve sp3 comment output functionality to auto-pad, ensuring at least the minimum number of comment lines are output. Also change minimal comment to fit the spec, which includes an extra space. --- gnssanalysis/gn_io/sp3.py | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index a48b5ae..7104330 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -855,7 +855,7 @@ def get_unique_epochs(sp3_df: _pd.DataFrame) -> _pd.Index: return sp3_df.index.get_level_values(0).unique() -def gen_sp3_header(sp3_df: _pd.DataFrame, output_comments: bool = False) -> str: +def gen_sp3_header(sp3_df: _pd.DataFrame, output_comments: bool = False, strict_mode: bool = False) -> str: """ Generate the header for an SP3 file based on the given DataFrame. NOTE: much of the header information is drawn from the DataFrame attrs structure. If this has not been @@ -863,6 +863,7 @@ def gen_sp3_header(sp3_df: _pd.DataFrame, output_comments: bool = False) -> str: :param _pd.DataFrame sp3_df: The DataFrame containing the SP3 data. :param bool output_comment: Write the SP3 comment lines stored with the DataFrame, into the output. Off by default. + :param bool strict_mode: More strictly enforce SP3 specification rules (e.g. comments must have a leading space) :return str: The generated SP3 header as a string. 
""" if output_comments and not "COMMENTS" in sp3_df.attrs: @@ -941,15 +942,30 @@ def gen_sp3_header(sp3_df: _pd.DataFrame, output_comments: bool = False) -> str: + ["%i 0 0 0 0 0 0 0 0 0\n"] ) # Default comments, which meet the specification requirement for >= 4 lines. - comment = ["/*\n"] * 4 + sp3_comment_lines = ["/* \n"] * 4 if output_comments: - stored_comments = sp3_df.attrs["COMMENTS"] - if len(stored_comments) >= 4: # Minimum number of comment lines by spec. - comment = stored_comments - else: - logger.warning("SP3 comment data output skipped: SP3 DataFrame passed in had <4 comment lines!") - - return "".join(line1 + line2 + sats_header.tolist() + sv_orb_head.tolist() + head_c + head_fi + comment) + sp3_comment_lines = sp3_df.attrs["COMMENTS"] # Use actual comments from the DataFrame, not placeholders + + # Validate the lines aren't too long. And in strict mode, check comment data (if present) starts at >= column 4 + for line in sp3_comment_lines: + if len(line) > 80: # Limit in SP3d (in SP3c the max is 60, but we don't output anything older than SP3d) + raise ValueError(f"SP3 comment line found that was longer than 80 chars! '{line}'") + if strict_mode: + # All comment lines must begin with "/* ", whether there is further comment data there or not. + if len(line) < 3 or line[:2] != "/* ": # Line too short, or first three chars weren't to spec + raise ValueError( + f"SP3 comment line didn't begin with '/* ' (including leading space) '{line}'. " + "Turn off strict mode to skip this check" + ) + + # Ensure >= 4 comment lines total + # The spec states that there must be at least 4 comment lines. Make up the difference if we are short. + if (short_by_lines := 4 - len(sp3_comment_lines)) > 0: + sp3_comment_lines.extend(["/* \n"] * short_by_lines) + + # TODO check the above this appends lines, not an array of lines - write test for this. 
+ + return "".join(line1 + line2 + sats_header.tolist() + sv_orb_head.tolist() + head_c + head_fi + sp3_comment_lines) # Option 1: don't provide a buffer, the data will be returned as a string From a5ca5fdbe93ab227c5ffe09bb835d3b911a02235 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 16 Jan 2025 06:34:25 +0000 Subject: [PATCH 05/19] NPI-3688 update sp3 header generator to set format version to d if not already set, as this is the version it outputs --- gnssanalysis/gn_io/sp3.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 7104330..c559fee 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -876,6 +876,13 @@ def gen_sp3_header(sp3_df: _pd.DataFrame, output_comments: bool = False, strict_ head = header.HEAD sv_tbl = header.SV_INFO + if head.VERSION != "d": + logger.warning( + f"Stored SP3 header indicates version '{head.VERSION}'. Changing to version 'd' for " + "write-out given that's the version this implementation is designed to create" + ) + head.VERSION = "d" + # need to update DATETIME outside before writing line1 = [ f"#{head.VERSION}{head.PV_FLAG}{_gn_datetime.j20002rnxdt(sp3_j2000_begin)[0][3:-2]}" From ef297227ca4f8f9d0fb596426e900de92bcb6508 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 16 Jan 2025 06:35:00 +0000 Subject: [PATCH 06/19] NPI-3688 add constants for SP3 comment start sequence and max length --- gnssanalysis/gn_io/sp3.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index c559fee..3d51105 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -171,6 +171,11 @@ SP3_POS_STD_NODATA_STRING = " " +# Other SP3 constants +SP3_COMMENT_START: str = "/* " +SP3_COMMENT_MAX_LENGTH: int = 80 + + def sp3_pos_nodata_to_nan(sp3_df: _pd.DataFrame) -> None: """ Converts the SP3 Positional 
def check_sp3_version(sp3_bytes: bytes, strict: bool = False) -> bool:
    """
    Reads the SP3 version character from the provided SP3 data as bytes, and raises / warns if this
    implementation does not support it.
    The version character is specified as the second character in the file/header.

    :param bytes sp3_bytes: The raw SP3 data to check.
    :param bool strict: raise ValueError for all versions we don't actively support. Otherwise for possibly compatible
        versions, a warning is logged and parsing is still attempted.
    :raises ValueError: if the SP3 data is of a version we cannot parse (e.g. a, or > d), or if the version character
        cannot be determined at all (e.g. empty or non-SP3 data).
    :return bool: True if this version is actively supported (currently only version 'd'), False otherwise.
    """
    # Check the SP3 data we are reading is a version we support (currently only SP3d is actively used, and support is
    # not complete).
    # NOTE: slicing (rather than indexing) a bytes object yields bytes, not an int, and is safe on short input.
    version_char = sp3_bytes[1:2]

    if version_char == b"d":  # Version d, ~2016. This is the version we actively support.
        return True
    elif version_char > b"d":  # Too new. Bail out, we don't know what has changed.
        raise ValueError(
            f"SP3 file version '{version_char}' is too new! We support version 'd', and can potentially read (untested) version 'c' and 'b'"
        )
    elif version_char in (b"c", b"b"):  # Tentative, may not work properly.
        if strict:
            raise ValueError(
                f"Support for SP3 file version '{version_char}' is untested. Refusing to read as strict mode is on."
            )
        logger.warning(f"Reading an older SP3 file version '{version_char}'. This may not parse correctly!")
        return False
    elif version_char == b"a":  # First version doesn't have constellation letters (e.g. G01, R01) as it is GPS only.
        raise ValueError(
            f"SP3 file version 'a' (the original version) is unsupported. We support version 'd', with partial (possible) support for 'c' and 'b'"
        )
    else:
        # Covers empty input and characters below 'a' (e.g. truncated or non-SP3 data).
        # BUGFIX: error message previously lacked the closing quote after the version character.
        raise ValueError(f"Failure determining SP3 data version. Got '{version_char}'")
+ check_sp3_version(content, strict=enforce_strict_format_compliance) + # Match comment lines, including the trailing newline (so that it gets removed in a second too): ^(\/\*.*$\n) comment_lines_bytes: list[bytes] = _RE_SP3_COMMENT_STRIP.findall(content) for comment in comment_lines_bytes: From bf36200c1195682228932ca777644816c547e756 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 16 Jan 2025 07:16:46 +0000 Subject: [PATCH 08/19] NPI-3688 update SP3 comment handling to store and work with comment lines without newlines on them, but add these back in just before concatenating header output --- gnssanalysis/gn_io/sp3.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 4bc1a48..735a946 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -556,7 +556,7 @@ def read_sp3( for comment in comment_lines_bytes: content = content.replace(comment, b"") # Not in place?? Really? # These will be written to DataFrame.attrs["COMMENTS"] for easy access - comment_lines: list[str] = [line.decode("utf-8", errors="ignore") for line in comment_lines_bytes] + comment_lines: list[str] = [line.decode("utf-8", errors="ignore").rstrip("\n") for line in comment_lines_bytes] # Judging by the spec for SP3-d (2016), there should only be 2 '%i' lines in the file, and they should be # immediately followed by the mandatory 4+ comment lines. # It is unclear from the specification whether comment lines can appear anywhere else. For robustness we @@ -995,7 +995,7 @@ def gen_sp3_header(sp3_df: _pd.DataFrame, output_comments: bool = False, strict_ + ["%i 0 0 0 0 0 0 0 0 0\n"] ) # Default comments, which meet the specification requirement for >= 4 lines. 
- sp3_comment_lines = ["/* \n"] * 4 + sp3_comment_lines = [SP3_COMMENT_START] * 4 if output_comments: sp3_comment_lines = sp3_df.attrs["COMMENTS"] # Use actual comments from the DataFrame, not placeholders @@ -1014,10 +1014,12 @@ def gen_sp3_header(sp3_df: _pd.DataFrame, output_comments: bool = False, strict_ # Ensure >= 4 comment lines total # The spec states that there must be at least 4 comment lines. Make up the difference if we are short. if (short_by_lines := 4 - len(sp3_comment_lines)) > 0: - sp3_comment_lines.extend(["/* \n"] * short_by_lines) + sp3_comment_lines.extend([SP3_COMMENT_START] * short_by_lines) # TODO check the above this appends lines, not an array of lines - write test for this. + # Put the newlines back on the end of each comment line, before merging into the output header + sp3_comment_lines = [line + "\n" for line in sp3_comment_lines] return "".join(line1 + line2 + sats_header.tolist() + sv_orb_head.tolist() + head_c + head_fi + sp3_comment_lines) From 8c3ba8366cf9c1f6358b5fed2b1f58cde6ffbe53 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 16 Jan 2025 07:17:51 +0000 Subject: [PATCH 09/19] NPI-3688 initial attempt at fixing SP3 header line 1 alignments, needs formal review --- gnssanalysis/gn_io/sp3.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 735a946..b7ee131 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -929,11 +929,20 @@ def gen_sp3_header(sp3_df: _pd.DataFrame, output_comments: bool = False, strict_ ) head.VERSION = "d" + # NOTE: formatting of the start epoch below, uses the utility function j20002rnxdt() which appears to have been + # designed for printing the epoch headers within the SP3 content. As such it starts with extra chars ie '* 2024' + # and ends with a newline. 
+ # need to update DATETIME outside before writing line1 = [ - f"#{head.VERSION}{head.PV_FLAG}{_gn_datetime.j20002rnxdt(sp3_j2000_begin)[0][3:-2]}" - + f"{sp3_j2000.shape[0]:>9}{head.DATA_USED:>6}" - + f"{head.COORD_SYS:>6}{head.ORB_TYPE:>4}{head.AC:>5}\n" + f"#{head.VERSION}" # E.g. '#d' + + f"{head.PV_FLAG}" # E.g. 'P' or 'V' + + f"{_gn_datetime.j20002rnxdt(sp3_j2000_begin)[0][3:-1]}" # E.g. '2024 7 19 0 0 0.00000000' # TODO check. Was indexed with [3:-2] before + + f"{sp3_j2000.shape[0]:>8}" # Number of epochs E.g. '2'. TODO check what appropriate padding is. Was set to >9 + + f"{head.DATA_USED:>4}" # E.g. 'd+D' ? # TODO confirm intended padding. Was >6 + + f"{head.COORD_SYS:>8}" # E.g. 'IGS20' # TODO check. Was padded as >6 + + f"{head.ORB_TYPE:>4}" # E.g. 'FIT' + + f"{head.AC:>5}\n" # E.g. 'GAA' or 'AIUB', etc. ] gpsweek, gpssec = _gn_datetime.datetime2gpsweeksec(sp3_j2000_begin) From 1a00f62fb318dd13059a121fee1b4596e7d6cfe0 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Fri, 17 Jan 2025 06:53:41 +0000 Subject: [PATCH 10/19] NPI-3688 Fix field size and padding in gen_sp3_header() to make the first header line specification compliant. Update SP3 test data to match the SP3d spec regarding alignment of the Data Used field. --- gnssanalysis/gn_io/sp3.py | 36 +++++++++++++++++++++------- tests/test_datasets/sp3_test_data.py | 8 +++++-- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index b7ee131..0b481b9 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -935,14 +935,34 @@ def gen_sp3_header(sp3_df: _pd.DataFrame, output_comments: bool = False, strict_ # need to update DATETIME outside before writing line1 = [ - f"#{head.VERSION}" # E.g. '#d' - + f"{head.PV_FLAG}" # E.g. 'P' or 'V' - + f"{_gn_datetime.j20002rnxdt(sp3_j2000_begin)[0][3:-1]}" # E.g. '2024 7 19 0 0 0.00000000' # TODO check. 
Was indexed with [3:-2] before - + f"{sp3_j2000.shape[0]:>8}" # Number of epochs E.g. '2'. TODO check what appropriate padding is. Was set to >9 - + f"{head.DATA_USED:>4}" # E.g. 'd+D' ? # TODO confirm intended padding. Was >6 - + f"{head.COORD_SYS:>8}" # E.g. 'IGS20' # TODO check. Was padded as >6 - + f"{head.ORB_TYPE:>4}" # E.g. 'FIT' - + f"{head.AC:>5}\n" # E.g. 'GAA' or 'AIUB', etc. + f"#{head.VERSION}" # Col 1-2: Version symbol, A2. E.g. '#d' + + f"{head.PV_FLAG}" # Col 3: Pos or Vel flag, A1. E.g. 'P' or 'V' + # Covers col 4-31 (start time in: year, month, day of month, hour, minute, second), various formats. + # Widths are: 4, 2, 2, 2, 2, f11.8 (8 digits after decimal point) + # Combined output E.g. '2024 7 19 0 0 0.00000000' + + f"{_gn_datetime.j20002rnxdt(sp3_j2000_begin)[0][3:-1]}" + + " " # Col 32: Unused / space + + f"{sp3_j2000.shape[0]:>7}" # Col 33-39: Num epochs, I7. E.g '_____96' + + " " # Col 40 + # Col 41-45: Data used, A5. E.g. 'ORBIT' or '__u+U', etc. + # SP3d spec page 14 says: + # - 'The data used descriptor was included for ease in distinguishing between multiple orbital solutions + # from a single organization.' + # - 'A possible convention is given below; this is not considered final and suggestions are welcome.' + # - 'Combinations such as "__u+U" seem reasonable.' + # Given this, we infer that left padding i.e. right alignment is expected, but that this might not be strict. + # CODE (EU) appears to use left alignment e.g. 'd+D '. + + f"{head.DATA_USED:>5}" + + " " # Col 46 + + f"{head.COORD_SYS:>5}" # Col 47-51: Coordinate Sys, A5. E.g. 'WGS84' or 'IGS20'(?) + + " " # Col 52 + + f"{head.ORB_TYPE:>3}" # Col 53-55: Orbit Type, A3. E.g. 'BCT' 'FIT'(?), etc. TODO what is BCT? Is that a valid type? + + " " # Col 56 + + f"{head.AC:>4}" # Col 57-60: Agency, A4. E.g. 'MGEX', 'GAA', 'AIUB', etc. 
+ + "\n" + # We add a newline here because currently all header content has trailing newlines, rather than being stored + # without them, and having them added at the end when joining together and outputting as one string. + # TODO that might be a nicer approach. ] gpsweek, gpssec = _gn_datetime.datetime2gpsweeksec(sp3_j2000_begin) diff --git a/tests/test_datasets/sp3_test_data.py b/tests/test_datasets/sp3_test_data.py index 04629b4..0487f80 100644 --- a/tests/test_datasets/sp3_test_data.py +++ b/tests/test_datasets/sp3_test_data.py @@ -248,8 +248,12 @@ EOF """ -# For testing gen_sp3_header() -sp3_test_data_short_cod_final_header = """#dP2024 7 19 0 0 0.00000000 2 d+D IGS20 FIT AIUB +# Separate header and content versions of the above. +# For testing gen_sp3_header(): +# NOTE: CODE (EU) appears to output their 'data used' value with right padding 'd+D ' rather than left padding ' d+D'. +# Left pad seems to be suggested for 'data used' by SP3d spec, though rules for this column in general seem less strict. +# Therefore for the purpose of testing our header generator, this example has been *modified* to left pad 'data used'. +sp3_test_data_short_cod_final_header = """#dP2024 7 19 0 0 0.00000000 2 d+D IGS20 FIT AIUB ## 2323 432000.00000000 300.00000000 60510 0.0000000000000 + 3 G01G02G03 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 From a5e49f5c93547e7c033b9c50ed832bc893c90eac Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Fri, 17 Jan 2025 07:40:59 +0000 Subject: [PATCH 11/19] NPI-3688 deprecated j20002rnxdt and refactored into three functions to make things more general. Added specific wrapper function for SP3 header datetime formatting. 
def j2000_to_igs_dt(j2000_secs: _np.ndarray) -> _np.ndarray:
    """
    Converts array of j2000 times to format string representation used by many IGS formats including Rinex and SP3.
    E.g. 674913600 -> '2021-05-22T00:00:00' -> '2021  5 22  0  0  0.00000000'

    :param _np.ndarray j2000_secs: Numpy NDArray of (typically epoch) times in J2000 seconds.
    :return _np.ndarray: Numpy NDArray with those same times as strings.
    """
    datetime = j20002datetime(j2000_secs)
    # Truncate to successively coarser units; the difference between adjacent truncations yields each field.
    year = datetime.astype("datetime64[Y]")
    month = datetime.astype("datetime64[M]")
    day = datetime.astype("datetime64[D]")
    hour = datetime.astype("datetime64[h]")
    minute = datetime.astype("datetime64[m]")

    date_y = _pd.Series(year.astype(str)).str.rjust(6).values
    date_m = _pd.Series(((month - year).astype("int64") + 1).astype(str)).str.rjust(3).values
    date_d = _pd.Series(((day - month).astype("int64") + 1).astype(str)).str.rjust(3).values

    time_h = _pd.Series((hour - day).astype("int64").astype(str)).str.rjust(3).values
    time_m = _pd.Series((minute - hour).astype("int64").astype(str)).str.rjust(3).values
    # Width 12 due to one extra leading space (for easier concatenation next), then _0.00000000 format per SP3d spec.
    # NOTE: Series.view() is deprecated (removed in pandas 3.0); astype("int64") yields the same nanosecond ints
    # for a timedelta64[ns] Series.
    time_s = (_pd.Series((datetime - minute)).astype("int64") / 1e9).apply("{:.8f}".format).str.rjust(12).values
    return date_y + date_m + date_d + time_h + time_m + time_s


def j2000_to_igs_epoch_row_header_dt(j2000_secs: _np.ndarray) -> _np.ndarray:
    """
    Utility wrapper function to format J2000 time values (typically epoch values) to be written as epoch header lines
    within the body of SP3, Rinex, etc. files.
    E.g. 674913600 -> '2021-05-22T00:00:00' -> '* 2021  5 22  0  0  0.00000000\n'

    :param _np.ndarray j2000_secs: Numpy NDArray of (typically epoch) times in J2000 seconds.
    :return _np.ndarray: Numpy NDArray with those same times as strings, including epoch line lead-in and newline.
    """
    # Add leading "* "s and trailing newlines around all values
    return "* " + j2000_to_igs_dt(j2000_secs) + "\n"


def j2000_to_sp3_head_dt(j2000secs: _np.ndarray) -> str:
    """
    Utility wrapper function to format a J2000 time value for the SP3 header. Takes NDArray, but only expects one
    value in it.

    :param _np.ndarray j2000secs: Numpy NDArray containing a single (typically epoch) time in J2000 seconds.
    :return str: that time formatted as a single string (note: the first element, not an array - annotation fixed
        accordingly).
    """
    formatted_times = j2000_to_igs_dt(j2000secs)

    # If making a header there should be one value. If not it's a mistake, or at best inefficient.
    if len(formatted_times) != 1:
        logger.warning(
            "More than one time value passed through. This function is meant to be used to format a single value "
            "in the SP3 header. Check for mistakes."
        )
    return formatted_times[0]
E.g '_____96' + " " # Col 40 From c3a90a11a8d7509639076cf74de3b2c58f851489 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Fri, 17 Jan 2025 07:50:41 +0000 Subject: [PATCH 12/19] NPI-3688 fix realignment issue in date formatting (year had been padded to 6 chars as a way of adding leading whitespace, which was confusing) --- gnssanalysis/gn_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnssanalysis/gn_datetime.py b/gnssanalysis/gn_datetime.py index 88b0489..a78a9b0 100644 --- a/gnssanalysis/gn_datetime.py +++ b/gnssanalysis/gn_datetime.py @@ -289,7 +289,7 @@ def j2000_to_igs_dt(j2000_secs: _np.ndarray) -> _np.ndarray: hour = datetime.astype("datetime64[h]") minute = datetime.astype("datetime64[m]") - date_y = _pd.Series(year.astype(str)).str.rjust(6).values + date_y = _pd.Series(year.astype(str)).str.rjust(4).values date_m = _pd.Series(((month - year).astype("int64") + 1).astype(str)).str.rjust(3).values date_d = _pd.Series(((day - month).astype("int64") + 1).astype(str)).str.rjust(3).values From d93fb073496e37dbbe40b7939a09f05a904e6936 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 21 Jan 2025 04:15:24 +0000 Subject: [PATCH 13/19] NPI-3688 sp3 comment handling utilities --- gnssanalysis/gn_io/sp3.py | 118 +++++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 322b274..5b1e11d 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -211,7 +211,123 @@ def sp3_clock_nodata_to_nan(sp3_df: _pd.DataFrame) -> None: sp3_df.loc[nan_mask, ("EST", "CLK")] = _np.nan -def remove_offline_sats(sp3_df: _pd.DataFrame, df_friendly_name: str = ""): +def reflow_string_as_lines_for_comment_block( + comment_string: str, line_length_limit: int = SP3_COMMENT_MAX_LENGTH, comment_line_lead_in: str = SP3_COMMENT_START +) -> list[str]: + """ + Wraps the 
provided, arbitrary length string into an array of strings capped at the line length limit provided. + Tries not to break words, by splitting only on spaces if possible. + By default, creates SP3d compliant comment lines, with the SP3d comment lead in of '/* ' at the start of each line. + NOTE: expects input string NOT to have comment lead-in already applied. + + :param str comment_string: Arbitrary length string to wrap / reflow. + :param int line_length_limit: Line length limit for output lines. This includes the length of comment_line_lead_in + which is prepended to the start of each line. Defaults to 80 chars, which is the limit for SP3d. + :param str comment_line_lead_in: Starting string of comment lines. SP3 by default i.e. '/* ' + :return list[str]: The input string wrapped and formatted as (by default) SP3 comment lines. + """ + + # Entire comment fits without reflowing. Just add the lead-in and return immediately. + if (len(comment_string) + len(comment_line_lead_in)) <= line_length_limit: + return [comment_line_lead_in + comment_string] + + # Splitting required. + remaining_data = comment_string + output_lines: list[str] = [] + + # What is the highest index we can seek into a string without grabbing a line that will be too long once we add + # the comment lead-in? + max_seek_index = line_length_limit - len(comment_line_lead_in) - 1 + + while len(remaining_data) > 0: + # If all remaining data will fit on a single line, we don't need to split further. Add this line, and finish. + if len(remaining_data) - 1 <= max_seek_index: + output_lines.append(comment_line_lead_in + remaining_data) + return output_lines + + # Max seek for this line specifically. The next line worth of data, or all remaining data. + max_line_seek_index = min(len(remaining_data) - 1, max_seek_index) + + # Find the last space within the next line worth of data. 
+ unclean_split = False + split_index = remaining_data[:max_line_seek_index].rfind(" ") + if split_index == -1: # Error: No space within that line worth of data: clean split not possible + unclean_split = True + split_index = max_line_seek_index # Split at the end of the line, breaking words/data + + # Split at determined point and add to output, then truncate remaining data to remove what we just split off. + output_lines.append(comment_line_lead_in + remaining_data[:split_index]) + # We want to index new data starting *after* the space that we split on (*if* there was one). So the index + # below is +1 if we split on a space, +0 if not (to avoid deleting a char from the start of the next line): + remaining_data = remaining_data[split_index + (0 if unclean_split else 1) :] + + return output_lines + + +def get_sp3_comments(sp3_df: _pd.DataFrame) -> list[str]: + """ + Utility function to retrieve stored SP3 comment lines from the attributes of an SP3 DataFrame. + :return list[str]: List of comment lines, verbatim. Note that comment line lead-in of '/* ' is not removed. + """ + return sp3_df.attrs["COMMENTS"] + + +def update_sp3_comments( + sp3_df: _pd.DataFrame, comment_lines: Union[list[str], None], comment_string: Union[str, None], ammend: bool = True +) -> None: + """ + Update SP3 comment lines in-place for the SP3 DataFrame provided. By default ammends new lines to + what was already there (e.g. as read in from a source SP3 file). Can optionally remove existing lines and replace + with the lines provided. + + New comment lines Can be passed as either a list of lines and or a string to wrap and reformat as comment lines, or + both. All comment lines, including those already present, will be checked for compliance with the SP3 comment spec + (except for minimum *number* of comment lines, which is checked on write-out in gen_sp3_header()). 
+ + Order of updated comments will be: + - existing comments (unless ammend=False) + - comment_lines (if provided) + - comment_string (if provided) + + Note: neither comment_lines or comment_string is mandatory. Both or neither can be provided. + If neither, existing comments will be format checked and rewritten (unless ammend is set to False; then there + will be no comments). + + :param _pd.DataFrame sp3_df: SP3 DataFrame on which to update / replace comments (in place). + :param Union[list[str], None] comment_lines: List of comment lines to ammend / overwrite with. SP3 comment line + lead-in ('/* ') is optional and will be added if missing. + :param Union[str, None] comment_string: Arbitrary length string to be broken into lines and formatted as SP3 + comments. This should NOT have SP3 comment line lead-in ('/* ') on it; that will be added. + :param bool ammend: Whether to ammend (specifically add additional) comment lines, or delete existing lines and + replace with the provided input. + :raises ValueError: if any lines are too long (>80 chars including lead-in sequence '/* ', added if missing) + """ + # Start with the existing comment lines if we're in ammend mode, else start afresh + new_lines: list[str] = sp3_df.attrs["COMMENTS"] if ammend else [] + + if comment_lines: + new_lines.extend(comment_lines) + # Validation / adding comment lead-in if missing, will be done shortly. + + # Reflow free text comments, then process all lines together + if comment_string: + new_lines.extend(reflow_string_as_lines_for_comment_block(comment_string)) + + # Ensure lead-in correct on all lines, and lines not too long + for i in range(len(new_lines) - 1): + if not new_lines[i].startswith(SP3_COMMENT_START): + # If comment start '/*' is there, we must've only failed the above check due to no space at char 3. + # In that case, strip the existing comment start to avoid duplicating it. 
+ new_lines[i] = SP3_COMMENT_START + new_lines[i][2:] if new_lines[i][0:2] == "/*" else new_lines[i] + if len(new_lines[i]) > SP3_COMMENT_MAX_LENGTH: + raise ValueError(f"Comment line too long! Exceeded 80 chars (including lead-in): '{new_lines[i]}'") + + # Write updated lines back to the DataFrame attributes + sp3_df.attrs["COMMENTS"] = new_lines + logger.info(f"Updated SP3 comment lines in DataFrame attrs. Currently {len(new_lines)} comment lines") + + +def remove_offline_sats(sp3_df: _pd.DataFrame, df_friendly_name: str = "") -> _pd.DataFrame: """ Remove any satellites that have "0.0" or NaN for all three position coordinates - this indicates satellite offline. Note that this removes a satellite which has *any* missing coordinate data, meaning a satellite with partial From b0e10dba7cec976cd92374bb57087144240e39b2 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 30 Jan 2025 10:19:02 +0000 Subject: [PATCH 14/19] NPI-3688 todo for better tests for comment handling functions. Linting fix --- tests/test_sp3.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_sp3.py b/tests/test_sp3.py index b3895f7..eb366bc 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -143,7 +143,7 @@ def test_read_sp3_validation_sv_count_mismatch_header_vs_content(self, mock_file "Header says there should be 1 epochs, however there are 2 (unique) epochs in the content (duplicate epoch check comes later).", "Loading SP3 with mismatch between SV count in header and in content, should raise exception", ) - + @patch("builtins.open", new_callable=mock_open, read_data=sp3c_example2_data) def test_read_sp3_correct_svs_read_when_ev_ep_present(self, mock_file): # This should not raise an exception; SV count should match header if parsed correctly. 
@@ -218,6 +218,9 @@ def test_gen_sp3_fundamentals(self): # - Not including column names (can just test that output matches expected format) # - Not including any NaN value *anywhere* + # TODO add tests for SP3 comment handling, such as comment line reflow, append vs overwrite comments, comment + # format exception handling. + def test_gen_sp3_content_velocity_exception_handling(self): """ gen_sp3_content() velocity output should raise exception (currently unsupported).\ From eb4623860b7384d327b001255382fcfffa0ff2ab Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 30 Jan 2025 10:53:55 +0000 Subject: [PATCH 15/19] NPI-3688 add todo for attribute merge test --- tests/test_sp3.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_sp3.py b/tests/test_sp3.py index eb366bc..5c454ac 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -423,6 +423,10 @@ def test_trim_df(self, mock_file): "Should be two epochs after trimming with keep_first_delta_amount parameter", ) + # TODO add new test: test_merge_attrs, for attribute merge: + # Ensure merging attributes results in the expected intersections / max / min, depending on the attribute. E.g. + # total sats across all files, worst accuracy code for each sat across all files, etc. + class TestSP3Utils(TestCase): From 55b037549985bd6194e3e0f2e5d916467a64a698 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 30 Jan 2025 11:03:40 +0000 Subject: [PATCH 16/19] NPI-3688 remove actioned todos and no longer relevant comments --- gnssanalysis/gn_io/sp3.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 251510c..6b427fe 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -35,9 +35,6 @@ # And elsewhere: # For backwards compatibility, there will always be at least 4 comment lines. 
-# TODO: checks for adding a comment which is too long / reflow it. -# TODO: option for appending comments. - _RE_SP3_COMMENT_STRIP = _re.compile(rb"^(\/\*.*$\n)", _re.MULTILINE) # Regex to extract Satellite Vehicle (SV) names (E.g. G02). In SP3-d (2016) up to 999 satellites can be included). @@ -1125,11 +1122,6 @@ def gen_sp3_header(sp3_df: _pd.DataFrame, output_comments: bool = False, strict_ ) head.VERSION = "d" - # NOTE: formatting of the start epoch below, uses the utility function j20002rnxdt() which appears to have been - # designed for printing the epoch headers within the SP3 content. As such it starts with extra chars ie '* 2024' - # and ends with a newline. - - # need to update DATETIME outside before writing line1 = [ f"#{head.VERSION}" # Col 1-2: Version symbol, A2. E.g. '#d' + f"{head.PV_FLAG}" # Col 3: Pos or Vel flag, A1. E.g. 'P' or 'V' From caac7914c4bbbf1e7ce1070cd23d6c5ea8b22cc3 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 30 Jan 2025 11:08:41 +0000 Subject: [PATCH 17/19] NPI-3688 fix SP3 comment update function to not expect optional args --- gnssanalysis/gn_io/sp3.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 6b427fe..cd629be 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -299,7 +299,10 @@ def get_sp3_comments(sp3_df: _pd.DataFrame) -> list[str]: def update_sp3_comments( - sp3_df: _pd.DataFrame, comment_lines: Union[list[str], None], comment_string: Union[str, None], ammend: bool = True + sp3_df: _pd.DataFrame, + comment_lines: Union[list[str], None] = None, + comment_string: Union[str, None] = None, + ammend: bool = True, ) -> None: """ Update SP3 comment lines in-place for the SP3 DataFrame provided. 
By default ammends new lines to @@ -1646,13 +1649,14 @@ def trim_to_first_n_epochs( def sp3_hlm_trans( a: _pd.DataFrame, b: _pd.DataFrame, -) -> tuple[_pd.DataFrame, list]: +) -> tuple[_pd.DataFrame, Tuple[_np.ndarray, _np.ndarray]]: """ Rotates sp3_b into sp3_a. :param _pd.DataFrame a: The sp3_a DataFrame. :param _pd.DataFrame b: The sp3_b DataFrame. - :return tuple[_pd.DataFrame, list]: A tuple containing the updated sp3_b DataFrame and the HLM array with applied computed parameters and residuals. + :return tuple[_pd.DataFrame, Tuple[_np.ndarray, _np.ndarray]]: A tuple containing the updated sp3_b DataFrame and + a nested tuple containing the HLM array with applied computed parameters and residuals. TODO: is it residules first? """ hlm = _gn_transform.get_helmert7(pt1=a.EST[["X", "Y", "Z"]].values, pt2=b.EST[["X", "Y", "Z"]].values) b.iloc[:, :3] = _gn_transform.transform7(xyz_in=b.EST[["X", "Y", "Z"]].values, hlm_params=hlm[0]) From 6fa2f97c95bdd9efa1355f67823f18d4680e002b Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 30 Jan 2025 11:09:27 +0000 Subject: [PATCH 18/19] NPI-3688 remove todo after checking functionality --- gnssanalysis/gn_io/sp3.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index cd629be..e47840f 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -1236,8 +1236,6 @@ def gen_sp3_header(sp3_df: _pd.DataFrame, output_comments: bool = False, strict_ if (short_by_lines := 4 - len(sp3_comment_lines)) > 0: sp3_comment_lines.extend([SP3_COMMENT_START] * short_by_lines) - # TODO check the above this appends lines, not an array of lines - write test for this. 
- # Put the newlines back on the end of each comment line, before merging into the output header sp3_comment_lines = [line + "\n" for line in sp3_comment_lines] return "".join(line1 + line2 + sats_header.tolist() + sv_orb_head.tolist() + head_c + head_fi + sp3_comment_lines) From d5f5441802c908cb90b71f0a576ba2b9847fdc99 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Thu, 30 Jan 2025 11:22:08 +0000 Subject: [PATCH 19/19] NPI-3688 cleanup docstring and parameter name relating to stricter SP3 checks, to better reflect current implementation state --- gnssanalysis/gn_io/sp3.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index e47840f..ea3f63f 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -682,10 +682,10 @@ def read_sp3( drop_offline_sats: bool = False, continue_on_ep_ev_encountered: bool = True, check_header_vs_filename_vs_content_discrepancies: bool = False, - # These only apply when the above is enabled: + # The following two apply when the above is enabled: skip_filename_in_discrepancy_check: bool = False, continue_on_discrepancies: bool = False, - enforce_strict_format_compliance: bool = False, # TODO raise exceptions if anything is not to spec, even if it wouldn't necessarily break processing. E.g. min number of SV entries, min number of comments, comments having a leading space, etc. + stricter_format_checks: bool = False, ) -> _pd.DataFrame: """Reads an SP3 file and returns the data as a pandas DataFrame. @@ -700,11 +700,17 @@ def read_sp3( raise a NotImplementedError instead. :param bool check_header_vs_filename_vs_content_discrepancies: enable discrepancy checks on SP3 content vs header vs filename. + :param bool skip_filename_in_discrepancy_check: If discrepancy checks enabled (see above), this allows skipping + the filename part of the checks, even if filename is available. 
:param bool continue_on_discrepancies: (Only applicable with check_header_vs_filename_vs_content_discrepancies) If True, logs a warning and continues if major discrepancies are detected between the SP3 content, SP3 header, and SP3 filename (if available). Set to false to raise a ValueError instead. - :param bool skip_filename_in_discrepancy_check: If discrepancy checks enabled (see above), this allows skipping - the filename part of the checks, even if filename is available. + :param bool stricter_format_checks: (work in progress) raise more exceptions if things are not to SP3 spec. + Currenly this only influences whether trying to read an SP3 version b or c file (not officially supported) + throws an exception or just logs a warning. In future this option may also flag things that are technically + incorrect but wouldn't necessarily break processing. E.g. less than min number of SV entries, min number of + comments, comments having a leading space, etc. + This parameter could be renamed to enforce_strict_format_compliance once more extensive checks are added. :return _pd.DataFrame: The SP3 data as a DataFrame. :raises FileNotFoundError: If the SP3 file specified by sp3_path_or_bytes does not exist. :raises Exception: For other errors reading SP3 file/bytes @@ -719,7 +725,7 @@ def read_sp3( content = _gn_io.common.path2bytes(sp3_path_or_bytes) # Will raise EOFError if file empty # Extract and check version. Raises exception for completely unsupported versions, logs warning for version b and c. - check_sp3_version(content, strict=enforce_strict_format_compliance) + check_sp3_version(sp3_bytes=content, strict=stricter_format_checks) # Match comment lines, including the trailing newline (so that it gets removed in a second too): ^(\/\*.*$\n) comment_lines_bytes: list[bytes] = _RE_SP3_COMMENT_STRIP.findall(content)