From 4870e14d57305885d3fb3d8f075ade635bef7c94 Mon Sep 17 00:00:00 2001
From: EugeneDu-GA
Date: Fri, 6 Sep 2024 19:36:12 +1000
Subject: [PATCH 1/4] Fix "sv" norm type (#51)

* Fix potential errors caused by overwriting "sv" norm type with sv prns

* Cleaning

* Rename variables

* Fix non-existent "both" norm type

* Name pipeline

* Fix default norm_types for clk_difference()

* Clock unit tests from Ron

* Correct file names

* gn_download fixing

* minor

* Trim down clk unit test data

---------

Co-authored-by: Eugene_Du
---
 .github/workflows/python-cicd-units.yml |   2 +
 gnssanalysis/gn_diffaux.py              |  75 +++-----
 gnssanalysis/gn_download.py             |  13 +-
 gnssanalysis/gn_io/sp3.py               |   2 +-
 gnssanalysis/gn_utils.py                |   2 +-
 tests/test_clk.py                       |  68 +++++++
 tests/test_datasets/clk_test_data.py    | 225 ++++++++++++++++++++++++
 tests/test_datasets/sp3_test_data.py    |   7 +-
 tests/test_sp3.py                       |   4 +-
 9 files changed, 339 insertions(+), 59 deletions(-)
 create mode 100644 tests/test_clk.py
 create mode 100644 tests/test_datasets/clk_test_data.py

diff --git a/.github/workflows/python-cicd-units.yml b/.github/workflows/python-cicd-units.yml
index 1a253f4..0551e76 100644
--- a/.github/workflows/python-cicd-units.yml
+++ b/.github/workflows/python-cicd-units.yml
@@ -10,6 +10,8 @@ jobs:
       matrix:
         python-version: ["3.9", "3.10", "3.11", "3.12"]
 
+    name: Build and Test on Python ${{ matrix.python-version }}
+
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
diff --git a/gnssanalysis/gn_diffaux.py b/gnssanalysis/gn_diffaux.py
index fc9ada6..b89579f 100644
--- a/gnssanalysis/gn_diffaux.py
+++ b/gnssanalysis/gn_diffaux.py
@@ -316,7 +316,7 @@ def compare_clk(
 
     :param _pd.DataFrame clk_a: clk dataframe 1
     :param _pd.DataFrame clk_b: clk dataframe 2
-    :param str norm_type: normalization to apply, defaults to "both"
+    :param list norm_types: normalizations to apply, defaults to ["daily", "epoch"]
     :param _Union[_np.ndarray, _pd.Index, None] ext_dt: external datetime values to filter the clk dfs, defaults to None
     :param _Union[_np.ndarray, _pd.Index, None] ext_svs: external satellites to filter the clk dfs, defaults to None
     :raises ValueError: if no common epochs between clk_a and external datetime were found
@@ -358,12 +358,13 @@ def compare_clk(
         _logging.debug("compare_clk: syncing clk_a_unst with common_svs as not equal")
         clk_a_unst = clk_a_unst[common_svs]
 
-    if len(norm_types) != 0:
-        _logging.info(f":_clk_compare:using {norm_types} clk normalization")
-        if "sv" in norm_types:
-            norm_types[norm_types.index("sv")] = _gn_io.clk.select_norm_svs_per_gnss(
+    norm_types_copy = norm_types.copy()  # DO NOT overwrite norm_types otherwise it will cause errors when the function is called in a loop
+    if len(norm_types_copy) != 0:
+        _logging.info(f":compare_clk: using {norm_types_copy} clk normalization")
+        if "sv" in norm_types_copy:
+            norm_types_copy[norm_types_copy.index("sv")] = _gn_io.clk.select_norm_svs_per_gnss(
                 clk_a_unst=clk_a_unst, clk_b_unst=clk_b_unst
-            )
+            )  # get the svs to use for norm and overwrite "sv" with sv prns
 
     clk_a_unst[clk_b_unst.isna()] = (
         _np.nan
@@ -372,11 +373,10 @@ def compare_clk(
         _np.nan
     )  # replace corresponding values in clk_b_unst with NaN where clk_a_unst is NaN
 
-    # get the sv to use for norm and overwrite norm_type value with sv prn code
     _logging.info("---removing common mode from clk 1---")
-    _gn_io.clk.rm_clk_bias(clk_a_unst, norm_types=norm_types)
+    _gn_io.clk.rm_clk_bias(clk_a_unst, norm_types=norm_types_copy)
     _logging.info("---removing common mode from clk 2---")
-    
_gn_io.clk.rm_clk_bias(clk_b_unst, norm_types=norm_types) + _gn_io.clk.rm_clk_bias(clk_b_unst, norm_types=norm_types_copy) return clk_a_unst - clk_b_unst @@ -385,7 +385,7 @@ def sisre( sp3_b: _pd.DataFrame, clk_a: Union[_pd.DataFrame, None] = None, clk_b: Union[_pd.DataFrame, None] = None, - norm_type: str = "both", + norm_types: list = ["daily", "epoch"], output_mode: str = "rms", clean: bool = True, cutoff: Union[int, float, None] = None, @@ -422,8 +422,8 @@ def sisre( Output of read_clk function or a similar clk DataFrame. clk_b : clk DataFrame b (optional) Output of read_clk function or a similar clk DataFrame. - norm_type : str - a norm_type parameter used for the clk values normalisations before + norm_types : list + normalization parameter used for removing the clk common modes before differencing. output_mode : str controls at what stage to output SISRE @@ -475,7 +475,7 @@ def sisre( if (clk_a is not None) & (clk_b is not None): # check if clk data is present clk_diff = ( compare_clk( - clk_a, clk_b, norm_types=norm_type, ext_dt=rac_unstack.index, ext_svs=rac_unstack.columns.levels[1] + clk_a, clk_b, norm_types=norm_types, ext_dt=rac_unstack.index, ext_svs=rac_unstack.columns.levels[1] ) * _gn_const.C_LIGHT ) # units are meters @@ -540,7 +540,7 @@ def diffsp3( plot=False, write_rac_file=False, ): - # Eugene: function name and description are confusing - it seems to output the SISRE instead of SP3 orbit/clock differences against the given tolerance + # TODO: change function name and description as both are confusing - it seems to output the SISRE instead of SP3 orbit/clock differences against the given tolerance """Compares two sp3 files and outputs a dataframe of differences above tolerance if such were found""" sp3_a, sp3_b = _gn_io.sp3.read_sp3(sp3_a_path, nodata_to_nan=nodata_to_nan), _gn_io.sp3.read_sp3( sp3_b_path, nodata_to_nan=nodata_to_nan @@ -553,23 +553,23 @@ def diffsp3( as_sisre = True status = 0 - diff_rac = sisre( + sv_sisre = sisre( sp3_a=sp3_a.iloc[:, :3], sp3_b=sp3_b.iloc[:, :3], clk_a=clk_a, clk_b=clk_b, - norm_type="both", + norm_types=["daily", "epoch"], output_mode="sv", clean=False, hlm_mode=hlm_mode, plot=plot, write_rac_file=write_rac_file, - ) # Eugene: sisre() returns SISRE instead of RAC differences + ) - bad_rac_vals = _diff2msg(diff_rac, tol=tol) - if bad_rac_vals is not None: + bad_sisre_vals = _diff2msg(sv_sisre, tol=tol) + if bad_sisre_vals is not None: _logging.log( - msg=f':diffutil found {"SISRE values" if as_sisre else "estimates"} estimates diffs above {"the extracted STDs" if tol is None else f"{tol:.1E} tolerance"}:\n{bad_rac_vals.to_string(justify="center")}\n', + msg=f':diffutil found {"SISRE values" if as_sisre else "estimates"} diffs above {"the extracted STDs" if tol is None else f"{tol:.1E} tolerance"}:\n{bad_sisre_vals.to_string(justify="center")}\n', level=log_lvl, ) status = -1 @@ -624,12 +624,12 @@ def diffblq(blq_a_path, blq_b_path, tol, log_lvl): return status -def diffclk(clk_a_path, clk_b_path, tol, log_lvl, norm_type="both"): +def diffclk(clk_a_path, clk_b_path, tol, log_lvl, norm_types=["daily", "epoch"]): """Compares two clk files and provides a difference above atol if present. 
If sp3 orbits provided - does analysis using the SISRE values""" clk_a, clk_b = _gn_io.clk.read_clk(clk_a_path), _gn_io.clk.read_clk(clk_b_path) status = 0 - diff_clk = compare_clk(clk_a=clk_a, clk_b=clk_b, norm_types=norm_type) * _gn_const.C_LIGHT + diff_clk = compare_clk(clk_a=clk_a, clk_b=clk_b, norm_types=norm_types) * _gn_const.C_LIGHT bad_clk_vals = _diff2msg(diff_clk, tol=tol) if bad_clk_vals is not None: @@ -698,12 +698,10 @@ def format_index( :param _pd.DataFrame diff_df: The Pandas DataFrame containing SP3 or CLK differences :return None """ - # Convert the epoch indices from J2000 seconds to python datetimes diff_df.index = _pd.MultiIndex.from_tuples( ((idx[0] + _gn_const.J2000_ORIGIN, idx[1]) for idx in diff_df.index.values) ) - # Rename the indices diff_df.index = diff_df.index.set_names(["Epoch", "Satellite"]) @@ -722,25 +720,15 @@ def sp3_difference( base_sp3_df = _gn_io.sp3.read_sp3(str(base_sp3_file)) test_sp3_df = _gn_io.sp3.read_sp3(str(test_sp3_file)) - # Select rows with matching indices and calculate XYZ differences (ECEF) common_indices = base_sp3_df.index.intersection(test_sp3_df.index) diff_est_df = test_sp3_df.loc[common_indices, "EST"] - base_sp3_df.loc[common_indices, "EST"] - # Extract clocks and change the units from ms to ns (read_sp3 will result in sp3 units (ms)) - # TODO: normalise clocks - diff_clk_df = diff_est_df["CLK"].to_frame(name="CLK") * 1e3 - - # Drop clocks and then change the units from km to m (read_sp3 will result in sp3 units (km)) + diff_clk_df = diff_est_df["CLK"].to_frame(name="CLK") * 1e3 # TODO: normalise clocks diff_xyz_df = diff_est_df.drop(columns=["CLK"]) * 1e3 + diff_rac_df = _gn_io.sp3.diff_sp3_rac(base_sp3_df, test_sp3_df, hlm_mode=None) # TODO: hlm_mode - # RAC difference - # TODO: hlm_mode - diff_rac_df = _gn_io.sp3.diff_sp3_rac(base_sp3_df, test_sp3_df, hlm_mode=None) - - # Drop the not-particularly needed 'EST_RAC' multi-index level diff_rac_df.columns = diff_rac_df.columns.droplevel(0) - # Change the units from km to m (diff_sp3_rac will result in sp3 units (km)) diff_rac_df = diff_rac_df * 1e3 diff_sp3_df = diff_xyz_df.join(diff_rac_df) @@ -748,8 +736,6 @@ def sp3_difference( diff_sp3_df["Clock"] = diff_clk_df diff_sp3_df["|Clock|"] = diff_clk_df.abs() - # Change the epoch indices from J2000 seconds to more readable python datetimes - # and rename the indices properly format_index(diff_sp3_df) return diff_sp3_df @@ -758,7 +744,7 @@ def sp3_difference( def clk_difference( base_clk_file: _Path, test_clk_file: _Path, - norm_types: list[str], + norm_types: list = [], ) -> _pd.DataFrame: """ Compare two CLK files to calculate clock differences with common mode removed (if specified) @@ -766,8 +752,8 @@ def clk_difference( :param _Path base_clk_file: Path of the baseline CLK file :param _Path test_clk_file: Path of the test CLK file - :param norm_types list[str]: Normalizations to apply. Available options include 'epoch', 'daily', 'sv', - any satellite PRN, or any combination of them, defaults to None + :param norm_types list: Normalizations to apply. 
Available options include 'epoch', 'daily', 'sv',
+        any satellite PRN, or any combination of them, defaults to empty list
     :return _pd.DataFrame: The Pandas DataFrame containing clock differences
     """
     base_clk_df = _gn_io.clk.read_clk(base_clk_file)
@@ -775,13 +761,9 @@ def clk_difference(
 
     diff_clk_df = compare_clk(test_clk_df, base_clk_df, norm_types=norm_types)
 
-    # Stack diff_clk_df to keep the format consistent with other dataframes (compare_clk() returns unstacked dataframe)
-    # and change the units from s to ns (read_clk() and compare_clk() will result in clk units (s))
     diff_clk_df = diff_clk_df.stack(dropna=False).to_frame(name="Clock") * 1e9
     diff_clk_df["|Clock|"] = diff_clk_df.abs()
 
-    # Change the epoch indices from J2000 seconds to more readable python datetimes
-    # and rename the indices properly
     format_index(diff_clk_df)
 
     return diff_clk_df
@@ -796,12 +778,10 @@ def difference_statistics(
     :param _pd.DataFrame diff_df: The Pandas DataFrame containing SP3 or CLK differences
     :return _pd.DataFrame: The Pandas DataFrame containing statistics of SP3 or CLK differences
     """
-    # Statistics of all satellites
     stats_df = diff_df.describe(percentiles=[0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95])
     stats_df.loc["rms"] = _gn_aux.rms(diff_df)
     stats_df.index = _pd.MultiIndex.from_tuples((("All", idx) for idx in stats_df.index.values))
 
-    # Statistics satellite-by-satellite
     stats_sat = (
         diff_df.groupby("Satellite")
         .describe(percentiles=[0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95])
@@ -810,7 +790,6 @@ def difference_statistics(
     rms_sat = _gn_aux.rms(diff_df, level="Satellite")
     rms_sat.index = _pd.MultiIndex.from_tuples(((sv, "rms") for sv in rms_sat.index.values))
 
-    # Merge above dataframes, rename the indices properly and re-arrange the statistics
     stats_df = _pd.concat([stats_df, stats_sat, rms_sat]).sort_index()
     stats_df.index = stats_df.index.set_names(["Satellite", "Stats"])
     stats_df = stats_df.reindex(
diff --git a/gnssanalysis/gn_download.py b/gnssanalysis/gn_download.py
index 87808f9..e3a8196 100644
--- a/gnssanalysis/gn_download.py
+++ b/gnssanalysis/gn_download.py
@@ -263,6 +263,11 @@ def generate_sampling_rate(file_ext: str, analysis_center: str, solution_type: s
     """
     IGS files following the long filename convention require a content specifier
    Given the file extension, generate the content specifier
+
+    :param str file_ext: 3-char file extension of the file (e.g. SP3, SNX, ERP, etc)
+    :param str analysis_center: 3-char string identifier for Analysis Center
+    :param str solution_type: 3-char string identifier for Solution Type of file
+    :return str: 3-char string identifier for Sampling Rate of the file (e.g. 
15M) """ file_ext = file_ext.upper() sampling_rates = { @@ -294,9 +299,7 @@ def generate_sampling_rate(file_ext: str, analysis_center: str, solution_type: s center_rates = file_rates.get(key, file_rates.get(())) center_rates_found = True break - # else: - # return file_rates.get(()) - if not center_rates_found: # DZ: bug fix + if not center_rates_found: return file_rates.get(()) if isinstance(center_rates, dict): return center_rates.get(solution_type, center_rates.get(None)) @@ -414,7 +417,9 @@ def generate_product_filename( product_filename = f"igs{gps_date.yr[2:]}P{gps_date.gpswk}.snx.Z" else: hour = f"{reference_start.hour:02}" - product_filename = f"igu{gps_date.gpswkD}_{hour}.{file_ext}.Z" + prefix = "igs" if solution_type == "FIN" else "igr" if solution_type == "RAP" else "igu" + product_filename = f"{prefix}{gps_date.gpswkD}_{hour}.{file_ext}.Z" if solution_type == "ULT" else \ + f"{prefix}{gps_date.gpswkD}.{file_ext}.Z" return product_filename, gps_date, reference_start diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 63b7fe7..82e4195 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -812,7 +812,7 @@ def sp3_hlm_trans(a: _pd.DataFrame, b: _pd.DataFrame) -> tuple[_pd.DataFrame, li return b, hlm -# Eugene: move to gn_diffaux.py (and other associated functions as well)? +# TODO: move to gn_diffaux.py (and other associated functions as well)? def diff_sp3_rac( sp3_baseline: _pd.DataFrame, sp3_test: _pd.DataFrame, diff --git a/gnssanalysis/gn_utils.py b/gnssanalysis/gn_utils.py index 946c6f4..7eba4a1 100644 --- a/gnssanalysis/gn_utils.py +++ b/gnssanalysis/gn_utils.py @@ -240,7 +240,7 @@ def clk(ctx, norm): clk_b_path=ctx.parent.params["input"][1], tol=ctx.parent.params["atol"], log_lvl=ctx.parent.params["log_lvl"], - norm_type=norm, + norm_types=norm, ) diffutil_verify_status(status=status, passthrough=ctx.parent.params["passthrough"]) diff --git a/tests/test_clk.py b/tests/test_clk.py new file mode 100644 index 0000000..bd9cbeb --- /dev/null +++ b/tests/test_clk.py @@ -0,0 +1,68 @@ +from pyfakefs.fake_filesystem_unittest import TestCase + +import numpy as np +import pandas as pd + +import gnssanalysis.gn_io.clk as clk +import gnssanalysis.gn_diffaux as gn_diffaux + +from test_datasets.clk_test_data import ( + # first dataset is a truncated version of file IGS0OPSRAP_20240400000_01D_05M_CLK.CLK: + clk_test_data_truncated_igs_rapid as input_data_igs, + # second dataset is a truncated version of file GFZ0OPSRAP_20240400000_01D_05M_CLK.CLK: + clk_test_data_truncated_gfz_rapid as input_data_gfz, +) + + +class TestClk(TestCase): + def setUp(self): + self.setUpPyfakefs() + + def test_clk_read(self): + file_paths = ["/fake/dir/file0.clk", "/fake/dir/file1.clk"] + self.fs.create_file(file_paths[0], contents=input_data_igs) + self.fs.create_file(file_paths[1], contents=input_data_gfz) + + clk_df_igs = clk.read_clk(clk_path=file_paths[0]) + clk_df_gfz = clk.read_clk(clk_path=file_paths[1]) + + self.assertEqual(len(clk_df_igs), 93, msg="Check that data generally read into df as expected") + self.assertEqual(len(clk_df_gfz), 90, msg="Check that data generally read into df as expected") + self.assertEqual(clk_df_igs.index[0][1], 760708800, msg="Check that first epoch is expressed correctly") + self.assertEqual(clk_df_gfz.index[0][1], 760708800, msg="Check that first epoch is expressed correctly") + self.assertEqual(clk_df_igs["EST"].iloc[0], 0.0001688124131169, msg="Check first datapoint is correct") + self.assertEqual(clk_df_gfz["EST"].iloc[0], 
0.000168814651894, msg="Check first datapoint is correct") + self.assertEqual(clk_df_igs["EST"].iloc[-1], -0.0006105557076344, msg="Check last datapoint is correct") + self.assertEqual(clk_df_gfz["EST"].iloc[-1], -0.000610553573006, msg="Check last datapoint is correct") + + def test_compare_clk(self): + file_paths = ["/fake/dir/file0.clk", "/fake/dir/file1.clk"] + self.fs.create_file(file_paths[0], contents=input_data_igs) + self.fs.create_file(file_paths[1], contents=input_data_gfz) + + clk_df_igs = clk.read_clk(clk_path=file_paths[0]) + clk_df_gfz = clk.read_clk(clk_path=file_paths[1]) + + result_default = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz) + result_daily_only = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["daily"]) + result_epoch_only = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["epoch"]) + result_sv_only = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["sv"]) # G01 ref + result_G06 = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["G06"]) + result_daily_epoch_G04 = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["daily", "epoch", "G04"]) + result_epoch_G07 = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["epoch", "G07"]) + result_daily_G08 = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["daily", "G08"]) + result_G09_G11 = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["G09", "G11"]) + + # Test index is as expected + self.assertEqual(result_default.index[0], 760708800) + # Test that a sample value is as expected from each result above + self.assertEqual(result_default["G01"].iloc[0], -4.56406886282918e-12, msg="Check datapoint is correct") + self.assertEqual(result_daily_only["G03"].iloc[0], 2.9891233314493365e-11, msg="Check datapoint is correct") + self.assertEqual(result_epoch_only["G04"].iloc[0], 2.7128617820053325e-12, msg="Check datapoint is correct") + self.assertEqual(result_sv_only["G05"].iloc[0], 1.1623200004470119e-10, msg="Check datapoint is correct") + self.assertEqual(result_G06["G06"].iloc[0], 0.0, msg="Check datapoint is correct") + self.assertEqual(result_daily_epoch_G04["G07"].iloc[0], 1.3071733365871419e-11, msg="Check datapoint is correct") + self.assertEqual(result_epoch_G07["G08"].iloc[0], -3.3217389966032004e-11, msg="Check datapoint is correct") + self.assertEqual(result_daily_G08["G09"].iloc[-1], 1.3818666534399365e-12, msg="Check datapoint is correct") + self.assertEqual(result_G09_G11["G11"].iloc[-1], 0.0, msg="Check datapoint is correct") + self.assertEqual(result_G09_G11["G01"].iloc[-1], 8.94520000606358e-11, msg="Check datapoint is correct") diff --git a/tests/test_datasets/clk_test_data.py b/tests/test_datasets/clk_test_data.py new file mode 100644 index 0000000..e35dfd4 --- /dev/null +++ b/tests/test_datasets/clk_test_data.py @@ -0,0 +1,225 @@ +# Central record of CLK test data sets to be shared across unit tests + +# first dataset is a truncated version of file IGS0OPSRAP_20240400000_01D_05M_CLK.CLK +clk_test_data_truncated_igs_rapid = b""" 3.00 C RINEX VERSION / TYPE +CCLOCK IGSACC @ GA MIT PGM / RUN BY / DATE +GPS week: 2300 Day: 5 MJD: 60349 COMMENT +THE COMBINED CLOCKS ARE A WEIGHTED AVERAGE OF: COMMENT + cod emr esa gfz grg jgx jpl whu COMMENT +THE FOLLOWING REFERENCE CLOCKS WERE USED BY ACs: COMMENT + WTZZ SPT0 HERS BRUX COMMENT +THE COMBINED CLOCKS ARE ALIGNED TO GPS TIME COMMENT +USING THE SATELLITE BROADCAST EPHEMERIDES COMMENT 
+All clocks have been re-aligned to the IGS time scale: IGST COMMENT + 18 LEAP SECONDS + 2 AR AS # / TYPES OF DATA +IGS IGSACC @ GA MIT ANALYSIS CENTER + 167 IGS20 : IGS REALIZATION of THE ITRF2020 # OF SOLN STA / TRF + 31 # OF SOLN SATS +G01 G03 G04 G05 G06 G07 G08 G09 G10 G11 G12 G13 G14 G15 G16 PRN LIST +G17 G18 G19 G20 G21 G22 G23 G24 G25 G26 G27 G28 G29 G30 G31 PRN LIST +G32 PRN LIST +G igs20_2290.atx SYS / PCVS APPLIED + END OF HEADER +AS G01 2024 02 09 00 00 0.000000 2 1.688124131169e-04 2.097025617540e-11 +AS G03 2024 02 09 00 00 0.000000 2 1.809304282459e-04 1.229880405750e-11 +AS G04 2024 02 09 00 00 0.000000 2 2.869716233829e-04 1.460958644100e-11 +AS G05 2024 02 09 00 00 0.000000 2 -1.610880148801e-04 7.395050892370e-12 +AS G06 2024 02 09 00 00 0.000000 2 4.144426103639e-04 1.724965837880e-11 +AS G07 2024 02 09 00 00 0.000000 2 -5.739786238431e-05 2.102718805790e-11 +AS G08 2024 02 09 00 00 0.000000 2 -1.661056878771e-04 2.255511735470e-11 +AS G09 2024 02 09 00 00 0.000000 2 8.584180025149e-05 1.089214113860e-11 +AS G10 2024 02 09 00 00 0.000000 2 -7.256635885751e-05 1.050451294700e-11 +AS G11 2024 02 09 00 00 0.000000 2 -5.693091709181e-04 1.469189346800e-11 +AS G12 2024 02 09 00 00 0.000000 2 -4.761104802781e-04 2.357304357880e-11 +AS G13 2024 02 09 00 00 0.000000 2 6.237638687799e-04 1.301051748320e-11 +AS G14 2024 02 09 00 00 0.000000 2 3.207305758959e-04 1.300550892390e-11 +AS G15 2024 02 09 00 00 0.000000 2 1.257376231969e-04 1.518279956850e-11 +AS G16 2024 02 09 00 00 0.000000 2 -3.667854895481e-04 1.348413222630e-11 +AS G17 2024 02 09 00 00 0.000000 2 7.175777924719e-04 9.421809457670e-12 +AS G18 2024 02 09 00 00 0.000000 2 -5.513497351141e-04 1.869080022320e-11 +AS G19 2024 02 09 00 00 0.000000 2 4.380255308439e-04 1.016437994000e-11 +AS G20 2024 02 09 00 00 0.000000 2 3.882179753799e-04 1.754597986330e-11 +AS G21 2024 02 09 00 00 0.000000 2 1.356084322799e-04 1.613029373800e-11 +AS G22 2024 02 09 00 00 0.000000 2 3.435437085039e-05 2.197525401160e-11 +AS G23 2024 02 09 00 00 0.000000 2 1.521818975999e-04 1.217606955280e-11 +AS G24 2024 02 09 00 00 0.000000 2 -4.285499499451e-04 1.714714182710e-11 +AS G25 2024 02 09 00 00 0.000000 2 4.879964575919e-04 1.892067226400e-11 +AS G26 2024 02 09 00 00 0.000000 2 1.922024746019e-04 1.347356913720e-11 +AS G27 2024 02 09 00 00 0.000000 2 -2.337884902394e-06 2.310856509870e-11 +AS G28 2024 02 09 00 00 0.000000 2 -1.247641780951e-04 1.999656975110e-11 +AS G29 2024 02 09 00 00 0.000000 2 -6.073268762641e-04 1.642966288160e-11 +AS G30 2024 02 09 00 00 0.000000 2 -4.350392467851e-04 2.074406448570e-11 +AS G31 2024 02 09 00 00 0.000000 2 -2.277026597181e-04 1.558513789220e-11 +AS G32 2024 02 09 00 00 0.000000 2 -6.105543656861e-04 1.685238085940e-11 +AS G01 2024 02 09 00 05 0.000000 2 1.688128312935e-04 2.183228068350e-11 +AS G03 2024 02 09 00 05 0.000000 2 1.809377943265e-04 1.316687645420e-11 +AS G04 2024 02 09 00 05 0.000000 2 2.869744013475e-04 1.448271901610e-11 +AS G05 2024 02 09 00 05 0.000000 2 -1.610880212345e-04 9.745531722440e-12 +AS G06 2024 02 09 00 05 0.000000 2 4.144383148835e-04 1.803286586970e-11 +AS G07 2024 02 09 00 05 0.000000 2 -5.740054013711e-05 1.787787183840e-11 +AS G08 2024 02 09 00 05 0.000000 2 -1.661058376965e-04 2.237565129570e-11 +AS G09 2024 02 09 00 05 0.000000 2 8.584556503899e-05 9.254939884070e-12 +AS G10 2024 02 09 00 05 0.000000 2 -7.256702704761e-05 1.492476011780e-11 +AS G11 2024 02 09 00 05 0.000000 2 -5.693131241975e-04 1.410337662760e-11 +AS G12 2024 02 09 00 05 0.000000 2 -4.761115105725e-04 
2.766622739080e-11 +AS G13 2024 02 09 00 05 0.000000 2 6.237648874265e-04 1.475166420370e-11 +AS G14 2024 02 09 00 05 0.000000 2 3.207335717595e-04 1.317071290000e-11 +AS G15 2024 02 09 00 05 0.000000 2 1.257386270025e-04 1.254167262310e-11 +AS G16 2024 02 09 00 05 0.000000 2 -3.667832061525e-04 1.192672389380e-11 +AS G17 2024 02 09 00 05 0.000000 2 7.175770822795e-04 1.141324846150e-11 +AS G18 2024 02 09 00 05 0.000000 2 -5.513523135945e-04 1.699128083780e-11 +AS G19 2024 02 09 00 05 0.000000 2 4.380268257785e-04 1.236597184980e-11 +AS G20 2024 02 09 00 05 0.000000 2 3.882174650515e-04 1.815301870810e-11 +AS G21 2024 02 09 00 05 0.000000 2 1.356076124015e-04 1.324359703490e-11 +AS G22 2024 02 09 00 05 0.000000 2 3.435233526109e-05 2.071439928490e-11 +AS G23 2024 02 09 00 05 0.000000 2 1.521842628355e-04 1.327800505520e-11 +AS G24 2024 02 09 00 05 0.000000 2 -4.285519114115e-04 1.753143636370e-11 +AS G25 2024 02 09 00 05 0.000000 2 4.879967964745e-04 1.942436173690e-11 +AS G26 2024 02 09 00 05 0.000000 2 1.922012622905e-04 1.154892968300e-11 +AS G27 2024 02 09 00 05 0.000000 2 -2.338940953886e-06 2.017085887470e-11 +AS G28 2024 02 09 00 05 0.000000 2 -1.247679288505e-04 2.153582863610e-11 +AS G29 2024 02 09 00 05 0.000000 2 -6.073267320995e-04 1.203197551100e-11 +AS G30 2024 02 09 00 05 0.000000 2 -4.350378236175e-04 2.295738977250e-11 +AS G31 2024 02 09 00 05 0.000000 2 -2.277026272105e-04 1.616281104400e-11 +AS G32 2024 02 09 00 05 0.000000 2 -6.105550569795e-04 1.903361214150e-11 +AS G01 2024 02 09 00 10 0.000000 2 1.688132948826e-04 1.730670533670e-11 +AS G03 2024 02 09 00 10 0.000000 2 1.809451633926e-04 1.365309202660e-11 +AS G04 2024 02 09 00 10 0.000000 2 2.869772769706e-04 1.133122712990e-11 +AS G05 2024 02 09 00 10 0.000000 2 -1.610890036934e-04 9.893279329870e-12 +AS G06 2024 02 09 00 10 0.000000 2 4.144339562696e-04 1.819848717220e-11 +AS G07 2024 02 09 00 10 0.000000 2 -5.740344718401e-05 1.793256650230e-11 +AS G08 2024 02 09 00 10 0.000000 2 -1.661066239514e-04 2.176186663680e-11 +AS G09 2024 02 09 00 10 0.000000 2 8.584933881069e-05 9.448736847670e-12 +AS G10 2024 02 09 00 10 0.000000 2 -7.256745815031e-05 1.932891946120e-11 +AS G11 2024 02 09 00 10 0.000000 2 -5.693170667794e-04 1.426900878410e-11 +AS G12 2024 02 09 00 10 0.000000 2 -4.761126549144e-04 2.704991834810e-11 +AS G13 2024 02 09 00 10 0.000000 2 6.237661387716e-04 1.168442092860e-11 +AS G14 2024 02 09 00 10 0.000000 2 3.207365520786e-04 1.358704258710e-11 +AS G15 2024 02 09 00 10 0.000000 2 1.257398561696e-04 1.865821565060e-11 +AS G16 2024 02 09 00 10 0.000000 2 -3.667806877484e-04 1.237122602500e-11 +AS G17 2024 02 09 00 10 0.000000 2 7.175769202416e-04 9.277588571660e-12 +AS G18 2024 02 09 00 10 0.000000 2 -5.513548362054e-04 1.845309765310e-11 +AS G19 2024 02 09 00 10 0.000000 2 4.380286366646e-04 1.115224585830e-11 +AS G20 2024 02 09 00 10 0.000000 2 3.882169040036e-04 1.786689200450e-11 +AS G21 2024 02 09 00 10 0.000000 2 1.356070408826e-04 1.510782760510e-11 +AS G22 2024 02 09 00 10 0.000000 2 3.435064314339e-05 1.624620391460e-11 +AS G23 2024 02 09 00 10 0.000000 2 1.521866425836e-04 1.484694641530e-11 +AS G24 2024 02 09 00 10 0.000000 2 -4.285538095454e-04 1.946970867170e-11 +AS G25 2024 02 09 00 10 0.000000 2 4.879971448136e-04 1.912208925110e-11 +AS G26 2024 02 09 00 10 0.000000 2 1.922001053916e-04 1.020741644160e-11 +AS G27 2024 02 09 00 10 0.000000 2 -2.339960806488e-06 1.975539775450e-11 +AS G28 2024 02 09 00 10 0.000000 2 -1.247717032874e-04 2.269439232710e-11 +AS G29 2024 02 09 00 10 0.000000 2 
-6.073266263714e-04 1.327666066580e-11 +AS G30 2024 02 09 00 10 0.000000 2 -4.350363429954e-04 2.131942441540e-11 +AS G31 2024 02 09 00 10 0.000000 2 -2.277024601944e-04 1.328644624770e-11 +AS G32 2024 02 09 00 10 0.000000 2 -6.105557076344e-04 1.769249605350e-11 +""" + +# second dataset is a truncated version of file GFZ0OPSRAP_20240400000_01D_05M_CLK.CLK +clk_test_data_truncated_gfz_rapid = b""" 3.00 C RINEX VERSION / TYPE +EPOS-8 GFZ 20240210 084659 LCL PGM / RUN BY / DATE +G IGS20_2290 SYS / PCVS APPLIED +R IGS20_2290 SYS / PCVS APPLIED +E IGS20_2290 SYS / PCVS APPLIED + 2 AS AR # / TYPES OF DATA +GFZ GeoForschungsZentrum Potsdam ANALYSIS CENTER + 1 # OF CLK REF +hers 13212M007 ANALYSIS CLK REF +Clocks are re-aligned to broadcast GPS time COMMENT + 106 IGS20 # OF SOLN STA / TRF + 76 # OF SOLN SATS +G01 G03 G04 G05 G06 G07 G08 G09 G11 G12 G13 G14 G15 G16 G17 PRN LIST +G18 G19 G20 G21 G22 G23 G24 G25 G26 G27 G28 G29 G30 G31 G32 PRN LIST + END OF HEADER +AS G01 2024 2 9 0 0 0.000000 1 0.168814651894E-03 +AS G03 2024 2 9 0 0 0.000000 1 0.180932601106E-03 +AS G04 2024 2 9 0 0 0.000000 1 0.286973891544E-03 +AS G05 2024 2 9 0 0 0.000000 1 -0.161085892335E-03 +AS G06 2024 2 9 0 0 0.000000 1 0.414444914814E-03 +AS G07 2024 2 9 0 0 0.000000 1 -0.573956222146E-04 +AS G08 2024 2 9 0 0 0.000000 1 -0.166103414490E-03 +AS G09 2024 2 9 0 0 0.000000 1 0.858440948432E-04 +AS G11 2024 2 9 0 0 0.000000 1 -0.569306849881E-03 +AS G12 2024 2 9 0 0 0.000000 1 -0.476108092760E-03 +AS G13 2024 2 9 0 0 0.000000 1 0.623766497081E-03 +AS G14 2024 2 9 0 0 0.000000 1 0.320732762823E-03 +AS G15 2024 2 9 0 0 0.000000 1 0.125739830446E-03 +AS G16 2024 2 9 0 0 0.000000 1 -0.366783267283E-03 +AS G17 2024 2 9 0 0 0.000000 1 0.717580032777E-03 +AS G18 2024 2 9 0 0 0.000000 1 -0.551347559124E-03 +AS G19 2024 2 9 0 0 0.000000 1 0.438027771926E-03 +AS G20 2024 2 9 0 0 0.000000 1 0.388220032766E-03 +AS G21 2024 2 9 0 0 0.000000 1 0.135610640004E-03 +AS G22 2024 2 9 0 0 0.000000 1 0.343567436171E-04 +AS G23 2024 2 9 0 0 0.000000 1 0.152184197035E-03 +AS G24 2024 2 9 0 0 0.000000 1 -0.428547681550E-03 +AS G25 2024 2 9 0 0 0.000000 1 0.487998893681E-03 +AS G26 2024 2 9 0 0 0.000000 1 0.192204859196E-03 +AS G27 2024 2 9 0 0 0.000000 1 -0.233548107527E-05 +AS G28 2024 2 9 0 0 0.000000 1 -0.124761891516E-03 +AS G29 2024 2 9 0 0 0.000000 1 -0.607324880608E-03 +AS G30 2024 2 9 0 0 0.000000 1 -0.435036821948E-03 +AS G31 2024 2 9 0 0 0.000000 1 -0.227700275843E-03 +AS G32 2024 2 9 0 0 0.000000 1 -0.610552286249E-03 +AS G01 2024 2 9 0 5 0.000000 1 0.168815104958E-03 +AS G03 2024 2 9 0 5 0.000000 1 0.180940007179E-03 +AS G04 2024 2 9 0 5 0.000000 1 0.286976700304E-03 +AS G05 2024 2 9 0 5 0.000000 1 -0.161085855226E-03 +AS G06 2024 2 9 0 5 0.000000 1 0.414440659587E-03 +AS G07 2024 2 9 0 5 0.000000 1 -0.573982543857E-04 +AS G08 2024 2 9 0 5 0.000000 1 -0.166103520113E-03 +AS G09 2024 2 9 0 5 0.000000 1 0.858479046287E-04 +AS G11 2024 2 9 0 5 0.000000 1 -0.569310763272E-03 +AS G12 2024 2 9 0 5 0.000000 1 -0.476109076224E-03 +AS G13 2024 2 9 0 5 0.000000 1 0.623767566604E-03 +AS G14 2024 2 9 0 5 0.000000 1 0.320735807379E-03 +AS G15 2024 2 9 0 5 0.000000 1 0.125740884191E-03 +AS G16 2024 2 9 0 5 0.000000 1 -0.366780941124E-03 +AS G17 2024 2 9 0 5 0.000000 1 0.717579368100E-03 +AS G18 2024 2 9 0 5 0.000000 1 -0.551350101767E-03 +AS G19 2024 2 9 0 5 0.000000 1 0.438029109401E-03 +AS G20 2024 2 9 0 5 0.000000 1 0.388219567194E-03 +AS G21 2024 2 9 0 5 0.000000 1 0.135609853577E-03 +AS G22 2024 2 9 0 5 0.000000 1 0.343547416486E-04 +AS G23 2024 2 
9 0 5 0.000000 1 0.152186608520E-03 +AS G24 2024 2 9 0 5 0.000000 1 -0.428549602429E-03 +AS G25 2024 2 9 0 5 0.000000 1 0.487999269858E-03 +AS G26 2024 2 9 0 5 0.000000 1 0.192203688436E-03 +AS G27 2024 2 9 0 5 0.000000 1 -0.233648765509E-05 +AS G28 2024 2 9 0 5 0.000000 1 -0.124765599498E-03 +AS G29 2024 2 9 0 5 0.000000 1 -0.607324695831E-03 +AS G30 2024 2 9 0 5 0.000000 1 -0.435035370794E-03 +AS G31 2024 2 9 0 5 0.000000 1 -0.227700204710E-03 +AS G32 2024 2 9 0 5 0.000000 1 -0.610552937750E-03 +AS G01 2024 2 9 0 10 0.000000 1 0.168815582004E-03 +AS G03 2024 2 9 0 10 0.000000 1 0.180947385934E-03 +AS G04 2024 2 9 0 10 0.000000 1 0.286979576744E-03 +AS G05 2024 2 9 0 10 0.000000 1 -0.161086828549E-03 +AS G06 2024 2 9 0 10 0.000000 1 0.414436313230E-03 +AS G07 2024 2 9 0 10 0.000000 1 -0.574011509731E-04 +AS G08 2024 2 9 0 10 0.000000 1 -0.166104290396E-03 +AS G09 2024 2 9 0 10 0.000000 1 0.858516918987E-04 +AS G11 2024 2 9 0 10 0.000000 1 -0.569314690206E-03 +AS G12 2024 2 9 0 10 0.000000 1 -0.476110201152E-03 +AS G13 2024 2 9 0 10 0.000000 1 0.623768834597E-03 +AS G14 2024 2 9 0 10 0.000000 1 0.320738789259E-03 +AS G15 2024 2 9 0 10 0.000000 1 0.125742126934E-03 +AS G16 2024 2 9 0 10 0.000000 1 -0.366778408789E-03 +AS G17 2024 2 9 0 10 0.000000 1 0.717579222049E-03 +AS G18 2024 2 9 0 10 0.000000 1 -0.551352602614E-03 +AS G19 2024 2 9 0 10 0.000000 1 0.438030932112E-03 +AS G20 2024 2 9 0 10 0.000000 1 0.388219028071E-03 +AS G21 2024 2 9 0 10 0.000000 1 0.135609288169E-03 +AS G22 2024 2 9 0 10 0.000000 1 0.343530622033E-04 +AS G23 2024 2 9 0 10 0.000000 1 0.152189002292E-03 +AS G24 2024 2 9 0 10 0.000000 1 -0.428551485682E-03 +AS G25 2024 2 9 0 10 0.000000 1 0.487999638262E-03 +AS G26 2024 2 9 0 10 0.000000 1 0.192202552466E-03 +AS G27 2024 2 9 0 10 0.000000 1 -0.233749230024E-05 +AS G28 2024 2 9 0 10 0.000000 1 -0.124769351610E-03 +AS G29 2024 2 9 0 10 0.000000 1 -0.607324581735E-03 +AS G30 2024 2 9 0 10 0.000000 1 -0.435033874242E-03 +AS G31 2024 2 9 0 10 0.000000 1 -0.227700021100E-03 +AS G32 2024 2 9 0 10 0.000000 1 -0.610553573006E-03 +""" diff --git a/tests/test_datasets/sp3_test_data.py b/tests/test_datasets/sp3_test_data.py index 117c825..da47071 100644 --- a/tests/test_datasets/sp3_test_data.py +++ b/tests/test_datasets/sp3_test_data.py @@ -1,6 +1,6 @@ # Central record of SP3 test data sets to be shared across unit tests -# dataset is part of the IGS benchmark (modified to include non null data on clock) +# first dataset is part of the IGS benchmark (modified to include non null data on clock) sp3_test_data_igs_benchmark_null_clock = b"""#dV2007 4 12 0 0 0.00000000 2 ORBIT IGS14 BHN ESOC ## 1422 345600.00000000 900.00000000 54202 0.0000000000000 + 2 G01G02 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -38,9 +38,10 @@ VG01 26855.435366 -6704.236117 -3062.394499 999999.999999 PG02 11149.555664 21314.099837 11331.977499 123456.999999 VG02 -12578.915944 -7977.396362 26581.116225 999999.999999 -EOF""" +EOF +""" -# second dataset a truncated version of file COD0OPSFIN_20242010000_01D_05M_ORB.SP3 +# second dataset is a truncated version of file COD0OPSFIN_20242010000_01D_05M_ORB.SP3 sp3_test_data_truncated_cod_final = b"""#dP2024 7 19 0 0 0.00000000 2 d+D IGS20 FIT AIUB ## 2323 432000.00000000 300.00000000 60510 0.0000000000000 + 34 G01G02G03G04G05G06G07G08G09G10G11G12G13G14G15G16G17 diff --git a/tests/test_sp3.py b/tests/test_sp3.py index ae54df4..17b8244 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -8,9 +8,9 @@ import gnssanalysis.gn_io.sp3 as sp3 from test_datasets.sp3_test_data 
import ( - # dataset is part of the IGS benchmark (modified to include non null data on clock): + # first dataset is part of the IGS benchmark (modified to include non null data on clock): sp3_test_data_igs_benchmark_null_clock as input_data, - # second dataset a truncated version of file COD0OPSFIN_20242010000_01D_05M_ORB.SP3: + # second dataset is a truncated version of file COD0OPSFIN_20242010000_01D_05M_ORB.SP3: sp3_test_data_truncated_cod_final as input_data2, ) From eb89d6be0caf9269d69b1c929d4181d9784bf3a1 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 10 Sep 2024 07:01:09 +0000 Subject: [PATCH 2/4] NPI-3501 better empty input file checks: - path2bytes() now raises exceptions on all errors rather than returning None. It also raises an EOFError if the input data is empty. It has also been restructured slightly for clarity and type checking. - read_sp3() now accepts a Path or bytes input, and writes an empty string into the 'path' attribute if the input was passed as bytes. --- gnssanalysis/gn_io/common.py | 41 +++++++++++++++++++++++------------- gnssanalysis/gn_io/sp3.py | 23 ++++++++++++++------ 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/gnssanalysis/gn_io/common.py b/gnssanalysis/gn_io/common.py index abca0d2..f043fa2 100644 --- a/gnssanalysis/gn_io/common.py +++ b/gnssanalysis/gn_io/common.py @@ -15,32 +15,43 @@ MB = 1024 * 1024 -def path2bytes(path: _Union[_Path, str, bytes]) -> bytes: +def path2bytes(path_or_bytes: _Union[_Path, str, bytes]) -> bytes: """Main file reading function. Checks file extension and calls appropriate reading function. Passes through bytes if given, thus one may not routinely leave it in the top of the specific file reading function and be able to call it with bytes or str path without additional modifications. :param str path: input file path :return bytes: bytes object, decompressed if necessary + :raise FileNotFoundError: path didn't resolve to a file + :raise Exception: wrapped exception for all other exceptions raised + :raise EOFError: if input bytes is empty, input file is empty, or decompressed result of input file is empty. """ - if isinstance(path, bytes): # no reading is necessary - pass through. - return path + if isinstance(path_or_bytes, bytes): # no reading is necessary - pass through. + if len(path_or_bytes) == 0: + raise EOFError("Input bytes object was empty!") + return path_or_bytes + + if isinstance(path_or_bytes, _Path): + path_string = path_or_bytes.as_posix() + elif isinstance(path_or_bytes, str): + path_string = path_or_bytes + else: + raise TypeError("Must be Path, str, or bytes") - if isinstance(path, _Path): - path = path.as_posix() try: - if path.endswith(".Z"): - databytes = _lzw2bytes(path) - elif path.endswith(".gz"): - databytes = _gz2bytes(path) + if path_string.endswith(".Z"): + databytes = _lzw2bytes(path_string) + elif path_string.endswith(".gz"): + databytes = _gz2bytes(path_string) else: - databytes = _txt2bytes(path) - except FileNotFoundError: - _logging.error(f"File {path} not found. Returning empty bytes.") - return None + databytes = _txt2bytes(path_string) + except FileNotFoundError as fe: + raise fe except Exception as e: - _logging.error(f"Error reading file {path} with error {e}. Returning empty bytes.") - return None + raise Exception(f"Error reading file '{path_string}'. Exception: {e}") + + if len(databytes) == 0: + raise EOFError(f"Input file (or decompressed result of it) was empty. 
Path: '{path_string}'") return databytes diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 82e4195..5ceaa37 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -2,7 +2,8 @@ import io as _io import os as _os import re as _re -from typing import Literal, Union, List, Tuple +from typing import Literal, Optional, Union, List, Tuple +from pathlib import Path import numpy as _np import pandas as _pd @@ -238,7 +239,16 @@ def _process_sp3_block( return temp_sp3 -def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _pd.DataFrame: +def description_for_path_or_bytes(path_or_bytes: Union[str, Path, bytes]) -> Optional[str]: + if isinstance(path_or_bytes, str) or isinstance(path_or_bytes, Path): + return str(path_or_bytes) + else: + return "Data passed as bytes: no path available" + + +def read_sp3( + sp3_path_or_bytes: Union[str, Path, bytes], pOnly: bool = True, nodata_to_nan: bool = True +) -> _pd.DataFrame: """Reads an SP3 file and returns the data as a pandas DataFrame. @@ -247,7 +257,8 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _ :param bool nodata_to_nan: If True, converts 0.000000 (indicating nodata) to NaN in the SP3 POS column and converts 999999* (indicating nodata) to NaN in the SP3 CLK column. Defaults to True. :return pandas.DataFrame: The SP3 data as a DataFrame. - :raise FileNotFoundError: If the SP3 file specified by sp3_path does not exist. + :raise FileNotFoundError: If the SP3 file specified by sp3_path_or_bytes does not exist. + :raise Exception: For other errors reading SP3 file/bytes :note: The SP3 file format is a standard format used for representing precise satellite ephemeris and clock data. This function reads the SP3 file, parses the header information, and extracts the data into a DataFrame. @@ -256,7 +267,7 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _ (mm/ps) and remove unnecessary columns. If pOnly is True, only P* values are included in the DataFrame. If nodata_to_nan is True, nodata values in the SP3 POS and CLK columns are converted to NaN. """ - content = _gn_io.common.path2bytes(str(sp3_path)) + content = _gn_io.common.path2bytes(sp3_path_or_bytes) # Will raise EOFError if file empty # Match comment lines, including the trailing newline (so that it gets removed in a second too): ^(\/\*.*$\n) comments: list = _RE_SP3_COMMENT_STRIP.findall(content) @@ -306,13 +317,13 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _ logging.warning( f"Duplicate epoch(s) found in SP3 ({duplicated_indexes.sum()} additional entries, potentially non-unique). " f"First duplicate (as J2000): {first_dupe} (as date): {first_dupe + _gn_const.J2000_ORIGIN} " - f"SP3 path is: '{str(sp3_path)}'. Duplicates will be removed, keeping first." + f"SP3 path is: '{description_for_path_or_bytes(sp3_path_or_bytes)}'. Duplicates will be removed, keeping first." 
) # Now dedupe them, keeping the first of any clashes: sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")] # Write header data to dataframe attributes: sp3_df.attrs["HEADER"] = parsed_header - sp3_df.attrs["path"] = sp3_path + sp3_df.attrs["path"] = sp3_path_or_bytes if type(sp3_path_or_bytes) in (str, Path) else "" return sp3_df From 20daa6d2b8c3221665a7bb758ac4278638c4e928 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Wed, 11 Sep 2024 06:11:08 +0000 Subject: [PATCH 3/4] NPI-3501 updated unit tests to more closely fit updated empty or invalid file exception handling --- tests/test_common.py | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/tests/test_common.py b/tests/test_common.py index 51d379c..fc0a237 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -1,9 +1,7 @@ import unittest -from unittest.mock import patch, mock_open, MagicMock -from pathlib import Path -import logging +from unittest.mock import patch +from pyfakefs.fake_filesystem_unittest import TestCase -# Assuming the function path2bytes is in a module named common from gnssanalysis.gn_io.common import path2bytes @@ -34,19 +32,25 @@ def test_bytes_input(self): result = path2bytes(b"test data") self.assertEqual(result, b"test data") - @patch("gnssanalysis.gn_io.common._logging.error") - def test_file_not_found(self, mock_logging_error): - with patch("gnssanalysis.gn_io.common._txt2bytes", side_effect=FileNotFoundError): - print("testing path") - result = path2bytes("nonexistent.txt") - self.assertIsNone(result) - mock_logging_error.assert_called_once_with("File nonexistent.txt not found. Returning empty bytes.") - - @patch("gnssanalysis.gn_io.common._logging.error") - def test_generic_exception(self, mock_logging_error): - with patch("gnssanalysis.gn_io.common._txt2bytes", side_effect=Exception("Generic error")): - result = path2bytes("test.txt") - self.assertIsNone(result) - mock_logging_error.assert_called_once_with( - "Error reading file test.txt with error Generic error. Returning empty bytes." - ) + +class TestPath2BytesWithFakeFs(TestCase): + def setUp(self): + self.setUpPyfakefs() + + def test_file_not_found_and_file_read(self): + # Create a file, but not the one we're looking for + self.fs.create_file("testfile.txt", contents=b"hello") + with self.assertRaises(FileNotFoundError): + path2bytes("nonexistent.txt") + + # Now open the file that does exist and check the contents + self.assertEqual(path2bytes("testfile.txt"), b"hello") + + def test_invalid_archive_expand_exception(self): + # Test that trying to unpack an archive file which isn't valid archive data, raises an exception + self.fs.create_file("invalidarchive.gz", contents=b"hello") + self.fs.create_file("invalidarchive.Z", contents=b"hello") + with self.assertRaises(Exception): + path2bytes("invalidarchive.gz") + with self.assertRaises(Exception): + path2bytes("invalidarchive.Z") From a913e014f70cbce5e4308ff9e836a738b9ef659f Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Wed, 11 Sep 2024 06:27:17 +0000 Subject: [PATCH 4/4] NPI-3501 updated unit tests for path2bytes, adding a test for empty file exceptions (which was the initial motivation for this whole branch). 
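
A minimal sketch of the behaviour under test (illustrative only, not part of
the diff below): empty input now fails loudly rather than silently returning
None.

    from gnssanalysis.gn_io.common import path2bytes

    try:
        path2bytes(b"")  # empty bytes input; an empty file on disk behaves the same way
    except EOFError as exc:
        print(exc)  # "Input bytes object was empty!"
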
--- tests/test_common.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_common.py b/tests/test_common.py index fc0a237..01ccd82 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -38,7 +38,7 @@ def setUp(self): self.setUpPyfakefs() def test_file_not_found_and_file_read(self): - # Create a file, but not the one we're looking for + # Create a mock file, but not the one we're looking for self.fs.create_file("testfile.txt", contents=b"hello") with self.assertRaises(FileNotFoundError): path2bytes("nonexistent.txt") @@ -46,6 +46,13 @@ def test_file_not_found_and_file_read(self): # Now open the file that does exist and check the contents self.assertEqual(path2bytes("testfile.txt"), b"hello") + def test_empty_file_exception(self): + # Create a mock empty file + self.fs.create_file("emptyfile.txt", contents=b"") + # We raise EOFError for empty files, and (valid) compressed files that expand to a zero-length output + with self.assertRaises(EOFError): + path2bytes("emptyfile.txt") + def test_invalid_archive_expand_exception(self): # Test that trying to unpack an archive file which isn't valid archive data, raises an exception self.fs.create_file("invalidarchive.gz", contents=b"hello")
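
A closing usage sketch for the series (file names here are hypothetical; any
two CLK files sharing epochs and satellites will do). With the PATCH 1/4 fix,
compare_clk() copies the norm_types list internally, so an entry of "sv" is no
longer overwritten with satellite PRNs in the caller's list, and the same list
can be reused safely across repeated calls:

    import gnssanalysis.gn_io.clk as clk
    import gnssanalysis.gn_diffaux as gn_diffaux

    clk_a = clk.read_clk(clk_path="IGS0OPSRAP_20240400000_01D_05M_CLK.CLK")
    clk_b = clk.read_clk(clk_path="GFZ0OPSRAP_20240400000_01D_05M_CLK.CLK")

    norm_types = ["daily", "epoch", "sv"]
    for _ in range(2):  # previously, the second pass would see PRNs instead of "sv"
        clk_diff = gn_diffaux.compare_clk(clk_a=clk_a, clk_b=clk_b, norm_types=norm_types)
    assert norm_types == ["daily", "epoch", "sv"]  # caller's list is left untouched
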