From 4870e14d57305885d3fb3d8f075ade635bef7c94 Mon Sep 17 00:00:00 2001
From: EugeneDu-GA
Date: Fri, 6 Sep 2024 19:36:12 +1000
Subject: [PATCH 1/4] Fix "sv" norm type (#51)

* Fix potential errors caused by overwriting "sv" norm type with sv prns

* Cleaning

* Rename variables

* Fix non-existent "both" norm type

* Name pipeline

* Fix default norm_types for clk_difference()

* Clock unit tests from Ron

* Correct file names

* gn_download fixing

* minor

* Trim down clk unit test data

---------

Co-authored-by: Eugene_Du
---
 .github/workflows/python-cicd-units.yml |   2 +
 gnssanalysis/gn_diffaux.py              |  75 +++-----
 gnssanalysis/gn_download.py             |  13 +-
 gnssanalysis/gn_io/sp3.py               |   2 +-
 gnssanalysis/gn_utils.py                |   2 +-
 tests/test_clk.py                       |  68 +++++++
 tests/test_datasets/clk_test_data.py    | 225 ++++++++++++++++++++++++
 tests/test_datasets/sp3_test_data.py    |   7 +-
 tests/test_sp3.py                       |   4 +-
 9 files changed, 339 insertions(+), 59 deletions(-)
 create mode 100644 tests/test_clk.py
 create mode 100644 tests/test_datasets/clk_test_data.py

diff --git a/.github/workflows/python-cicd-units.yml b/.github/workflows/python-cicd-units.yml
index 1a253f4..0551e76 100644
--- a/.github/workflows/python-cicd-units.yml
+++ b/.github/workflows/python-cicd-units.yml
@@ -10,6 +10,8 @@ jobs:
       matrix:
         python-version: ["3.9", "3.10", "3.11", "3.12"]
 
+    name: Build and Test on Python ${{ matrix.python-version }}
+
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
diff --git a/gnssanalysis/gn_diffaux.py b/gnssanalysis/gn_diffaux.py
index fc9ada6..b89579f 100644
--- a/gnssanalysis/gn_diffaux.py
+++ b/gnssanalysis/gn_diffaux.py
@@ -316,7 +316,7 @@ def compare_clk(
 
     :param _pd.DataFrame clk_a: clk dataframe 1
     :param _pd.DataFrame clk_b: clk dataframe 2
-    :param str norm_type: normalization to apply, defaults to "both"
+    :param list norm_types: normalizations to apply, defaults to ["daily", "epoch"]
     :param _Union[_np.ndarray, _pd.Index, None] ext_dt: external datetime values to filter the clk dfs, defaults to None
     :param _Union[_np.ndarray, _pd.Index, None] ext_svs: external satellites to filter the clk dfs, defaults to None
     :raises ValueError: if no common epochs between clk_a and external datetime were found
@@ -358,12 +358,13 @@ def compare_clk(
         _logging.debug("compare_clk: syncing clk_a_unst with common_svs as not equal")
         clk_a_unst = clk_a_unst[common_svs]
 
-    if len(norm_types) != 0:
-        _logging.info(f":_clk_compare:using {norm_types} clk normalization")
-        if "sv" in norm_types:
-            norm_types[norm_types.index("sv")] = _gn_io.clk.select_norm_svs_per_gnss(
+    norm_types_copy = norm_types.copy()  # DO NOT overwrite norm_types otherwise it will cause errors when the function is called in a loop
+    if len(norm_types_copy) != 0:
+        _logging.info(f":compare_clk: using {norm_types_copy} clk normalization")
+        if "sv" in norm_types_copy:
+            norm_types_copy[norm_types_copy.index("sv")] = _gn_io.clk.select_norm_svs_per_gnss(
                 clk_a_unst=clk_a_unst, clk_b_unst=clk_b_unst
-            )
+            )  # get the svs to use for norm and overwrite "sv" with sv prns
 
     clk_a_unst[clk_b_unst.isna()] = (
         _np.nan
@@ -372,11 +373,10 @@ def compare_clk(
         _np.nan
     )  # replace corresponding values in clk_b_unst with NaN where clk_a_unst is NaN
 
-    # get the sv to use for norm and overwrite norm_type value with sv prn code
     _logging.info("---removing common mode from clk 1---")
-    _gn_io.clk.rm_clk_bias(clk_a_unst, norm_types=norm_types)
+    _gn_io.clk.rm_clk_bias(clk_a_unst, norm_types=norm_types_copy)
     _logging.info("---removing common mode from clk 2---")
-    
_gn_io.clk.rm_clk_bias(clk_b_unst, norm_types=norm_types) + _gn_io.clk.rm_clk_bias(clk_b_unst, norm_types=norm_types_copy) return clk_a_unst - clk_b_unst @@ -385,7 +385,7 @@ def sisre( sp3_b: _pd.DataFrame, clk_a: Union[_pd.DataFrame, None] = None, clk_b: Union[_pd.DataFrame, None] = None, - norm_type: str = "both", + norm_types: list = ["daily", "epoch"], output_mode: str = "rms", clean: bool = True, cutoff: Union[int, float, None] = None, @@ -422,8 +422,8 @@ def sisre( Output of read_clk function or a similar clk DataFrame. clk_b : clk DataFrame b (optional) Output of read_clk function or a similar clk DataFrame. - norm_type : str - a norm_type parameter used for the clk values normalisations before + norm_types : list + normalization parameter used for removing the clk common modes before differencing. output_mode : str controls at what stage to output SISRE @@ -475,7 +475,7 @@ def sisre( if (clk_a is not None) & (clk_b is not None): # check if clk data is present clk_diff = ( compare_clk( - clk_a, clk_b, norm_types=norm_type, ext_dt=rac_unstack.index, ext_svs=rac_unstack.columns.levels[1] + clk_a, clk_b, norm_types=norm_types, ext_dt=rac_unstack.index, ext_svs=rac_unstack.columns.levels[1] ) * _gn_const.C_LIGHT ) # units are meters @@ -540,7 +540,7 @@ def diffsp3( plot=False, write_rac_file=False, ): - # Eugene: function name and description are confusing - it seems to output the SISRE instead of SP3 orbit/clock differences against the given tolerance + # TODO: change function name and description as both are confusing - it seems to output the SISRE instead of SP3 orbit/clock differences against the given tolerance """Compares two sp3 files and outputs a dataframe of differences above tolerance if such were found""" sp3_a, sp3_b = _gn_io.sp3.read_sp3(sp3_a_path, nodata_to_nan=nodata_to_nan), _gn_io.sp3.read_sp3( sp3_b_path, nodata_to_nan=nodata_to_nan @@ -553,23 +553,23 @@ def diffsp3( as_sisre = True status = 0 - diff_rac = sisre( + sv_sisre = sisre( sp3_a=sp3_a.iloc[:, :3], sp3_b=sp3_b.iloc[:, :3], clk_a=clk_a, clk_b=clk_b, - norm_type="both", + norm_types=["daily", "epoch"], output_mode="sv", clean=False, hlm_mode=hlm_mode, plot=plot, write_rac_file=write_rac_file, - ) # Eugene: sisre() returns SISRE instead of RAC differences + ) - bad_rac_vals = _diff2msg(diff_rac, tol=tol) - if bad_rac_vals is not None: + bad_sisre_vals = _diff2msg(sv_sisre, tol=tol) + if bad_sisre_vals is not None: _logging.log( - msg=f':diffutil found {"SISRE values" if as_sisre else "estimates"} estimates diffs above {"the extracted STDs" if tol is None else f"{tol:.1E} tolerance"}:\n{bad_rac_vals.to_string(justify="center")}\n', + msg=f':diffutil found {"SISRE values" if as_sisre else "estimates"} diffs above {"the extracted STDs" if tol is None else f"{tol:.1E} tolerance"}:\n{bad_sisre_vals.to_string(justify="center")}\n', level=log_lvl, ) status = -1 @@ -624,12 +624,12 @@ def diffblq(blq_a_path, blq_b_path, tol, log_lvl): return status -def diffclk(clk_a_path, clk_b_path, tol, log_lvl, norm_type="both"): +def diffclk(clk_a_path, clk_b_path, tol, log_lvl, norm_types=["daily", "epoch"]): """Compares two clk files and provides a difference above atol if present. 
If sp3 orbits provided - does analysis using the SISRE values""" clk_a, clk_b = _gn_io.clk.read_clk(clk_a_path), _gn_io.clk.read_clk(clk_b_path) status = 0 - diff_clk = compare_clk(clk_a=clk_a, clk_b=clk_b, norm_types=norm_type) * _gn_const.C_LIGHT + diff_clk = compare_clk(clk_a=clk_a, clk_b=clk_b, norm_types=norm_types) * _gn_const.C_LIGHT bad_clk_vals = _diff2msg(diff_clk, tol=tol) if bad_clk_vals is not None: @@ -698,12 +698,10 @@ def format_index( :param _pd.DataFrame diff_df: The Pandas DataFrame containing SP3 or CLK differences :return None """ - # Convert the epoch indices from J2000 seconds to python datetimes diff_df.index = _pd.MultiIndex.from_tuples( ((idx[0] + _gn_const.J2000_ORIGIN, idx[1]) for idx in diff_df.index.values) ) - # Rename the indices diff_df.index = diff_df.index.set_names(["Epoch", "Satellite"]) @@ -722,25 +720,15 @@ def sp3_difference( base_sp3_df = _gn_io.sp3.read_sp3(str(base_sp3_file)) test_sp3_df = _gn_io.sp3.read_sp3(str(test_sp3_file)) - # Select rows with matching indices and calculate XYZ differences (ECEF) common_indices = base_sp3_df.index.intersection(test_sp3_df.index) diff_est_df = test_sp3_df.loc[common_indices, "EST"] - base_sp3_df.loc[common_indices, "EST"] - # Extract clocks and change the units from ms to ns (read_sp3 will result in sp3 units (ms)) - # TODO: normalise clocks - diff_clk_df = diff_est_df["CLK"].to_frame(name="CLK") * 1e3 - - # Drop clocks and then change the units from km to m (read_sp3 will result in sp3 units (km)) + diff_clk_df = diff_est_df["CLK"].to_frame(name="CLK") * 1e3 # TODO: normalise clocks diff_xyz_df = diff_est_df.drop(columns=["CLK"]) * 1e3 + diff_rac_df = _gn_io.sp3.diff_sp3_rac(base_sp3_df, test_sp3_df, hlm_mode=None) # TODO: hlm_mode - # RAC difference - # TODO: hlm_mode - diff_rac_df = _gn_io.sp3.diff_sp3_rac(base_sp3_df, test_sp3_df, hlm_mode=None) - - # Drop the not-particularly needed 'EST_RAC' multi-index level diff_rac_df.columns = diff_rac_df.columns.droplevel(0) - # Change the units from km to m (diff_sp3_rac will result in sp3 units (km)) diff_rac_df = diff_rac_df * 1e3 diff_sp3_df = diff_xyz_df.join(diff_rac_df) @@ -748,8 +736,6 @@ def sp3_difference( diff_sp3_df["Clock"] = diff_clk_df diff_sp3_df["|Clock|"] = diff_clk_df.abs() - # Change the epoch indices from J2000 seconds to more readable python datetimes - # and rename the indices properly format_index(diff_sp3_df) return diff_sp3_df @@ -758,7 +744,7 @@ def sp3_difference( def clk_difference( base_clk_file: _Path, test_clk_file: _Path, - norm_types: list[str], + norm_types: list = [], ) -> _pd.DataFrame: """ Compare two CLK files to calculate clock differences with common mode removed (if specified) @@ -766,8 +752,8 @@ def clk_difference( :param _Path base_clk_file: Path of the baseline CLK file :param _Path test_clk_file: Path of the test CLK file - :param norm_types list[str]: Normalizations to apply. Available options include 'epoch', 'daily', 'sv', - any satellite PRN, or any combination of them, defaults to None + :param norm_types list: Normalizations to apply. 
Available options include 'epoch', 'daily', 'sv',
+        any satellite PRN, or any combination of them, defaults to empty list
     :return _pd.DataFrame: The Pandas DataFrame containing clock differences
     """
     base_clk_df = _gn_io.clk.read_clk(base_clk_file)
@@ -775,13 +761,9 @@ def clk_difference(
 
     diff_clk_df = compare_clk(test_clk_df, base_clk_df, norm_types=norm_types)
 
-    # Stack diff_clk_df to keep the format consistent with other dataframes (compare_clk() returns unstacked dataframe)
-    # and change the units from s to ns (read_clk() and compare_clk() will result in clk units (s))
     diff_clk_df = diff_clk_df.stack(dropna=False).to_frame(name="Clock") * 1e9
     diff_clk_df["|Clock|"] = diff_clk_df.abs()
 
-    # Change the epoch indices from J2000 seconds to more readable python datetimes
-    # and rename the indices properly
     format_index(diff_clk_df)
 
     return diff_clk_df
@@ -796,12 +778,10 @@ def difference_statistics(
     :param _pd.DataFrame diff_df: The Pandas DataFrame containing SP3 or CLK differences
     :return _pd.DataFrame: The Pandas DataFrame containing statistics of SP3 or CLK differences
     """
-    # Statistics of all satellites
     stats_df = diff_df.describe(percentiles=[0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95])
     stats_df.loc["rms"] = _gn_aux.rms(diff_df)
     stats_df.index = _pd.MultiIndex.from_tuples((("All", idx) for idx in stats_df.index.values))
 
-    # Statistics satellite-by-satellite
     stats_sat = (
         diff_df.groupby("Satellite")
         .describe(percentiles=[0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95])
@@ -810,7 +790,6 @@ def difference_statistics(
     rms_sat = _gn_aux.rms(diff_df, level="Satellite")
     rms_sat.index = _pd.MultiIndex.from_tuples(((sv, "rms") for sv in rms_sat.index.values))
 
-    # Merge above dataframes, rename the indices properly and re-arrange the statistics
     stats_df = _pd.concat([stats_df, stats_sat, rms_sat]).sort_index()
     stats_df.index = stats_df.index.set_names(["Satellite", "Stats"])
     stats_df = stats_df.reindex(
diff --git a/gnssanalysis/gn_download.py b/gnssanalysis/gn_download.py
index 87808f9..e3a8196 100644
--- a/gnssanalysis/gn_download.py
+++ b/gnssanalysis/gn_download.py
@@ -263,6 +263,11 @@ def generate_sampling_rate(file_ext: str, analysis_center: str, solution_type: s
     """
     IGS files following the long filename convention require a content specifier
    Given the file extension, generate the content specifier
+
+    :param str file_ext: 3-char file extension of the file (e.g. SP3, SNX, ERP, etc)
+    :param str analysis_center: 3-char string identifier for Analysis Center
+    :param str solution_type: 3-char string identifier for Solution Type of file
+    :return str: 3-char string identifier for Sampling Rate of the file (e.g. 
15M) """ file_ext = file_ext.upper() sampling_rates = { @@ -294,9 +299,7 @@ def generate_sampling_rate(file_ext: str, analysis_center: str, solution_type: s center_rates = file_rates.get(key, file_rates.get(())) center_rates_found = True break - # else: - # return file_rates.get(()) - if not center_rates_found: # DZ: bug fix + if not center_rates_found: return file_rates.get(()) if isinstance(center_rates, dict): return center_rates.get(solution_type, center_rates.get(None)) @@ -414,7 +417,9 @@ def generate_product_filename( product_filename = f"igs{gps_date.yr[2:]}P{gps_date.gpswk}.snx.Z" else: hour = f"{reference_start.hour:02}" - product_filename = f"igu{gps_date.gpswkD}_{hour}.{file_ext}.Z" + prefix = "igs" if solution_type == "FIN" else "igr" if solution_type == "RAP" else "igu" + product_filename = f"{prefix}{gps_date.gpswkD}_{hour}.{file_ext}.Z" if solution_type == "ULT" else \ + f"{prefix}{gps_date.gpswkD}.{file_ext}.Z" return product_filename, gps_date, reference_start diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 63b7fe7..82e4195 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -812,7 +812,7 @@ def sp3_hlm_trans(a: _pd.DataFrame, b: _pd.DataFrame) -> tuple[_pd.DataFrame, li return b, hlm -# Eugene: move to gn_diffaux.py (and other associated functions as well)? +# TODO: move to gn_diffaux.py (and other associated functions as well)? def diff_sp3_rac( sp3_baseline: _pd.DataFrame, sp3_test: _pd.DataFrame, diff --git a/gnssanalysis/gn_utils.py b/gnssanalysis/gn_utils.py index 946c6f4..7eba4a1 100644 --- a/gnssanalysis/gn_utils.py +++ b/gnssanalysis/gn_utils.py @@ -240,7 +240,7 @@ def clk(ctx, norm): clk_b_path=ctx.parent.params["input"][1], tol=ctx.parent.params["atol"], log_lvl=ctx.parent.params["log_lvl"], - norm_type=norm, + norm_types=norm, ) diffutil_verify_status(status=status, passthrough=ctx.parent.params["passthrough"]) diff --git a/tests/test_clk.py b/tests/test_clk.py new file mode 100644 index 0000000..bd9cbeb --- /dev/null +++ b/tests/test_clk.py @@ -0,0 +1,68 @@ +from pyfakefs.fake_filesystem_unittest import TestCase + +import numpy as np +import pandas as pd + +import gnssanalysis.gn_io.clk as clk +import gnssanalysis.gn_diffaux as gn_diffaux + +from test_datasets.clk_test_data import ( + # first dataset is a truncated version of file IGS0OPSRAP_20240400000_01D_05M_CLK.CLK: + clk_test_data_truncated_igs_rapid as input_data_igs, + # second dataset is a truncated version of file GFZ0OPSRAP_20240400000_01D_05M_CLK.CLK: + clk_test_data_truncated_gfz_rapid as input_data_gfz, +) + + +class TestClk(TestCase): + def setUp(self): + self.setUpPyfakefs() + + def test_clk_read(self): + file_paths = ["/fake/dir/file0.clk", "/fake/dir/file1.clk"] + self.fs.create_file(file_paths[0], contents=input_data_igs) + self.fs.create_file(file_paths[1], contents=input_data_gfz) + + clk_df_igs = clk.read_clk(clk_path=file_paths[0]) + clk_df_gfz = clk.read_clk(clk_path=file_paths[1]) + + self.assertEqual(len(clk_df_igs), 93, msg="Check that data generally read into df as expected") + self.assertEqual(len(clk_df_gfz), 90, msg="Check that data generally read into df as expected") + self.assertEqual(clk_df_igs.index[0][1], 760708800, msg="Check that first epoch is expressed correctly") + self.assertEqual(clk_df_gfz.index[0][1], 760708800, msg="Check that first epoch is expressed correctly") + self.assertEqual(clk_df_igs["EST"].iloc[0], 0.0001688124131169, msg="Check first datapoint is correct") + self.assertEqual(clk_df_gfz["EST"].iloc[0], 
0.000168814651894, msg="Check first datapoint is correct") + self.assertEqual(clk_df_igs["EST"].iloc[-1], -0.0006105557076344, msg="Check last datapoint is correct") + self.assertEqual(clk_df_gfz["EST"].iloc[-1], -0.000610553573006, msg="Check last datapoint is correct") + + def test_compare_clk(self): + file_paths = ["/fake/dir/file0.clk", "/fake/dir/file1.clk"] + self.fs.create_file(file_paths[0], contents=input_data_igs) + self.fs.create_file(file_paths[1], contents=input_data_gfz) + + clk_df_igs = clk.read_clk(clk_path=file_paths[0]) + clk_df_gfz = clk.read_clk(clk_path=file_paths[1]) + + result_default = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz) + result_daily_only = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["daily"]) + result_epoch_only = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["epoch"]) + result_sv_only = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["sv"]) # G01 ref + result_G06 = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["G06"]) + result_daily_epoch_G04 = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["daily", "epoch", "G04"]) + result_epoch_G07 = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["epoch", "G07"]) + result_daily_G08 = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["daily", "G08"]) + result_G09_G11 = gn_diffaux.compare_clk(clk_a=clk_df_igs, clk_b=clk_df_gfz, norm_types=["G09", "G11"]) + + # Test index is as expected + self.assertEqual(result_default.index[0], 760708800) + # Test that a sample value is as expected from each result above + self.assertEqual(result_default["G01"].iloc[0], -4.56406886282918e-12, msg="Check datapoint is correct") + self.assertEqual(result_daily_only["G03"].iloc[0], 2.9891233314493365e-11, msg="Check datapoint is correct") + self.assertEqual(result_epoch_only["G04"].iloc[0], 2.7128617820053325e-12, msg="Check datapoint is correct") + self.assertEqual(result_sv_only["G05"].iloc[0], 1.1623200004470119e-10, msg="Check datapoint is correct") + self.assertEqual(result_G06["G06"].iloc[0], 0.0, msg="Check datapoint is correct") + self.assertEqual(result_daily_epoch_G04["G07"].iloc[0], 1.3071733365871419e-11, msg="Check datapoint is correct") + self.assertEqual(result_epoch_G07["G08"].iloc[0], -3.3217389966032004e-11, msg="Check datapoint is correct") + self.assertEqual(result_daily_G08["G09"].iloc[-1], 1.3818666534399365e-12, msg="Check datapoint is correct") + self.assertEqual(result_G09_G11["G11"].iloc[-1], 0.0, msg="Check datapoint is correct") + self.assertEqual(result_G09_G11["G01"].iloc[-1], 8.94520000606358e-11, msg="Check datapoint is correct") diff --git a/tests/test_datasets/clk_test_data.py b/tests/test_datasets/clk_test_data.py new file mode 100644 index 0000000..e35dfd4 --- /dev/null +++ b/tests/test_datasets/clk_test_data.py @@ -0,0 +1,225 @@ +# Central record of CLK test data sets to be shared across unit tests + +# first dataset is a truncated version of file IGS0OPSRAP_20240400000_01D_05M_CLK.CLK +clk_test_data_truncated_igs_rapid = b""" 3.00 C RINEX VERSION / TYPE +CCLOCK IGSACC @ GA MIT PGM / RUN BY / DATE +GPS week: 2300 Day: 5 MJD: 60349 COMMENT +THE COMBINED CLOCKS ARE A WEIGHTED AVERAGE OF: COMMENT + cod emr esa gfz grg jgx jpl whu COMMENT +THE FOLLOWING REFERENCE CLOCKS WERE USED BY ACs: COMMENT + WTZZ SPT0 HERS BRUX COMMENT +THE COMBINED CLOCKS ARE ALIGNED TO GPS TIME COMMENT +USING THE SATELLITE BROADCAST EPHEMERIDES COMMENT 
+All clocks have been re-aligned to the IGS time scale: IGST COMMENT + 18 LEAP SECONDS + 2 AR AS # / TYPES OF DATA +IGS IGSACC @ GA MIT ANALYSIS CENTER + 167 IGS20 : IGS REALIZATION of THE ITRF2020 # OF SOLN STA / TRF + 31 # OF SOLN SATS +G01 G03 G04 G05 G06 G07 G08 G09 G10 G11 G12 G13 G14 G15 G16 PRN LIST +G17 G18 G19 G20 G21 G22 G23 G24 G25 G26 G27 G28 G29 G30 G31 PRN LIST +G32 PRN LIST +G igs20_2290.atx SYS / PCVS APPLIED + END OF HEADER +AS G01 2024 02 09 00 00 0.000000 2 1.688124131169e-04 2.097025617540e-11 +AS G03 2024 02 09 00 00 0.000000 2 1.809304282459e-04 1.229880405750e-11 +AS G04 2024 02 09 00 00 0.000000 2 2.869716233829e-04 1.460958644100e-11 +AS G05 2024 02 09 00 00 0.000000 2 -1.610880148801e-04 7.395050892370e-12 +AS G06 2024 02 09 00 00 0.000000 2 4.144426103639e-04 1.724965837880e-11 +AS G07 2024 02 09 00 00 0.000000 2 -5.739786238431e-05 2.102718805790e-11 +AS G08 2024 02 09 00 00 0.000000 2 -1.661056878771e-04 2.255511735470e-11 +AS G09 2024 02 09 00 00 0.000000 2 8.584180025149e-05 1.089214113860e-11 +AS G10 2024 02 09 00 00 0.000000 2 -7.256635885751e-05 1.050451294700e-11 +AS G11 2024 02 09 00 00 0.000000 2 -5.693091709181e-04 1.469189346800e-11 +AS G12 2024 02 09 00 00 0.000000 2 -4.761104802781e-04 2.357304357880e-11 +AS G13 2024 02 09 00 00 0.000000 2 6.237638687799e-04 1.301051748320e-11 +AS G14 2024 02 09 00 00 0.000000 2 3.207305758959e-04 1.300550892390e-11 +AS G15 2024 02 09 00 00 0.000000 2 1.257376231969e-04 1.518279956850e-11 +AS G16 2024 02 09 00 00 0.000000 2 -3.667854895481e-04 1.348413222630e-11 +AS G17 2024 02 09 00 00 0.000000 2 7.175777924719e-04 9.421809457670e-12 +AS G18 2024 02 09 00 00 0.000000 2 -5.513497351141e-04 1.869080022320e-11 +AS G19 2024 02 09 00 00 0.000000 2 4.380255308439e-04 1.016437994000e-11 +AS G20 2024 02 09 00 00 0.000000 2 3.882179753799e-04 1.754597986330e-11 +AS G21 2024 02 09 00 00 0.000000 2 1.356084322799e-04 1.613029373800e-11 +AS G22 2024 02 09 00 00 0.000000 2 3.435437085039e-05 2.197525401160e-11 +AS G23 2024 02 09 00 00 0.000000 2 1.521818975999e-04 1.217606955280e-11 +AS G24 2024 02 09 00 00 0.000000 2 -4.285499499451e-04 1.714714182710e-11 +AS G25 2024 02 09 00 00 0.000000 2 4.879964575919e-04 1.892067226400e-11 +AS G26 2024 02 09 00 00 0.000000 2 1.922024746019e-04 1.347356913720e-11 +AS G27 2024 02 09 00 00 0.000000 2 -2.337884902394e-06 2.310856509870e-11 +AS G28 2024 02 09 00 00 0.000000 2 -1.247641780951e-04 1.999656975110e-11 +AS G29 2024 02 09 00 00 0.000000 2 -6.073268762641e-04 1.642966288160e-11 +AS G30 2024 02 09 00 00 0.000000 2 -4.350392467851e-04 2.074406448570e-11 +AS G31 2024 02 09 00 00 0.000000 2 -2.277026597181e-04 1.558513789220e-11 +AS G32 2024 02 09 00 00 0.000000 2 -6.105543656861e-04 1.685238085940e-11 +AS G01 2024 02 09 00 05 0.000000 2 1.688128312935e-04 2.183228068350e-11 +AS G03 2024 02 09 00 05 0.000000 2 1.809377943265e-04 1.316687645420e-11 +AS G04 2024 02 09 00 05 0.000000 2 2.869744013475e-04 1.448271901610e-11 +AS G05 2024 02 09 00 05 0.000000 2 -1.610880212345e-04 9.745531722440e-12 +AS G06 2024 02 09 00 05 0.000000 2 4.144383148835e-04 1.803286586970e-11 +AS G07 2024 02 09 00 05 0.000000 2 -5.740054013711e-05 1.787787183840e-11 +AS G08 2024 02 09 00 05 0.000000 2 -1.661058376965e-04 2.237565129570e-11 +AS G09 2024 02 09 00 05 0.000000 2 8.584556503899e-05 9.254939884070e-12 +AS G10 2024 02 09 00 05 0.000000 2 -7.256702704761e-05 1.492476011780e-11 +AS G11 2024 02 09 00 05 0.000000 2 -5.693131241975e-04 1.410337662760e-11 +AS G12 2024 02 09 00 05 0.000000 2 -4.761115105725e-04 
2.766622739080e-11 +AS G13 2024 02 09 00 05 0.000000 2 6.237648874265e-04 1.475166420370e-11 +AS G14 2024 02 09 00 05 0.000000 2 3.207335717595e-04 1.317071290000e-11 +AS G15 2024 02 09 00 05 0.000000 2 1.257386270025e-04 1.254167262310e-11 +AS G16 2024 02 09 00 05 0.000000 2 -3.667832061525e-04 1.192672389380e-11 +AS G17 2024 02 09 00 05 0.000000 2 7.175770822795e-04 1.141324846150e-11 +AS G18 2024 02 09 00 05 0.000000 2 -5.513523135945e-04 1.699128083780e-11 +AS G19 2024 02 09 00 05 0.000000 2 4.380268257785e-04 1.236597184980e-11 +AS G20 2024 02 09 00 05 0.000000 2 3.882174650515e-04 1.815301870810e-11 +AS G21 2024 02 09 00 05 0.000000 2 1.356076124015e-04 1.324359703490e-11 +AS G22 2024 02 09 00 05 0.000000 2 3.435233526109e-05 2.071439928490e-11 +AS G23 2024 02 09 00 05 0.000000 2 1.521842628355e-04 1.327800505520e-11 +AS G24 2024 02 09 00 05 0.000000 2 -4.285519114115e-04 1.753143636370e-11 +AS G25 2024 02 09 00 05 0.000000 2 4.879967964745e-04 1.942436173690e-11 +AS G26 2024 02 09 00 05 0.000000 2 1.922012622905e-04 1.154892968300e-11 +AS G27 2024 02 09 00 05 0.000000 2 -2.338940953886e-06 2.017085887470e-11 +AS G28 2024 02 09 00 05 0.000000 2 -1.247679288505e-04 2.153582863610e-11 +AS G29 2024 02 09 00 05 0.000000 2 -6.073267320995e-04 1.203197551100e-11 +AS G30 2024 02 09 00 05 0.000000 2 -4.350378236175e-04 2.295738977250e-11 +AS G31 2024 02 09 00 05 0.000000 2 -2.277026272105e-04 1.616281104400e-11 +AS G32 2024 02 09 00 05 0.000000 2 -6.105550569795e-04 1.903361214150e-11 +AS G01 2024 02 09 00 10 0.000000 2 1.688132948826e-04 1.730670533670e-11 +AS G03 2024 02 09 00 10 0.000000 2 1.809451633926e-04 1.365309202660e-11 +AS G04 2024 02 09 00 10 0.000000 2 2.869772769706e-04 1.133122712990e-11 +AS G05 2024 02 09 00 10 0.000000 2 -1.610890036934e-04 9.893279329870e-12 +AS G06 2024 02 09 00 10 0.000000 2 4.144339562696e-04 1.819848717220e-11 +AS G07 2024 02 09 00 10 0.000000 2 -5.740344718401e-05 1.793256650230e-11 +AS G08 2024 02 09 00 10 0.000000 2 -1.661066239514e-04 2.176186663680e-11 +AS G09 2024 02 09 00 10 0.000000 2 8.584933881069e-05 9.448736847670e-12 +AS G10 2024 02 09 00 10 0.000000 2 -7.256745815031e-05 1.932891946120e-11 +AS G11 2024 02 09 00 10 0.000000 2 -5.693170667794e-04 1.426900878410e-11 +AS G12 2024 02 09 00 10 0.000000 2 -4.761126549144e-04 2.704991834810e-11 +AS G13 2024 02 09 00 10 0.000000 2 6.237661387716e-04 1.168442092860e-11 +AS G14 2024 02 09 00 10 0.000000 2 3.207365520786e-04 1.358704258710e-11 +AS G15 2024 02 09 00 10 0.000000 2 1.257398561696e-04 1.865821565060e-11 +AS G16 2024 02 09 00 10 0.000000 2 -3.667806877484e-04 1.237122602500e-11 +AS G17 2024 02 09 00 10 0.000000 2 7.175769202416e-04 9.277588571660e-12 +AS G18 2024 02 09 00 10 0.000000 2 -5.513548362054e-04 1.845309765310e-11 +AS G19 2024 02 09 00 10 0.000000 2 4.380286366646e-04 1.115224585830e-11 +AS G20 2024 02 09 00 10 0.000000 2 3.882169040036e-04 1.786689200450e-11 +AS G21 2024 02 09 00 10 0.000000 2 1.356070408826e-04 1.510782760510e-11 +AS G22 2024 02 09 00 10 0.000000 2 3.435064314339e-05 1.624620391460e-11 +AS G23 2024 02 09 00 10 0.000000 2 1.521866425836e-04 1.484694641530e-11 +AS G24 2024 02 09 00 10 0.000000 2 -4.285538095454e-04 1.946970867170e-11 +AS G25 2024 02 09 00 10 0.000000 2 4.879971448136e-04 1.912208925110e-11 +AS G26 2024 02 09 00 10 0.000000 2 1.922001053916e-04 1.020741644160e-11 +AS G27 2024 02 09 00 10 0.000000 2 -2.339960806488e-06 1.975539775450e-11 +AS G28 2024 02 09 00 10 0.000000 2 -1.247717032874e-04 2.269439232710e-11 +AS G29 2024 02 09 00 10 0.000000 2 
-6.073266263714e-04 1.327666066580e-11 +AS G30 2024 02 09 00 10 0.000000 2 -4.350363429954e-04 2.131942441540e-11 +AS G31 2024 02 09 00 10 0.000000 2 -2.277024601944e-04 1.328644624770e-11 +AS G32 2024 02 09 00 10 0.000000 2 -6.105557076344e-04 1.769249605350e-11 +""" + +# second dataset is a truncated version of file GFZ0OPSRAP_20240400000_01D_05M_CLK.CLK +clk_test_data_truncated_gfz_rapid = b""" 3.00 C RINEX VERSION / TYPE +EPOS-8 GFZ 20240210 084659 LCL PGM / RUN BY / DATE +G IGS20_2290 SYS / PCVS APPLIED +R IGS20_2290 SYS / PCVS APPLIED +E IGS20_2290 SYS / PCVS APPLIED + 2 AS AR # / TYPES OF DATA +GFZ GeoForschungsZentrum Potsdam ANALYSIS CENTER + 1 # OF CLK REF +hers 13212M007 ANALYSIS CLK REF +Clocks are re-aligned to broadcast GPS time COMMENT + 106 IGS20 # OF SOLN STA / TRF + 76 # OF SOLN SATS +G01 G03 G04 G05 G06 G07 G08 G09 G11 G12 G13 G14 G15 G16 G17 PRN LIST +G18 G19 G20 G21 G22 G23 G24 G25 G26 G27 G28 G29 G30 G31 G32 PRN LIST + END OF HEADER +AS G01 2024 2 9 0 0 0.000000 1 0.168814651894E-03 +AS G03 2024 2 9 0 0 0.000000 1 0.180932601106E-03 +AS G04 2024 2 9 0 0 0.000000 1 0.286973891544E-03 +AS G05 2024 2 9 0 0 0.000000 1 -0.161085892335E-03 +AS G06 2024 2 9 0 0 0.000000 1 0.414444914814E-03 +AS G07 2024 2 9 0 0 0.000000 1 -0.573956222146E-04 +AS G08 2024 2 9 0 0 0.000000 1 -0.166103414490E-03 +AS G09 2024 2 9 0 0 0.000000 1 0.858440948432E-04 +AS G11 2024 2 9 0 0 0.000000 1 -0.569306849881E-03 +AS G12 2024 2 9 0 0 0.000000 1 -0.476108092760E-03 +AS G13 2024 2 9 0 0 0.000000 1 0.623766497081E-03 +AS G14 2024 2 9 0 0 0.000000 1 0.320732762823E-03 +AS G15 2024 2 9 0 0 0.000000 1 0.125739830446E-03 +AS G16 2024 2 9 0 0 0.000000 1 -0.366783267283E-03 +AS G17 2024 2 9 0 0 0.000000 1 0.717580032777E-03 +AS G18 2024 2 9 0 0 0.000000 1 -0.551347559124E-03 +AS G19 2024 2 9 0 0 0.000000 1 0.438027771926E-03 +AS G20 2024 2 9 0 0 0.000000 1 0.388220032766E-03 +AS G21 2024 2 9 0 0 0.000000 1 0.135610640004E-03 +AS G22 2024 2 9 0 0 0.000000 1 0.343567436171E-04 +AS G23 2024 2 9 0 0 0.000000 1 0.152184197035E-03 +AS G24 2024 2 9 0 0 0.000000 1 -0.428547681550E-03 +AS G25 2024 2 9 0 0 0.000000 1 0.487998893681E-03 +AS G26 2024 2 9 0 0 0.000000 1 0.192204859196E-03 +AS G27 2024 2 9 0 0 0.000000 1 -0.233548107527E-05 +AS G28 2024 2 9 0 0 0.000000 1 -0.124761891516E-03 +AS G29 2024 2 9 0 0 0.000000 1 -0.607324880608E-03 +AS G30 2024 2 9 0 0 0.000000 1 -0.435036821948E-03 +AS G31 2024 2 9 0 0 0.000000 1 -0.227700275843E-03 +AS G32 2024 2 9 0 0 0.000000 1 -0.610552286249E-03 +AS G01 2024 2 9 0 5 0.000000 1 0.168815104958E-03 +AS G03 2024 2 9 0 5 0.000000 1 0.180940007179E-03 +AS G04 2024 2 9 0 5 0.000000 1 0.286976700304E-03 +AS G05 2024 2 9 0 5 0.000000 1 -0.161085855226E-03 +AS G06 2024 2 9 0 5 0.000000 1 0.414440659587E-03 +AS G07 2024 2 9 0 5 0.000000 1 -0.573982543857E-04 +AS G08 2024 2 9 0 5 0.000000 1 -0.166103520113E-03 +AS G09 2024 2 9 0 5 0.000000 1 0.858479046287E-04 +AS G11 2024 2 9 0 5 0.000000 1 -0.569310763272E-03 +AS G12 2024 2 9 0 5 0.000000 1 -0.476109076224E-03 +AS G13 2024 2 9 0 5 0.000000 1 0.623767566604E-03 +AS G14 2024 2 9 0 5 0.000000 1 0.320735807379E-03 +AS G15 2024 2 9 0 5 0.000000 1 0.125740884191E-03 +AS G16 2024 2 9 0 5 0.000000 1 -0.366780941124E-03 +AS G17 2024 2 9 0 5 0.000000 1 0.717579368100E-03 +AS G18 2024 2 9 0 5 0.000000 1 -0.551350101767E-03 +AS G19 2024 2 9 0 5 0.000000 1 0.438029109401E-03 +AS G20 2024 2 9 0 5 0.000000 1 0.388219567194E-03 +AS G21 2024 2 9 0 5 0.000000 1 0.135609853577E-03 +AS G22 2024 2 9 0 5 0.000000 1 0.343547416486E-04 +AS G23 2024 2 
9 0 5 0.000000 1 0.152186608520E-03 +AS G24 2024 2 9 0 5 0.000000 1 -0.428549602429E-03 +AS G25 2024 2 9 0 5 0.000000 1 0.487999269858E-03 +AS G26 2024 2 9 0 5 0.000000 1 0.192203688436E-03 +AS G27 2024 2 9 0 5 0.000000 1 -0.233648765509E-05 +AS G28 2024 2 9 0 5 0.000000 1 -0.124765599498E-03 +AS G29 2024 2 9 0 5 0.000000 1 -0.607324695831E-03 +AS G30 2024 2 9 0 5 0.000000 1 -0.435035370794E-03 +AS G31 2024 2 9 0 5 0.000000 1 -0.227700204710E-03 +AS G32 2024 2 9 0 5 0.000000 1 -0.610552937750E-03 +AS G01 2024 2 9 0 10 0.000000 1 0.168815582004E-03 +AS G03 2024 2 9 0 10 0.000000 1 0.180947385934E-03 +AS G04 2024 2 9 0 10 0.000000 1 0.286979576744E-03 +AS G05 2024 2 9 0 10 0.000000 1 -0.161086828549E-03 +AS G06 2024 2 9 0 10 0.000000 1 0.414436313230E-03 +AS G07 2024 2 9 0 10 0.000000 1 -0.574011509731E-04 +AS G08 2024 2 9 0 10 0.000000 1 -0.166104290396E-03 +AS G09 2024 2 9 0 10 0.000000 1 0.858516918987E-04 +AS G11 2024 2 9 0 10 0.000000 1 -0.569314690206E-03 +AS G12 2024 2 9 0 10 0.000000 1 -0.476110201152E-03 +AS G13 2024 2 9 0 10 0.000000 1 0.623768834597E-03 +AS G14 2024 2 9 0 10 0.000000 1 0.320738789259E-03 +AS G15 2024 2 9 0 10 0.000000 1 0.125742126934E-03 +AS G16 2024 2 9 0 10 0.000000 1 -0.366778408789E-03 +AS G17 2024 2 9 0 10 0.000000 1 0.717579222049E-03 +AS G18 2024 2 9 0 10 0.000000 1 -0.551352602614E-03 +AS G19 2024 2 9 0 10 0.000000 1 0.438030932112E-03 +AS G20 2024 2 9 0 10 0.000000 1 0.388219028071E-03 +AS G21 2024 2 9 0 10 0.000000 1 0.135609288169E-03 +AS G22 2024 2 9 0 10 0.000000 1 0.343530622033E-04 +AS G23 2024 2 9 0 10 0.000000 1 0.152189002292E-03 +AS G24 2024 2 9 0 10 0.000000 1 -0.428551485682E-03 +AS G25 2024 2 9 0 10 0.000000 1 0.487999638262E-03 +AS G26 2024 2 9 0 10 0.000000 1 0.192202552466E-03 +AS G27 2024 2 9 0 10 0.000000 1 -0.233749230024E-05 +AS G28 2024 2 9 0 10 0.000000 1 -0.124769351610E-03 +AS G29 2024 2 9 0 10 0.000000 1 -0.607324581735E-03 +AS G30 2024 2 9 0 10 0.000000 1 -0.435033874242E-03 +AS G31 2024 2 9 0 10 0.000000 1 -0.227700021100E-03 +AS G32 2024 2 9 0 10 0.000000 1 -0.610553573006E-03 +""" diff --git a/tests/test_datasets/sp3_test_data.py b/tests/test_datasets/sp3_test_data.py index 117c825..da47071 100644 --- a/tests/test_datasets/sp3_test_data.py +++ b/tests/test_datasets/sp3_test_data.py @@ -1,6 +1,6 @@ # Central record of SP3 test data sets to be shared across unit tests -# dataset is part of the IGS benchmark (modified to include non null data on clock) +# first dataset is part of the IGS benchmark (modified to include non null data on clock) sp3_test_data_igs_benchmark_null_clock = b"""#dV2007 4 12 0 0 0.00000000 2 ORBIT IGS14 BHN ESOC ## 1422 345600.00000000 900.00000000 54202 0.0000000000000 + 2 G01G02 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -38,9 +38,10 @@ VG01 26855.435366 -6704.236117 -3062.394499 999999.999999 PG02 11149.555664 21314.099837 11331.977499 123456.999999 VG02 -12578.915944 -7977.396362 26581.116225 999999.999999 -EOF""" +EOF +""" -# second dataset a truncated version of file COD0OPSFIN_20242010000_01D_05M_ORB.SP3 +# second dataset is a truncated version of file COD0OPSFIN_20242010000_01D_05M_ORB.SP3 sp3_test_data_truncated_cod_final = b"""#dP2024 7 19 0 0 0.00000000 2 d+D IGS20 FIT AIUB ## 2323 432000.00000000 300.00000000 60510 0.0000000000000 + 34 G01G02G03G04G05G06G07G08G09G10G11G12G13G14G15G16G17 diff --git a/tests/test_sp3.py b/tests/test_sp3.py index ae54df4..17b8244 100644 --- a/tests/test_sp3.py +++ b/tests/test_sp3.py @@ -8,9 +8,9 @@ import gnssanalysis.gn_io.sp3 as sp3 from test_datasets.sp3_test_data 
import ( - # dataset is part of the IGS benchmark (modified to include non null data on clock): + # first dataset is part of the IGS benchmark (modified to include non null data on clock): sp3_test_data_igs_benchmark_null_clock as input_data, - # second dataset a truncated version of file COD0OPSFIN_20242010000_01D_05M_ORB.SP3: + # second dataset is a truncated version of file COD0OPSFIN_20242010000_01D_05M_ORB.SP3: sp3_test_data_truncated_cod_final as input_data2, ) From eb89d6be0caf9269d69b1c929d4181d9784bf3a1 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Tue, 10 Sep 2024 07:01:09 +0000 Subject: [PATCH 2/4] NPI-3501 better empty input file checks: - path2bytes() now raises exceptions on all errors rather than returning None. It also raises an EOFError if the input data is empty. It has also been restructured slightly for clarity and type checking. - read_sp3() now accepts a Path or bytes input, and writes an empty string into the 'path' attribute if the input was passed as bytes. --- gnssanalysis/gn_io/common.py | 41 +++++++++++++++++++++++------------- gnssanalysis/gn_io/sp3.py | 23 ++++++++++++++------ 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/gnssanalysis/gn_io/common.py b/gnssanalysis/gn_io/common.py index abca0d2..f043fa2 100644 --- a/gnssanalysis/gn_io/common.py +++ b/gnssanalysis/gn_io/common.py @@ -15,32 +15,43 @@ MB = 1024 * 1024 -def path2bytes(path: _Union[_Path, str, bytes]) -> bytes: +def path2bytes(path_or_bytes: _Union[_Path, str, bytes]) -> bytes: """Main file reading function. Checks file extension and calls appropriate reading function. Passes through bytes if given, thus one may not routinely leave it in the top of the specific file reading function and be able to call it with bytes or str path without additional modifications. :param str path: input file path :return bytes: bytes object, decompressed if necessary + :raise FileNotFoundError: path didn't resolve to a file + :raise Exception: wrapped exception for all other exceptions raised + :raise EOFError: if input bytes is empty, input file is empty, or decompressed result of input file is empty. """ - if isinstance(path, bytes): # no reading is necessary - pass through. - return path + if isinstance(path_or_bytes, bytes): # no reading is necessary - pass through. + if len(path_or_bytes) == 0: + raise EOFError("Input bytes object was empty!") + return path_or_bytes + + if isinstance(path_or_bytes, _Path): + path_string = path_or_bytes.as_posix() + elif isinstance(path_or_bytes, str): + path_string = path_or_bytes + else: + raise TypeError("Must be Path, str, or bytes") - if isinstance(path, _Path): - path = path.as_posix() try: - if path.endswith(".Z"): - databytes = _lzw2bytes(path) - elif path.endswith(".gz"): - databytes = _gz2bytes(path) + if path_string.endswith(".Z"): + databytes = _lzw2bytes(path_string) + elif path_string.endswith(".gz"): + databytes = _gz2bytes(path_string) else: - databytes = _txt2bytes(path) - except FileNotFoundError: - _logging.error(f"File {path} not found. Returning empty bytes.") - return None + databytes = _txt2bytes(path_string) + except FileNotFoundError as fe: + raise fe except Exception as e: - _logging.error(f"Error reading file {path} with error {e}. Returning empty bytes.") - return None + raise Exception(f"Error reading file '{path_string}'. Exception: {e}") + + if len(databytes) == 0: + raise EOFError(f"Input file (or decompressed result of it) was empty. 
Path: '{path_string}'") return databytes diff --git a/gnssanalysis/gn_io/sp3.py b/gnssanalysis/gn_io/sp3.py index 82e4195..5ceaa37 100644 --- a/gnssanalysis/gn_io/sp3.py +++ b/gnssanalysis/gn_io/sp3.py @@ -2,7 +2,8 @@ import io as _io import os as _os import re as _re -from typing import Literal, Union, List, Tuple +from typing import Literal, Optional, Union, List, Tuple +from pathlib import Path import numpy as _np import pandas as _pd @@ -238,7 +239,16 @@ def _process_sp3_block( return temp_sp3 -def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _pd.DataFrame: +def description_for_path_or_bytes(path_or_bytes: Union[str, Path, bytes]) -> Optional[str]: + if isinstance(path_or_bytes, str) or isinstance(path_or_bytes, Path): + return str(path_or_bytes) + else: + return "Data passed as bytes: no path available" + + +def read_sp3( + sp3_path_or_bytes: Union[str, Path, bytes], pOnly: bool = True, nodata_to_nan: bool = True +) -> _pd.DataFrame: """Reads an SP3 file and returns the data as a pandas DataFrame. @@ -247,7 +257,8 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _ :param bool nodata_to_nan: If True, converts 0.000000 (indicating nodata) to NaN in the SP3 POS column and converts 999999* (indicating nodata) to NaN in the SP3 CLK column. Defaults to True. :return pandas.DataFrame: The SP3 data as a DataFrame. - :raise FileNotFoundError: If the SP3 file specified by sp3_path does not exist. + :raise FileNotFoundError: If the SP3 file specified by sp3_path_or_bytes does not exist. + :raise Exception: For other errors reading SP3 file/bytes :note: The SP3 file format is a standard format used for representing precise satellite ephemeris and clock data. This function reads the SP3 file, parses the header information, and extracts the data into a DataFrame. @@ -256,7 +267,7 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _ (mm/ps) and remove unnecessary columns. If pOnly is True, only P* values are included in the DataFrame. If nodata_to_nan is True, nodata values in the SP3 POS and CLK columns are converted to NaN. """ - content = _gn_io.common.path2bytes(str(sp3_path)) + content = _gn_io.common.path2bytes(sp3_path_or_bytes) # Will raise EOFError if file empty # Match comment lines, including the trailing newline (so that it gets removed in a second too): ^(\/\*.*$\n) comments: list = _RE_SP3_COMMENT_STRIP.findall(content) @@ -306,13 +317,13 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _ logging.warning( f"Duplicate epoch(s) found in SP3 ({duplicated_indexes.sum()} additional entries, potentially non-unique). " f"First duplicate (as J2000): {first_dupe} (as date): {first_dupe + _gn_const.J2000_ORIGIN} " - f"SP3 path is: '{str(sp3_path)}'. Duplicates will be removed, keeping first." + f"SP3 path is: '{description_for_path_or_bytes(sp3_path_or_bytes)}'. Duplicates will be removed, keeping first." 
) # Now dedupe them, keeping the first of any clashes: sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")] # Write header data to dataframe attributes: sp3_df.attrs["HEADER"] = parsed_header - sp3_df.attrs["path"] = sp3_path + sp3_df.attrs["path"] = sp3_path_or_bytes if type(sp3_path_or_bytes) in (str, Path) else "" return sp3_df From 20daa6d2b8c3221665a7bb758ac4278638c4e928 Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Wed, 11 Sep 2024 06:11:08 +0000 Subject: [PATCH 3/4] NPI-3501 updated unit tests to more closely fit updated empty or invalid file exception handling --- tests/test_common.py | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/tests/test_common.py b/tests/test_common.py index 51d379c..fc0a237 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -1,9 +1,7 @@ import unittest -from unittest.mock import patch, mock_open, MagicMock -from pathlib import Path -import logging +from unittest.mock import patch +from pyfakefs.fake_filesystem_unittest import TestCase -# Assuming the function path2bytes is in a module named common from gnssanalysis.gn_io.common import path2bytes @@ -34,19 +32,25 @@ def test_bytes_input(self): result = path2bytes(b"test data") self.assertEqual(result, b"test data") - @patch("gnssanalysis.gn_io.common._logging.error") - def test_file_not_found(self, mock_logging_error): - with patch("gnssanalysis.gn_io.common._txt2bytes", side_effect=FileNotFoundError): - print("testing path") - result = path2bytes("nonexistent.txt") - self.assertIsNone(result) - mock_logging_error.assert_called_once_with("File nonexistent.txt not found. Returning empty bytes.") - - @patch("gnssanalysis.gn_io.common._logging.error") - def test_generic_exception(self, mock_logging_error): - with patch("gnssanalysis.gn_io.common._txt2bytes", side_effect=Exception("Generic error")): - result = path2bytes("test.txt") - self.assertIsNone(result) - mock_logging_error.assert_called_once_with( - "Error reading file test.txt with error Generic error. Returning empty bytes." - ) + +class TestPath2BytesWithFakeFs(TestCase): + def setUp(self): + self.setUpPyfakefs() + + def test_file_not_found_and_file_read(self): + # Create a file, but not the one we're looking for + self.fs.create_file("testfile.txt", contents=b"hello") + with self.assertRaises(FileNotFoundError): + path2bytes("nonexistent.txt") + + # Now open the file that does exist and check the contents + self.assertEqual(path2bytes("testfile.txt"), b"hello") + + def test_invalid_archive_expand_exception(self): + # Test that trying to unpack an archive file which isn't valid archive data, raises an exception + self.fs.create_file("invalidarchive.gz", contents=b"hello") + self.fs.create_file("invalidarchive.Z", contents=b"hello") + with self.assertRaises(Exception): + path2bytes("invalidarchive.gz") + with self.assertRaises(Exception): + path2bytes("invalidarchive.Z") From a913e014f70cbce5e4308ff9e836a738b9ef659f Mon Sep 17 00:00:00 2001 From: Nathan <95725385+treefern@users.noreply.github.com> Date: Wed, 11 Sep 2024 06:27:17 +0000 Subject: [PATCH 4/4] NPI-3501 updated unit tests for path2bytes, adding a test for empty file exceptions (which was the initial motivation for this whole branch). 
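
A minimal sketch of the behaviour under test (illustrative only, not part of
the diff below): empty input now fails loudly rather than silently returning
None.

    from gnssanalysis.gn_io.common import path2bytes

    try:
        path2bytes(b"")  # empty bytes input; an empty file on disk behaves the same way
    except EOFError as exc:
        print(exc)  # "Input bytes object was empty!"
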
--- tests/test_common.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_common.py b/tests/test_common.py index fc0a237..01ccd82 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -38,7 +38,7 @@ def setUp(self): self.setUpPyfakefs() def test_file_not_found_and_file_read(self): - # Create a file, but not the one we're looking for + # Create a mock file, but not the one we're looking for self.fs.create_file("testfile.txt", contents=b"hello") with self.assertRaises(FileNotFoundError): path2bytes("nonexistent.txt") @@ -46,6 +46,13 @@ def test_file_not_found_and_file_read(self): # Now open the file that does exist and check the contents self.assertEqual(path2bytes("testfile.txt"), b"hello") + def test_empty_file_exception(self): + # Create a mock empty file + self.fs.create_file("emptyfile.txt", contents=b"") + # We raise EOFError for empty files, and (valid) compressed files that expand to a zero-length output + with self.assertRaises(EOFError): + path2bytes("emptyfile.txt") + def test_invalid_archive_expand_exception(self): # Test that trying to unpack an archive file which isn't valid archive data, raises an exception self.fs.create_file("invalidarchive.gz", contents=b"hello")
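
A closing usage sketch for the series (file names here are hypothetical; any
two CLK files sharing epochs and satellites will do). With the PATCH 1/4 fix,
compare_clk() copies the norm_types list internally, so an entry of "sv" is no
longer overwritten with satellite PRNs in the caller's list, and the same list
can be reused safely across repeated calls:

    import gnssanalysis.gn_io.clk as clk
    import gnssanalysis.gn_diffaux as gn_diffaux

    clk_a = clk.read_clk(clk_path="IGS0OPSRAP_20240400000_01D_05M_CLK.CLK")
    clk_b = clk.read_clk(clk_path="GFZ0OPSRAP_20240400000_01D_05M_CLK.CLK")

    norm_types = ["daily", "epoch", "sv"]
    for _ in range(2):  # previously, the second pass would see PRNs instead of "sv"
        clk_diff = gn_diffaux.compare_clk(clk_a=clk_a, clk_b=clk_b, norm_types=norm_types)
    assert norm_types == ["daily", "epoch", "sv"]  # caller's list is left untouched
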