Skip to content

Commit

Permalink
NPI-3501 better empty input file checks:
Browse files Browse the repository at this point in the history
- path2bytes() now raises exceptions on all errors rather than returning None. It also raises an EOFError if the input data is empty. It has also been restructured slightly for clarity and type checking.
- read_sp3() now accepts a Path or bytes input, and writes an empty string into the 'path' attribute if the input was passed as bytes.
  • Loading branch information
treefern committed Sep 10, 2024
1 parent 4870e14 commit eb89d6b
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 21 deletions.
41 changes: 26 additions & 15 deletions gnssanalysis/gn_io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,32 +15,43 @@
MB = 1024 * 1024


def path2bytes(path_or_bytes: _Union[_Path, str, bytes]) -> bytes:
    """Main file reading function. Checks file extension and calls appropriate reading function.

    Passes through bytes if given, thus one may now routinely leave it at the top of a specific
    file reading function and be able to call that function with bytes or a str / Path path
    without additional modifications.

    :param _Union[_Path, str, bytes] path_or_bytes: input file path (Path or str), or the file
        content already loaded as bytes
    :return bytes: bytes object, decompressed if necessary
    :raise TypeError: input was not a Path, str, or bytes
    :raise FileNotFoundError: path didn't resolve to a file
    :raise EOFError: if input bytes is empty, input file is empty, or decompressed result of input file is empty.
    :raise Exception: wrapped exception for all other exceptions raised while reading
    """
    if isinstance(path_or_bytes, bytes):  # no reading is necessary - pass through.
        if len(path_or_bytes) == 0:
            raise EOFError("Input bytes object was empty!")
        return path_or_bytes

    # Normalise the path to a plain string so the suffix checks below work for both Path and str.
    if isinstance(path_or_bytes, _Path):
        path_string = path_or_bytes.as_posix()
    elif isinstance(path_or_bytes, str):
        path_string = path_or_bytes
    else:
        raise TypeError("Must be Path, str, or bytes")

    try:
        # Reader is selected purely on the file name suffix.
        if path_string.endswith(".Z"):
            databytes = _lzw2bytes(path_string)
        elif path_string.endswith(".gz"):
            databytes = _gz2bytes(path_string)
        else:
            databytes = _txt2bytes(path_string)
    except FileNotFoundError:
        # Let callers handle a missing file directly; bare raise keeps the original traceback.
        raise
    except Exception as e:
        # Wrap everything else with the offending path, chaining the cause explicitly.
        raise Exception(f"Error reading file '{path_string}'. Exception: {e}") from e

    if len(databytes) == 0:
        raise EOFError(f"Input file (or decompressed result of it) was empty. Path: '{path_string}'")
    return databytes


Expand Down
23 changes: 17 additions & 6 deletions gnssanalysis/gn_io/sp3.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
import io as _io
import os as _os
import re as _re
from typing import Literal, Union, List, Tuple
from typing import Literal, Optional, Union, List, Tuple
from pathlib import Path

import numpy as _np
import pandas as _pd
Expand Down Expand Up @@ -238,7 +239,16 @@ def _process_sp3_block(
return temp_sp3


def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _pd.DataFrame:
def description_for_path_or_bytes(path_or_bytes: Union[str, Path, bytes]) -> str:
    """Return a human-readable description of an input source, for use in log messages.

    :param Union[str, Path, bytes] path_or_bytes: a file path (str or Path), or raw data as bytes
    :return str: the path as a string if a str or Path was given, otherwise a fixed placeholder
        message stating that no path is available.
    """
    if isinstance(path_or_bytes, (str, Path)):
        return str(path_or_bytes)
    # Anything that is not a str / Path is treated as in-memory data, which has no path to report.
    return "Data passed as bytes: no path available"


def read_sp3(
sp3_path_or_bytes: Union[str, Path, bytes], pOnly: bool = True, nodata_to_nan: bool = True
) -> _pd.DataFrame:
"""Reads an SP3 file and returns the data as a pandas DataFrame.
Expand All @@ -247,7 +257,8 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _
:param bool nodata_to_nan: If True, converts 0.000000 (indicating nodata) to NaN in the SP3 POS column
and converts 999999* (indicating nodata) to NaN in the SP3 CLK column. Defaults to True.
:return pandas.DataFrame: The SP3 data as a DataFrame.
:raise FileNotFoundError: If the SP3 file specified by sp3_path does not exist.
:raise FileNotFoundError: If the SP3 file specified by sp3_path_or_bytes does not exist.
:raise Exception: For other errors reading SP3 file/bytes
:note: The SP3 file format is a standard format used for representing precise satellite ephemeris and clock data.
This function reads the SP3 file, parses the header information, and extracts the data into a DataFrame.
Expand All @@ -256,7 +267,7 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _
(mm/ps) and remove unnecessary columns. If pOnly is True, only P* values are included in the DataFrame.
If nodata_to_nan is True, nodata values in the SP3 POS and CLK columns are converted to NaN.
"""
content = _gn_io.common.path2bytes(str(sp3_path))
content = _gn_io.common.path2bytes(sp3_path_or_bytes) # Will raise EOFError if file empty

# Match comment lines, including the trailing newline (so that it gets removed in a second too): ^(\/\*.*$\n)
comments: list = _RE_SP3_COMMENT_STRIP.findall(content)
Expand Down Expand Up @@ -306,13 +317,13 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _
logging.warning(
f"Duplicate epoch(s) found in SP3 ({duplicated_indexes.sum()} additional entries, potentially non-unique). "
f"First duplicate (as J2000): {first_dupe} (as date): {first_dupe + _gn_const.J2000_ORIGIN} "
f"SP3 path is: '{str(sp3_path)}'. Duplicates will be removed, keeping first."
f"SP3 path is: '{description_for_path_or_bytes(sp3_path_or_bytes)}'. Duplicates will be removed, keeping first."
)
# Now dedupe them, keeping the first of any clashes:
sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")]
# Write header data to dataframe attributes:
sp3_df.attrs["HEADER"] = parsed_header
sp3_df.attrs["path"] = sp3_path
sp3_df.attrs["path"] = sp3_path_or_bytes if type(sp3_path_or_bytes) in (str, Path) else ""
return sp3_df


Expand Down

0 comments on commit eb89d6b

Please sign in to comment.