diff --git a/gnssanalysis/gn_io/igslog.py b/gnssanalysis/gn_io/igslog.py index adea66a..86cf778 100644 --- a/gnssanalysis/gn_io/igslog.py +++ b/gnssanalysis/gn_io/igslog.py @@ -121,11 +121,11 @@ class LogVersionError(Exception): pass -def find_recent_logs(logs_glob_path: str, rnx_glob_path: str = None) -> _pd.DataFrame: +def find_recent_logs(logs_glob_path: str, rnx_glob_path: Union[str, None] = None) -> _pd.DataFrame: """Takes glob expression to create list of logs, parses names into site and date and selects most recent ones :param str logs_glob_path: A glob expression for log files, e.g. /data/station_logs_IGS/*/*.log - :param str rnx_glob_path: A glob expression for rnx files, e.g. /data/pea/exs/data/*.rnx, defaults to None + :param Union[str, None] rnx_glob_path: A glob expression for rnx files, e.g. /data/pea/exs/data/*.rnx, defaults to None :return _pd.DataFrame: Returns a dataframe containing information from all station logs processed """ paths = _pd.Series(_glob.glob(pathname=logs_glob_path, recursive=False), name="PATH") @@ -153,18 +153,23 @@ def determine_log_version(data: bytes) -> str: :return str: Return the version number: "v1.0" or "v2.0" (or "Unknown" if file does not conform to standard) """ - result_v1 = _REGEX_LOG_VERSION_1.search(data) + # Remove leading newline if present, to be safe, then truncate to first line + first_line_bytes = data.lstrip(b"\n").split(b"\n")[0] + + result_v1 = _REGEX_LOG_VERSION_1.search(first_line_bytes) if result_v1: return "v1.0" - result_v2 = _REGEX_LOG_VERSION_2.search(data) + result_v2 = _REGEX_LOG_VERSION_2.search(first_line_bytes) if result_v2: return "v2.0" - raise LogVersionError("Log file does not conform to any known IGS version") + raise LogVersionError(f"File does not conform to any known IGS Site Log version. First line is: {first_line_bytes}") -def extract_id_block(data: bytes, file_path: str, file_code: str, version: str = None) -> Union[List[str], _np.array]: +def extract_id_block( + data: bytes, file_path: str, file_code: str, version: Union[str, None] = None +) -> Union[List[str], _np.ndarray]: """Extract the site ID block given the bytes object read from an IGS site log file :param bytes data: The bytes object returned from an open() call on a IGS site log in "rb" mode @@ -182,7 +187,7 @@ def extract_id_block(data: bytes, file_path: str, file_code: str, version: str = elif version == "v2.0": _REGEX_ID = _REGEX_ID_V2 else: - raise LogVersionError("Incorrect version string passed to the extract_id_block() function") + raise LogVersionError(f"Incorrect version string '{version}' passed to the extract_id_block() function") id_block = _REGEX_ID.search(data) if id_block is None: @@ -197,14 +202,15 @@ def extract_id_block(data: bytes, file_path: str, file_code: str, version: str = return id_block -def extract_location_block(data: bytes, file_path: str, version: str = None) -> Union[_re.Match[bytes], _np.array]: +def extract_location_block(data: bytes, file_path: str, version: Union[str, None] = None) -> _np.ndarray: """Extract the location block given the bytes object read from an IGS site log file :param bytes data: The bytes object returned from an open() call on a IGS site log in "rb" mode :param str file_path: The path to the file from which the "data" bytes object was obtained - :param str version: Version number of log file (e.g. "v2.0") - determined if version=None, defaults to None + :param str version: Version number of log file (e.g. "v2.0") - will be determined from input data unless + provided here. :raises LogVersionError: Raises an error if an unknown version string is passed in - :return bytes: The location block of the IGS site log + :return _np.ndarray: The location block of the IGS site log, as a numpy NDArray of strings """ if version == None: version = determine_log_version(data) @@ -214,7 +220,7 @@ def extract_location_block(data: bytes, file_path: str, version: str = None) -> elif version == "v2.0": _REGEX_LOC = _REGEX_LOC_V2 else: - raise LogVersionError("Incorrect version string passed to extract_location_block() function") + raise LogVersionError(f"Incorrect version string '{version}' passed to extract_location_block() function") location_block = _REGEX_LOC.search(data) if location_block is None: @@ -223,12 +229,13 @@ def extract_location_block(data: bytes, file_path: str, version: str = None) -> return location_block -def extract_receiver_block(data: bytes, file_path: str) -> Union[List[Tuple[bytes]], _np.array]: +def extract_receiver_block(data: bytes, file_path: str) -> Union[List[Tuple[bytes]], _np.ndarray]: """Extract the location block given the bytes object read from an IGS site log file :param bytes data: The bytes object returned from an open() call on a IGS site log in "rb" mode :param str file_path: The path to the file from which the "data" bytes object was obtained - :return List[Tuple[bytes]]: The receiver block of the data. Each list element specifies an receiver + :return List[Tuple[bytes]] or _np.ndarray: The receiver block of the data. Each list element specifies an receiver. + If regex doesn't match, an empty numpy NDArray is returned instead. """ receiver_block = _REGEX_REC.findall(data) if receiver_block == []: @@ -237,12 +244,13 @@ def extract_receiver_block(data: bytes, file_path: str) -> Union[List[Tuple[byte return receiver_block -def extract_antenna_block(data: bytes, file_path: str) -> Union[List[Tuple[bytes]], _np.array]: +def extract_antenna_block(data: bytes, file_path: str) -> Union[List[Tuple[bytes]], _np.ndarray]: """Extract the antenna block given the bytes object read from an IGS site log file :param bytes data: The bytes object returned from an open() call on a IGS site log in "rb" mode :param str file_path: The path to the file from which the "data" bytes object was obtained - :return List[Tuple[bytes]]: The antenna block of the data. Each list element specifies an antenna + :return List[Tuple[bytes]] or _np.ndarray: The antenna block of the data. Each list element specifies an antenna. + If regex doesn't match, an empty numpy NDArray is returned instead. """ antenna_block = _REGEX_ANT.findall(data) if antenna_block == []: diff --git a/tests/test_datasets/sitelog_test_data.py b/tests/test_datasets/sitelog_test_data.py index 2dbef93..731baac 100644 --- a/tests/test_datasets/sitelog_test_data.py +++ b/tests/test_datasets/sitelog_test_data.py @@ -3,8 +3,7 @@ # Dataset below is a truncated version of file abmf_20240710.log abmf_site_log_v1 = bytes( - """ - ABMF Site Information Form (site log) + """ ABMF Site Information Form (site log) International GNSS Service See Instructions at: https://files.igs.org/pub/station/general/sitelog_instr.txt @@ -168,8 +167,7 @@ # Dataset below is a truncated version of file abmf00glp_20240710.log abmf_site_log_v2 = bytes( - """ - ABMF00GLP Site Information Form (site log v2.0) + """ ABMF00GLP Site Information Form (site log v2.0) International GNSS Service See Instructions at: https://files.igs.org/pub/station/general/sitelog_instr_v2.0.txt @@ -333,8 +331,7 @@ # Dataset below is a truncated version of file aggo00arg_20230608.log aggo_site_log_v2 = bytes( - """ - AGGO00ARG Site Information Form (site log v2.0) + """ AGGO00ARG Site Information Form (site log v2.0) International GNSS Service See Instructions at: https://files.igs.org/pub/station/general/sitelog_instr_v2.0.txt