Merge pull request #71 from GeoscienceAustralia/NPI-3685-streamline-sp3-incorrect-timerange-unit-test

NPI-3685 Streamline SP3 incorrect timerange unit test
treefern authored Jan 31, 2025
2 parents 01bbebe + eba0da8 commit ecdde74
Showing 7 changed files with 362 additions and 8,681 deletions.
5 changes: 5 additions & 0 deletions gnssanalysis/gn_io/sp3.py
@@ -367,6 +367,10 @@ def _process_sp3_block(
    names: List[str] = _SP3_DEF_PV_NAME,
) -> _pd.DataFrame:
    """Process a single block of SP3 data.
+    NOTE: this creates a temporary DataFrame, complete with indexes etc., for *every epoch* of SP3 data read in.
+    This is expensive! Epoch count has far more impact on SP3 loading speed than the number of satellites.
+    TODO It may be possible to speed up SP3 reading by changing this logic to parse the data but not build a full
+    DataFrame from it, only converting to a DataFrame in the parent function, once all the data is concatenated.

    :param str date: The date of the SP3 data block.
@@ -378,6 +382,7 @@ def _process_sp3_block(
    if not data or len(data) == 0:
        return _pd.DataFrame()
    epochs_dt = _pd.to_datetime(_pd.Series(date).str.slice(2, 21).values.astype(str), format=r"%Y %m %d %H %M %S")
+    # NOTE: setting dtype_backend="pyarrow" currently breaks parsing.
    temp_sp3 = _pd.read_fwf(_io.StringIO(data), widths=widths, names=names)
    # TODO set datatypes per column in advance
    # TODO maybe change this after updating everything else to use actual NaNs?
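A rough sketch of the idea in the TODO above: collect plain records while parsing and build a single DataFrame at the end, rather than one per epoch. This is not the library's implementation; `blocks`, the whitespace split, and the unnamed columns are illustrative simplifications.

```python
import pandas as pd

def read_sp3_blocks(blocks):
    """Hypothetical sketch: collect plain tuples per epoch, then build ONE
    DataFrame for the whole file instead of one DataFrame per epoch block.
    `blocks` is assumed to be an iterable of (epoch_header, block_text)."""
    rows = []
    for date, data in blocks:
        for line in data.splitlines():
            if not line.strip():
                continue
            # Real SP3 records are fixed-width; a whitespace split keeps this short.
            sat, *values = line.split()
            rows.append((date, sat, *values))
    df = pd.DataFrame(rows)
    # One vectorised datetime conversion at the end, mirroring the slice and
    # format that the current per-epoch code applies to each epoch header.
    df[0] = pd.to_datetime(df[0].str.slice(2, 21), format=r"%Y %m %d %H %M %S")
    return df
```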
49 changes: 49 additions & 0 deletions gnssanalysis/gn_utils.py
@@ -2,6 +2,7 @@
import os as _os
import sys as _sys
import pathlib as _pathlib
+from time import perf_counter

import click as _click

@@ -871,3 +872,51 @@ def clkq(
        out_file.writelines(output_str)
    else:
        print(output_str)


+class ContextTimer:
+    """
+    Utility for measuring function execution time (e.g. for manually profiling which unit tests are taking
+    excessive time).
+    Use it as a context manager, e.g. (values shown are the defaults, apart from name):
+    with ContextTimer(print_time=True, name="func name", flag_if_over_sec=1.0, skip_if_under_sec=0.01) as timer:
+        some_function_to_time()
+    Based on https://stackoverflow.com/a/69156219
+    """

+    def __init__(self, **kwargs):
+        # kwargs is always a dict here, so per-key defaults replace a chain of
+        # presence checks (an "if kwargs is not None" guard can never be False).
+        self.print_time = bool(kwargs.get("print_time", True))
+        self.name = str(kwargs["name"]) if "name" in kwargs else None
+        self.flag_if_over_sec = float(kwargs.get("flag_if_over_sec", 1.0))
+        self.skip_if_under_sec = float(kwargs.get("skip_if_under_sec", 0.01))

+    def __enter__(self):
+        self.start = perf_counter()
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.time = perf_counter() - self.start
+        if self.skip_if_under_sec and self.time < self.skip_if_under_sec:  # Too quick to be worth reporting
+            return
+        do_flag = self.flag_if_over_sec and self.time > self.flag_if_over_sec
+        self.readout = (
+            f"{'SLOW!! ' if do_flag else ''}{self.time:.3f} sec elapsed{f' for {self.name}' if self.name else ''}"
+        )
+        if self.print_time:
+            print(self.readout)
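A usage sketch for the new timer, following its docstring (the timed function here is illustrative):

```python
from gnssanalysis.gn_utils import ContextTimer

def slow_step():
    # Stand-in for whatever is being profiled, e.g. a unit test body.
    return sum(i * i for i in range(5_000_000))

with ContextTimer(name="slow_step", flag_if_over_sec=1.0) as timer:
    slow_step()

# timer.time now holds the elapsed seconds. Runs shorter than skip_if_under_sec
# (default 0.01 s) print nothing; runs longer than flag_if_over_sec get a
# "SLOW!!" prefix in the printed readout.
```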
6 changes: 3 additions & 3 deletions tests/test_clk.py
@@ -1,8 +1,5 @@
from pyfakefs.fake_filesystem_unittest import TestCase

-import numpy as np
-import pandas as pd
-
import gnssanalysis.gn_io.clk as clk
import gnssanalysis.gn_diffaux as gn_diffaux

@@ -17,8 +14,10 @@
class TestClk(TestCase):
    def setUp(self):
        self.setUpPyfakefs()
+        self.fs.reset()

    def test_clk_read(self):
+        self.fs.reset()
        file_paths = ["/fake/dir/file0.clk", "/fake/dir/file1.clk"]
        self.fs.create_file(file_paths[0], contents=input_data_igs)
        self.fs.create_file(file_paths[1], contents=input_data_gfz)
@@ -36,6 +35,7 @@ def test_clk_read(self):
        self.assertEqual(clk_df_gfz["EST"].iloc[-1], -0.000610553573006, msg="Check last datapoint is correct")

    def test_compare_clk(self):
+        self.fs.reset()  # Reset pyfakefs to delete any files which may have persisted from a previous test
        file_paths = ["/fake/dir/file0.clk", "/fake/dir/file1.clk"]
        self.fs.create_file(file_paths[0], contents=input_data_igs)
        self.fs.create_file(file_paths[1], contents=input_data_gfz)
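For context, the reset pattern these tests adopt looks like this in isolation (a minimal sketch; paths and contents are placeholders):

```python
from pyfakefs.fake_filesystem_unittest import TestCase

class ExampleTest(TestCase):
    def setUp(self):
        self.setUpPyfakefs()
        # Delete any fake files left behind by a previously-run test, so each
        # test starts from an empty in-memory filesystem.
        self.fs.reset()

    def test_roundtrip(self):
        self.fs.create_file("/fake/dir/data.txt", contents="hello")
        with open("/fake/dir/data.txt") as f:
            self.assertEqual(f.read(), "hello")
```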