Skip to content

Commit

Permalink
feat: update to noisepy-io v0.2.0 (#327)
Browse files Browse the repository at this point in the history
* enforce single sampling rate for one station

Signed-off-by: Yiyu Ni <niyiyu@uw.edu>

* update tutorial

Signed-off-by: Yiyu Ni <niyiyu@uw.edu>

* update tutorial

Signed-off-by: Yiyu Ni <niyiyu@uw.edu>

* update io ver

Signed-off-by: Yiyu Ni <niyiyu@uw.edu>

* update tests for single_freq

Signed-off-by: Yiyu Ni <niyiyu@uw.edu>

* update get_started

Signed-off-by: Yiyu Ni <niyiyu@uw.edu>

* update monitoring

Signed-off-by: Yiyu Ni <niyiyu@uw.edu>

---------

Signed-off-by: Yiyu Ni <niyiyu@uw.edu>
  • Loading branch information
niyiyu authored Nov 6, 2024
1 parent 2b7627d commit b8297fc
Show file tree
Hide file tree
Showing 13 changed files with 197 additions and 166 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ tutorials/ncedc_data/
tutorials/cli/tmpdata
tutorials/pnw_data
tutorials/composite_data
tutorials/monitoring/CCF_ASDF/
tutorials/monitoring/Monitoring_output.csv
tutorials/monitoring/monito_config.yml

# Jupyterbook
_build/
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ Taxonomy of the NoisePy variables.
* ``step`` is the window that get skipped when sliding windows in seconds
* ``smooth_N`` number of points for smoothing the time or frequency domain discrete arrays.
* ``maxlag`` maximum length in seconds saved in files in each side of the correlation (save on storage)
* ``substack,substack_len`` boolean, window length over which to substack the correlation (to save storage or do monitoring), it has to be a multiple of ``cc_len``.
* ``substack, substack_windows`` boolean, number of windows over which to substack the correlation (to save storage or do monitoring).
* ``time_chunk, nchunk`` refers to the time unit that defines a single job. For instance, ``cc_len`` is the correlation length (e.g., 1 hour, 30 min), and the overall duration of the experiment is the total length (1 month, 1 year, ...). The time chunk could be 1 day: the code would loop through each cc_len window in a for loop. But each day will be sent as a thread.

## Acknowledgements
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ dependencies = [
"PyYAML==6.0",
"pydantic-yaml==1.0",
"psutil>=5.9.5,<6.0.0",
"noisepy-seis-io>=0.1.16",
"noisepy-seis-io>=0.2.0",
"scipy==1.12.0"
]

Expand Down
25 changes: 20 additions & 5 deletions src/noisepy/seis/correlate.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,20 +502,35 @@ def _filter_channel_data(
logging.warning(f"No data available with sampling rate >= {sampling_rate}")
return []
if single_freq:
closest_freq = min(
frequencies,
key=lambda f: max(f - sampling_rate, 0),
)
logger.info(f"Picked {closest_freq} as the closest sampling rate to {sampling_rate}. ")
closest_freq = _get_closest_freq(frequencies, sampling_rate)
logger.info(f"Picked {closest_freq} as the closest sampling_rate to {sampling_rate}. ")
filtered_tuples = list(filter(lambda tup: tup[1].sampling_rate == closest_freq, tuples))
logger.info(f"Filtered to {len(filtered_tuples)}/{len(tuples)} channels with sampling rate == {closest_freq}")
else:
filtered_tuples = list(filter(lambda tup: tup[1].sampling_rate >= sampling_rate, tuples))
# for each station, pick the closest >= to sampling_rate
tmp = list(
map(
lambda s: [t for t in filtered_tuples if t[0].station == s],
set([t[0].station for t in filtered_tuples]),
)
)
filtered_tuples = sum(list(map(lambda t: _filt_single_station(t, sampling_rate), tmp)), [])
logger.info(f"Filtered to {len(filtered_tuples)}/{len(tuples)} channels with sampling rate >= {sampling_rate}")

return filtered_tuples


def _get_closest_freq(frequencies, sampling_rate: int):
return min(frequencies, key=lambda f: max(f - sampling_rate, 0))


def _filt_single_station(tuples: List[Tuple[Channel, ChannelData]], sampling_rate: int):
    """
    For the (channel, data) pairs of a single station, keep only the pairs
    whose sampling rate is the closest available rate to ``sampling_rate``
    (per ``_get_closest_freq``); all other rates are dropped.
    """
    available_rates = {data.sampling_rate for _, data in tuples}
    best_rate = _get_closest_freq(available_rates, sampling_rate)
    return [pair for pair in tuples if pair[1].sampling_rate == best_rate]


def check_memory(params: ConfigParameters, nsta: int) -> int:
# maximum memory allowed
# TODO: Is this needed? Should it be configurable?
Expand Down
33 changes: 22 additions & 11 deletions tests/test_cross_correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,32 @@
def test_read_channels():
CLOSEST_FREQ = 60
sampling_rate = 40
freqs = [10, 39, CLOSEST_FREQ, 100]
ch_data = []
for f in freqs:
ch_data1 = []
N = 5
for f in [10, 39, CLOSEST_FREQ, 100]:
cd = ChannelData.empty()
cd.sampling_rate = f
ch_data.append(cd)
N = 5
tuples = [(Channel("foo", Station("CI", "bar")), cd) for cd in ch_data] * N
ch_data1.append(cd)

cd = ChannelData.empty()
cd.sampling_rate = 100
ch_data2 = [cd]

tuples = [(Channel("foo", Station("CI", "FOO")), cd) for cd in ch_data1] * N
tuples += [(Channel("bar", Station("CI", "BAR")), cd) for cd in ch_data2] * N

# we should pick the closest frequency that is >= to the target freq, 60 in this case
# we should pick the closest frequency that is >= to the target sampling_rate
# 60 Hz in this case, for both stations
# CI.FOO returns 60 Hz
# CI.BAR returns nothing
filtered = _filter_channel_data(tuples, sampling_rate, single_freq=True)
assert N == len(filtered)
assert [t[1].sampling_rate for t in filtered] == [CLOSEST_FREQ] * N

# we should get all data at >= 40 Hz (60 and 100)
# we should pick the closest frequency that is >= to the target sampling_rate
# but might be different for each station
# CI.FOO returns 60 Hz
# CI.BAR returns 100 Hz
filtered = _filter_channel_data(tuples, sampling_rate, single_freq=False)
assert N * 2 == len(filtered)
assert all([t[1].sampling_rate >= sampling_rate for t in filtered])
Expand Down Expand Up @@ -79,11 +90,11 @@ def get_inventory(self, timespan: DateTimeRange, station: Station) -> obspy.Inve
@pytest.mark.parametrize("rm_resp", [RmResp.NO, RmResp.INV]) # add tests for SPECTRUM, RESP and POLES_ZEROS
@pytest.mark.parametrize("cc_method", [CCMethod.XCORR, CCMethod.COHERENCY, CCMethod.DECONV])
@pytest.mark.parametrize("substack", [True, False])
@pytest.mark.parametrize("substack_len", [1, 2])
@pytest.mark.parametrize("substack_windows", [1, 2])
@pytest.mark.parametrize("inc_hours", [0, 24])
@pytest.mark.parametrize("dpath", ["./data/cc", "./data/acc"])
def test_cross_correlation(
rm_resp: RmResp, cc_method: CCMethod, substack: bool, substack_len: int, inc_hours: int, dpath: str
rm_resp: RmResp, cc_method: CCMethod, substack: bool, substack_windows: int, inc_hours: int, dpath: str
):
config = ConfigParameters()
config.sampling_rate = 1.0
Expand All @@ -92,7 +103,7 @@ def test_cross_correlation(
config.inc_hours = inc_hours
if substack:
config.substack = substack
config.substack_len = substack_len * config.cc_len
config.substack_windows = substack_windows
path = os.path.join(os.path.dirname(__file__), dpath)

raw_store = SCEDCS3DataStore(path, MockCatalogMock())
Expand Down
2 changes: 1 addition & 1 deletion tutorials/cloud/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,6 @@ stationxml: false
step: 450.0
storage_options: {}
substack: false
substack_len: 1800
substack_windows: 1
time_norm: no
xcorr_only: true
2 changes: 1 addition & 1 deletion tutorials/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ cc_method: xcorr
smooth_N: 10
smoothspect_N: 10
substack: true
substack_len: 3600
substack_windows: 1
maxlag: 200
inc_hours: 12
max_over_std: 10
Expand Down
34 changes: 17 additions & 17 deletions tutorials/get_started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"from noisepy.seis.io import plotting_modules\n",
"from noisepy.seis.io.asdfstore import ASDFRawDataStore, ASDFCCStore, ASDFStackStore\n",
"from noisepy.seis.io.datatypes import CCMethod, ConfigParameters, FreqNorm, RmResp, TimeNorm\n",
"from dateutil.parser import isoparse\n",
"from datetime import datetime, timezone\n",
"from datetimerange import DateTimeRange\n",
"import os\n",
"import shutil\n",
Expand Down Expand Up @@ -95,8 +95,8 @@
"source": [
"config = ConfigParameters() # default config parameters which can be customized\n",
"config.inc_hours = 12\n",
"config.sampling_rate= 20 # (int) Sampling rate in Hz of desired processing (it can be different than the data sampling rate)\n",
"config.cc_len= 3600 # (float) basic unit of data length for fft (sec)\n",
"config.sampling_rate = 20 # (int) Sampling rate in Hz of desired processing (it can be different than the data sampling rate)\n",
"config.cc_len = 3600 # (float) basic unit of data length for fft (sec)\n",
" # criteria for data selection\n",
"config.ncomp = 3 # 1 or 3 component data (needed to decide whether do rotation)\n",
"\n",
Expand All @@ -111,32 +111,32 @@
"config.lomax = -115 # max longitude\n",
"\n",
"# pre-processing parameters\n",
"config.step= 1800.0 # (float) overlapping between each cc_len (sec)\n",
"config.stationxml= False # station.XML file used to remove instrument response for SAC/miniseed data\n",
"config.rm_resp= RmResp.INV # select 'no' to not remove response and use 'inv' if you use the stationXML,'spectrum',\n",
"config.step = 1800.0 # (float) overlapping between each cc_len (sec)\n",
"config.stationxml = False # station.XML file used to remove instrument response for SAC/miniseed data\n",
"config.rm_resp = RmResp.INV # select 'no' to not remove response and use 'inv' if you use the stationXML,'spectrum',\n",
"config.freqmin = 0.05\n",
"config.freqmax = 2.0\n",
"config.max_over_std = 10 # threshold to remove window of bad signals: set it to 10*9 if prefer not to remove them\n",
"\n",
"# TEMPORAL and SPECTRAL NORMALISATION\n",
"config.freq_norm= FreqNorm.RMA # choose between \"rma\" for a soft whitenning or \"no\" for no whitening. Pure whitening is not implemented correctly at this point.\n",
"config.freq_norm = FreqNorm.RMA # choose between \"rma\" for a soft whitenning or \"no\" for no whitening. Pure whitening is not implemented correctly at this point.\n",
"config.smoothspect_N = 10 # moving window length to smooth spectrum amplitude (points)\n",
" # here, choose smoothspect_N for the case of a strict whitening (e.g., phase_only)\n",
"\n",
"config.time_norm = TimeNorm.NO # 'no' for no normalization, or 'rma', 'one_bit' for normalization in time domain,\n",
" # TODO: change time_norm option from \"no\" to \"None\"\n",
"config.smooth_N= 10 # moving window length for time domain normalization if selected (points)\n",
"config.smooth_N = 10 # moving window length for time domain normalization if selected (points)\n",
"\n",
"config.cc_method= CCMethod.XCORR # 'xcorr' for pure cross correlation OR 'deconv' for deconvolution;\n",
"config.cc_method = CCMethod.XCORR # 'xcorr' for pure cross correlation OR 'deconv' for deconvolution;\n",
" # FOR \"COHERENCY\" PLEASE set freq_norm to \"rma\", time_norm to \"no\" and cc_method to \"xcorr\"\n",
"\n",
"# OUTPUTS:\n",
"config.substack = True # True = smaller stacks within the time chunk. False: it will stack over inc_hours\n",
"config.substack_len = config.cc_len # how long to stack over (for monitoring purpose): need to be multiples of cc_len\n",
" # if substack=True, substack_len=2*cc_len, then you pre-stack every 2 correlation windows.\n",
" # for instance: substack=True, substack_len=cc_len means that you keep ALL of the correlations\n",
"config.substack_windows = 1 # how long to stack over (for monitoring purpose)\n",
" # if substack=True, substack_windows=2, then you pre-stack every 2 correlation windows.\n",
" # for instance: substack=True, substack_windows=1 means that you keep ALL of the correlations\n",
"\n",
"config.maxlag= 200 # lags of cross-correlation to save (sec)\n",
"config.maxlag = 200 # lags of cross-correlation to save (sec)\n",
"config.substack = True"
]
},
Expand Down Expand Up @@ -173,9 +173,9 @@
"source": [
"config.networks = [\"*\"]\n",
"config.stations = [\"A*\"]\n",
"config.channels = [\"BHE\",\"BHN\",\"BHZ\"]\n",
"config.start_date = isoparse(\"2019-02-01T00:00:00Z\")\n",
"config.end_date = isoparse(\"2019-02-02T00:00:00Z\")\n",
"config.channels = [\"BH?\"]\n",
"config.start_date = datetime(2019, 2, 1, tzinfo=timezone.utc)\n",
"config.end_date = datetime(2019, 2, 2, tzinfo=timezone.utc)\n",
"timerange = DateTimeRange(config.start_date, config.end_date)\n",
"\n",
"# Download data locally. Enters raw data path, channel types, stations, config, and fdsn server.\n",
Expand Down Expand Up @@ -218,7 +218,7 @@
"file = os.path.join(raw_data_path, \"2019_02_01_00_00_00T2019_02_01_12_00_00.h5\")\n",
"raw_store = ASDFRawDataStore(raw_data_path) # Store for reading raw data\n",
"timespans = raw_store.get_timespans()\n",
"plotting_modules.plot_waveform(raw_store, timespans[0], 'CI','ADO',0.01,0.4) # this function takes for input: filename, network, station, freqmin, freqmax for a bandpass filter"
"plotting_modules.plot_waveform(raw_store, timespans[0], 'CI','ADO', 0.01, 0.4) # this function takes for input: filename, network, station, freqmin, freqmax for a bandpass filter"
]
},
{
Expand Down
Loading

0 comments on commit b8297fc

Please sign in to comment.