Skip to content

Commit

Permalink
Merge pull request #139 from lifewatch/feature/save_daily_files
Browse files Browse the repository at this point in the history
Feature/save daily files
  • Loading branch information
cparcerisas authored Sep 13, 2024
2 parents ae71739 + 4802f20 commit f542d28
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 3 deletions.
2 changes: 2 additions & 0 deletions pypam/acoustic_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,8 @@ def _get_metadata_attrs(self):
d = d.__dict__[sub_k]
if isinstance(d, pathlib.Path):
d = str(d)
if isinstance(d, bool):
d = int(d)
if d is None:
d = 0
metadata_attrs[k.replace('.', '_')] = d
Expand Down
36 changes: 34 additions & 2 deletions pypam/acoustic_survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,13 @@ def _get_metadata_attrs(self):
d = d.__dict__[sub_k]
if isinstance(d, pathlib.Path):
d = str(d)
if isinstance(d, bool):
d = int(d)
metadata_attrs[k.replace('.', '_')] = d

return metadata_attrs

def evolution_multiple(self, method_list: list, band_list=None, **kwargs):
def evolution_multiple(self, method_list: list, band_list=None, save_daily=False, output_folder=None, **kwargs):
"""
Compute the method in each file and output the evolution
Returns a xarray DataSet with datetime as index and one row for each bin of each file
Expand All @@ -189,14 +191,28 @@ def evolution_multiple(self, method_list: list, band_list=None, **kwargs):
Bands to filter. Can be multiple bands (all of them will be analyzed) or only one band. A band is
represented with a tuple as (low_freq, high_freq). If set to None, the broadband up to the Nyquist
frequency will be analyzed
save_daily : boolean
Set to True to save daily netcdf files instead of returning a huge big file (useful for long deployments)
output_folder : str or Path
Directory to save the netcdf files. Only works with save_daily
**kwargs :
Any accepted parameter for the method_name
"""
if save_daily and output_folder is None:
raise ValueError('output_folder must not be none to save daily netcdf files')
if isinstance(output_folder, str):
output_folder = pathlib.Path(output_folder)
ds = xarray.Dataset(attrs=self._get_metadata_attrs())
f = operator.methodcaller('_apply_multiple', method_list=method_list, binsize=self.binsize,
nfft=self.nfft, fft_overlap=self.fft_overlap, bin_overlap=self.bin_overlap,
band_list=band_list, **kwargs)
start_date, end_date = self.start_end_timestamp()
current_date = start_date.date()
for sound_file in self._files():
if save_daily and (sound_file.date.date() > current_date):
ds.to_netcdf(output_folder.joinpath('%s.nc' % current_date))
ds = xarray.Dataset(attrs=self._get_metadata_attrs())
current_date = sound_file.date.date()
ds_output = f(sound_file)
ds = utils.merge_ds(ds, ds_output, self.file_dependent_attrs)
self.current_chunk_id += ds.id.max()
Expand All @@ -218,24 +234,40 @@ def evolution(self, method_name, band_list=None, **kwargs):
"""
return self.evolution_multiple(method_list=[method_name], band_list=band_list, **kwargs)

def evolution_freq_dom(self, method_name, save_daily=False, output_folder=None, **kwargs):
    """
    Returns the evolution of frequency domain parameters

    Parameters
    ----------
    method_name : str
        Name of the method of the acoustic_file class to compute
    save_daily : boolean
        Set to True to save one netcdf file per day (named '<date>.nc') instead of accumulating a single
        huge dataset (useful for long deployments)
    output_folder : str or Path
        Directory to save the netcdf files. Only used with save_daily. It is created if it does not exist
    **kwargs :
        Any accepted parameter for the method_name

    Returns
    -------
    A xarray DataSet with a row per bin with the method name output. When save_daily is True the dataset is
    reset every time a new day starts, so only the bins of the last processed day are returned; the previous
    days are only available in the saved netcdf files

    Raises
    ------
    ValueError
        If save_daily is True and output_folder is None
    """
    if save_daily:
        if output_folder is None:
            raise ValueError('output_folder must not be none to save daily netcdf files')
        # Accept any path-like object (not only str), and make sure the destination exists before the
        # first daily file is written: to_netcdf does not create missing parent directories
        output_folder = pathlib.Path(output_folder)
        output_folder.mkdir(parents=True, exist_ok=True)

    ds = xarray.Dataset(attrs=self._get_metadata_attrs())
    f = operator.methodcaller(method_name, binsize=self.binsize, nfft=self.nfft, fft_overlap=self.fft_overlap,
                              bin_overlap=self.bin_overlap, **kwargs)
    start_date, _ = self.start_end_timestamp()
    current_date = start_date.date()
    for sound_file in self._files():
        if save_daily and (sound_file.date.date() > current_date):
            # A new day started: flush the finished day to disk and start a fresh dataset
            ds.to_netcdf(output_folder.joinpath('%s.nc' % current_date))
            ds = xarray.Dataset(attrs=self._get_metadata_attrs())
            current_date = sound_file.date.date()
        ds_output = f(sound_file)
        ds = utils.merge_ds(ds, ds_output, self.file_dependent_attrs)
        self.current_chunk_id += ds.id.max()
    if save_daily:
        # The last day is never followed by a date change, so it has to be saved explicitly
        ds.to_netcdf(output_folder.joinpath('%s.nc' % current_date))
    return ds

def timestamps_array(self):
Expand Down
3 changes: 2 additions & 1 deletion pypam/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,8 @@ def generate_deployment(self, idx):
ds = xarray.Dataset()
if self.frequency_features not in [[], None]:
for f in self.frequency_features:
freq_evo = asa.evolution_freq_dom(f, band=None, db=True)
freq_evo = asa.evolution_freq_dom(f, band=None, db=True, save_daily=True,
output_folder=self.output_folder.joinpath('deployments'))
for data_var in freq_evo.data_vars:
ds = ds.merge(freq_evo[data_var])
if self.temporal_features not in [[], None]:
Expand Down

0 comments on commit f542d28

Please sign in to comment.