From 52e51ce1e83fe56804dfeac3458440f3b03ec002 Mon Sep 17 00:00:00 2001 From: Clea Parcerisas Date: Wed, 4 Sep 2024 11:02:50 +0200 Subject: [PATCH 1/2] first approach to save daily files in evolution_frequency and apply_multiple --- pypam/acoustic_survey.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/pypam/acoustic_survey.py b/pypam/acoustic_survey.py index 5bc4a01..243f1af 100644 --- a/pypam/acoustic_survey.py +++ b/pypam/acoustic_survey.py @@ -176,7 +176,7 @@ def _get_metadata_attrs(self): return metadata_attrs - def evolution_multiple(self, method_list: list, band_list=None, **kwargs): + def evolution_multiple(self, method_list: list, band_list=None, save_daily=True, output_folder=None, **kwargs): """ Compute the method in each file and output the evolution Returns a xarray DataSet with datetime as index and one row for each bin of each file @@ -189,14 +189,29 @@ def evolution_multiple(self, method_list: list, band_list=None, **kwargs): Bands to filter. Can be multiple bands (all of them will be analyzed) or only one band. A band is represented with a tuple as (low_freq, high_freq). If set to None, the broadband up to the Nyquist frequency will be analyzed + save_daily : boolean + Set to True to save/yield daily netcdf files instead of a huge big file (useful for long deployments) + output_folder : str or Path + Directory to save the netcdf files **kwargs : Any accepted parameter for the method_name """ + if output_folder is not None: + if not isinstance(output_folder, pathlib.Path): + output_folder = pathlib.Path(output_folder) ds = xarray.Dataset(attrs=self._get_metadata_attrs()) f = operator.methodcaller('_apply_multiple', method_list=method_list, binsize=self.binsize, nfft=self.nfft, fft_overlap=self.fft_overlap, bin_overlap=self.bin_overlap, band_list=band_list, **kwargs) + start_date, end_date = self.start_end_timestamp() + current_date = start_date.date for sound_file in self._files(): + if save_daily and (sound_file.date.date > current_date): + if output_folder is not None: + ds.to_netcdf(output_folder.joinpath('%s.nc' % current_date)) + else: + yield ds + ds = xarray.Dataset(attrs=self._get_metadata_attrs()) ds_output = f(sound_file) ds = utils.merge_ds(ds, ds_output, self.file_dependent_attrs) self.current_chunk_id += ds.id.max() @@ -218,7 +233,7 @@ def evolution(self, method_name, band_list=None, **kwargs): """ return self.evolution_multiple(method_list=[method_name], band_list=band_list, **kwargs) - def evolution_freq_dom(self, method_name, **kwargs): + def evolution_freq_dom(self, method_name, save_daily=True, output_folder=None, **kwargs): """ Returns the evolution of frequency domain parameters Parameters @@ -232,7 +247,15 @@ def evolution_freq_dom(self, method_name, **kwargs): ds = xarray.Dataset(attrs=self._get_metadata_attrs()) f = operator.methodcaller(method_name, binsize=self.binsize, nfft=self.nfft, fft_overlap=self.fft_overlap, bin_overlap=self.bin_overlap, **kwargs) + start_date, end_date = self.start_end_timestamp() + current_date = start_date.date for sound_file in self._files(): + if save_daily and (sound_file.date.date > current_date): + if output_folder is not None: + ds.to_netcdf(output_folder.joinpath('%s.nc' % current_date)) + else: + yield ds + ds = xarray.Dataset(attrs=self._get_metadata_attrs()) ds_output = f(sound_file) ds = utils.merge_ds(ds, ds_output, self.file_dependent_attrs) self.current_chunk_id += ds.id.max() From 4802f20cf4f95cff543f1ec3b71130ed95dc0c2e Mon Sep 17 00:00:00 2001 From: Clea Parcerisas Date: Wed, 4 Sep 2024 11:51:32 +0200 Subject: [PATCH 2/2] removed yield function and only made save_daily an option when a path is given --- pypam/acoustic_file.py | 2 ++ pypam/acoustic_survey.py | 47 ++++++++++++++++++++++++---------------- pypam/dataset.py | 3 ++- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/pypam/acoustic_file.py b/pypam/acoustic_file.py index 88f37a1..9b19482 100644 --- a/pypam/acoustic_file.py +++ b/pypam/acoustic_file.py @@ -454,6 +454,8 @@ def _get_metadata_attrs(self): d = d.__dict__[sub_k] if isinstance(d, pathlib.Path): d = str(d) + if isinstance(d, bool): + d = int(d) if d is None: d = 0 metadata_attrs[k.replace('.', '_')] = d diff --git a/pypam/acoustic_survey.py b/pypam/acoustic_survey.py index 243f1af..5a82adb 100644 --- a/pypam/acoustic_survey.py +++ b/pypam/acoustic_survey.py @@ -172,11 +172,13 @@ def _get_metadata_attrs(self): d = d.__dict__[sub_k] if isinstance(d, pathlib.Path): d = str(d) + if isinstance(d, bool): + d = int(d) metadata_attrs[k.replace('.', '_')] = d return metadata_attrs - def evolution_multiple(self, method_list: list, band_list=None, save_daily=True, output_folder=None, **kwargs): + def evolution_multiple(self, method_list: list, band_list=None, save_daily=False, output_folder=None, **kwargs): """ Compute the method in each file and output the evolution Returns a xarray DataSet with datetime as index and one row for each bin of each file @@ -190,28 +192,27 @@ def evolution_multiple(self, method_list: list, band_list=None, save_daily=True, represented with a tuple as (low_freq, high_freq). If set to None, the broadband up to the Nyquist frequency will be analyzed save_daily : boolean - Set to True to save/yield daily netcdf files instead of a huge big file (useful for long deployments) + Set to True to save daily netcdf files instead of returning a huge big file (useful for long deployments) output_folder : str or Path - Directory to save the netcdf files + Directory to save the netcdf files. Only works with save_daily **kwargs : Any accepted parameter for the method_name """ - if output_folder is not None: - if not isinstance(output_folder, pathlib.Path): - output_folder = pathlib.Path(output_folder) + if save_daily and output_folder is None: + raise ValueError('output_folder must not be none to save daily netcdf files') + if isinstance(output_folder, str): + output_folder = pathlib.Path(output_folder) ds = xarray.Dataset(attrs=self._get_metadata_attrs()) f = operator.methodcaller('_apply_multiple', method_list=method_list, binsize=self.binsize, nfft=self.nfft, fft_overlap=self.fft_overlap, bin_overlap=self.bin_overlap, band_list=band_list, **kwargs) start_date, end_date = self.start_end_timestamp() - current_date = start_date.date + current_date = start_date.date() for sound_file in self._files(): - if save_daily and (sound_file.date.date > current_date): - if output_folder is not None: - ds.to_netcdf(output_folder.joinpath('%s.nc' % current_date)) - else: - yield ds + if save_daily and (sound_file.date.date() > current_date): + ds.to_netcdf(output_folder.joinpath('%s.nc' % current_date)) ds = xarray.Dataset(attrs=self._get_metadata_attrs()) + current_date = sound_file.date.date() ds_output = f(sound_file) ds = utils.merge_ds(ds, ds_output, self.file_dependent_attrs) self.current_chunk_id += ds.id.max() @@ -233,32 +234,40 @@ def evolution(self, method_name, band_list=None, **kwargs): """ return self.evolution_multiple(method_list=[method_name], band_list=band_list, **kwargs) - def evolution_freq_dom(self, method_name, save_daily=True, output_folder=None, **kwargs): + def evolution_freq_dom(self, method_name, save_daily=False, output_folder=None, **kwargs): """ Returns the evolution of frequency domain parameters Parameters ---------- method_name : str Name of the method of the acoustic_file class to compute + save_daily : boolean + Set to True to save daily netcdf files instead of returning a huge big file (useful for long deployments) + output_folder : str or Path + Directory to save the netcdf files. Only works with save_daily Returns ------- A xarray DataSet with a row per bin with the method name output """ + if save_daily and output_folder is None: + raise ValueError('output_folder must not be none to save daily netcdf files') + if isinstance(output_folder, str): + output_folder = pathlib.Path(output_folder) ds = xarray.Dataset(attrs=self._get_metadata_attrs()) f = operator.methodcaller(method_name, binsize=self.binsize, nfft=self.nfft, fft_overlap=self.fft_overlap, bin_overlap=self.bin_overlap, **kwargs) start_date, end_date = self.start_end_timestamp() - current_date = start_date.date + current_date = start_date.date() for sound_file in self._files(): - if save_daily and (sound_file.date.date > current_date): - if output_folder is not None: - ds.to_netcdf(output_folder.joinpath('%s.nc' % current_date)) - else: - yield ds + if save_daily and (sound_file.date.date() > current_date): + ds.to_netcdf(output_folder.joinpath('%s.nc' % current_date)) ds = xarray.Dataset(attrs=self._get_metadata_attrs()) + current_date = sound_file.date.date() ds_output = f(sound_file) ds = utils.merge_ds(ds, ds_output, self.file_dependent_attrs) self.current_chunk_id += ds.id.max() + if save_daily: + ds.to_netcdf(output_folder.joinpath('%s.nc' % current_date)) return ds def timestamps_array(self): diff --git a/pypam/dataset.py b/pypam/dataset.py index 22872ce..684dce6 100644 --- a/pypam/dataset.py +++ b/pypam/dataset.py @@ -161,7 +161,8 @@ def generate_deployment(self, idx): ds = xarray.Dataset() if self.frequency_features not in [[], None]: for f in self.frequency_features: - freq_evo = asa.evolution_freq_dom(f, band=None, db=True) + freq_evo = asa.evolution_freq_dom(f, band=None, db=True, save_daily=True, + output_folder=self.output_folder.joinpath('deployments')) for data_var in freq_evo.data_vars: ds = ds.merge(freq_evo[data_var]) if self.temporal_features not in [[], None]: