From b886d1a8bade202f1868c4e4812e041e53eb157a Mon Sep 17 00:00:00 2001 From: Clea Parcerisas Date: Tue, 3 Sep 2024 14:23:42 +0200 Subject: [PATCH] added option of having gridded data. Fixes #127 and #79 --- pypam/acoustic_chunk.py | 322 +++++----------------------------- pypam/acoustic_file.py | 111 ++++++------ pypam/acoustic_survey.py | 88 ++++------ pypam/utils.py | 11 ++ tests/test_acoustic_file.py | 22 +++ tests/test_acoustic_survey.py | 10 +- 6 files changed, 182 insertions(+), 382 deletions(-) diff --git a/pypam/acoustic_chunk.py b/pypam/acoustic_chunk.py index 4a52402..9e10bad 100644 --- a/pypam/acoustic_chunk.py +++ b/pypam/acoustic_chunk.py @@ -18,7 +18,6 @@ import xarray from tqdm.auto import tqdm -from pypam import nmf from pypam import plots from pypam import signal as sig from pypam import utils @@ -66,7 +65,7 @@ def __init__(self, sfile_start, sfile_end, hydrophone, p_ref, time_bin, chunk_id self.file_end_path = sfile_end self.file_start = sf.SoundFile(self.file_start_path, 'r') self.file_end = sf.SoundFile(self.file_end_path, mode='r') - self.fs = self.file_start.samplerate # Should be the same for both files, only reading one! + self.fs = self.file_start.samplerate # Should be the same for both files, only reading one! # Time and id self.time_bin = time_bin @@ -116,7 +115,6 @@ def freq_resolution_window(self, freq_resolution): 'it must be downsampled!') return nfft - def wav2upa(self, wav=None): """ Compute the pressure from the wav signal @@ -163,7 +161,7 @@ def db2upa(self, db=None): """ if db is None: wav = self._signal.signal - db = 10 * np.log10(wav**2) + db = 10 * np.log10(wav ** 2) # return np.power(10, db / 20.0 - np.log10(self.p_ref)) return utils.to_mag(wave=db, ref=self.p_ref) @@ -203,7 +201,7 @@ def read(self): else: first_chunk = self.file_start.read(frames=-1, always_2d=True)[:, self.channel] self.file_end.seek(0) - second_chunk = self.file_end.read(frames=self.end_frame)[:, self.channel] + second_chunk = self.file_end.read(frames=self.end_frame, always_2d=True)[:, self.channel] signal = np.concatenate((first_chunk, second_chunk)) # Read the signal and prepare it for analysis @@ -217,7 +215,7 @@ def read(self): return signal - def _apply_multiple(self, method_list, band_list=None, **kwargs): + def apply_multiple(self, method_list, band_list=None, **kwargs): """ Apply multiple methods per bin to save computational time @@ -235,7 +233,7 @@ def _apply_multiple(self, method_list, band_list=None, **kwargs): ------- DataFrame with time as index and a multiindex column with band, method as levels. """ - signal = self.signal.copy() + signal = self.signal downsample = False # Bands selected to study @@ -276,8 +274,8 @@ def _apply_multiple(self, method_list, band_list=None, **kwargs): coords={'id': [self.chunk_id], 'file_id': ('id', [self.chunk_file_id]), 'datetime': ('id', [self.time_bin]), - 'start_sample': ('id', [self.start_sample]), - 'end_sample': ('id', [self.end_sample]), + 'start_sample': ('id', [self.start_frame]), + 'end_sample': ('id', [self.end_frame]), 'band': [j], 'low_freq': ('band', [band[0]]), 'high_freq': ('band', [band[1]])}, @@ -288,155 +286,15 @@ def _apply_multiple(self, method_list, band_list=None, **kwargs): else: ds_bands = xarray.concat((ds_bands, methods_output), 'band') - ds_bands.attrs = self._get_metadata_attrs() return ds_bands - def _apply(self, method_name, binsize=None, db=True, band_list=None, bin_overlap=0, **kwargs): - """ - Apply one single method - - Parameters - ---------- - method_name : string - Name of the method to apply - binsize : float, in sec - Time window considered. If set to None, only one value is returned - overlap : float [0 to 1] - Percentage to overlap the bin windows - db : bool - If set to True the result will be given in db, otherwise in upa - """ - return self._apply_multiple(method_list=[method_name], binsize=binsize, bin_overlap=bin_overlap, - db=db, band_list=band_list, **kwargs) - - def rms(self, binsize=None, bin_overlap=0, db=True): - """ - Calculation of root mean squared value (rms) of the signal in upa for each bin - Returns Dataframe with 'datetime' as index and 'rms' value as a column - - Parameters - ---------- - binsize : float, in sec - Time window considered. If set to None, only one value is returned - bin_overlap : float [0 to 1] - Percentage to overlap the bin windows - db : bool - If set to True the result will be given in db, otherwise in upa - """ - rms_ds = self._apply(method_name='rms', binsize=binsize, bin_overlap=bin_overlap, db=db) - return rms_ds - - def aci(self, binsize=None, bin_overlap=0, nfft=1024, fft_overlap=0.5): - """ - Calculation of root mean squared value (rms) of the signal in upa for each bin - Returns Dataframe with 'datetime' as index and 'rms' value as a column - - Parameters - ---------- - binsize : float, in sec - Time window considered. If set to None, only one value is returned - bin_overlap : float [0 to 1] - Percentage to overlap the bin windows - nfft : int - Window size for processing - fft_overlap : float [0 to 1] - Percentage to overlap the bin windows - """ - aci_ds = self._apply(method_name='aci', binsize=binsize, bin_overlap=bin_overlap, nfft=nfft, - fft_overlap=fft_overlap) - return aci_ds - - def dynamic_range(self, binsize=None, bin_overlap=0, db=True): - """ - Compute the dynamic range of each bin - Returns a dataframe with datetime as index and dr as column - - Parameters - ---------- - binsize : float, in sec - Time window considered. If set to None, only one value is returned - bin_overlap : float [0 to 1] - Percentage to overlap the bin windows - db : bool - If set to True the result will be given in db, otherwise in upa - """ - dr_ds = self._apply(method_name='dynamic_range', binsize=binsize, bin_overlap=bin_overlap, db=db) - return dr_ds - - def cumulative_dynamic_range(self, binsize=None, bin_overlap=0, db=True): - """ - Compute the cumulative dynamic range for each bin - - Parameters - ---------- - binsize : float, in sec - Time window considered. If set to None, only one value is returned - bin_overlap : float [0 to 1] - Percentage to overlap the bin windows - db : bool - If set to True the result will be given in db, otherwise in upa^2 - - Returns - ------- - DataFrame with an extra column with the cumulative sum of dynamic range of each bin - """ - cumdr = self.dynamic_range(binsize=binsize, bin_overlap=bin_overlap, db=db) - cumdr['cumsum_dr'] = cumdr.dr.cumsum() - return cumdr - - def octaves_levels(self, binsize=None, bin_overlap=0, db=True, band=None, **kwargs): - """ - Return the octave levels - Parameters - ---------- - binsize: float - Length in seconds of the bin to analyze - bin_overlap : float [0 to 1] - Percentage to overlap the bin windows - db: boolean - Set to True if the result should be in decibels - band: list or tuple - List or tuple of [low_frequency, high_frequency] - - Returns - ------- - DataFrame with multiindex columns with levels method and band. The method is '3-oct' - - """ - return self._octaves_levels(fraction=1, binsize=binsize, bin_overlap=bin_overlap, db=db, band=band) - - def third_octaves_levels(self, binsize=None, bin_overlap=0, db=True, band=None, **kwargs): - """ - Return the octave levels - Parameters - ---------- - binsize: float - Length in seconds of the bin to analyze - bin_overlap : float [0 to 1] - Percentage to overlap the bin windows - db: boolean - Set to True if the result should be in decibels - band: list or tuple - List or tuple of [low_frequency, high_frequency] - - Returns - ------- - DataFrame with multiindex columns with levels method and band. The method is '3-oct' - - """ - return self._octaves_levels(fraction=3, binsize=binsize, bin_overlap=bin_overlap, db=db, band=band) - - def _octaves_levels(self, fraction=1, binsize=None, bin_overlap=0, db=True, band=None): + def octaves_levels(self, fraction=1, db=True, band=None): """ Return the octave levels Parameters ---------- fraction: int Fraction of the desired octave. Set to 1 for octave bands, set to 3 for 1/3-octave bands - binsize: float - Length in seconds of the bin to analyze - bin_overlap : float [0 to 1] - Percentage to overlap the bin windows db: boolean Set to True if the result should be in decibels @@ -449,38 +307,28 @@ def _octaves_levels(self, fraction=1, binsize=None, bin_overlap=0, db=True, band if band is None: band = [None, self.fs / 2] - oct_str = 'oct%s' % fraction # Create an empty dataset - da = xarray.DataArray() - units_attrs = output_units.get_units_attrs(method_name='octave_levels', p_ref=self.p_ref, log=db) - for i, time_bin, signal, start_sample, end_sample in self._bins(binsize, bin_overlap=bin_overlap): - signal.set_band(band, downsample=downsample) - fbands, levels = signal.octave_levels(db, fraction) - da_levels = xarray.DataArray(data=[levels], - coords={'id': [i], 'start_sample': ('id', [start_sample]), - 'end_sample': ('id', [end_sample]), 'datetime': ('id', [time_bin]), - 'frequency': fbands}, - dims=['id', 'frequency'] - ) - if i == 0: - da = da_levels - else: - da = xarray.concat((da, da_levels), 'id') - da.attrs.update(units_attrs) - ds = xarray.Dataset(data_vars={oct_str: da}, attrs=self._get_metadata_attrs()) - return ds - - def hybrid_millidecade_bands(self, nfft, fft_overlap=0.5, binsize=None, bin_overlap=0, db=True, + signal = self.signal + signal.set_band(band, downsample=downsample) + fbands, levels = signal.octave_levels(db, fraction) + da_levels = xarray.DataArray(data=[levels], + coords={'id': [self.chunk_id], + 'file_id': ('id', [self.chunk_file_id]), + 'start_sample': ('id', [self.start_frame]), + 'end_sample': ('id', [self.end_frame]), + 'datetime': ('id', [self.time_bin]), + 'frequency': fbands}, + dims=['id', 'frequency'] + ) + return da_levels + + def hybrid_millidecade_bands(self, nfft, fft_overlap=0.5, db=True, method='density', band=None, percentiles=None): """ Parameters ---------- - binsize : float, in sec - Time window considered. If set to None, only one value is returned - bin_overlap : float [0 to 1] - Percentage to overlap the bin windows nfft : int Length of the fft window in samples. Power of 2. fft_overlap : float [0 to 1] @@ -503,8 +351,8 @@ def hybrid_millidecade_bands(self, nfft, fft_overlap=0.5, binsize=None, bin_over if band is None: band = [0, self.fs / 2] - spectra_ds = self._spectrum(scaling=method, binsize=binsize, nfft=nfft, fft_overlap=fft_overlap, - db=False, bin_overlap=bin_overlap, percentiles=percentiles, band=band) + spectra_ds = self.spectrum(scaling=method, nfft=nfft, fft_overlap=fft_overlap, + db=False, percentiles=percentiles, band=band) millidecade_bands_limits, millidecade_bands_c = utils.get_hybrid_millidecade_limits(band, nfft) fft_bin_width = band[1] * 2 / nfft hybrid_millidecade_ds = utils.spectra_ds_to_bands(spectra_ds['band_%s' % method], @@ -513,17 +361,12 @@ def hybrid_millidecade_bands(self, nfft, fft_overlap=0.5, binsize=None, bin_over spectra_ds['millidecade_bands'] = hybrid_millidecade_ds return spectra_ds - def spectrogram(self, binsize=None, bin_overlap=0, nfft=512, fft_overlap=0.5, - scaling='density', db=True, band=None): + def spectrogram(self, nfft=512, fft_overlap=0.5, scaling='density', db=True, band=None): """ Return the spectrogram of the signal (entire file) Parameters ---------- - binsize : float, in sec - Time window considered. If set to None, only one value is returned - bin_overlap : float [0 to 1] - Percentage to overlap the bin windows db : bool If set to True the result will be given in db, otherwise in upa^2 nfft : int @@ -551,27 +394,20 @@ def spectrogram(self, binsize=None, bin_overlap=0, nfft=512, fft_overlap=0.5, if band is None: band = [None, self.fs / 2] - da = xarray.DataArray() - for i, time_bin, signal, start_sample, end_sample in self._bins(binsize, bin_overlap=bin_overlap): - signal.set_band(band, downsample=downsample) - freq, t, sxx = signal.spectrogram(nfft=nfft, overlap=fft_overlap, scaling=scaling, db=db) - da_sxx = xarray.DataArray([sxx], coords={'id': [i], - 'start_sample': ('id', [start_sample]), - 'end_sample': ('id', [end_sample]), - 'datetime': ('id', [time_bin]), - 'frequency': freq, 'time': t}, - dims=['id', 'frequency', 'time']) - if i == 0: - da = da_sxx - else: - da = xarray.concat((da, da_sxx), 'id') - units_attrs = output_units.get_units_attrs(method_name='spectrogram_' + scaling, p_ref=self.p_ref, log=db) - da.attrs.update(units_attrs) - ds = xarray.Dataset(data_vars={'spectrogram': da}, attrs=self._get_metadata_attrs()) - return ds - - def _spectrum(self, scaling='density', binsize=None, bin_overlap=0, nfft=512, fft_overlap=0.5, - db=True, percentiles=None, band=None): + signal = self.signal + signal.set_band(band, downsample=downsample) + freq, t, sxx = signal.spectrogram(nfft=nfft, overlap=fft_overlap, scaling=scaling, db=db) + da_sxx = xarray.DataArray([sxx], coords={'id': [self.chunk_id], + 'file_id': ('id', [self.chunk_file_id]), + 'start_sample': ('id', [self.start_frame]), + 'end_sample': ('id', [self.end_frame]), + 'datetime': ('id', [self.time_bin]), + 'frequency': freq, 'time': t}, + dims=['id', 'frequency', 'time']) + return da_sxx + + def spectrum(self, scaling='density', nfft=512, fft_overlap=0.5, + db=True, percentiles=None, band=None): """ Return the spectrum : frequency distribution of every bin (periodogram) Returns Dataframe with 'datetime' as index and a column for each frequency and each @@ -581,10 +417,6 @@ def _spectrum(self, scaling='density', binsize=None, bin_overlap=0, nfft=512, ff ---------- scaling : string Can be set to 'spectrum' or 'density' depending on the desired output - binsize : float, in sec - Time window considered. If set to None, only one value is returned - bin_overlap : float [0 to 1] - Percentage to overlap the bin windows nfft : int Length of the fft window in samples. Power of 2. fft_overlap : float [0 to 1] @@ -631,77 +463,13 @@ def _spectrum(self, scaling='density', binsize=None, bin_overlap=0, nfft=512, ff return ds_bin - def psd(self, binsize=None, bin_overlap=0, nfft=512, fft_overlap=0.5, db=True, percentiles=None, band=None): - """ - Return the power spectrum density (PSD) of all the file (units^2 / Hz) re 1 V 1 upa - Returns a Dataframe with 'datetime' as index and a column for each frequency and each - percentile - - Parameters - ---------- - binsize : float, in sec - Time window considered. If set to None, only one value is returned - bin_overlap : float [0 to 1] - Percentage to overlap the bin windows - nfft : int - Length of the fft window in samples. Recommended power of 2. - fft_overlap : float [0 to 1] - Percentage to overlap the bin windows - db : bool - If set to True the result will be given in db, otherwise in upa^2 - percentiles : list or None - List of all the percentiles that have to be returned. If set to empty list, - no percentiles is returned - band : tuple or None - Band to filter the spectrogram in. A band is represented with a tuple - or a list - as - (low_freq, high_freq). If set to None, the broadband up to the Nyquist frequency will be analyzed - """ - psd_ds = self._spectrum(scaling='density', binsize=binsize, nfft=nfft, fft_overlap=fft_overlap, - db=db, bin_overlap=bin_overlap, percentiles=percentiles, band=band) - return psd_ds - - def power_spectrum(self, binsize=None, bin_overlap=0, nfft=512, fft_overlap=0.5, - db=True, percentiles=None, band=None): - """ - Return the power spectrum of all the file (units^2 / Hz) re 1 V 1 upa - Returns a Dataframe with 'datetime' as index and a column for each frequency and - each percentile - - Parameters - ---------- - binsize : float, in sec - Time window considered. If set to None, only one value is returned - bin_overlap : float [0 to 1] - Percentage to overlap the bin windows - nfft : int - Length of the fft window in samples. Power of 2. - fft_overlap : float [0 to 1] - Percentage to overlap the windows in the fft - db : bool - If set to True the result will be given in db, otherwise in upa^2 - percentiles : list or None - List of all the percentiles that have to be returned. If set to empty list, - no percentiles is returned - band : tuple or None - Band to filter the spectrogram in. A band is represented with a tuple - or a list - as - (low_freq, high_freq). If set to None, the broadband up to the Nyquist frequency will be analyzed - """ - - spectrum_ds = self._spectrum(scaling='spectrum', binsize=binsize, nfft=nfft, fft_overlap=fft_overlap, - db=db, bin_overlap=bin_overlap, percentiles=percentiles, band=band) - return spectrum_ds - - def spd(self, binsize=None, bin_overlap=0, h=0.1, nfft=512, fft_overlap=0.5, + def spd(self, h=0.1, nfft=512, fft_overlap=0.5, db=True, percentiles=None, min_val=None, max_val=None, band=None): """ Return the spectral probability density. Parameters ---------- - binsize : float, in sec - Time window considered. If set to None, only one value is returned - bin_overlap : float [0 to 1] - Percentage to overlap the bin windows h : float Histogram bin width (in the correspondent units, upa or db) nfft : int @@ -738,8 +506,8 @@ def spd(self, binsize=None, bin_overlap=0, h=0.1, nfft=512, fft_overlap=0.5, list of matrices with all the probabilities """ - psd_evolution = self.psd(binsize=binsize, nfft=nfft, fft_overlap=fft_overlap, db=db, percentiles=percentiles, - bin_overlap=bin_overlap, band=band) + psd_evolution = self.psd(nfft=nfft, fft_overlap=fft_overlap, db=db, percentiles=percentiles, + band=band) return utils.compute_spd(psd_evolution, h=h, percentiles=percentiles, max_val=max_val, min_val=min_val) def source_separation(self, window_time=1.0, n_sources=15, binsize=None, save_path=None, verbose=False, band=None): @@ -793,7 +561,7 @@ def plot_spectrum_median(self, scaling='density', db=True, log=True, save_path=N Where to save the images **kwargs : any attribute valid on psd() function """ - psd = self._spectrum(db=db, scaling=scaling, **kwargs) + psd = self.spectrum(db=db, scaling=scaling, **kwargs) plots.plot_spectrum_median(ds=psd, data_var='band_' + scaling, log=log, save_path=save_path) def plot_spectrum_per_chunk(self, scaling='density', db=True, log=True, save_path=None, **kwargs): @@ -812,7 +580,7 @@ def plot_spectrum_per_chunk(self, scaling='density', db=True, log=True, save_pat Where to save the images **kwargs : any attribute valid on psd() function """ - psd = self._spectrum(db=db, scaling=scaling, **kwargs) + psd = self.spectrum(db=db, scaling=scaling, **kwargs) plots.plot_spectrum_per_chunk(ds=psd, data_var='band_' + scaling, log=log, save_path=save_path) def plot_spectrogram(self, db=True, log=True, save_path=None, **kwargs): diff --git a/pypam/acoustic_file.py b/pypam/acoustic_file.py index c6ca3e5..bcae1a6 100644 --- a/pypam/acoustic_file.py +++ b/pypam/acoustic_file.py @@ -5,8 +5,6 @@ __status__ = "Development" import datetime -import operator -import os import pathlib import zipfile @@ -16,10 +14,8 @@ import seaborn as sns import soundfile as sf import xarray -from tqdm.auto import tqdm from pypam import plots -from pypam import signal as sig from pypam import utils from pypam import units as output_units from pypam import acoustic_chunk @@ -53,26 +49,31 @@ class AcuFile: Set to True to subtract the dc noise (root mean squared value """ - def __init__(self, sfile, hydrophone, p_ref, start_frame=0, timezone='UTC', channel=0, + def __init__(self, sfile, hydrophone, p_ref, sfile_next=None, gridded=True, timezone='UTC', channel=0, calibration=None, dc_subtract=False, chunk_id_start=0): # Save hydrophone model self.hydrophone = hydrophone # Get the date from the name file_name = utils.parse_file_name(sfile) - - try: - self.date = hydrophone.get_name_datetime(file_name) - except ValueError: - self.date = datetime.datetime.now() - print('Filename %s does not match the %s file structure. Setting time to now...' % - (file_name, self.hydrophone.name)) + self.date = utils.get_file_datetime(file_name, hydrophone) # Signal self.file_path = sfile self.file = sf.SoundFile(self.file_path, 'r') self.fs = self.file.samplerate + # Check if next file is continuous + self.date_end = self.date + datetime.timedelta(seconds=self.total_time()) + self.file_path_next = None + if sfile_next is not None: + next_file_name = utils.parse_file_name(sfile_next) + next_date = utils.get_file_datetime(next_file_name, self.hydrophone) + + if (next_date - self.date_end) < datetime.timedelta(seconds=1): + self.file_path_next = sfile_next + self.file_next = sf.SoundFile(self.file_path_next, 'r') + # Reference pressure in upa self.p_ref = p_ref @@ -89,16 +90,23 @@ def __init__(self, sfile, hydrophone, p_ref, start_frame=0, timezone='UTC', chan self.wav = None self.time = None - # Set a starting frame for the file - if calibration is None: - self._start_frame = start_frame - elif calibration < 0: - self._start_frame = self.hydrophone.calibrate(self.file_path) - if self._start_frame is None: - self._start_frame = 0 - else: - self._start_frame = int(calibration * self.fs) + if gridded and calibration is not None: + raise AttributeError('The options gridded and calibration are not compatible') self.calibration = calibration + self.gridded = gridded + + if gridded: + self._start_frame = int((pd.to_datetime(self.date).ceil('min') - self.date).total_seconds() * self.fs) + else: + # Set a starting frame for the file + if calibration is None: + self._start_frame = 0 + elif calibration < 0: + self._start_frame = self.hydrophone.calibrate(self.file_path) + if self._start_frame is None: + self._start_frame = 0 + else: + self._start_frame = int(calibration * self.fs) self.dc_subtract = dc_subtract @@ -140,25 +148,41 @@ def _bins(self, binsize=None, bin_overlap=0): else: blocksize = self.samples(binsize) noverlap = int(bin_overlap * blocksize) - n_blocks = self._n_blocks(blocksize, noverlap=noverlap) + if self.file_path_next is not None: + is_there_next = True + else: + is_there_next = False + n_blocks = self._n_blocks(blocksize, noverlap=noverlap, add_last=is_there_next) time_array, _, _ = self._time_array(binsize, bin_overlap=bin_overlap) chunk_id = self.chunk_id_start + file_end = self.file_path for i in np.arange(n_blocks): - # Select the desired channel time_bin = time_array[i] - start_sample = i * blocksize + self._start_frame - end_sample = start_sample + blocksize - chunk = acoustic_chunk.AcuChunk(sfile_start=self.file_path, sfile_end=self.file_path, + start_sample = i * (blocksize - noverlap) + self._start_frame + if i == n_blocks - 1: + if is_there_next: + end_sample = blocksize - (self.file.frames - start_sample) + file_end = self.file_path_next + else: + end_sample = self.file.frames + else: + end_sample = start_sample + blocksize + chunk = acoustic_chunk.AcuChunk(sfile_start=self.file_path, sfile_end=file_end, start_frame=start_sample, end_frame=end_sample, hydrophone=self.hydrophone, p_ref=self.p_ref, chunk_id=chunk_id, chunk_file_id=i, time_bin=time_bin) chunk_id += 1 yield i, chunk + self.file.seek(0) - def _n_blocks(self, blocksize, noverlap): - return int(np.floor(self.file.frames - self._start_frame) / (blocksize - noverlap)) + def _n_blocks(self, blocksize, noverlap, add_last): + if add_last: + n_blocks = int(np.ceil((self.file.frames - self._start_frame) / (blocksize - noverlap))) + else: + n_blocks = int(np.floor((self.file.frames - self._start_frame) / (blocksize - noverlap))) + return n_blocks def samples(self, bintime): """ @@ -316,7 +340,7 @@ def _time_array(self, binsize=None, bin_overlap=0): blocks_samples = np.arange(start=self._start_frame, stop=self.file.frames - 1, step=blocksize) end_samples = blocks_samples + blocksize incr = pd.to_timedelta(blocks_samples / self.fs, unit='seconds') - self.time = self.date + datetime.timedelta(seconds=self._start_frame / self.fs) + incr + self.time = self.date + incr if self.timezone != 'UTC': self.time = pd.to_datetime(self.time).tz_localize(self.timezone).tz_convert('UTC').tz_convert(None) return self.time, blocks_samples.astype(int), end_samples.astype(int) @@ -475,14 +499,13 @@ def _apply_multiple(self, method_list, binsize=None, band_list=None, bin_overlap sorted_bands = [band] + sorted_bands else: sorted_bands = sorted_bands + [band] - log = True if 'db' in kwargs.keys(): if not kwargs['db']: log = False # Define an empty dataset ds = xarray.Dataset() for i, chunk in self._bins(binsize, bin_overlap=bin_overlap): - ds_bands = chunk._apply_multiple(method_list=method_list, band_list=None, **kwargs) + ds_bands = chunk.apply_multiple(method_list=method_list, band_list=None, **kwargs) if i == 0: ds = ds_bands else: @@ -653,15 +676,8 @@ def _octaves_levels(self, fraction=1, binsize=None, bin_overlap=0, db=True, band # Create an empty dataset da = xarray.DataArray() units_attrs = output_units.get_units_attrs(method_name='octave_levels', p_ref=self.p_ref, log=db) - for i, time_bin, signal, start_sample, end_sample in self._bins(binsize, bin_overlap=bin_overlap): - signal.set_band(band, downsample=downsample) - fbands, levels = signal.octave_levels(db, fraction) - da_levels = xarray.DataArray(data=[levels], - coords={'id': [i], 'start_sample': ('id', [start_sample]), - 'end_sample': ('id', [end_sample]), 'datetime': ('id', [time_bin]), - 'frequency': fbands}, - dims=['id', 'frequency'] - ) + for i, chunk in self._bins(binsize, bin_overlap=bin_overlap): + da_levels = chunk._octaves_levels(fraction=fraction, db=db, band=band) if i == 0: da = da_levels else: @@ -751,15 +767,8 @@ def spectrogram(self, binsize=None, bin_overlap=0, nfft=512, fft_overlap=0.5, band = [None, self.fs / 2] da = xarray.DataArray() - for i, time_bin, signal, start_sample, end_sample in self._bins(binsize, bin_overlap=bin_overlap): - signal.set_band(band, downsample=downsample) - freq, t, sxx = signal.spectrogram(nfft=nfft, overlap=fft_overlap, scaling=scaling, db=db) - da_sxx = xarray.DataArray([sxx], coords={'id': [i], - 'start_sample': ('id', [start_sample]), - 'end_sample': ('id', [end_sample]), - 'datetime': ('id', [time_bin]), - 'frequency': freq, 'time': t}, - dims=['id', 'frequency', 'time']) + for i, chunk in self._bins(binsize, bin_overlap=bin_overlap): + da_sxx = chunk.spectrogram(nfft=nfft, fft_overlap=fft_overlap, scaling=scaling, db=db) if i == 0: da = da_sxx else: @@ -806,8 +815,8 @@ def _spectrum(self, scaling='density', binsize=None, bin_overlap=0, nfft=512, ff spectrum_str = 'band_' + scaling ds = xarray.DataArray() for i, chunk in self._bins(binsize, bin_overlap=bin_overlap): - ds_bin = chunk._spectrum(scaling=scaling, binsize=binsize, bin_overlap=bin_overlap, - nfft=nfft, fft_overlap=fft_overlap, db=db, percentiles=percentiles, band=band) + ds_bin = chunk.spectrum(scaling=scaling, nfft=nfft, fft_overlap=fft_overlap, db=db, + percentiles=percentiles, band=band) if i == 0: ds = ds_bin else: diff --git a/pypam/acoustic_survey.py b/pypam/acoustic_survey.py index c9a0c41..5bc4a01 100644 --- a/pypam/acoustic_survey.py +++ b/pypam/acoustic_survey.py @@ -112,19 +112,24 @@ def __init__(self, self.file_dependent_attrs = ['file_path', '_start_frame', 'end_to_end_calibration'] self.current_chunk_id = 0 + self.gridded = gridded_data + def _files(self): """ Iterator that returns AcuFile for each wav file in the folder """ self.current_chunk_id = 0 - for file_list in tqdm(self.acu_files): + for file_i, file_list in tqdm(enumerate(self.acu_files), total=len(self.acu_files)): wav_file = file_list[0] - print(wav_file) - sound_file = self._hydro_file(wav_file, chunk_id_start=self.current_chunk_id) + if file_i >= (len(self.acu_files) - 1): + next_file = None + else: + next_file = self.acu_files[file_i+1][0] + sound_file = self._hydro_file(wav_file, wav_file_next=next_file, chunk_id_start=self.current_chunk_id) if sound_file.is_in_period(self.period) and sound_file.file.frames > 0: yield sound_file - def _hydro_file(self, wav_file, chunk_id_start=0): + def _hydro_file(self, wav_file, wav_file_next=None, chunk_id_start=0): """ Return the AcuFile object from the wav_file Parameters @@ -136,9 +141,11 @@ def _hydro_file(self, wav_file, chunk_id_start=0): ------- Object AcuFile """ - hydro_file = acoustic_file.AcuFile(sfile=wav_file, hydrophone=self.hydrophone, p_ref=self.p_ref, + hydro_file = acoustic_file.AcuFile(sfile=wav_file, sfile_next=wav_file_next, + hydrophone=self.hydrophone, p_ref=self.p_ref, timezone=self.timezone, channel=self.channel, calibration=self.calibration, - dc_subtract=self.dc_subtract, chunk_id_start=chunk_id_start) + dc_subtract=self.dc_subtract, chunk_id_start=chunk_id_start, + gridded=self.gridded) return hydro_file def _get_metadata_attrs(self): @@ -248,14 +255,12 @@ def start_end_timestamp(self): Return the start and the end timestamps """ wav_file = self.acu_files[0][0] - print(wav_file) sound_file = self._hydro_file(wav_file) start_datetime = sound_file.date file_list = self.acu_files[-1] wav_file = file_list[0] - print(wav_file) sound_file = self._hydro_file(wav_file) end_datetime = sound_file.date + datetime.timedelta(seconds=sound_file.total_time()) @@ -526,19 +531,6 @@ def __init__(self, folder_path, zipped=False, include_dirs=False, extension='.wa extra_extensions = [] self.extra_extensions = extra_extensions - def __getitem__(self, n): - """ - Get n wav file - """ - self.__iter__() - self.n = n - return self.__next__() - - def __iter__(self): - """ - Iteration - """ - self.n = 0 if not self.zipped: if self.recursive: self.files_list = sorted(self.folder_path.glob('**/*%s' % self.extension)) @@ -547,57 +539,47 @@ def __iter__(self): else: if self.recursive: self.folder_list = sorted(self.folder_path.iterdir()) - self.zipped_subfolder = AcousticFolder(self.folder_list[self.n], - extra_extensions=self.extra_extensions, - zipped=self.zipped, - include_dirs=self.recursive) + self.files_list = [] + for fol in self.folder_list: + self.zipped_subfolder = AcousticFolder(fol, + extra_extensions=self.extra_extensions, + zipped=self.zipped, + include_dirs=self.recursive) + np.concatenate((self.files_list, self.zipped_subfolder.files_list)) else: zipped_folder = zipfile.ZipFile(self.folder_path, 'r', allowZip64=True) self.files_list = [] total_files_list = zipped_folder.namelist() - for f in total_files_list: + for f in total_files_list: extension = f.split(".")[-1] if extension == 'wav': self.files_list.append(f) - return self - def __next__(self): + def __getitem__(self, index): """ - Next wav file + Get n wav file """ - if self.n < len(self.files_list): + if index < len(self.files_list): files_list = [] if self.zipped: - if self.recursive: - try: - self.files_list = self.zipped_subfolder.__next__() - except StopIteration: - self.n += 1 - self.zipped_subfolder = AcousticFolder(self.folder_list[self.n], - extra_extensions=self.extra_extensions, - zipped=self.zipped, - include_dirs=self.recursive) - else: - file_name = self.files_list[self.n] - zipped_folder = zipfile.ZipFile(self.folder_path, 'r', allowZip64=True) - wav_file = zipped_folder.open(file_name) - files_list.append(wav_file) - for extension in self.extra_extensions: - ext_file_name = file_name.parent.joinpath( - file_name.name.replace(self.extension, extension)) - files_list.append(zipped_folder.open(ext_file_name)) - self.n += 1 - return files_list + file_name = self.files_list[index] + zipped_folder = zipfile.ZipFile(self.folder_path, 'r', allowZip64=True) + wav_file = zipped_folder.open(file_name) + files_list.append(wav_file) + for extension in self.extra_extensions: + ext_file_name = file_name.parent.joinpath( + file_name.name.replace(self.extension, extension)) + files_list.append(zipped_folder.open(ext_file_name)) + return files_list else: - wav_path = self.files_list[self.n] + wav_path = self.files_list[index] files_list.append(wav_path) for extension in self.extra_extensions: files_list.append(pathlib.Path(str(wav_path).replace(self.extension, extension))) - self.n += 1 return files_list else: - raise StopIteration + raise IndexError def __len__(self): if not self.zipped: diff --git a/pypam/utils.py b/pypam/utils.py index 639bc0b..285d161 100644 --- a/pypam/utils.py +++ b/pypam/utils.py @@ -53,6 +53,7 @@ from functools import partial import zipfile import os +import datetime try: import dask @@ -913,3 +914,13 @@ def parse_file_name(sfile): raise Exception('The filename has to be either a Path object or a string') return file_name + + +def get_file_datetime(file_name, hydrophone): + try: + date = hydrophone.get_name_datetime(file_name) + except ValueError: + date = datetime.datetime.now() + print('Filename %s does not match the %s file structure. Setting time to now...' % + (file_name, hydrophone.name)) + return date diff --git a/tests/test_acoustic_file.py b/tests/test_acoustic_file.py index eceb1ec..6671ea1 100644 --- a/tests/test_acoustic_file.py +++ b/tests/test_acoustic_file.py @@ -4,6 +4,8 @@ from tests import skip_unless_with_plots import pathlib import matplotlib.pyplot as plt +import datetime +import pandas as pd plt.rcParams.update(plt.rcParamsDefault) @@ -21,6 +23,8 @@ class TestAcuFile(unittest.TestCase): def setUp(self) -> None: self.acu_file = AcuFile('tests/test_data/67416073.210610033655.wav', soundtrap, 1) + self.acu_file_gridded = AcuFile('tests/test_data/67416073.210610033655.wav', soundtrap, + 1, gridded=True) @skip_unless_with_plots() def test_plots(self): @@ -34,6 +38,24 @@ def test_millidecade_bands(self): self.acu_file.hybrid_millidecade_bands(nfft, fft_overlap=0.5, binsize=None, bin_overlap=0, db=True, method='density', band=None) + def test_millidecade_bands_gridded(self): + nfft = 8000 + binsize = 60 + ds = self.acu_file_gridded.hybrid_millidecade_bands(nfft, fft_overlap=0.5, binsize=binsize, bin_overlap=0, db=True, + method='spectrum', band=None) + assert ds.datetime.dt.time.values[0].second == 0 + + def test_non_zero_bin_overlap(self): + nfft = 8000 + binsize = 60 + bin_overlap = 0.5 + ds = self.acu_file_gridded.hybrid_millidecade_bands(nfft, fft_overlap=0.5, binsize=binsize, + bin_overlap=bin_overlap, db=True, method='spectrum', + band=None) + assert (pd.to_timedelta(ds.datetime.values[1] - ds.datetime.values[0]) == + pd.to_timedelta(datetime.timedelta(seconds=binsize * bin_overlap))) + assert (ds.start_sample.values[1] - ds.start_sample.values[0]) == (binsize * bin_overlap) * 8000 + def test_update_freq_cal(self): ds_psd = self.acu_file.psd() ds_psd_updated = self.acu_file.update_freq_cal(ds=ds_psd, data_var='band_density') diff --git a/tests/test_acoustic_survey.py b/tests/test_acoustic_survey.py index c4474d2..703875d 100644 --- a/tests/test_acoustic_survey.py +++ b/tests/test_acoustic_survey.py @@ -37,7 +37,7 @@ third_octaves = None dc_subtract = True -include_dirs = True +include_dirs = False zipped_files = False # Don't plot if it is running on CI @@ -51,6 +51,8 @@ def setUp(self) -> None: self.asa_flac = ASA(hydrophone=soundtrap, folder_path=folder_path.joinpath('flac_data'), binsize=binsize, nfft=nfft, timezone='UTC', include_dirs=include_dirs, zipped=zipped_files, dc_subtract=dc_subtract, extension='.flac') + self.asa_gridded = ASA(hydrophone=soundtrap, folder_path=folder_path, binsize=binsize, nfft=nfft, timezone='UTC', + include_dirs=include_dirs, zipped=zipped_files, dc_subtract=dc_subtract, gridded_data=True) def test_empty_directory(self): with self.assertRaises(ValueError) as context: @@ -127,6 +129,12 @@ def test_millidecade_bands(self): plt.legend() plt.show() + def test_millidecade_bands_gridded(self): + # Set the frequency resolution to 1 Hz + # Check for the complete broadband and a band filtered in the lower frequencies + milli_psd = self.asa_gridded.hybrid_millidecade_bands(db=True, method='density', band=[0, 4000], percentiles=None) + assert milli_psd.datetime.dt.time.values[0].second == 0 + def test_spectrogram(self): self.asa.apply_to_all('spectrogram')