From b886d1a8bade202f1868c4e4812e041e53eb157a Mon Sep 17 00:00:00 2001
From: Clea Parcerisas <clea.parcerisas@vliz.be>
Date: Tue, 3 Sep 2024 14:23:42 +0200
Subject: [PATCH] added option of having gridded data. Fixes #127 and #79

---
 pypam/acoustic_chunk.py       | 322 +++++-----------------------------
 pypam/acoustic_file.py        | 111 ++++++------
 pypam/acoustic_survey.py      |  88 ++++------
 pypam/utils.py                |  11 ++
 tests/test_acoustic_file.py   |  22 +++
 tests/test_acoustic_survey.py |  10 +-
 6 files changed, 182 insertions(+), 382 deletions(-)

diff --git a/pypam/acoustic_chunk.py b/pypam/acoustic_chunk.py
index 4a52402..9e10bad 100644
--- a/pypam/acoustic_chunk.py
+++ b/pypam/acoustic_chunk.py
@@ -18,7 +18,6 @@
 import xarray
 from tqdm.auto import tqdm
 
-from pypam import nmf
 from pypam import plots
 from pypam import signal as sig
 from pypam import utils
@@ -66,7 +65,7 @@ def __init__(self, sfile_start, sfile_end, hydrophone, p_ref, time_bin, chunk_id
         self.file_end_path = sfile_end
         self.file_start = sf.SoundFile(self.file_start_path, 'r')
         self.file_end = sf.SoundFile(self.file_end_path, mode='r')
-        self.fs = self.file_start.samplerate # Should be the same for both files, only reading one!
+        self.fs = self.file_start.samplerate  # Should be the same for both files, only reading one!
 
         # Time and id
         self.time_bin = time_bin
@@ -116,7 +115,6 @@ def freq_resolution_window(self, freq_resolution):
                             'it must be downsampled!')
         return nfft
 
-
     def wav2upa(self, wav=None):
         """
         Compute the pressure from the wav signal
@@ -163,7 +161,7 @@ def db2upa(self, db=None):
         """
         if db is None:
             wav = self._signal.signal
-            db = 10 * np.log10(wav**2)
+            db = 10 * np.log10(wav ** 2)
         # return np.power(10, db / 20.0 - np.log10(self.p_ref))
         return utils.to_mag(wave=db, ref=self.p_ref)
 
@@ -203,7 +201,7 @@ def read(self):
             else:
                 first_chunk = self.file_start.read(frames=-1, always_2d=True)[:, self.channel]
                 self.file_end.seek(0)
-                second_chunk = self.file_end.read(frames=self.end_frame)[:, self.channel]
+                second_chunk = self.file_end.read(frames=self.end_frame, always_2d=True)[:, self.channel]
                 signal = np.concatenate((first_chunk, second_chunk))
 
             # Read the signal and prepare it for analysis
@@ -217,7 +215,7 @@ def read(self):
 
         return signal
 
-    def _apply_multiple(self, method_list, band_list=None, **kwargs):
+    def apply_multiple(self, method_list, band_list=None, **kwargs):
         """
         Apply multiple methods per bin to save computational time
 
@@ -235,7 +233,7 @@ def _apply_multiple(self, method_list, band_list=None, **kwargs):
         -------
         DataFrame with time as index and a multiindex column with band, method as levels.
         """
-        signal = self.signal.copy()
+        signal = self.signal
         downsample = False
 
         # Bands selected to study
@@ -276,8 +274,8 @@ def _apply_multiple(self, method_list, band_list=None, **kwargs):
                                                                coords={'id': [self.chunk_id],
                                                                        'file_id': ('id', [self.chunk_file_id]),
                                                                        'datetime': ('id', [self.time_bin]),
-                                                                       'start_sample': ('id', [self.start_sample]),
-                                                                       'end_sample': ('id', [self.end_sample]),
+                                                                       'start_sample': ('id', [self.start_frame]),
+                                                                       'end_sample': ('id', [self.end_frame]),
                                                                        'band': [j],
                                                                        'low_freq': ('band', [band[0]]),
                                                                        'high_freq': ('band', [band[1]])},
@@ -288,155 +286,15 @@ def _apply_multiple(self, method_list, band_list=None, **kwargs):
                 else:
                     ds_bands = xarray.concat((ds_bands, methods_output), 'band')
 
-        ds_bands.attrs = self._get_metadata_attrs()
         return ds_bands
 
-    def _apply(self, method_name, binsize=None, db=True, band_list=None, bin_overlap=0, **kwargs):
-        """
-        Apply one single method
-
-        Parameters
-        ----------
-        method_name : string
-            Name of the method to apply
-        binsize : float, in sec
-            Time window considered. If set to None, only one value is returned
-        overlap : float [0 to 1]
-            Percentage to overlap the bin windows
-        db : bool
-            If set to True the result will be given in db, otherwise in upa
-        """
-        return self._apply_multiple(method_list=[method_name], binsize=binsize, bin_overlap=bin_overlap,
-                                    db=db, band_list=band_list, **kwargs)
-
-    def rms(self, binsize=None, bin_overlap=0, db=True):
-        """
-        Calculation of root mean squared value (rms) of the signal in upa for each bin
-        Returns Dataframe with 'datetime' as index and 'rms' value as a column
-
-        Parameters
-        ----------
-        binsize : float, in sec
-            Time window considered. If set to None, only one value is returned
-        bin_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
-        db : bool
-            If set to True the result will be given in db, otherwise in upa
-        """
-        rms_ds = self._apply(method_name='rms', binsize=binsize, bin_overlap=bin_overlap, db=db)
-        return rms_ds
-
-    def aci(self, binsize=None, bin_overlap=0, nfft=1024, fft_overlap=0.5):
-        """
-        Calculation of root mean squared value (rms) of the signal in upa for each bin
-        Returns Dataframe with 'datetime' as index and 'rms' value as a column
-
-        Parameters
-        ----------
-        binsize : float, in sec
-            Time window considered. If set to None, only one value is returned
-        bin_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
-        nfft : int
-            Window size for processing
-        fft_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
-        """
-        aci_ds = self._apply(method_name='aci', binsize=binsize, bin_overlap=bin_overlap, nfft=nfft,
-                             fft_overlap=fft_overlap)
-        return aci_ds
-
-    def dynamic_range(self, binsize=None, bin_overlap=0, db=True):
-        """
-        Compute the dynamic range of each bin
-        Returns a dataframe with datetime as index and dr as column
-
-        Parameters
-        ----------
-        binsize : float, in sec
-            Time window considered. If set to None, only one value is returned
-        bin_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
-        db : bool
-            If set to True the result will be given in db, otherwise in upa
-        """
-        dr_ds = self._apply(method_name='dynamic_range', binsize=binsize, bin_overlap=bin_overlap, db=db)
-        return dr_ds
-
-    def cumulative_dynamic_range(self, binsize=None, bin_overlap=0, db=True):
-        """
-        Compute the cumulative dynamic range for each bin
-
-        Parameters
-        ----------
-        binsize : float, in sec
-            Time window considered. If set to None, only one value is returned
-        bin_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
-        db : bool
-            If set to True the result will be given in db, otherwise in upa^2
-
-        Returns
-        -------
-        DataFrame with an extra column with the cumulative sum of dynamic range of each bin
-        """
-        cumdr = self.dynamic_range(binsize=binsize, bin_overlap=bin_overlap, db=db)
-        cumdr['cumsum_dr'] = cumdr.dr.cumsum()
-        return cumdr
-
-    def octaves_levels(self, binsize=None, bin_overlap=0, db=True, band=None, **kwargs):
-        """
-        Return the octave levels
-        Parameters
-        ----------
-        binsize: float
-            Length in seconds of the bin to analyze
-        bin_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
-        db: boolean
-            Set to True if the result should be in decibels
-        band: list or tuple
-            List or tuple of [low_frequency, high_frequency]
-
-        Returns
-        -------
-        DataFrame with multiindex columns with levels method and band. The method is '3-oct'
-
-        """
-        return self._octaves_levels(fraction=1, binsize=binsize, bin_overlap=bin_overlap, db=db, band=band)
-
-    def third_octaves_levels(self, binsize=None, bin_overlap=0, db=True, band=None, **kwargs):
-        """
-        Return the octave levels
-        Parameters
-        ----------
-        binsize: float
-            Length in seconds of the bin to analyze
-        bin_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
-        db: boolean
-            Set to True if the result should be in decibels
-        band: list or tuple
-            List or tuple of [low_frequency, high_frequency]
-
-        Returns
-        -------
-        DataFrame with multiindex columns with levels method and band. The method is '3-oct'
-
-        """
-        return self._octaves_levels(fraction=3, binsize=binsize, bin_overlap=bin_overlap, db=db, band=band)
-
-    def _octaves_levels(self, fraction=1, binsize=None, bin_overlap=0, db=True, band=None):
+    def octaves_levels(self, fraction=1, db=True, band=None):
         """
         Return the octave levels
         Parameters
         ----------
         fraction: int
             Fraction of the desired octave. Set to 1 for octave bands, set to 3 for 1/3-octave bands
-        binsize: float
-            Length in seconds of the bin to analyze
-        bin_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
         db: boolean
             Set to True if the result should be in decibels
 
@@ -449,38 +307,28 @@ def _octaves_levels(self, fraction=1, binsize=None, bin_overlap=0, db=True, band
 
         if band is None:
             band = [None, self.fs / 2]
-        oct_str = 'oct%s' % fraction
 
         # Create an empty dataset
-        da = xarray.DataArray()
-        units_attrs = output_units.get_units_attrs(method_name='octave_levels', p_ref=self.p_ref, log=db)
-        for i, time_bin, signal, start_sample, end_sample in self._bins(binsize, bin_overlap=bin_overlap):
-            signal.set_band(band, downsample=downsample)
-            fbands, levels = signal.octave_levels(db, fraction)
-            da_levels = xarray.DataArray(data=[levels],
-                                         coords={'id': [i], 'start_sample': ('id', [start_sample]),
-                                                 'end_sample': ('id', [end_sample]), 'datetime': ('id', [time_bin]),
-                                                 'frequency': fbands},
-                                         dims=['id', 'frequency']
-                                         )
-            if i == 0:
-                da = da_levels
-            else:
-                da = xarray.concat((da, da_levels), 'id')
-        da.attrs.update(units_attrs)
-        ds = xarray.Dataset(data_vars={oct_str: da}, attrs=self._get_metadata_attrs())
-        return ds
-
-    def hybrid_millidecade_bands(self, nfft, fft_overlap=0.5, binsize=None, bin_overlap=0, db=True,
+        signal = self.signal
+        signal.set_band(band, downsample=downsample)
+        fbands, levels = signal.octave_levels(db, fraction)
+        da_levels = xarray.DataArray(data=[levels],
+                                     coords={'id': [self.chunk_id],
+                                             'file_id': ('id', [self.chunk_file_id]),
+                                             'start_sample': ('id', [self.start_frame]),
+                                             'end_sample': ('id', [self.end_frame]),
+                                             'datetime': ('id', [self.time_bin]),
+                                             'frequency': fbands},
+                                     dims=['id', 'frequency']
+                                     )
+        return da_levels
+
+    def hybrid_millidecade_bands(self, nfft, fft_overlap=0.5, db=True,
                                  method='density', band=None, percentiles=None):
         """
 
         Parameters
         ----------
-        binsize : float, in sec
-            Time window considered. If set to None, only one value is returned
-        bin_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
         nfft : int
             Length of the fft window in samples. Power of 2.
         fft_overlap : float [0 to 1]
@@ -503,8 +351,8 @@ def hybrid_millidecade_bands(self, nfft, fft_overlap=0.5, binsize=None, bin_over
 
         if band is None:
             band = [0, self.fs / 2]
-        spectra_ds = self._spectrum(scaling=method, binsize=binsize, nfft=nfft, fft_overlap=fft_overlap,
-                                    db=False, bin_overlap=bin_overlap, percentiles=percentiles, band=band)
+        spectra_ds = self.spectrum(scaling=method, nfft=nfft, fft_overlap=fft_overlap,
+                                   db=False, percentiles=percentiles, band=band)
         millidecade_bands_limits, millidecade_bands_c = utils.get_hybrid_millidecade_limits(band, nfft)
         fft_bin_width = band[1] * 2 / nfft
         hybrid_millidecade_ds = utils.spectra_ds_to_bands(spectra_ds['band_%s' % method],
@@ -513,17 +361,12 @@ def hybrid_millidecade_bands(self, nfft, fft_overlap=0.5, binsize=None, bin_over
         spectra_ds['millidecade_bands'] = hybrid_millidecade_ds
         return spectra_ds
 
-    def spectrogram(self, binsize=None, bin_overlap=0, nfft=512, fft_overlap=0.5,
-                    scaling='density', db=True, band=None):
+    def spectrogram(self, nfft=512, fft_overlap=0.5, scaling='density', db=True, band=None):
         """
         Return the spectrogram of the signal (entire file)
 
         Parameters
         ----------
-        binsize : float, in sec
-            Time window considered. If set to None, only one value is returned
-        bin_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
         db : bool
             If set to True the result will be given in db, otherwise in upa^2
         nfft : int
@@ -551,27 +394,20 @@ def spectrogram(self, binsize=None, bin_overlap=0, nfft=512, fft_overlap=0.5,
         if band is None:
             band = [None, self.fs / 2]
 
-        da = xarray.DataArray()
-        for i, time_bin, signal, start_sample, end_sample in self._bins(binsize, bin_overlap=bin_overlap):
-            signal.set_band(band, downsample=downsample)
-            freq, t, sxx = signal.spectrogram(nfft=nfft, overlap=fft_overlap, scaling=scaling, db=db)
-            da_sxx = xarray.DataArray([sxx], coords={'id': [i],
-                                                     'start_sample': ('id', [start_sample]),
-                                                     'end_sample': ('id', [end_sample]),
-                                                     'datetime': ('id', [time_bin]),
-                                                     'frequency': freq, 'time': t},
-                                      dims=['id', 'frequency', 'time'])
-            if i == 0:
-                da = da_sxx
-            else:
-                da = xarray.concat((da, da_sxx), 'id')
-        units_attrs = output_units.get_units_attrs(method_name='spectrogram_' + scaling, p_ref=self.p_ref, log=db)
-        da.attrs.update(units_attrs)
-        ds = xarray.Dataset(data_vars={'spectrogram': da}, attrs=self._get_metadata_attrs())
-        return ds
-
-    def _spectrum(self, scaling='density', binsize=None, bin_overlap=0, nfft=512, fft_overlap=0.5,
-                  db=True, percentiles=None, band=None):
+        signal = self.signal
+        signal.set_band(band, downsample=downsample)
+        freq, t, sxx = signal.spectrogram(nfft=nfft, overlap=fft_overlap, scaling=scaling, db=db)
+        da_sxx = xarray.DataArray([sxx], coords={'id': [self.chunk_id],
+                                                 'file_id': ('id', [self.chunk_file_id]),
+                                                 'start_sample': ('id', [self.start_frame]),
+                                                 'end_sample': ('id', [self.end_frame]),
+                                                 'datetime': ('id', [self.time_bin]),
+                                                 'frequency': freq, 'time': t},
+                                  dims=['id', 'frequency', 'time'])
+        return da_sxx
+
+    def spectrum(self, scaling='density', nfft=512, fft_overlap=0.5,
+                 db=True, percentiles=None, band=None):
         """
         Return the spectrum : frequency distribution of every bin (periodogram)
         Returns Dataframe with 'datetime' as index and a column for each frequency and each
@@ -581,10 +417,6 @@ def _spectrum(self, scaling='density', binsize=None, bin_overlap=0, nfft=512, ff
         ----------
         scaling : string
             Can be set to 'spectrum' or 'density' depending on the desired output
-        binsize : float, in sec
-            Time window considered. If set to None, only one value is returned
-        bin_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
         nfft : int
             Length of the fft window in samples. Power of 2.
         fft_overlap : float [0 to 1]
@@ -631,77 +463,13 @@ def _spectrum(self, scaling='density', binsize=None, bin_overlap=0, nfft=512, ff
 
         return ds_bin
 
-    def psd(self, binsize=None, bin_overlap=0, nfft=512, fft_overlap=0.5, db=True, percentiles=None, band=None):
-        """
-        Return the power spectrum density (PSD) of all the file (units^2 / Hz) re 1 V 1 upa
-        Returns a Dataframe with 'datetime' as index and a column for each frequency and each
-        percentile
-
-        Parameters
-        ----------
-        binsize : float, in sec
-            Time window considered. If set to None, only one value is returned
-        bin_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
-        nfft : int
-            Length of the fft window in samples. Recommended power of 2.
-        fft_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
-        db : bool
-            If set to True the result will be given in db, otherwise in upa^2
-        percentiles : list or None
-            List of all the percentiles that have to be returned. If set to empty list,
-            no percentiles is returned
-        band : tuple or None
-            Band to filter the spectrogram in. A band is represented with a tuple - or a list - as
-            (low_freq, high_freq). If set to None, the broadband up to the Nyquist frequency will be analyzed
-        """
-        psd_ds = self._spectrum(scaling='density', binsize=binsize, nfft=nfft, fft_overlap=fft_overlap,
-                                db=db, bin_overlap=bin_overlap, percentiles=percentiles, band=band)
-        return psd_ds
-
-    def power_spectrum(self, binsize=None, bin_overlap=0, nfft=512, fft_overlap=0.5,
-                       db=True, percentiles=None, band=None):
-        """
-        Return the power spectrum of all the file (units^2 / Hz) re 1 V 1 upa
-        Returns a Dataframe with 'datetime' as index and a column for each frequency and
-        each percentile
-
-        Parameters
-        ----------
-        binsize : float, in sec
-            Time window considered. If set to None, only one value is returned
-        bin_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
-        nfft : int
-            Length of the fft window in samples. Power of 2.
-        fft_overlap : float [0 to 1]
-            Percentage to overlap the windows in the fft
-        db : bool
-            If set to True the result will be given in db, otherwise in upa^2
-        percentiles : list or None
-            List of all the percentiles that have to be returned. If set to empty list,
-            no percentiles is returned
-        band : tuple or None
-            Band to filter the spectrogram in. A band is represented with a tuple - or a list - as
-            (low_freq, high_freq). If set to None, the broadband up to the Nyquist frequency will be analyzed
-        """
-
-        spectrum_ds = self._spectrum(scaling='spectrum', binsize=binsize, nfft=nfft, fft_overlap=fft_overlap,
-                                     db=db, bin_overlap=bin_overlap, percentiles=percentiles, band=band)
-        return spectrum_ds
-
-    def spd(self, binsize=None, bin_overlap=0, h=0.1, nfft=512, fft_overlap=0.5,
+    def spd(self, h=0.1, nfft=512, fft_overlap=0.5,
             db=True, percentiles=None, min_val=None, max_val=None, band=None):
         """
         Return the spectral probability density.
 
         Parameters
         ----------
-        binsize : float, in sec
-            Time window considered. If set to None, only one value is returned
-        bin_overlap : float [0 to 1]
-            Percentage to overlap the bin windows
         h : float
             Histogram bin width (in the correspondent units, upa or db)
         nfft : int
@@ -738,8 +506,8 @@ def spd(self, binsize=None, bin_overlap=0, h=0.1, nfft=512, fft_overlap=0.5,
             list of matrices with all the probabilities
 
         """
-        psd_evolution = self.psd(binsize=binsize, nfft=nfft, fft_overlap=fft_overlap, db=db, percentiles=percentiles,
-                                 bin_overlap=bin_overlap, band=band)
+        psd_evolution = self.psd(nfft=nfft, fft_overlap=fft_overlap, db=db, percentiles=percentiles,
+                                 band=band)
         return utils.compute_spd(psd_evolution, h=h, percentiles=percentiles, max_val=max_val, min_val=min_val)
 
     def source_separation(self, window_time=1.0, n_sources=15, binsize=None, save_path=None, verbose=False, band=None):
@@ -793,7 +561,7 @@ def plot_spectrum_median(self, scaling='density', db=True, log=True, save_path=N
             Where to save the images
         **kwargs : any attribute valid on psd() function
         """
-        psd = self._spectrum(db=db, scaling=scaling, **kwargs)
+        psd = self.spectrum(db=db, scaling=scaling, **kwargs)
         plots.plot_spectrum_median(ds=psd, data_var='band_' + scaling, log=log, save_path=save_path)
 
     def plot_spectrum_per_chunk(self, scaling='density', db=True, log=True, save_path=None, **kwargs):
@@ -812,7 +580,7 @@ def plot_spectrum_per_chunk(self, scaling='density', db=True, log=True, save_pat
             Where to save the images
         **kwargs : any attribute valid on psd() function
         """
-        psd = self._spectrum(db=db, scaling=scaling, **kwargs)
+        psd = self.spectrum(db=db, scaling=scaling, **kwargs)
         plots.plot_spectrum_per_chunk(ds=psd, data_var='band_' + scaling, log=log, save_path=save_path)
 
     def plot_spectrogram(self, db=True, log=True, save_path=None, **kwargs):
diff --git a/pypam/acoustic_file.py b/pypam/acoustic_file.py
index c6ca3e5..bcae1a6 100644
--- a/pypam/acoustic_file.py
+++ b/pypam/acoustic_file.py
@@ -5,8 +5,6 @@
 __status__ = "Development"
 
 import datetime
-import operator
-import os
 import pathlib
 import zipfile
 
@@ -16,10 +14,8 @@
 import seaborn as sns
 import soundfile as sf
 import xarray
-from tqdm.auto import tqdm
 
 from pypam import plots
-from pypam import signal as sig
 from pypam import utils
 from pypam import units as output_units
 from pypam import acoustic_chunk
@@ -53,26 +49,31 @@ class AcuFile:
         Set to True to subtract the dc noise (root mean squared value
     """
 
-    def __init__(self, sfile, hydrophone, p_ref, start_frame=0, timezone='UTC', channel=0,
+    def __init__(self, sfile, hydrophone, p_ref, sfile_next=None, gridded=True, timezone='UTC', channel=0,
                  calibration=None, dc_subtract=False, chunk_id_start=0):
         # Save hydrophone model
         self.hydrophone = hydrophone
 
         # Get the date from the name
         file_name = utils.parse_file_name(sfile)
-
-        try:
-            self.date = hydrophone.get_name_datetime(file_name)
-        except ValueError:
-            self.date = datetime.datetime.now()
-            print('Filename %s does not match the %s file structure. Setting time to now...' %
-                  (file_name, self.hydrophone.name))
+        self.date = utils.get_file_datetime(file_name, hydrophone)
 
         # Signal
         self.file_path = sfile
         self.file = sf.SoundFile(self.file_path, 'r')
         self.fs = self.file.samplerate
 
+        # Check if next file is continuous
+        self.date_end = self.date + datetime.timedelta(seconds=self.total_time())
+        self.file_path_next = None
+        if sfile_next is not None:
+            next_file_name = utils.parse_file_name(sfile_next)
+            next_date = utils.get_file_datetime(next_file_name, self.hydrophone)
+
+            if (next_date - self.date_end) < datetime.timedelta(seconds=1):
+                self.file_path_next = sfile_next
+                self.file_next = sf.SoundFile(self.file_path_next, 'r')
+
         # Reference pressure in upa
         self.p_ref = p_ref
 
@@ -89,16 +90,23 @@ def __init__(self, sfile, hydrophone, p_ref, start_frame=0, timezone='UTC', chan
         self.wav = None
         self.time = None
 
-        # Set a starting frame for the file
-        if calibration is None:
-            self._start_frame = start_frame
-        elif calibration < 0:
-            self._start_frame = self.hydrophone.calibrate(self.file_path)
-            if self._start_frame is None:
-                self._start_frame = 0
-        else:
-            self._start_frame = int(calibration * self.fs)
+        if gridded and calibration is not None:
+            raise AttributeError('The options gridded and calibration are not compatible')
         self.calibration = calibration
+        self.gridded = gridded
+
+        if gridded:
+            self._start_frame = int((pd.to_datetime(self.date).ceil('min') - self.date).total_seconds() * self.fs)
+        else:
+            # Set a starting frame for the file
+            if calibration is None:
+                self._start_frame = 0
+            elif calibration < 0:
+                self._start_frame = self.hydrophone.calibrate(self.file_path)
+                if self._start_frame is None:
+                    self._start_frame = 0
+            else:
+                self._start_frame = int(calibration * self.fs)
 
         self.dc_subtract = dc_subtract
 
@@ -140,25 +148,41 @@ def _bins(self, binsize=None, bin_overlap=0):
         else:
             blocksize = self.samples(binsize)
         noverlap = int(bin_overlap * blocksize)
-        n_blocks = self._n_blocks(blocksize, noverlap=noverlap)
+        if self.file_path_next is not None:
+            is_there_next = True
+        else:
+            is_there_next = False
+        n_blocks = self._n_blocks(blocksize, noverlap=noverlap, add_last=is_there_next)
         time_array, _, _ = self._time_array(binsize, bin_overlap=bin_overlap)
         chunk_id = self.chunk_id_start
+        file_end = self.file_path
         for i in np.arange(n_blocks):
-            # Select the desired channel
             time_bin = time_array[i]
-            start_sample = i * blocksize + self._start_frame
-            end_sample = start_sample + blocksize
-            chunk = acoustic_chunk.AcuChunk(sfile_start=self.file_path, sfile_end=self.file_path,
+            start_sample = i * (blocksize - noverlap) + self._start_frame
+            if i == n_blocks - 1:
+                if is_there_next:
+                    end_sample = blocksize - (self.file.frames - start_sample)
+                    file_end = self.file_path_next
+                else:
+                    end_sample = self.file.frames
+            else:
+                end_sample = start_sample + blocksize
+            chunk = acoustic_chunk.AcuChunk(sfile_start=self.file_path, sfile_end=file_end,
                                             start_frame=start_sample, end_frame=end_sample, hydrophone=self.hydrophone,
                                             p_ref=self.p_ref, chunk_id=chunk_id, chunk_file_id=i, time_bin=time_bin)
 
             chunk_id += 1
 
             yield i, chunk
+
         self.file.seek(0)
 
-    def _n_blocks(self, blocksize, noverlap):
-        return int(np.floor(self.file.frames - self._start_frame) / (blocksize - noverlap))
+    def _n_blocks(self, blocksize, noverlap, add_last):
+        if add_last:
+            n_blocks = int(np.ceil((self.file.frames - self._start_frame) / (blocksize - noverlap)))
+        else:
+            n_blocks = int(np.floor((self.file.frames - self._start_frame) / (blocksize - noverlap)))
+        return n_blocks
 
     def samples(self, bintime):
         """
@@ -316,7 +340,7 @@ def _time_array(self, binsize=None, bin_overlap=0):
         blocks_samples = np.arange(start=self._start_frame, stop=self.file.frames - 1, step=blocksize)
         end_samples = blocks_samples + blocksize
         incr = pd.to_timedelta(blocks_samples / self.fs, unit='seconds')
-        self.time = self.date + datetime.timedelta(seconds=self._start_frame / self.fs) + incr
+        self.time = self.date + incr
         if self.timezone != 'UTC':
             self.time = pd.to_datetime(self.time).tz_localize(self.timezone).tz_convert('UTC').tz_convert(None)
         return self.time, blocks_samples.astype(int), end_samples.astype(int)
@@ -475,14 +499,13 @@ def _apply_multiple(self, method_list, binsize=None, band_list=None, bin_overlap
                     sorted_bands = [band] + sorted_bands
                 else:
                     sorted_bands = sorted_bands + [band]
-        log = True
         if 'db' in kwargs.keys():
             if not kwargs['db']:
                 log = False
         # Define an empty dataset
         ds = xarray.Dataset()
         for i, chunk in self._bins(binsize, bin_overlap=bin_overlap):
-            ds_bands = chunk._apply_multiple(method_list=method_list, band_list=None, **kwargs)
+            ds_bands = chunk.apply_multiple(method_list=method_list, band_list=None, **kwargs)
             if i == 0:
                 ds = ds_bands
             else:
@@ -653,15 +676,8 @@ def _octaves_levels(self, fraction=1, binsize=None, bin_overlap=0, db=True, band
         # Create an empty dataset
         da = xarray.DataArray()
         units_attrs = output_units.get_units_attrs(method_name='octave_levels', p_ref=self.p_ref, log=db)
-        for i, time_bin, signal, start_sample, end_sample in self._bins(binsize, bin_overlap=bin_overlap):
-            signal.set_band(band, downsample=downsample)
-            fbands, levels = signal.octave_levels(db, fraction)
-            da_levels = xarray.DataArray(data=[levels],
-                                         coords={'id': [i], 'start_sample': ('id', [start_sample]),
-                                                 'end_sample': ('id', [end_sample]), 'datetime': ('id', [time_bin]),
-                                                 'frequency': fbands},
-                                         dims=['id', 'frequency']
-                                         )
+        for i, chunk in self._bins(binsize, bin_overlap=bin_overlap):
+            da_levels = chunk._octaves_levels(fraction=fraction, db=db, band=band)
             if i == 0:
                 da = da_levels
             else:
@@ -751,15 +767,8 @@ def spectrogram(self, binsize=None, bin_overlap=0, nfft=512, fft_overlap=0.5,
             band = [None, self.fs / 2]
 
         da = xarray.DataArray()
-        for i, time_bin, signal, start_sample, end_sample in self._bins(binsize, bin_overlap=bin_overlap):
-            signal.set_band(band, downsample=downsample)
-            freq, t, sxx = signal.spectrogram(nfft=nfft, overlap=fft_overlap, scaling=scaling, db=db)
-            da_sxx = xarray.DataArray([sxx], coords={'id': [i],
-                                                     'start_sample': ('id', [start_sample]),
-                                                     'end_sample': ('id', [end_sample]),
-                                                     'datetime': ('id', [time_bin]),
-                                                     'frequency': freq, 'time': t},
-                                      dims=['id', 'frequency', 'time'])
+        for i, chunk in self._bins(binsize, bin_overlap=bin_overlap):
+            da_sxx = chunk.spectrogram(nfft=nfft, fft_overlap=fft_overlap, scaling=scaling, db=db)
             if i == 0:
                 da = da_sxx
             else:
@@ -806,8 +815,8 @@ def _spectrum(self, scaling='density', binsize=None, bin_overlap=0, nfft=512, ff
         spectrum_str = 'band_' + scaling
         ds = xarray.DataArray()
         for i, chunk in self._bins(binsize, bin_overlap=bin_overlap):
-            ds_bin = chunk._spectrum(scaling=scaling, binsize=binsize, bin_overlap=bin_overlap,
-                                     nfft=nfft, fft_overlap=fft_overlap, db=db, percentiles=percentiles, band=band)
+            ds_bin = chunk.spectrum(scaling=scaling, nfft=nfft, fft_overlap=fft_overlap, db=db,
+                                     percentiles=percentiles, band=band)
             if i == 0:
                 ds = ds_bin
             else:
diff --git a/pypam/acoustic_survey.py b/pypam/acoustic_survey.py
index c9a0c41..5bc4a01 100644
--- a/pypam/acoustic_survey.py
+++ b/pypam/acoustic_survey.py
@@ -112,19 +112,24 @@ def __init__(self,
         self.file_dependent_attrs = ['file_path', '_start_frame', 'end_to_end_calibration']
         self.current_chunk_id = 0
 
+        self.gridded = gridded_data
+
     def _files(self):
         """
         Iterator that returns AcuFile for each wav file in the folder
         """
         self.current_chunk_id = 0
-        for file_list in tqdm(self.acu_files):
+        for file_i, file_list in tqdm(enumerate(self.acu_files), total=len(self.acu_files)):
             wav_file = file_list[0]
-            print(wav_file)
-            sound_file = self._hydro_file(wav_file, chunk_id_start=self.current_chunk_id)
+            if file_i >= (len(self.acu_files) - 1):
+                next_file = None
+            else:
+                next_file = self.acu_files[file_i+1][0]
+            sound_file = self._hydro_file(wav_file, wav_file_next=next_file, chunk_id_start=self.current_chunk_id)
             if sound_file.is_in_period(self.period) and sound_file.file.frames > 0:
                 yield sound_file
 
-    def _hydro_file(self, wav_file, chunk_id_start=0):
+    def _hydro_file(self, wav_file, wav_file_next=None, chunk_id_start=0):
         """
         Return the AcuFile object from the wav_file
         Parameters
@@ -136,9 +141,11 @@ def _hydro_file(self, wav_file, chunk_id_start=0):
         -------
         Object AcuFile
         """
-        hydro_file = acoustic_file.AcuFile(sfile=wav_file, hydrophone=self.hydrophone, p_ref=self.p_ref,
+        hydro_file = acoustic_file.AcuFile(sfile=wav_file, sfile_next=wav_file_next,
+                                           hydrophone=self.hydrophone, p_ref=self.p_ref,
                                            timezone=self.timezone, channel=self.channel, calibration=self.calibration,
-                                           dc_subtract=self.dc_subtract, chunk_id_start=chunk_id_start)
+                                           dc_subtract=self.dc_subtract, chunk_id_start=chunk_id_start,
+                                           gridded=self.gridded)
         return hydro_file
 
     def _get_metadata_attrs(self):
@@ -248,14 +255,12 @@ def start_end_timestamp(self):
         Return the start and the end timestamps
         """
         wav_file = self.acu_files[0][0]
-        print(wav_file)
 
         sound_file = self._hydro_file(wav_file)
         start_datetime = sound_file.date
 
         file_list = self.acu_files[-1]
         wav_file = file_list[0]
-        print(wav_file)
         sound_file = self._hydro_file(wav_file)
         end_datetime = sound_file.date + datetime.timedelta(seconds=sound_file.total_time())
 
@@ -526,19 +531,6 @@ def __init__(self, folder_path, zipped=False, include_dirs=False, extension='.wa
             extra_extensions = []
         self.extra_extensions = extra_extensions
 
-    def __getitem__(self, n):
-        """
-        Get n wav file
-        """
-        self.__iter__()
-        self.n = n
-        return self.__next__()
-
-    def __iter__(self):
-        """
-        Iteration
-        """
-        self.n = 0
         if not self.zipped:
             if self.recursive:
                 self.files_list = sorted(self.folder_path.glob('**/*%s' % self.extension))
@@ -547,57 +539,47 @@ def __iter__(self):
         else:
             if self.recursive:
                 self.folder_list = sorted(self.folder_path.iterdir())
-                self.zipped_subfolder = AcousticFolder(self.folder_list[self.n],
-                                                       extra_extensions=self.extra_extensions,
-                                                       zipped=self.zipped,
-                                                       include_dirs=self.recursive)
+                self.files_list = []
+                for fol in self.folder_list:
+                    self.zipped_subfolder = AcousticFolder(fol,
+                                                           extra_extensions=self.extra_extensions,
+                                                           zipped=self.zipped,
+                                                           include_dirs=self.recursive)
+                    np.concatenate((self.files_list, self.zipped_subfolder.files_list))
             else:
                 zipped_folder = zipfile.ZipFile(self.folder_path, 'r', allowZip64=True)
                 self.files_list = []
                 total_files_list = zipped_folder.namelist()
-                for f in total_files_list: 
+                for f in total_files_list:
                     extension = f.split(".")[-1]
                     if extension == 'wav':
                         self.files_list.append(f)
-        return self
 
-    def __next__(self):
+    def __getitem__(self, index):
         """
-        Next wav file
+        Get n wav file
         """
-        if self.n < len(self.files_list):
+        if index < len(self.files_list):
             files_list = []
             if self.zipped:
-                if self.recursive:
-                    try:
-                        self.files_list = self.zipped_subfolder.__next__()
-                    except StopIteration:
-                        self.n += 1
-                        self.zipped_subfolder = AcousticFolder(self.folder_list[self.n],
-                                                               extra_extensions=self.extra_extensions,
-                                                               zipped=self.zipped,
-                                                               include_dirs=self.recursive)
-                else:
-                    file_name = self.files_list[self.n]
-                    zipped_folder = zipfile.ZipFile(self.folder_path, 'r', allowZip64=True)
-                    wav_file = zipped_folder.open(file_name)
-                    files_list.append(wav_file)
-                    for extension in self.extra_extensions:
-                        ext_file_name = file_name.parent.joinpath(
-                            file_name.name.replace(self.extension, extension))
-                        files_list.append(zipped_folder.open(ext_file_name))
-                    self.n += 1
-                    return files_list
+                file_name = self.files_list[index]
+                zipped_folder = zipfile.ZipFile(self.folder_path, 'r', allowZip64=True)
+                wav_file = zipped_folder.open(file_name)
+                files_list.append(wav_file)
+                for extension in self.extra_extensions:
+                    ext_file_name = file_name.parent.joinpath(
+                        file_name.name.replace(self.extension, extension))
+                    files_list.append(zipped_folder.open(ext_file_name))
+                return files_list
             else:
-                wav_path = self.files_list[self.n]
+                wav_path = self.files_list[index]
                 files_list.append(wav_path)
                 for extension in self.extra_extensions:
                     files_list.append(pathlib.Path(str(wav_path).replace(self.extension, extension)))
 
-                self.n += 1
                 return files_list
         else:
-            raise StopIteration
+            raise IndexError
 
     def __len__(self):
         if not self.zipped:
diff --git a/pypam/utils.py b/pypam/utils.py
index 639bc0b..285d161 100644
--- a/pypam/utils.py
+++ b/pypam/utils.py
@@ -53,6 +53,7 @@
 from functools import partial
 import zipfile
 import os
+import datetime
 
 try:
     import dask
@@ -913,3 +914,13 @@ def parse_file_name(sfile):
         raise Exception('The filename has to be either a Path object or a string')
 
     return file_name
+
+
+def get_file_datetime(file_name, hydrophone):
+    try:
+        date = hydrophone.get_name_datetime(file_name)
+    except ValueError:
+        date = datetime.datetime.now()
+        print('Filename %s does not match the %s file structure. Setting time to now...' %
+              (file_name, hydrophone.name))
+    return date
diff --git a/tests/test_acoustic_file.py b/tests/test_acoustic_file.py
index eceb1ec..6671ea1 100644
--- a/tests/test_acoustic_file.py
+++ b/tests/test_acoustic_file.py
@@ -4,6 +4,8 @@
 from tests import skip_unless_with_plots
 import pathlib
 import matplotlib.pyplot as plt
+import datetime
+import pandas as pd
 
 plt.rcParams.update(plt.rcParamsDefault)
 
@@ -21,6 +23,8 @@
 class TestAcuFile(unittest.TestCase):
     def setUp(self) -> None:
         self.acu_file = AcuFile('tests/test_data/67416073.210610033655.wav', soundtrap, 1)
+        self.acu_file_gridded = AcuFile('tests/test_data/67416073.210610033655.wav', soundtrap,
+                                        1, gridded=True)
 
     @skip_unless_with_plots()
     def test_plots(self):
@@ -34,6 +38,24 @@ def test_millidecade_bands(self):
         self.acu_file.hybrid_millidecade_bands(nfft, fft_overlap=0.5, binsize=None, bin_overlap=0, db=True,
                                                method='density', band=None)
 
+    def test_millidecade_bands_gridded(self):
+        nfft = 8000
+        binsize = 60
+        ds = self.acu_file_gridded.hybrid_millidecade_bands(nfft, fft_overlap=0.5, binsize=binsize, bin_overlap=0, db=True,
+                                               method='spectrum', band=None)
+        assert ds.datetime.dt.time.values[0].second == 0
+
+    def test_non_zero_bin_overlap(self):
+        nfft = 8000
+        binsize = 60
+        bin_overlap = 0.5
+        ds = self.acu_file_gridded.hybrid_millidecade_bands(nfft, fft_overlap=0.5, binsize=binsize,
+                                                            bin_overlap=bin_overlap, db=True, method='spectrum',
+                                                            band=None)
+        assert (pd.to_timedelta(ds.datetime.values[1] - ds.datetime.values[0]) ==
+                pd.to_timedelta(datetime.timedelta(seconds=binsize * bin_overlap)))
+        assert (ds.start_sample.values[1] - ds.start_sample.values[0]) == (binsize * bin_overlap) * 8000
+
     def test_update_freq_cal(self):
         ds_psd = self.acu_file.psd()
         ds_psd_updated = self.acu_file.update_freq_cal(ds=ds_psd, data_var='band_density')
diff --git a/tests/test_acoustic_survey.py b/tests/test_acoustic_survey.py
index c4474d2..703875d 100644
--- a/tests/test_acoustic_survey.py
+++ b/tests/test_acoustic_survey.py
@@ -37,7 +37,7 @@
 third_octaves = None
 dc_subtract = True
 
-include_dirs = True
+include_dirs = False
 zipped_files = False
 
 # Don't plot if it is running on CI
@@ -51,6 +51,8 @@ def setUp(self) -> None:
         self.asa_flac = ASA(hydrophone=soundtrap, folder_path=folder_path.joinpath('flac_data'), binsize=binsize,
                             nfft=nfft, timezone='UTC', include_dirs=include_dirs, zipped=zipped_files,
                             dc_subtract=dc_subtract, extension='.flac')
+        self.asa_gridded = ASA(hydrophone=soundtrap, folder_path=folder_path, binsize=binsize, nfft=nfft, timezone='UTC',
+                       include_dirs=include_dirs, zipped=zipped_files, dc_subtract=dc_subtract, gridded_data=True)
 
     def test_empty_directory(self):
         with self.assertRaises(ValueError) as context:
@@ -127,6 +129,12 @@ def test_millidecade_bands(self):
             plt.legend()
             plt.show()
 
+    def test_millidecade_bands_gridded(self):
+        # Set the frequency resolution to 1 Hz
+        # Check for the complete broadband and a band filtered in the lower frequencies
+        milli_psd = self.asa_gridded.hybrid_millidecade_bands(db=True, method='density', band=[0, 4000], percentiles=None)
+        assert milli_psd.datetime.dt.time.values[0].second == 0
+
     def test_spectrogram(self):
         self.asa.apply_to_all('spectrogram')