Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Feature/gridded data #133

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
fb6e47a
first changes to add an acoustic chunk so data can be gridded
cparcerisas Sep 3, 2024
b886d1a
added the option of having gridded data. Fixes #127 and #79
cparcerisas Sep 3, 2024
05221d9
changed the millidecades tests so they are also gridded
cparcerisas Sep 3, 2024
80f97e6
changed the test for the dataset to not be gridded as otherwise calib…
cparcerisas Sep 3, 2024
8452167
changed function _octaves_levels in acoustic_file to call octaves lev…
cparcerisas Sep 3, 2024
52e51ce
first approach to save daily files in evolution_frequency and apply_m…
cparcerisas Sep 4, 2024
4802f20
removed yield function and only made save_daily an option when a path…
cparcerisas Sep 4, 2024
03422f6
first changes to add an acoustic chunk so data can be gridded
cparcerisas Sep 3, 2024
b2b7a96
added the option of having gridded data. Fixes #127 and #79
cparcerisas Sep 3, 2024
800ce25
changed the millidecades tests so they are also gridded
cparcerisas Sep 3, 2024
9f11118
changed the test for the dataset to not be gridded as otherwise calib…
cparcerisas Sep 3, 2024
e3e3341
changed function _octaves_levels in acoustic_file to call octaves lev…
cparcerisas Sep 3, 2024
c7296a7
Merge branch 'feature/gridded_data' of https://github.com/lifewatch/p…
cparcerisas Sep 13, 2024
ae71739
Merge branch 'main' into feature/gridded_data
cparcerisas Sep 13, 2024
f542d28
Merge pull request #139 from lifewatch/feature/save_daily_files
cparcerisas Sep 13, 2024
7db44f8
fixed error when plotting a spectrogram from a chunk
cparcerisas Nov 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
629 changes: 629 additions & 0 deletions pypam/acoustic_chunk.py

Large diffs are not rendered by default.

195 changes: 78 additions & 117 deletions pypam/acoustic_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
__status__ = "Development"

import datetime
import operator
import os
import pathlib
import zipfile

Expand All @@ -16,12 +14,11 @@
import seaborn as sns
import soundfile as sf
import xarray
from tqdm.auto import tqdm

from pypam import plots
from pypam import signal as sig
from pypam import utils
from pypam import units as output_units
from pypam import acoustic_chunk

pd.plotting.register_matplotlib_converters()
plt.rcParams.update({'pcolor.shading': 'auto'})
Expand Down Expand Up @@ -52,32 +49,31 @@ class AcuFile:
Set to True to subtract the dc noise (root mean squared value
"""

def __init__(self, sfile, hydrophone, p_ref, timezone='UTC', channel=0, calibration=None, dc_subtract=False):
def __init__(self, sfile, hydrophone, p_ref, sfile_next=None, gridded=True, timezone='UTC', channel=0,
calibration=None, dc_subtract=False, chunk_id_start=0):
# Save hydrophone model
self.hydrophone = hydrophone

# Get the date from the name
if type(sfile) == str:
file_name = os.path.split(sfile)[-1]
elif issubclass(sfile.__class__, pathlib.Path):
file_name = sfile.name
elif issubclass(sfile.__class__, zipfile.ZipExtFile):
file_name = sfile.name
else:
raise Exception('The filename has to be either a Path object or a string')

try:
self.date = hydrophone.get_name_datetime(file_name)
except ValueError:
self.date = datetime.datetime.now()
print('Filename %s does not match the %s file structure. Setting time to now...' %
(file_name, self.hydrophone.name))
file_name = utils.parse_file_name(sfile)
self.date = utils.get_file_datetime(file_name, hydrophone)

# Signal
self.file_path = sfile
self.file = sf.SoundFile(self.file_path, 'r')
self.fs = self.file.samplerate

# Check if next file is continuous
self.date_end = self.date + datetime.timedelta(seconds=self.total_time())
self.file_path_next = None
if sfile_next is not None:
next_file_name = utils.parse_file_name(sfile_next)
next_date = utils.get_file_datetime(next_file_name, self.hydrophone)

if (next_date - self.date_end) < datetime.timedelta(seconds=1):
self.file_path_next = sfile_next
self.file_next = sf.SoundFile(self.file_path_next, 'r')

# Reference pressure in upa
self.p_ref = p_ref

Expand All @@ -94,19 +90,29 @@ def __init__(self, sfile, hydrophone, p_ref, timezone='UTC', channel=0, calibrat
self.wav = None
self.time = None

# Set a starting frame for the file
if calibration is None:
self._start_frame = 0
elif calibration < 0:
self._start_frame = self.hydrophone.calibrate(self.file_path)
if self._start_frame is None:
self._start_frame = 0
else:
self._start_frame = int(calibration * self.fs)
if gridded and calibration is not None:
raise AttributeError('The options gridded and calibration are not compatible')
self.calibration = calibration
self.gridded = gridded

if gridded:
self._start_frame = int((pd.to_datetime(self.date).ceil('min') - self.date).total_seconds() * self.fs)
else:
# Set a starting frame for the file
if calibration is None:
self._start_frame = 0
elif calibration < 0:
self._start_frame = self.hydrophone.calibrate(self.file_path)
if self._start_frame is None:
self._start_frame = 0
else:
self._start_frame = int(calibration * self.fs)

self.dc_subtract = dc_subtract

# Start to count chunk id's
self.chunk_id_start = chunk_id_start

def __getattr__(self, name):
"""
Specific methods to make it easier to access attributes
Expand Down Expand Up @@ -142,26 +148,41 @@ def _bins(self, binsize=None, bin_overlap=0):
else:
blocksize = self.samples(binsize)
noverlap = int(bin_overlap * blocksize)
n_blocks = self._n_blocks(blocksize, noverlap=noverlap)
if self.file_path_next is not None:
is_there_next = True
else:
is_there_next = False
n_blocks = self._n_blocks(blocksize, noverlap=noverlap, add_last=is_there_next)
time_array, _, _ = self._time_array(binsize, bin_overlap=bin_overlap)
for i, block in tqdm(enumerate(sf.blocks(self.file_path, blocksize=blocksize, start=self._start_frame,
overlap=bin_overlap, always_2d=True, fill_value=0.0)),
total=n_blocks, leave=False, position=0):
# Select the desired channel
block = block[:, self.channel]
chunk_id = self.chunk_id_start
file_end = self.file_path
for i in np.arange(n_blocks):
time_bin = time_array[i]
# Read the signal and prepare it for analysis
signal_upa = self.wav2upa(wav=block)
signal = sig.Signal(signal=signal_upa, fs=self.fs, channel=self.channel)
if self.dc_subtract:
signal.remove_dc()
start_sample = i * blocksize + self._start_frame
end_sample = start_sample + len(signal_upa)
yield i, time_bin, signal, start_sample, end_sample
start_sample = i * (blocksize - noverlap) + self._start_frame
if i == n_blocks - 1:
if is_there_next:
end_sample = blocksize - (self.file.frames - start_sample)
file_end = self.file_path_next
else:
end_sample = self.file.frames
else:
end_sample = start_sample + blocksize
chunk = acoustic_chunk.AcuChunk(sfile_start=self.file_path, sfile_end=file_end,
start_frame=start_sample, end_frame=end_sample, hydrophone=self.hydrophone,
p_ref=self.p_ref, chunk_id=chunk_id, chunk_file_id=i, time_bin=time_bin)

chunk_id += 1

yield i, chunk

self.file.seek(0)

def _n_blocks(self, blocksize, noverlap):
return int(np.floor(self.file.frames - self._start_frame) / (blocksize - noverlap))
def _n_blocks(self, blocksize, noverlap, add_last):
if add_last:
n_blocks = int(np.ceil((self.file.frames - self._start_frame) / (blocksize - noverlap)))
else:
n_blocks = int(np.floor((self.file.frames - self._start_frame) / (blocksize - noverlap)))
return n_blocks

def samples(self, bintime):
"""
Expand Down Expand Up @@ -319,7 +340,7 @@ def _time_array(self, binsize=None, bin_overlap=0):
blocks_samples = np.arange(start=self._start_frame, stop=self.file.frames - 1, step=blocksize)
end_samples = blocks_samples + blocksize
incr = pd.to_timedelta(blocks_samples / self.fs, unit='seconds')
self.time = self.date + datetime.timedelta(seconds=self._start_frame / self.fs) + incr
self.time = self.date + incr
if self.timezone != 'UTC':
self.time = pd.to_datetime(self.time).tz_localize(self.timezone).tz_convert('UTC').tz_convert(None)
return self.time, blocks_samples.astype(int), end_samples.astype(int)
Expand Down Expand Up @@ -433,6 +454,8 @@ def _get_metadata_attrs(self):
d = d.__dict__[sub_k]
if isinstance(d, pathlib.Path):
d = str(d)
if isinstance(d, bool):
d = int(d)
if d is None:
d = 0
metadata_attrs[k.replace('.', '_')] = d
Expand Down Expand Up @@ -478,44 +501,13 @@ def _apply_multiple(self, method_list, binsize=None, band_list=None, bin_overlap
sorted_bands = [band] + sorted_bands
else:
sorted_bands = sorted_bands + [band]
log = True
if 'db' in kwargs.keys():
if not kwargs['db']:
log = False
# Define an empty dataset
ds = xarray.Dataset()
for i, time_bin, signal, start_sample, end_sample in self._bins(binsize, bin_overlap=bin_overlap):
ds_bands = xarray.Dataset()
for j, band in enumerate(sorted_bands):
signal.set_band(band, downsample=downsample)
methods_output = xarray.Dataset()
for method_name in method_list:
f = operator.methodcaller(method_name, **kwargs)
try:
output = f(signal)
except Exception as e:
print('There was an error in band %s, feature %s. Setting to None. '
'Error: %s' % (band, method_name, e))
output = None

units_attrs = output_units.get_units_attrs(method_name=method_name, log=log,
p_ref=self.p_ref, **kwargs)
methods_output[method_name] = xarray.DataArray([[output]], coords={'id': [i],
'datetime': ('id', [time_bin]),
'start_sample': ('id',
[start_sample]),
'end_sample': (
'id', [end_sample]),
'band': [j],
'low_freq': ('band', [band[0]]),
'high_freq': (
'band', [band[1]])},
dims=['id', 'band'],
attrs=units_attrs)
if j == 0:
ds_bands = methods_output
else:
ds_bands = xarray.concat((ds_bands, methods_output), 'band')
for i, chunk in self._bins(binsize, bin_overlap=bin_overlap):
ds_bands = chunk.apply_multiple(method_list=method_list, band_list=None, **kwargs)
if i == 0:
ds = ds_bands
else:
Expand Down Expand Up @@ -686,15 +678,8 @@ def _octaves_levels(self, fraction=1, binsize=None, bin_overlap=0, db=True, band
# Create an empty dataset
da = xarray.DataArray()
units_attrs = output_units.get_units_attrs(method_name='octave_levels', p_ref=self.p_ref, log=db)
for i, time_bin, signal, start_sample, end_sample in self._bins(binsize, bin_overlap=bin_overlap):
signal.set_band(band, downsample=downsample)
fbands, levels = signal.octave_levels(db, fraction)
da_levels = xarray.DataArray(data=[levels],
coords={'id': [i], 'start_sample': ('id', [start_sample]),
'end_sample': ('id', [end_sample]), 'datetime': ('id', [time_bin]),
'frequency': fbands},
dims=['id', 'frequency']
)
for i, chunk in self._bins(binsize, bin_overlap=bin_overlap):
da_levels = chunk.octaves_levels(fraction=fraction, db=db, band=band)
if i == 0:
da = da_levels
else:
Expand Down Expand Up @@ -784,15 +769,8 @@ def spectrogram(self, binsize=None, bin_overlap=0, nfft=512, fft_overlap=0.5,
band = [None, self.fs / 2]

da = xarray.DataArray()
for i, time_bin, signal, start_sample, end_sample in self._bins(binsize, bin_overlap=bin_overlap):
signal.set_band(band, downsample=downsample)
freq, t, sxx = signal.spectrogram(nfft=nfft, overlap=fft_overlap, scaling=scaling, db=db)
da_sxx = xarray.DataArray([sxx], coords={'id': [i],
'start_sample': ('id', [start_sample]),
'end_sample': ('id', [end_sample]),
'datetime': ('id', [time_bin]),
'frequency': freq, 'time': t},
dims=['id', 'frequency', 'time'])
for i, chunk in self._bins(binsize, bin_overlap=bin_overlap):
da_sxx = chunk.spectrogram(nfft=nfft, fft_overlap=fft_overlap, scaling=scaling, db=db)
if i == 0:
da = da_sxx
else:
Expand Down Expand Up @@ -838,26 +816,9 @@ def _spectrum(self, scaling='density', binsize=None, bin_overlap=0, nfft=512, ff

spectrum_str = 'band_' + scaling
ds = xarray.DataArray()
for i, time_bin, signal, start_sample, end_sample in self._bins(binsize, bin_overlap=bin_overlap):
signal.set_band(band, downsample=downsample)
fbands, spectra, percentiles_val = signal.spectrum(scaling=scaling, nfft=nfft, db=db,
percentiles=percentiles, overlap=fft_overlap)

spectra_da = xarray.DataArray([spectra],
coords={'id': [i],
'datetime': ('id', [time_bin]),
'frequency': fbands,
'start_sample': ('id',
[start_sample]),
'end_sample': ('id', [end_sample]),
},
dims=['id', 'frequency'])
percentiles_da = xarray.DataArray([percentiles_val],
coords={'id': [i], 'datetime': ('id', [time_bin]),
'percentiles': percentiles},
dims=['id', 'percentiles'])

ds_bin = xarray.Dataset({spectrum_str: spectra_da, 'value_percentiles': percentiles_da})
for i, chunk in self._bins(binsize, bin_overlap=bin_overlap):
ds_bin = chunk.spectrum(scaling=scaling, nfft=nfft, fft_overlap=fft_overlap, db=db,
percentiles=percentiles, band=band)
if i == 0:
ds = ds_bin
else:
Expand Down
Loading
Loading