From bab063508dc466e3556b24b7445f687bfd93bf54 Mon Sep 17 00:00:00 2001 From: akeeste Date: Mon, 8 Jan 2024 16:08:05 -0600 Subject: [PATCH 01/42] convert river.graphics to xarray --- mhkit/river/graphics.py | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/mhkit/river/graphics.py b/mhkit/river/graphics.py index 46b621f88..3f40e8673 100644 --- a/mhkit/river/graphics.py +++ b/mhkit/river/graphics.py @@ -1,5 +1,5 @@ import numpy as np -import pandas as pd +import xarray as xr import matplotlib.pyplot as plt @@ -69,9 +69,9 @@ def plot_flow_duration_curve(D, F, label=None, ax=None): """ # Sort by F - temp = pd.DataFrame({'D': D, 'F': F}) - temp.sort_values('F', ascending=False, kind='mergesort', inplace=True) - + temp = xr.Dataset(data_vars = {'D': D, 'F': F}) + temp.sortby('F', ascending=False) + ax = _xy_plot(temp['D'], temp['F'], fmt='-', label=label, xlabel='Discharge [$m^3/s$]', ylabel='Exceedance Probability', ax=ax) plt.xscale('log') @@ -104,9 +104,9 @@ def plot_velocity_duration_curve(V, F, label=None, ax=None): """ # Sort by F - temp = pd.DataFrame({'V': V, 'F': F}) - temp.sort_values('F', ascending=False, kind='mergesort', inplace=True) - + temp = xr.Dataset(data_vars = {'V': V, 'F': F}) + temp.sortby('F', ascending=False) + ax = _xy_plot(temp['V'], temp['F'], fmt='-', label=label, xlabel='Velocity [$m/s$]', ylabel='Exceedance Probability', ax=ax) @@ -138,16 +138,16 @@ def plot_power_duration_curve(P, F, label=None, ax=None): """ # Sort by F - temp = pd.DataFrame({'P': P, 'F': F}) - temp.sort_values('F', ascending=False, kind='mergesort', inplace=True) - + temp = xr.Dataset(data_vars = {'P': P, 'F': F}) + temp.sortby('F', ascending=False) + ax = _xy_plot(temp['P'], temp['F'], fmt='-', label=label, xlabel='Power [W]', ylabel='Exceedance Probability', ax=ax) return ax -def plot_discharge_timeseries(Q, label=None, ax=None): +def plot_discharge_timeseries(Q, time_dimension="", label=None, ax=None): """ Plots discharge time-series @@ -156,6 +156,10 @@ def plot_discharge_timeseries(Q, label=None, ax=None): Q: array-like Discharge [m3/s] indexed by time + time_dimension: string (optional) + Name of the xarray dimension corresponding to time. If not supplied, + defaults to the first dimension. 
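+        For example, plot_discharge_timeseries(Q, time_dimension='time')
+        plots Q against its coordinate named 'time'.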
+ label: string Label to use in the legend @@ -168,8 +172,11 @@ def plot_discharge_timeseries(Q, label=None, ax=None): ax : matplotlib pyplot axes """ + if time_dimension == "": + time_dimension = list(Q.coords)[0] + ax = _xy_plot( - Q.index, + Q.coords[time_dimension].values, Q, fmt='-', label=label, @@ -187,10 +194,10 @@ def plot_discharge_vs_velocity(D, V, polynomial_coeff=None, label=None, ax=None) Parameters ------------ - D : pandas Series + D : array-like Discharge [m/s] indexed by time - V : pandas Series + V : array-like Velocity [m/s] indexed by time polynomial_coeff: numpy polynomial @@ -224,10 +231,10 @@ def plot_velocity_vs_power(V, P, polynomial_coeff=None, label=None, ax=None): Parameters ------------ - V : pandas Series + V : array-like Velocity [m/s] indexed by time - P: pandas Series + P: array-like Power [W] indexed by time polynomial_coeff: numpy polynomial From b260ebdfd87f9ccab897b74d3b57b7bd3aadebf1 Mon Sep 17 00:00:00 2001 From: akeeste Date: Thu, 11 Jan 2024 13:51:06 -0600 Subject: [PATCH 02/42] convert mhkit.river.resource to xarray --- mhkit/river/resource.py | 131 ++++++++++++++++++++++++++-------------- 1 file changed, 84 insertions(+), 47 deletions(-) diff --git a/mhkit/river/resource.py b/mhkit/river/resource.py index fcf2e0d07..acd3aac21 100644 --- a/mhkit/river/resource.py +++ b/mhkit/river/resource.py @@ -1,7 +1,9 @@ import pandas as pd +import xarray as xr import numpy as np from scipy.stats import linregress as _linregress from scipy.stats import rv_histogram as _rv_histogram +from mhkit.utils import _convert_to_dataset def Froude_number(v, h, g=9.80665): @@ -36,33 +38,48 @@ def Froude_number(v, h, g=9.80665): return Fr -def exceedance_probability(D): +def exceedance_probability(D, dimension="", to_pandas=True): """ Calculates the exceedance probability Parameters ---------- - D : pandas Series - Data indexed by time [datetime or s]. - + D : pandas Series, pandas DataFrame, xarray DataArray, or xarray Dataset + Data indexed by time [datetime or s]. + + dimension: string (optional) + Name of the relevant xarray dimension. If not supplied, + defaults to the first dimension. Does not affect pandas input. + + to_pandas: bool (optional) + Flag to output pandas instead of xarray. Default = True. + Returns ------- - F : pandas DataFrame + F : pandas DataFrame or xarray Dataset Exceedance probability [unitless] indexed by time [datetime or s] """ - # dataframe allowed for matlab - if not isinstance(D, (pd.DataFrame, pd.Series)): - raise TypeError(f'D must be of type pd.Series or pd.DataFrame. Got: {type(D)}') - - if isinstance(D, pd.DataFrame) and len(D.columns) == 1: # for matlab - D = D.squeeze().copy() + if not isinstance(D, (pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset)): + raise TypeError( + f'D must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(D)}') + if not isinstance(dimension, str): + raise TypeError(f'dimension must be of type str. Got: {type(dimension)}') + if not isinstance(to_pandas, bool): + raise TypeError(f'to_pandas must be of type bool. 
Got: {type(to_pandas)}') + + if dimension == "": + dimension = list(D.coords)[0] + + D = _convert_to_dataset(D) # Calculate exceedence probability (F) - rank = D.rank(method='max', ascending=False) - F = 100* (rank / (len(D)+1) ) - - F = F.to_frame('F') # for matlab - + rank = D.rank() + rank = len(D[dimension]) - rank + 1 # convert to descending rank + F = 100 * rank / (len(D[dimension])+1) + + if to_pandas: + F = F.to_pandas() + return F @@ -113,50 +130,62 @@ def polynomial_fit(x, y, n): return polynomial_coefficients, R2 -def discharge_to_velocity(D, polynomial_coefficients): +def discharge_to_velocity(D, polynomial_coefficients, dimension="", to_pandas=True): """ Calculates velocity given discharge data and the relationship between discharge and velocity at an individual turbine Parameters ------------ - D : pandas Series + D : pandas Series, pandas DataFrame, xarray DataArray, or xarray Dataset Discharge data [m3/s] indexed by time [datetime or s] polynomial_coefficients : numpy polynomial List of polynomial coefficients that discribe the relationship between discharge and velocity at an individual turbine + dimension: string (optional) + Name of the relevant xarray dimension. If not supplied, + defaults to the first dimension. Does not affect pandas input. + to_pandas: bool (optional) + Flag to output pandas instead of xarray. Default = True. Returns ------------ - V: pandas DataFrame + V: pandas DataFrame or xarray Dataset Velocity [m/s] indexed by time [datetime or s] """ - # dataframe allowed for matlab - if not isinstance(D, (pd.DataFrame, pd.Series)): - raise TypeError(f'D must be of type pd.Series. Got: {type(D)}') + if not isinstance(D, (pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset)): + raise TypeError( + f'D must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(D)}') if not isinstance(polynomial_coefficients, np.poly1d): raise TypeError(f'polynomial_coefficients must be of type np.poly1d. Got: {type(polynomial_coefficients)}') + if not isinstance(dimension, str): + raise TypeError(f'dimension must be of type str. Got: {type(dimension)}') + if not isinstance(to_pandas, bool): + raise TypeError(f'to_pandas must be of type str. Got: {type(to_pandas)}') - if isinstance(D, pd.DataFrame) and len(D.columns) == 1: # for matlab - D = D.squeeze().copy() + if dimension == "": + dimension = list(D.coords)[0] + + D = _convert_to_dataset(D) # Calculate velocity using polynomial vals = polynomial_coefficients(D) - V = pd.Series(vals, index=D.index) + V = xr.Dataset(vals) - V = V.to_frame('V') # for matlab + if to_pandas: + V = V.to_pandas() return V -def velocity_to_power(V, polynomial_coefficients, cut_in, cut_out): +def velocity_to_power(V, polynomial_coefficients, cut_in, cut_out, dimension="", to_pandas=True): """ Calculates power given velocity data and the relationship between velocity and power from an individual turbine Parameters ---------- - V : pandas Series + V : pandas Series, pandas DataFrame, xarray DataArray, or xarray Dataset Velocity [m/s] indexed by time [datetime or s] polynomial_coefficients : numpy polynomial List of polynomial coefficients that discribe the relationship between @@ -165,36 +194,45 @@ def velocity_to_power(V, polynomial_coefficients, cut_in, cut_out): Velocity values below cut_in are not used to compute P cut_out: int/float Velocity values above cut_out are not used to compute P + dimension: string (optional) + Name of the relevant xarray dimension. If not supplied, + defaults to the first dimension. 
Does not affect pandas input. + to_pandas: bool (optional) + Flag to output pandas instead of xarray. Default = True. Returns ------- - P : pandas DataFrame + P : pandas DataFrame or xarray Dataset Power [W] indexed by time [datetime or s] """ - # dataframe allowed for matlab - if not isinstance(V, (pd.DataFrame, pd.Series)): - raise TypeError(f'V must be of type pd.Series or pd.DataFrame. Got: {type(V)}') + if not isinstance(V, (pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset)): + raise TypeError( + f'V must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(V)}') if not isinstance(polynomial_coefficients, np.poly1d): raise TypeError(f'polynomial_coefficients must be of type np.poly1d. Got: {type(polynomial_coefficients)}') if not isinstance(cut_in, (int,float)): raise TypeError(f'cut_in must be of type int or float. Got: {type(cut_in)}') if not isinstance(cut_out, (int,float)): raise TypeError(f'cut_out must be of type int or float. Got: {type(cut_out)}') + if not isinstance(dimension, str): + raise TypeError(f'dimension must be of type str. Got: {type(dimension)}') + if not isinstance(to_pandas, bool): + raise TypeError(f'to_pandas must be of type str. Got: {type(to_pandas)}') - if isinstance(V, pd.DataFrame) and len(V.columns) == 1: - V = V.squeeze().copy() - - # Calculate power using tranfer function and FDC + V = _convert_to_dataset(V) + + # Calculate power using transfer function and FDC vals = polynomial_coefficients(V) - + # Power for velocity values outside lower and upper bounds Turbine produces 0 power vals[V < cut_in] = 0. vals[V > cut_out] = 0. - P = pd.Series(vals, index=V.index) - - P = P.to_frame('P') # for matlab - + P = xr.Dataset(vals) + + if to_pandas: + P = P.to_pandas() + return P @@ -205,7 +243,7 @@ def energy_produced(P, seconds): Parameters ---------- - P : pandas Series + P : pandas Series or xarray DataArray Power [W] indexed by time [datetime or s] seconds: int or float Seconds in the time period of interest @@ -215,14 +253,13 @@ def energy_produced(P, seconds): E : float Energy [J] produced in the given time frame """ - # dataframe allowed for matlab - if not isinstance(P, (pd.DataFrame, pd.Series)): - raise TypeError(f'P must be of type pd.Series or pd.DataFrame. Got: {type(P)}') + if not isinstance(P, (pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset)): + raise TypeError( + f'P must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(P)}') if not isinstance(seconds, (int, float)): raise TypeError(f'seconds must be of type int or float. 
Got: {type(seconds)}') - if isinstance(P, pd.DataFrame) and len(P.columns) == 1: # for matlab - P = P.squeeze().copy() + P = _convert_to_dataset(P) # Calculate Histogram of power H, edges = np.histogram(P, 100 ) From 329aae97e0b1a8a268a776127fca4ecc773f6309 Mon Sep 17 00:00:00 2001 From: akeeste Date: Thu, 11 Jan 2024 13:51:28 -0600 Subject: [PATCH 03/42] add pandas to xarray helper function --- mhkit/utils/__init__.py | 1 + mhkit/utils/data_utils.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 mhkit/utils/data_utils.py diff --git a/mhkit/utils/__init__.py b/mhkit/utils/__init__.py index 074232541..4b3a6a4ee 100644 --- a/mhkit/utils/__init__.py +++ b/mhkit/utils/__init__.py @@ -2,5 +2,6 @@ from .stat_utils import get_statistics, vector_statistics, unwrap_vector, magnitude_phase, unorm from .cache import handle_caching, clear_cache from .upcrossing import upcrossing, peaks, troughs, heights, periods, custom +from .data_utils import _convert_to_dataset _matlab = False # Private variable indicating if mhkit is run through matlab diff --git a/mhkit/utils/data_utils.py b/mhkit/utils/data_utils.py new file mode 100644 index 000000000..f97032a0a --- /dev/null +++ b/mhkit/utils/data_utils.py @@ -0,0 +1,15 @@ +import pandas as pd +import xarray as xr + +def _convert_to_dataset(data, name='data'): + # Takes data that could be pd.DataFrame, pd.Series, xr.DataArray, or + # xr.Dataset and converts it to xr.Dataset + if isinstance(data, (pd.DataFrame, pd.Series)): + data = data.to_xarray() + + if isinstance(data, xr.DataArray): + if data.name is None: + data.name = name # xr.DataArray.to_dataset() breaks if the data variable is unnamed + data = data.to_dataset() + + return data From edc9b7466cec3645176f048ba0636517f15da7a3 Mon Sep 17 00:00:00 2001 From: akeeste Date: Thu, 11 Jan 2024 14:42:58 -0600 Subject: [PATCH 04/42] mhkit.river.io.usgs to xarray --- mhkit/river/io/usgs.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/mhkit/river/io/usgs.py b/mhkit/river/io/usgs.py index 4a69e2dd2..be281a9fa 100644 --- a/mhkit/river/io/usgs.py +++ b/mhkit/river/io/usgs.py @@ -6,7 +6,7 @@ from mhkit.utils.cache import handle_caching -def _read_usgs_json(text): +def _read_usgs_json(text, to_pandas=True): data = pd.DataFrame() for i in range(len(text['value']['timeSeries'])): @@ -24,10 +24,13 @@ def _read_usgs_json(text): except: pass + if not to_pandas: + data = data.to_xarray() + return data -def read_usgs_file(file_name): +def read_usgs_file(file_name, to_pandas=True): """ Reads a USGS JSON data file (from https://waterdata.usgs.gov/nwis) @@ -35,17 +38,19 @@ def read_usgs_file(file_name): ---------- file_name : str Name of USGS JSON data file + to_pandas: bool (optional) + Flag to output pandas instead of xarray. Default = True. 
Returns ------- - data : pandas DataFrame + data : pandas DataFrame or xarray Dataset Data indexed by datetime with columns named according to the parameter's variable description """ with open(file_name) as json_file: text = json.load(json_file) - data = _read_usgs_json(text) + data = _read_usgs_json(text, to_pandas) return data @@ -58,7 +63,8 @@ def request_usgs_data( data_type='Daily', proxy=None, write_json=None, - clear_cache=False): + clear_cache=False, + to_pandas=True): """ Loads USGS data directly from https://waterdata.usgs.gov/nwis using a GET request @@ -84,11 +90,13 @@ def request_usgs_data( write_json : str or None Name of json file to write data clear_cache : bool - If True, the cache for this specific request will be cleared. + If True, the cache for this specific request will be cleared. + to_pandas: bool (optional) + Flag to output pandas instead of xarray. Default = True. Returns ------- - data : pandas DataFrame + data : pandas DataFrame or xarray Dataset Data indexed by datetime with columns named according to the parameter's variable description """ @@ -127,7 +135,7 @@ def request_usgs_data( response = requests.get(url=data_url+api_query, proxies=proxy) text = json.loads(response.text) - data = _read_usgs_json(text) + data = _read_usgs_json(text, to_pandas) # After making the API request and processing the response, write the # response to a cache file From a724678dbe90e6c0996e5ab306da4655c2a20856 Mon Sep 17 00:00:00 2001 From: akeeste Date: Thu, 11 Jan 2024 15:46:05 -0600 Subject: [PATCH 05/42] convert mhkit.river.io.usgs to xarray --- mhkit/river/io/usgs.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/mhkit/river/io/usgs.py b/mhkit/river/io/usgs.py index be281a9fa..eb71a0f5a 100644 --- a/mhkit/river/io/usgs.py +++ b/mhkit/river/io/usgs.py @@ -2,30 +2,33 @@ import json import requests import shutil -import pandas as pd +import numpy as np +import xarray as xr from mhkit.utils.cache import handle_caching def _read_usgs_json(text, to_pandas=True): - data = pd.DataFrame() + data = xr.Dataset for i in range(len(text['value']['timeSeries'])): try: site_name = text['value']['timeSeries'][i]['variable']['variableDescription'] - site_data = pd.DataFrame( - text['value']['timeSeries'][i]['values'][0]['value']) - site_data.set_index('dateTime', drop=True, inplace=True) - site_data.index = pd.to_datetime(site_data.index, utc=True) - site_data.rename(columns={'value': site_name}, inplace=True) - site_data[site_name] = pd.to_numeric(site_data[site_name]) - site_data.index.name = None - del site_data['qualifiers'] + tmp = text['value']['timeSeries'][i]['values'][0]['value'] + v = [] + t = [] + for i in range(0,len(tmp)): + v.append(tmp[i]['value']) + t.append(tmp[i]['dateTime']) + v = np.asarray(v).astype(float) + t = np.asarray(t).astype(np.datetime64) + site_data = xr.Dataset(data_vars = {site_name: (['dateTime'],v)}, + coords = {'dateTime': t}) data = data.combine_first(site_data) except: pass - if not to_pandas: - data = data.to_xarray() + if to_pandas: + data = data.to_pandas() return data From f72e8c4c648bb56ac3c3d8f484c2961841045abd Mon Sep 17 00:00:00 2001 From: akeeste Date: Thu, 25 Jan 2024 11:57:13 -0600 Subject: [PATCH 06/42] convert river.io.d3d to xarray --- mhkit/river/io/d3d.py | 237 +++++++++++++++++++++--------------------- 1 file changed, 121 insertions(+), 116 deletions(-) diff --git a/mhkit/river/io/d3d.py b/mhkit/river/io/d3d.py index 46ba87cea..4385f16db 100644 --- a/mhkit/river/io/d3d.py +++ 
b/mhkit/river/io/d3d.py @@ -131,13 +131,13 @@ def _convert_time(data, time_index=None, seconds_run=None): except: idx = (np.abs(times - seconds_run)).argmin() QoI = idx - warnings.warn(f'Warning: seconds_run not found. Closest time stamp' - + 'found {times[idx]}', stacklevel=2) + warnings.warn('Warning: seconds_run not found. Closest time stamp' + + f'found {times[idx]}', stacklevel=2) return QoI -def get_layer_data(data, variable, layer_index=-1, time_index=-1): +def get_layer_data(data, variable, layer_index=-1, time_index=-1, to_pandas=True): ''' Get variable data from the NetCDF4 object at a specified layer and timestep. If the data is 2D the layer_index is ignored. @@ -156,10 +156,13 @@ def get_layer_data(data, variable, layer_index=-1, time_index=-1): time_index: int An integer to pull the time index from the dataset. 0 being closest to the start time. Default is last time index, found with input -1. + to_pandas : bool (optional) + Flag to output pandas instead of xarray. Default = True. + Returns ------- - layer_data: DataFrame - DataFrame with columns of "x", "y", "waterdepth", and "waterlevel" location + layer_data: pd.DataFrame or xr.Dataset + Dataset with columns of "x", "y", "waterdepth", and "waterlevel" location of the specified layer, variable values "v", and the "time" the simulation has run. The waterdepth is measured from the water surface and the "waterlevel" is the water level diffrencein meters from the zero water level. @@ -177,6 +180,10 @@ def get_layer_data(data, variable, layer_index=-1, time_index=-1): if variable not in data.variables.keys(): raise ValueError('variable not recognized') + if not isinstance(to_pandas, bool): + raise TypeError( + f'to_pandas must be of type bool. Got: {type(to_pandas)}') + coords = str(data.variables[variable].coordinates).split() var = data.variables[variable][:] max_time_index = data['time'].shape[0] - 1 # to account for zero index @@ -286,20 +293,29 @@ def get_layer_data(data, variable, layer_index=-1, time_index=-1): time = np.ma.getdata( data.variables['time'][time_index], False)*np.ones(len(x)) - layer = np.array([[x_i, y_i, d_i, w_i, v_i, t_i] for x_i, y_i, d_i, w_i, v_i, t_i in - zip(x, y, waterdepth, waterlevel, v, time)]) - layer_data = pd.DataFrame( - layer, columns=['x', 'y', 'waterdepth', 'waterlevel', 'v', 'time']) + index = np.arange(0,len(time)) + layer_data = xr.Dataset( + data_vars = {'x': (['index'], x), + 'y': (['index'], y), + 'waterdepth': (['index'], waterdepth), + 'waterlevel': (['index'], waterlevel), + 'v': (['index'], v), + 'time': (['index'], time)}, + coords = {'index': index} + ) + + if to_pandas: + layer_data = layer_data.to_pandas() return layer_data -def create_points(x, y, waterdepth): +def create_points(x, y, waterdepth, to_pandas=True): ''' - Generate a DataFrame of points from combinations of input coordinates. + Generate a Dataset of points from combinations of input coordinates. This function accepts three inputs and combines them to generate a - DataFrame of points. The inputs can be: + Dataset of points. The inputs can be: - 3 points - 2 points and 1 array - 1 point and 2 arrays @@ -317,11 +333,13 @@ def create_points(x, y, waterdepth): Y values (latitude) for the points. waterdepth : int, float, array-like Waterdepth values for the points. + to_pandas : bool (optional) + Flag to output pandas instead of xarray. Default = True. Returns ------- - pd.DataFrame - A DataFrame with columns 'x', 'y', and 'waterdepth' representing the generated points. 
+ points : xr.Dataset or pd.DataFrame + A Dataset with columns 'x', 'y', and 'waterdepth' representing the generated points. Example ------- @@ -364,91 +382,54 @@ def create_points(x, y, waterdepth): # Check data type if not isinstance(value, (int, float, np.ndarray, pd.Series, xr.DataArray)): - raise TypeError(f"{name} must be an int, float, array, or series") + raise TypeError(f"{name} must be an int, float, np.ndarray, pd.Series, or xr.DataArray. Got: {type(value)}") # Check for empty arrays if isinstance(value, (np.ndarray, pd.Series, xr.DataArray)) and len(value) == 0: raise ValueError(f"{name} should not be an empty array") - # Directions Initialization - directions = {} - for name, value in inputs.items(): - # Check if the value is a single integer or float, if so wrap it in an array - if isinstance(value, (int, float)): - value_array = np.array([value]) - else: - value_array = value - - # Determine the type based on the length - direction_type = 'point' if len(value_array) == 1 else 'array' - - # Assign to the directions dictionary - directions[name] = {'values': value_array, 'type': direction_type} - - types = [direction['type'] for direction in directions.values()] - num_points = types.count('point') - - if num_points >= 2: - max_len_name = max(directions, key=lambda name: len( - directions[name]['values'])) - for name, direction in directions.items(): - if direction['type'] == 'point': - direction['values'] = np.full( - len(directions[max_len_name]['values']), direction['values'][0]) - - combined_values = [direction['values'] - for direction in directions.values()] - points = pd.DataFrame(np.column_stack( - combined_values), columns=inputs.keys()) - - elif num_points == 1: - point_name = None - array_names = [] - - for name, direction in directions.items(): - if direction['type'] == 'point': - point_name = name - elif direction['type'] == 'array': - array_names.append(name) - - if point_name is None: - raise ValueError("No point-type direction found!") - - if len(array_names) != 2: - raise ValueError("Expected two array type directions") + if not isinstance(to_pandas, bool): + raise TypeError( + f'to_pandas must be of type bool. 
Got: {type(to_pandas)}') - mesh_x, mesh_y = np.meshgrid( - directions[array_names[0]]['values'], directions[array_names[1]]['values']) - mesh_depth = np.ones_like(mesh_x) * directions[point_name]['values'][0] + x_array_like = ~isinstance(x, (int, float)) + y_array_like = ~isinstance(y, (int, float)) + waterdepth_array_like = ~isinstance(waterdepth, (int, float)) - data = list(zip(mesh_x.ravel(), mesh_y.ravel(), mesh_depth.ravel())) - points = pd.DataFrame(data, columns=['x', 'y', 'waterdepth']) + if x_array_like and y_array_like and waterdepth_array_like: + # if all inputs are arrays, grid the coordinate and waterdepth + y_grid, waterdepth_grid = np.meshgrid(y, waterdepth) + y_grid = y_grid.ravel() + waterdepth_grid = waterdepth_grid.ravel() + _,mask = np.where(y==y_grid) + x_grid = x[mask] else: - x_values = directions['x']['values'] - y_values = directions['y']['values'] - depth_values = directions['waterdepth']['values'] - - if len(x_values) != len(y_values): - raise ValueError( - 'X and Y must be the same length if you are inputting three arrays') - - x_repeated = np.tile(x_values, len(depth_values)) - y_repeated = np.tile(y_values, len(depth_values)) - depth_tiled = np.repeat(depth_values, len(x_values)) - - points = pd.DataFrame({ - 'x': x_repeated, - 'y': y_repeated, - 'waterdepth': depth_tiled - }) + # if at least one input is a point, grid all inputs + x_grid, y_grid, waterdepth_grid = np.meshgrid(x, y, waterdepth) + x_grid = x_grid.ravel() + y_grid = y_grid.ravel() + waterdepth_grid = waterdepth_grid.ravel() + + index = np.arange(0,len(x_grid)) + points = xr.Dataset(data_vars={ + 'x': (['index'], x_grid), + 'y': (['index'], y_grid), + 'waterdepth': (['index'], waterdepth_grid) + }, + coords = {'index': index} + ) + + if to_pandas: + points = points.to_pandas() return points def variable_interpolation(data, variables, points='cells', edges='none', x_max_lim=float('inf'), x_min_lim=float('-inf'), - y_max_lim=float('inf'), y_min_lim=float('-inf')): + y_max_lim=float('inf'), y_min_lim=float('-inf'), + to_pandas=False): ''' Interpolate multiple variables from the Delft3D onto the same points. @@ -460,46 +441,54 @@ def variable_interpolation(data, variables, points='cells', edges='none', variables: array of strings Name of variables to interpolate, e.g. 'turkin1', 'ucx', 'ucy' and 'ucz'. The full list can be found using "data.variables.keys()" in the console. - points: string, DataFrame + points: string, pd.DataFrame, or xr.Dataset The points to interpolate data onto. 'cells'- interpolates all data onto the Delft3D cell coordinate system (Default) 'faces'- interpolates all dada onto the Delft3D face coordinate system - DataFrame of x, y, and waterdepth coordinates - Interpolates data onto user + Dataset of x, y, and waterdepth coordinates - Interpolates data onto user povided points. Can be created with `create_points` function. - edges: sting: 'nearest' + edges: string: 'nearest' If edges is set to 'nearest' the code will fill in nan values with nearest interpolation. Otherwise only linear interpolarion will be used. + to_pandas : bool (optional) + Flag to output pandas instead of xarray. Default = True. Returns ------- - transformed_data: DataFrame + transformed_data: pd.DataFrame or xr.Dataset Variables on specified grid points saved under the input variable names and the x, y, and waterdepth coordinates of those points. 
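+
+    Example
+    -------
+    A minimal sketch, assuming `data` is an open Delft3D NetCDF4 object whose
+    variables include 'ucx' and 'ucy' (variable names vary between model runs):
+
+    >>> points = create_points(np.linspace(1, 3, 5), 2, -1)
+    >>> interp = variable_interpolation(data, ['ucx', 'ucy'], points=points)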
''' - if not isinstance(points, (str, pd.DataFrame)): - raise TypeError('points must be a string or DataFrame') + if not isinstance(points, (str, pd.DataFrame, xr.Dataset)): + raise TypeError(f'points must be a string, pd.DataFrame, or xr.Dataset. Got {type(points)}.') + + if isinstance(points, pd.DataFrame): + points = points.to_xarray() if isinstance(points, str): if not (points == 'cells' or points == 'faces'): - raise ValueError('points must be cells or faces') + raise ValueError(f'If a string, points must be cells or faces. Got {points}') if not isinstance(data, netCDF4._netCDF4.Dataset): - raise TypeError('data must be netCDF4 object') + raise TypeError(f'data must be netCDF4 object. Got {type(data)}') + + if not isinstance(to_pandas, bool): + raise TypeError( + f'to_pandas must be of type bool. Got: {type(to_pandas)}') data_raw = {} for var in variables: - var_data_df = get_all_data_points(data, var, time_index=-1) - var_data_df['depth'] = var_data_df.waterdepth - \ - var_data_df.waterlevel # added - var_data_df = var_data_df.loc[:, ~ - var_data_df.T.duplicated(keep='first')] - var_data_df = var_data_df[var_data_df.x > x_min_lim] - var_data_df = var_data_df[var_data_df.x < x_max_lim] - var_data_df = var_data_df[var_data_df.y > y_min_lim] - var_data_df = var_data_df[var_data_df.y < y_max_lim] - data_raw[var] = var_data_df - if type(points) == pd.DataFrame: + var_data = get_all_data_points(data, var, time_index=-1, to_pandas=False) + var_data['depth'] = var_data.waterdepth - var_data.waterlevel # added + + # var_data = var_data.loc[:, var_data.T.duplicated(keep='first')] + var_data = var_data.where(var_data.x > x_min_lim, drop=True) + var_data = var_data.where(var_data.x < x_max_lim, drop=True) + var_data = var_data.where(var_data.y > y_min_lim, drop=True) + var_data = var_data.where(var_data.y < y_max_lim, drop=True) + data_raw[var] = var_data + if isinstance(points, xr.Dataset): print('points provided') elif points == 'faces': points = data_raw['ucx'][['x', 'y', 'waterdepth']] @@ -522,10 +511,13 @@ def variable_interpolation(data, variables, points='cells', edges='none', [points['x'][i], points['y'][i], points['waterdepth'][i]], method='nearest')) + if to_pandas: + transformed_data = transformed_data.to_pandas() + return transformed_data -def get_all_data_points(data, variable, time_index=-1): +def get_all_data_points(data, variable, time_index=-1, to_pandas=True): ''' Get data points for a passed variable for all layers at a specified time from the Delft3D NetCDF4 object by iterating over the `get_layer_data` function. @@ -541,10 +533,12 @@ def get_all_data_points(data, variable, time_index=-1): time_index: int An integer to pull the time step from the dataset. Default is last time step, found with the input -1. + to_pandas : bool (optional) + Flag to output pandas instead of xarray. Default = True. Returns ------- - all_data: DataFrame + all_data: xr.Dataset or pd.Dataframe Dataframe with columns x, y, waterdepth, waterlevel, variable, and time. The waterdepth is measured from the water surface and the "waterlevel" is the water level diffrence in meters from the zero water level. @@ -560,6 +554,10 @@ def get_all_data_points(data, variable, time_index=-1): if variable not in data.variables.keys(): raise ValueError('variable not recognized') + if not isinstance(to_pandas, bool): + raise TypeError( + f'to_pandas must be of type bool. 
Got: {type(to_pandas)}') + max_time_index = len(data.variables[variable][:]) if abs(time_index) > max_time_index: raise ValueError( @@ -611,12 +609,19 @@ def get_all_data_points(data, variable, time_index=-1): v_all = np.append(v_all, layer_data.v) time_all = np.append(time_all, layer_data.time) - known_points = np.array([[x, y, waterdepth, waterlevel, v, time] - for x, y, waterdepth, waterlevel, v, time in zip(x_all, y_all, - depth_all, water_level_all, v_all, time_all)]) + index = np.arange(0,len(time_all)) + all_data = xr.Dataset(data_vars={ + 'x': (['index'], x_all), + 'y': (['index'], y_all), + 'waterdepth': (['index'], depth_all), + 'waterlevel': (['index'], water_level_all), + f'{variable}': (['index'], v_all), + 'time': (['index'], time_all)}, + coords={'index': index} + ) - all_data = pd.DataFrame(known_points, columns=[ - 'x', 'y', 'waterdepth', 'waterlevel', f'{variable}', 'time']) + if to_pandas: + all_data = all_data.to_pandas() return all_data @@ -632,7 +637,7 @@ def turbulent_intensity(data, points='cells', time_index=-1, data: NetCDF4 object A NetCDF4 object that contains spatial data, e.g. velocity or shear stress, generated by running a Delft3D model. - points: string, DataFrame + points: string, pd.DataFrame, xr.Dataset Points to interpolate data onto. 'cells': interpolates all data onto velocity coordinate system (Default). 'faces': interpolates all data onto the TKE coordinate system. @@ -648,7 +653,7 @@ def turbulent_intensity(data, points='cells', time_index=-1, Returns ------- - TI_data: Dataframe + TI_data: xr.Dataset or pd.DataFrame If intermediate_values is true all values are output. If intermediate_values is equal to false only turbulent_intesity and x, y, and z variables are output. @@ -663,8 +668,8 @@ def turbulent_intensity(data, points='cells', time_index=-1, ucz- velocity in the vertical direction ''' - if not isinstance(points, (str, pd.DataFrame)): - raise TypeError('points must be a string or DataFrame') + if not isinstance(points, (str, pd.DataFrame, xr.Dataset)): + raise TypeError('points must be a string, pd.DataFrame, xr.Dataset') if isinstance(points, str): if not (points == 'cells' or points == 'faces'): @@ -722,8 +727,8 @@ def turbulent_intensity(data, points='cells', time_index=-1, atol=1.0e-4) zero_ind = neg_index[0][zero_bool] non_zero_ind = neg_index[0][~zero_bool] - TI_data.loc[zero_ind, 'turkin1'] = np.zeros(len(zero_ind)) - TI_data.loc[non_zero_ind, 'turkin1'] = [np.nan]*len(non_zero_ind) + TI_data.where(zero_ind)['turkin1'] = np.zeros(len(zero_ind)) + TI_data.where(non_zero_ind)['turkin1'] = [np.nan]*len(non_zero_ind) TI_data['turbulent_intensity'] = np.sqrt( 2/3*TI_data['turkin1'])/u_mag * 100 # % From 618e188773a6a469727aaa89ffe77a21181a1be9 Mon Sep 17 00:00:00 2001 From: akeeste Date: Tue, 30 Jan 2024 14:34:38 -0600 Subject: [PATCH 07/42] fix discharge_to_velocity --- mhkit/river/resource.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mhkit/river/resource.py b/mhkit/river/resource.py index acd3aac21..567eb8cb7 100644 --- a/mhkit/river/resource.py +++ b/mhkit/river/resource.py @@ -162,15 +162,18 @@ def discharge_to_velocity(D, polynomial_coefficients, dimension="", to_pandas=Tr raise TypeError(f'dimension must be of type str. Got: {type(dimension)}') if not isinstance(to_pandas, bool): raise TypeError(f'to_pandas must be of type str. 
Got: {type(to_pandas)}') - + + D = _convert_to_dataset(D,'V') + if dimension == "": dimension = list(D.coords)[0] - - D = _convert_to_dataset(D) # Calculate velocity using polynomial - vals = polynomial_coefficients(D) - V = xr.Dataset(vals) + V = xr.Dataset() + for var in list(D.data_vars): + vals = polynomial_coefficients(D[var]) + V = V.assign(variables={var: ([dimension], vals)}) + V = V.assign_coords({dimension: D[dimension]}) if to_pandas: V = V.to_pandas() From 2e69f3161b249bccbc882893e1f728a5db802055 Mon Sep 17 00:00:00 2001 From: akeeste Date: Tue, 30 Jan 2024 14:43:30 -0600 Subject: [PATCH 08/42] move convert_to_dataset into mhkit.utils.data_utils --- mhkit/power/characteristics.py | 68 +++------------------------------- mhkit/power/quality.py | 10 ++--- mhkit/river/resource.py | 10 ++--- mhkit/utils/__init__.py | 2 +- mhkit/utils/data_utils.py | 47 ++++++++++++++++++++++- 5 files changed, 63 insertions(+), 74 deletions(-) diff --git a/mhkit/power/characteristics.py b/mhkit/power/characteristics.py index 727de6088..da72cfcc2 100644 --- a/mhkit/power/characteristics.py +++ b/mhkit/power/characteristics.py @@ -2,6 +2,7 @@ import xarray as xr import numpy as np from scipy.signal import hilbert +from mhkit.utils import convert_to_dataset def instantaneous_frequency(um, time_dimension="", to_pandas=True): @@ -37,7 +38,7 @@ def instantaneous_frequency(um, time_dimension="", to_pandas=True): f'time_dimension must be of type bool. Got: {type(time_dimension)}') # Convert input to xr.Dataset - um = _convert_to_dataset(um, 'data') + um = convert_to_dataset(um, 'data') if time_dimension != '' and time_dimension not in um.coords: raise ValueError('time_dimension was supplied but is not a dimension ' @@ -100,8 +101,8 @@ def dc_power(voltage, current, to_pandas=True): f'to_pandas must be of type bool. Got: {type(to_pandas)}') # Convert inputs to xr.Dataset - voltage = _convert_to_dataset(voltage, 'voltage') - current = _convert_to_dataset(current, 'current') + voltage = convert_to_dataset(voltage, 'voltage') + current = convert_to_dataset(current, 'current') # Check that sizes are the same if not (voltage.sizes == current.sizes and len(voltage.data_vars) == len(current.data_vars)): @@ -166,8 +167,8 @@ def ac_power_three_phase(voltage, current, power_factor, line_to_line=False, to_ f'to_pandas must be of type bool. Got: {type(to_pandas)}') # Convert inputs to xr.Dataset - voltage = _convert_to_dataset(voltage, 'voltage') - current = _convert_to_dataset(current, 'current') + voltage = convert_to_dataset(voltage, 'voltage') + current = convert_to_dataset(current, 'current') # Check that sizes are the same if not len(voltage.data_vars) == 3: @@ -190,60 +191,3 @@ def ac_power_three_phase(voltage, current, power_factor, line_to_line=False, to_ return P -def _convert_to_dataset(data, name='data'): - """ - Converts the given data to an xarray.Dataset. - - This function is designed to handle inputs that can be either a pandas DataFrame, a pandas Series, - an xarray DataArray, or an xarray Dataset. It ensures that the output is consistently an xarray.Dataset. - - Parameters - ---------- - data: pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset - The data to be converted. - - name: str (Optional) - The name to assign to the data variable in case the input is an xarray DataArray without a name. - Default value is 'data'. - - Returns - ------- - xarray.Dataset - The input data converted to an xarray.Dataset. If the input is already an xarray.Dataset, - it is returned as is. 
- - Examples - -------- - >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) - >>> ds = _convert_to_dataset(df) - >>> type(ds) - - - >>> series = pd.Series([1, 2, 3], name='C') - >>> ds = _convert_to_dataset(series) - >>> type(ds) - - - >>> data_array = xr.DataArray([1, 2, 3]) - >>> ds = _convert_to_dataset(data_array, name='D') - >>> type(ds) - - """ - if not isinstance(data, (pd.DataFrame, pd.Series, xr.DataArray, xr.Dataset)): - raise TypeError("Input data must be of type pandas.DataFrame, pandas.Series, " - "xarray.DataArray, or xarray.Dataset") - - if not isinstance(name, str): - raise TypeError("The 'name' parameter must be a string") - - # Takes data that could be pd.DataFrame, pd.Series, xr.DataArray, or - # xr.Dataset and converts it to xr.Dataset - if isinstance(data, (pd.DataFrame, pd.Series)): - data = data.to_xarray() - - if isinstance(data, xr.DataArray): - if data.name is None: - data.name = name # xr.DataArray.to_dataset() breaks if the data variable is unnamed - data = data.to_dataset() - - return data diff --git a/mhkit/power/quality.py b/mhkit/power/quality.py index a0e898b3d..1d4d77acb 100644 --- a/mhkit/power/quality.py +++ b/mhkit/power/quality.py @@ -2,7 +2,7 @@ import numpy as np from scipy import fftpack import xarray as xr -from .characteristics import _convert_to_dataset +from mhkit.utils import convert_to_dataset # This group of functions are to be used for power quality assessments def harmonics(x, freq, grid_freq, to_pandas=True): @@ -44,7 +44,7 @@ def harmonics(x, freq, grid_freq, to_pandas=True): f'to_pandas must be of type bool. Got {type(to_pandas)}') # Convert input to xr.Dataset - x = _convert_to_dataset(x, 'data') + x = convert_to_dataset(x, 'data') sample_spacing = 1./freq @@ -116,7 +116,7 @@ def harmonic_subgroups(harmonics, grid_freq, frequency_dimension="", to_pandas=T f'frequency_dimension must be of type bool. Got: {type(frequency_dimension)}') # Convert input to xr.Dataset - harmonics = _convert_to_dataset(harmonics, 'harmonics') + harmonics = convert_to_dataset(harmonics, 'harmonics') if frequency_dimension != '' and frequency_dimension not in harmonics.coords: raise ValueError('frequency_dimension was supplied but is not a dimension ' @@ -188,7 +188,7 @@ def total_harmonic_current_distortion(harmonics_subgroup, frequency_dimension="" f'frequency_dimension must be of type bool. Got: {type(frequency_dimension)}') # Convert input to xr.Dataset - harmonics_subgroup = _convert_to_dataset(harmonics_subgroup, 'harmonics') + harmonics_subgroup = convert_to_dataset(harmonics_subgroup, 'harmonics') if frequency_dimension != '' and frequency_dimension not in harmonics.coords: raise ValueError('frequency_dimension was supplied but is not a dimension ' @@ -246,7 +246,7 @@ def interharmonics(harmonics, grid_freq, frequency_dimension="", to_pandas=True) f'to_pandas must be of type bool. 
Got: {type(to_pandas)}') # Convert input to xr.Dataset - harmonics = _convert_to_dataset(harmonics, 'harmonics') + harmonics = convert_to_dataset(harmonics, 'harmonics') if frequency_dimension != '' and frequency_dimension not in harmonics.coords: raise ValueError('frequency_dimension was supplied but is not a dimension ' diff --git a/mhkit/river/resource.py b/mhkit/river/resource.py index 567eb8cb7..181e877b6 100644 --- a/mhkit/river/resource.py +++ b/mhkit/river/resource.py @@ -3,7 +3,7 @@ import numpy as np from scipy.stats import linregress as _linregress from scipy.stats import rv_histogram as _rv_histogram -from mhkit.utils import _convert_to_dataset +from mhkit.utils import convert_to_dataset def Froude_number(v, h, g=9.80665): @@ -70,7 +70,7 @@ def exceedance_probability(D, dimension="", to_pandas=True): if dimension == "": dimension = list(D.coords)[0] - D = _convert_to_dataset(D) + D = convert_to_dataset(D) # Calculate exceedence probability (F) rank = D.rank() @@ -163,7 +163,7 @@ def discharge_to_velocity(D, polynomial_coefficients, dimension="", to_pandas=Tr if not isinstance(to_pandas, bool): raise TypeError(f'to_pandas must be of type str. Got: {type(to_pandas)}') - D = _convert_to_dataset(D,'V') + D = convert_to_dataset(D,'V') if dimension == "": dimension = list(D.coords)[0] @@ -222,7 +222,7 @@ def velocity_to_power(V, polynomial_coefficients, cut_in, cut_out, dimension="", if not isinstance(to_pandas, bool): raise TypeError(f'to_pandas must be of type str. Got: {type(to_pandas)}') - V = _convert_to_dataset(V) + V = convert_to_dataset(V) # Calculate power using transfer function and FDC vals = polynomial_coefficients(V) @@ -262,7 +262,7 @@ def energy_produced(P, seconds): if not isinstance(seconds, (int, float)): raise TypeError(f'seconds must be of type int or float. Got: {type(seconds)}') - P = _convert_to_dataset(P) + P = convert_to_dataset(P) # Calculate Histogram of power H, edges = np.histogram(P, 100 ) diff --git a/mhkit/utils/__init__.py b/mhkit/utils/__init__.py index 4b3a6a4ee..6e30fcd5f 100644 --- a/mhkit/utils/__init__.py +++ b/mhkit/utils/__init__.py @@ -2,6 +2,6 @@ from .stat_utils import get_statistics, vector_statistics, unwrap_vector, magnitude_phase, unorm from .cache import handle_caching, clear_cache from .upcrossing import upcrossing, peaks, troughs, heights, periods, custom -from .data_utils import _convert_to_dataset +from .data_utils import convert_to_dataset _matlab = False # Private variable indicating if mhkit is run through matlab diff --git a/mhkit/utils/data_utils.py b/mhkit/utils/data_utils.py index f97032a0a..6d9bd83b6 100644 --- a/mhkit/utils/data_utils.py +++ b/mhkit/utils/data_utils.py @@ -1,7 +1,52 @@ import pandas as pd import xarray as xr -def _convert_to_dataset(data, name='data'): +def convert_to_dataset(data, name='data'): + """ + Converts the given data to an xarray.Dataset. + + This function is designed to handle inputs that can be either a pandas DataFrame, a pandas Series, + an xarray DataArray, or an xarray Dataset. It ensures that the output is consistently an xarray.Dataset. + + Parameters + ---------- + data: pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset + The data to be converted. + + name: str (Optional) + The name to assign to the data variable in case the input is an xarray DataArray without a name. + Default value is 'data'. + + Returns + ------- + xarray.Dataset + The input data converted to an xarray.Dataset. If the input is already an xarray.Dataset, + it is returned as is. 
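+
+    Raises
+    ------
+    TypeError
+        If `data` is not a pandas or xarray object, or if `name` is not a string.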
+ + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + >>> ds = convert_to_dataset(df) + >>> type(ds) + + + >>> series = pd.Series([1, 2, 3], name='C') + >>> ds = convert_to_dataset(series) + >>> type(ds) + + + >>> data_array = xr.DataArray([1, 2, 3]) + >>> ds = convert_to_dataset(data_array, name='D') + >>> type(ds) + + """ + if not isinstance(data, (pd.DataFrame, pd.Series, xr.DataArray, xr.Dataset)): + raise TypeError("Input data must be of type pandas.DataFrame, pandas.Series, " + "xarray.DataArray, or xarray.Dataset") + + if not isinstance(name, str): + raise TypeError("The 'name' parameter must be a string") + # Takes data that could be pd.DataFrame, pd.Series, xr.DataArray, or # xr.Dataset and converts it to xr.Dataset if isinstance(data, (pd.DataFrame, pd.Series)): From 02349e66b1f43f397bf7dcced10c48effabecdcc Mon Sep 17 00:00:00 2001 From: akeeste Date: Thu, 1 Feb 2024 08:54:20 -0600 Subject: [PATCH 09/42] update dataset labeling --- mhkit/river/resource.py | 23 +++++++++++++++-------- mhkit/tests/river/test_resource.py | 4 ++-- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/mhkit/river/resource.py b/mhkit/river/resource.py index 181e877b6..3eddd44ed 100644 --- a/mhkit/river/resource.py +++ b/mhkit/river/resource.py @@ -163,7 +163,7 @@ def discharge_to_velocity(D, polynomial_coefficients, dimension="", to_pandas=Tr if not isinstance(to_pandas, bool): raise TypeError(f'to_pandas must be of type str. Got: {type(to_pandas)}') - D = convert_to_dataset(D,'V') + D = convert_to_dataset(D, name='V') if dimension == "": dimension = list(D.coords)[0] @@ -222,16 +222,23 @@ def velocity_to_power(V, polynomial_coefficients, cut_in, cut_out, dimension="", if not isinstance(to_pandas, bool): raise TypeError(f'to_pandas must be of type str. Got: {type(to_pandas)}') - V = convert_to_dataset(V) + V = convert_to_dataset(V, name='P') - # Calculate power using transfer function and FDC - vals = polynomial_coefficients(V) + if dimension == "": + dimension = list(V.coords)[0] + + # Calculate velocity using polynomial + P = xr.Dataset() + for var in list(V.data_vars): + # Calculate power using transfer function and FDC + vals = polynomial_coefficients(V[var]) - # Power for velocity values outside lower and upper bounds Turbine produces 0 power - vals[V < cut_in] = 0. - vals[V > cut_out] = 0. + # Power for velocity values outside lower and upper bounds Turbine produces 0 power + vals[V[var] < cut_in] = 0. + vals[V[var] > cut_out] = 0. 
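+        # vals is now this variable's power, zeroed outside [cut_in, cut_out]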
- P = xr.Dataset(vals) + P = P.assign(variables={var: ([dimension], vals)}) + P = P.assign_coords({dimension: V[dimension]}) if to_pandas: P = P.to_pandas() diff --git a/mhkit/tests/river/test_resource.py b/mhkit/tests/river/test_resource.py index 6e42da2a6..380326c77 100644 --- a/mhkit/tests/river/test_resource.py +++ b/mhkit/tests/river/test_resource.py @@ -66,14 +66,14 @@ def test_discharge_to_velocity(self): Q = pd.Series(np.arange(9)) # Calculate a first order polynomial on an DV_Curve x=y line 10 times greater than the Q values p, r2 = river.resource.polynomial_fit(np.arange(9), 10*np.arange(9), 1) - # Becuase the polynomial line fits perfect we should expect the V to equal 10*Q + # Because the polynomial line fits perfect we should expect the V to equal 10*Q V = river.resource.discharge_to_velocity(Q, p) self.assertAlmostEqual(np.sum(10*Q - V['V']), 0.00, places=2) def test_velocity_to_power(self): # Calculate a first order polynomial on an DV_Curve x=y line 10 times greater than the Q values p, r2 = river.resource.polynomial_fit(np.arange(9), 10*np.arange(9), 1) - # Becuase the polynomial line fits perfect we should expect the V to equal 10*Q + # Because the polynomial line fits perfect we should expect the V to equal 10*Q V = river.resource.discharge_to_velocity(pd.Series(np.arange(9)), p) # Calculate a first order polynomial on an VP_Curve x=y line 10 times greater than the V values p2, r22 = river.resource.polynomial_fit( From b964e0019a23d2bbbe17742bed59f2598d928bec Mon Sep 17 00:00:00 2001 From: akeeste Date: Tue, 20 Feb 2024 14:09:56 -0600 Subject: [PATCH 10/42] black formatting for previously changed river files --- mhkit/river/graphics.py | 281 +++++++++++-------- mhkit/river/io/d3d.py | 588 ++++++++++++++++++++++------------------ mhkit/river/io/usgs.py | 108 +++++--- mhkit/river/resource.py | 169 ++++++------ 4 files changed, 643 insertions(+), 503 deletions(-) diff --git a/mhkit/river/graphics.py b/mhkit/river/graphics.py index 3f40e8673..fdbede6eb 100644 --- a/mhkit/river/graphics.py +++ b/mhkit/river/graphics.py @@ -1,10 +1,9 @@ import numpy as np import xarray as xr -import matplotlib.pyplot as plt +import matplotlib.pyplot as plt -def _xy_plot(x, y, fmt='.', label=None, xlabel=None, ylabel=None, title=None, - ax=None): +def _xy_plot(x, y, fmt=".", label=None, xlabel=None, ylabel=None, title=None, ax=None): """ Base function to plot any x vs y data @@ -14,248 +13,302 @@ def _xy_plot(x, y, fmt='.', label=None, xlabel=None, ylabel=None, title=None, Data for the x axis of plot y: array-like Data for y axis of plot - + Returns ------- ax : matplotlib.pyplot axes - + """ if ax is None: - plt.figure(figsize=(16,8)) - params = {'legend.fontsize': 'x-large', - 'axes.labelsize': 'x-large', - 'axes.titlesize':'x-large', - 'xtick.labelsize':'x-large', - 'ytick.labelsize':'x-large'} + plt.figure(figsize=(16, 8)) + params = { + "legend.fontsize": "x-large", + "axes.labelsize": "x-large", + "axes.titlesize": "x-large", + "xtick.labelsize": "x-large", + "ytick.labelsize": "x-large", + } plt.rcParams.update(params) ax = plt.gca() - + ax.plot(x, y, fmt, label=label, markersize=7) - + ax.grid() - - if label: ax.legend() - if xlabel: ax.set_xlabel(xlabel) - if ylabel: ax.set_ylabel(ylabel) - if title: ax.set_title(title) - + + if label: + ax.legend() + if xlabel: + ax.set_xlabel(xlabel) + if ylabel: + ax.set_ylabel(ylabel) + if title: + ax.set_title(title) + plt.tight_layout() - + return ax def plot_flow_duration_curve(D, F, label=None, ax=None): """ - Plots discharge vs exceedance 
probability as a Flow Duration Curve (FDC) - + Plots discharge vs exceedance probability as a Flow Duration Curve (FDC) + Parameters ------------ D: array-like Discharge [m/s] indexed by time - - F: array-like + + F: array-like Exceedance probability [unitless] indexed by time - + label: string Label to use in the legend - + ax : matplotlib axes object - Axes for plotting. If None, then a new figure with a single + Axes for plotting. If None, then a new figure with a single axes is used. - + Returns --------- ax : matplotlib pyplot axes - + """ # Sort by F - temp = xr.Dataset(data_vars = {'D': D, 'F': F}) - temp.sortby('F', ascending=False) + temp = xr.Dataset(data_vars={"D": D, "F": F}) + temp.sortby("F", ascending=False) - ax = _xy_plot(temp['D'], temp['F'], fmt='-', label=label, xlabel='Discharge [$m^3/s$]', - ylabel='Exceedance Probability', ax=ax) - plt.xscale('log') + ax = _xy_plot( + temp["D"], + temp["F"], + fmt="-", + label=label, + xlabel="Discharge [$m^3/s$]", + ylabel="Exceedance Probability", + ax=ax, + ) + plt.xscale("log") return ax def plot_velocity_duration_curve(V, F, label=None, ax=None): """ - Plots velocity vs exceedance probability as a Velocity Duration Curve (VDC) - + Plots velocity vs exceedance probability as a Velocity Duration Curve (VDC) + Parameters ------------ - V: array-like + V: array-like Velocity [m/s] indexed by time - - F: array-like + + F: array-like Exceedance probability [unitless] indexed by time - + label: string Label to use in the legend - + ax : matplotlib axes object - Axes for plotting. If None, then a new figure with a single + Axes for plotting. If None, then a new figure with a single axes is used. - + Returns --------- ax : matplotlib pyplot axes - + """ # Sort by F - temp = xr.Dataset(data_vars = {'V': V, 'F': F}) - temp.sortby('F', ascending=False) + temp = xr.Dataset(data_vars={"V": V, "F": F}) + temp.sortby("F", ascending=False) - ax = _xy_plot(temp['V'], temp['F'], fmt='-', label=label, xlabel='Velocity [$m/s$]', - ylabel='Exceedance Probability', ax=ax) + ax = _xy_plot( + temp["V"], + temp["F"], + fmt="-", + label=label, + xlabel="Velocity [$m/s$]", + ylabel="Exceedance Probability", + ax=ax, + ) return ax def plot_power_duration_curve(P, F, label=None, ax=None): """ - Plots power vs exceedance probability as a Power Duration Curve (PDC) + Plots power vs exceedance probability as a Power Duration Curve (PDC) Parameters ------------ - P: array-like + P: array-like Power [W] indexed by time - - F: array-like + + F: array-like Exceedance probability [unitless] indexed by time - + label: string Label to use in the legend - + ax : matplotlib axes object - Axes for plotting. If None, then a new figure with a single + Axes for plotting. If None, then a new figure with a single axes is used. 
- + Returns --------- ax : matplotlib pyplot axes - + """ # Sort by F - temp = xr.Dataset(data_vars = {'P': P, 'F': F}) - temp.sortby('F', ascending=False) + temp = xr.Dataset(data_vars={"P": P, "F": F}) + temp.sortby("F", ascending=False) - ax = _xy_plot(temp['P'], temp['F'], fmt='-', label=label, xlabel='Power [W]', - ylabel='Exceedance Probability', ax=ax) + ax = _xy_plot( + temp["P"], + temp["F"], + fmt="-", + label=label, + xlabel="Power [W]", + ylabel="Exceedance Probability", + ax=ax, + ) return ax - + def plot_discharge_timeseries(Q, time_dimension="", label=None, ax=None): """ Plots discharge time-series - + Parameters ------------ Q: array-like Discharge [m3/s] indexed by time - + time_dimension: string (optional) - Name of the xarray dimension corresponding to time. If not supplied, + Name of the xarray dimension corresponding to time. If not supplied, defaults to the first dimension. - + label: string Label to use in the legend - + ax : matplotlib axes object - Axes for plotting. If None, then a new figure with a single + Axes for plotting. If None, then a new figure with a single axes is used. - + Returns --------- - ax : matplotlib pyplot axes - + ax : matplotlib pyplot axes + """ if time_dimension == "": time_dimension = list(Q.coords)[0] ax = _xy_plot( - Q.coords[time_dimension].values, - Q, - fmt='-', - label=label, - xlabel='Time', - ylabel='Discharge [$m^3/s$]', - ax=ax + Q.coords[time_dimension].values, + Q, + fmt="-", + label=label, + xlabel="Time", + ylabel="Discharge [$m^3/s$]", + ax=ax, ) - + return ax def plot_discharge_vs_velocity(D, V, polynomial_coeff=None, label=None, ax=None): """ Plots discharge vs velocity data along with the polynomial fit - + Parameters ------------ - D : array-like + D : array-like Discharge [m/s] indexed by time - - V : array-like + + V : array-like Velocity [m/s] indexed by time - + polynomial_coeff: numpy polynomial - Polynomial coefficients, which can be computed using - `river.resource.polynomial_fit`. If None, then the polynomial fit is - not included int the plot. - + Polynomial coefficients, which can be computed using + `river.resource.polynomial_fit`. If None, then the polynomial fit is + not included int the plot. + ax : matplotlib axes object - Axes for plotting. If None, then a new figure with a single + Axes for plotting. If None, then a new figure with a single axes is used. - + Returns --------- ax : matplotlib pyplot axes - + """ - ax = _xy_plot(D, V, fmt='.', label=label, xlabel='Discharge [$m^3/s$]', - ylabel='Velocity [$m/s$]', ax=ax) + ax = _xy_plot( + D, + V, + fmt=".", + label=label, + xlabel="Discharge [$m^3/s$]", + ylabel="Velocity [$m/s$]", + ax=ax, + ) if polynomial_coeff: x = np.linspace(D.min(), D.max()) - ax = _xy_plot(x, polynomial_coeff(x), fmt='--', label='Polynomial fit', - xlabel='Discharge [$m^3/s$]', ylabel='Velocity [$m/s$]', - ax=ax) + ax = _xy_plot( + x, + polynomial_coeff(x), + fmt="--", + label="Polynomial fit", + xlabel="Discharge [$m^3/s$]", + ylabel="Velocity [$m/s$]", + ax=ax, + ) return ax def plot_velocity_vs_power(V, P, polynomial_coeff=None, label=None, ax=None): """ - Plots velocity vs power data along with the polynomial fit - + Plots velocity vs power data along with the polynomial fit + Parameters ------------ - V : array-like + V : array-like Velocity [m/s] indexed by time - - P: array-like + + P: array-like Power [W] indexed by time - + polynomial_coeff: numpy polynomial - Polynomial coefficients, which can be computed using - `river.resource.polynomial_fit`. 
If None, then the polynomial fit is - not included int the plot. - + Polynomial coefficients, which can be computed using + `river.resource.polynomial_fit`. If None, then the polynomial fit is + not included int the plot. + ax : matplotlib axes object - Axes for plotting. If None, then a new figure with a single + Axes for plotting. If None, then a new figure with a single axes is used. - + Returns --------- ax : matplotlib pyplot axes - + """ - ax = _xy_plot(V, P, fmt='.', label=label, xlabel='Velocity [$m/s$]', - ylabel='Power [$W$]', ax=ax) + ax = _xy_plot( + V, + P, + fmt=".", + label=label, + xlabel="Velocity [$m/s$]", + ylabel="Power [$W$]", + ax=ax, + ) if polynomial_coeff: x = np.linspace(V.min(), V.max()) - ax = _xy_plot(x, polynomial_coeff(x), fmt='--', label='Polynomial fit', - xlabel='Velocity [$m/s$]', ylabel='Power [$W$]', ax=ax) - + ax = _xy_plot( + x, + polynomial_coeff(x), + fmt="--", + label="Polynomial fit", + xlabel="Velocity [$m/s$]", + ylabel="Power [$W$]", + ax=ax, + ) + return ax diff --git a/mhkit/river/io/d3d.py b/mhkit/river/io/d3d.py index 4385f16db..ce7580cde 100644 --- a/mhkit/river/io/d3d.py +++ b/mhkit/river/io/d3d.py @@ -8,15 +8,15 @@ def get_all_time(data): - ''' - Returns all of the time stamps from a D3D simulation passed to the function + """ + Returns all of the time stamps from a D3D simulation passed to the function as a NetCDF object (data) Parameters ---------- - data: NetCDF4 object + data: NetCDF4 object A NetCDF4 object that contains spatial data, e.g. velocity or shear - stress generated by running a Delft3D model. + stress generated by running a Delft3D model. Returns ------- @@ -24,26 +24,26 @@ def get_all_time(data): Returns an array of integers representing the number of seconds after the simulation started and that the data object contains a snapshot of simulation conditions at that time. - ''' + """ if not isinstance(data, netCDF4._netCDF4.Dataset): - raise TypeError('data must be a NetCDF4 object') + raise TypeError("data must be a NetCDF4 object") - seconds_run = np.ma.getdata(data.variables['time'][:], False) + seconds_run = np.ma.getdata(data.variables["time"][:], False) return seconds_run def index_to_seconds(data, time_index): - ''' - The function will return 'seconds_run' if passed a 'time_index' + """ + The function will return 'seconds_run' if passed a 'time_index' Parameters ---------- - data: NetCDF4 object + data: NetCDF4 object A NetCDF4 object that contains spatial data, e.g. velocity or shear - stress, generated by running a Delft3D model. - time_index: int + stress, generated by running a Delft3D model. + time_index: int A positive integer to pull the time index from the dataset. 0 being closest to time 0. Default is last time index -1. @@ -51,74 +51,74 @@ def index_to_seconds(data, time_index): ------- seconds_run: int, float The 'seconds_run' is the seconds corresponding to the 'time_index' increments. - ''' + """ return _convert_time(data, time_index=time_index) def seconds_to_index(data, seconds_run): - ''' + """ The function will return the nearest 'time_index' in the data if passed an integer number of 'seconds_run' Parameters ---------- - data: NetCDF4 object + data: NetCDF4 object A NetCDF4 object that contains spatial data, e.g. velocity or shear - stress, generated by running a Delft3D model. + stress, generated by running a Delft3D model. 
seconds_run: int, float
-        A positive integer or float that represents the amount of time in seconds
+        A positive integer or float that represents the amount of time in seconds
        passed since starting the simulation.

     Returns
     -------
     time_index: int
-        The 'time_index' is a positive integer starting from 0
+        The 'time_index' is a positive integer starting from 0
        and incrementing until the simulation is complete.
-    '''
+    """
     return _convert_time(data, seconds_run=seconds_run)


 def _convert_time(data, time_index=None, seconds_run=None):
-    '''
-    Converts a time index to seconds or seconds to a time index. The user
-    must specify 'time_index' or 'seconds_run' (Not both). The function
-    will returns 'seconds_run' if passed a 'time_index' or will return the
+    """
+    Converts a time index to seconds or seconds to a time index. The user
+    must specify 'time_index' or 'seconds_run' (Not both). The function
+    will return 'seconds_run' if passed a 'time_index' or will return the
     closest 'time_index' if passed a number of 'seconds_run'.

     Parameters
     ----------
-    data: NetCDF4 object
+    data: NetCDF4 object
         A NetCDF4 object that contains spatial data, e.g. velocity or shear
-        stress, generated by running a Delft3D model.
-    time_index: int
+        stress, generated by running a Delft3D model.
+    time_index: int
         An integer to pull the time index from the dataset. 0 being closest
-        to the start time.
+        to the start time.
     seconds_run: int, float
-        An integer or float that represents the amount of time in seconds
+        An integer or float that represents the amount of time in seconds
        passed since starting the simulation.

     Returns
     -------
     QoI: int, float
-        The quantity of interest is the unknown value either the 'time_index'
-        or the 'seconds_run'. The 'time_index' is an integer starting from 0
+        The quantity of interest is the unknown value either the 'time_index'
+        or the 'seconds_run'. The 'time_index' is an integer starting from 0
        and incrementing until the simulation is complete. The 'seconds_run'
        is the seconds corresponding to the 'time_index' increments.
-    '''
+    """

     if not isinstance(data, netCDF4._netCDF4.Dataset):
-        raise TypeError('data must be NetCDF4 object')
+        raise TypeError("data must be NetCDF4 object")

     if not (time_index or seconds_run):
-        raise ValueError('Input of time_index or seconds_run needed')
+        raise ValueError("Input of time_index or seconds_run needed")

     if time_index and seconds_run:
-        raise ValueError(
-            'Only one of time_index or seconds_run should be provided')
+        raise ValueError("Only one of time_index or seconds_run should be provided")

-    if not (isinstance(time_index, (int, float)) or isinstance(seconds_run, (int, float))):
-        raise TypeError(
-            'time_index or seconds_run input must be an int or float')
+    if not (
+        isinstance(time_index, (int, float)) or isinstance(seconds_run, (int, float))
+    ):
+        raise TypeError("time_index or seconds_run input must be an int or float")

     times = get_all_time(data)

@@ -131,15 +131,18 @@
     except:
         idx = (np.abs(times - seconds_run)).argmin()
         QoI = idx
-        warnings.warn('Warning: seconds_run not found. Closest time stamp'
-                      + f'found {times[idx]}', stacklevel=2)
+        warnings.warn(
+            "Warning: seconds_run not found. Closest time stamp"
+            + f" found {times[idx]}",
+            stacklevel=2,
+        )

     return QoI


 def get_layer_data(data, variable, layer_index=-1, time_index=-1, to_pandas=True):
-    '''
-    Get variable data from the NetCDF4 object at a specified layer and timestep. 
+ """ + Get variable data from the NetCDF4 object at a specified layer and timestep. If the data is 2D the layer_index is ignored. Parameters @@ -149,11 +152,11 @@ def get_layer_data(data, variable, layer_index=-1, time_index=-1, to_pandas=True stress, generated by running a Delft3D model. variable: string Delft3D outputs many vairables. The full list can be - found using "data.variables.keys()" in the console. + found using "data.variables.keys()" in the console. layer_index: int - An integer to pull out a layer from the dataset. 0 being closest + An integer to pull out a layer from the dataset. 0 being closest to the surface. Default is the bottom layer, found with input -1. - time_index: int + time_index: int An integer to pull the time index from the dataset. 0 being closest to the start time. Default is last time index, found with input -1. to_pandas : bool (optional) @@ -163,34 +166,34 @@ def get_layer_data(data, variable, layer_index=-1, time_index=-1, to_pandas=True ------- layer_data: pd.DataFrame or xr.Dataset Dataset with columns of "x", "y", "waterdepth", and "waterlevel" location - of the specified layer, variable values "v", and the "time" the + of the specified layer, variable values "v", and the "time" the simulation has run. The waterdepth is measured from the water surface and the - "waterlevel" is the water level diffrencein meters from the zero water level. - ''' + "waterlevel" is the water level diffrencein meters from the zero water level. + """ if not isinstance(time_index, int): - raise TypeError('time_index must be an int') + raise TypeError("time_index must be an int") if not isinstance(layer_index, int): - raise TypeError('layer_index must be an int') + raise TypeError("layer_index must be an int") if not isinstance(data, netCDF4._netCDF4.Dataset): - raise TypeError('data must be NetCDF4 object') + raise TypeError("data must be NetCDF4 object") if variable not in data.variables.keys(): - raise ValueError('variable not recognized') + raise ValueError("variable not recognized") if not isinstance(to_pandas, bool): - raise TypeError( - f'to_pandas must be of type bool. Got: {type(to_pandas)}') + raise TypeError(f"to_pandas must be of type bool. 
Got: {type(to_pandas)}") coords = str(data.variables[variable].coordinates).split() var = data.variables[variable][:] - max_time_index = data['time'].shape[0] - 1 # to account for zero index + max_time_index = data["time"].shape[0] - 1 # to account for zero index if abs(time_index) > max_time_index: raise ValueError( - f'time_index must be less than the absolute value of the max time index {max_time_index}') + f"time_index must be less than the absolute value of the max time index {max_time_index}" + ) x = np.ma.getdata(data.variables[coords[0]][:], False) y = np.ma.getdata(data.variables[coords[1]][:], False) @@ -199,58 +202,71 @@ def get_layer_data(data, variable, layer_index=-1, time_index=-1, to_pandas=True max_layer = len(var[0][0]) if abs(layer_index) > max_layer: - raise ValueError( - f'layer_index must be less than the max layer {max_layer}') + raise ValueError(f"layer_index must be less than the max layer {max_layer}") v = np.ma.getdata(var[time_index, :, layer_index], False) dimensions = 3 else: if type(var[0][0]) != np.float64: - raise TypeError('data not recognized') + raise TypeError("data not recognized") dimensions = 2 v = np.ma.getdata(var[time_index, :], False) # waterdepth if "mesh2d" in variable: - cords_to_layers = {'mesh2d_face_x mesh2d_face_y': {'name': 'mesh2d_nLayers', - 'coords': data.variables['mesh2d_layer_sigma'][:]}, - 'mesh2d_edge_x mesh2d_edge_y': {'name': 'mesh2d_nInterfaces', - 'coords': data.variables['mesh2d_interface_sigma'][:]}} + cords_to_layers = { + "mesh2d_face_x mesh2d_face_y": { + "name": "mesh2d_nLayers", + "coords": data.variables["mesh2d_layer_sigma"][:], + }, + "mesh2d_edge_x mesh2d_edge_y": { + "name": "mesh2d_nInterfaces", + "coords": data.variables["mesh2d_interface_sigma"][:], + }, + } bottom_depth = np.ma.getdata( - data.variables['mesh2d_waterdepth'][time_index, :], False) - waterlevel = np.ma.getdata( - data.variables['mesh2d_s1'][time_index, :], False) - coords = str(data.variables['waterdepth'].coordinates).split() - - elif str(data.variables[variable].coordinates) == 'FlowElem_xcc FlowElem_ycc': - cords_to_layers = {'FlowElem_xcc FlowElem_ycc': - {'name': 'laydim', - 'coords': data.variables['LayCoord_cc'][:]}, - 'FlowLink_xu FlowLink_yu': {'name': 'wdim', - 'coords': data.variables['LayCoord_w'][:]}} - bottom_depth = np.ma.getdata( - data.variables['waterdepth'][time_index, :], False) - waterlevel = np.ma.getdata(data.variables['s1'][time_index, :], False) - coords = str(data.variables['waterdepth'].coordinates).split() + data.variables["mesh2d_waterdepth"][time_index, :], False + ) + waterlevel = np.ma.getdata(data.variables["mesh2d_s1"][time_index, :], False) + coords = str(data.variables["waterdepth"].coordinates).split() + + elif str(data.variables[variable].coordinates) == "FlowElem_xcc FlowElem_ycc": + cords_to_layers = { + "FlowElem_xcc FlowElem_ycc": { + "name": "laydim", + "coords": data.variables["LayCoord_cc"][:], + }, + "FlowLink_xu FlowLink_yu": { + "name": "wdim", + "coords": data.variables["LayCoord_w"][:], + }, + } + bottom_depth = np.ma.getdata(data.variables["waterdepth"][time_index, :], False) + waterlevel = np.ma.getdata(data.variables["s1"][time_index, :], False) + coords = str(data.variables["waterdepth"].coordinates).split() else: - cords_to_layers = {'FlowElem_xcc FlowElem_ycc LayCoord_cc LayCoord_cc': - {'name': 'laydim', - 'coords': data.variables['LayCoord_cc'][:]}, - 'FlowLink_xu FlowLink_yu': {'name': 'wdim', - 'coords': data.variables['LayCoord_w'][:]}} - bottom_depth = np.ma.getdata( - 
data.variables['waterdepth'][time_index, :], False) - waterlevel = np.ma.getdata(data.variables['s1'][time_index, :], False) - coords = str(data.variables['waterdepth'].coordinates).split() + cords_to_layers = { + "FlowElem_xcc FlowElem_ycc LayCoord_cc LayCoord_cc": { + "name": "laydim", + "coords": data.variables["LayCoord_cc"][:], + }, + "FlowLink_xu FlowLink_yu": { + "name": "wdim", + "coords": data.variables["LayCoord_w"][:], + }, + } + bottom_depth = np.ma.getdata(data.variables["waterdepth"][time_index, :], False) + waterlevel = np.ma.getdata(data.variables["s1"][time_index, :], False) + coords = str(data.variables["waterdepth"].coordinates).split() layer_dim = str(data.variables[variable].coordinates) - cord_sys = cords_to_layers[layer_dim]['coords'] + cord_sys = cords_to_layers[layer_dim]["coords"] layer_percentages = np.ma.getdata(cord_sys, False) # accumulative - if layer_dim == 'FlowLink_xu FlowLink_yu': + if layer_dim == "FlowLink_xu FlowLink_yu": # interpolate x_laydim = np.ma.getdata(data.variables[coords[0]][:], False) y_laydim = np.ma.getdata(data.variables[coords[1]][:], False) @@ -261,48 +277,49 @@ def get_layer_data(data, variable, layer_index=-1, time_index=-1, to_pandas=True y_wdim = np.ma.getdata(data.variables[coords_request[1]][:], False) points_wdim = np.array([[x, y] for x, y in zip(x_wdim, y_wdim)]) - bottom_depth_wdim = interp.griddata(points_laydim, bottom_depth, - points_wdim) - water_level_wdim = interp.griddata(points_laydim, waterlevel, - points_wdim) + bottom_depth_wdim = interp.griddata(points_laydim, bottom_depth, points_wdim) + water_level_wdim = interp.griddata(points_laydim, waterlevel, points_wdim) idx_bd = np.where(np.isnan(bottom_depth_wdim)) for i in idx_bd: - bottom_depth_wdim[i] = interp.griddata(points_laydim, bottom_depth, - points_wdim[i], method='nearest') - water_level_wdim[i] = interp.griddata(points_laydim, waterlevel, - points_wdim[i], method='nearest') + bottom_depth_wdim[i] = interp.griddata( + points_laydim, bottom_depth, points_wdim[i], method="nearest" + ) + water_level_wdim[i] = interp.griddata( + points_laydim, waterlevel, points_wdim[i], method="nearest" + ) waterdepth = [] if dimensions == 2: - if layer_dim == 'FlowLink_xu FlowLink_yu': + if layer_dim == "FlowLink_xu FlowLink_yu": z = [bottom_depth_wdim] waterlevel = water_level_wdim else: z = [bottom_depth] else: - if layer_dim == 'FlowLink_xu FlowLink_yu': - z = [bottom_depth_wdim*layer_percentages[layer_index]] + if layer_dim == "FlowLink_xu FlowLink_yu": + z = [bottom_depth_wdim * layer_percentages[layer_index]] waterlevel = water_level_wdim else: - z = [bottom_depth*layer_percentages[layer_index]] + z = [bottom_depth * layer_percentages[layer_index]] waterdepth = np.append(waterdepth, z) - time = np.ma.getdata( - data.variables['time'][time_index], False)*np.ones(len(x)) + time = np.ma.getdata(data.variables["time"][time_index], False) * np.ones(len(x)) - index = np.arange(0,len(time)) + index = np.arange(0, len(time)) layer_data = xr.Dataset( - data_vars = {'x': (['index'], x), - 'y': (['index'], y), - 'waterdepth': (['index'], waterdepth), - 'waterlevel': (['index'], waterlevel), - 'v': (['index'], v), - 'time': (['index'], time)}, - coords = {'index': index} - ) + data_vars={ + "x": (["index"], x), + "y": (["index"], y), + "waterdepth": (["index"], waterdepth), + "waterlevel": (["index"], waterlevel), + "v": (["index"], v), + "time": (["index"], time), + }, + coords={"index": index}, + ) if to_pandas: layer_data = layer_data.to_pandas() @@ -311,10 +328,10 @@ def 
get_layer_data(data, variable, layer_index=-1, time_index=-1, to_pandas=True def create_points(x, y, waterdepth, to_pandas=True): - ''' + """ Generate a Dataset of points from combinations of input coordinates. - This function accepts three inputs and combines them to generate a + This function accepts three inputs and combines them to generate a Dataset of points. The inputs can be: - 3 points - 2 points and 1 array @@ -341,7 +358,7 @@ def create_points(x, y, waterdepth, to_pandas=True): points : xr.Dataset or pd.DataFrame A Dataset with columns 'x', 'y', and 'waterdepth' representing the generated points. - Example + Example ------- 2 arrays and 1 point: >>> x = np.array([1, 2]) @@ -355,7 +372,7 @@ def create_points(x, y, waterdepth, to_pandas=True): 2 1.0 4.0 6.0 3 2.0 4.0 6.0 4 1.0 5.0 6.0 - 5 2.0 5.0 6.0 + 5 2.0 5.0 6.0 3 arrays (x and y must have the same length): >>> x = np.array([1, 2, 3]) @@ -369,11 +386,11 @@ def create_points(x, y, waterdepth, to_pandas=True): 2 3.0 6.0 1.0 3 1.0 4.0 2.0 4 2.0 5.0 2.0 - 5 4.0 6.0 2.0 - ''' + 5 4.0 6.0 2.0 + """ # Check input types - inputs = {'x': x, 'y': y, 'waterdepth': waterdepth} + inputs = {"x": x, "y": y, "waterdepth": waterdepth} for name, value in inputs.items(): # Convert lists to numpy arrays if isinstance(value, list): @@ -382,15 +399,16 @@ def create_points(x, y, waterdepth, to_pandas=True): # Check data type if not isinstance(value, (int, float, np.ndarray, pd.Series, xr.DataArray)): - raise TypeError(f"{name} must be an int, float, np.ndarray, pd.Series, or xr.DataArray. Got: {type(value)}") + raise TypeError( + f"{name} must be an int, float, np.ndarray, pd.Series, or xr.DataArray. Got: {type(value)}" + ) # Check for empty arrays if isinstance(value, (np.ndarray, pd.Series, xr.DataArray)) and len(value) == 0: raise ValueError(f"{name} should not be an empty array") if not isinstance(to_pandas, bool): - raise TypeError( - f'to_pandas must be of type bool. Got: {type(to_pandas)}') + raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}") x_array_like = ~isinstance(x, (int, float)) y_array_like = ~isinstance(y, (int, float)) @@ -402,7 +420,7 @@ def create_points(x, y, waterdepth, to_pandas=True): y_grid = y_grid.ravel() waterdepth_grid = waterdepth_grid.ravel() - _,mask = np.where(y==y_grid) + _, mask = np.where(y == y_grid) x_grid = x[mask] else: # if at least one input is a point, grid all inputs @@ -411,77 +429,88 @@ def create_points(x, y, waterdepth, to_pandas=True): y_grid = y_grid.ravel() waterdepth_grid = waterdepth_grid.ravel() - index = np.arange(0,len(x_grid)) - points = xr.Dataset(data_vars={ - 'x': (['index'], x_grid), - 'y': (['index'], y_grid), - 'waterdepth': (['index'], waterdepth_grid) + index = np.arange(0, len(x_grid)) + points = xr.Dataset( + data_vars={ + "x": (["index"], x_grid), + "y": (["index"], y_grid), + "waterdepth": (["index"], waterdepth_grid), }, - coords = {'index': index} + coords={"index": index}, ) - + if to_pandas: points = points.to_pandas() return points -def variable_interpolation(data, variables, points='cells', edges='none', - x_max_lim=float('inf'), x_min_lim=float('-inf'), - y_max_lim=float('inf'), y_min_lim=float('-inf'), - to_pandas=False): - ''' - Interpolate multiple variables from the Delft3D onto the same points. 
+def variable_interpolation(
+    data,
+    variables,
+    points="cells",
+    edges="none",
+    x_max_lim=float("inf"),
+    x_min_lim=float("-inf"),
+    y_max_lim=float("inf"),
+    y_min_lim=float("-inf"),
+    to_pandas=False,
+):
+    """
+    Interpolate multiple variables from the Delft3D onto the same points.

     Parameters
     ----------
-    data: NetCDF4 object
+    data: NetCDF4 object
         A NetCDF4 object that contains spatial data, e.g. velocity or shear
-        stress generated by running a Delft3D model.
+        stress generated by running a Delft3D model.
     variables: array of strings
         Name of variables to interpolate, e.g. 'turkin1', 'ucx', 'ucy' and 'ucz'.
         The full list can be found using "data.variables.keys()" in the console.
     points: string, pd.DataFrame, or xr.Dataset
         The points to interpolate data onto.
         'cells'- interpolates all data onto the Delft3D cell coordinate system (Default)
-        'faces'- interpolates all dada onto the Delft3D face coordinate system
-        Dataset of x, y, and waterdepth coordinates - Interpolates data onto user
+        'faces'- interpolates all data onto the Delft3D face coordinate system
+        Dataset of x, y, and waterdepth coordinates - Interpolates data onto user
        provided points. Can be created with `create_points` function.
     edges: string: 'nearest'
-        If edges is set to 'nearest' the code will fill in nan values with nearest
-        interpolation. Otherwise only linear interpolarion will be used.
+        If edges is set to 'nearest' the code will fill in nan values with nearest
+        interpolation. Otherwise only linear interpolation will be used.
     to_pandas : bool (optional)
         Flag to output pandas instead of xarray. Default = False.

     Returns
     -------
     transformed_data: pd.DataFrame or xr.Dataset
-        Variables on specified grid points saved under the input variable names
-        and the x, y, and waterdepth coordinates of those points.
-    '''
+        Variables on specified grid points saved under the input variable names
+        and the x, y, and waterdepth coordinates of those points.
+    """
     if not isinstance(points, (str, pd.DataFrame, xr.Dataset)):
-        raise TypeError(f'points must be a string, pd.DataFrame, or xr.Dataset. Got {type(points)}.')
+        raise TypeError(
+            f"points must be a string, pd.DataFrame, or xr.Dataset. Got {type(points)}."
+        )

     if isinstance(points, pd.DataFrame):
         points = points.to_xarray()

     if isinstance(points, str):
-        if not (points == 'cells' or points == 'faces'):
-            raise ValueError(f'If a string, points must be cells or faces. Got {points}')
+        if not (points == "cells" or points == "faces"):
+            raise ValueError(
+                f"If a string, points must be cells or faces. Got {points}"
+            )

     if not isinstance(data, netCDF4._netCDF4.Dataset):
-        raise TypeError(f'data must be netCDF4 object. Got {type(data)}')
+        raise TypeError(f"data must be netCDF4 object. Got {type(data)}")

     if not isinstance(to_pandas, bool):
-        raise TypeError(
-            f'to_pandas must be of type bool. Got: {type(to_pandas)}')
+        raise TypeError(f"to_pandas must be of type bool. 
Got: {type(to_pandas)}") data_raw = {} for var in variables: var_data = get_all_data_points(data, var, time_index=-1, to_pandas=False) - var_data['depth'] = var_data.waterdepth - var_data.waterlevel # added - + var_data["depth"] = var_data.waterdepth - var_data.waterlevel # added + # var_data = var_data.loc[:, var_data.T.duplicated(keep='first')] var_data = var_data.where(var_data.x > x_min_lim, drop=True) var_data = var_data.where(var_data.x < x_max_lim, drop=True) @@ -489,27 +518,31 @@ def variable_interpolation(data, variables, points='cells', edges='none', var_data = var_data.where(var_data.y < y_max_lim, drop=True) data_raw[var] = var_data if isinstance(points, xr.Dataset): - print('points provided') - elif points == 'faces': - points = data_raw['ucx'][['x', 'y', 'waterdepth']] - elif points == 'cells': - points = data_raw['turkin1'][['x', 'y', 'waterdepth']] + print("points provided") + elif points == "faces": + points = data_raw["ucx"][["x", "y", "waterdepth"]] + elif points == "cells": + points = data_raw["turkin1"][["x", "y", "waterdepth"]] transformed_data = points.copy(deep=True) for var in variables: - transformed_data[var] = interp.griddata(data_raw[var][['x', 'y', 'waterdepth']], # waterdepth to depth - data_raw[var][var], points[['x', 'y', 'waterdepth']]) - if edges == 'nearest': + transformed_data[var] = interp.griddata( + data_raw[var][["x", "y", "waterdepth"]], # waterdepth to depth + data_raw[var][var], + points[["x", "y", "waterdepth"]], + ) + if edges == "nearest": idx = np.where(np.isnan(transformed_data[var])) if len(idx[0]): for i in idx[0]: - transformed_data[var][i] = (interp - .griddata(data_raw[var][['x', 'y', 'waterdepth']], - data_raw[var][var], - [points['x'][i], points['y'][i], - points['waterdepth'][i]], method='nearest')) + transformed_data[var][i] = interp.griddata( + data_raw[var][["x", "y", "waterdepth"]], + data_raw[var][var], + [points["x"][i], points["y"][i], points["waterdepth"][i]], + method="nearest", + ) if to_pandas: transformed_data = transformed_data.to_pandas() @@ -518,20 +551,20 @@ def variable_interpolation(data, variables, points='cells', edges='none', def get_all_data_points(data, variable, time_index=-1, to_pandas=True): - ''' - Get data points for a passed variable for all layers at a specified time from - the Delft3D NetCDF4 object by iterating over the `get_layer_data` function. + """ + Get data points for a passed variable for all layers at a specified time from + the Delft3D NetCDF4 object by iterating over the `get_layer_data` function. Parameters ---------- - data: Netcdf4 object + data: Netcdf4 object A NetCDF4 object that contains spatial data, e.g. velocity or shear - stress, generated by running a Delft3D model. + stress, generated by running a Delft3D model. variable: string Delft3D variable. The full list can be of variables can be - found using "data.variables.keys()" in the console. + found using "data.variables.keys()" in the console. time_index: int - An integer to pull the time step from the dataset. + An integer to pull the time step from the dataset. Default is last time step, found with the input -1. to_pandas : bool (optional) Flag to output pandas instead of xarray. Default = True. @@ -540,54 +573,70 @@ def get_all_data_points(data, variable, time_index=-1, to_pandas=True): ------- all_data: xr.Dataset or pd.Dataframe Dataframe with columns x, y, waterdepth, waterlevel, variable, and time. 
- The waterdepth is measured from the water surface and the "waterlevel" is + The waterdepth is measured from the water surface and the "waterlevel" is the water level diffrence in meters from the zero water level. - ''' + """ if not isinstance(time_index, int): - raise TypeError('time_index must be an int') + raise TypeError("time_index must be an int") if not isinstance(data, netCDF4._netCDF4.Dataset): - raise TypeError('data must be NetCDF4 object') + raise TypeError("data must be NetCDF4 object") if variable not in data.variables.keys(): - raise ValueError('variable not recognized') + raise ValueError("variable not recognized") if not isinstance(to_pandas, bool): - raise TypeError( - f'to_pandas must be of type bool. Got: {type(to_pandas)}') + raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}") max_time_index = len(data.variables[variable][:]) if abs(time_index) > max_time_index: raise ValueError( - f'time_index must be less than the max time index {max_time_index}') + f"time_index must be less than the max time index {max_time_index}" + ) if "mesh2d" in variable: - cords_to_layers = {'mesh2d_face_x mesh2d_face_y': {'name': 'mesh2d_nLayers', - 'coords': data.variables['mesh2d_layer_sigma'][:]}, - 'mesh2d_edge_x mesh2d_edge_y': {'name': 'mesh2d_nInterfaces', - 'coords': data.variables['mesh2d_interface_sigma'][:]}} - - elif str(data.variables[variable].coordinates) == 'FlowElem_xcc FlowElem_ycc': - cords_to_layers = {'FlowElem_xcc FlowElem_ycc': - {'name': 'laydim', - 'coords': data.variables['LayCoord_cc'][:]}, - 'FlowLink_xu FlowLink_yu': {'name': 'wdim', - 'coords': data.variables['LayCoord_w'][:]}} + cords_to_layers = { + "mesh2d_face_x mesh2d_face_y": { + "name": "mesh2d_nLayers", + "coords": data.variables["mesh2d_layer_sigma"][:], + }, + "mesh2d_edge_x mesh2d_edge_y": { + "name": "mesh2d_nInterfaces", + "coords": data.variables["mesh2d_interface_sigma"][:], + }, + } + + elif str(data.variables[variable].coordinates) == "FlowElem_xcc FlowElem_ycc": + cords_to_layers = { + "FlowElem_xcc FlowElem_ycc": { + "name": "laydim", + "coords": data.variables["LayCoord_cc"][:], + }, + "FlowLink_xu FlowLink_yu": { + "name": "wdim", + "coords": data.variables["LayCoord_w"][:], + }, + } else: - cords_to_layers = {'FlowElem_xcc FlowElem_ycc LayCoord_cc LayCoord_cc': - {'name': 'laydim', - 'coords': data.variables['LayCoord_cc'][:]}, - 'FlowLink_xu FlowLink_yu': {'name': 'wdim', - 'coords': data.variables['LayCoord_w'][:]}} + cords_to_layers = { + "FlowElem_xcc FlowElem_ycc LayCoord_cc LayCoord_cc": { + "name": "laydim", + "coords": data.variables["LayCoord_cc"][:], + }, + "FlowLink_xu FlowLink_yu": { + "name": "wdim", + "coords": data.variables["LayCoord_w"][:], + }, + } layer_dim = str(data.variables[variable].coordinates) try: - cord_sys = cords_to_layers[layer_dim]['coords'] + cord_sys = cords_to_layers[layer_dim]["coords"] except: - raise Exception('Coordinates not recognized.') + raise Exception("Coordinates not recognized.") else: layer_percentages = np.ma.getdata(cord_sys, False) @@ -609,16 +658,18 @@ def get_all_data_points(data, variable, time_index=-1, to_pandas=True): v_all = np.append(v_all, layer_data.v) time_all = np.append(time_all, layer_data.time) - index = np.arange(0,len(time_all)) - all_data = xr.Dataset(data_vars={ - 'x': (['index'], x_all), - 'y': (['index'], y_all), - 'waterdepth': (['index'], depth_all), - 'waterlevel': (['index'], water_level_all), - f'{variable}': (['index'], v_all), - 'time': (['index'], time_all)}, - coords={'index': index} - ) 
+    index = np.arange(0, len(time_all))
+    all_data = xr.Dataset(
+        data_vars={
+            "x": (["index"], x_all),
+            "y": (["index"], y_all),
+            "waterdepth": (["index"], depth_all),
+            "waterlevel": (["index"], water_level_all),
+            f"{variable}": (["index"], v_all),
+            "time": (["index"], time_all),
+        },
+        coords={"index": index},
+    )

     if to_pandas:
         all_data = all_data.to_pandas()

     return all_data


-def turbulent_intensity(data, points='cells', time_index=-1,
-                        intermediate_values=False):
-    '''
-    Calculate the turbulent intensity percentage for a given data set for the
+def turbulent_intensity(data, points="cells", time_index=-1, intermediate_values=False):
+    """
+    Calculate the turbulent intensity percentage for a given data set for the
     specified points. Assumes variable names: ucx, ucy, ucz and turkin1.

     Parameters
     ----------
-    data: NetCDF4 object
+    data: NetCDF4 object
         A NetCDF4 object that contains spatial data, e.g. velocity or shear
         stress, generated by running a Delft3D model.
     points: string, pd.DataFrame, xr.Dataset
-        Points to interpolate data onto.
+        Points to interpolate data onto.
         'cells': interpolates all data onto velocity coordinate system (Default).
         'faces': interpolates all data onto the TKE coordinate system.
-        DataFrame of x, y, and z coordinates: Interpolates data onto user
-        provided points.
-    time_index: int
+        DataFrame of x, y, and z coordinates: Interpolates data onto user
+        provided points.
+    time_index: int
         An integer to pull the time step from the dataset. Default is
-        late time step -1.
+        last time step -1.
     intermediate_values: boolean (optional)
-        If false the function will return position and turbulent intensity values.
+        If false the function will return position and turbulent intensity values.
        If true the function will return position(x,y,z) and values need to calculate
        turbulent intensity (ucx, ucy, ucz and turkin1) in a Dataframe. Default False.

     Returns
     -------
     TI_data: xr.Dataset or pd.DataFrame
-        If intermediate_values is true all values are output.
-        If intermediate_values is equal to false only turbulent_intesity and
-        x, y, and z variables are output.
+        If intermediate_values is true all values are output.
+        If intermediate_values is equal to false only turbulent_intensity and
+        x, y, and z variables are output. 
+ x- position in the x direction + y- position in the y direction waterdepth- position in the vertical direction turbulet_intesity- turbulent kinetic energy divided by the root mean squared velocity - turkin1- turbulent kinetic energy - ucx- velocity in the x direction - ucy- velocity in the y direction - ucz- velocity in the vertical direction - ''' + turkin1- turbulent kinetic energy + ucx- velocity in the x direction + ucy- velocity in the y direction + ucz- velocity in the vertical direction + """ if not isinstance(points, (str, pd.DataFrame, xr.Dataset)): - raise TypeError('points must be a string, pd.DataFrame, xr.Dataset') + raise TypeError("points must be a string, pd.DataFrame, xr.Dataset") if isinstance(points, str): - if not (points == 'cells' or points == 'faces'): - raise ValueError('points must be cells or faces') + if not (points == "cells" or points == "faces"): + raise ValueError("points must be cells or faces") if not isinstance(time_index, int): - raise TypeError('time_index must be an int') + raise TypeError("time_index must be an int") - max_time_index = data['time'].shape[0] - 1 # to account for zero index + max_time_index = data["time"].shape[0] - 1 # to account for zero index if abs(time_index) > max_time_index: raise ValueError( - f'time_index must be less than the absolute value of the max time index {max_time_index}') + f"time_index must be less than the absolute value of the max time index {max_time_index}" + ) if not isinstance(data, netCDF4._netCDF4.Dataset): - raise TypeError('data must be netCDF4 object') + raise TypeError("data must be netCDF4 object") - for variable in ['turkin1', 'ucx', 'ucy', 'ucz']: + for variable in ["turkin1", "ucx", "ucy", "ucz"]: if variable not in data.variables.keys(): - raise ValueError(f'Variable {variable} not present in Data') + raise ValueError(f"Variable {variable} not present in Data") - TI_vars = ['turkin1', 'ucx', 'ucy', 'ucz'] + TI_vars = ["turkin1", "ucx", "ucy", "ucz"] TI_data_raw = {} for var in TI_vars: var_data_df = get_all_data_points(data, var, time_index) TI_data_raw[var] = var_data_df if type(points) == pd.DataFrame: - print('points provided') - elif points == 'faces': - points = TI_data_raw['turkin1'].drop(['waterlevel', 'turkin1'], axis=1) - elif points == 'cells': - points = TI_data_raw['ucx'].drop(['waterlevel', 'ucx'], axis=1) + print("points provided") + elif points == "faces": + points = TI_data_raw["turkin1"].drop(["waterlevel", "turkin1"], axis=1) + elif points == "cells": + points = TI_data_raw["ucx"].drop(["waterlevel", "ucx"], axis=1) TI_data = points.copy(deep=True) for var in TI_vars: - TI_data[var] = interp.griddata(TI_data_raw[var][['x', 'y', 'waterdepth']], - TI_data_raw[var][var], points[['x', 'y', 'waterdepth']]) + TI_data[var] = interp.griddata( + TI_data_raw[var][["x", "y", "waterdepth"]], + TI_data_raw[var][var], + points[["x", "y", "waterdepth"]], + ) idx = np.where(np.isnan(TI_data[var])) if len(idx[0]): for i in idx[0]: - TI_data[var][i] = interp.griddata(TI_data_raw[var][['x', 'y', 'waterdepth']], - TI_data_raw[var][var], - [points['x'][i], points['y'] - [i], points['waterdepth'][i]], - method='nearest') - - u_mag = unorm(np.array(TI_data['ucx']), np.array(TI_data['ucy']), - np.array(TI_data['ucz'])) - - neg_index = np.where(TI_data['turkin1'] < 0) - zero_bool = np.isclose(TI_data['turkin1'][TI_data['turkin1'] < 0].array, - np.zeros( - len(TI_data['turkin1'][TI_data['turkin1'] < 0].array)), - atol=1.0e-4) + TI_data[var][i] = interp.griddata( + TI_data_raw[var][["x", "y", "waterdepth"]], 
+                    TI_data_raw[var][var],
+                    [points["x"][i], points["y"][i], points["waterdepth"][i]],
+                    method="nearest",
+                )
+
+    u_mag = unorm(
+        np.array(TI_data["ucx"]), np.array(TI_data["ucy"]), np.array(TI_data["ucz"])
+    )
+
+    neg_index = np.where(TI_data["turkin1"] < 0)
+    zero_bool = np.isclose(
+        TI_data["turkin1"][TI_data["turkin1"] < 0].array,
+        np.zeros(len(TI_data["turkin1"][TI_data["turkin1"] < 0].array)),
+        atol=1.0e-4,
+    )
     zero_ind = neg_index[0][zero_bool]
     non_zero_ind = neg_index[0][~zero_bool]

-    TI_data.where(zero_ind)['turkin1'] = np.zeros(len(zero_ind))
-    TI_data.where(non_zero_ind)['turkin1'] = [np.nan]*len(non_zero_ind)
+    # Assign in place: .where() returns a copy, so writing into its result is a no-op
+    TI_data["turkin1"].values[zero_ind] = np.zeros(len(zero_ind))
+    TI_data["turkin1"].values[non_zero_ind] = [np.nan] * len(non_zero_ind)

-    TI_data['turbulent_intensity'] = np.sqrt(
-        2/3*TI_data['turkin1'])/u_mag * 100  # %
+    TI_data["turbulent_intensity"] = (
+        np.sqrt(2 / 3 * TI_data["turkin1"]) / u_mag * 100
+    )  # %

     if intermediate_values == False:
         TI_data = TI_data.drop(TI_vars, axis=1)

diff --git a/mhkit/river/io/usgs.py b/mhkit/river/io/usgs.py
index eb71a0f5a..df586c219 100644
--- a/mhkit/river/io/usgs.py
+++ b/mhkit/river/io/usgs.py
@@ -8,21 +8,23 @@


 def _read_usgs_json(text, to_pandas=True):
-    data = xr.Dataset
-    for i in range(len(text['value']['timeSeries'])):
+    data = xr.Dataset()
+    for i in range(len(text["value"]["timeSeries"])):
         try:
-            site_name = text['value']['timeSeries'][i]['variable']['variableDescription']
-            tmp = text['value']['timeSeries'][i]['values'][0]['value']
+            site_name = text["value"]["timeSeries"][i]["variable"][
+                "variableDescription"
+            ]
+            tmp = text["value"]["timeSeries"][i]["values"][0]["value"]
             v = []
             t = []
-            for i in range(0,len(tmp)):
-                v.append(tmp[i]['value'])
-                t.append(tmp[i]['dateTime'])
+            for i in range(0, len(tmp)):
+                v.append(tmp[i]["value"])
+                t.append(tmp[i]["dateTime"])
             v = np.asarray(v).astype(float)
             t = np.asarray(t).astype(np.datetime64)
-            site_data = xr.Dataset(data_vars = {site_name: (['dateTime'],v)},
-                                   coords = {'dateTime': t})
+            site_data = xr.Dataset(
+                data_vars={site_name: (["dateTime"], v)}, coords={"dateTime": t}
+            )
             data = data.combine_first(site_data)
         except:
             pass
@@ -47,7 +49,7 @@ def read_usgs_file(file_name, to_pandas=True):
     Returns
     -------
     data : pandas DataFrame or xarray Dataset
-        Data indexed by datetime with columns named according to the parameter's
+        Data indexed by datetime with columns named according to the parameter's
        variable description
     """
     with open(file_name) as json_file:
@@ -59,17 +61,18 @@


 def request_usgs_data(
-        station,
-        parameter,
-        start_date,
-        end_date,
-        data_type='Daily',
-        proxy=None,
-        write_json=None,
-        clear_cache=False,
-        to_pandas=True):
+    station,
+    parameter,
+    start_date,
+    end_date,
+    data_type="Daily",
+    proxy=None,
+    write_json=None,
+    clear_cache=False,
+    to_pandas=True,
+):
     """
-    Loads USGS data directly from https://waterdata.usgs.gov/nwis using a
+    Loads USGS data directly from https://waterdata.usgs.gov/nwis using a
     GET request

     The request URL prints to the screen.

     Parameters
     ----------
     station : str
         USGS station number (e.g. '08313000')
     parameter : str
         USGS parameter ID (e.g. '00060' for Discharge, cubic feet per second)
     start_date : str
         Start date in the format 'YYYY-MM-DD' (e.g. '2018-01-01')
     end_date : str
         End date in the format 'YYYY-MM-DD' (e.g. '2018-12-31')
     data_type : str
-        Data type, options include 'Daily' (return the mean daily value) and
+        Data type, options include 'Daily' (return the mean daily value) and
        'Instantaneous'. 
proxy : dict or None
-        To request data from behind a firewall, define a dictionary of proxy settings,
+        To request data from behind a firewall, define a dictionary of proxy settings,
        for example {"http": 'localhost:8080'}
     write_json : str or None
        Name of json file to write data
     clear_cache : bool
        If True, the cache for this specific request will be cleared.
     to_pandas: bool (optional)
-        Flag to output pandas instead of xarray. Default = True.
+        Flag to output pandas instead of xarray. Default = True.

     Returns
     -------
     data : pandas DataFrame or xarray Dataset
-        Data indexed by datetime with columns named according to the parameter's
+        Data indexed by datetime with columns named according to the parameter's
        variable description
     """

-    if not data_type in ['Daily', 'Instantaneous']:
-        raise ValueError(f'data_type must be Daily or Instantaneous. Got: {data_type}')
+    if not data_type in ["Daily", "Instantaneous"]:
+        raise ValueError(f"data_type must be Daily or Instantaneous. Got: {data_type}")

     # Define the path to the cache directory
-    cache_dir = os.path.join(os.path.expanduser("~"),
-                             ".cache", "mhkit", "usgs")
+    cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "mhkit", "usgs")

     # Create a unique filename based on the function parameters
     hash_params = f"{station}_{parameter}_{start_date}_{end_date}_{data_type}"

     # Use handle_caching to manage cache
     cached_data, metadata, cache_filepath = handle_caching(
-        hash_params, cache_dir, write_json, clear_cache)
+        hash_params, cache_dir, write_json, clear_cache
+    )

     if cached_data is not None:
         return cached_data

     # If no cached data, proceed with the API request
-    if data_type == 'Daily':
-        data_url = 'https://waterservices.usgs.gov/nwis/dv'
-        api_query = '/?format=json&sites='+station + \
-            '&startDT='+start_date+'&endDT='+end_date + \
-            '&statCd=00003' + \
-            '&parameterCd='+parameter+'&siteStatus=all'
+    if data_type == "Daily":
+        data_url = "https://waterservices.usgs.gov/nwis/dv"
+        api_query = (
+            "/?format=json&sites="
+            + station
+            + "&startDT="
+            + start_date
+            + "&endDT="
+            + end_date
+            + "&statCd=00003"
+            + "&parameterCd="
+            + parameter
+            + "&siteStatus=all"
+        )
     else:
-        data_url = 'https://waterservices.usgs.gov/nwis/iv'
-        api_query = '/?format=json&sites='+station + \
-            '&startDT='+start_date+'&endDT='+end_date + \
-            '&parameterCd='+parameter+'&siteStatus=all'
+        data_url = "https://waterservices.usgs.gov/nwis/iv"
+        api_query = (
+            "/?format=json&sites="
+            + station
+            + "&startDT="
+            + start_date
+            + "&endDT="
+            + end_date
+            + "&parameterCd="
+            + parameter
+            + "&siteStatus=all"
+        )

-    print('Data request URL: ', data_url+api_query)
-
-    response = requests.get(url=data_url+api_query, proxies=proxy)
+    print("Data request URL: ", data_url + api_query)
+
+    response = requests.get(url=data_url + api_query, proxies=proxy)
     text = json.loads(response.text)
     data = _read_usgs_json(text, to_pandas)

     # After making the API request and processing the response, write the
     # response to a cache file
-    handle_caching(hash_params, cache_dir, data=data,
-                   clear_cache_file=clear_cache)
+    handle_caching(hash_params, cache_dir, data=data, clear_cache_file=clear_cache)

     if write_json:
         shutil.copy(cache_filepath, write_json)

diff --git a/mhkit/river/resource.py b/mhkit/river/resource.py
index 3eddd44ed..cff18d0d7 100644
--- a/mhkit/river/resource.py
+++ b/mhkit/river/resource.py
@@ -10,10 +10,10 @@ def Froude_number(v, h, g=9.80665):
     """
     Calculate the Froude Number of the river, channel or duct flow,
     to check subcritical flow assumption (if Fr <1). 
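    Froude number is computed here as Fr = v / sqrt(g * h).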
- + Parameters ------------ - v : int/float + v : int/float Average velocity [m/s]. h : int/float Mean hydraulic depth float [m]. @@ -26,46 +26,47 @@ def Froude_number(v, h, g=9.80665): Froude Number of the river [unitless]. """ - if not isinstance(v, (int,float)): - raise TypeError(f'v must be of type int or float. Got: {type(v)}') - if not isinstance(h, (int,float)): - raise TypeError(f'h must be of type int or float. Got: {type(h)}') - if not isinstance(g, (int,float)): - raise TypeError(f'g must be of type int or float. Got: {type(g)}') - - Fr = v / np.sqrt( g * h ) - - return Fr + if not isinstance(v, (int, float)): + raise TypeError(f"v must be of type int or float. Got: {type(v)}") + if not isinstance(h, (int, float)): + raise TypeError(f"h must be of type int or float. Got: {type(h)}") + if not isinstance(g, (int, float)): + raise TypeError(f"g must be of type int or float. Got: {type(g)}") + + Fr = v / np.sqrt(g * h) + + return Fr def exceedance_probability(D, dimension="", to_pandas=True): """ Calculates the exceedance probability - + Parameters ---------- D : pandas Series, pandas DataFrame, xarray DataArray, or xarray Dataset Data indexed by time [datetime or s]. dimension: string (optional) - Name of the relevant xarray dimension. If not supplied, + Name of the relevant xarray dimension. If not supplied, defaults to the first dimension. Does not affect pandas input. to_pandas: bool (optional) Flag to output pandas instead of xarray. Default = True. - Returns + Returns ------- - F : pandas DataFrame or xarray Dataset + F : pandas DataFrame or xarray Dataset Exceedance probability [unitless] indexed by time [datetime or s] """ if not isinstance(D, (pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset)): raise TypeError( - f'D must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(D)}') + f"D must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(D)}" + ) if not isinstance(dimension, str): - raise TypeError(f'dimension must be of type str. Got: {type(dimension)}') + raise TypeError(f"dimension must be of type str. Got: {type(dimension)}") if not isinstance(to_pandas, bool): - raise TypeError(f'to_pandas must be of type bool. Got: {type(to_pandas)}') + raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}") if dimension == "": dimension = list(D.coords)[0] @@ -74,8 +75,8 @@ def exceedance_probability(D, dimension="", to_pandas=True): # Calculate exceedence probability (F) rank = D.rank() - rank = len(D[dimension]) - rank + 1 # convert to descending rank - F = 100 * rank / (len(D[dimension])+1) + rank = len(D[dimension]) - rank + 1 # convert to descending rank + F = 100 * rank / (len(D[dimension]) + 1) if to_pandas: F = F.to_pandas() @@ -103,7 +104,7 @@ def polynomial_fit(x, y, n): List of polynomial coefficients R2 : float Polynomical fit coeffcient of determination - + """ try: x = np.array(x) @@ -114,119 +115,129 @@ def polynomial_fit(x, y, n): except: pass if not isinstance(x, np.ndarray): - raise TypeError(f'x must be of type np.ndarray. Got: {type(x)}') + raise TypeError(f"x must be of type np.ndarray. Got: {type(x)}") if not isinstance(y, np.ndarray): - raise TypeError(f'y must be of type np.ndarray. Got: {type(y)}') + raise TypeError(f"y must be of type np.ndarray. Got: {type(y)}") if not isinstance(n, int): - raise TypeError(f'n must be of type int. Got: {type(n)}') - - # Get coeffcients of polynomial of order n + raise TypeError(f"n must be of type int. 
Got: {type(n)}") + + # Get coeffcients of polynomial of order n polynomial_coefficients = np.poly1d(np.polyfit(x, y, n)) - + # Calculate the coeffcient of determination - slope, intercept, r_value, p_value, std_err = _linregress(y, polynomial_coefficients(x)) + slope, intercept, r_value, p_value, std_err = _linregress( + y, polynomial_coefficients(x) + ) R2 = r_value**2 - + return polynomial_coefficients, R2 - + def discharge_to_velocity(D, polynomial_coefficients, dimension="", to_pandas=True): """ - Calculates velocity given discharge data and the relationship between + Calculates velocity given discharge data and the relationship between discharge and velocity at an individual turbine - + Parameters ------------ D : pandas Series, pandas DataFrame, xarray DataArray, or xarray Dataset Discharge data [m3/s] indexed by time [datetime or s] polynomial_coefficients : numpy polynomial - List of polynomial coefficients that discribe the relationship between + List of polynomial coefficients that discribe the relationship between discharge and velocity at an individual turbine dimension: string (optional) - Name of the relevant xarray dimension. If not supplied, + Name of the relevant xarray dimension. If not supplied, defaults to the first dimension. Does not affect pandas input. to_pandas: bool (optional) Flag to output pandas instead of xarray. Default = True. - - Returns + + Returns ------------ V: pandas DataFrame or xarray Dataset Velocity [m/s] indexed by time [datetime or s] """ if not isinstance(D, (pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset)): raise TypeError( - f'D must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(D)}') + f"D must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(D)}" + ) if not isinstance(polynomial_coefficients, np.poly1d): - raise TypeError(f'polynomial_coefficients must be of type np.poly1d. Got: {type(polynomial_coefficients)}') + raise TypeError( + f"polynomial_coefficients must be of type np.poly1d. Got: {type(polynomial_coefficients)}" + ) if not isinstance(dimension, str): - raise TypeError(f'dimension must be of type str. Got: {type(dimension)}') + raise TypeError(f"dimension must be of type str. Got: {type(dimension)}") if not isinstance(to_pandas, bool): - raise TypeError(f'to_pandas must be of type str. Got: {type(to_pandas)}') + raise TypeError(f"to_pandas must be of type str. 
Got: {type(to_pandas)}") - D = convert_to_dataset(D, name='V') + D = convert_to_dataset(D, name="V") if dimension == "": dimension = list(D.coords)[0] - + # Calculate velocity using polynomial V = xr.Dataset() for var in list(D.data_vars): vals = polynomial_coefficients(D[var]) V = V.assign(variables={var: ([dimension], vals)}) V = V.assign_coords({dimension: D[dimension]}) - + if to_pandas: V = V.to_pandas() - + return V - -def velocity_to_power(V, polynomial_coefficients, cut_in, cut_out, dimension="", to_pandas=True): + +def velocity_to_power( + V, polynomial_coefficients, cut_in, cut_out, dimension="", to_pandas=True +): """ - Calculates power given velocity data and the relationship + Calculates power given velocity data and the relationship between velocity and power from an individual turbine - + Parameters ---------- V : pandas Series, pandas DataFrame, xarray DataArray, or xarray Dataset Velocity [m/s] indexed by time [datetime or s] polynomial_coefficients : numpy polynomial - List of polynomial coefficients that discribe the relationship between + List of polynomial coefficients that discribe the relationship between velocity and power at an individual turbine cut_in: int/float Velocity values below cut_in are not used to compute P cut_out: int/float Velocity values above cut_out are not used to compute P dimension: string (optional) - Name of the relevant xarray dimension. If not supplied, + Name of the relevant xarray dimension. If not supplied, defaults to the first dimension. Does not affect pandas input. to_pandas: bool (optional) Flag to output pandas instead of xarray. Default = True. - - Returns + + Returns ------- P : pandas DataFrame or xarray Dataset Power [W] indexed by time [datetime or s] """ if not isinstance(V, (pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset)): raise TypeError( - f'V must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(V)}') + f"V must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(V)}" + ) if not isinstance(polynomial_coefficients, np.poly1d): - raise TypeError(f'polynomial_coefficients must be of type np.poly1d. Got: {type(polynomial_coefficients)}') - if not isinstance(cut_in, (int,float)): - raise TypeError(f'cut_in must be of type int or float. Got: {type(cut_in)}') - if not isinstance(cut_out, (int,float)): - raise TypeError(f'cut_out must be of type int or float. Got: {type(cut_out)}') + raise TypeError( + f"polynomial_coefficients must be of type np.poly1d. Got: {type(polynomial_coefficients)}" + ) + if not isinstance(cut_in, (int, float)): + raise TypeError(f"cut_in must be of type int or float. Got: {type(cut_in)}") + if not isinstance(cut_out, (int, float)): + raise TypeError(f"cut_out must be of type int or float. Got: {type(cut_out)}") if not isinstance(dimension, str): - raise TypeError(f'dimension must be of type str. Got: {type(dimension)}') + raise TypeError(f"dimension must be of type str. Got: {type(dimension)}") if not isinstance(to_pandas, bool): - raise TypeError(f'to_pandas must be of type str. Got: {type(to_pandas)}') - - V = convert_to_dataset(V, name='P') + raise TypeError(f"to_pandas must be of type str. 
Got: {type(to_pandas)}") + + V = convert_to_dataset(V, name="P") if dimension == "": dimension = list(V.coords)[0] - + # Calculate velocity using polynomial P = xr.Dataset() for var in list(V.data_vars): @@ -234,8 +245,8 @@ def velocity_to_power(V, polynomial_coefficients, cut_in, cut_out, dimension="", vals = polynomial_coefficients(V[var]) # Power for velocity values outside lower and upper bounds Turbine produces 0 power - vals[V[var] < cut_in] = 0. - vals[V[var] > cut_out] = 0. + vals[V[var] < cut_in] = 0.0 + vals[V[var] > cut_out] = 0.0 P = P.assign(variables={var: ([dimension], vals)}) P = P.assign_coords({dimension: V[dimension]}) @@ -250,14 +261,14 @@ def energy_produced(P, seconds): """ Returns the energy produced for a given time period provided exceedence probability and power. - + Parameters ---------- P : pandas Series or xarray DataArray Power [W] indexed by time [datetime or s] seconds: int or float Seconds in the time period of interest - + Returns ------- E : float @@ -265,24 +276,24 @@ def energy_produced(P, seconds): """ if not isinstance(P, (pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset)): raise TypeError( - f'P must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(P)}') + f"P must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(P)}" + ) if not isinstance(seconds, (int, float)): - raise TypeError(f'seconds must be of type int or float. Got: {type(seconds)}') + raise TypeError(f"seconds must be of type int or float. Got: {type(seconds)}") P = convert_to_dataset(P) - + # Calculate Histogram of power - H, edges = np.histogram(P, 100 ) + H, edges = np.histogram(P, 100) # Create a distribution - hist_dist = _rv_histogram([H,edges]) + hist_dist = _rv_histogram([H, edges]) # Sample range for pdf - x = np.linspace(edges.min(),edges.max(),1000) + x = np.linspace(edges.min(), edges.max(), 1000) # Calculate the expected value of Power - expected_val_of_power = np.trapz(x*hist_dist.pdf(x),x=x) + expected_val_of_power = np.trapz(x * hist_dist.pdf(x), x=x) # Note: Built-in Expected Value method often throws warning - #EV = hist_dist.expect(lb=edges.min(), ub=edges.max()) + # EV = hist_dist.expect(lb=edges.min(), ub=edges.max()) # Energy - E = seconds * expected_val_of_power - - return E + E = seconds * expected_val_of_power + return E From 747d375ce58b279159bee7d86369488d53013a06 Mon Sep 17 00:00:00 2001 From: akeeste Date: Tue, 20 Feb 2024 14:11:27 -0600 Subject: [PATCH 11/42] black formatting for data_utils.py --- mhkit/utils/data_utils.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/mhkit/utils/data_utils.py b/mhkit/utils/data_utils.py index 6d9bd83b6..2d7e90611 100644 --- a/mhkit/utils/data_utils.py +++ b/mhkit/utils/data_utils.py @@ -1,60 +1,65 @@ import pandas as pd -import xarray as xr +import xarray as xr -def convert_to_dataset(data, name='data'): + +def convert_to_dataset(data, name="data"): """ Converts the given data to an xarray.Dataset. - + This function is designed to handle inputs that can be either a pandas DataFrame, a pandas Series, an xarray DataArray, or an xarray Dataset. It ensures that the output is consistently an xarray.Dataset. - + Parameters ---------- data: pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset - The data to be converted. - + The data to be converted. + name: str (Optional) The name to assign to the data variable in case the input is an xarray DataArray without a name. Default value is 'data'. 
- + Returns ------- xarray.Dataset The input data converted to an xarray.Dataset. If the input is already an xarray.Dataset, it is returned as is. - + Examples -------- >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) >>> ds = convert_to_dataset(df) >>> type(ds) - + >>> series = pd.Series([1, 2, 3], name='C') >>> ds = convert_to_dataset(series) >>> type(ds) - + >>> data_array = xr.DataArray([1, 2, 3]) >>> ds = convert_to_dataset(data_array, name='D') >>> type(ds) """ if not isinstance(data, (pd.DataFrame, pd.Series, xr.DataArray, xr.Dataset)): - raise TypeError("Input data must be of type pandas.DataFrame, pandas.Series, " - "xarray.DataArray, or xarray.Dataset") + raise TypeError( + "Input data must be of type pandas.DataFrame, pandas.Series, " + "xarray.DataArray, or xarray.Dataset" + ) if not isinstance(name, str): - raise TypeError("The 'name' parameter must be a string") + raise TypeError("The 'name' parameter must be a string") - # Takes data that could be pd.DataFrame, pd.Series, xr.DataArray, or + # Takes data that could be pd.DataFrame, pd.Series, xr.DataArray, or # xr.Dataset and converts it to xr.Dataset if isinstance(data, (pd.DataFrame, pd.Series)): data = data.to_xarray() if isinstance(data, xr.DataArray): if data.name is None: - data.name = name # xr.DataArray.to_dataset() breaks if the data variable is unnamed + data.name = ( + name # xr.DataArray.to_dataset() breaks if the data variable is unnamed + ) data = data.to_dataset() return data From 41e63426b6420d263ec3e4f43ec4a76a94153506 Mon Sep 17 00:00:00 2001 From: akeeste Date: Thu, 22 Feb 2024 14:33:21 -0600 Subject: [PATCH 12/42] all xarray tests passing --- mhkit/river/graphics.py | 6 ++- mhkit/river/resource.py | 9 +++-- mhkit/tests/river/test_resource.py | 63 ++++++++++++++++++++++++++++++ mhkit/utils/data_utils.py | 7 ++-- 4 files changed, 76 insertions(+), 9 deletions(-) diff --git a/mhkit/river/graphics.py b/mhkit/river/graphics.py index fdbede6eb..afb894c92 100644 --- a/mhkit/river/graphics.py +++ b/mhkit/river/graphics.py @@ -1,6 +1,7 @@ import numpy as np import xarray as xr import matplotlib.pyplot as plt +from mhkit.utils import convert_to_dataset def _xy_plot(x, y, fmt=".", label=None, xlabel=None, ylabel=None, title=None, ax=None): @@ -198,12 +199,15 @@ def plot_discharge_timeseries(Q, time_dimension="", label=None, ax=None): ax : matplotlib pyplot axes """ + Q = convert_to_dataset(Q) + if time_dimension == "": time_dimension = list(Q.coords)[0] + var = list(Q.keys())[0] ax = _xy_plot( Q.coords[time_dimension].values, - Q, + Q[var], fmt="-", label=label, xlabel="Time", diff --git a/mhkit/river/resource.py b/mhkit/river/resource.py index cff18d0d7..15c92ef92 100644 --- a/mhkit/river/resource.py +++ b/mhkit/river/resource.py @@ -68,13 +68,13 @@ def exceedance_probability(D, dimension="", to_pandas=True): if not isinstance(to_pandas, bool): raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}") + D = convert_to_dataset(D, name="F") + if dimension == "": dimension = list(D.coords)[0] - D = convert_to_dataset(D) - # Calculate exceedence probability (F) - rank = D.rank() + rank = D.rank(dim=dimension) rank = len(D[dimension]) - rank + 1 # convert to descending rank F = 100 * rank / (len(D[dimension]) + 1) @@ -282,9 +282,10 @@ def energy_produced(P, seconds): raise TypeError(f"seconds must be of type int or float. 
Got: {type(seconds)}") P = convert_to_dataset(P) + var = list(P.keys())[0] # Calculate Histogram of power - H, edges = np.histogram(P, 100) + H, edges = np.histogram(P[var], 100) # Create a distribution hist_dist = _rv_histogram([H, edges]) # Sample range for pdf diff --git a/mhkit/tests/river/test_resource.py b/mhkit/tests/river/test_resource.py index c97d09b58..8b3a73023 100644 --- a/mhkit/tests/river/test_resource.py +++ b/mhkit/tests/river/test_resource.py @@ -2,6 +2,7 @@ import matplotlib.pylab as plt import mhkit.river as river import pandas as pd +import xarray as xr import numpy as np import unittest import os @@ -66,6 +67,17 @@ def test_exceedance_probability(self): self.assertEqual(f.min().values, 10.0) self.assertEqual(f.max().values, 90.0) + def test_exceedance_probability_xarray(self): + # Create arbitrary discharge between 0 and 8(N=9) + Q = xr.DataArray( + data=np.arange(9), dims="index", coords={"index": np.arange(9)} + ) + # if N=9, max F = 100((max(Q)+1)/10) = 90% + # if N=9, min F = 100((min(Q)+1)/10) = 10% + f = river.resource.exceedance_probability(Q) + self.assertEqual(f.min().values, 10.0) + self.assertEqual(f.max().values, 90.0) + def test_exceedance_probability_type_error(self): D = "invalid_type" # String instead of pd.Series or pd.DataFrame with self.assertRaises(TypeError): @@ -111,6 +123,17 @@ def test_discharge_to_velocity(self): V = river.resource.discharge_to_velocity(Q, p) self.assertAlmostEqual(np.sum(10 * Q - V["V"]), 0.00, places=2) + def test_discharge_to_velocity_xarray(self): + # Create arbitrary discharge between 0 and 8(N=9) + Q = xr.DataArray( + data=np.arange(9), dims="index", coords={"index": np.arange(9)} + ) + # Calculate a first order polynomial on an DV_Curve x=y line 10 times greater than the Q values + p, r2 = river.resource.polynomial_fit(np.arange(9), 10 * np.arange(9), 1) + # Because the polynomial line fits perfect we should expect the V to equal 10*Q + V = river.resource.discharge_to_velocity(Q, p, to_pandas=False) + self.assertAlmostEqual(np.sum(10 * Q - V["V"]).values, 0.00, places=2) + def test_discharge_to_velocity_D_type_error(self): D = "invalid_type" # String instead of pd.Series or pd.DataFrame polynomial_coefficients = np.poly1d([1, 2]) @@ -142,6 +165,31 @@ def test_velocity_to_power(self): # Middle 10x greater than velocity self.assertAlmostEqual((P["P"][1:-1] - 10 * V["V"][1:-1]).sum(), 0.00, places=2) + def test_velocity_to_power_xarray(self): + # Calculate a first order polynomial on an DV_Curve x=y line 10 times greater than the Q values + p, r2 = river.resource.polynomial_fit(np.arange(9), 10 * np.arange(9), 1) + # Because the polynomial line fits perfect we should expect the V to equal 10*Q + V = river.resource.discharge_to_velocity( + pd.Series(np.arange(9)), p, dimension="", to_pandas=False + ) + # Calculate a first order polynomial on an VP_Curve x=y line 10 times greater than the V values + p2, r22 = river.resource.polynomial_fit(np.arange(9), 10 * np.arange(9), 1) + # Set cut in/out to exclude 1 bin on either end of V range + cut_in = V["V"].values[1] + cut_out = V["V"].values[-2] + # Power should be 10x greater and exclude the ends of V + P = river.resource.velocity_to_power( + V["V"], p2, cut_in, cut_out, to_pandas=False + ) + # Cut in power zero + self.assertAlmostEqual(P["P"][0], 0.00, places=2) + # Cut out power zero + self.assertAlmostEqual(P["P"][-1], 0.00, places=2) + # Middle 10x greater than velocity + self.assertAlmostEqual( + (P["P"][1:-1] - 10 * V["V"][1:-1]).sum().values, 0.00, places=2 + ) + def 
test_velocity_to_power_V_type_error(self): V = "invalid_type" # String instead of pd.Series or pd.DataFrame polynomial_coefficients = np.poly1d([1, 2]) @@ -196,6 +244,21 @@ def test_energy_produced(self): EP2 = river.resource.energy_produced(power_dist, seconds) self.assertAlmostEqual(EP2, mu * seconds, places=1) + def test_energy_produced_xarray(self): + # If power is always X then energy produced with be x*seconds + X = 1 + seconds = 1 + P = xr.DataArray(data=X * np.ones(10)) + EP = river.resource.energy_produced(P, seconds) + self.assertAlmostEqual(EP, X * seconds, places=1) + + # for a normal distribution of Power EP = mean *seconds + mu = 5 + sigma = 1 + power_dist = xr.DataArray(data=np.random.normal(mu, sigma, 10000)) + EP2 = river.resource.energy_produced(power_dist, seconds) + self.assertAlmostEqual(EP2, mu * seconds, places=1) + def test_energy_produced_P_type_error(self): P = "invalid_type" # String instead of pd.Series or pd.DataFrame seconds = 3600 diff --git a/mhkit/utils/data_utils.py b/mhkit/utils/data_utils.py index 2d7e90611..5c8030cbe 100644 --- a/mhkit/utils/data_utils.py +++ b/mhkit/utils/data_utils.py @@ -56,10 +56,9 @@ def convert_to_dataset(data, name="data"): data = data.to_xarray() if isinstance(data, xr.DataArray): - if data.name is None: - data.name = ( - name # xr.DataArray.to_dataset() breaks if the data variable is unnamed - ) + data.name = ( + name # xr.DataArray.to_dataset() breaks if the data variable is unnamed + ) data = data.to_dataset() return data From 73fbb75579c58243b5ceb1d72685a07479934564 Mon Sep 17 00:00:00 2001 From: akeeste Date: Thu, 22 Feb 2024 15:45:33 -0600 Subject: [PATCH 13/42] add helper function to convert input to xr.DataArray --- mhkit/utils/data_utils.py | 94 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 3 deletions(-) diff --git a/mhkit/utils/data_utils.py b/mhkit/utils/data_utils.py index 5c8030cbe..02dc60fc2 100644 --- a/mhkit/utils/data_utils.py +++ b/mhkit/utils/data_utils.py @@ -1,3 +1,4 @@ +import numpy as np import pandas as pd import xarray as xr @@ -56,9 +57,96 @@ def convert_to_dataset(data, name="data"): data = data.to_xarray() if isinstance(data, xr.DataArray): - data.name = ( - name # xr.DataArray.to_dataset() breaks if the data variable is unnamed - ) + # xr.DataArray.to_dataset() breaks if the data variable is unnamed + if data.name == None: + data.name = name data = data.to_dataset() return data + + +def convert_to_dataArray(data, name="data"): + """ + Converts the given data to an xarray.DataArray. + + This function is designed to handle inputs that can be either a numpy ndarray, pandas Series, + or an xarray DataArray. For convenience, pandas DataFrame and xarray Dataset can also be input + but may only contain a single variable. The function ensures that the output is consistently + an xarray.DataArray. + + Parameters + ---------- + data: numpy ndarray, pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset + The data to be converted. + + name: str (Optional) + The name to overwrite the name of the input data variable for pandas or xarray input. + Default value is 'data'. + + Returns + ------- + xarray.DataArray + The input data converted to an xarray.DataArray. If the input is already an xarray.DataArray, + it is returned as is. 
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({'A': [1, 2, 3]})
+    >>> da = convert_to_dataArray(df)
+    >>> type(da)
+    <class 'xarray.core.dataarray.DataArray'>
+
+    >>> series = pd.Series([1, 2, 3], name='C')
+    >>> da = convert_to_dataArray(series)
+    >>> type(da)
+    <class 'xarray.core.dataarray.DataArray'>
+
+    >>> data_array = xr.DataArray([1, 2, 3])
+    >>> da = convert_to_dataArray(data_array, name='D')
+    >>> type(da)
+    <class 'xarray.core.dataarray.DataArray'>
+    """
+    if not isinstance(
+        data, (np.ndarray, pd.DataFrame, pd.Series, xr.DataArray, xr.Dataset)
+    ):
+        raise TypeError(
+            "Input data must be of type pandas.DataFrame, pandas.Series, "
+            "xarray.DataArray, or xarray.Dataset"
+        )
+
+    if not isinstance(name, str):
+        raise TypeError("The 'name' parameter must be a string")
+
+    # Checks pd.DataFrame input and converts to pd.Series if possible
+    if isinstance(data, pd.DataFrame):
+        if data.shape[1] > 1:
+            raise ValueError(
+                f"If the input data is a pd.DataFrame or xr.Dataset, it must contain one variable. Got {data.shape[1]}"
+            )
+        else:
+            data = data.squeeze()
+
+    # Checks xr.Dataset input and converts to xr.DataArray if possible
+    if isinstance(data, xr.Dataset):
+        if len(data.keys()) > 1:
+            raise ValueError(
+                f"If the input data is a pd.DataFrame or xr.Dataset, it must contain one variable. Got {len(data.keys())}"
+            )
+        else:
+            data = data.to_array()
+
+    # Converts pd.Series to xr.DataArray
+    if isinstance(data, pd.Series):
+        data = data.to_xarray()
+
+    # Converts np.ndarray to xr.DataArray. Assigns a simple 0-based dimension named index
+    if isinstance(data, np.ndarray):
+        data = xr.DataArray(
+            data=data, dims="index", coords={"index": np.arange(len(data))}
+        )
+
+    # If there's not data name, add one to prevent issues calling or converting the dataArray later one
+    if data.name == None:
+        data.name = name
+
+    return data

From 7156db429bf7c851f137a9de4ce57130e43c2947 Mon Sep 17 00:00:00 2001
From: akeeste
Date: Thu, 29 Feb 2024 15:46:46 -0600
Subject: [PATCH 14/42] update river to use convert_to_dataArray

---
 mhkit/river/graphics.py |  7 ++--
 mhkit/river/resource.py | 84 +++++++++++++++++------------------------
 2 files changed, 38 insertions(+), 53 deletions(-)

diff --git a/mhkit/river/graphics.py b/mhkit/river/graphics.py
index afb894c92..57e89fd61 100644
--- a/mhkit/river/graphics.py
+++ b/mhkit/river/graphics.py
@@ -1,7 +1,7 @@
 import numpy as np
 import xarray as xr
 import matplotlib.pyplot as plt
-from mhkit.utils import convert_to_dataset
+from mhkit.utils import convert_to_dataArray


 def _xy_plot(x, y, fmt=".", label=None, xlabel=None, ylabel=None, title=None, ax=None):
@@ -199,15 +199,14 @@ def plot_discharge_timeseries(Q, time_dimension="", label=None, ax=None):
     ax : matplotlib pyplot axes
     """
-    Q = convert_to_dataset(Q)
+    Q = convert_to_dataArray(Q)

     if time_dimension == "":
         time_dimension = list(Q.coords)[0]
-    var = list(Q.keys())[0]

     ax = _xy_plot(
         Q.coords[time_dimension].values,
-        Q[var],
+        Q,
         fmt="-",
         label=label,
         xlabel="Time",
diff --git a/mhkit/river/resource.py b/mhkit/river/resource.py
index 15c92ef92..fc3265250 100644
--- a/mhkit/river/resource.py
+++ b/mhkit/river/resource.py
@@ -1,9 +1,8 @@
-import pandas as pd
 import xarray as xr
 import numpy as np
 from scipy.stats import linregress as _linregress
 from scipy.stats import rv_histogram as _rv_histogram
-from mhkit.utils import convert_to_dataset
+from mhkit.utils import convert_to_dataArray


 def Froude_number(v, h, g=9.80665):
@@ -45,7 +44,7 @@ def exceedance_probability(D, dimension="", to_pandas=True):
     Parameters
     ----------
     D : pandas Series, pandas DataFrame, xarray DataArray, or xarray Dataset
+ Discharge indexed by time [datetime or s]. dimension: string (optional) Name of the relevant xarray dimension. If not supplied, @@ -59,24 +58,21 @@ def exceedance_probability(D, dimension="", to_pandas=True): F : pandas DataFrame or xarray Dataset Exceedance probability [unitless] indexed by time [datetime or s] """ - if not isinstance(D, (pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset)): - raise TypeError( - f"D must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(D)}" - ) if not isinstance(dimension, str): raise TypeError(f"dimension must be of type str. Got: {type(dimension)}") if not isinstance(to_pandas, bool): raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}") - D = convert_to_dataset(D, name="F") + D = convert_to_dataArray(D) if dimension == "": dimension = list(D.coords)[0] - # Calculate exceedence probability (F) + # Calculate exceedance probability (F) rank = D.rank(dim=dimension) rank = len(D[dimension]) - rank + 1 # convert to descending rank F = 100 * rank / (len(D[dimension]) + 1) + F.name = "F" if to_pandas: F = F.to_pandas() @@ -140,10 +136,10 @@ def discharge_to_velocity(D, polynomial_coefficients, dimension="", to_pandas=Tr Parameters ------------ - D : pandas Series, pandas DataFrame, xarray DataArray, or xarray Dataset + D : numpy ndarray, pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset Discharge data [m3/s] indexed by time [datetime or s] polynomial_coefficients : numpy polynomial - List of polynomial coefficients that discribe the relationship between + List of polynomial coefficients that describe the relationship between discharge and velocity at an individual turbine dimension: string (optional) Name of the relevant xarray dimension. If not supplied, @@ -156,10 +152,6 @@ def discharge_to_velocity(D, polynomial_coefficients, dimension="", to_pandas=Tr V: pandas DataFrame or xarray Dataset Velocity [m/s] indexed by time [datetime or s] """ - if not isinstance(D, (pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset)): - raise TypeError( - f"D must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(D)}" - ) if not isinstance(polynomial_coefficients, np.poly1d): raise TypeError( f"polynomial_coefficients must be of type np.poly1d. Got: {type(polynomial_coefficients)}" @@ -169,17 +161,18 @@ def discharge_to_velocity(D, polynomial_coefficients, dimension="", to_pandas=Tr if not isinstance(to_pandas, bool): raise TypeError(f"to_pandas must be of type str. 
Got: {type(to_pandas)}") - D = convert_to_dataset(D, name="V") + D = convert_to_dataArray(D) if dimension == "": dimension = list(D.coords)[0] # Calculate velocity using polynomial - V = xr.Dataset() - for var in list(D.data_vars): - vals = polynomial_coefficients(D[var]) - V = V.assign(variables={var: ([dimension], vals)}) - V = V.assign_coords({dimension: D[dimension]}) + V = xr.DataArray( + data = polynomial_coefficients(D), + dims = dimension, + coords = {dimension: D[dimension]} + ) + V.name = "V" if to_pandas: V = V.to_pandas() @@ -196,10 +189,10 @@ def velocity_to_power( Parameters ---------- - V : pandas Series, pandas DataFrame, xarray DataArray, or xarray Dataset + V : numpy ndarray, pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset Velocity [m/s] indexed by time [datetime or s] polynomial_coefficients : numpy polynomial - List of polynomial coefficients that discribe the relationship between + List of polynomial coefficients that describe the relationship between velocity and power at an individual turbine cut_in: int/float Velocity values below cut_in are not used to compute P @@ -216,10 +209,6 @@ def velocity_to_power( P : pandas DataFrame or xarray Dataset Power [W] indexed by time [datetime or s] """ - if not isinstance(V, (pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset)): - raise TypeError( - f"V must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. Got: {type(V)}" - ) if not isinstance(polynomial_coefficients, np.poly1d): raise TypeError( f"polynomial_coefficients must be of type np.poly1d. Got: {type(polynomial_coefficients)}" @@ -233,23 +222,24 @@ def velocity_to_power( if not isinstance(to_pandas, bool): raise TypeError(f"to_pandas must be of type str. Got: {type(to_pandas)}") - V = convert_to_dataset(V, name="P") + V = convert_to_dataArray(V) if dimension == "": dimension = list(V.coords)[0] # Calculate velocity using polynomial - P = xr.Dataset() - for var in list(V.data_vars): - # Calculate power using transfer function and FDC - vals = polynomial_coefficients(V[var]) - - # Power for velocity values outside lower and upper bounds Turbine produces 0 power - vals[V[var] < cut_in] = 0.0 - vals[V[var] > cut_out] = 0.0 - - P = P.assign(variables={var: ([dimension], vals)}) - P = P.assign_coords({dimension: V[dimension]}) + power = polynomial_coefficients(V) + + # Power for velocity values outside lower and upper bounds Turbine produces 0 power + power[V < cut_in] = 0.0 + power[V > cut_out] = 0.0 + + P = xr.DataArray( + data = power, + dims = dimension, + coords = {dimension: V[dimension]} + ) + P.name = "P" if to_pandas: P = P.to_pandas() @@ -260,11 +250,11 @@ def velocity_to_power( def energy_produced(P, seconds): """ Returns the energy produced for a given time period provided - exceedence probability and power. + exceedance probability and power. Parameters ---------- - P : pandas Series or xarray DataArray + P : numpy ndarray, pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset Power [W] indexed by time [datetime or s] seconds: int or float Seconds in the time period of interest @@ -272,20 +262,15 @@ def energy_produced(P, seconds): Returns ------- E : float - Energy [J] produced in the given time frame + Energy [J] produced in the given length of time """ - if not isinstance(P, (pd.Series, pd.DataFrame, xr.DataArray, xr.Dataset)): - raise TypeError( - f"P must be of type pd.Series, pd.DataFrame, xr.DataArray, or xr.Dataset. 
Got: {type(P)}" - ) if not isinstance(seconds, (int, float)): raise TypeError(f"seconds must be of type int or float. Got: {type(seconds)}") - P = convert_to_dataset(P) - var = list(P.keys())[0] + P = convert_to_dataArray(P) # Calculate Histogram of power - H, edges = np.histogram(P[var], 100) + H, edges = np.histogram(P, 100) # Create a distribution hist_dist = _rv_histogram([H, edges]) # Sample range for pdf @@ -298,3 +283,4 @@ def energy_produced(P, seconds): E = seconds * expected_val_of_power return E + \ No newline at end of file From 05f92cd2d39021b307d012f1af2057a5a523cccc Mon Sep 17 00:00:00 2001 From: akeeste Date: Thu, 29 Feb 2024 15:47:04 -0600 Subject: [PATCH 15/42] update utils.convert_to_dataArray --- mhkit/utils/__init__.py | 2 +- mhkit/utils/data_utils.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mhkit/utils/__init__.py b/mhkit/utils/__init__.py index 5a4d64a30..423191e1d 100644 --- a/mhkit/utils/__init__.py +++ b/mhkit/utils/__init__.py @@ -8,6 +8,6 @@ ) from .cache import handle_caching, clear_cache from .upcrossing import upcrossing, peaks, troughs, heights, periods, custom -from .data_utils import convert_to_dataset +from .data_utils import convert_to_dataset, convert_to_dataArray _matlab = False # Private variable indicating if mhkit is run through matlab diff --git a/mhkit/utils/data_utils.py b/mhkit/utils/data_utils.py index 02dc60fc2..07a282477 100644 --- a/mhkit/utils/data_utils.py +++ b/mhkit/utils/data_utils.py @@ -45,11 +45,13 @@ def convert_to_dataset(data, name="data"): if not isinstance(data, (pd.DataFrame, pd.Series, xr.DataArray, xr.Dataset)): raise TypeError( "Input data must be of type pandas.DataFrame, pandas.Series, " - "xarray.DataArray, or xarray.Dataset" + "xarray.DataArray, or xarray.Dataset." + f"Got {type(data)}." 
)

     if not isinstance(name, str):
-        raise TypeError("The 'name' parameter must be a string")
+        raise TypeError("The 'name' parameter must be a string"
+            f"Got {type(name)}.")

     # Takes data that could be pd.DataFrame, pd.Series, xr.DataArray, or
     # xr.Dataset and converts it to xr.Dataset

From f97a7d8ee939ccd522ed704966d5b18815701d8d Mon Sep 17 00:00:00 2001
From: akeeste
Date: Thu, 29 Feb 2024 15:47:24 -0600
Subject: [PATCH 16/42] all river module tests passing

---
 mhkit/tests/river/test_resource.py | 42 +++++++++++++++---------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/mhkit/tests/river/test_resource.py b/mhkit/tests/river/test_resource.py
index 8b3a73023..d2480cd98 100644
--- a/mhkit/tests/river/test_resource.py
+++ b/mhkit/tests/river/test_resource.py
@@ -64,8 +64,8 @@ def test_exceedance_probability(self):
         # if N=9, max F = 100((max(Q)+1)/10) = 90%
         # if N=9, min F = 100((min(Q)+1)/10) = 10%
         f = river.resource.exceedance_probability(Q)
-        self.assertEqual(f.min().values, 10.0)
-        self.assertEqual(f.max().values, 90.0)
+        self.assertEqual(f.min(), 10.0)
+        self.assertEqual(f.max(), 90.0)

     def test_exceedance_probability_xarray(self):
         # Create arbitrary discharge between 0 and 8(N=9)
@@ -75,8 +75,8 @@ def test_exceedance_probability_xarray(self):
         # if N=9, max F = 100((max(Q)+1)/10) = 90%
         # if N=9, min F = 100((min(Q)+1)/10) = 10%
         f = river.resource.exceedance_probability(Q)
-        self.assertEqual(f.min().values, 10.0)
-        self.assertEqual(f.max().values, 90.0)
+        self.assertEqual(f.min(), 10.0)
+        self.assertEqual(f.max(), 90.0)

     def test_exceedance_probability_type_error(self):
         D = "invalid_type"  # String instead of pd.Series or pd.DataFrame
         with self.assertRaises(TypeError):
@@ -121,7 +121,7 @@ def test_discharge_to_velocity(self):
         p, r2 = river.resource.polynomial_fit(np.arange(9), 10 * np.arange(9), 1)
         # Because the polynomial line fits perfect we should expect the V to equal 10*Q
         V = river.resource.discharge_to_velocity(Q, p)
-        self.assertAlmostEqual(np.sum(10 * Q - V["V"]), 0.00, places=2)
+        self.assertAlmostEqual(np.sum(10 * Q - V), 0.00, places=2)

     def test_discharge_to_velocity_xarray(self):
         # Create arbitrary discharge between 0 and 8(N=9)
@@ -132,7 +132,7 @@ def test_discharge_to_velocity_xarray(self):
         p, r2 = river.resource.polynomial_fit(np.arange(9), 10 * np.arange(9), 1)
         # Because the polynomial line fits perfect we should expect the V to equal 10*Q
         V = river.resource.discharge_to_velocity(Q, p, to_pandas=False)
-        self.assertAlmostEqual(np.sum(10 * Q - V["V"]).values, 0.00, places=2)
+        self.assertAlmostEqual(np.sum(10 * Q - V).values, 0.00, places=2)

     def test_discharge_to_velocity_D_type_error(self):
         D = "invalid_type"  # String instead of pd.Series or pd.DataFrame
         polynomial_coefficients = np.poly1d([1, 2])
@@ -154,16 +154,16 @@ def test_velocity_to_power(self):
         # Calculate a first order polynomial on an VP_Curve x=y line 10 times greater than the V values
         p2, r22 = river.resource.polynomial_fit(np.arange(9), 10 * np.arange(9), 1)
         # Set cut in/out to exclude 1 bin on either end of V range
-        cut_in = V["V"][1]
-        cut_out = V["V"].iloc[-2]
+        cut_in = V[1]
+        cut_out = V.iloc[-2]
         # Power should be 10x greater and exclude the ends of V
-        P = river.resource.velocity_to_power(V["V"], p2, cut_in, cut_out)
+        P = 
river.resource.velocity_to_power(V, p2, cut_in, cut_out) # Cut in power zero - self.assertAlmostEqual(P["P"][0], 0.00, places=2) + self.assertAlmostEqual(P[0], 0.00, places=2) # Cut out power zero - self.assertAlmostEqual(P["P"].iloc[-1], 0.00, places=2) + self.assertAlmostEqual(P.iloc[-1], 0.00, places=2) # Middle 10x greater than velocity - self.assertAlmostEqual((P["P"][1:-1] - 10 * V["V"][1:-1]).sum(), 0.00, places=2) + self.assertAlmostEqual((P[1:-1] - 10 * V[1:-1]).sum(), 0.00, places=2) def test_velocity_to_power_xarray(self): # Calculate a first order polynomial on an DV_Curve x=y line 10 times greater than the Q values @@ -175,19 +175,19 @@ def test_velocity_to_power_xarray(self): # Calculate a first order polynomial on an VP_Curve x=y line 10 times greater than the V values p2, r22 = river.resource.polynomial_fit(np.arange(9), 10 * np.arange(9), 1) # Set cut in/out to exclude 1 bin on either end of V range - cut_in = V["V"].values[1] - cut_out = V["V"].values[-2] + cut_in = V.values[1] + cut_out = V.values[-2] # Power should be 10x greater and exclude the ends of V P = river.resource.velocity_to_power( - V["V"], p2, cut_in, cut_out, to_pandas=False + V, p2, cut_in, cut_out, to_pandas=False ) # Cut in power zero - self.assertAlmostEqual(P["P"][0], 0.00, places=2) + self.assertAlmostEqual(P[0], 0.00, places=2) # Cut out power zero - self.assertAlmostEqual(P["P"][-1], 0.00, places=2) + self.assertAlmostEqual(P[-1], 0.00, places=2) # Middle 10x greater than velocity self.assertAlmostEqual( - (P["P"][1:-1] - 10 * V["V"][1:-1]).sum().values, 0.00, places=2 + (P[1:-1] - 10 * V[1:-1]).sum().values, 0.00, places=2 ) def test_velocity_to_power_V_type_error(self): @@ -278,7 +278,7 @@ def test_plot_flow_duration_curve(self): f = river.resource.exceedance_probability(self.data.Q) plt.figure() - river.graphics.plot_flow_duration_curve(self.data["Q"], f["F"]) + river.graphics.plot_flow_duration_curve(self.data["Q"], f) plt.savefig(filename, format="png") plt.close() @@ -291,7 +291,7 @@ def test_plot_power_duration_curve(self): f = river.resource.exceedance_probability(self.data.Q) plt.figure() - river.graphics.plot_flow_duration_curve(self.results["P_control"], f["F"]) + river.graphics.plot_flow_duration_curve(self.results["P_control"], f) plt.savefig(filename, format="png") plt.close() @@ -304,7 +304,7 @@ def test_plot_velocity_duration_curve(self): f = river.resource.exceedance_probability(self.data.Q) plt.figure() - river.graphics.plot_velocity_duration_curve(self.results["V_control"], f["F"]) + river.graphics.plot_velocity_duration_curve(self.results["V_control"], f) plt.savefig(filename, format="png") plt.close() From 4dd3e1cbf68ef95707774e6def69066aba3e8ad8 Mon Sep 17 00:00:00 2001 From: akeeste Date: Mon, 4 Mar 2024 09:50:21 -0600 Subject: [PATCH 17/42] black formatting --- mhkit/river/resource.py | 15 +++++---------- mhkit/tests/river/test_resource.py | 8 ++------ mhkit/utils/data_utils.py | 3 +-- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/mhkit/river/resource.py b/mhkit/river/resource.py index fc3265250..2333e04e1 100644 --- a/mhkit/river/resource.py +++ b/mhkit/river/resource.py @@ -168,9 +168,9 @@ def discharge_to_velocity(D, polynomial_coefficients, dimension="", to_pandas=Tr # Calculate velocity using polynomial V = xr.DataArray( - data = polynomial_coefficients(D), - dims = dimension, - coords = {dimension: D[dimension]} + data=polynomial_coefficients(D), + dims=dimension, + coords={dimension: D[dimension]}, ) V.name = "V" @@ -233,12 +233,8 @@ def 
velocity_to_power( # Power for velocity values outside lower and upper bounds Turbine produces 0 power power[V < cut_in] = 0.0 power[V > cut_out] = 0.0 - - P = xr.DataArray( - data = power, - dims = dimension, - coords = {dimension: V[dimension]} - ) + + P = xr.DataArray(data=power, dims=dimension, coords={dimension: V[dimension]}) P.name = "P" if to_pandas: @@ -283,4 +279,3 @@ def energy_produced(P, seconds): E = seconds * expected_val_of_power return E - \ No newline at end of file diff --git a/mhkit/tests/river/test_resource.py b/mhkit/tests/river/test_resource.py index d2480cd98..b1827746a 100644 --- a/mhkit/tests/river/test_resource.py +++ b/mhkit/tests/river/test_resource.py @@ -178,17 +178,13 @@ def test_velocity_to_power_xarray(self): cut_in = V.values[1] cut_out = V.values[-2] # Power should be 10x greater and exclude the ends of V - P = river.resource.velocity_to_power( - V, p2, cut_in, cut_out, to_pandas=False - ) + P = river.resource.velocity_to_power(V, p2, cut_in, cut_out, to_pandas=False) # Cut in power zero self.assertAlmostEqual(P[0], 0.00, places=2) # Cut out power zero self.assertAlmostEqual(P[-1], 0.00, places=2) # Middle 10x greater than velocity - self.assertAlmostEqual( - (P[1:-1] - 10 * V[1:-1]).sum().values, 0.00, places=2 - ) + self.assertAlmostEqual((P[1:-1] - 10 * V[1:-1]).sum().values, 0.00, places=2) def test_velocity_to_power_V_type_error(self): V = "invalid_type" # String instead of pd.Series or pd.DataFrame diff --git a/mhkit/utils/data_utils.py b/mhkit/utils/data_utils.py index 07a282477..4613944f4 100644 --- a/mhkit/utils/data_utils.py +++ b/mhkit/utils/data_utils.py @@ -50,8 +50,7 @@ def convert_to_dataset(data, name="data"): ) if not isinstance(name, str): - raise TypeError("The 'name' parameter must be a string" - f"Got {type(name)}.") + raise TypeError("The 'name' parameter must be a string" f"Got {type(name)}.") # Takes data that could be pd.DataFrame, pd.Series, xr.DataArray, or # xr.Dataset and converts it to xr.Dataset From 033a88ac50516df4d35635074e5467bad6b62756 Mon Sep 17 00:00:00 2001 From: akeeste Date: Mon, 4 Mar 2024 09:59:54 -0600 Subject: [PATCH 18/42] fix typo in river.io.usgs --- mhkit/river/io/usgs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mhkit/river/io/usgs.py b/mhkit/river/io/usgs.py index df586c219..ed442e843 100644 --- a/mhkit/river/io/usgs.py +++ b/mhkit/river/io/usgs.py @@ -8,7 +8,7 @@ def _read_usgs_json(text, to_pandas=True): - data = xr.Dataset + data = xr.Dataset() for i in range(len(text["value"]["timeSeries"])): try: site_name = text["value"]["timeSeries"][i]["variable"][ From 21097dd69643f590c46373d39cc07251e4ef2483 Mon Sep 17 00:00:00 2001 From: akeeste Date: Mon, 4 Mar 2024 11:17:49 -0600 Subject: [PATCH 19/42] update tidal.graphics to use utils.convert_to_dataArray --- mhkit/tidal/graphics.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/mhkit/tidal/graphics.py b/mhkit/tidal/graphics.py index 9ed7c7e76..d97767738 100644 --- a/mhkit/tidal/graphics.py +++ b/mhkit/tidal/graphics.py @@ -1,5 +1,4 @@ import numpy as np -import pandas as pd import bisect from scipy.interpolate import interpn as _interpn from scipy.interpolate import interp1d @@ -7,6 +6,7 @@ from mhkit.river.resource import exceedance_probability from mhkit.tidal.resource import _histogram, _flood_or_ebb from mhkit.river.graphics import plot_velocity_duration_curve, _xy_plot +from mhkit.utils import convert_to_dataArray def _initialize_polar(ax=None, metadata=None, flood=None, ebb=None): @@ -96,15 
+96,8 @@ def _check_inputs(directions, velocities, flood, ebb): Direction in degrees added to theta ticks """ - if not isinstance(velocities, (np.ndarray, pd.Series)): - raise TypeError("velocities must be of type np.ndarry or pd.Series") - if isinstance(velocities, np.ndarray): - velocities = pd.Series(velocities) - - if not isinstance(directions, (np.ndarray, pd.Series)): - raise TypeError("directions must be of type np.ndarry or pd.Series") - if isinstance(directions, np.ndarray): - directions = pd.Series(directions) + velocities = convert_to_dataArray(velocities) + directions = convert_to_dataArray(directions) if len(velocities) != len(directions): raise ValueError("velocities and directions must have the same length") From d7f559491b4fe53b7c49723d571d284f2e189f0c Mon Sep 17 00:00:00 2001 From: akeeste Date: Mon, 4 Mar 2024 11:43:29 -0600 Subject: [PATCH 20/42] tidal.performance to xarray --- mhkit/tidal/performance.py | 104 ++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 60 deletions(-) diff --git a/mhkit/tidal/performance.py b/mhkit/tidal/performance.py index 20cd8b215..65ae50398 100644 --- a/mhkit/tidal/performance.py +++ b/mhkit/tidal/performance.py @@ -2,6 +2,7 @@ import pandas as pd import xarray as xr import warnings +from mhkit.utils import convert_to_dataArray from mhkit import dolfyn from mhkit.river.performance import ( @@ -132,34 +133,6 @@ def _slice_rectangular_capture_area(height, width, hub_height, doppler_cell_size return xr.DataArray(As_slc, coords={"range": A_rng}) -def _check_dtype(var, var_name): - """ - Checks the datatype of a variable, converting pandas Series to xarray DataArray, - or raising an error if the datatype is neither. - - Parameters - ------------- - var: xr.DataArray or pd.Series - The variable to be checked. - - var_name: str - The name of the variable, used for error message. - - Returns - --------- - var: xr.DataArray - The input variable, converted to xr.DataArray if it was a pd.Series. - """ - - if isinstance(var, pd.Series): - var = var.to_xarray() - elif not isinstance(var, xr.DataArray): - raise TypeError( - var_name.capitalize() + " must be of type xr.DataArray or pd.Series" - ) - return var - - def power_curve( power, velocity, @@ -171,6 +144,7 @@ def power_curve( diameter=None, height=None, width=None, + to_pandas=True ): """ Calculates power curve and power statistics for a marine energy @@ -178,9 +152,9 @@ def power_curve( Parameters ------------- - power: pandas.Series or xarray.DataArray (time) + power: numpy ndarray, pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset Device power output timeseries. - velocity: pandas.Series or xarray.DataArray ([range,] time) + velocity: numpy ndarray, pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset 1D or 2D streamwise sea water velocity or sea water speed. hub_height: numeric Turbine hub height altitude above the seabed. Assumes ADCP @@ -199,18 +173,20 @@ def power_curve( Required for turbine_profile='rectangular'. Defaults to None. width: numeric, optional Required for turbine_profile='rectangular'. Defaults to None. + to_pandas: bool, optional + Flag to output pandas instead of xarray. Default = True. Returns --------- - pandas.DataFrame + out: pandas DataFrame or xarray Dataset Power-weighted velocity, mean power, power std dev, max and min power vs hub-height velocity. 
""" # Velocity should be a 2D xarray or pandas array and have dims (range, time) # Power should have a timestamp coordinate/index - power = _check_dtype(power, "power") - velocity = _check_dtype(velocity, "velocity") + power = convert_to_dataArray(power) + velocity = convert_to_dataArray(velocity) if len(velocity.shape) != 2: raise ValueError( "Velocity should be 2 dimensional and have \ @@ -313,18 +289,20 @@ def power_curve( P_bar_max = P_bar_vel.groupby_bins("speed", U_bins).max() P_bar_min = P_bar_vel.groupby_bins("speed", U_bins).min() - out = pd.DataFrame( - ( - U_hub_mean.to_series(), - U_hat_mean.to_series(), - P_bar_mean.to_series(), - P_bar_std.to_series(), - P_bar_max.to_series(), - P_bar_min.to_series(), - ) - ).T - out.columns = ["U_avg", "U_avg_power_weighted", "P_avg", "P_std", "P_max", "P_min"] - out.index.name = "U_bins" + out = xr.Dataset( + { + "U_avg": U_hub_mean, + "U_avg_power_weighted": U_hat_mean, + "P_avg": P_bar_mean, + "P_std": P_bar_std, + "P_max": P_bar_max, + "P_min": P_bar_min + } + ) + out.rename({"speed_bins": "U_bins"}) + + if to_pandas: + out = out.to_pandas() return out @@ -424,7 +402,7 @@ def velocity_profiles( Parameters ------------- - velocity : pandas.Series or xarray.DataArray ([range,] time) + velocity : numpy ndarray, pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset 1D or 2D streamwise sea water velocity or sea water speed. hub_height : numeric Turbine hub height altitude above the seabed. Assumes ADCP depth bins @@ -444,7 +422,7 @@ def velocity_profiles( Average velocity profiles based on ensemble mean velocity. """ - velocity = _check_dtype(velocity, "velocity") + velocity = convert_to_dataArray(velocity, "velocity") if len(velocity.shape) != 2: raise ValueError( "Velocity should be 2 dimensional and have \ @@ -497,15 +475,16 @@ def device_efficiency( hub_height, sampling_frequency, window_avg_time=600, + to_pandas=True ): """ Calculates marine energy device efficiency based on IEC/TS 62600-200 Section 9.7. Parameters ------------- - power : pandas.Series or xarray.DataArray (time) + power : numpy ndarray, pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset Device power output timeseries in Watts. - velocity : pandas.Series or xarray.DataArray ([range,] time) + velocity : numpy ndarray, pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset 1D or 2D streamwise sea water velocity or sea water speed in m/s. water_density : float, pandas.Series or xarray.DataArray Sea water density in kg/m^3. @@ -518,6 +497,8 @@ def device_efficiency( ADCP sampling frequency in Hz. window_avg_time : int, optional Time averaging window in seconds. Defaults to 600. + to_pandas: bool, optional + Flag to output pandas instead of xarray. Default = True. Returns --------- @@ -527,8 +508,8 @@ def device_efficiency( # Velocity should be a 2D xarray or pandas array and have dims (range, time) # Power should have a timestamp coordinate/index - power = _check_dtype(power, "power") - velocity = _check_dtype(velocity, "velocity") + power = convert_to_dataArray(power, "power") + velocity = convert_to_dataArray(velocity, "velocity") if len(velocity.shape) != 2: raise ValueError( "Velocity should be 2 dimensional and have \ @@ -540,7 +521,8 @@ def device_efficiency( time = U["time"].values # Power: Interpolate to velocity timeseries - power = _interpolate_power_to_velocity_timeseries(power, U) + # power = _interpolate_power_to_velocity_timeseries(power, U) TODO - REMOVE, dead code? 
+    power = power.interp(time=U["time"], method="linear")

     # Create binner
     bnr = dolfyn.VelBinner(
@@ -565,15 +547,17 @@ def device_efficiency(

     # Efficiency
     eta = P_vel / P_resource
+
+    out = xr.Dataset(
+        {
+            "U_avg": vel_hub,
+            "Efficiency": eta
+        }
+    )
+    out.rename({"speed_bins": "U_bins"})

-    out = pd.DataFrame(
-        (
-            vel_hub.to_series(),
-            eta.to_series(),
-        )
-    ).T
-    out.columns = ["U_avg", "Efficiency"]
-    out.index.name = "U_bins"
+    if to_pandas:
+        out = out.to_pandas()

     return out


From 10ba5bbb35908e2da73566a47cdc9471feee6a37 Mon Sep 17 00:00:00 2001
From: akeeste
Date: Mon, 4 Mar 2024 11:44:47 -0600
Subject: [PATCH 21/42] remove tidal.performance dead code

---
 mhkit/tidal/performance.py | 41 --------------------------------------
 1 file changed, 41 deletions(-)

diff --git a/mhkit/tidal/performance.py b/mhkit/tidal/performance.py
index 65ae50398..33522737f 100644
--- a/mhkit/tidal/performance.py
+++ b/mhkit/tidal/performance.py
@@ -521,7 +521,6 @@ def device_efficiency(
     time = U["time"].values

     # Power: Interpolate to velocity timeseries
-    # power = _interpolate_power_to_velocity_timeseries(power, U) TODO - REMOVE, dead code?
     power = power.interp(time=U["time"], method="linear")

     # Create binner
@@ -562,46 +561,6 @@ def device_efficiency(
     return out


-def _interpolate_power_to_velocity_timeseries(power, U):
-    """
-    Interpolates the power timeseries to match the velocity timeseries time points.
-
-    This function checks if the input power is an xarray DataArray or a pandas Series
-    with a DatetimeIndex and performs interpolation accordingly. If the input power
-    does not match either of these types, a warning is issued and the original power
-    timeseries is returned.
-
-    Parameters
-    -------------
-    power : xarray.DataArray or pandas.Series
-        The device power output timeseries.
-    U : xarray.DataArray
-        2D streamwise sea water velocity or sea water speed.
-
-    Returns
-    ---------
-    xarray.DataArray or pandas.Series
-        Interpolated power timeseries.
-
-    Raises
-    ---------
-    Warning
-        If the input power is not a xarray DataArray or pandas Series with
-        a DatetimeIndex, a warning is issued stating that the function assumes the
-        power timestamps match the velocity timestamps.
-    """
-
-    if "xarray" in type(power).__module__:
-        return power.interp(time=U["time"], method="linear")
-    elif "pandas" in type(power).__module__ and isinstance(
-        power.index, pd.DatetimeIndex
-    ):
-        return power.to_xarray().interp(time=U["time"], method="linear")
-    else:
-        warnings.warn("Assuming `power` timestamps match `velocity` timestamps")
-        return power
-
-
 def _calculate_density(water_density, bnr, mean_hub_vel, time):
     """
     Calculates the averaged density for the given time period. 
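A minimal, self-contained sketch of how the xarray-based `device_efficiency` API from the two patches above can be exercised. The ADCP bin layout, hub height, and random inputs below are illustrative assumptions, not values from the MHKiT test data:

import numpy as np
import pandas as pd
import xarray as xr
from mhkit.tidal import performance

# Hypothetical 1 Hz ADCP record: depth bins along 'range', one hour along 'time'
time = pd.date_range("2024-01-01", periods=3600, freq="1s")
rng = np.arange(0.5, 5.5, 0.5)
velocity = xr.DataArray(
    np.random.uniform(0.5, 2.0, (len(rng), len(time))),
    dims=["range", "time"],
    coords={"range": rng, "time": time},
)
power = xr.DataArray(
    np.random.uniform(1e4, 2e5, len(time)), dims="time", coords={"time": time}
)

# With to_pandas=False the result stays an xr.Dataset holding "U_avg" and
# "Efficiency" indexed by "U_bins"; the default (True) returns a pandas object.
eta = performance.device_efficiency(
    power,
    velocity,
    water_density=1025.0,
    capture_area=np.pi * 1.5**2,
    hub_height=2.1,
    sampling_frequency=1,
    window_avg_time=600,
    to_pandas=False,
)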
From ed1b87d7eea6d55edbcd61891565c547e39596 Mon Sep 17 00:00:00 2001
From: akeeste
Date: Mon, 4 Mar 2024 11:47:03 -0600
Subject: [PATCH 22/42] convert tidal.resource to xarray

---
 mhkit/tidal/performance.py | 2 --
 mhkit/tidal/resource.py    | 7 +++----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/mhkit/tidal/performance.py b/mhkit/tidal/performance.py
index 33522737f..2edaeade1 100644
--- a/mhkit/tidal/performance.py
+++ b/mhkit/tidal/performance.py
@@ -1,7 +1,5 @@
 import numpy as np
-import pandas as pd
 import xarray as xr
-import warnings
 from mhkit.utils import convert_to_dataArray

 from mhkit import dolfyn
diff --git a/mhkit/tidal/resource.py b/mhkit/tidal/resource.py
index 8e04587cb..12c4f26b1 100644
--- a/mhkit/tidal/resource.py
+++ b/mhkit/tidal/resource.py
@@ -1,7 +1,7 @@
 import numpy as np
 import math
-import pandas as pd
 from mhkit.river.resource import exceedance_probability, Froude_number
+from mhkit.utils import convert_to_dataArray


 def _histogram(directions, velocities, width_dir, width_vel):
@@ -81,7 +81,7 @@ def principal_flow_directions(directions, width_dir):

     Parameters
     ----------
-    directions: pandas.Series or numpy.ndarray
+    directions: numpy ndarray, pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset
         Flow direction in degrees CW from North, from 0 to 360
     width_dir: float
         Width of directional bins for histogram in degrees
@@ -97,8 +97,7 @@ def principal_flow_directions(directions, width_dir):
     ebb based on knowledge of the measurement site.
     """

-    if isinstance(directions, np.ndarray):
-        directions = pd.Series(directions)
+    directions = convert_to_dataArray(directions)
     if any(directions < 0) or any(directions > 360):
         violating_values = [d for d in directions if d < 0 or d > 360]
         raise ValueError(

From 31d1efd5163e765508b25ab3af86e0e33a56d458 Mon Sep 17 00:00:00 2001
From: akeeste
Date: Mon, 4 Mar 2024 13:40:41 -0600
Subject: [PATCH 23/42] tidal tests passing

---
 mhkit/tidal/graphics.py | 6 +++---
 mhkit/tidal/resource.py | 7 ++++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/mhkit/tidal/graphics.py b/mhkit/tidal/graphics.py
index d97767738..574ad7500 100644
--- a/mhkit/tidal/graphics.py
+++ b/mhkit/tidal/graphics.py
@@ -475,9 +475,9 @@ def tidal_phase_exceedance(directions, velocities, flood, ebb, bin_size=0.1, ax=

     s_ebb = velocities[isEbb]
     s_flood = velocities[~isEbb]

-    F = exceedance_probability(velocities)["F"]
-    F_ebb = exceedance_probability(s_ebb)["F"]
-    F_flood = exceedance_probability(s_flood)["F"]
+    F = exceedance_probability(velocities)
+    F_ebb = exceedance_probability(s_ebb)
+    F_flood = exceedance_probability(s_flood)

     decimals = round(bin_size / 0.1)
     s_new = np.arange(
diff --git a/mhkit/tidal/resource.py b/mhkit/tidal/resource.py
index 12c4f26b1..0b2ebf144 100644
--- a/mhkit/tidal/resource.py
+++ b/mhkit/tidal/resource.py
@@ -134,10 +134,11 @@ def principal_flow_directions(directions, width_dir):
     floodEbbNormalDegree1 = min(floodEbbNormalDegree1, floodEbbNormalDegree2)
     floodEbbNormalDegree2 = floodEbbNormalDegree1 + 180.0
     # Slice directions on the 2 semi circles
-    d1 = directions[directions.between(floodEbbNormalDegree1, floodEbbNormalDegree2)]
-    d2 = directions[~directions.between(floodEbbNormalDegree1, floodEbbNormalDegree2)]
+    mask = (directions>=floodEbbNormalDegree1) & (directions<=floodEbbNormalDegree2)
+    d1 = directions[mask]
+    d2 = directions[~mask]
     # Shift second set of directions to not break between 360 and 0
-    d2 -= 180.0
+    d2 -= 180
     # Renormalize the points (gets rid of negatives)
     d2 = 
_normalize_angle(d2) # Number of bins for semi-circle From 28647a37770b2235c107efbff725b53b49d48b95 Mon Sep 17 00:00:00 2001 From: akeeste Date: Mon, 4 Mar 2024 14:04:10 -0600 Subject: [PATCH 24/42] add option to return noaa data as xarray, add xarray test --- mhkit/tests/tidal/test_io.py | 53 +++++++++++++++++++++++++++++------- mhkit/tidal/io/noaa.py | 40 ++++++++++++++++++++++----- 2 files changed, 76 insertions(+), 17 deletions(-) diff --git a/mhkit/tests/tidal/test_io.py b/mhkit/tests/tidal/test_io.py index eb870e75d..2ccc69c8c 100644 --- a/mhkit/tests/tidal/test_io.py +++ b/mhkit/tests/tidal/test_io.py @@ -39,16 +39,29 @@ def setUpClass(self): def tearDownClass(self): pass - def test_load_noaa_data(self): - """ - Test that the read_noaa_json function reads data from a - JSON file and returns a DataFrame and metadata with the - correct shape and columns. - """ - file_name = join(datadir, "s08010.json") - data, metadata = tidal.io.noaa.read_noaa_json(file_name) - self.assertTrue(np.all(data.columns == ["s", "d", "b"])) - self.assertEqual(data.shape, (18890, 3)) + # def test_load_noaa_data(self): + # """ + # Test that the read_noaa_json function reads data from a + # JSON file and returns a DataFrame and metadata with the + # correct shape and columns. + # """ + # file_name = join(datadir, "s08010.json") + # data, metadata = tidal.io.noaa.read_noaa_json(file_name) + # self.assertTrue(np.all(data.columns == ["s", "d", "b"])) + # self.assertEqual(data.shape, (18890, 3)) + # self.assertEqual(metadata['id'], 's08010') + + # def test_load_noaa_data_xarray(self): + # """ + # Test that the read_noaa_json function reads data from a + # JSON file and returns a DataFrame and metadata with the + # correct shape and columns. + # """ + # file_name = join(datadir, "s08010.json") + # data = tidal.io.noaa.read_noaa_json(file_name, to_pandas=False) + # self.assertTrue(np.all(list(data.variables) == ["index", "s", "d", "b"])) + # self.assertEqual(len(data['index']), 18890) + # self.assertEqual(data.attrs['id'], 's08010') def test_request_noaa_data_basic(self): """ @@ -66,6 +79,26 @@ def test_request_noaa_data_basic(self): ) self.assertTrue(np.all(data.columns == ["s", "d", "b"])) self.assertEqual(data.shape, (183, 3)) + self.assertEqual(metadata['id'], 's08010') + + def test_request_noaa_data_basic_xarray(self): + """ + Test the request_noaa_data function with basic input parameters + and verify that the returned DataFrame and metadata have the + correct shape and columns. + """ + data = tidal.io.noaa.request_noaa_data( + station="s08010", + parameter="currents", + start_date="20180101", + end_date="20180102", + proxy=None, + write_json=None, + to_pandas=False, + ) + self.assertTrue(np.all(list(data.variables) == ["index", "s", "d", "b"])) + self.assertEqual(len(data['index']), 183) + self.assertEqual(data.attrs['id'], 's08010') def test_request_noaa_data_write_json(self): """ diff --git a/mhkit/tidal/io/noaa.py b/mhkit/tidal/io/noaa.py index a3236ac9f..892099711 100644 --- a/mhkit/tidal/io/noaa.py +++ b/mhkit/tidal/io/noaa.py @@ -43,6 +43,7 @@ def request_noaa_data( proxy=None, write_json=None, clear_cache=False, + to_pandas=True, ): """ Loads NOAA current data directly from https://tidesandcurrents.noaa.gov/api/ @@ -69,12 +70,17 @@ def request_noaa_data( Name of json file to write data clear_cache : bool If True, the cache for this specific request will be cleared. + to_pandas : bool, optional + Flag to output pandas instead of xarray. Default = True. 
Returns ------- - data : pandas DataFrame + data : pandas DataFrame or xarray Dataset Data indexed by datetime with columns named according to the parameter's variable description + metadata : dict or None + Request metadata. If returning xarray, metadata is instead attached to + the data's attributes. """ # Type check inputs if not isinstance(station, str): @@ -120,7 +126,12 @@ def request_noaa_data( if cached_data is not None: if write_json: shutil.copy(cache_filepath, write_json) - return cached_data, cached_metadata + if to_pandas: + return cached_data, cached_metadata + else: + cached_data = cached_data.to_xarray() + cached_data.attrs = cached_metadata + return cached_data # If no cached data is available, make the API request # no coverage bc in coverage runs we have already cached the data/ run this code else: # pragma: no cover @@ -182,7 +193,12 @@ def request_noaa_data( if write_json: shutil.copy(cache_filepath, write_json) - return data, metadata + if to_pandas: + return data, metadata + else: + data = data.to_xarray() + data.attrs = metadata + return data def _xml_to_dataframe(response): @@ -223,7 +239,7 @@ def _xml_to_dataframe(response): return df, metadata -def read_noaa_json(filename): +def read_noaa_json(filename, to_pandas=True): """ Returns site DataFrame and metadata from a json saved from the request_noaa_data @@ -231,12 +247,16 @@ def read_noaa_json(filename): ---------- filename: string filename with path of json file to load + to_pandas : bool, optional + Flag to output pandas instead of xarray. Default = True. + Returns ------- data: DataFrame Timeseries Site data of direction and speed - metadata: dictionary - Site metadata + metadata : dictionary or None + Site metadata. If returning xarray, metadata is instead attached to + the data's attributes. """ with open(filename) as outfile: @@ -259,4 +279,10 @@ def read_noaa_json(filename): index=pd.to_datetime(json_data["index"]), columns=json_data["columns"], ) - return data, metadata + + if to_pandas: + return data, metadata + else: + data = data.to_xarray() + data.attrs = metadata + return data From d4a64b1e1dda08a2a78baf673cdfcee4ecef6268 Mon Sep 17 00:00:00 2001 From: akeeste Date: Mon, 4 Mar 2024 14:17:52 -0600 Subject: [PATCH 25/42] add tidal.performance xarray tests, tests passing --- mhkit/tests/tidal/test_io.py | 44 ++++++++--------- mhkit/tests/tidal/test_performance.py | 71 +++++++++++++++++++++++++-- mhkit/tidal/performance.py | 4 +- mhkit/tidal/resource.py | 4 +- 4 files changed, 94 insertions(+), 29 deletions(-) diff --git a/mhkit/tests/tidal/test_io.py b/mhkit/tests/tidal/test_io.py index 2ccc69c8c..b7aa6c3c5 100644 --- a/mhkit/tests/tidal/test_io.py +++ b/mhkit/tests/tidal/test_io.py @@ -39,29 +39,29 @@ def setUpClass(self): def tearDownClass(self): pass - # def test_load_noaa_data(self): - # """ - # Test that the read_noaa_json function reads data from a - # JSON file and returns a DataFrame and metadata with the - # correct shape and columns. - # """ - # file_name = join(datadir, "s08010.json") - # data, metadata = tidal.io.noaa.read_noaa_json(file_name) - # self.assertTrue(np.all(data.columns == ["s", "d", "b"])) - # self.assertEqual(data.shape, (18890, 3)) - # self.assertEqual(metadata['id'], 's08010') + def test_load_noaa_data(self): + """ + Test that the read_noaa_json function reads data from a + JSON file and returns a DataFrame and metadata with the + correct shape and columns. 
+ """ + file_name = join(datadir, "s08010.json") + data, metadata = tidal.io.noaa.read_noaa_json(file_name) + self.assertTrue(np.all(data.columns == ["s", "d", "b"])) + self.assertEqual(data.shape, (18890, 3)) + self.assertEqual(metadata['id'], 's08010') - # def test_load_noaa_data_xarray(self): - # """ - # Test that the read_noaa_json function reads data from a - # JSON file and returns a DataFrame and metadata with the - # correct shape and columns. - # """ - # file_name = join(datadir, "s08010.json") - # data = tidal.io.noaa.read_noaa_json(file_name, to_pandas=False) - # self.assertTrue(np.all(list(data.variables) == ["index", "s", "d", "b"])) - # self.assertEqual(len(data['index']), 18890) - # self.assertEqual(data.attrs['id'], 's08010') + def test_load_noaa_data_xarray(self): + """ + Test that the read_noaa_json function reads data from a + JSON file and returns a DataFrame and metadata with the + correct shape and columns. + """ + file_name = join(datadir, "s08010.json") + data = tidal.io.noaa.read_noaa_json(file_name, to_pandas=False) + self.assertTrue(np.all(list(data.variables) == ["index", "s", "d", "b"])) + self.assertEqual(len(data['index']), 18890) + self.assertEqual(data.attrs['id'], 's08010') def test_request_noaa_data_basic(self): """ diff --git a/mhkit/tests/tidal/test_performance.py b/mhkit/tests/tidal/test_performance.py index f1d815db1..5bd4a8bca 100644 --- a/mhkit/tests/tidal/test_performance.py +++ b/mhkit/tests/tidal/test_performance.py @@ -27,9 +27,7 @@ def setUpClass(self): def tearDownClass(self): pass - def test_power_curve( - self, - ): + def test_power_curve(self): df93_circ = performance.power_curve( power=self.power, velocity=self.ds["vel"].sel(dir="streamwise"), @@ -78,6 +76,58 @@ def test_power_curve( assert_allclose(df93_circ.values[-2], test_circ, atol=1e-5) assert_allclose(df93_rect.values[-3], test_rect, atol=1e-5) + + def test_power_curve_xarray(self): + df93_circ = performance.power_curve( + power=self.power, + velocity=self.ds["vel"].sel(dir="streamwise"), + hub_height=4.2, + doppler_cell_size=0.5, + sampling_frequency=1, + window_avg_time=600, + turbine_profile="circular", + diameter=3, + height=None, + width=None, + to_pandas=False, + ) + test_circ = np.array( + [ + 1.26250990e00, + 1.09230978e00, + 1.89122103e05, + 1.03223668e04, + 2.04261423e05, + 1.72095731e05, + ] + ) + + df93_rect = performance.power_curve( + power=self.power, + velocity=self.ds["vel"].sel(dir="streamwise"), + hub_height=4.2, + doppler_cell_size=0.5, + sampling_frequency=1, + window_avg_time=600, + turbine_profile="rectangular", + diameter=None, + height=1, + width=3, + to_pandas=False, + ) + test_rect = np.array( + [ + 1.15032239e00, + 3.75747621e-01, + 1.73098627e05, + 3.04090212e04, + 2.09073742e05, + 1.27430552e05, + ] + ) + + assert_allclose(df93_circ.isel(U_bins=-2).to_dataarray(), test_circ, atol=1e-5) + assert_allclose(df93_rect.isel(U_bins=-3).to_dataarray(), test_rect, atol=1e-5) def test_velocity_profiles(self): df94 = performance.velocity_profiles( @@ -127,6 +177,21 @@ def test_power_efficiency(self): test_df97 = np.array(24.79197) assert_allclose(df97.values[-1, -1], test_df97, atol=1e-5) + def test_power_efficiency_xarray(self): + df97 = performance.device_efficiency( + self.power, + velocity=self.ds["vel"].sel(dir="streamwise"), + water_density=self.ds["water_density"], + capture_area=np.pi * 1.5**2, + hub_height=4.2, + sampling_frequency=1, + window_avg_time=600, + to_pandas=False, + ) + + test_df97 = np.array(24.79197) + assert_allclose(df97['Efficiency'][-1], 
test_df97, atol=1e-5) + if __name__ == "__main__": unittest.main() diff --git a/mhkit/tidal/performance.py b/mhkit/tidal/performance.py index 2edaeade1..57fe1f377 100644 --- a/mhkit/tidal/performance.py +++ b/mhkit/tidal/performance.py @@ -297,7 +297,7 @@ def power_curve( "P_min": P_bar_min } ) - out.rename({"speed_bins": "U_bins"}) + out = out.rename({"speed_bins": "U_bins"}) if to_pandas: out = out.to_pandas() @@ -551,7 +551,7 @@ def device_efficiency( "Efficiency": eta } ) - out.rename({"speed_bins": "U_bins"}) + out = out.rename({"speed_bins": "U_bins"}) if to_pandas: out = out.to_pandas() diff --git a/mhkit/tidal/resource.py b/mhkit/tidal/resource.py index 0b2ebf144..da6682c90 100644 --- a/mhkit/tidal/resource.py +++ b/mhkit/tidal/resource.py @@ -108,7 +108,7 @@ def principal_flow_directions(directions, width_dir): N_dir = int(360 / width_dir) # Compute directional histogram H1, dir_edges = np.histogram(directions, bins=N_dir, range=[0, 360], density=True) - # Convert to perecnt + # Convert to percent H1 = H1 * 100 # [%] # Determine if there are an even or odd number of bins odd = bool(N_dir % 2) @@ -146,7 +146,7 @@ def principal_flow_directions(directions, width_dir): # Compute 1D histograms on both semi circles Hd1, dir1_edges = np.histogram(d1, bins=n_dir, density=True) Hd2, dir2_edges = np.histogram(d2, bins=n_dir, density=True) - # Convert to perecnt + # Convert to percent Hd1 = Hd1 * 100 # [%] Hd2 = Hd2 * 100 # [%] # Principal Directions average of the 2 bins From 3d3ad89b1a40c271083fafb2091c0e613b917a02 Mon Sep 17 00:00:00 2001 From: akeeste Date: Mon, 4 Mar 2024 14:28:25 -0600 Subject: [PATCH 26/42] add parameter validation for to_pandas flag --- mhkit/river/io/usgs.py | 6 ++++++ mhkit/tests/tidal/test_performance.py | 15 +++++++++++++++ mhkit/tidal/io/noaa.py | 4 ++++ mhkit/tidal/performance.py | 15 ++++++++++++++- 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/mhkit/river/io/usgs.py b/mhkit/river/io/usgs.py index ed442e843..93e5c1dfc 100644 --- a/mhkit/river/io/usgs.py +++ b/mhkit/river/io/usgs.py @@ -52,6 +52,9 @@ def read_usgs_file(file_name, to_pandas=True): Data indexed by datetime with columns named according to the parameter's variable description """ + if not isinstance(to_pandas, bool): + raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}") + with open(file_name) as json_file: text = json.load(json_file) @@ -108,6 +111,9 @@ def request_usgs_data( """ if not data_type in ["Daily", "Instantaneous"]: raise ValueError(f"data_type must be Daily or Instantaneous. Got: {data_type}") + + if not isinstance(to_pandas, bool): + raise TypeError(f"to_pandas must be of type bool. 
Got: {type(to_pandas)}") # Define the path to the cache directory cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "mhkit", "usgs") diff --git a/mhkit/tests/tidal/test_performance.py b/mhkit/tests/tidal/test_performance.py index 5bd4a8bca..5969dca8e 100644 --- a/mhkit/tests/tidal/test_performance.py +++ b/mhkit/tests/tidal/test_performance.py @@ -163,6 +163,21 @@ def test_velocity_profiles(self): assert_allclose(df95a.values[1], test_df95a, atol=1e-5) assert_allclose(df95b.values[1], test_df95b, atol=1e-5) + def test_velocity_profiles_xarray(self): + df94 = performance.velocity_profiles( + velocity=self.ds["vel"].sel(dir="streamwise"), + hub_height=4.2, + water_depth=10, + sampling_frequency=1, + window_avg_time=600, + function="mean", + to_pandas=False + ) + + test_df94 = np.array([0.32782955, 0.69326691, 1.00948623]) + + assert_allclose(df94[1], test_df94, atol=1e-5) + def test_power_efficiency(self): df97 = performance.device_efficiency( self.power, diff --git a/mhkit/tidal/io/noaa.py b/mhkit/tidal/io/noaa.py index 892099711..3cc914243 100644 --- a/mhkit/tidal/io/noaa.py +++ b/mhkit/tidal/io/noaa.py @@ -111,6 +111,8 @@ def request_noaa_data( raise TypeError( f"Expected 'clear_cache' to be of type bool, but got {type(clear_cache)}" ) + if not isinstance(to_pandas, bool): + raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}") # Define the path to the cache directory cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "mhkit", "noaa") @@ -258,6 +260,8 @@ def read_noaa_json(filename, to_pandas=True): Site metadata. If returning xarray, metadata is instead attached to the data's attributes. """ + if not isinstance(to_pandas, bool): + raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}") with open(filename) as outfile: json_data = json.load(outfile) diff --git a/mhkit/tidal/performance.py b/mhkit/tidal/performance.py index 57fe1f377..4cc55ea08 100644 --- a/mhkit/tidal/performance.py +++ b/mhkit/tidal/performance.py @@ -191,6 +191,9 @@ def power_curve( dimensions of 'time' (temporal) and 'range' (spatial)." ) + if not isinstance(to_pandas, bool): + raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}") + # Numeric positive checks numeric_params = [ hub_height, @@ -391,6 +394,7 @@ def velocity_profiles( sampling_frequency, window_avg_time=600, function="mean", + to_pandas=True, ): """ Calculates profiles of the mean, root-mean-square (RMS), or standard @@ -413,6 +417,8 @@ def velocity_profiles( Time averaging window in seconds. Defaults to 600. func : string Function to apply. One of 'mean','rms', or 'std' + to_pandas: bool, optional + Flag to output pandas instead of xarray. Default = True. Returns --------- @@ -429,6 +435,8 @@ def velocity_profiles( if function not in ["mean", "rms", "std"]: raise ValueError("`function` must be one of 'mean', 'rms', or 'std'.") + if not isinstance(to_pandas, bool): + raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}") # Streamwise data U = velocity @@ -462,7 +470,10 @@ def velocity_profiles( # Forward fill to surface iec_profiles = iec_profiles.ffill("range", limit=None) - return iec_profiles.to_pandas() + if to_pandas: + iec_profiles = iec_profiles.to_pandas() + + return iec_profiles def device_efficiency( @@ -513,6 +524,8 @@ def device_efficiency( "Velocity should be 2 dimensional and have \ dimensions of 'time' (temporal) and 'range' (spatial)." ) + if not isinstance(to_pandas, bool): + raise TypeError(f"to_pandas must be of type bool. 
Got: {type(to_pandas)}") # Streamwise data U = abs(velocity) From e2f691bb09823298452c7fd950f975af86b9b00d Mon Sep 17 00:00:00 2001 From: akeeste Date: Mon, 4 Mar 2024 16:03:52 -0600 Subject: [PATCH 27/42] add tests for data_utils functions --- mhkit/tests/mooring/test_mooring.py | 6 +- mhkit/tests/utils/test_utils.py | 129 ++++++++++++++++++++++++++++ mhkit/utils/data_utils.py | 2 +- 3 files changed, 133 insertions(+), 4 deletions(-) diff --git a/mhkit/tests/mooring/test_mooring.py b/mhkit/tests/mooring/test_mooring.py index d7c7f7ff2..da11f614c 100644 --- a/mhkit/tests/mooring/test_mooring.py +++ b/mhkit/tests/mooring/test_mooring.py @@ -21,7 +21,7 @@ def test_moordyn_out(self): fpath = join(datadir, "Test.MD.out") inputpath = join(datadir, "TestInput.MD.dat") ds = mooring.io.read_moordyn(fpath, input_file=inputpath) - isinstance(ds, xr.Dataset) + self.assertIsInstance(ds, xr.Dataset) def test_lay_length(self): fpath = join(datadir, "line1_test.nc") @@ -42,7 +42,7 @@ def test_animate_3d(self): zlabel="Depth [m]", title="Mooring Line Example", ) - isinstance(ani, FuncAnimation) + self.assertIsInstance(ani, FuncAnimation) def test_animate_2d(self): dsani = self.ds.sel(Time=slice(0, 10)) @@ -56,7 +56,7 @@ def test_animate_2d(self): ylabel="Depth [m]", title="Mooring Line Example", ) - isinstance(ani2d, FuncAnimation) + self.assertIsInstance(ani2d, FuncAnimation) def test_animate_2d_update(self): ani2d = mooring.graphics.animate( diff --git a/mhkit/tests/utils/test_utils.py b/mhkit/tests/utils/test_utils.py index 06a40f9de..9dcbcb441 100644 --- a/mhkit/tests/utils/test_utils.py +++ b/mhkit/tests/utils/test_utils.py @@ -5,6 +5,7 @@ import numpy as np import unittest import json +import xarray as xr testdir = dirname(abspath(__file__)) @@ -159,6 +160,134 @@ def test_magnitude_phase_3D(self): self.assertTrue(all(theta == phase1)) self.assertTrue(all(phi == phase2)) + def convert_to_dataArray(self): + # test data + a = 5 + t = np.arange(0,5,0.5) + i = np.arange(0,11,1) + d1 = i**2/5.0 + d2 = -d1 + + # test data formats + test_n = d1 + test_s = pd.Series(d1,t) + test_df = pd.DataFrame({"d1": d1}, index=t) + test_df2 = pd.DataFrame({"d1": d1, "d1_duplicate": d1}, index=t) + test_da = xr.DataArray( + data = d1, + dims = "time", + coords = dict(time = t), + ) + test_ds = xr.Dataset( + data_vars={"d1": (["time"], d1)}, + coords={"time": t, "index": i} + ) + test_ds2 = xr.Dataset( + data_vars={ + "d1": (["time"], d1), + "d2": (["ind"], d2), + }, + coords={"time": t, "index": i}, + ) + + # numpy + n = utils.convert_to_dataArray(test_n, 'test_data') + self.assertIsInstance(n, xr.DataArray) + self.assertTrue(all(n.data==d1)) + self.assertEqual(n.name, 'test_data') + + # Series + s = utils.convert_to_dataArray(test_s) + self.assertIsInstance(s, xr.DataArray) + self.assertTrue(all(s.data==d1)) + + # DataArray + da = utils.convert_to_dataArray(test_da) + self.assertIsInstance(da, xr.DataArray) + self.assertTrue(all(da.data==d1)) + + # Dataframe + df = utils.convert_to_dataArray(test_df) + self.assertIsInstance(df, xr.DataArray) + self.assertTrue(all(df.data==d1)) + + # Dataset + ds = utils.convert_to_dataArray(test_ds) + self.assertIsInstance(ds, xr.DataArray) + self.assertTrue(all(ds.data==d1)) + + # int (error) + with self.assertRaises(TypeError): + utils.convert_to_dataArray(a) + + # non-string name (error) + with self.assertRaises(TypeError): + utils.convert_to_dataArray(test_n, 5) + + # Multivariate Dataframe (error) + with self.assertRaises(ValueError): + utils.convert_to_dataArray(test_df2) + + # 
Multivariate Dataset (error) + with self.assertRaises(ValueError): + utils.convert_to_dataArray(test_ds2) + + + def test_convert_to_dataset(self): + # test data + a = 5 + t = np.arange(0,5,0.5) + i = np.arange(0,10,1) + d1 = i**2/5.0 + d2 = -d1 + + # test data formats + test_n = d1 + test_s = pd.Series(d1,t) + test_df2 = pd.DataFrame({"d1": d1, "d2": d2}, index=t) + test_da = xr.DataArray( + data = d1, + dims = "time", + coords = dict(time = t), + ) + test_ds2 = xr.Dataset( + data_vars={ + "d1": (["time"], d1), + "d2": (["ind"], d2), + }, + coords={"time": t, "index": i}, + ) + + # Series + s = utils.convert_to_dataset(test_s) + self.assertIsInstance(s, xr.Dataset) + self.assertTrue(all(s['data'].data==d1)) + + # DataArray with custom name + da = utils.convert_to_dataset(test_da,'test_name') + self.assertIsInstance(da, xr.Dataset) + self.assertTrue(all(da['test_name'].data==d1)) + + # Dataframe + df = utils.convert_to_dataset(test_df2) + self.assertIsInstance(df, xr.Dataset) + self.assertTrue(all(df['d1'].data==d1)) + self.assertTrue(all(df['d2'].data==d2)) + + # Dataset + ds = utils.convert_to_dataset(test_ds2) + self.assertIsInstance(ds, xr.Dataset) + self.assertTrue(all(ds['d1'].data==d1)) + self.assertTrue(all(ds['d2'].data==d2)) + + # int (error) + with self.assertRaises(TypeError): + utils.convert_to_dataset(a) + + # non-string name (error) + with self.assertRaises(TypeError): + utils.convert_to_dataset(test_n, 5) + if __name__ == "__main__": unittest.main() diff --git a/mhkit/utils/data_utils.py b/mhkit/utils/data_utils.py index 4613944f4..063a969e3 100644 --- a/mhkit/utils/data_utils.py +++ b/mhkit/utils/data_utils.py @@ -146,7 +146,7 @@ def convert_to_dataArray(data, name="data"): data=data, dims="index", coords={"index": np.arange(len(data))} ) - # If there's not data name, add one to prevent issues calling or converting the dataArray later one + # If there's no data name, add one to prevent issues calling or converting the dataArray later one if data.name == None: data.name = name From 4bd1ddf98cbf377d7f638d4d2933129f5b23bcc0 Mon Sep 17 00:00:00 2001 From: akeeste Date: Mon, 4 Mar 2024 16:05:10 -0600 Subject: [PATCH 28/42] black formatting --- mhkit/river/io/usgs.py | 2 +- mhkit/tests/tidal/test_io.py | 16 +++--- mhkit/tests/tidal/test_performance.py | 6 +-- mhkit/tests/utils/test_utils.py | 72 +++++++++++++-------------- mhkit/tidal/io/noaa.py | 6 +-- mhkit/tidal/performance.py | 15 ++---- mhkit/tidal/resource.py | 2 +- 7 files changed, 56 insertions(+), 63 deletions(-) diff --git a/mhkit/river/io/usgs.py b/mhkit/river/io/usgs.py index 93e5c1dfc..a0eea942c 100644 --- a/mhkit/river/io/usgs.py +++ b/mhkit/river/io/usgs.py @@ -111,7 +111,7 @@ def request_usgs_data( """ if not data_type in ["Daily", "Instantaneous"]: raise ValueError(f"data_type must be Daily or Instantaneous. Got: {data_type}") - + if not isinstance(to_pandas, bool): raise TypeError(f"to_pandas must be of type bool. 
Got: {type(to_pandas)}") diff --git a/mhkit/tests/tidal/test_io.py b/mhkit/tests/tidal/test_io.py index b7aa6c3c5..6993be815 100644 --- a/mhkit/tests/tidal/test_io.py +++ b/mhkit/tests/tidal/test_io.py @@ -49,8 +49,8 @@ def test_load_noaa_data(self): data, metadata = tidal.io.noaa.read_noaa_json(file_name) self.assertTrue(np.all(data.columns == ["s", "d", "b"])) self.assertEqual(data.shape, (18890, 3)) - self.assertEqual(metadata['id'], 's08010') - + self.assertEqual(metadata["id"], "s08010") + def test_load_noaa_data_xarray(self): """ Test that the read_noaa_json function reads data from a @@ -60,8 +60,8 @@ def test_load_noaa_data_xarray(self): file_name = join(datadir, "s08010.json") data = tidal.io.noaa.read_noaa_json(file_name, to_pandas=False) self.assertTrue(np.all(list(data.variables) == ["index", "s", "d", "b"])) - self.assertEqual(len(data['index']), 18890) - self.assertEqual(data.attrs['id'], 's08010') + self.assertEqual(len(data["index"]), 18890) + self.assertEqual(data.attrs["id"], "s08010") def test_request_noaa_data_basic(self): """ @@ -79,8 +79,8 @@ def test_request_noaa_data_basic(self): ) self.assertTrue(np.all(data.columns == ["s", "d", "b"])) self.assertEqual(data.shape, (183, 3)) - self.assertEqual(metadata['id'], 's08010') - + self.assertEqual(metadata["id"], "s08010") + def test_request_noaa_data_basic_xarray(self): """ Test the request_noaa_data function with basic input parameters @@ -97,8 +97,8 @@ def test_request_noaa_data_basic_xarray(self): to_pandas=False, ) self.assertTrue(np.all(list(data.variables) == ["index", "s", "d", "b"])) - self.assertEqual(len(data['index']), 183) - self.assertEqual(data.attrs['id'], 's08010') + self.assertEqual(len(data["index"]), 183) + self.assertEqual(data.attrs["id"], "s08010") def test_request_noaa_data_write_json(self): """ diff --git a/mhkit/tests/tidal/test_performance.py b/mhkit/tests/tidal/test_performance.py index 5969dca8e..554480f13 100644 --- a/mhkit/tests/tidal/test_performance.py +++ b/mhkit/tests/tidal/test_performance.py @@ -76,7 +76,7 @@ def test_power_curve(self): assert_allclose(df93_circ.values[-2], test_circ, atol=1e-5) assert_allclose(df93_rect.values[-3], test_rect, atol=1e-5) - + def test_power_curve_xarray(self): df93_circ = performance.power_curve( power=self.power, @@ -171,7 +171,7 @@ def test_velocity_profiles_xarray(self): sampling_frequency=1, window_avg_time=600, function="mean", - to_pandas=False + to_pandas=False, ) test_df94 = np.array([0.32782955, 0.69326691, 1.00948623]) @@ -205,7 +205,7 @@ def test_power_efficiency_xarray(self): ) test_df97 = np.array(24.79197) - assert_allclose(df97['Efficiency'][-1], test_df97, atol=1e-5) + assert_allclose(df97["Efficiency"][-1], test_df97, atol=1e-5) if __name__ == "__main__": diff --git a/mhkit/tests/utils/test_utils.py b/mhkit/tests/utils/test_utils.py index 9dcbcb441..1ff252ab6 100644 --- a/mhkit/tests/utils/test_utils.py +++ b/mhkit/tests/utils/test_utils.py @@ -163,24 +163,23 @@ def test_magnitude_phase_3D(self): def convert_to_dataArray(self): # test data a = 5 - t = np.arange(0,5,0.5) - i = np.arange(0,11,1) - d1 = i**2/5.0 + t = np.arange(0, 5, 0.5) + i = np.arange(0, 11, 1) + d1 = i**2 / 5.0 d2 = -d1 - + # test data formats test_n = d1 - test_s = pd.Series(d1,t) + test_s = pd.Series(d1, t) test_df = pd.DataFrame({"d1": d1}, index=t) test_df2 = pd.DataFrame({"d1": d1, "d1_duplicate": d1}, index=t) test_da = xr.DataArray( - data = d1, - dims = "time", - coords = dict(time = t), - ) + data=d1, + dims="time", + coords=dict(time=t), + ) test_ds = 
xr.Dataset( - data_vars={"d1": (["time"], d1)}, - coords={"time": t, "index": i} + data_vars={"d1": (["time"], d1)}, coords={"time": t, "index": i} ) test_ds2 = xr.Dataset( data_vars={ @@ -188,33 +187,33 @@ def convert_to_dataArray(self): "d2": (["ind"], d2), }, coords={"time": t, "index": i}, - ) + ) # numpy - n = utils.convert_to_dataArray(test_n, 'test_data') + n = utils.convert_to_dataArray(test_n, "test_data") self.assertIsInstance(n, xr.DataArray) - self.assertTrue(all(n.data==d1)) - self.assertEqual(n.name, 'test_data') + self.assertTrue(all(n.data == d1)) + self.assertEqual(n.name, "test_data") # Series s = utils.convert_to_dataArray(test_s) self.assertIsInstance(s, xr.DataArray) - self.assertTrue(all(s.data==d1)) + self.assertTrue(all(s.data == d1)) # DataArray da = utils.convert_to_dataArray(test_da) self.assertIsInstance(da, xr.DataArray) - self.assertTrue(all(da.data==d1)) + self.assertTrue(all(da.data == d1)) # Dataframe df = utils.convert_to_dataArray(test_df) self.assertIsInstance(df, xr.DataArray) - self.assertTrue(all(df.data==d1)) + self.assertTrue(all(df.data == d1)) # Dataset ds = utils.convert_to_dataArray(test_ds) self.assertIsInstance(ds, xr.DataArray) - self.assertTrue(all(ds.data==d1)) + self.assertTrue(all(ds.data == d1)) # int (error) with self.assertRaises(TypeError): @@ -232,53 +231,52 @@ def convert_to_dataArray(self): with self.assertRaises(ValueError): utils.convert_to_dataArray(test_ds2) - def test_convert_to_dataset(self): # test data a = 5 - t = np.arange(0,5,0.5) - i = np.arange(0,10,1) - d1 = i**2/5.0 + t = np.arange(0, 5, 0.5) + i = np.arange(0, 10, 1) + d1 = i**2 / 5.0 d2 = -d1 - + # test data formats test_n = d1 - test_s = pd.Series(d1,t) + test_s = pd.Series(d1, t) test_df2 = pd.DataFrame({"d1": d1, "d2": d2}, index=t) test_da = xr.DataArray( - data = d1, - dims = "time", - coords = dict(time = t), - ) + data=d1, + dims="time", + coords=dict(time=t), + ) test_ds2 = xr.Dataset( data_vars={ "d1": (["time"], d1), "d2": (["ind"], d2), }, coords={"time": t, "index": i}, - ) + ) # Series s = utils.convert_to_dataset(test_s) self.assertIsInstance(s, xr.Dataset) - self.assertTrue(all(s['data'].data==d1)) + self.assertTrue(all(s["data"].data == d1)) # DataArray with custom name - da = utils.convert_to_dataset(test_da,'test_name') + da = utils.convert_to_dataset(test_da, "test_name") self.assertIsInstance(da, xr.Dataset) - self.assertTrue(all(da['test_name'].data==d1)) + self.assertTrue(all(da["test_name"].data == d1)) # Dataframe df = utils.convert_to_dataset(test_df2) self.assertIsInstance(df, xr.Dataset) - self.assertTrue(all(df['d1'].data==d1)) - self.assertTrue(all(df['d2'].data==d2)) + self.assertTrue(all(df["d1"].data == d1)) + self.assertTrue(all(df["d2"].data == d2)) # Dataset ds = utils.convert_to_dataset(test_ds2) self.assertIsInstance(ds, xr.Dataset) - self.assertTrue(all(ds['d1'].data==d1)) - self.assertTrue(all(ds['d2'].data==d2)) + self.assertTrue(all(ds["d1"].data == d1)) + self.assertTrue(all(ds["d2"].data == d2)) # int (error) with self.assertRaises(TypeError): diff --git a/mhkit/tidal/io/noaa.py b/mhkit/tidal/io/noaa.py index 3cc914243..f11820695 100644 --- a/mhkit/tidal/io/noaa.py +++ b/mhkit/tidal/io/noaa.py @@ -79,7 +79,7 @@ def request_noaa_data( Data indexed by datetime with columns named according to the parameter's variable description metadata : dict or None - Request metadata. If returning xarray, metadata is instead attached to + Request metadata. If returning xarray, metadata is instead attached to the data's attributes. 
""" # Type check inputs @@ -257,7 +257,7 @@ def read_noaa_json(filename, to_pandas=True): data: DataFrame Timeseries Site data of direction and speed metadata : dictionary or None - Site metadata. If returning xarray, metadata is instead attached to + Site metadata. If returning xarray, metadata is instead attached to the data's attributes. """ if not isinstance(to_pandas, bool): @@ -283,7 +283,7 @@ def read_noaa_json(filename, to_pandas=True): index=pd.to_datetime(json_data["index"]), columns=json_data["columns"], ) - + if to_pandas: return data, metadata else: diff --git a/mhkit/tidal/performance.py b/mhkit/tidal/performance.py index 4cc55ea08..9015a4f96 100644 --- a/mhkit/tidal/performance.py +++ b/mhkit/tidal/performance.py @@ -142,7 +142,7 @@ def power_curve( diameter=None, height=None, width=None, - to_pandas=True + to_pandas=True, ): """ Calculates power curve and power statistics for a marine energy @@ -297,7 +297,7 @@ def power_curve( "P_avg": P_bar_mean, "P_std": P_bar_std, "P_max": P_bar_max, - "P_min": P_bar_min + "P_min": P_bar_min, } ) out = out.rename({"speed_bins": "U_bins"}) @@ -484,7 +484,7 @@ def device_efficiency( hub_height, sampling_frequency, window_avg_time=600, - to_pandas=True + to_pandas=True, ): """ Calculates marine energy device efficiency based on IEC/TS 62600-200 Section 9.7. @@ -557,13 +557,8 @@ def device_efficiency( # Efficiency eta = P_vel / P_resource - - out = xr.Dataset( - { - "U_avg": vel_hub, - "Efficiency": eta - } - ) + + out = xr.Dataset({"U_avg": vel_hub, "Efficiency": eta}) out = out.rename({"speed_bins": "U_bins"}) if to_pandas: diff --git a/mhkit/tidal/resource.py b/mhkit/tidal/resource.py index da6682c90..76f041e6a 100644 --- a/mhkit/tidal/resource.py +++ b/mhkit/tidal/resource.py @@ -134,7 +134,7 @@ def principal_flow_directions(directions, width_dir): floodEbbNormalDegree1 = min(floodEbbNormalDegree1, floodEbbNormalDegree2) floodEbbNormalDegree2 = floodEbbNormalDegree1 + 180.0 # Slice directions on the 2 semi circles - mask = (directions>=floodEbbNormalDegree1) & (directions<=floodEbbNormalDegree2) + mask = (directions >= floodEbbNormalDegree1) & (directions <= floodEbbNormalDegree2) d1 = directions[mask] d2 = directions[~mask] # Shift second set of of directions to not break between 360 and 0 From 5e922019296f16c37b58a77e08095a44c7241ce2 Mon Sep 17 00:00:00 2001 From: akeeste Date: Thu, 7 Mar 2024 11:43:24 -0600 Subject: [PATCH 29/42] revert river.io.usgs._read_usgs_json to pandas for now, still allow xarray output --- mhkit/river/io/usgs.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/mhkit/river/io/usgs.py b/mhkit/river/io/usgs.py index a0eea942c..54c97966c 100644 --- a/mhkit/river/io/usgs.py +++ b/mhkit/river/io/usgs.py @@ -2,35 +2,32 @@ import json import requests import shutil -import numpy as np -import xarray as xr +import pandas as pd from mhkit.utils.cache import handle_caching def _read_usgs_json(text, to_pandas=True): - data = xr.Dataset() + data = pd.DataFrame() for i in range(len(text["value"]["timeSeries"])): try: site_name = text["value"]["timeSeries"][i]["variable"][ "variableDescription" ] - tmp = text["value"]["timeSeries"][i]["values"][0]["value"] - v = [] - t = [] - for i in range(0, len(tmp)): - v.append(tmp[i]["value"]) - t.append(tmp[i]["dateTime"]) - v = np.asarray(v).astype(float) - t = np.asarray(t).astype(np.datetime64) - site_data = xr.Dataset( - data_vars={site_name: (["dateTime"], v)}, coords={"dateTime": t} + site_data = pd.DataFrame( + 
text["value"]["timeSeries"][i]["values"][0]["value"] ) + site_data.set_index("dateTime", drop=True, inplace=True) + site_data.index = pd.to_datetime(site_data.index, utc=True) + site_data.rename(columns={"value": site_name}, inplace=True) + site_data[site_name] = pd.to_numeric(site_data[site_name]) + site_data.index.name = None + del site_data["qualifiers"] data = data.combine_first(site_data) except: pass - if to_pandas: - data = data.to_pandas() + if not to_pandas: + data = data.to_dataset() return data @@ -163,7 +160,8 @@ def request_usgs_data( response = requests.get(url=data_url + api_query, proxies=proxy) text = json.loads(response.text) - data = _read_usgs_json(text, to_pandas) + # handle_caching is only set-up for pandas, so force this data to output as pandas for now + data = _read_usgs_json(text, True) # After making the API request and processing the response, write the # response to a cache file @@ -172,4 +170,7 @@ def request_usgs_data( if write_json: shutil.copy(cache_filepath, write_json) + if not to_pandas: + data = data.to_dataset() + return data From 60e1b85131c72f9ad1c13574b785e6e77170c050 Mon Sep 17 00:00:00 2001 From: akeeste Date: Thu, 7 Mar 2024 14:48:36 -0600 Subject: [PATCH 30/42] fix cast to xarray in performance test --- mhkit/tests/tidal/test_performance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mhkit/tests/tidal/test_performance.py b/mhkit/tests/tidal/test_performance.py index 554480f13..43c13b473 100644 --- a/mhkit/tests/tidal/test_performance.py +++ b/mhkit/tests/tidal/test_performance.py @@ -126,8 +126,8 @@ def test_power_curve_xarray(self): ] ) - assert_allclose(df93_circ.isel(U_bins=-2).to_dataarray(), test_circ, atol=1e-5) - assert_allclose(df93_rect.isel(U_bins=-3).to_dataarray(), test_rect, atol=1e-5) + assert_allclose(df93_circ.isel(U_bins=-2).to_array(), test_circ, atol=1e-5) + assert_allclose(df93_rect.isel(U_bins=-3).to_array(), test_rect, atol=1e-5) def test_velocity_profiles(self): df94 = performance.velocity_profiles( From e6a70f216d617c70e8e15447dc870c65d205d6e4 Mon Sep 17 00:00:00 2001 From: akeeste Date: Thu, 7 Mar 2024 15:40:18 -0600 Subject: [PATCH 31/42] fix create_points function after simplifying and converting to xarray --- mhkit/river/io/d3d.py | 56 +++++++++++++++++++------------- mhkit/tests/river/test_io_d3d.py | 8 ++--- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/mhkit/river/io/d3d.py b/mhkit/river/io/d3d.py index ce7580cde..be9330976 100644 --- a/mhkit/river/io/d3d.py +++ b/mhkit/river/io/d3d.py @@ -410,9 +410,9 @@ def create_points(x, y, waterdepth, to_pandas=True): if not isinstance(to_pandas, bool): raise TypeError(f"to_pandas must be of type bool. 
Got: {type(to_pandas)}") - x_array_like = ~isinstance(x, (int, float)) - y_array_like = ~isinstance(y, (int, float)) - waterdepth_array_like = ~isinstance(waterdepth, (int, float)) + x_array_like = not isinstance(x, (int, float)) + y_array_like = not isinstance(y, (int, float)) + waterdepth_array_like = not isinstance(waterdepth, (int, float)) if x_array_like and y_array_like and waterdepth_array_like: # if all inputs are arrays, grid the coordinate and waterdepth @@ -420,8 +420,8 @@ def create_points(x, y, waterdepth, to_pandas=True): y_grid = y_grid.ravel() waterdepth_grid = waterdepth_grid.ravel() - _, mask = np.where(y == y_grid) - x_grid = x[mask] + x_grid, _ = np.meshgrid(x, waterdepth) + x_grid = x_grid.ravel() else: # if at least one input is a point, grid all inputs x_grid, y_grid, waterdepth_grid = np.meshgrid(x, y, waterdepth) @@ -454,7 +454,7 @@ def variable_interpolation( x_min_lim=float("-inf"), y_max_lim=float("inf"), y_min_lim=float("-inf"), - to_pandas=False, + to_pandas=True, ): """ Interpolate multiple variables from the Delft3D onto the same points. @@ -491,8 +491,8 @@ def variable_interpolation( f"points must be a string, pd.DataFrame, or xr.Dataset. Got {type(points)}." ) - if isinstance(points, pd.DataFrame): - points = points.to_xarray() + if isinstance(points, xr.Dataset): + points = points.to_pandas() if isinstance(points, str): if not (points == "cells" or points == "faces"): @@ -508,16 +508,15 @@ def variable_interpolation( data_raw = {} for var in variables: - var_data = get_all_data_points(data, var, time_index=-1, to_pandas=False) - var_data["depth"] = var_data.waterdepth - var_data.waterlevel # added - - # var_data = var_data.loc[:, var_data.T.duplicated(keep='first')] - var_data = var_data.where(var_data.x > x_min_lim, drop=True) - var_data = var_data.where(var_data.x < x_max_lim, drop=True) - var_data = var_data.where(var_data.y > y_min_lim, drop=True) - var_data = var_data.where(var_data.y < y_max_lim, drop=True) - data_raw[var] = var_data - if isinstance(points, xr.Dataset): + var_data_df = get_all_data_points(data, var, time_index=-1, to_pandas=True) + var_data_df["depth"] = var_data_df.waterdepth - var_data_df.waterlevel # added + var_data_df = var_data_df.loc[:, ~var_data_df.T.duplicated(keep="first")] + var_data_df = var_data_df[var_data_df.x > x_min_lim] + var_data_df = var_data_df[var_data_df.x < x_max_lim] + var_data_df = var_data_df[var_data_df.y > y_min_lim] + var_data_df = var_data_df[var_data_df.y < y_max_lim] + data_raw[var] = var_data_df + if isinstance(points, pd.DataFrame): print("points provided") elif points == "faces": points = data_raw["ucx"][["x", "y", "waterdepth"]] @@ -544,8 +543,8 @@ def variable_interpolation( method="nearest", ) - if to_pandas: - transformed_data = transformed_data.to_pandas() + if not to_pandas: + transformed_data = transformed_data.to_dataset() return transformed_data @@ -677,7 +676,7 @@ def get_all_data_points(data, variable, time_index=-1, to_pandas=True): return all_data -def turbulent_intensity(data, points="cells", time_index=-1, intermediate_values=False): +def turbulent_intensity(data, points="cells", time_index=-1, intermediate_values=False, to_pandas=True): """ Calculate the turbulent intensity percentage for a given data set for the specified points. Assumes variable names: ucx, ucy, ucz and turkin1. @@ -700,6 +699,8 @@ def turbulent_intensity(data, points="cells", time_index=-1, intermediate_values If false the function will return position and turbulent intensity values. 
If true the function will return position(x,y,z) and values need to calculate turbulent intensity (ucx, uxy, uxz and turkin1) in a Dataframe. Default False. + to_pandas : bool (optional) + Flag to output pandas instead of xarray. Default = True. Returns ------- @@ -728,6 +729,12 @@ def turbulent_intensity(data, points="cells", time_index=-1, intermediate_values if not isinstance(time_index, int): raise TypeError("time_index must be an int") + if not isinstance(to_pandas, bool): + raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}") + + if isinstance(points, xr.Dataset): + points = points.to_pandas() + max_time_index = data["time"].shape[0] - 1 # to account for zero index if abs(time_index) > max_time_index: raise ValueError( @@ -784,8 +791,8 @@ def turbulent_intensity(data, points="cells", time_index=-1, intermediate_values ) zero_ind = neg_index[0][zero_bool] non_zero_ind = neg_index[0][~zero_bool] - TI_data.where(zero_ind)["turkin1"] = np.zeros(len(zero_ind)) - TI_data.where(non_zero_ind)["turkin1"] = [np.nan] * len(non_zero_ind) + TI_data.loc[zero_ind, "turkin1"] = np.zeros(len(zero_ind)) + TI_data.loc[non_zero_ind, "turkin1"] = [np.nan] * len(non_zero_ind) TI_data["turbulent_intensity"] = ( np.sqrt(2 / 3 * TI_data["turkin1"]) / u_mag * 100 @@ -794,4 +801,7 @@ def turbulent_intensity(data, points="cells", time_index=-1, intermediate_values if intermediate_values == False: TI_data = TI_data.drop(TI_vars, axis=1) + if not to_pandas: + TI_data = TI_data.to_dataset() + return TI_data diff --git a/mhkit/tests/river/test_io_d3d.py b/mhkit/tests/river/test_io_d3d.py index ba981e169..41a93c166 100644 --- a/mhkit/tests/river/test_io_d3d.py +++ b/mhkit/tests/river/test_io_d3d.py @@ -1,4 +1,4 @@ -from os.path import abspath, dirname, join, isfile, normpath, relpath +from os.path import abspath, dirname, join, normpath from numpy.testing import assert_array_almost_equal import scipy.interpolate as interp import mhkit.river as river @@ -106,7 +106,7 @@ def test_create_points_two_arrays_one_point(self): """ result = river.io.d3d.create_points(np.array([1, 2]), np.array([3]), 4) expected = pd.DataFrame({"x": [1, 2], "y": [3, 3], "waterdepth": [4, 4]}) - pd.testing.assert_frame_equal(result, expected, check_dtype=False) + pd.testing.assert_frame_equal(result, expected, check_dtype=False, check_names=False) def test_create_points_user_made_two_arrays_one_point(self): """ @@ -152,7 +152,7 @@ def test_create_points_mixed_data_types(self): {"x": [1, 2, 1, 2], "y": [3, 4, 3, 4], "waterdepth": [5, 5, 6, 6]} ) - pd.testing.assert_frame_equal(result, expected, check_dtype=False) + pd.testing.assert_frame_equal(result, expected, check_dtype=False, check_names=False) def test_create_points_array_like_inputs(self): """ @@ -163,7 +163,7 @@ def test_create_points_array_like_inputs(self): {"x": [1, 2, 1, 2], "y": [3, 4, 3, 4], "waterdepth": [5, 5, 6, 6]} ) - pd.testing.assert_frame_equal(result, expected, check_dtype=False) + pd.testing.assert_frame_equal(result, expected, check_dtype=False, check_names=False) def test_variable_interpolation(self): data = self.d3d_flume_data From 2e88ca483eb2abce76481761f80160b927a918aa Mon Sep 17 00:00:00 2001 From: akeeste Date: Thu, 7 Mar 2024 15:44:39 -0600 Subject: [PATCH 32/42] black formatting --- mhkit/river/io/d3d.py | 4 +++- mhkit/tests/river/test_io_d3d.py | 12 +++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/mhkit/river/io/d3d.py b/mhkit/river/io/d3d.py index be9330976..19a61df62 100644 --- a/mhkit/river/io/d3d.py +++ 
b/mhkit/river/io/d3d.py @@ -676,7 +676,9 @@ def get_all_data_points(data, variable, time_index=-1, to_pandas=True): return all_data -def turbulent_intensity(data, points="cells", time_index=-1, intermediate_values=False, to_pandas=True): +def turbulent_intensity( + data, points="cells", time_index=-1, intermediate_values=False, to_pandas=True +): """ Calculate the turbulent intensity percentage for a given data set for the specified points. Assumes variable names: ucx, ucy, ucz and turkin1. diff --git a/mhkit/tests/river/test_io_d3d.py b/mhkit/tests/river/test_io_d3d.py index 41a93c166..c095854d2 100644 --- a/mhkit/tests/river/test_io_d3d.py +++ b/mhkit/tests/river/test_io_d3d.py @@ -106,7 +106,9 @@ def test_create_points_two_arrays_one_point(self): """ result = river.io.d3d.create_points(np.array([1, 2]), np.array([3]), 4) expected = pd.DataFrame({"x": [1, 2], "y": [3, 3], "waterdepth": [4, 4]}) - pd.testing.assert_frame_equal(result, expected, check_dtype=False, check_names=False) + pd.testing.assert_frame_equal( + result, expected, check_dtype=False, check_names=False + ) def test_create_points_user_made_two_arrays_one_point(self): """ @@ -152,7 +154,9 @@ def test_create_points_mixed_data_types(self): {"x": [1, 2, 1, 2], "y": [3, 4, 3, 4], "waterdepth": [5, 5, 6, 6]} ) - pd.testing.assert_frame_equal(result, expected, check_dtype=False, check_names=False) + pd.testing.assert_frame_equal( + result, expected, check_dtype=False, check_names=False + ) def test_create_points_array_like_inputs(self): """ @@ -163,7 +167,9 @@ def test_create_points_array_like_inputs(self): {"x": [1, 2, 1, 2], "y": [3, 4, 3, 4], "waterdepth": [5, 5, 6, 6]} ) - pd.testing.assert_frame_equal(result, expected, check_dtype=False, check_names=False) + pd.testing.assert_frame_equal( + result, expected, check_dtype=False, check_names=False + ) def test_variable_interpolation(self): data = self.d3d_flume_data From 8938b6ea978b5eeae404d8c583c9bd22d3471a2f Mon Sep 17 00:00:00 2001 From: akeeste Date: Fri, 8 Mar 2024 10:57:34 -0600 Subject: [PATCH 33/42] fix output format for some river resource functions --- mhkit/river/resource.py | 6 +++++ mhkit/tests/river/test_resource.py | 42 +++++++++++++++--------------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/mhkit/river/resource.py b/mhkit/river/resource.py index 2333e04e1..8b3c0faee 100644 --- a/mhkit/river/resource.py +++ b/mhkit/river/resource.py @@ -73,6 +73,8 @@ def exceedance_probability(D, dimension="", to_pandas=True): rank = len(D[dimension]) - rank + 1 # convert to descending rank F = 100 * rank / (len(D[dimension]) + 1) F.name = "F" + + F = F.to_dataset() # for matlab if to_pandas: F = F.to_pandas() @@ -173,6 +175,8 @@ def discharge_to_velocity(D, polynomial_coefficients, dimension="", to_pandas=Tr coords={dimension: D[dimension]}, ) V.name = "V" + + V = V.to_dataset() # for matlab if to_pandas: V = V.to_pandas() @@ -236,6 +240,8 @@ def velocity_to_power( P = xr.DataArray(data=power, dims=dimension, coords={dimension: V[dimension]}) P.name = "P" + + P = P.to_dataset() if to_pandas: P = P.to_pandas() diff --git a/mhkit/tests/river/test_resource.py b/mhkit/tests/river/test_resource.py index b1827746a..2d920c997 100644 --- a/mhkit/tests/river/test_resource.py +++ b/mhkit/tests/river/test_resource.py @@ -64,8 +64,8 @@ def test_exceedance_probability(self): # if N=9, max F = 100((max(Q)+1)/10) = 90% # if N=9, min F = 100((min(Q)+1)/10) = 10% f = river.resource.exceedance_probability(Q) - self.assertEqual(f.min(), 10.0) - self.assertEqual(f.max(), 
90.0) + self.assertEqual(f.min().values, 10.0) + self.assertEqual(f.max().values, 90.0) def test_exceedance_probability_xarray(self): # Create arbitrary discharge between 0 and 8(N=9) @@ -75,8 +75,8 @@ def test_exceedance_probability_xarray(self): # if N=9, max F = 100((max(Q)+1)/10) = 90% # if N=9, min F = 100((min(Q)+1)/10) = 10% f = river.resource.exceedance_probability(Q) - self.assertEqual(f.min(), 10.0) - self.assertEqual(f.max(), 90.0) + self.assertEqual(f.min().values, 10.0) + self.assertEqual(f.max().values, 90.0) def test_exceedance_probability_type_error(self): D = "invalid_type" # String instead of pd.Series or pd.DataFrame @@ -121,7 +121,7 @@ def test_discharge_to_velocity(self): p, r2 = river.resource.polynomial_fit(np.arange(9), 10 * np.arange(9), 1) # Because the polynomial line fits perfect we should expect the V to equal 10*Q V = river.resource.discharge_to_velocity(Q, p) - self.assertAlmostEqual(np.sum(10 * Q - V), 0.00, places=2) + self.assertAlmostEqual(np.sum(10 * Q - V["V"]), 0.00, places=2) def test_discharge_to_velocity_xarray(self): # Create arbitrary discharge between 0 and 8(N=9) @@ -132,7 +132,7 @@ def test_discharge_to_velocity_xarray(self): p, r2 = river.resource.polynomial_fit(np.arange(9), 10 * np.arange(9), 1) # Because the polynomial line fits perfect we should expect the V to equal 10*Q V = river.resource.discharge_to_velocity(Q, p, to_pandas=False) - self.assertAlmostEqual(np.sum(10 * Q - V).values, 0.00, places=2) + self.assertAlmostEqual(np.sum(10 * Q - V["V"]).values, 0.00, places=2) def test_discharge_to_velocity_D_type_error(self): D = "invalid_type" # String instead of pd.Series or pd.DataFrame @@ -154,16 +154,16 @@ def test_velocity_to_power(self): # Calculate a first order polynomial on an VP_Curve x=y line 10 times greater than the V values p2, r22 = river.resource.polynomial_fit(np.arange(9), 10 * np.arange(9), 1) # Set cut in/out to exclude 1 bin on either end of V range - cut_in = V[1] - cut_out = V.iloc[-2] + cut_in = V["V"][1] + cut_out = V["V"].iloc[-2] # Power should be 10x greater and exclude the ends of V - P = river.resource.velocity_to_power(V, p2, cut_in, cut_out) + P = river.resource.velocity_to_power(V["V"], p2, cut_in, cut_out) # Cut in power zero - self.assertAlmostEqual(P[0], 0.00, places=2) + self.assertAlmostEqual(P["P"][0], 0.00, places=2) # Cut out power zero - self.assertAlmostEqual(P.iloc[-1], 0.00, places=2) + self.assertAlmostEqual(P["P"].iloc[-1], 0.00, places=2) # Middle 10x greater than velocity - self.assertAlmostEqual((P[1:-1] - 10 * V[1:-1]).sum(), 0.00, places=2) + self.assertAlmostEqual((P["P"][1:-1] - 10 * V["V"][1:-1]).sum(), 0.00, places=2) def test_velocity_to_power_xarray(self): # Calculate a first order polynomial on an DV_Curve x=y line 10 times greater than the Q values @@ -175,16 +175,16 @@ def test_velocity_to_power_xarray(self): # Calculate a first order polynomial on an VP_Curve x=y line 10 times greater than the V values p2, r22 = river.resource.polynomial_fit(np.arange(9), 10 * np.arange(9), 1) # Set cut in/out to exclude 1 bin on either end of V range - cut_in = V.values[1] - cut_out = V.values[-2] + cut_in = V["V"].values[1] + cut_out = V["V"].values[-2] # Power should be 10x greater and exclude the ends of V - P = river.resource.velocity_to_power(V, p2, cut_in, cut_out, to_pandas=False) + P = river.resource.velocity_to_power(V["V"], p2, cut_in, cut_out, to_pandas=False) # Cut in power zero - self.assertAlmostEqual(P[0], 0.00, places=2) + self.assertAlmostEqual(P["P"][0], 0.00, places=2) # Cut 
out power zero - self.assertAlmostEqual(P[-1], 0.00, places=2) + self.assertAlmostEqual(P["P"][-1], 0.00, places=2) # Middle 10x greater than velocity - self.assertAlmostEqual((P[1:-1] - 10 * V[1:-1]).sum().values, 0.00, places=2) + self.assertAlmostEqual((P["P"][1:-1] - 10 * V["V"][1:-1]).sum().values, 0.00, places=2) def test_velocity_to_power_V_type_error(self): V = "invalid_type" # String instead of pd.Series or pd.DataFrame @@ -274,7 +274,7 @@ def test_plot_flow_duration_curve(self): f = river.resource.exceedance_probability(self.data.Q) plt.figure() - river.graphics.plot_flow_duration_curve(self.data["Q"], f) + river.graphics.plot_flow_duration_curve(self.data["Q"], f["F"]) plt.savefig(filename, format="png") plt.close() @@ -287,7 +287,7 @@ def test_plot_power_duration_curve(self): f = river.resource.exceedance_probability(self.data.Q) plt.figure() - river.graphics.plot_flow_duration_curve(self.results["P_control"], f) + river.graphics.plot_flow_duration_curve(self.results["P_control"], f["F"]) plt.savefig(filename, format="png") plt.close() @@ -300,7 +300,7 @@ def test_plot_velocity_duration_curve(self): f = river.resource.exceedance_probability(self.data.Q) plt.figure() - river.graphics.plot_velocity_duration_curve(self.results["V_control"], f) + river.graphics.plot_velocity_duration_curve(self.results["V_control"], f["F"]) plt.savefig(filename, format="png") plt.close() From 0e672cca2e4c6cd10d431cbe90626152a739fd0e Mon Sep 17 00:00:00 2001 From: akeeste Date: Fri, 8 Mar 2024 10:59:52 -0600 Subject: [PATCH 34/42] black formatting --- mhkit/river/resource.py | 10 +++++----- mhkit/tests/river/test_resource.py | 8 ++++++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/mhkit/river/resource.py b/mhkit/river/resource.py index 8b3c0faee..9234f3935 100644 --- a/mhkit/river/resource.py +++ b/mhkit/river/resource.py @@ -73,8 +73,8 @@ def exceedance_probability(D, dimension="", to_pandas=True): rank = len(D[dimension]) - rank + 1 # convert to descending rank F = 100 * rank / (len(D[dimension]) + 1) F.name = "F" - - F = F.to_dataset() # for matlab + + F = F.to_dataset() # for matlab if to_pandas: F = F.to_pandas() @@ -175,8 +175,8 @@ def discharge_to_velocity(D, polynomial_coefficients, dimension="", to_pandas=Tr coords={dimension: D[dimension]}, ) V.name = "V" - - V = V.to_dataset() # for matlab + + V = V.to_dataset() # for matlab if to_pandas: V = V.to_pandas() @@ -240,7 +240,7 @@ def velocity_to_power( P = xr.DataArray(data=power, dims=dimension, coords={dimension: V[dimension]}) P.name = "P" - + P = P.to_dataset() if to_pandas: diff --git a/mhkit/tests/river/test_resource.py b/mhkit/tests/river/test_resource.py index 2d920c997..8b3a73023 100644 --- a/mhkit/tests/river/test_resource.py +++ b/mhkit/tests/river/test_resource.py @@ -178,13 +178,17 @@ def test_velocity_to_power_xarray(self): cut_in = V["V"].values[1] cut_out = V["V"].values[-2] # Power should be 10x greater and exclude the ends of V - P = river.resource.velocity_to_power(V["V"], p2, cut_in, cut_out, to_pandas=False) + P = river.resource.velocity_to_power( + V["V"], p2, cut_in, cut_out, to_pandas=False + ) # Cut in power zero self.assertAlmostEqual(P["P"][0], 0.00, places=2) # Cut out power zero self.assertAlmostEqual(P["P"][-1], 0.00, places=2) # Middle 10x greater than velocity - self.assertAlmostEqual((P["P"][1:-1] - 10 * V["V"][1:-1]).sum().values, 0.00, places=2) + self.assertAlmostEqual( + (P["P"][1:-1] - 10 * V["V"][1:-1]).sum().values, 0.00, places=2 + ) def test_velocity_to_power_V_type_error(self): V = 
"invalid_type" # String instead of pd.Series or pd.DataFrame From e103d738681589be6f5279c763b807f287d8ae57 Mon Sep 17 00:00:00 2001 From: akeeste Date: Fri, 8 Mar 2024 14:19:16 -0600 Subject: [PATCH 35/42] fix final call to exceedance_probability --- mhkit/tidal/graphics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mhkit/tidal/graphics.py b/mhkit/tidal/graphics.py index 574ad7500..d97767738 100644 --- a/mhkit/tidal/graphics.py +++ b/mhkit/tidal/graphics.py @@ -475,9 +475,9 @@ def tidal_phase_exceedance(directions, velocities, flood, ebb, bin_size=0.1, ax= s_ebb = velocities[isEbb] s_flood = velocities[~isEbb] - F = exceedance_probability(velocities) - F_ebb = exceedance_probability(s_ebb) - F_flood = exceedance_probability(s_flood) + F = exceedance_probability(velocities)["F"] + F_ebb = exceedance_probability(s_ebb)["F"] + F_flood = exceedance_probability(s_flood)["F"] decimals = round(bin_size / 0.1) s_new = np.arange( From f55ed3d2376fe3d15d5821a85a7a4f8a1ef93399 Mon Sep 17 00:00:00 2001 From: akeeste Date: Wed, 13 Mar 2024 12:03:14 -0500 Subject: [PATCH 36/42] move convert_to_dataset, convert_to_dataarray to type_handling --- mhkit/utils/__init__.py | 2 +- mhkit/utils/data_utils.py | 153 ----------------------------------- mhkit/utils/type_handling.py | 150 ++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+), 154 deletions(-) delete mode 100644 mhkit/utils/data_utils.py diff --git a/mhkit/utils/__init__.py b/mhkit/utils/__init__.py index 423191e1d..3be7cee04 100644 --- a/mhkit/utils/__init__.py +++ b/mhkit/utils/__init__.py @@ -8,6 +8,6 @@ ) from .cache import handle_caching, clear_cache from .upcrossing import upcrossing, peaks, troughs, heights, periods, custom -from .data_utils import convert_to_dataset, convert_to_dataArray +from .type_handling import to_numeric_array, convert_to_dataset, convert_to_dataArray _matlab = False # Private variable indicating if mhkit is run through matlab diff --git a/mhkit/utils/data_utils.py b/mhkit/utils/data_utils.py deleted file mode 100644 index 063a969e3..000000000 --- a/mhkit/utils/data_utils.py +++ /dev/null @@ -1,153 +0,0 @@ -import numpy as np -import pandas as pd -import xarray as xr - - -def convert_to_dataset(data, name="data"): - """ - Converts the given data to an xarray.Dataset. - - This function is designed to handle inputs that can be either a pandas DataFrame, a pandas Series, - an xarray DataArray, or an xarray Dataset. It ensures that the output is consistently an xarray.Dataset. - - Parameters - ---------- - data: pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset - The data to be converted. - - name: str (Optional) - The name to assign to the data variable in case the input is an xarray DataArray without a name. - Default value is 'data'. - - Returns - ------- - xarray.Dataset - The input data converted to an xarray.Dataset. If the input is already an xarray.Dataset, - it is returned as is. - - Examples - -------- - >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) - >>> ds = convert_to_dataset(df) - >>> type(ds) - - - >>> series = pd.Series([1, 2, 3], name='C') - >>> ds = convert_to_dataset(series) - >>> type(ds) - - - >>> data_array = xr.DataArray([1, 2, 3]) - >>> ds = convert_to_dataset(data_array, name='D') - >>> type(ds) - - """ - if not isinstance(data, (pd.DataFrame, pd.Series, xr.DataArray, xr.Dataset)): - raise TypeError( - "Input data must be of type pandas.DataFrame, pandas.Series, " - "xarray.DataArray, or xarray.Dataset." - f"Got {type(data)}." 
- ) - - if not isinstance(name, str): - raise TypeError("The 'name' parameter must be a string" f"Got {type(name)}.") - - # Takes data that could be pd.DataFrame, pd.Series, xr.DataArray, or - # xr.Dataset and converts it to xr.Dataset - if isinstance(data, (pd.DataFrame, pd.Series)): - data = data.to_xarray() - - if isinstance(data, xr.DataArray): - # xr.DataArray.to_dataset() breaks if the data variable is unnamed - if data.name == None: - data.name = name - data = data.to_dataset() - - return data - - -def convert_to_dataArray(data, name="data"): - """ - Converts the given data to an xarray.DataArray. - - This function is designed to handle inputs that can be either a numpy ndarray, pandas Series, - or an xarray DataArray. For convenience, pandas DataFrame and xarray Dataset can also be input - but may only contain a single variable. The function ensures that the output is consistently - an xarray.DataArray. - - Parameters - ---------- - data: numpy ndarray, pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset - The data to be converted. - - name: str (Optional) - The name to overwrite the name of the input data variable for pandas or xarray input. - Default value is 'data'. - - Returns - ------- - xarray.DataArray - The input data converted to an xarray.DataArray. If the input is already an xarray.DataArray, - it is returned as is. - - Examples - -------- - >>> df = pd.DataFrame({'A': [1, 2, 3]}) - >>> da = convert_to_dataArray(df) - >>> type(da) - - - >>> series = pd.Series([1, 2, 3], name='C') - >>> da = convert_to_dataArray(series) - >>> type(da) - - - >>> data_array = xr.DataArray([1, 2, 3]) - >>> da = convert_to_dataArray(data_array, name='D') - >>> type(da) - - """ - if not isinstance( - data, (np.ndarray, pd.DataFrame, pd.Series, xr.DataArray, xr.Dataset) - ): - raise TypeError( - "Input data must be of type np.ndarray, pandas.DataFrame, pandas.Series, " - "xarray.DataArray, or xarray.Dataset" - ) - - if not isinstance(name, str): - raise TypeError("The 'name' parameter must be a string") - - # Checks pd.DataFrame input and converts to pd.Series if possible - if isinstance(data, pd.DataFrame): - if data.shape[1] > 1: - raise ValueError( - "If the input data is a pd.DataFrame or xr.Dataset, it must contain one variable. Got {data.shape[1]}" - ) - else: - data = data.squeeze() - - # Checks xr.Dataset input and converts to xr.DataArray if possible - if isinstance(data, xr.Dataset): - if len(data.keys()) > 1: - raise ValueError( - "If the input data is a pd.DataFrame or xr.Dataset, it must contain one variable. Got {len(data.keys())}" - ) - else: - data = data.to_array() - - # Converts pd.Series to xr.DataArray - if isinstance(data, pd.Series): - data = data.to_xarray() - - # Converts np.ndarray to xr.DataArray. Assigns a simple 0-based dimension named index - if isinstance(data, np.ndarray): - data = xr.DataArray( - data=data, dims="index", coords={"index": np.arange(len(data))} - ) - - # If there's no data name, add one to prevent issues calling or converting the dataArray later one - if data.name == None: - data.name = name - - return data diff --git a/mhkit/utils/type_handling.py b/mhkit/utils/type_handling.py index 844850b2d..5946877a5 100644 --- a/mhkit/utils/type_handling.py +++ b/mhkit/utils/type_handling.py @@ -21,3 +21,153 @@ def to_numeric_array(data, name): ) ) return data + + +def convert_to_dataset(data, name="data"): + """ + Converts the given data to an xarray.Dataset. 
+ + This function is designed to handle inputs that can be either a pandas DataFrame, a pandas Series, + an xarray DataArray, or an xarray Dataset. It ensures that the output is consistently an xarray.Dataset. + + Parameters + ---------- + data: pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset + The data to be converted. + + name: str (Optional) + The name to assign to the data variable in case the input is an xarray DataArray without a name. + Default value is 'data'. + + Returns + ------- + xarray.Dataset + The input data converted to an xarray.Dataset. If the input is already an xarray.Dataset, + it is returned as is. + + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + >>> ds = convert_to_dataset(df) + >>> type(ds) + + + >>> series = pd.Series([1, 2, 3], name='C') + >>> ds = convert_to_dataset(series) + >>> type(ds) + + + >>> data_array = xr.DataArray([1, 2, 3]) + >>> ds = convert_to_dataset(data_array, name='D') + >>> type(ds) + + """ + if not isinstance(data, (pd.DataFrame, pd.Series, xr.DataArray, xr.Dataset)): + raise TypeError( + "Input data must be of type pandas.DataFrame, pandas.Series, " + "xarray.DataArray, or xarray.Dataset." + f"Got {type(data)}." + ) + + if not isinstance(name, str): + raise TypeError("The 'name' parameter must be a string" f"Got {type(name)}.") + + # Takes data that could be pd.DataFrame, pd.Series, xr.DataArray, or + # xr.Dataset and converts it to xr.Dataset + if isinstance(data, (pd.DataFrame, pd.Series)): + data = data.to_xarray() + + if isinstance(data, xr.DataArray): + # xr.DataArray.to_dataset() breaks if the data variable is unnamed + if data.name == None: + data.name = name + data = data.to_dataset() + + return data + + +def convert_to_dataArray(data, name="data"): + """ + Converts the given data to an xarray.DataArray. + + This function is designed to handle inputs that can be either a numpy ndarray, pandas Series, + or an xarray DataArray. For convenience, pandas DataFrame and xarray Dataset can also be input + but may only contain a single variable. The function ensures that the output is consistently + an xarray.DataArray. + + Parameters + ---------- + data: numpy ndarray, pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset + The data to be converted. + + name: str (Optional) + The name to overwrite the name of the input data variable for pandas or xarray input. + Default value is 'data'. + + Returns + ------- + xarray.DataArray + The input data converted to an xarray.DataArray. If the input is already an xarray.DataArray, + it is returned as is. + + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2, 3]}) + >>> da = convert_to_dataArray(df) + >>> type(da) + + + >>> series = pd.Series([1, 2, 3], name='C') + >>> da = convert_to_dataArray(series) + >>> type(da) + + + >>> data_array = xr.DataArray([1, 2, 3]) + >>> da = convert_to_dataArray(data_array, name='D') + >>> type(da) + + """ + if not isinstance( + data, (np.ndarray, pd.DataFrame, pd.Series, xr.DataArray, xr.Dataset) + ): + raise TypeError( + "Input data must be of type np.ndarray, pandas.DataFrame, pandas.Series, " + "xarray.DataArray, or xarray.Dataset" + ) + + if not isinstance(name, str): + raise TypeError("The 'name' parameter must be a string") + + # Checks pd.DataFrame input and converts to pd.Series if possible + if isinstance(data, pd.DataFrame): + if data.shape[1] > 1: + raise ValueError( + "If the input data is a pd.DataFrame or xr.Dataset, it must contain one variable. 
Got {data.shape[1]}" + ) + else: + data = data.squeeze() + + # Checks xr.Dataset input and converts to xr.DataArray if possible + if isinstance(data, xr.Dataset): + if len(data.keys()) > 1: + raise ValueError( + "If the input data is a pd.DataFrame or xr.Dataset, it must contain one variable. Got {len(data.keys())}" + ) + else: + data = data.to_array() + + # Converts pd.Series to xr.DataArray + if isinstance(data, pd.Series): + data = data.to_xarray() + + # Converts np.ndarray to xr.DataArray. Assigns a simple 0-based dimension named index + if isinstance(data, np.ndarray): + data = xr.DataArray( + data=data, dims="index", coords={"index": np.arange(len(data))} + ) + + # If there's no data name, add one to prevent issues calling or converting the dataArray later one + if data.name == None: + data.name = name + + return data From 0db0cf0f59693071a29d918e8871786f74f2ef7f Mon Sep 17 00:00:00 2001 From: akeeste Date: Wed, 13 Mar 2024 13:15:54 -0500 Subject: [PATCH 37/42] replace out variables with descriptive name --- mhkit/tidal/performance.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/mhkit/tidal/performance.py b/mhkit/tidal/performance.py index 9015a4f96..f9e466cfc 100644 --- a/mhkit/tidal/performance.py +++ b/mhkit/tidal/performance.py @@ -176,7 +176,7 @@ def power_curve( Returns --------- - out: pandas DataFrame or xarray Dataset + device_power_curve: pandas DataFrame or xarray Dataset Power-weighted velocity, mean power, power std dev, max and min power vs hub-height velocity. """ @@ -290,7 +290,7 @@ def power_curve( P_bar_max = P_bar_vel.groupby_bins("speed", U_bins).max() P_bar_min = P_bar_vel.groupby_bins("speed", U_bins).min() - out = xr.Dataset( + device_power_curve = xr.Dataset( { "U_avg": U_hub_mean, "U_avg_power_weighted": U_hat_mean, @@ -300,12 +300,12 @@ def power_curve( "P_min": P_bar_min, } ) - out = out.rename({"speed_bins": "U_bins"}) + device_power_curve = device_power_curve.rename({"speed_bins": "U_bins"}) if to_pandas: - out = out.to_pandas() + device_power_curve = device_power_curve.to_pandas() - return out + return device_power_curve def _average_velocity_bins(U, U_hub, bin_size): @@ -324,7 +324,7 @@ def _average_velocity_bins(U, U_hub, bin_size): Returns --------- - xarray.DataArray + U_binned: xarray.DataArray Data grouped into velocity bins. """ @@ -332,10 +332,10 @@ def _average_velocity_bins(U, U_hub, bin_size): U_bins = np.arange(0, np.nanmax(U_hub) + bin_size, bin_size) # Group time-ensembles into velocity bins based on hub-height velocity and average - out = U.assign_coords({"time": U_hub}).rename({"time": "speed"}) - out = out.groupby_bins("speed", U_bins).mean() + U_binned = U.assign_coords({"time": U_hub}).rename({"time": "speed"}) + U_binned = U_binned.groupby_bins("speed", U_bins).mean() - return out + return U_binned def _apply_function(function, bnr, U): @@ -511,7 +511,7 @@ def device_efficiency( Returns --------- - pandas.Series + device_eta : pandas.Series or xarray.DataArray Device efficiency (power coefficient) in percent. 
""" @@ -558,13 +558,13 @@ def device_efficiency( # Efficiency eta = P_vel / P_resource - out = xr.Dataset({"U_avg": vel_hub, "Efficiency": eta}) - out = out.rename({"speed_bins": "U_bins"}) + device_eta = xr.Dataset({"U_avg": vel_hub, "Efficiency": eta}) + device_eta = device_eta.rename({"speed_bins": "U_bins"}) if to_pandas: - out = out.to_pandas() + device_eta = device_eta.to_pandas() - return out + return device_eta def _calculate_density(water_density, bnr, mean_hub_vel, time): From 4768ad93ce5beac2b4545be37e65b8ea8388cfb9 Mon Sep 17 00:00:00 2001 From: akeeste Date: Wed, 13 Mar 2024 13:17:04 -0500 Subject: [PATCH 38/42] correct test function name --- mhkit/tests/utils/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mhkit/tests/utils/test_utils.py b/mhkit/tests/utils/test_utils.py index 1ff252ab6..9b39250c5 100644 --- a/mhkit/tests/utils/test_utils.py +++ b/mhkit/tests/utils/test_utils.py @@ -160,7 +160,7 @@ def test_magnitude_phase_3D(self): self.assertTrue(all(theta == phase1)) self.assertTrue(all(phi == phase2)) - def convert_to_dataArray(self): + def test_convert_to_dataArray(self): # test data a = 5 t = np.arange(0, 5, 0.5) From 5b7b796b0f158e1e72c56654b05be2681e94cbc5 Mon Sep 17 00:00:00 2001 From: akeeste Date: Wed, 13 Mar 2024 13:22:36 -0500 Subject: [PATCH 39/42] correct case on convert_to_dataArray --- mhkit/river/graphics.py | 4 ++-- mhkit/river/resource.py | 10 +++++----- mhkit/tests/utils/test_utils.py | 20 ++++++++++---------- mhkit/tidal/graphics.py | 6 +++--- mhkit/tidal/performance.py | 12 ++++++------ mhkit/tidal/resource.py | 4 ++-- mhkit/utils/__init__.py | 2 +- mhkit/utils/type_handling.py | 8 ++++---- 8 files changed, 33 insertions(+), 33 deletions(-) diff --git a/mhkit/river/graphics.py b/mhkit/river/graphics.py index 57e89fd61..396ce1271 100644 --- a/mhkit/river/graphics.py +++ b/mhkit/river/graphics.py @@ -1,7 +1,7 @@ import numpy as np import xarray as xr import matplotlib.pyplot as plt -from mhkit.utils import convert_to_dataArray +from mhkit.utils import convert_to_dataarray def _xy_plot(x, y, fmt=".", label=None, xlabel=None, ylabel=None, title=None, ax=None): @@ -199,7 +199,7 @@ def plot_discharge_timeseries(Q, time_dimension="", label=None, ax=None): ax : matplotlib pyplot axes """ - Q = convert_to_dataArray(Q) + Q = convert_to_dataarray(Q) if time_dimension == "": time_dimension = list(Q.coords)[0] diff --git a/mhkit/river/resource.py b/mhkit/river/resource.py index 9234f3935..2a0e06ffd 100644 --- a/mhkit/river/resource.py +++ b/mhkit/river/resource.py @@ -2,7 +2,7 @@ import numpy as np from scipy.stats import linregress as _linregress from scipy.stats import rv_histogram as _rv_histogram -from mhkit.utils import convert_to_dataArray +from mhkit.utils import convert_to_dataarray def Froude_number(v, h, g=9.80665): @@ -63,7 +63,7 @@ def exceedance_probability(D, dimension="", to_pandas=True): if not isinstance(to_pandas, bool): raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}") - D = convert_to_dataArray(D) + D = convert_to_dataarray(D) if dimension == "": dimension = list(D.coords)[0] @@ -163,7 +163,7 @@ def discharge_to_velocity(D, polynomial_coefficients, dimension="", to_pandas=Tr if not isinstance(to_pandas, bool): raise TypeError(f"to_pandas must be of type str. 
Got: {type(to_pandas)}") - D = convert_to_dataArray(D) + D = convert_to_dataarray(D) if dimension == "": dimension = list(D.coords)[0] @@ -226,7 +226,7 @@ def velocity_to_power( if not isinstance(to_pandas, bool): raise TypeError(f"to_pandas must be of type str. Got: {type(to_pandas)}") - V = convert_to_dataArray(V) + V = convert_to_dataarray(V) if dimension == "": dimension = list(V.coords)[0] @@ -269,7 +269,7 @@ def energy_produced(P, seconds): if not isinstance(seconds, (int, float)): raise TypeError(f"seconds must be of type int or float. Got: {type(seconds)}") - P = convert_to_dataArray(P) + P = convert_to_dataarray(P) # Calculate Histogram of power H, edges = np.histogram(P, 100) diff --git a/mhkit/tests/utils/test_utils.py b/mhkit/tests/utils/test_utils.py index 9b39250c5..d8486568c 100644 --- a/mhkit/tests/utils/test_utils.py +++ b/mhkit/tests/utils/test_utils.py @@ -160,7 +160,7 @@ def test_magnitude_phase_3D(self): self.assertTrue(all(theta == phase1)) self.assertTrue(all(phi == phase2)) - def test_convert_to_dataArray(self): + def test_convert_to_dataarray(self): # test data a = 5 t = np.arange(0, 5, 0.5) @@ -190,46 +190,46 @@ def test_convert_to_dataArray(self): ) # numpy - n = utils.convert_to_dataArray(test_n, "test_data") + n = utils.convert_to_dataarray(test_n, "test_data") self.assertIsInstance(n, xr.DataArray) self.assertTrue(all(n.data == d1)) self.assertEqual(n.name, "test_data") # Series - s = utils.convert_to_dataArray(test_s) + s = utils.convert_to_dataarray(test_s) self.assertIsInstance(s, xr.DataArray) self.assertTrue(all(s.data == d1)) # DataArray - da = utils.convert_to_dataArray(test_da) + da = utils.convert_to_dataarray(test_da) self.assertIsInstance(da, xr.DataArray) self.assertTrue(all(da.data == d1)) # Dataframe - df = utils.convert_to_dataArray(test_df) + df = utils.convert_to_dataarray(test_df) self.assertIsInstance(df, xr.DataArray) self.assertTrue(all(df.data == d1)) # Dataset - ds = utils.convert_to_dataArray(test_ds) + ds = utils.convert_to_dataarray(test_ds) self.assertIsInstance(ds, xr.DataArray) self.assertTrue(all(ds.data == d1)) # int (error) with self.assertRaises(TypeError): - utils.convert_to_dataArray(a) + utils.convert_to_dataarray(a) # non-string name (error) with self.assertRaises(TypeError): - utils.convert_to_dataArray(test_n, 5) + utils.convert_to_dataarray(test_n, 5) # Multivariate Dataframe (error) with self.assertRaises(ValueError): - utils.convert_to_dataArray(test_df2) + utils.convert_to_dataarray(test_df2) # Multivariate Dataset (error) with self.assertRaises(ValueError): - utils.convert_to_dataArray(test_ds2) + utils.convert_to_dataarray(test_ds2) def test_convert_to_dataset(self): # test data diff --git a/mhkit/tidal/graphics.py b/mhkit/tidal/graphics.py index d97767738..0483f2080 100644 --- a/mhkit/tidal/graphics.py +++ b/mhkit/tidal/graphics.py @@ -6,7 +6,7 @@ from mhkit.river.resource import exceedance_probability from mhkit.tidal.resource import _histogram, _flood_or_ebb from mhkit.river.graphics import plot_velocity_duration_curve, _xy_plot -from mhkit.utils import convert_to_dataArray +from mhkit.utils import convert_to_dataarray def _initialize_polar(ax=None, metadata=None, flood=None, ebb=None): @@ -96,8 +96,8 @@ def _check_inputs(directions, velocities, flood, ebb): Direction in degrees added to theta ticks """ - velocities = convert_to_dataArray(velocities) - directions = convert_to_dataArray(directions) + velocities = convert_to_dataarray(velocities) + directions = convert_to_dataarray(directions) if len(velocities) != 
len(directions): raise ValueError("velocities and directions must have the same length") diff --git a/mhkit/tidal/performance.py b/mhkit/tidal/performance.py index f9e466cfc..7ea260e0b 100644 --- a/mhkit/tidal/performance.py +++ b/mhkit/tidal/performance.py @@ -1,6 +1,6 @@ import numpy as np import xarray as xr -from mhkit.utils import convert_to_dataArray +from mhkit.utils import convert_to_dataarray from mhkit import dolfyn from mhkit.river.performance import ( @@ -183,8 +183,8 @@ def power_curve( # Velocity should be a 2D xarray or pandas array and have dims (range, time) # Power should have a timestamp coordinate/index - power = convert_to_dataArray(power) - velocity = convert_to_dataArray(velocity) + power = convert_to_dataarray(power) + velocity = convert_to_dataarray(velocity) if len(velocity.shape) != 2: raise ValueError( "Velocity should be 2 dimensional and have \ @@ -426,7 +426,7 @@ def velocity_profiles( Average velocity profiles based on ensemble mean velocity. """ - velocity = convert_to_dataArray(velocity, "velocity") + velocity = convert_to_dataarray(velocity, "velocity") if len(velocity.shape) != 2: raise ValueError( "Velocity should be 2 dimensional and have \ @@ -517,8 +517,8 @@ def device_efficiency( # Velocity should be a 2D xarray or pandas array and have dims (range, time) # Power should have a timestamp coordinate/index - power = convert_to_dataArray(power, "power") - velocity = convert_to_dataArray(velocity, "velocity") + power = convert_to_dataarray(power, "power") + velocity = convert_to_dataarray(velocity, "velocity") if len(velocity.shape) != 2: raise ValueError( "Velocity should be 2 dimensional and have \ diff --git a/mhkit/tidal/resource.py b/mhkit/tidal/resource.py index 76f041e6a..e6b6d21c4 100644 --- a/mhkit/tidal/resource.py +++ b/mhkit/tidal/resource.py @@ -1,7 +1,7 @@ import numpy as np import math from mhkit.river.resource import exceedance_probability, Froude_number -from mhkit.utils import convert_to_dataArray +from mhkit.utils import convert_to_dataarray def _histogram(directions, velocities, width_dir, width_vel): @@ -97,7 +97,7 @@ def principal_flow_directions(directions, width_dir): ebb based on knowledge of the measurement site. """ - directions = convert_to_dataArray(directions) + directions = convert_to_dataarray(directions) if any(directions < 0) or any(directions > 360): violating_values = [d for d in directions if d < 0 or d > 360] raise ValueError( diff --git a/mhkit/utils/__init__.py b/mhkit/utils/__init__.py index 3be7cee04..298509fa9 100644 --- a/mhkit/utils/__init__.py +++ b/mhkit/utils/__init__.py @@ -8,6 +8,6 @@ ) from .cache import handle_caching, clear_cache from .upcrossing import upcrossing, peaks, troughs, heights, periods, custom -from .type_handling import to_numeric_array, convert_to_dataset, convert_to_dataArray +from .type_handling import to_numeric_array, convert_to_dataset, convert_to_dataarray _matlab = False # Private variable indicating if mhkit is run through matlab diff --git a/mhkit/utils/type_handling.py b/mhkit/utils/type_handling.py index 5946877a5..3c9a91a0d 100644 --- a/mhkit/utils/type_handling.py +++ b/mhkit/utils/type_handling.py @@ -86,7 +86,7 @@ def convert_to_dataset(data, name="data"): return data -def convert_to_dataArray(data, name="data"): +def convert_to_dataarray(data, name="data"): """ Converts the given data to an xarray.DataArray. 
@@ -113,17 +113,17 @@ def convert_to_dataArray(data, name="data"):

     Examples
     --------
     >>> df = pd.DataFrame({'A': [1, 2, 3]})
-    >>> da = convert_to_dataArray(df)
+    >>> da = convert_to_dataarray(df)
     >>> type(da)
     <class 'xarray.core.dataarray.DataArray'>

     >>> series = pd.Series([1, 2, 3], name='C')
-    >>> da = convert_to_dataArray(series)
+    >>> da = convert_to_dataarray(series)
     >>> type(da)
     <class 'xarray.core.dataarray.DataArray'>

     >>> data_array = xr.DataArray([1, 2, 3])
-    >>> da = convert_to_dataArray(data_array, name='D')
+    >>> da = convert_to_dataarray(data_array, name='D')
     >>> type(da)
     <class 'xarray.core.dataarray.DataArray'>

     """

From 151f77d5fc4f9a47bd4683c04de0e7b58ed794d7 Mon Sep 17 00:00:00 2001
From: akeeste
Date: Wed, 13 Mar 2024 13:23:42 -0500
Subject: [PATCH 40/42] update return variable name in velocity_profiles

---
 mhkit/tidal/performance.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mhkit/tidal/performance.py b/mhkit/tidal/performance.py
index 7ea260e0b..3a516bec7 100644
--- a/mhkit/tidal/performance.py
+++ b/mhkit/tidal/performance.py
@@ -422,7 +422,7 @@ def velocity_profiles(

     Returns
     ---------
-    pandas.DataFrame
+    iec_profiles: pandas.DataFrame
         Average velocity profiles based on ensemble mean velocity.

     """

From 8b9b32d83c95023d90cdfccf0d427e593b1c52f5 Mon Sep 17 00:00:00 2001
From: akeeste
Date: Wed, 13 Mar 2024 14:30:56 -0500
Subject: [PATCH 41/42] update handling of dataset to dataarray

---
 mhkit/tests/utils/test_utils.py | 4 ++--
 mhkit/utils/type_handling.py    | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/mhkit/tests/utils/test_utils.py b/mhkit/tests/utils/test_utils.py
index d8486568c..cebbbd6f5 100644
--- a/mhkit/tests/utils/test_utils.py
+++ b/mhkit/tests/utils/test_utils.py
@@ -163,8 +163,8 @@ def test_magnitude_phase_3D(self):
     def test_convert_to_dataarray(self):
         # test data
         a = 5
-        t = np.arange(0, 5, 0.5)
-        i = np.arange(0, 11, 1)
+        t = np.arange(0., 5., 0.5)
+        i = np.arange(0., 10., 1)
         d1 = i**2 / 5.0
         d2 = -d1

diff --git a/mhkit/utils/type_handling.py b/mhkit/utils/type_handling.py
index 3c9a91a0d..4f4f33d02 100644
--- a/mhkit/utils/type_handling.py
+++ b/mhkit/utils/type_handling.py
@@ -149,12 +149,14 @@ def convert_to_dataarray(data, name="data"):

     # Checks xr.Dataset input and converts to xr.DataArray if possible
     if isinstance(data, xr.Dataset):
-        if len(data.keys()) > 1:
+        keys = list(data.keys())
+        if len(keys) > 1:
             raise ValueError(
                 f"If the input data is a pd.DataFrame or xr.Dataset, it must contain one variable. Got {len(data.keys())}"
             )
         else:
             data = data.to_array()
+            data = data.sel(variable=keys[0]) # removes the variable dimension, further simplifying the dataarray

     # Converts pd.Series to xr.DataArray
     if isinstance(data, pd.Series):
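
The sel call added in the commit above is easy to miss: Dataset.to_array() stacks the
data variables along a new "variable" dimension, so even a one-variable Dataset comes
back with an extra dimension. Selecting the single key as a scalar label then drops
that dimension again. A standalone sketch in plain xarray (the variable name "A" and
dimension name "index" are illustrative only):

    import xarray as xr

    ds = xr.Dataset({"A": ("index", [1, 2, 3])})

    stacked = ds.to_array()
    print(stacked.dims)  # ('variable', 'index') -- one extra dimension

    # Selecting a scalar label along "variable" removes that dimension,
    # leaving the plain 1-D DataArray the caller expects.
    da = stacked.sel(variable="A")
    print(da.dims)  # ('index',)
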

From 6c84e2083d0e938006d53d94dd4a7a5dc39f613b Mon Sep 17 00:00:00 2001
From: akeeste
Date: Wed, 13 Mar 2024 14:32:36 -0500
Subject: [PATCH 42/42] black formatting

---
 mhkit/tests/utils/test_utils.py | 4 ++--
 mhkit/utils/type_handling.py    | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/mhkit/tests/utils/test_utils.py b/mhkit/tests/utils/test_utils.py
index cebbbd6f5..315d0de19 100644
--- a/mhkit/tests/utils/test_utils.py
+++ b/mhkit/tests/utils/test_utils.py
@@ -163,8 +163,8 @@ def test_magnitude_phase_3D(self):
     def test_convert_to_dataarray(self):
         # test data
         a = 5
-        t = np.arange(0., 5., 0.5)
-        i = np.arange(0., 10., 1)
+        t = np.arange(0.0, 5.0, 0.5)
+        i = np.arange(0.0, 10.0, 1)
         d1 = i**2 / 5.0
         d2 = -d1

diff --git a/mhkit/utils/type_handling.py b/mhkit/utils/type_handling.py
index 4f4f33d02..5c6ce89c2 100644
--- a/mhkit/utils/type_handling.py
+++ b/mhkit/utils/type_handling.py
@@ -156,7 +156,9 @@ def convert_to_dataarray(data, name="data"):
             )
         else:
             data = data.to_array()
-            data = data.sel(variable=keys[0]) # removes the variable dimension, further simplifying the dataarray
+            data = data.sel(
+                variable=keys[0]
+            )  # removes the variable dimension, further simplifying the dataarray

     # Converts pd.Series to xr.DataArray
     if isinstance(data, pd.Series):