Skip to content

Commit 268e57b

Browse files
committed
generalize empty dataset generator, notebook updates
1 parent 37f1486 commit 268e57b

File tree

2 files changed

+3969
-193
lines changed

2 files changed

+3969
-193
lines changed

pvdeg/weather.py

+127-9
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,25 @@
1818
import xarray as xr
1919

2020

21+
TIME_PERIODICITY_MAP = {
22+
# key: pandas frequency string (the `freq` argument of `pd.date_range`); value: number of time entries in one 365-day year
23+
# "h" and "1h" are aliases for hourly data; keep this map consistent with ENTRIES_PERIODICITY_MAP
24+
"h" : 8760,
25+
"1h" : 8760,
26+
"30min": 17520,
27+
"15min": 35040,
28+
}
29+
30+
ENTRIES_PERIODICITY_MAP = {
31+
# key: number of time entries in one 365-day year; value: pandas frequency string (the `freq` argument of `pd.date_range`)
32+
# inverse lookup of TIME_PERIODICITY_MAP; keep the two maps consistent
33+
8760: "1h",
34+
17520: "30min",
35+
35040: "15min",
36+
}
37+
38+
39+
2140
def get(database, id=None, geospatial=False, **kwargs):
2241
"""
2342
Load weather data directly from NSRDB or through any other PVLIB i/o
@@ -957,7 +976,8 @@ def get_anywhere(database = "PSM3", id=None, **kwargs):
957976
return weather_db, meta
958977

959978

960-
def process_pvgis_distributed(weather_df):
979+
# TODO: finish generalizing this helper; its name no longer references PVGIS but its docstring still does
980+
def _process_weather_result_distributed(weather_df):
961981
"""Create an xarray.Dataset using numpy array backend from a pvgis weather dataframe"""
962982

963983
import dask.array as da
@@ -980,9 +1000,22 @@ def _weather_distributed_vec(
9801000
):
9811001
"""
9821002
Distributed weather calculation for use with dask futures/delayed
1003+
1004+
Parameters
1005+
----------
1006+
database: str
1007+
database/source from `pvdeg.weather.get`
1008+
coord: tuple[float]
1009+
(latitude, longitude) coordinate pair. (`pvdeg.weather.get` id)
1010+
api_key: str
1011+
NSRDB developer api key (see `pvdeg.weather.get`)
1012+
email: str
1013+
NSRDB developer email (see `pvdeg.weather.get`)
9831014
984-
Returns ds, dict, None if unsucessfull
985-
Returns None, None, Exception if unsucessfull
1015+
Returns
1016+
-------
1017+
Returns ds, dict, None if successful
1018+
Returns None, None, Exception if unsuccessful
9861019
"""
9871020

9881021
try:
@@ -996,10 +1029,84 @@ def _weather_distributed_vec(
9961029
except Exception as e:
9971030
return None, None, e
9981031

999-
weather_ds = process_pvgis_distributed(weather_df=weather_df)
1032+
weather_ds = _process_weather_result_distributed(weather_df=weather_df)
10001033

10011034
return weather_ds, meta_dict, None
10021035

1036+
1037+
def emtpy_weather_ds(gids_size, periodicity, database):
    """
    Create an empty weather dataset for generalized input.

    Parameters
    ----------
    gids_size: int
        number of entries to create along gid axis
    periodicity: str
        frequency, pandas `freq` string arg from `pd.date_range`.
        Must be one of the keys of `TIME_PERIODICITY_MAP`.

        .. code-block:: python

            "1h"
            "30min"
            "15min"
    database: str
        database from `pvdeg.weather.get`. For "NSRDB" and "PSM3" the
        additional NSRDB variables (Year, Month, Day, Hour, Minute,
        dew_point, albedo) are included in the dataset.

    Returns
    -------
    weather_ds: xarray.Dataset
        Weather dataset of the same format/shapes given by a `pvdeg.weather.get` geospatial call or `pvdeg.weather.weather_distributed` call or `GeospatialScenario.get_geospatial_data`.

    Raises
    ------
    KeyError
        If `periodicity` is not a key of `TIME_PERIODICITY_MAP`.
    """

    import dask.array as da

    # Resolve the number of time entries once and fail with a message that
    # lists the supported frequencies instead of a bare KeyError.
    try:
        time_size = TIME_PERIODICITY_MAP[periodicity]
    except KeyError:
        raise KeyError(
            f"unsupported periodicity {periodicity!r}, "
            f"expected one of {list(TIME_PERIODICITY_MAP)}"
        ) from None

    # pvgis default shapes
    shapes = {
        "temp_air": ("gid", "time"),
        "relative_humidity": ("gid", "time"),
        "ghi": ("gid", "time"),
        "dni": ("gid", "time"),
        "dhi": ("gid", "time"),
        "IR(h)": ("gid", "time"),
        "wind_speed": ("gid", "time"),
        "wind_direction": ("gid", "time"),
        "pressure": ("gid", "time"),
    }

    # additional results from NSRDB
    nsrdb_extra_shapes = {
        "Year": ("gid", "time"),
        "Month": ("gid", "time"),
        "Day": ("gid", "time"),
        "Hour": ("gid", "time"),
        "Minute": ("gid", "time"),
        "dew_point": ("gid", "time"),
        "albedo": ("gid", "time"),
    }

    if database in ("NSRDB", "PSM3"):
        shapes = shapes | nsrdb_extra_shapes

    dims_size = {"time": time_size, "gid": gids_size}

    weather_ds = xr.Dataset(
        data_vars={
            # uninitialized dask-backed placeholder with one axis per dimension
            var: (dim, da.empty([dims_size[d] for d in dim]))
            for var, dim in shapes.items()
        },
        coords={
            # the time axis always starts at the fixed date 2022-01-01
            "time": pd.date_range(
                "2022-01-01", freq=periodicity, periods=time_size
            ),
            # gids are the consecutive integers 0 .. gids_size - 1
            "gid": np.arange(gids_size, dtype=int),
        },
    )

    return weather_ds
1107+
1108+
1109+
10031110
def pvgis_hourly_empty_weather_ds(gids_size):
10041111
"""
10051112
Create an empty weather dataset for pvgis hourly TMY data
@@ -1016,6 +1123,8 @@ def pvgis_hourly_empty_weather_ds(gids_size):
10161123
"""
10171124
import dask.array as da
10181125

1126+
1127+
10191128
shapes = {
10201129
"temp_air": ("gid", "time"),
10211130
"relative_humidity": ("gid", "time"),
@@ -1077,7 +1186,6 @@ def weather_distributed(
10771186
list of tuples containing (latitude, longitude) coordinates
10781187
10791188
.. code-block:: python
1080-
10811189
coords_example = [
10821190
(49.95, 1.5),
10831191
(51.95, -9.5),
@@ -1123,17 +1231,27 @@ def weather_distributed(
11231231

11241232
gids_failed = []
11251233

1126-
weather_ds = pvgis_hourly_empty_weather_ds(len(results)) # create empty weather xr.dataset
1234+
time_length = weather_ds_collection[0].sizes["time"]
1235+
periodicity = ENTRIES_PERIODICITY_MAP[time_length]
1236+
1237+
# weather_ds = pvgis_hourly_empty_weather_ds(len(results)) # create empty weather xr.dataset
1238+
weather_ds = emtpy_weather_ds(
1239+
gids_size=len(results),
1240+
periodicity=periodicity,
1241+
database=database,
1242+
)
1243+
11271244
meta_df = pd.DataFrame.from_dict(meta_dict_collection) # create populated meta pd.DataFrame
11281245

1129-
# these gids will be spatially meaningless, they will only show corresponding entries between weather_ds and meta_df
1130-
for i, row in enumerate(results): # this loop can be refactored, kinda gross
1246+
# gids are spatially meaningless if data is from PVGIS, they will only show corresponding entries between weather_ds and meta_df
1247+
# only meaningful if data is from NSRDB
1248+
# this loop can be refactored, it is a little weird
1249+
for i, row in enumerate(results):
11311250

11321251
if row[2]:
11331252
gids_failed.append(i)
11341253
continue
11351254

1136-
# weather_ds[dict(gid=i)] = weather_ds_collection[i].to_xarray().drop_vars('time')
11371255
weather_ds[dict(gid=i)] = weather_ds_collection[i]
11381256

11391257
return weather_ds, meta_df, gids_failed

0 commit comments

Comments
 (0)