18
18
import xarray as xr
19
19
20
20
21
# Number of samples in one non-leap year for a given pandas time-frequency
# string (the `freq` argument of `pd.date_range`). Both "h" and "1h" denote
# hourly data, so they share the same entry count.
TIME_PERIODICITY_MAP = {
    "h": 8760,
    "1h": 8760,
    "30min": 17520,
    "15min": 35040,
}
29
+
30
# Inverse lookup: number of entries per year -> pandas time-frequency string
# (the `freq` argument of `pd.date_range`). Used to recover the periodicity
# of a weather dataset from its time-axis length.
ENTRIES_PERIODICITY_MAP = {
    8760: "1h",
    17520: "30min",
    35040: "15min",
}
37
+
38
+
39
+
21
40
def get (database , id = None , geospatial = False , ** kwargs ):
22
41
"""
23
42
Load weather data directly from NSRDB or through any other PVLIB i/o
@@ -957,7 +976,8 @@ def get_anywhere(database = "PSM3", id=None, **kwargs):
957
976
return weather_db , meta
958
977
959
978
960
- def process_pvgis_distributed (weather_df ):
979
+ # RENAME, THIS SHOULD NOT REFERENCE PVGIS
980
+ def _process_weather_result_distributed (weather_df ):
961
981
"""Create an xarray.Dataset using numpy array backend from a pvgis weather dataframe"""
962
982
963
983
import dask .array as da
@@ -980,9 +1000,22 @@ def _weather_distributed_vec(
980
1000
):
981
1001
"""
982
1002
Distributed weather calculation for use with dask futures/delayed
1003
+
1004
+ Parameters
1005
+ ----------
1006
+ database: str
1007
+ database/source from `pvdeg.weather.get`
1008
+ coord: tuple[float]
1009
+ (latitude, longitude) coordinate pair. (`pvdeg.weather.get` id)
1010
+ api_key: str
1011
+ NSRDB developer api key (see `pvdeg.weather.get`)
1012
+ email: str
1013
+ NSRDB developer email (see `pvdeg.weather.get`)
983
1014
984
- Returns ds, dict, None if unsucessfull
985
- Returns None, None, Exception if unsucessfull
1015
+ Returns
1016
+ --------
1017
+ Returns ds, dict, None if successful
1018
+ Returns None, None, Exception if unsuccessful
986
1019
"""
987
1020
988
1021
try :
@@ -996,10 +1029,84 @@ def _weather_distributed_vec(
996
1029
except Exception as e :
997
1030
return None , None , e
998
1031
999
- weather_ds = process_pvgis_distributed (weather_df = weather_df )
1032
+ weather_ds = _process_weather_result_distributed (weather_df = weather_df )
1000
1033
1001
1034
return weather_ds , meta_dict , None
1002
1035
1036
+
1037
def emtpy_weather_ds(gids_size, periodicity, database):
    """
    Create an empty weather dataset for generalized input.

    NOTE: the name contains a typo ("emtpy") but is kept unchanged for
    backward compatibility with existing callers.

    Parameters
    ----------
    gids_size: int
        number of entries to create along the gid axis
    periodicity: str
        frequency, pandas `freq` string arg from `pd.date_range`.
        Must be a key of `TIME_PERIODICITY_MAP`.

        .. code-block:: python
            "1h"
            "30min"
            "15min"
    database: str
        database from `pvdeg.weather.get`. "NSRDB" or "PSM3" adds the
        extra NSRDB variables (Year, Month, ..., dew_point, albedo).

    Returns
    -------
    weather_ds: xarray.Dataset
        Weather dataset of the same format/shapes given by a
        `pvdeg.weather.get` geospatial call, a
        `pvdeg.weather.weather_distributed` call, or
        `GeosptialScenario.get_geospatial_data`.

    Raises
    ------
    KeyError
        if `periodicity` is not a supported frequency string.
    """

    import dask.array as da

    try:
        time_length = TIME_PERIODICITY_MAP[periodicity]
    except KeyError:
        # same exception type as the bare dict lookup, but with a
        # message that tells the caller what the valid options are
        raise KeyError(
            f"invalid periodicity {periodicity!r}, "
            f"must be one of {list(TIME_PERIODICITY_MAP)}"
        ) from None

    # pvgis default variables; every variable is a (gid, time) array
    shapes = {
        "temp_air": ("gid", "time"),
        "relative_humidity": ("gid", "time"),
        "ghi": ("gid", "time"),
        "dni": ("gid", "time"),
        "dhi": ("gid", "time"),
        "IR(h)": ("gid", "time"),
        "wind_speed": ("gid", "time"),
        "wind_direction": ("gid", "time"),
        "pressure": ("gid", "time"),
    }

    # additional results from NSRDB
    nsrdb_extra_shapes = {
        'Year': ("gid", "time"),
        'Month': ("gid", "time"),
        'Day': ("gid", "time"),
        'Hour': ("gid", "time"),
        'Minute': ("gid", "time"),
        'dew_point': ("gid", "time"),
        'albedo': ("gid", "time")
    }

    # no per-variable or global attributes on an empty dataset
    attrs = {}
    global_attrs = {}

    dims_size = {'time': time_length, 'gid': gids_size}

    if database == "NSRDB" or database == "PSM3":
        shapes = shapes | nsrdb_extra_shapes

    # da.empty leaves values uninitialized; callers are expected to fill
    # every (gid, time) slot before reading it
    weather_ds = xr.Dataset(
        data_vars={
            var: (dim, da.empty([dims_size[d] for d in dim]), attrs.get(var))
            for var, dim in shapes.items()
        },
        coords={'time': pd.date_range("2022-01-01", freq=periodicity, periods=time_length),
                'gid': np.arange(gids_size)},
        attrs=global_attrs,
    )

    return weather_ds
1107
+
1108
+
1109
+
1003
1110
def pvgis_hourly_empty_weather_ds (gids_size ):
1004
1111
"""
1005
1112
Create an empty weather dataset for pvgis hourly TMY data
@@ -1016,6 +1123,8 @@ def pvgis_hourly_empty_weather_ds(gids_size):
1016
1123
"""
1017
1124
import dask .array as da
1018
1125
1126
+
1127
+
1019
1128
shapes = {
1020
1129
"temp_air" : ("gid" , "time" ),
1021
1130
"relative_humidity" : ("gid" , "time" ),
@@ -1077,7 +1186,6 @@ def weather_distributed(
1077
1186
list of tuples containing (latitude, longitude) coordinates
1078
1187
1079
1188
.. code-block:: python
1080
-
1081
1189
coords_example = [
1082
1190
(49.95, 1.5),
1083
1191
(51.95, -9.5),
@@ -1123,17 +1231,27 @@ def weather_distributed(
1123
1231
1124
1232
gids_failed = []
1125
1233
1126
- weather_ds = pvgis_hourly_empty_weather_ds (len (results )) # create empty weather xr.dataset
1234
+ time_length = weather_ds_collection [0 ].sizes ["time" ]
1235
+ periodicity = ENTRIES_PERIODICITY_MAP [time_length ]
1236
+
1237
+ # weather_ds = pvgis_hourly_empty_weather_ds(len(results)) # create empty weather xr.dataset
1238
+ weather_ds = emtpy_weather_ds (
1239
+ gids_size = len (results ),
1240
+ periodicity = periodicity ,
1241
+ database = database ,
1242
+ )
1243
+
1127
1244
meta_df = pd .DataFrame .from_dict (meta_dict_collection ) # create populated meta pd.DataFrame
1128
1245
1129
- # these gids will be spatially meaningless, they will only show corresponding entries between weather_ds and meta_df
1130
- for i , row in enumerate (results ): # this loop can be refactored, kinda gross
1246
+ # gids are spatially meaningless if data is from PVGIS, they will only show corresponding entries between weather_ds and meta_df
1247
+ # only meaningful if data is from NSRDB
1248
+ # this loop can be refactored, it is a little weird
1249
+ for i , row in enumerate (results ):
1131
1250
1132
1251
if row [2 ]:
1133
1252
gids_failed .append (i )
1134
1253
continue
1135
1254
1136
- # weather_ds[dict(gid=i)] = weather_ds_collection[i].to_xarray().drop_vars('time')
1137
1255
weather_ds [dict (gid = i )] = weather_ds_collection [i ]
1138
1256
1139
1257
return weather_ds , meta_df , gids_failed
0 commit comments