Skip to content

Commit 33bb216

Browse files
committed
add new PVGIS location to existing store
1 parent 74b745c commit 33bb216

File tree

3 files changed

+758
-114
lines changed

3 files changed

+758
-114
lines changed

pvdeg/store.py

+83-13
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import xarray as xr
33
import pandas as pd
44
import numpy as np
5+
import dask.array as da
56
import zarr
67
import os
78

@@ -74,20 +75,90 @@ def store(weather_ds, meta_df):
7475

7576
combined_ds = _combine_geo_weather_meta(weather_ds, meta_df)
7677

77-
# what mode should this be
78-
# we want to add to indexes if need be or overwrite old ones
79-
combined_ds.to_zarr(
80-
store=METOROLOGICAL_DOWNLOAD_PATH,
81-
group=f"{group}-{periodicity}"
82-
)
78+
79+
if not os.path.exists(os.path.join(METOROLOGICAL_DOWNLOAD_PATH, ".zmetadata")): # no zstore in directory
80+
print("Creating Zarr")
81+
82+
combined_ds.to_zarr(
83+
store=METOROLOGICAL_DOWNLOAD_PATH,
84+
group=f"{group}-{periodicity}",
85+
)
86+
else: # store already exists
87+
print("adding to store")
88+
89+
print("opening store")
90+
stored_ds = xr.open_zarr(
91+
store=METOROLOGICAL_DOWNLOAD_PATH,
92+
group=f"{group}-{periodicity}",
93+
)
94+
95+
lat_lon_gid_2d_map = _make_coords_to_gid_da(ds_from_zarr=stored_ds)
96+
97+
for gid, values in meta_df.iterrows():
98+
99+
target_lat = values["latitude"]
100+
target_lon = values["longitude"]
101+
102+
lat_exists = np.any(lat_lon_gid_2d_map.latitude == target_lat)
103+
lon_exists = np.any(lat_lon_gid_2d_map.longitude == target_lon)
104+
105+
if lat_exists and lon_exists:
106+
print("(lat, lon) exists already")
107+
stored_gid = lat_lon_gid_2d_map.sel(latitude=target_lat, longitude=target_lon)
108+
109+
# overwrite previous value at that lat-lon, keeps old gid
110+
111+
# will this be a view
112+
# how can we assign the value
113+
# cant slice?
114+
stored_ds.sel(gid=stored_gid)[:] = combined_ds.sel(gid=gid).values()
115+
116+
else: # coordinate pair doesnt exist and it needs to be added, this will be a HEAVY operation
117+
print("add entry to dataset")
118+
119+
# we are trying to save 1 "sheet" of weather (weather at a single gid)
120+
# need to update the index to fit into the stored data after we concatenate
121+
# reassign the arbitrary gid from the input (combined_ds) to the next free index in the stored gid array (gids are zero-indexed, so the next gid equals stored_ds.sizes["gid"])
122+
new_gid = stored_ds.sizes["gid"]
123+
124+
# combined_ds.sel(gid=gid) = combined_ds.sel(gid=gid).assign_coords(gid=[new_gid]) # we may have the issues with this sel returning a view
125+
updated_entry = combined_ds.sel(gid=gid).assign_coords(gid=[new_gid])
126+
127+
stored_ds = xr.concat([stored_ds, updated_entry], dim="gid")
128+
129+
# trigger rechunking
130+
# should this happen outside of the loop
131+
stored_ds = stored_ds.chunk()
132+
133+
# SAVE DATASET BACK TO STORE
134+
stored_ds.to_zarr(METOROLOGICAL_DOWNLOAD_PATH, group=f"{group}-{periodicity}", mode='w') # test with "a" probably wont work
83135

84136
print(f"dataset saved to zarr store at {METOROLOGICAL_DOWNLOAD_PATH}")
85137

138+
### THIS NEEDS TO BE DEPRECATED
139+
def _add_entry_to_ds(combined_ds, stored_ds, target_lat, target_lon, gid):
140+
141+
new_gid = stored_ds.sizes["gid"] # zero indexed so the next index will be the current size
142+
143+
# new_entry = combined_ds.sel(gid=gid).expand_dims(gid=new_gid)
144+
145+
# for var in new_entry.data_vars:
146+
# existing_data = stored_ds[var]
147+
# new_data = new_entry[var]
148+
149+
# updated_data = xr.concat([existing_data, new_data], dim='gid')
150+
stored_ds = xr.concat([stored_ds, combined_ds.sel(gid=gid)], dim="gid")
151+
152+
# stored_ds[var] = updated_datag
153+
154+
# stored_ds['latitude'] = xr.concat([stored_ds['latitude'], xr.DataArray([target_lat], dims='gid')], dim='gid')
155+
# stored_ds['longitude'] = xr.concat([stored_ds['longitude'], xr.DataArray([target_lon], dims='gid')], dim='gid')
156+
86157

87158

88159
def check_store():
89160
"""Check if you have a zarr store at the default download path defined in pvdeg.config"""
90-
if os.path.exists(os.path.join(METOROLOGICAL_DOWNLOAD_PATH, ".zattrs")):
161+
if os.path.exists(os.path.join(METOROLOGICAL_DOWNLOAD_PATH, ".zmetadata")):
91162

92163
size = sum(f.stat().st_size for f in METOROLOGICAL_DOWNLOAD_PATH.glob('**/*') if f.is_file())
93164

@@ -118,17 +189,15 @@ def _combine_geo_weather_meta(
118189
):
119190
"""Combine weather dataset and meta dataframe into a single dataset"""
120191

121-
meta_ds = xr.Dataset.from_dataframe(meta_df)
122-
# we could do some encoding scheme here, dont need to store source? unless the zarr compression handles it for us
123-
124-
meta_ds['gid'] = meta_ds['index'].values.astype(np.int32)
125-
meta_ds = meta_ds.drop_vars(["index"])
192+
meta_ds = xr.Dataset.from_dataframe(meta_df).rename({'index' : 'gid'})
126193

127194
combined = xr.merge([weather_ds, meta_ds]).assign_coords(
128195
latitude=("gid", meta_ds.latitude.values),
129196
longitude=('gid', meta_ds.longitude.values),
130197
)
131198

199+
combined["Source"] = combined["Source"].astype(str) # save as strings
200+
132201
return combined
133202

134203

@@ -140,6 +209,8 @@ def _seperate_geo_weather_meta(
140209
and separate it into `weather_ds` and `meta_df`.
141210
"""
142211

212+
ds_from_zarr["Source"] = ds_from_zarr["Source"].astype(object) # geospatial.mapblocks needs this to be an object
213+
143214
# there may be a more optimal way to do this
144215
data = np.column_stack(
145216
[
@@ -163,7 +234,6 @@ def _make_coords_to_gid_da(
163234
):
164235
"""Create a 2D indexable array that maps coordinates (lat and lon) to gid stored in zarr store"""
165236

166-
import dask.array as da
167237

168238
# only want to do this if the arrays are dask arrays
169239
lats = ds_from_zarr.latitude.to_numpy()

pvdeg/weather.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -968,7 +968,7 @@ def _weather_distributed_vec(
968968

969969
return weather_ds, meta_dict, None
970970

971-
def pvgis_empty_weather_ds(gids_size):
971+
def pvgis_hourly_empty_weather_ds(gids_size):
972972
"""
973973
Create an empty weather dataset for pvgis hourly TMY data
974974
@@ -1074,7 +1074,7 @@ def weather_distributed(database, coords):
10741074

10751075
gids_failed = []
10761076

1077-
weather_ds = pvgis_empty_weather_ds(len(results)) # create empty weather xr.dataset
1077+
weather_ds = pvgis_hourly_empty_weather_ds(len(results)) # create empty weather xr.dataset
10781078
meta_df = pd.DataFrame.from_dict(meta_dict_collection) # create populated meta pd.DataFrame
10791079

10801080
# these gids are spatially meaningless; they only link corresponding entries between weather_ds and meta_df

0 commit comments

Comments
 (0)