Skip to content

Commit 7894b0a

Browse files
committed
adding point to existing zarr store
1 parent 33bb216 commit 7894b0a

File tree

2 files changed

+3019
-1434
lines changed

2 files changed

+3019
-1434
lines changed

pvdeg/store.py

+26-38
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,13 @@ def store(weather_ds, meta_df):
7575

7676
combined_ds = _combine_geo_weather_meta(weather_ds, meta_df)
7777

78-
7978
if not os.path.exists(os.path.join(METOROLOGICAL_DOWNLOAD_PATH, ".zmetadata")): # no zstore in directory
8079
print("Creating Zarr")
8180

8281
combined_ds.to_zarr(
8382
store=METOROLOGICAL_DOWNLOAD_PATH,
8483
group=f"{group}-{periodicity}",
84+
mode="w-", # only use for first time creating store
8585
)
8686
else: # store already exists
8787
print("adding to store")
@@ -90,6 +90,7 @@ def store(weather_ds, meta_df):
9090
stored_ds = xr.open_zarr(
9191
store=METOROLOGICAL_DOWNLOAD_PATH,
9292
group=f"{group}-{periodicity}",
93+
# consolidated=True
9394
)
9495

9596
lat_lon_gid_2d_map = _make_coords_to_gid_da(ds_from_zarr=stored_ds)
@@ -104,57 +105,42 @@ def store(weather_ds, meta_df):
104105

105106
if lat_exists and lon_exists:
106107
print("(lat, lon) exists already")
107-
stored_gid = lat_lon_gid_2d_map.sel(latitude=target_lat, longitude=target_lon)
108108

109-
# overwrite previous value at that lat-lon, keeps old gid
109+
raise NotImplementedError
110+
111+
# stored_gid = lat_lon_gid_2d_map.sel(latitude=target_lat, longitude=target_lon)
112+
113+
# # overwrite previous value at that lat-lon, keeps old gid
114+
115+
# # need to set the gid of the current "sheet" to the stored gid
116+
# updated_entry = combined_ds.loc[{"gid": gid}].assign_coords({"gid": stored_gid}) # this may need to be a list s.t. [stored_gid]
117+
# # loc may remove the gid dimension so we might have to add it back with .expand_dims
118+
119+
# # overwrite the current entry at the gid = stored_gid entry of the zarr
120+
# updated_entry.to_zarr(store=METOROLOGICAL_DOWNLOAD_PATH, group=f"{group}-{periodicity}", mode='w')
110121

111-
# will this be a view
112-
# how can we assign the value
113-
# cant slice?
114-
stored_ds.sel(gid=stored_gid)[:] = combined_ds.sel(gid=gid).values()
115122

116123
else: # coordinate pair doesnt exist and it needs to be added, this will be a HEAVY operation
117124
print("add entry to dataset")
118125

119126
# we are trying to save 1 "sheet" of weather (weather at a single gid)
120127
# need to update the index to fit into the stored data after we concatenate
121-
# we want to update the arbitrary gid in the input (combined_ds) to the next index in the gid array (starts at 0, current_gid + 1 = sizes["gid"] = new gid)
122-
new_gid = stored_ds.sizes["gid"]
123-
124-
# combined_ds.sel(gid=gid) = combined_ds.sel(gid=gid).assign_coords(gid=[new_gid]) # we may have the issues with this sel returning a view
125-
updated_entry = combined_ds.sel(gid=gid).assign_coords(gid=[new_gid])
126128

127-
stored_ds = xr.concat([stored_ds, updated_entry], dim="gid")
129+
# this concatenates along the the gid axis
130+
# gid has no guarantee of being unqiue but duplicate gids are fine for xarray
131+
# we slice so we can get a Dataset with dimensions of (gid, time) indexing to grab one gid will drop the gid dimension
132+
new_gid = stored_ds.sizes["gid"]
128133

129-
# trigger rechunking
130-
# should this happen outside of the loop
131-
stored_ds = stored_ds.chunk()
134+
weather_sheet = combined_ds.sel(gid=slice(gid))
135+
updated_entry = weather_sheet.assign_coords({"gid": [new_gid]})
136+
updated_entry.to_zarr(store=METOROLOGICAL_DOWNLOAD_PATH, group=f"{group}-{periodicity}", mode="a", append_dim="gid")
132137

133-
# SAVE DATASET BACK TO STORE
134-
stored_ds.to_zarr(METOROLOGICAL_DOWNLOAD_PATH, group=f"{group}-{periodicity}", mode='w') # test with "a" probably wont work
138+
# new_entry_added_ds = xr.concat([stored_ds, updated_entry], dim="gid")
135139

140+
# new_entry_added_ds.to_zarr(store=METOROLOGICAL_DOWNLOAD_PATH, group=f"{group}-{periodicity}", mode="a", append_dim="gid")
141+
136142
print(f"dataset saved to zarr store at {METOROLOGICAL_DOWNLOAD_PATH}")
137143

138-
### THIS NEEDS TO BE DEPRECATED
139-
def _add_entry_to_ds(combined_ds, stored_ds, target_lat, target_lon, gid):
140-
141-
new_gid = stored_ds.sizes["gid"] # zero indexed so the next index will be the current size
142-
143-
# new_entry = combined_ds.sel(gid=gid).expand_dims(gid=new_gid)
144-
145-
# for var in new_entry.data_vars:
146-
# existing_data = stored_ds[var]
147-
# new_data = new_entry[var]
148-
149-
# updated_data = xr.concat([existing_data, new_data], dim='gid')
150-
stored_ds = xr.concat([stored_ds, combined_ds.sel(gid=gid)], dim="gid")
151-
152-
# stored_ds[var] = updated_datag
153-
154-
# stored_ds['latitude'] = xr.concat([stored_ds['latitude'], xr.DataArray([target_lat], dims='gid')], dim='gid')
155-
# stored_ds['longitude'] = xr.concat([stored_ds['longitude'], xr.DataArray([target_lon], dims='gid')], dim='gid')
156-
157-
158144

159145
def check_store():
160146
"""Check if you have a zarr store at the default download path defined in pvdeg.config"""
@@ -189,8 +175,10 @@ def _combine_geo_weather_meta(
189175
):
190176
"""Combine weather dataset and meta dataframe into a single dataset"""
191177

178+
# if meta_df.index.name == 'index':
192179
meta_ds = xr.Dataset.from_dataframe(meta_df).rename({'index' : 'gid'})
193180

181+
194182
combined = xr.merge([weather_ds, meta_ds]).assign_coords(
195183
latitude=("gid", meta_ds.latitude.values),
196184
longitude=('gid', meta_ds.longitude.values),

0 commit comments

Comments
 (0)