Merge pull request #2 from NCAR/djgagne
Updated dependencies and added scaler application
Showing 11 changed files with 1,767 additions and 16 deletions.
**README.md** (`@@ -1 +1,29 @@`)
The one-line README (`# miles-credit`) is expanded to:

# NSF NCAR MILES Community Runnable Earth Digital Intelligence Twin (CREDIT)

## About
CREDIT is a package to train and run neural networks that can emulate full NWP models by predicting the next state of the atmosphere given the current state.

## Installation
Clone from the miles-credit GitHub page:
```bash
git clone git@github.com:NCAR/miles-credit.git
cd miles-credit
```

Install dependencies using the environment.yml file:
```bash
mamba env create -f environment.yml
conda activate credit
```

To enable GPU support, install pytorch-cuda:
```bash
mamba install pytorch-cuda=12.1 -c pytorch -c nvidia
```
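After this step, a quick check like the following should print `True` (a minimal sketch, assuming the `credit` environment is active on a machine with an NVIDIA GPU):
```bash
# Prints True when the CUDA-enabled PyTorch build detects a GPU
python -c "import torch; print(torch.cuda.is_available())"
```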
Install miles-credit with the following command:
```bash
pip install .
```
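A minimal import smoke test can then confirm the package is on the path (the top-level package name `credit` matches the test module shown further below):
```bash
# Should exit silently if the credit package installed correctly
python -c "import credit"
```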
**New file** (`@@ -0,0 +1,71 @@`): the scaler application, a script that fits distributed quantile scalers to yearly ERA5 files.
```python
import os
import argparse
from glob import glob
from multiprocessing import Pool
from os.path import exists, join

import numpy as np
import pandas as pd
import xarray as xr
import yaml
from bridgescaler.backend import print_scaler
from bridgescaler.distributed import DQuantileTransformer


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", help="Path to config file")
    parser.add_argument("-o", "--out", help="Path to save scaler files.")
    parser.add_argument("-p", "--procs", type=int, help="Number of processors")
    args = parser.parse_args()
    with open(args.config) as cf:
        conf = yaml.load(cf, Loader=yaml.FullLoader)
    # Exclude the subsampled "_small_" ERA5 files from scaler fitting.
    # (Filter with a comprehension; removing items from a list while
    # iterating over it skips elements.)
    all_era5_files = sorted(f for f in glob(conf["data"]["save_loc"])
                            if "_small_" not in f)
    all_era5_filenames = [f.split("/")[-1] for f in all_era5_files]
    # Fit one pair of scalers per yearly file in parallel.
    with Pool(args.procs) as p:
        all_scalers = p.map(fit_era5_scaler_year, all_era5_files)
    all_scalers_df = pd.DataFrame(all_scalers,
                                  columns=["scaler_3d", "scaler_surface"],
                                  index=all_era5_filenames)
    if not exists(args.out):
        os.makedirs(args.out)
    now = pd.Timestamp.utcnow().strftime("%Y-%m-%d_%H:%M")
    all_scalers_df.to_parquet(join(args.out, f"era5_quantile_scalers_{now}.parquet"))
    return


def fit_era5_scaler_year(era5_file):
    n_times = 300
    eds = xr.open_zarr(era5_file)
    vars_3d = ["U", "V", "T", "Q"]
    vars_surf = ["SP", "t2m", "V500", "U500", "T500", "Z500", "Q500"]
    levels = eds.level.values
    # One "variable" label per (3D variable, pressure level) pair.
    var_levels = [f"{var}_{level:d}" for var in vars_3d for level in levels]
    dqs_3d = DQuantileTransformer(distribution="normal")
    dqs_surf = DQuantileTransformer(distribution="normal")
    # Fit on a random subsample of times; cap the sample size so files
    # with fewer than n_times steps do not raise a ValueError.
    rand_times = np.sort(np.random.choice(eds["time"].values,
                                          size=min(n_times, eds["time"].size),
                                          replace=False))
    for time in rand_times:
        print(time)
        # Stack every 3D variable/level combination into a single
        # (latitude, longitude, variable) array and update the scaler.
        var_slices = [eds[var].sel(time=time, level=level)
                      for var in vars_3d for level in levels]
        e3d = xr.concat(var_slices, pd.Index(var_levels, name="variable")
                        ).transpose("latitude", "longitude", "variable")
        dqs_3d.fit(e3d)
        e_surf = xr.concat([eds[v].sel(time=time) for v in vars_surf],
                           pd.Index(vars_surf, name="variable")
                           ).transpose("latitude", "longitude", "variable")
        dqs_surf.fit(e_surf)
    # Serialize each fitted scaler to a JSON string for storage.
    dqs_3d_json = print_scaler(dqs_3d)
    dqs_surf_json = print_scaler(dqs_surf)
    return dqs_3d_json, dqs_surf_json


if __name__ == "__main__":
    main()
```
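The diff does not show where this script lives in the repository, so the path below is hypothetical; the flags map to the argparse options defined above (`-c` config, `-o` output directory, `-p` worker count). A sketch of how it might be invoked:
```bash
# Hypothetical script path; -c/-o/-p match the argparse flags above
python applications/fit_era5_scalers.py -c config.yml -o scalers/ -p 8

# Inspect the most recent scaler table written by the script
python -c "import glob, pandas as pd; print(pd.read_parquet(sorted(glob.glob('scalers/era5_quantile_scalers_*.parquet'))[-1]).head())"
```
Each worker fits one yearly ERA5 file, and the per-file JSON-serialized scalers are gathered into a single timestamped Parquet table.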
**New test file** (`@@ -0,0 +1,6 @@`): a placeholder test module.
```python
from credit.data import ERA5Dataset  # currently unused; exercises the package import


def test_data():
    # Placeholder test that always passes.
    assert True
    return
```
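With this file in place, the suite can be run with pytest (assuming pytest is available in the `credit` environment):
```bash
pip install pytest  # if not already available
pytest              # discovers and runs the placeholder test
```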