Commit

after larmip integration
pvankatwyk committed Apr 16, 2024
1 parent 9ecc66c commit 5f2329f
Showing 4 changed files with 83 additions and 22 deletions.
48 changes: 31 additions & 17 deletions ise/data/process.py
@@ -548,18 +548,25 @@ def _calculate_ivaf_single_file(self, directory, densities, scalefac_model, ctrl
).transpose("x", "y", "time", ...)

# subtract out control
ivaf = ivaf_ctrl.ivaf.values - ivaf
# ivaf = ivaf_ctrl.ivaf.values - ivaf
ivaf_with_ctrl = ivaf.copy()
ivaf = ivaf - ivaf_ctrl.ivaf.values

# save ivaf file (copied format from bed_data, change accordingly.)
ivaf_nc["ivaf"] = (("x", "y", "time"), ivaf)
if not ctrl_proj:
ivaf_nc['ivaf_with_control'] = (("x", "y", "time"), ivaf_with_ctrl)
ivaf_nc = ivaf_nc.drop_vars(
[
"topg",
]
)
ivaf_nc["sle"] = ivaf_nc.ivaf / 1e9 / 362.5
export_nc_path = os.path.join(directory, f"ivaf_{self.ice_sheet}_{group}_{model}_{exp}.nc")
if os.path.exists(export_nc_path):
os.remove(export_nc_path)
ivaf_nc.to_netcdf(
os.path.join(directory, f"ivaf_{self.ice_sheet}_{group}_{model}_{exp}.nc")
export_nc_path
)

print(f"{group}_{model}_{exp}: Processing successful.")
@@ -2481,7 +2488,7 @@ def process_AIS_atmospheric_sectors(forcing_directory, grid_file):
ice_sheet = "AIS"

start_time = time.time()
path_to_forcings = "AIS/Atmosphere_Forcing/"
path_to_forcings = "/Atmosphere_Forcing/"
af_directory = (
f"{forcing_directory}/{path_to_forcings}"
if not forcing_directory.endswith(path_to_forcings)
@@ -2496,7 +2503,6 @@ def process_AIS_atmospheric_sectors(forcing_directory, grid_file):
unique_sectors = np.unique(sectors)
all_data = []
for i, fp in enumerate(filepaths):
fp = r"/oscar/home/pvankatw/data/pvankatw/pvankatw-bfoxkemp/GHub-ISMIP6-Forcing//AIS/Atmosphere_Forcing/miroc-esm-chem_rcp8.5/Regridded_8km/MIROC-ESM-CHEM_8km_anomaly_1995-2100.nc"
print("")
print(f"File {i+1} / {len(filepaths)}")
print(f'File: {fp.split("/")[-1]}')
@@ -2775,14 +2781,18 @@ def _format_grid_file(grid_file):

def process_AIS_outputs(
zenodo_directory,
with_ctrl=False
):

directory = (
f"{zenodo_directory}/ComputedScalarsPaper/"
if not zenodo_directory.endswith("ComputedScalarsPaper")
else zenodo_directory
)
files = get_all_filepaths(directory, contains="ivaf_minus_ctrl_proj", filetype="nc")
if not with_ctrl:
files = get_all_filepaths(directory, contains="ivaf_minus_ctrl_proj", filetype="nc")
else:
files = get_all_filepaths(directory, contains="ivaf_AIS", filetype="nc", not_contains=['hist', 'ctrl'])
count = 0

all_files_data = []
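
Usage sketch for the new with_ctrl flag on process_AIS_outputs; the Zenodo path below is a placeholder:

from ise.data.process import process_AIS_outputs

# Default: control-subtracted projections, read from ivaf_minus_ctrl_proj_*.nc
proj_minus_ctrl = process_AIS_outputs("/path/to/ComputedScalarsPaper")

# with_ctrl=True: raw ivaf_AIS_*.nc files, with 'hist' and 'ctrl' runs filtered out
proj_raw = process_AIS_outputs("/path/to/ComputedScalarsPaper", with_ctrl=True)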
@@ -2845,6 +2855,7 @@ def merge_datasets(forcings, projections, experiments_file, ice_sheet="AIS", exp
)
forcings["aogcm"] = forcings["aogcm"].apply(formatting_function)
projections.rename(columns={"AOGCM": "aogcm"}, inplace=True)
forcings['sector'] = forcings.sector.astype(int)
dataset = pd.merge(forcings, projections, on=["aogcm", "year", "sector"], how="inner")

return dataset
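
The added astype(int) cast aligns the sector key on the forcings side with the integer sector IDs in the projections, presumably because the CSV round-trip can leave sector as a float or string, which makes the inner join on sector unreliable. A small illustration with hypothetical frames:

import pandas as pd

forcings = pd.DataFrame({"aogcm": ["miroc"], "year": [2050], "sector": [3.0]})  # float sector
projections = pd.DataFrame({"aogcm": ["miroc"], "year": [2050], "sector": [3], "sle": [0.1]})

forcings["sector"] = forcings.sector.astype(int)  # align the merge key dtype
dataset = pd.merge(forcings, projections, on=["aogcm", "year", "sector"], how="inner")
print(len(dataset))  # 1 row once sector is an integer on both sides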
@@ -2917,22 +2928,24 @@ def process_sectors(
experiments_file,
export_directory=None,
overwrite=False,
with_ctrl=False,
):

forcing_exists = os.path.exists(f"{export_directory}/forcings.csv")
if not forcing_exists or (forcing_exists and overwrite):
# atmospheric_df = (
# process_AIS_atmospheric_sectors(forcing_directory, grid_file)
# if ice_sheet == "AIS"
# else process_GrIS_atmospheric_sectors(forcing_directory, grid_file)
# )
# atmospheric_df.to_csv(f"{export_directory}/{ice_sheet}_atmospheric.csv", index=False)
# oceanic_df = (
# process_AIS_oceanic_sectors(forcing_directory, grid_file)
# if ice_sheet == "AIS"
# else process_GrIS_oceanic_sectors(forcing_directory, grid_file)
# )
# oceanic_df.to_csv(f"{export_directory}/{ice_sheet}_oceanic.csv", index=False)

atmospheric_df = (
process_AIS_atmospheric_sectors(forcing_directory, grid_file)
if ice_sheet == "AIS"
else process_GrIS_atmospheric_sectors(forcing_directory, grid_file)
)
atmospheric_df.to_csv(f"{export_directory}/{ice_sheet}_atmospheric.csv", index=False)
oceanic_df = (
process_AIS_oceanic_sectors(forcing_directory, grid_file)
if ice_sheet == "AIS"
else process_GrIS_oceanic_sectors(forcing_directory, grid_file)
)
oceanic_df.to_csv(f"{export_directory}/{ice_sheet}_oceanic.csv", index=False)

atmospheric_df = pd.read_csv(f"{export_directory}/{ice_sheet}_atmospheric.csv")
oceanic_df = pd.read_csv(f"{export_directory}/{ice_sheet}_oceanic.csv")
@@ -2960,6 +2973,7 @@ def process_sectors(
projections = (
process_AIS_outputs(
zenodo_directory,
with_ctrl=with_ctrl,
)
if ice_sheet == "AIS"
else process_GrIS_outputs(
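
A hypothetical end-to-end call showing how the new with_ctrl flag threads through process_sectors into process_AIS_outputs. The leading parameter names (forcing_directory, grid_file, zenodo_directory, ice_sheet) are inferred from the function body rather than shown in this hunk, and all paths are placeholders:

from ise.data.process import process_sectors

dataset = process_sectors(
    forcing_directory="/path/to/GHub-ISMIP6-Forcing",  # placeholder
    grid_file="/path/to/sectors_grid.nc",              # placeholder
    zenodo_directory="/path/to/ComputedScalarsPaper",  # placeholder
    experiments_file="/path/to/experiments.csv",       # placeholder
    ice_sheet="AIS",
    export_directory="/path/to/output",
    overwrite=False,
    with_ctrl=True,  # forwarded to process_AIS_outputs: use ivaf_AIS_* files instead of ivaf_minus_ctrl_proj_*
)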
38 changes: 38 additions & 0 deletions ise/evaluation/metrics.py
@@ -120,3 +120,41 @@ def kolmogorov_smirnov(x1, x2):
def t_test(x1, x2):
res = ttest_ind(x1, x2)
return res.statistic, res.pvalue


def calculate_ece(predictions, uncertainties, true_values, bins=10):
"""
Calculate the Expected Calibration Error (ECE) for regression model predictions.
Args:
predictions (numpy.ndarray): Array of predicted means by the model.
uncertainties (numpy.ndarray): Array of predicted standard deviations (uncertainty estimates).
true_values (numpy.ndarray): Array of actual values.
bins (int): Number of bins to use for grouping predictions by their uncertainty.
Returns:
float: The Expected Calibration Error.
"""
bin_limits = np.linspace(np.min(uncertainties), np.max(uncertainties), bins+1)
ece = 0.0
total_count = len(predictions)

for i in range(bins):
bin_mask = (uncertainties >= bin_limits[i]) & (uncertainties < bin_limits[i+1])
if np.sum(bin_mask) == 0:
continue
bin_predictions = predictions[bin_mask]
bin_uncertainties = uncertainties[bin_mask]
bin_true_values = true_values[bin_mask]

# Assume Gaussian distribution: about 95.4% of data should fall within ±2 standard deviations
lower_bounds = bin_predictions - 2 * bin_uncertainties
upper_bounds = bin_predictions + 2 * bin_uncertainties
in_range = (bin_true_values >= lower_bounds) & (bin_true_values <= upper_bounds)
observed_probability = np.mean(in_range)
expected_probability = 0.954 # For ±2 standard deviations in Gaussian distribution

# Calculate the absolute difference weighted by the number of elements in the bin
ece += np.abs(observed_probability - expected_probability) * np.sum(bin_mask) / total_count

return ece
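
Quick usage sketch for the new calculate_ece metric with synthetic, well-calibrated predictions; the data below is made up for illustration:

import numpy as np
from ise.evaluation.metrics import calculate_ece

rng = np.random.default_rng(0)
true_values = rng.normal(0.0, 1.0, size=1000)
uncertainties = rng.uniform(0.05, 0.2, size=1000)                  # predicted sigmas
predictions = true_values + rng.normal(size=1000) * uncertainties  # errors match the stated sigmas

ece = calculate_ece(predictions, uncertainties, true_values, bins=10)
print(f"ECE: {ece:.3f}")  # small when ~95.4% of true values fall inside each bin's ±2-sigma interval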
7 changes: 5 additions & 2 deletions ise/models/hybrid.py
@@ -472,7 +472,8 @@ def fit(
"""
# Convert data to tensors and move to device
X, y = to_tensor(X).to(self.device), to_tensor(y).to(self.device)

if y.ndimension() == 1:
y = y.unsqueeze(1)
# Check if validation data is provided
if val_X is not None and val_y is not None:
validate = True
@@ -848,7 +849,7 @@ def __init__(
self.num_input_features = forcing_size
self.num_predicted_sle = sle_size
self.flow_hidden_features = sle_size * 2
self.projection_length=projection_length,
self.projection_length=projection_length
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.to(self.device)

@@ -893,6 +894,8 @@ def fit(self, X, y, epochs=100, batch_size=64):
batch_size (int): The batch size for training (default: 64).
"""
X, y = to_tensor(X).to(self.device), to_tensor(y).to(self.device)
if y.ndimension() == 1:
y = y.unsqueeze(1)
dataset = EmulatorDataset(X, y, sequence_length=1, projection_length=self.projection_length)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
self.train()
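
Besides reshaping 1-D targets to (N, 1) before training, this file fixes a subtle bug in __init__: the old self.projection_length=projection_length, ended with a comma, which binds a one-element tuple rather than the integer. A minimal, hypothetical illustration of that comma:

class Example:
    def __init__(self, projection_length=86):   # 86 is an arbitrary placeholder
        self.with_comma = projection_length,    # trailing comma -> tuple (86,)
        self.without_comma = projection_length  # plain int, as intended

e = Example()
print(type(e.with_comma), e.with_comma)        # <class 'tuple'> (86,)
print(type(e.without_comma), e.without_comma)  # <class 'int'> 86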
12 changes: 9 additions & 3 deletions ise/utils/functions.py
Expand Up @@ -460,10 +460,16 @@ def get_all_filepaths(
all_files = [file for file in all_files if file.endswith(filetype)]

if contains:
all_files = [file for file in all_files if contains in file]
if isinstance(contains, str):
contains = [contains]
for substr in contains:
all_files = [file for file in all_files if substr in file]

if not_contains:
all_files = [file for file in all_files if not_contains not in file]
if isinstance(not_contains, str):
not_contains = [not_contains]
for substr in not_contains:
all_files = [file for file in all_files if substr not in file]

return all_files
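
With this change, contains and not_contains accept either a single substring or a list of substrings (every entry must match, or be absent, respectively), which the new ivaf_AIS file selection in process.py relies on. A usage sketch with a placeholder directory:

from ise.utils.functions import get_all_filepaths

files = get_all_filepaths(
    "/path/to/ComputedScalarsPaper",  # placeholder directory
    filetype="nc",
    contains="ivaf_AIS",              # a plain string still works
    not_contains=["hist", "ctrl"],    # list: drop historical and control runs
)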

@@ -629,7 +635,7 @@ def to_tensor(x):
"""
if x is None:
return None
if isinstance(x, pd.DataFrame):
if isinstance(x, pd.DataFrame) or isinstance(x, pd.Series):
x = torch.tensor(x.values, dtype=torch.float32)
elif isinstance(x, np.ndarray):
x = torch.tensor(x, dtype=torch.float32)
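
to_tensor now also accepts a pandas Series alongside the existing DataFrame and numpy-array branches, converting it to a float32 tensor via .values. A small check with hypothetical values:

import pandas as pd
from ise.utils.functions import to_tensor

s = pd.Series([0.1, 0.2, 0.3], name="sle")
t = to_tensor(s)
print(t.dtype, t.shape)  # torch.float32 torch.Size([3])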
