Commit

after larmip integration
pvankatwyk committed Apr 16, 2024
1 parent 9ecc66c commit 5f2329f
Showing 4 changed files with 83 additions and 22 deletions.
48 changes: 31 additions & 17 deletions ise/data/process.py
@@ -548,18 +548,25 @@ def _calculate_ivaf_single_file(self, directory, densities, scalefac_model, ctrl
).transpose("x", "y", "time", ...)

# subtract out control
ivaf = ivaf_ctrl.ivaf.values - ivaf
# ivaf = ivaf_ctrl.ivaf.values - ivaf
ivaf_with_ctrl = ivaf.copy()
ivaf = ivaf - ivaf_ctrl.ivaf.values

# save ivaf file (copied format from bed_data, change accordingly.)
ivaf_nc["ivaf"] = (("x", "y", "time"), ivaf)
if not ctrl_proj:
ivaf_nc['ivaf_with_control'] = (("x", "y", "time"), ivaf_with_ctrl)
ivaf_nc = ivaf_nc.drop_vars(
[
"topg",
]
)
ivaf_nc["sle"] = ivaf_nc.ivaf / 1e9 / 362.5
export_nc_path = os.path.join(directory, f"ivaf_{self.ice_sheet}_{group}_{model}_{exp}.nc")
if os.path.exists(export_nc_path):
os.remove(export_nc_path)
ivaf_nc.to_netcdf(
os.path.join(directory, f"ivaf_{self.ice_sheet}_{group}_{model}_{exp}.nc")
export_nc_path
)

print(f"{group}_{model}_{exp}: Processing successful.")
@@ -2481,7 +2488,7 @@ def process_AIS_atmospheric_sectors(forcing_directory, grid_file):
ice_sheet = "AIS"

start_time = time.time()
path_to_forcings = "AIS/Atmosphere_Forcing/"
path_to_forcings = "/Atmosphere_Forcing/"
af_directory = (
f"{forcing_directory}/{path_to_forcings}"
if not forcing_directory.endswith(path_to_forcings)
@@ -2496,7 +2503,6 @@ def process_AIS_atmospheric_sectors(forcing_directory, grid_file):
unique_sectors = np.unique(sectors)
all_data = []
for i, fp in enumerate(filepaths):
fp = r"/oscar/home/pvankatw/data/pvankatw/pvankatw-bfoxkemp/GHub-ISMIP6-Forcing//AIS/Atmosphere_Forcing/miroc-esm-chem_rcp8.5/Regridded_8km/MIROC-ESM-CHEM_8km_anomaly_1995-2100.nc"
print("")
print(f"File {i+1} / {len(filepaths)}")
print(f'File: {fp.split("/")[-1]}')
@@ -2775,14 +2781,18 @@ def _format_grid_file(grid_file):

def process_AIS_outputs(
zenodo_directory,
with_ctrl=False
):

directory = (
f"{zenodo_directory}/ComputedScalarsPaper/"
if not zenodo_directory.endswith("ComputedScalarsPaper")
else zenodo_directory
)
files = get_all_filepaths(directory, contains="ivaf_minus_ctrl_proj", filetype="nc")
if not with_ctrl:
files = get_all_filepaths(directory, contains="ivaf_minus_ctrl_proj", filetype="nc")
else:
files = get_all_filepaths(directory, contains="ivaf_AIS", filetype="nc", not_contains=['hist', 'ctrl'])
count = 0

all_files_data = []
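
Usage sketch for the new with_ctrl flag on process_AIS_outputs; the Zenodo path below is a placeholder:

from ise.data.process import process_AIS_outputs

# Default: control-subtracted projections, read from ivaf_minus_ctrl_proj_*.nc
proj_minus_ctrl = process_AIS_outputs("/path/to/ComputedScalarsPaper")

# with_ctrl=True: raw ivaf_AIS_*.nc files, with 'hist' and 'ctrl' runs filtered out
proj_raw = process_AIS_outputs("/path/to/ComputedScalarsPaper", with_ctrl=True)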
@@ -2845,6 +2855,7 @@ def merge_datasets(forcings, projections, experiments_file, ice_sheet="AIS", exp
)
forcings["aogcm"] = forcings["aogcm"].apply(formatting_function)
projections.rename(columns={"AOGCM": "aogcm"}, inplace=True)
forcings['sector'] = forcings.sector.astype(int)
dataset = pd.merge(forcings, projections, on=["aogcm", "year", "sector"], how="inner")

return dataset
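
The added astype(int) cast aligns the sector key on the forcings side with the integer sector IDs in the projections, presumably because the CSV round-trip can leave sector as a float or string, which makes the inner join on sector unreliable. A small illustration with hypothetical frames:

import pandas as pd

forcings = pd.DataFrame({"aogcm": ["miroc"], "year": [2050], "sector": [3.0]})  # float sector
projections = pd.DataFrame({"aogcm": ["miroc"], "year": [2050], "sector": [3], "sle": [0.1]})

forcings["sector"] = forcings.sector.astype(int)  # align the merge key dtype
dataset = pd.merge(forcings, projections, on=["aogcm", "year", "sector"], how="inner")
print(len(dataset))  # 1 row once sector is an integer on both sides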
@@ -2917,22 +2928,24 @@ def process_sectors(
experiments_file,
export_directory=None,
overwrite=False,
with_ctrl=False,
):

forcing_exists = os.path.exists(f"{export_directory}/forcings.csv")
if not forcing_exists or (forcing_exists and overwrite):
# atmospheric_df = (
# process_AIS_atmospheric_sectors(forcing_directory, grid_file)
# if ice_sheet == "AIS"
# else process_GrIS_atmospheric_sectors(forcing_directory, grid_file)
# )
# atmospheric_df.to_csv(f"{export_directory}/{ice_sheet}_atmospheric.csv", index=False)
# oceanic_df = (
# process_AIS_oceanic_sectors(forcing_directory, grid_file)
# if ice_sheet == "AIS"
# else process_GrIS_oceanic_sectors(forcing_directory, grid_file)
# )
# oceanic_df.to_csv(f"{export_directory}/{ice_sheet}_oceanic.csv", index=False)

atmospheric_df = (
process_AIS_atmospheric_sectors(forcing_directory, grid_file)
if ice_sheet == "AIS"
else process_GrIS_atmospheric_sectors(forcing_directory, grid_file)
)
atmospheric_df.to_csv(f"{export_directory}/{ice_sheet}_atmospheric.csv", index=False)
oceanic_df = (
process_AIS_oceanic_sectors(forcing_directory, grid_file)
if ice_sheet == "AIS"
else process_GrIS_oceanic_sectors(forcing_directory, grid_file)
)
oceanic_df.to_csv(f"{export_directory}/{ice_sheet}_oceanic.csv", index=False)

atmospheric_df = pd.read_csv(f"{export_directory}/{ice_sheet}_atmospheric.csv")
oceanic_df = pd.read_csv(f"{export_directory}/{ice_sheet}_oceanic.csv")
@@ -2960,6 +2973,7 @@ def process_sectors(
projections = (
process_AIS_outputs(
zenodo_directory,
with_ctrl=with_ctrl,
)
if ice_sheet == "AIS"
else process_GrIS_outputs(
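
A hypothetical end-to-end call showing how the new with_ctrl flag threads through process_sectors into process_AIS_outputs. The leading parameter names (forcing_directory, grid_file, zenodo_directory, ice_sheet) are inferred from the function body rather than shown in this hunk, and all paths are placeholders:

from ise.data.process import process_sectors

dataset = process_sectors(
    forcing_directory="/path/to/GHub-ISMIP6-Forcing",  # placeholder
    grid_file="/path/to/sectors_grid.nc",              # placeholder
    zenodo_directory="/path/to/ComputedScalarsPaper",  # placeholder
    experiments_file="/path/to/experiments.csv",       # placeholder
    ice_sheet="AIS",
    export_directory="/path/to/output",
    overwrite=False,
    with_ctrl=True,  # forwarded to process_AIS_outputs: use ivaf_AIS_* files instead of ivaf_minus_ctrl_proj_*
)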
38 changes: 38 additions & 0 deletions ise/evaluation/metrics.py
@@ -120,3 +120,41 @@ def kolmogorov_smirnov(x1, x2):
def t_test(x1, x2):
res = ttest_ind(x1, x2)
return res.statistic, res.pvalue


def calculate_ece(predictions, uncertainties, true_values, bins=10):
"""
Calculate the Expected Calibration Error (ECE) for regression model predictions.
Args:
predictions (numpy.ndarray): Array of predicted means by the model.
uncertainties (numpy.ndarray): Array of predicted standard deviations (uncertainty estimates).
true_values (numpy.ndarray): Array of actual values.
bins (int): Number of bins to use for grouping predictions by their uncertainty.
Returns:
float: The Expected Calibration Error.
"""
bin_limits = np.linspace(np.min(uncertainties), np.max(uncertainties), bins+1)
ece = 0.0
total_count = len(predictions)

for i in range(bins):
bin_mask = (uncertainties >= bin_limits[i]) & (uncertainties < bin_limits[i+1])
if np.sum(bin_mask) == 0:
continue
bin_predictions = predictions[bin_mask]
bin_uncertainties = uncertainties[bin_mask]
bin_true_values = true_values[bin_mask]

# Assume Gaussian distribution: about 95.4% of data should fall within ±2 standard deviations
lower_bounds = bin_predictions - 2 * bin_uncertainties
upper_bounds = bin_predictions + 2 * bin_uncertainties
in_range = (bin_true_values >= lower_bounds) & (bin_true_values <= upper_bounds)
observed_probability = np.mean(in_range)
expected_probability = 0.954 # For ±2 standard deviations in Gaussian distribution

# Calculate the absolute difference weighted by the number of elements in the bin
ece += np.abs(observed_probability - expected_probability) * np.sum(bin_mask) / total_count

return ece
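
Quick usage sketch for the new calculate_ece metric with synthetic, well-calibrated predictions; the data below is made up for illustration:

import numpy as np
from ise.evaluation.metrics import calculate_ece

rng = np.random.default_rng(0)
true_values = rng.normal(0.0, 1.0, size=1000)
uncertainties = rng.uniform(0.05, 0.2, size=1000)                  # predicted sigmas
predictions = true_values + rng.normal(size=1000) * uncertainties  # errors match the stated sigmas

ece = calculate_ece(predictions, uncertainties, true_values, bins=10)
print(f"ECE: {ece:.3f}")  # small when ~95.4% of true values fall inside each bin's ±2-sigma interval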
7 changes: 5 additions & 2 deletions ise/models/hybrid.py
@@ -472,7 +472,8 @@ def fit(
"""
# Convert data to tensors and move to device
X, y = to_tensor(X).to(self.device), to_tensor(y).to(self.device)

if y.ndimension() == 1:
y = y.unsqueeze(1)
# Check if validation data is provided
if val_X is not None and val_y is not None:
validate = True
@@ -848,7 +849,7 @@ def __init__(
self.num_input_features = forcing_size
self.num_predicted_sle = sle_size
self.flow_hidden_features = sle_size * 2
self.projection_length=projection_length,
self.projection_length=projection_length
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.to(self.device)

@@ -893,6 +894,8 @@ def fit(self, X, y, epochs=100, batch_size=64):
batch_size (int): The batch size for training (default: 64).
"""
X, y = to_tensor(X).to(self.device), to_tensor(y).to(self.device)
if y.ndimension() == 1:
y = y.unsqueeze(1)
dataset = EmulatorDataset(X, y, sequence_length=1, projection_length=self.projection_length)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
self.train()
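
Besides reshaping 1-D targets to (N, 1) before training, this file fixes a subtle bug in __init__: the old self.projection_length=projection_length, ended with a comma, which binds a one-element tuple rather than the integer. A minimal, hypothetical illustration of that comma:

class Example:
    def __init__(self, projection_length=86):   # 86 is an arbitrary placeholder
        self.with_comma = projection_length,    # trailing comma -> tuple (86,)
        self.without_comma = projection_length  # plain int, as intended

e = Example()
print(type(e.with_comma), e.with_comma)        # <class 'tuple'> (86,)
print(type(e.without_comma), e.without_comma)  # <class 'int'> 86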
12 changes: 9 additions & 3 deletions ise/utils/functions.py
Expand Up @@ -460,10 +460,16 @@ def get_all_filepaths(
all_files = [file for file in all_files if file.endswith(filetype)]

if contains:
all_files = [file for file in all_files if contains in file]
if isinstance(contains, str):
contains = [contains]
for substr in contains:
all_files = [file for file in all_files if substr in file]

if not_contains:
all_files = [file for file in all_files if not_contains not in file]
if isinstance(not_contains, str):
not_contains = [not_contains]
for substr in not_contains:
all_files = [file for file in all_files if substr not in file]

return all_files
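
With this change, contains and not_contains accept either a single substring or a list of substrings (every entry must match, or be absent, respectively), which the new ivaf_AIS file selection in process.py relies on. A usage sketch with a placeholder directory:

from ise.utils.functions import get_all_filepaths

files = get_all_filepaths(
    "/path/to/ComputedScalarsPaper",  # placeholder directory
    filetype="nc",
    contains="ivaf_AIS",              # a plain string still works
    not_contains=["hist", "ctrl"],    # list: drop historical and control runs
)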

@@ -629,7 +635,7 @@ def to_tensor(x):
"""
if x is None:
return None
if isinstance(x, pd.DataFrame):
if isinstance(x, pd.DataFrame) or isinstance(x, pd.Series):
x = torch.tensor(x.values, dtype=torch.float32)
elif isinstance(x, np.ndarray):
x = torch.tensor(x, dtype=torch.float32)
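
to_tensor now also accepts a pandas Series alongside the existing DataFrame and numpy-array branches, converting it to a float32 tensor via .values. A small check with hypothetical values:

import pandas as pd
from ise.utils.functions import to_tensor

s = pd.Series([0.1, 0.2, 0.3], name="sle")
t = to_tensor(s)
print(t.dtype, t.shape)  # torch.float32 torch.Size([3])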
