before master merge

Brown-SciML · Mar 20, 2024 · f3abb42 · f3abb42
1 parent af2f5db
commit f3abb42
Show file tree

Hide file tree

Showing 6 changed files with 129 additions and 14 deletions.
diff --git a/ise/data/dataclasses.py b/ise/data/dataclasses.py
@@ -75,6 +75,16 @@ def __len__(self):
             return self.X.shape[0] * self.X.shape[1]
 
     def __getitem__(self, i):
+        """
+        Returns the i-th item in the dataset.
+
+        Args:
+            i (int): Index of the item to retrieve.
+
+        Returns:
+            If `y` is None, returns the input sequence at index `i` as a PyTorch tensor.
+            Otherwise, returns a tuple containing the input sequence at index `i` and the corresponding target value.
+        """
         # Calculate projection index and timestep index
         projection_index = i // self.num_timesteps
         time_step_index = i % self.num_timesteps
@@ -104,30 +114,92 @@ def __getitem__(self, i):
 
 
 class PyTorchDataset(Dataset):
+    """
+    A PyTorch dataset for general data loading.
+
+    Args:
+        X (pandas.DataFrame, numpy.ndarray, or torch.Tensor): The input data.
+        y (pandas.DataFrame, numpy.ndarray, torch.Tensor, or None): The target data. If None, items are returned without a target.
+
+    Methods:
+        __getitem__(index): Returns the item at the given index.
+        __len__(): Returns the length of the dataset.
+    """
+
     def __init__(self, X, y):
         self.X_data = X
         self.y_data = y
 
     def __getitem__(self, index):
+        """
+        Returns the item at the given index.
+
+        Args:
+            index (int): Index of the item to retrieve.
+
+        Returns:
+            If `self.y_data` is None, returns `self.X_data[index]` alone.
+            Otherwise, returns the tuple `(self.X_data[index], self.y_data[index])`.
+        """
         if self.y_data is None:
             return self.X_data[index]
         return self.X_data[index], self.y_data[index]
 
     def __len__(self):
+        """
+        Returns the length of the dataset.
+
+        Returns:
+            int: The value of `len(self.X_data)`.
+        """
         return len(self.X_data)
 
 
 class TSDataset(Dataset):
+    """
+    A PyTorch dataset for time series data.
+
+    Args:
+        X (pandas.DataFrame, numpy.ndarray, or torch.Tensor): The input data.
+        y (pandas.DataFrame, numpy.ndarray, or torch.Tensor): The target data.
+        sequence_length (int): The length of the input sequence.
+
+    Attributes:
+        X: The input data, stored exactly as passed in (`__init__` performs no conversion).
+        y: The target data, stored exactly as passed in (`__init__` performs no conversion).
+        sequence_length (int): The length of the input sequence.
+
+    Methods:
+        __len__(): Returns the length of the dataset.
+        __getitem__(i): Returns the i-th item in the dataset.
+    """
+
     def __init__(self, X, y, sequence_length=5):
         super().__init__()
         self.X = X
         self.y = y
         self.sequence_length = sequence_length
 
     def __len__(self):
+        """
+        Returns the length of the dataset.
+
+        Returns:
+            int: The value of `len(self.X)`.
+        """
         return len(self.X)
 
     def __getitem__(self, i):
+        """
+        Returns the i-th item in the dataset.
+
+        Args:
+            i (int): Index of the item to retrieve.
+
+        Returns:
+            If `y` is None, returns the input sequence at index `i` as a PyTorch tensor.
+            Otherwise, returns a tuple containing the input sequence at index `i` and the corresponding target value.
+        """
         if i >= self.sequence_length - 1:
             i_start = i - self.sequence_length + 1
             x = self.X[i_start : (i + 1), :]

diff --git a/ise/models/experiments.py b/ise/models/experiments.py
@@ -4,7 +4,7 @@
 from torch import nn
 
 from ise.data._EmulatorData import EmulatorData
-from ise.models.sector import ExploratoryModel, TimeSeriesEmulator
+from ise.models.sector import ExploratoryModel, VariationalLSTMEmulator
 from ise.models.train import Trainer
 from ise.utils.functions import (
     _structure_architecture_args,
@@ -18,7 +18,7 @@ def lag_sequence_test(
     lag_array,
     sequence_array,
     iterations,
-    model_class=TimeSeriesEmulator,
+    model_class=VariationalLSTMEmulator,
     emulator_data_args=None,
     architecture=None,
     verbose=True,
@@ -91,7 +91,7 @@ def rnn_architecture_test(
     rnn_layers_array: List[int],
     hidden_nodes_array: List[int],
     iterations: int,
-    model_class=TimeSeriesEmulator,
+    model_class=VariationalLSTMEmulator,
     verbose: bool = True,
     epochs: int = 100,
     batch_size: int = 100,

diff --git a/ise/models/grid.py b/ise/models/grid.py
@@ -1,3 +1,6 @@
+
+
+
 import json
 import os
 import warnings
@@ -16,7 +19,6 @@
 
 class PCA(nn.Module):
     def __init__(self, n_components):
-
         """
         Principal Component Analysis (PCA) model.
 
@@ -587,7 +589,7 @@ def __init__(
                     "forcing_size and sle_size must be provided if weak_predictors is not provided"
                 )
             self.loss_choices = [torch.nn.MSELoss(), MSEDeviationLoss(threshold=1.0, penalty_multiplier=2.0), torch.nn.L1Loss(), torch.nn.HuberLoss()]
-            loss_probabilities = [.50, .15, .15, .2]
+            loss_probabilities = [.45, .05, .3, .2]
             self.weak_predictors = [
                 WeakPredictor(
                     lstm_num_layers=np.random.randint(low=1, high=3, size=1)[0],
@@ -894,14 +896,52 @@ def aleatoric(self, features, num_samples):
 
     def save(self, path):
         """
-        Saves the trained model to the specified path.
+        Saves the model's state dict to `path` and its metadata (`forcing_size`, `sle_size`) to `path + '_metadata.json'`.
 
         Args:
             path (str): The path to save the model.
         """
-        if not self.trained:
-            raise ValueError("This model has not been trained yet. Please train the model before saving.")
+        # Prepare metadata for saving
+        metadata = {
+            'forcing_size': self.forcing_size,
+            'sle_size': self.sle_size,
+        }
+        metadata_path = path + '_metadata.json'
+
+        # Save metadata
+        with open(metadata_path, 'w') as f:
+            json.dump(metadata, f, indent=4)
+
+        # Save model parameters
         torch.save(self.state_dict(), path)
+        print(f"Model and metadata saved to {path} and {metadata_path}, respectively.")
+
+    @staticmethod
+    def load(path):
+        """
+        Loads the NormalizingFlow model from the specified path.
+
+        Args:
+            path (str): Path of the saved state dict; metadata is read from `path + '_metadata.json'`.
+
+        Returns:
+            NormalizingFlow: The model rebuilt from the saved metadata, with parameters loaded and set to evaluation mode.
+        """
+        # Load metadata
+        metadata_path = path + '_metadata.json'
+        with open(metadata_path, 'r') as f:
+            metadata = json.load(f)
+
+        # Reconstruct the model using the loaded metadata
+        model = NormalizingFlow(forcing_size=metadata['forcing_size'], sle_size=metadata['sle_size'])
+
+        # Load the model parameters
+        model.load_state_dict(torch.load(path))
+        model.eval()  # Set the model to evaluation mode
+
+        return model
+
+
 
 
 
@@ -982,7 +1022,7 @@ def fit(self, X, y, epochs=100, nf_epochs=None, de_epochs=None, sequence_length=
             self.deep_ensemble.fit(X_latent, y, epochs=de_epochs, sequence_length=sequence_length)
         self.trained = True
 
-    def forward(self, x):
+    def forward(self, x, smooth_projection=False,):
         """
         Performs a forward pass through the hybrid emulator.
 
@@ -1000,6 +1040,9 @@ def forward(self, x):
         X_latent = torch.concatenate((x, z), axis=1)
         prediction, epistemic = self.deep_ensemble(X_latent)
         aleatoric = self.normalizing_flow.aleatoric(x, 100)
+
+        if smooth_projection:
+            stop = ''
         return prediction, epistemic, aleatoric
 
     def save(self, save_dir):

diff --git a/ise/models/sector.py b/ise/models/sector.py
@@ -44,7 +44,7 @@ def forward(self, x):
         return self.model(x)
 
 
-class TimeSeriesEmulator(torch.nn.Module):
+class VariationalLSTMEmulator(torch.nn.Module):
     def __init__(self, architecture, mc_dropout=False, dropout_prob=None):
         super().__init__()
         self.model_name = "TimeSeriesEmulator"

diff --git a/ise/pipelines/testing.py b/ise/pipelines/testing.py
@@ -7,7 +7,7 @@
 
 from ise.evaluation._tests import binned_sle_table, test_pretrained_model
 from ise.evaluation.plots import SectorPlotter
-from ise.models.sector import TimeSeriesEmulator
+from ise.models.sector import VariationalLSTMEmulator
 from ise.utils.functions import (
     calculate_distribution_metrics,
     combine_testing_results,
@@ -87,7 +87,7 @@ def analyze_model(
     if isinstance(model_path, str):
         model = load_model(
             model_path=model_path,
-            model_class=TimeSeriesEmulator,
+            model_class=VariationalLSTMEmulator,
             architecture=architecture,
             mc_dropout=mc_dropout,
             dropout_prob=dropout_prob,

diff --git a/ise/pipelines/training.py b/ise/pipelines/training.py
@@ -12,7 +12,7 @@
 from torch import nn
 
 from ise.models.gp import GP
-from ise.models.sector import ExploratoryModel, TimeSeriesEmulator
+from ise.models.sector import ExploratoryModel, VariationalLSTMEmulator
 from ise.models.train import Trainer
 from ise.utils.functions import unscale_column
 
@@ -22,7 +22,7 @@ def train_timeseries_network(
     architecture: dict = None,
     epochs: int = 20,
     batch_size: int = 100,
-    model_class=TimeSeriesEmulator,
+    model_class=VariationalLSTMEmulator,
     loss=nn.MSELoss(),
     mc_dropout: bool = True,
     dropout_prob: float = 0.1,