Skip to content

Commit

Permalink
before master merge
Browse files Browse the repository at this point in the history
  • Loading branch information
pvankatwyk committed Mar 20, 2024
1 parent af2f5db commit f3abb42
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 14 deletions.
72 changes: 72 additions & 0 deletions ise/data/dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@ def __len__(self):
return self.X.shape[0] * self.X.shape[1]

def __getitem__(self, i):
"""
Returns the i-th item in the dataset.
Args:
i (int): Index of the item to retrieve.
Returns:
If `y` is None, returns the input sequence at index `i` as a PyTorch tensor.
Otherwise, returns a tuple containing the input sequence at index `i` and the corresponding target value.
"""
# Calculate projection index and timestep index
projection_index = i // self.num_timesteps
time_step_index = i % self.num_timesteps
Expand Down Expand Up @@ -104,30 +114,92 @@ def __getitem__(self, i):


class PyTorchDataset(Dataset):
    """
    A PyTorch dataset for general data loading.

    Thin wrapper around a pair of index-aligned containers. Indexing and
    length are delegated directly to the wrapped objects (``X[index]`` and
    ``len(X)``); no conversion or copying is performed.

    Args:
        X (pandas.DataFrame, numpy.ndarray, or torch.Tensor): The input data.
        y (pandas.DataFrame, numpy.ndarray, or torch.Tensor, optional): The
            target data. Defaults to None, in which case items are returned
            without a target (e.g. for inference).

    Methods:
        __getitem__(index): Returns the item at the given index.
        __len__(): Returns the length of the dataset.

    Note:
        Because items are fetched with ``X[index]``, a pandas.DataFrame is
        indexed by column label rather than by row position. Convert it
        (e.g. ``df.to_numpy()``) first if row-wise access is intended.
    """

    def __init__(self, X, y=None):
        super().__init__()
        self.X_data = X
        self.y_data = y

    def __getitem__(self, index):
        """
        Returns the item at the given index.

        Args:
            index (int): Index of the item to retrieve.

        Returns:
            If `y` is None, returns the input data at index `index`.
            Otherwise, returns a tuple containing the input data at index
            `index` and the corresponding target value.
        """
        if self.y_data is None:
            return self.X_data[index]
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        """
        Returns the length of the dataset.

        Returns:
            int: ``len(X)``, i.e. the number of items along the first axis
            of the input data.
        """
        return len(self.X_data)


class TSDataset(Dataset):
"""
A PyTorch dataset for time series data.
Args:
X (pandas.DataFrame, numpy.ndarray, or torch.Tensor): The input data.
y (pandas.DataFrame, numpy.ndarray, or torch.Tensor): The target data.
sequence_length (int): The length of the input sequence.
Attributes:
X (torch.Tensor): The input data as a PyTorch tensor.
y (torch.Tensor): The target data as a PyTorch tensor.
sequence_length (int): The length of the input sequence.
Methods:
__len__(): Returns the length of the dataset.
__getitem__(i): Returns the i-th item in the dataset.
"""

def __init__(self, X, y, sequence_length=5):
super().__init__()
self.X = X
self.y = y
self.sequence_length = sequence_length

def __len__(self):
"""
Returns the length of the dataset.
Returns:
The length of the dataset.
"""
return len(self.X)

def __getitem__(self, i):
"""
Returns the i-th item in the dataset.
Args:
i (int): Index of the item to retrieve.
Returns:
If `y` is None, returns the input sequence at index `i` as a PyTorch tensor.
Otherwise, returns a tuple containing the input sequence at index `i` and the corresponding target value.
"""
if i >= self.sequence_length - 1:
i_start = i - self.sequence_length + 1
x = self.X[i_start : (i + 1), :]
Expand Down
6 changes: 3 additions & 3 deletions ise/models/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from torch import nn

from ise.data._EmulatorData import EmulatorData
from ise.models.sector import ExploratoryModel, TimeSeriesEmulator
from ise.models.sector import ExploratoryModel, VariationalLSTMEmulator
from ise.models.train import Trainer
from ise.utils.functions import (
_structure_architecture_args,
Expand All @@ -18,7 +18,7 @@ def lag_sequence_test(
lag_array,
sequence_array,
iterations,
model_class=TimeSeriesEmulator,
model_class=VariationalLSTMEmulator,
emulator_data_args=None,
architecture=None,
verbose=True,
Expand Down Expand Up @@ -91,7 +91,7 @@ def rnn_architecture_test(
rnn_layers_array: List[int],
hidden_nodes_array: List[int],
iterations: int,
model_class=TimeSeriesEmulator,
model_class=VariationalLSTMEmulator,
verbose: bool = True,
epochs: int = 100,
batch_size: int = 100,
Expand Down
55 changes: 49 additions & 6 deletions ise/models/grid.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@



import json
import os
import warnings
Expand All @@ -16,7 +19,6 @@

class PCA(nn.Module):
def __init__(self, n_components):

"""
Principal Component Analysis (PCA) model.
Expand Down Expand Up @@ -587,7 +589,7 @@ def __init__(
"forcing_size and sle_size must be provided if weak_predictors is not provided"
)
self.loss_choices = [torch.nn.MSELoss(), MSEDeviationLoss(threshold=1.0, penalty_multiplier=2.0), torch.nn.L1Loss(), torch.nn.HuberLoss()]
loss_probabilities = [.50, .15, .15, .2]
loss_probabilities = [.45, .05, .3, .2]
self.weak_predictors = [
WeakPredictor(
lstm_num_layers=np.random.randint(low=1, high=3, size=1)[0],
Expand Down Expand Up @@ -894,14 +896,52 @@ def aleatoric(self, features, num_samples):

def save(self, path):
    """
    Saves the model parameters and metadata to the specified path.

    Two files are written: the model ``state_dict`` at ``path`` and a JSON
    sidecar at ``path + '_metadata.json'`` holding the values
    (``forcing_size``, ``sle_size``) that ``load`` needs to rebuild the
    model before restoring its weights.

    Args:
        path (str or os.PathLike): The path to save the model.

    Raises:
        ValueError: If the model has not been trained yet.
    """
    if not self.trained:
        raise ValueError("This model has not been trained yet. Please train the model before saving.")

    # Normalise to str so pathlib.Path arguments work with the suffix
    # concatenation below (Path + str raises TypeError).
    path = os.fspath(path)
    metadata_path = path + '_metadata.json'

    # Prepare metadata for saving
    metadata = {
        'forcing_size': self.forcing_size,
        'sle_size': self.sle_size,
    }

    # Save metadata; explicit encoding keeps the file identical across platforms.
    with open(metadata_path, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=4)

    # Save model parameters
    torch.save(self.state_dict(), path)
    print(f"Model and metadata saved to {path} and {metadata_path}, respectively.")

@staticmethod
def load(path):
    """
    Loads the NormalizingFlow model from the specified path.

    Expects the two files written by ``save``: the ``state_dict`` at
    ``path`` and the JSON sidecar at ``path + '_metadata.json'``.

    Args:
        path (str or os.PathLike): The path to load the model from.

    Returns:
        NormalizingFlow: The loaded NormalizingFlow model, in evaluation
        mode and flagged as trained.

    Raises:
        FileNotFoundError: If the metadata file or the checkpoint is missing.
    """
    # Normalise to str so pathlib.Path arguments work with the suffix
    # concatenation below (Path + str raises TypeError).
    path = os.fspath(path)

    # Load metadata
    metadata_path = path + '_metadata.json'
    with open(metadata_path, 'r', encoding='utf-8') as f:
        metadata = json.load(f)

    # Reconstruct the model using the loaded metadata
    model = NormalizingFlow(forcing_size=metadata['forcing_size'], sle_size=metadata['sle_size'])

    # Load the model parameters. map_location='cpu' lets a checkpoint saved
    # from a GPU be read on a CPU-only machine; load_state_dict then copies
    # the values into the model's own parameters.
    # NOTE(review): torch.load unpickles the file -- only load checkpoints
    # from trusted sources.
    state_dict = torch.load(path, map_location='cpu')
    model.load_state_dict(state_dict)

    # The weights come from a trained model; without this flag `save` would
    # refuse to write the loaded model back out.
    model.trained = True
    model.eval()  # Set the model to evaluation mode

    return model





Expand Down Expand Up @@ -982,7 +1022,7 @@ def fit(self, X, y, epochs=100, nf_epochs=None, de_epochs=None, sequence_length=
self.deep_ensemble.fit(X_latent, y, epochs=de_epochs, sequence_length=sequence_length)
self.trained = True

def forward(self, x):
def forward(self, x, smooth_projection=False,):
"""
Performs a forward pass through the hybrid emulator.
Expand All @@ -1000,6 +1040,9 @@ def forward(self, x):
X_latent = torch.concatenate((x, z), axis=1)
prediction, epistemic = self.deep_ensemble(X_latent)
aleatoric = self.normalizing_flow.aleatoric(x, 100)

if smooth_projection:
stop = ''
return prediction, epistemic, aleatoric

def save(self, save_dir):
Expand Down
2 changes: 1 addition & 1 deletion ise/models/sector.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def forward(self, x):
return self.model(x)


class TimeSeriesEmulator(torch.nn.Module):
class VariationalLSTMEmulator(torch.nn.Module):
def __init__(self, architecture, mc_dropout=False, dropout_prob=None):
super().__init__()
self.model_name = "TimeSeriesEmulator"
Expand Down
4 changes: 2 additions & 2 deletions ise/pipelines/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from ise.evaluation._tests import binned_sle_table, test_pretrained_model
from ise.evaluation.plots import SectorPlotter
from ise.models.sector import TimeSeriesEmulator
from ise.models.sector import VariationalLSTMEmulator
from ise.utils.functions import (
calculate_distribution_metrics,
combine_testing_results,
Expand Down Expand Up @@ -87,7 +87,7 @@ def analyze_model(
if isinstance(model_path, str):
model = load_model(
model_path=model_path,
model_class=TimeSeriesEmulator,
model_class=VariationalLSTMEmulator,
architecture=architecture,
mc_dropout=mc_dropout,
dropout_prob=dropout_prob,
Expand Down
4 changes: 2 additions & 2 deletions ise/pipelines/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from torch import nn

from ise.models.gp import GP
from ise.models.sector import ExploratoryModel, TimeSeriesEmulator
from ise.models.sector import ExploratoryModel, VariationalLSTMEmulator
from ise.models.train import Trainer
from ise.utils.functions import unscale_column

Expand All @@ -22,7 +22,7 @@ def train_timeseries_network(
architecture: dict = None,
epochs: int = 20,
batch_size: int = 100,
model_class=TimeSeriesEmulator,
model_class=VariationalLSTMEmulator,
loss=nn.MSELoss(),
mc_dropout: bool = True,
dropout_prob: float = 0.1,
Expand Down

0 comments on commit f3abb42

Please sign in to comment.