[ENH] Added useful class attributes to SAST based transforms #2006

Merged: 16 commits, Oct 31, 2024
3 changes: 3 additions & 0 deletions aeon/classification/shapelet_based/_sast.py
@@ -180,6 +180,9 @@ def plot_most_important_feature_on_ts(self, ts, feature_importance, limit: int =
"""
import matplotlib.pyplot as plt

# get overall importance irrespective of class
feature_importance = [abs(x) for x in feature_importance]

features = zip(self._transformer._kernel_orig, feature_importance)
sorted_features = sorted(features, key=itemgetter(1), reverse=True)

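The absolute value is taken so that the ranking reflects overall influence irrespective of class: the coefficients of a linear classifier such as the default ridge classifier are signed, and sorting on raw values would bury features that strongly indicate the other class. A minimal sketch of the ranking step that follows, with hypothetical importance values and placeholder kernel names:

```python
from operator import itemgetter

# Hypothetical coefficients: negative values matter as much as positive ones.
feature_importance = [0.2, -0.9, 0.5]
feature_importance = [abs(x) for x in feature_importance]  # -> [0.2, 0.9, 0.5]

kernels = ["shp_a", "shp_b", "shp_c"]  # stand-ins for the stored subsequences
sorted_features = sorted(
    zip(kernels, feature_importance), key=itemgetter(1), reverse=True
)
# -> [("shp_b", 0.9), ("shp_c", 0.5), ("shp_a", 0.2)]
```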
29 changes: 23 additions & 6 deletions aeon/transformations/collection/shapelet_based/_rsast.py
@@ -56,7 +56,8 @@ class RSAST(BaseCollectionTransformer):

Parameters
----------
n_random_points: int default = 10 the number of initial random points to extract
n_random_points: int default = 10
the number of initial random points to extract
len_method: string default="both" the type of statistical tool used to get
the length of shapelets. "both"=ACF&PACF, "ACF"=ACF, "PACF"=PACF,
"None"=Extract randomly any length from the TS
Expand All @@ -65,8 +66,6 @@ class RSAST(BaseCollectionTransformer):
the number of reference time series to select per class
seed : int, default = None
the seed of the random generator
classifier : sklearn compatible classifier, default = None
if None, a RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)) is used.
n_jobs : int, default -1
Number of threads to use for the transform.

@@ -114,6 +113,9 @@ def __init__(
self._kernels = None # z-normalized subsequences
self._cand_length_list = {}
self._kernel_orig = []
self._start_points = []
self._classes = []
self._source_series = [] # To store the index of the original time series
self._kernels_generators = {} # Reference time series
super().__init__()

@@ -156,7 +158,12 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, list]) -> "RSAST":
self.num_classes = classes.shape[0]
m_kernel = 0

# 1--calculate ANOVA per each time t throught the lenght of the TS
# Initialize lists to store start positions, classes, and source series
self._start_points = []
self._classes = []
self._source_series = []

# 1--calculate ANOVA per each time t throughout the length of the TS
for i in range(X_.shape[1]):
statistic_per_class = {}
for c in classes:
@@ -187,11 +194,15 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, list]) -> "RSAST":

cnt = np.min([self.nb_inst_per_class, X_c.shape[0]]).astype(int)

choosen = self._random_state.permutation(X_c.shape[0])[:cnt]
# Store the original indices of the sampled time series
original_indices = np.where(y == c)[0]

chosen_indices = self._random_state.permutation(X_c.shape[0])[:cnt]

self._kernels_generators[c] = []

for rep, idx in enumerate(choosen):
for rep, idx in enumerate(chosen_indices):
original_idx = original_indices[idx] # Get the original index
# defining indices for length list
idx_len_list = c + "," + str(idx) + "," + str(rep)

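A toy illustration (hypothetical labels) of the index bookkeeping introduced above: `np.where(y == c)[0]` lists the rows of the full training set that belong to class `c`, so a within-class index drawn from the permutation can be translated back to a global row index.

```python
import numpy as np

y = np.array(["a", "b", "a", "b", "a"])
original_indices = np.where(y == "a")[0]  # rows of class "a" -> array([0, 2, 4])

idx = 1                                   # second sampled series of class "a"
original_idx = original_indices[idx]      # -> 2, its row in the training set
```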
Expand Down Expand Up @@ -292,6 +303,12 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, list]) -> "RSAST":
self._kernel_orig.append(np.squeeze(kernel))
self._kernels_generators[c].extend(X_c[idx].reshape(1, -1))

# Store the start position,
# class, and the original index in the training set
self._start_points.append(i)
self._classes.append(c)
self._source_series.append(original_idx)

# 3--save the calculated subsequences
n_kernels = len(self._kernel_orig)

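For orientation, a minimal usage sketch (toy data, illustrative parameters) of the three new attributes on a fitted RSAST: the lists are parallel, so entry `k` of `_start_points`, `_classes`, and `_source_series` describes the `k`-th extracted subsequence.

```python
import numpy as np
from aeon.transformations.collection.shapelet_based import RSAST

rng = np.random.default_rng(42)
X = rng.normal(size=(8, 1, 40))      # 8 univariate series of length 40
y = np.array(["a"] * 4 + ["b"] * 4)

rsast = RSAST(n_random_points=5, seed=42).fit(X, y)

k = 0                                # inspect the first stored subsequence
print(rsast._start_points[k])        # start offset within its source series
print(rsast._classes[k])             # class of the series it was taken from
print(rsast._source_series[k])       # row index of that series in X
```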
30 changes: 25 additions & 5 deletions aeon/transformations/collection/shapelet_based/_sast.py
@@ -50,17 +50,18 @@ class SAST(BaseCollectionTransformer):
----------
lengths : int[], default = None
an array containing the lengths of the subsequences
to be generated. If None, will be infered during fit
to be generated. If None, will be inferred during fit
as np.arange(3, X.shape[1])
stride : int, default = 1
the stride used when generating subsquences
nb_inst_per_class : int default = 1
the stride used when generating subsequences
nb_inst_per_class : int, default = 1
the number of reference time series to select per class
seed : int, default = None
the seed of the random generator
n_jobs : int, default -1
Number of threads to use for the transform.
The available cpu count is used if this value is less than 1
The available CPU count is used if this value is less than 1


References
----------
@@ -104,6 +105,9 @@ def __init__(
self.nb_inst_per_class = nb_inst_per_class
self._kernels = None # z-normalized subsequences
self._kernel_orig = None # non z-normalized subsequences
self._start_points = [] # To store the start positions
self._classes = [] # To store the class of each shapelet
self._source_series = [] # To store the index of the original time series
self.kernels_generators_ = {} # Reference time series
self.n_jobs = n_jobs
self.seed = seed
@@ -137,8 +141,10 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, list]) -> "SAST":

classes = np.unique(y)
self._num_classes = classes.shape[0]

class_values_of_candidates = []
candidates_ts = []
source_series_indices = [] # List to store original indices

for c in classes:
X_c = X_[y == c]

@@ -148,6 +154,10 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, list]) -> "SAST":
choosen = self._random_state.permutation(X_c.shape[0])[:cnt]
candidates_ts.append(X_c[choosen])
self.kernels_generators_[c] = X_c[choosen]
class_values_of_candidates.extend([c] * cnt)
source_series_indices.extend(
np.where(y == c)[0][choosen]
) # Record the original indices

candidates_ts = np.concatenate(candidates_ts, axis=0)

@@ -163,6 +173,9 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, list]) -> "SAST":
(n_kernels, max_shp_length), dtype=np.float32, fill_value=np.nan
)
self._kernel_orig = []
self._start_points = [] # Reset start positions
self._classes = [] # Reset class information
self._source_series = [] # Reset source series information

k = 0
for shp_length in self._length_list:
@@ -172,6 +185,13 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, list]) -> "SAST":
can = np.squeeze(candidates_ts[i][j:end])
self._kernel_orig.append(can)
self._kernels[k, :shp_length] = z_normalise_series(can)
self._start_points.append(j) # Store the start position
self._classes.append(
class_values_of_candidates[i]
) # Store the class of the shapelet
self._source_series.append(
source_series_indices[i]
) # Store the original index of the time series
k += 1
return self

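The same bookkeeping applies to the SAST transformer. A hedged sketch with toy data: since `_classes` and `_source_series` run parallel to the generated kernels, the provenance of the whole candidate set can be summarised after `fit`.

```python
from collections import Counter

import numpy as np
from aeon.transformations.collection.shapelet_based import SAST

rng = np.random.default_rng(0)
X = rng.normal(size=(6, 1, 30))  # 6 univariate series of length 30
y = np.array([0, 0, 0, 1, 1, 1])

sast = SAST(seed=0).fit(X, y)

# One entry per generated kernel: how many subsequences each class contributed
print(Counter(sast._classes))
# ... and which training rows the reference series were drawn from.
print(sorted(set(sast._source_series)))
```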
160 changes: 160 additions & 0 deletions examples/classification/SastVsViz.ipynb

Large diffs are not rendered by default.