added attributes to rsast too

IRKnyazev · IRKnyazev · commit 98ee17687d91 · 2024-08-23T13:53:52.000+07:00
diff --git a/aeon/transformations/collection/shapelet_based/_rsast.py b/aeon/transformations/collection/shapelet_based/_rsast.py
@@ -54,7 +54,8 @@ class RSAST(BaseCollectionTransformer):
 
     Parameters
     ----------
-    n_random_points: int default = 10 the number of initial random points to extract
+    n_random_points : int, default = 10
+        the number of initial random points to extract
     len_method:  string default="both" the type of statistical tool used to get
     the length of shapelets. "both"=ACF&PACF, "ACF"=ACF, "PACF"=PACF,
     "None"=Extract randomly any length from the TS
@@ -63,10 +64,27 @@ class RSAST(BaseCollectionTransformer):
         the number of reference time series to select per class
     seed : int, default = None
         the seed of the random generator
-    classifier : sklearn compatible classifier, default = None
+    estimator : sklearn compatible classifier, default = None
         if None, a RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)) is used.
     n_jobs : int, default -1
         Number of threads to use for the transform.
+        
+    Attributes
+    ----------
+    _kernels : list
+        The z-normalized subsequences used for transformation.
+    _kernel_orig : list
+        The original (non z-normalized) subsequences.
+    _start_positions : list
+        The starting positions of each subsequence within the original time series.
+    _classes : list
+        The class labels associated with each subsequence.
+    _source_series : list
+        The index of the original time series in the training set from which each
+        subsequence was derived.
+    _kernels_generators : dict
+        A dictionary mapping class labels to the selected reference time series
+        for that class.
 
     References
     ----------
@@ -112,6 +130,9 @@ def __init__(
         self._kernels = None  # z-normalized subsequences
         self._cand_length_list = {}
         self._kernel_orig = []
+        self._start_positions = []  
+        self._classes = []  
+        self._source_series = []  # To store the index of the original time series
         self._kernels_generators = {}  # Reference time series
         super().__init__()
 
@@ -154,7 +175,12 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, List]) -> "RSAST":
         self.num_classes = classes.shape[0]
         m_kernel = 0
 
-        # 1--calculate ANOVA per each time t throught the lenght of the TS
+        # Initialize lists to store start positions, classes, and source series
+        self._start_positions = []
+        self._classes = []
+        self._source_series = []
+
+        # 1--calculate ANOVA per each time t throughout the length of the TS
         for i in range(X_.shape[1]):
             statistic_per_class = {}
             for c in classes:
@@ -184,12 +210,16 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, List]) -> "RSAST":
             X_c = X_[y == c]
 
             cnt = np.min([self.nb_inst_per_class, X_c.shape[0]]).astype(int)
+            
+            # Store the original indices of the sampled time series
+            original_indices = np.where(y == c)[0]
 
-            choosen = self._random_state.permutation(X_c.shape[0])[:cnt]
+            chosen_indices = self._random_state.permutation(X_c.shape[0])[:cnt]
 
             self._kernels_generators[c] = []
 
-            for rep, idx in enumerate(choosen):
+            for rep, idx in enumerate(chosen_indices):
+                original_idx = original_indices[idx]  # Get the original index
                 # defining indices for length list
                 idx_len_list = c + "," + str(idx) + "," + str(rep)
 
@@ -290,6 +320,11 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, List]) -> "RSAST":
                         self._kernel_orig.append(np.squeeze(kernel))
                         self._kernels_generators[c].extend(X_c[idx].reshape(1, -1))
 
+                        # Store start position, class and original training-set index
+                        self._start_positions.append(i)
+                        self._classes.append(c)
+                        self._source_series.append(original_idx)
+
         # 3--save the calculated subsequences
         n_kernels = len(self._kernel_orig)
 
diff --git a/aeon/transformations/collection/shapelet_based/_sast.py b/aeon/transformations/collection/shapelet_based/_sast.py
@@ -70,9 +70,11 @@ class SAST(BaseCollectionTransformer):
     _classes : list
         The class labels associated with each subsequence.
     _source_series: list
-        The index of the original time series in the training set from which each subsequence was derived.
+        The index of the original time series in the training set from which each
+        subsequence was derived.
     kernels_generators_ : dict
-        A dictionary mapping class labels to the selected reference time series for that class.
+        A dictionary mapping class labels to the selected reference time series
+        for that class.
 
 
     References