@@ -54,7 +54,8 @@ class RSAST(BaseCollectionTransformer):
54
54
55
55
Parameters
56
56
----------
57
- n_random_points: int default = 10 the number of initial random points to extract
57
+ n_random_points: int default = 10
58
+ the number of initial random points to extract
58
59
len_method: string default="both" the type of statistical tool used to get
59
60
the length of shapelets. "both"=ACF&PACF, "ACF"=ACF, "PACF"=PACF,
60
61
"None"=Extract randomly any length from the TS
@@ -63,10 +64,27 @@ class RSAST(BaseCollectionTransformer):
63
64
the number of reference time series to select per class
64
65
seed : int, default = None
65
66
the seed of the random generator
66
- classifier : sklearn compatible classifier, default = None
67
+ estimator : sklearn compatible classifier, default = None
67
68
if None, a RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)) is used.
68
69
n_jobs : int, default -1
69
70
Number of threads to use for the transform.
71
+
72
+ Attributes
73
+ ----------
74
+ _kernels : list
75
+ The z-normalized subsequences used for transformation.
76
+ _kernel_orig : list
77
+ The original (non z-normalized) subsequences.
78
+ _start_positions : list
79
+ The starting positions of each subsequence within the original time series.
80
+ _classes : list
81
+ The class labels associated with each subsequence.
82
+ _source_series : list
83
+ The index of the original time series in the training set from which each
84
+ subsequence was derived.
85
+ _kernels_generators : dict
86
+ A dictionary mapping class labels to the selected reference time series
87
+ for that class.
70
88
71
89
References
72
90
----------
@@ -112,6 +130,9 @@ def __init__(
112
130
self ._kernels = None # z-normalized subsequences
113
131
self ._cand_length_list = {}
114
132
self ._kernel_orig = []
133
+ self ._start_positions = []
134
+ self ._classes = []
135
+ self ._source_series = [] # To store the index of the original time series
115
136
self ._kernels_generators = {} # Reference time series
116
137
super ().__init__ ()
117
138
@@ -154,7 +175,12 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, List]) -> "RSAST":
154
175
self .num_classes = classes .shape [0 ]
155
176
m_kernel = 0
156
177
157
- # 1--calculate ANOVA per each time t throught the lenght of the TS
178
+ # Initialize lists to store start positions, classes, and source series
179
+ self ._start_positions = []
180
+ self ._classes = []
181
+ self ._source_series = []
182
+
183
+ # 1--calculate ANOVA per each time t throughout the length of the TS
158
184
for i in range (X_ .shape [1 ]):
159
185
statistic_per_class = {}
160
186
for c in classes :
@@ -184,12 +210,16 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, List]) -> "RSAST":
184
210
X_c = X_ [y == c ]
185
211
186
212
cnt = np .min ([self .nb_inst_per_class , X_c .shape [0 ]]).astype (int )
213
+
214
+ # Store the original indices of the sampled time series
215
+ original_indices = np .where (y == c )[0 ]
187
216
188
- choosen = self ._random_state .permutation (X_c .shape [0 ])[:cnt ]
217
+ chosen_indices = self ._random_state .permutation (X_c .shape [0 ])[:cnt ]
189
218
190
219
self ._kernels_generators [c ] = []
191
220
192
- for rep , idx in enumerate (choosen ):
221
+ for rep , idx in enumerate (chosen_indices ):
222
+ original_idx = original_indices [idx ] # Get the original index
193
223
# defining indices for length list
194
224
idx_len_list = c + "," + str (idx ) + "," + str (rep )
195
225
@@ -290,6 +320,11 @@ def _fit(self, X: np.ndarray, y: Union[np.ndarray, List]) -> "RSAST":
290
320
self ._kernel_orig .append (np .squeeze (kernel ))
291
321
self ._kernels_generators [c ].extend (X_c [idx ].reshape (1 , - 1 ))
292
322
323
+ # Store the start position, class, and the original index in the training set
324
+ self ._start_positions .append (i )
325
+ self ._classes .append (c )
326
+ self ._source_series .append (original_idx )
327
+
293
328
# 3--save the calculated subsequences
294
329
n_kernels = len (self ._kernel_orig )
295
330
0 commit comments