-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_scalers.py
75 lines (56 loc) · 2.26 KB
/
data_scalers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# Fit a MinMaxScaler to the dataset
def data_preprocessing_minmax_scaler_fit(data):
    """
    Fit a MinMaxScaler to the dataset.

    Parameters:
    - data (pandas.DataFrame): The dataset to fit the scaler on.

    Returns:
    - MinMaxScaler: The fitted scaler.

    Raises:
    - ValueError: If MinMaxScaler.fit raises ValueError for the given data.
      The original error is chained and its text is included in the message.
    """
    try:
        # The value returned by fit() is what callers receive as the fitted
        # scaler, to be used later for transforming data.
        return MinMaxScaler().fit(data)
    except ValueError as err:
        # Report the scaler's own error. The earlier message described a PCA
        # component-count problem, which this function never checks.
        raise ValueError(
            f"MinMaxScaler could not be fitted to the data: {err}"
        ) from err
# Transform the dataset with an already fitted MinMaxScaler
def data_preprocessing_minmax_scaler_transform(scaler, data):
    """
    Transform the dataset with an already fitted MinMaxScaler.

    Parameters:
    - scaler (MinMaxScaler): The fitted scaler.
    - data: The dataset to scale; passed unchanged to scaler.transform.

    Returns:
    - numpy.ndarray: The scaled dataset, as returned by scaler.transform.

    Raises:
    - ValueError: If scaler.transform raises ValueError for the given data.
      The original error is chained and its text is included in the message.
    """
    try:
        return scaler.transform(data)
    except ValueError as err:
        # Report the scaler's own error. The earlier message described a PCA
        # component-count problem, which this function never checks.
        raise ValueError(
            f"MinMaxScaler could not transform the data: {err}"
        ) from err
# Fit a StandardScaler to the dataset and return the scaled data
def data_preprocessing_std_scaler(data):
    """
    Fit a StandardScaler to the dataset and return the scaled data.

    The scaler is created inside this function and is not returned, so it
    cannot be reused to transform other data afterwards.

    Parameters:
    - data (pandas.DataFrame): The dataset to scale.

    Returns:
    - numpy.ndarray: The scaled dataset.

    Raises:
    - ValueError: If StandardScaler.fit_transform raises ValueError for the
      given data. The original error is chained and its text is included in
      the message.
    """
    try:
        # Fit and transform in one step on the same data.
        return StandardScaler().fit_transform(data)
    except ValueError as err:
        # Report the scaler's own error. The earlier message described a PCA
        # component-count problem, which this function never checks.
        raise ValueError(
            f"StandardScaler could not scale the data: {err}"
        ) from err