-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsl_models.py
63 lines (50 loc) · 2.04 KB
/
sl_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Shallow learning methods (e.g., logistic regression, random forest, SVM)
# used to fit the synthetic CTIS data.
class Imputer(BaseEstimator, TransformerMixin):
    """Fill missing values in selected columns of a DataFrame.

    Parameters
    ----------
    features : column label or list of column labels
        Columns of ``X`` whose missing values are filled.
    method : str, default='constant'
        ``'mean'`` fills each selected column with its mean computed during
        ``fit``; any other setting fills with ``value``.
    value : object, default='missing'
        Fill value used when ``method`` is not ``'mean'``.

    Attributes
    ----------
    fill_value_ : object
        Set by ``fit``. The per-column means when ``method='mean'``,
        otherwise a reference to ``value``.
    """

    def __init__(self, features, method='constant', value='missing'):
        self.features = features
        self.method = method
        self.value = value

    def fit(self, X, y=None):
        """Learn the fill value from ``X``; ``y`` is ignored. Returns ``self``."""
        # Learned state lives in a trailing-underscore attribute so that the
        # constructor parameter ``value`` is never overwritten: get_params(),
        # clone() and repr() keep reporting what the caller passed in.
        if self.method == 'mean':
            self.fill_value_ = X[self.features].mean()
        else:
            self.fill_value_ = self.value
        return self

    def transform(self, X):
        """Return a copy of ``X`` with missing values in ``features`` filled."""
        # Only the 'mean' strategy depends on fitted state. Everything else
        # (including an unfitted instance) fills with the constant ``value``.
        if self.method == 'mean' and hasattr(self, 'fill_value_'):
            fill = self.fill_value_
        else:
            fill = self.value
        X_transformed = X.copy()
        X_transformed[self.features] = X[self.features].fillna(fill)
        return X_transformed
class Scaler(BaseEstimator, TransformerMixin):
    """Min-max scale selected columns of a DataFrame.

    Each selected column is transformed as ``(x - min) / (max - min)`` using
    the minimum and maximum observed during ``fit``.

    Parameters
    ----------
    features : column label or list of column labels
        Columns of ``X`` to scale.

    Attributes
    ----------
    min : scalar or Series
        Minimum of the selected column(s) seen in ``fit``.
    range : scalar or Series
        ``max - min`` of the selected column(s) seen in ``fit``.
    """

    def __init__(self, features):
        self.features = features

    def fit(self, X, y=None):
        """Record min and range of ``features`` in ``X``; ``y`` is ignored."""
        selected = X[self.features]
        self.min = selected.min()
        self.range = selected.max() - self.min
        return self

    def transform(self, X):
        """Return a copy of ``X`` with ``features`` min-max scaled."""
        X_transformed = X.copy()
        X_transformed[self.features] = (
            (X[self.features] - self.min) / self._safe_range()
        )
        return X_transformed

    def _safe_range(self):
        """Return the fitted range with zeros replaced by 1."""
        # A column that was constant during fit has range 0; dividing by it
        # would turn the whole column into NaN (0/0). Dividing by 1 instead
        # maps such a column to 0.
        span = self.range
        if hasattr(span, 'where'):
            # Series case (list of features): keep non-zero entries as-is.
            return span.where(span != 0, 1)
        # Scalar case (single column label).
        return span if span != 0 else 1
class Encoder(BaseEstimator, TransformerMixin):
    """One-hot encode selected columns of a DataFrame.

    Parameters
    ----------
    features : list of column labels
        Columns of ``X`` to replace with one-hot indicator columns.
        (Presumably a list, so that ``X[features]`` is two-dimensional as
        ``OneHotEncoder`` expects -- confirm against callers.)
    drop : default='first'
        Forwarded to ``OneHotEncoder(drop=...)``.

    Attributes
    ----------
    encoder : OneHotEncoder
        The underlying encoder, created and fitted in ``fit``.
    """

    def __init__(self, features, drop='first'):
        self.features = features
        self.drop = drop

    def fit(self, X, y=None):
        """Fit the one-hot encoder on ``X[features]``; ``y`` is ignored."""
        # scikit-learn 1.2 renamed ``sparse`` to ``sparse_output`` and 1.4
        # removed the old keyword. Try the current name first and fall back
        # so that older releases keep working.
        try:
            self.encoder = OneHotEncoder(sparse_output=False, drop=self.drop)
        except TypeError:
            self.encoder = OneHotEncoder(sparse=False, drop=self.drop)
        self.encoder.fit(X[self.features])
        return self

    def transform(self, X):
        """Return ``X`` with ``features`` replaced by their one-hot columns.

        The result has a fresh 0..n-1 index: the untouched columns are
        re-indexed so they line up row-by-row with the encoded block.
        """
        untouched = X.drop(columns=self.features).reset_index(drop=True)
        encoded = pd.DataFrame(
            self.encoder.transform(X[self.features]),
            columns=self.encoder.get_feature_names_out(self.features),
        )
        X_transformed = pd.concat([untouched, encoded], axis=1)
        return X_transformed
if __name__ == "__main__":
    # Script entry point: assemble the preprocessing + logistic-regression
    # pipeline and fit it on the training split.
    # NOTE(review): NUMERICAL, CATEGORICAL, X_train and y_train are not
    # defined in the visible part of this file -- confirm they are provided
    # before this block runs.
    num_imputer = Imputer(NUMERICAL, method='mean')
    scaler = Scaler(NUMERICAL)
    cat_imputer = Imputer(CATEGORICAL)
    encoder = Encoder(CATEGORICAL)
    model = LogisticRegression()

    # Numerical columns are imputed and scaled first, then categorical
    # columns are imputed and one-hot encoded, and finally the model is fit.
    pipe = Pipeline([
        ('num_imputer', num_imputer),
        ('scaler', scaler),
        ('cat_imputer', cat_imputer),
        ('encoder', encoder),
        ('model', model),
    ])
    pipe.fit(X_train, y_train)