-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathtrain.py
70 lines (54 loc) · 1.92 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd
import skops.io as sio
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, f1_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
## Loading the Data
drug_df = pd.read_csv("Data/drug.csv")
drug_df = drug_df.sample(frac=1)
## Train Test Split
from sklearn.model_selection import train_test_split
X = drug_df.drop("Drug", axis=1).values
y = drug_df.Drug.values
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.3, random_state=125
)
## Pipeline
cat_col = [1,2,3]
num_col = [0,4]
transform = ColumnTransformer(
[
("encoder", OrdinalEncoder(), cat_col),
("num_imputer", SimpleImputer(strategy="median"), num_col),
("num_scaler", StandardScaler(), num_col),
]
)
pipe = Pipeline(
steps=[
("preprocessing", transform),
("model", RandomForestClassifier(n_estimators=10, random_state=125)),
]
)
## Training
pipe.fit(X_train, y_train)
## Model Evaluation
predictions = pipe.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
f1 = f1_score(y_test, predictions, average="macro")
print("Accuracy:", str(round(accuracy, 2) * 100) + "%", "F1:", round(f1, 2))
## Confusion Matrix Plot
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
predictions = pipe.predict(X_test)
cm = confusion_matrix(y_test, predictions, labels=pipe.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=pipe.classes_)
disp.plot()
plt.savefig("./Results/model_results.png", dpi=120)
## Write metrics to file
with open("./Results/metrics.txt", "w") as outfile:
outfile.write(f"\nAccuracy = {round(accuracy, 2)}, F1 Score = {round(f1, 2)}")
## Saving the model file
sio.dump(pipe, "./Model/drug_pipeline.skops")