main.py
import sys
import keras
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from src.pythonSQL.v3.data_loader_v3 import get_data
from model.nn.shipType import gen_compiled_ship_type_classifier_model
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, f1_score
from imblearn.over_sampling import SMOTE
import model.sciKitModels as sciKitModels


def train(use_smote: bool = False, df_column_suffix: str = "") -> pd.DataFrame:
    # Get data from pythonSQL V3
    dataset = get_data()

    # Prepare Data
    df_y = dataset.pop("response")
    df_x = dataset

    # Smote Oversampling
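    # SMOTE synthesizes new minority-class samples (by interpolating between
    # nearest neighbours) so the class distribution is roughly balanced before
    # scaling and splitting.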
    if use_smote:
        smote = SMOTE()
        df_x, df_y = smote.fit_resample(df_x, df_y)

    scaler = StandardScaler()
    df_x = scaler.fit_transform(df_x)
    df_one_hot_y = pd.get_dummies(df_y)

    test_size = 0.4
    random_state = 1

    # Split Data into Train and Test
    df_train_x, df_test_x = train_test_split(
        df_x, test_size=test_size, random_state=random_state
    )
    df_train_y, df_test_y = train_test_split(
        df_y, test_size=test_size, random_state=random_state
    )
    df_train_one_hot_y, df_test_one_hot_y = train_test_split(
        df_one_hot_y, test_size=test_size, random_state=random_state
    )
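    # Note: calling train_test_split three times with the same fixed
    # random_state shuffles each array with the same permutation, so the
    # x, y, and one-hot-y splits stay row-aligned with one another.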

    input_dim = df_train_x.shape[1]
    output_dim = df_train_one_hot_y.shape[1]

    # Dataframe which stores Test results
    df_results = pd.DataFrame()

    # Create Callbacks for Keras Model here
    callback_early_stop = keras.callbacks.EarlyStopping(
        monitor="val_accuracy", patience=5, min_delta=0.01, start_from_epoch=10
    )

    # Model 1: used to classify ship type based on a trip
    ship_type_classifier = gen_compiled_ship_type_classifier_model(
        input_dim=input_dim, output_dim=output_dim
    )
    ship_type_classifier.fit(
        x=df_train_x,
        y=df_train_one_hot_y,
        epochs=50,
        batch_size=1,
        shuffle=True,
        verbose=1,
        validation_split=0.2,
        callbacks=[callback_early_stop],
    )

    # Score the ship type classifier
    dict_network_score = {}
    dict_network_score["model Name"] = "Ship Type Classifier"
    network_prediction_y = ship_type_classifier.predict(df_test_x).argmax(axis=1)
    network_balanced_acc = balanced_accuracy_score(
        y_true=df_test_y, y_pred=network_prediction_y
    )
    print("Network Balanced Acc:", network_balanced_acc)
    dict_network_score[f"Balanced Accuracy {df_column_suffix}"] = round(
        network_balanced_acc, 3
    )
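    # f1_score here (and scoring="f1" in GridSearchCV below) uses the default
    # average="binary", which assumes the "response" label has exactly two
    # classes encoded as 0/1 (matching the argmax indices above); a multiclass
    # label would need an explicit average such as "macro".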
    network_f1 = f1_score(df_test_y, network_prediction_y)
    print("Network F1 Score:", network_f1)
    dict_network_score[f"F1 Score {df_column_suffix}"] = round(network_f1, 3)
    df_tmp = pd.DataFrame([dict_network_score])
    df_results = pd.concat([df_results, df_tmp], ignore_index=True)
    # print(ship_type_classifier.summary())

    # Model 2: used to reconstruct missing AIS Data based on a trip
    # reproduction_lstm = gen_compiled_LSTM_model()
    # reproduction_lstm.fit(x=x_2, epochs=20, callbacks=[callback])

    # get SciKit Models and their parameters
    # modify the model Parameters in the model/sciKitModels.py file
    models, params = sciKitModels.getAllModels()
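    # Each GridSearchCV below runs k-fold cross-validation (default cv=5) over
    # the model's parameter grid on the training split and, with refit=True,
    # refits the best configuration on the full training set before predicting.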
    for model, param in zip(models, params):
        clf = GridSearchCV(estimator=model, param_grid=param, scoring="f1", refit=True)
        clf.fit(X=df_train_x, y=df_train_y)
        best_model_pred = clf.predict(df_test_x)

        tmp_dict = {}
        model_name = model.__class__.__name__
        print("Model: ", model_name)
        tmp_dict["model Name"] = model_name

        best_parameter = clf.best_params_
        print("Parameter:", best_parameter)
        tmp_dict[f"best parameter {df_column_suffix}"] = best_parameter

        f_one = round(f1_score(df_test_y, best_model_pred), 3)
        print("F1 score:", f_one)
        tmp_dict[f"F1 Score {df_column_suffix}"] = f_one

        balanced_acc = round(balanced_accuracy_score(df_test_y, best_model_pred), 3)
        print("balanced test Acc:", balanced_acc)
        tmp_dict[f"Balanced Accuracy {df_column_suffix}"] = balanced_acc

        print(
            "confusion Matrix:\n",
            confusion_matrix(df_test_y, best_model_pred),
        )
        print()

        df_tmp = pd.DataFrame([tmp_dict])
        df_results = pd.concat([df_results, df_tmp], ignore_index=True)

    return df_results
    # model_list = [ship_type_classifier, reproduction_lstm]
    # return model_list


if __name__ == "__main__":
    df_no_smote = train()
    df_smote = train(use_smote=True, df_column_suffix="SMOTE")
    df_results = pd.merge(df_no_smote, df_smote, on="model Name")
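    # Assumes the figures/ directory already exists in the working directory;
    # DataFrame.to_csv does not create missing parent directories.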
    df_results.to_csv("figures/results.csv")