-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_model.py
185 lines (166 loc) · 8.51 KB
/
run_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# Import required libraries
# Core Usage
from Core.Model import Workflow
# Regression Usage
from Regression.OrdinaryLeastSquare import MLR, POLY
from Regression.SupportVectorRegression import SVR
from Regression.DecisionTreeRegression import DTR
from Regression.RandomForestRegression import RFR
# Classification Usage
from Classification.LinearClassifiers import LogSR
import warnings
# Ignore warnings
warnings.filterwarnings('ignore')
class RunModel(Workflow):
def __init__(self, input_path, job_type, target, features=['ALL'], algorithms=['ALL'], file_format='csv',
missing_value=False, search_value=None, feature_selection=None, feature_scaling=False,
encode_categorical=False, binary_transform=False, categorical_features=None, binary_classifier=False
):
"""
Object Initialization for run_model
:param job_type: type of model that you are trying to apply
It could be any one of below
- Regression
- Classification
:param input_path: path to load input file
:param target: Target variable [Dependent Variable(Y)]
:param features: Features of matrix of X [Independent Variable]
:param algorithms: list of algorithms to be applied according to job_type, it can be anyone from below
- ALL --> ALL IN
- MLR --> MultiLinearRegression
- POLY --> PolynomialRegression
- SVR --> SupportVectorRegression
- DTREE --> DecisionTreeRegression & DecisionTreeClassification
- RFR --> RandomForestRegression
- SIGMOID --> LogisticRegression
- KNN --> K-NearestNeighbours Classifier
- SVM --> SupportVectorMachine Classifier
- RFC --> RandomForestClassifier
- BAYESIAN --> NaiveBayesClassifier
:param file_format: input file format to be read
:param missing_value: True, if dataset contains any missing value, default to False
:param search_value: applicable when missing_value is True, default to None
:param feature_selection: Apply Feature selection on Regression model which follows the below feature selection
procedures. for feature selection we are using statsmodel library, it is a iterative approach,
by default it is None
- BE: BackWard-Elimination
- FS: Forward-Selection
:param feature_scaling: scale matix of feature od X, default to False, it must be true when feature_selection
is enabled
:param encode_categorical: encodes categorical data to integer format when it sets True, default to False
:param binary_transform: binzrie encoded categorical data to binary format, default to False
:param categorical_features: list of categorical features to be encoded
:param binary_classifier: defines whether the target variable is Binary or not, defaulted to False.
If true Logistic Regression would take into consideration.
If false remaining classifiers only considered
"""
super().__init__(input_path, file_format, target, features, missing_value, search_value, job_type,
feature_selection, feature_scaling, encode_categorical, binary_transform,
categorical_features)
self.__job_type = job_type
self.__binary_classifier = binary_classifier
self.__algorithms = self.evaluate_algorithms(algorithms)
self.estimator_summary = dict()
def evaluate_algorithms(self, algorithms):
"""
Function will evaluate input parameters and drives the model accordingly
:param algorithms: list of algorithms that you wanted to apply on dataset
:return:
"""
# Raise Exception if algorithms is not a list
if not isinstance(algorithms, list):
raise TypeError('"algorithms" must be a type of list not a type of {}'.format(type(algorithms)))
# Checks Regression algorithms in supported list
if self.__job_type.lower() == 'regression':
supported_algorithms = ['ALL', 'MLR', 'POLY', 'SVR', 'DTREE', 'RFR']
for alg in algorithms:
if alg not in supported_algorithms:
raise Exception('Given regression algorithm "{}" not supported currently, '
'please use any one from the list {}'.format(alg, supported_algorithms))
# Checks Classification algorithms in supported list
elif self.__job_type.lower() == 'classification':
supported_algorithms = ['ALL', 'SIGMOID', 'KNN', 'SVM', 'DTREE', 'RFC', 'BAYESIAN']
for alg in algorithms:
if alg not in supported_algorithms:
raise Exception('Given classification algorithm "{}" not supported currently, '
'please use any one from the list {}'.format(alg, supported_algorithms))
# Raise Exception if the give job_type is not applicable
else:
raise TypeError('Given job_type "{}" not supported currently'.format(self.__job_type))
return algorithms
def run(self):
"""
Function to run models based on the algorithms parameter
:return: Status summary of job
"""
for algo in self.__algorithms:
if algo.upper() == 'MLR':
print('Running Multi-linear Regression...!')
estimator = MLR()
self.estimator_summary[algo] = super().run_estimator(estimator)
elif algo.upper() == 'POLY':
print('Running Polynomial Regression...!')
estimator = POLY()
self.estimator_summary[algo] = super().run_estimator(estimator)
elif algo.upper() == 'SVR':
print('Running Support Vector Regression...!')
estimator = SVR()
self.estimator_summary[algo] = super().run_estimator(estimator)
elif algo.upper() == 'DTREE':
if self.__job_type.lower() == 'regression':
print('Running Decision Tree Regression...!')
estimator = DTR()
self.estimator_summary[algo] = super().run_estimator(estimator)
else:
raise TypeError('DTree for Classification not found')
elif algo.upper() == 'RFR':
print('Running Random Forest Regression...!')
estimator = RFR()
self.estimator_summary[algo] = super().run_estimator(estimator)
elif algo.upper() == 'SIGMOID':
if self.__binary_classifier:
print('Running Logistic Regression...!')
estimator = LogSR()
self.estimator_summary[algo] = super().run_estimator(estimator)
def get_summary(self, model_name='ALL'):
"""
Function will return model summary for specified mode, default to ALL
:param model_name: name of model which we are trying to get summary
:return: best fit model summary
"""
if not isinstance(model_name, str):
raise TypeError('model_name must be string, not a type of {}'.format(type(model_name)))
if model_name != 'ALL':
print('Model Summary: {}'.format(model_name))
print(self.estimator_summary[model_name])
else:
print('Model Summary:')
print(self.estimator_summary)
if __name__ == '__main__':
# Test Car-MPG Prediction
# Load input dataset [carMPG, CarPricePrediction]
file_path = 'E:\\Project Work\\SpyderWorkspace\\UCI-UseCases\\dataset\\Regression\\carMPG.csv'
# Initialize object
obj = RunModel(job_type='Regression',
input_path=file_path,
target='MPG',
features=['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration', 'Model_year',
'Origin'],
algorithms=['MLR', 'POLY', 'SVR', 'DTREE', 'RFR'],
file_format='csv',
missing_value=True,
search_value='?',
feature_selection=None,
feature_scaling=True,
encode_categorical=None,
binary_transform=False,
categorical_features=None
)
# Run models
obj.run()
# Get Model Summary
obj.get_summary('MLR')
obj.get_summary('POLY')
obj.get_summary('SVR')
obj.get_summary('DTREE')
obj.get_summary('RFR')