-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLGBM.py
247 lines (185 loc) · 11.5 KB
/
LGBM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import tensorflow as tf
import streamlit as st
from streamlit.components import v1 as components
from tensorflow.keras.models import load_model
import pandas as pd
import numpy as np
import joblib
from data_preprocessing_LGBM import data_preprocessing
st.set_page_config(layout="wide")

# Load part 1: static HTML rendered above the interactive form.
# The file is read as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open('LGBM_1.html', 'r', encoding='utf-8') as file:
    html_content_1 = file.read()
components.html(html_content_1, width=None, height=1000)

# Add some spacing between elements on the website
for _ in range(6):
    st.write("")

# Tighten the default vertical spacing around Streamlit's widgets.
st.markdown("""
<style>
.element-container {
margin-bottom: 0px;
}
.stSlider, .stSelectbox {
padding-bottom: 10px;
}
.stMarkdown {
padding-bottom: 1px;
}
</style>
""", unsafe_allow_html=True)

# Centered introduction. The instruction line describes what this app actually
# predicts (earthquake damage to a building).
st.markdown("""
<div style="text-align: center">
<h1 style="color: #7af5b9;">Using my Best Deployed and Saved Light Gradient Boosting Model (LGBM)</h1>
<h3 style="color: #7af5b9;">Please select the values that you want, and then press the "predict" button to see how much earthquake damage the building is predicted to sustain</h3>
<h3 style="color: #7af5b9;">Just a heads up that some of these values are categorical like "q" or "t" and it isn't clear what they mean. So don't worry if you don't understand what certain values mean. Just randomly pick.</h3>
</div>
""", unsafe_allow_html=True)

for _ in range(3):
    st.write("")
# Create a column layout so the text below is less wide: the inputs go in the
# wide middle column of a 1:6:1 split and the two outer columns stay empty.
_left_margin, form_column, _right_margin = st.columns([1, 6, 1])


def _heading(text):
    """Emit *text* as a white level-4 HTML heading."""
    st.markdown(f'<h4 style="color:white;">{text}</h4>', unsafe_allow_html=True)


def _rule():
    """Emit a horizontal rule that separates one input from the next."""
    st.markdown('<hr>', unsafe_allow_html=True)


def _headed_slider(heading, label, lowest, highest):
    """Render a heading, a slider and a trailing rule; return the slider value."""
    _heading(heading)
    chosen = st.slider(label, lowest, highest)
    _rule()
    return chosen


# User input for each feature
with form_column:
    # Geo Levels
    geo_level_1_id = _headed_slider('Geographic Region Level 1 (0-30)', 'Geo Level 1 ID', 0, 30)
    geo_level_2_id = _headed_slider('Geographic Region Level 2 (0-1427)', 'Geo Level 2 ID', 0, 1427)
    geo_level_3_id = _headed_slider('Geographic Region Level 3 (0-12567)', 'Geo Level 3 ID', 0, 12567)

    # Building Characteristics
    count_floors_pre_eq = _headed_slider('Number of floors before the earthquake', 'Count Floors Pre-EQ', 1, 10)
    age = _headed_slider('Age of the building in years', 'Age', 0, 100)
    area_percentage = _headed_slider('Normalized area of the building footprint', 'Area Percentage', 1, 100)
    height_percentage = _headed_slider('Normalized height of the building footprint', 'Height Percentage', 1, 100)

    # Categorical Features (single-letter codes offered as-is)
    land_surface_condition = st.selectbox('Land Surface Condition', ['n', 'o', 't'])
    foundation_type = st.selectbox('Foundation Type', ['h', 'i', 'r', 'u', 'w'])
    roof_type = st.selectbox('Roof Type', ['n', 'q', 'x'])
    ground_floor_type = st.selectbox('Ground Floor Type', ['f', 'm', 'v', 'x', 'z'])
    other_floor_type = st.selectbox('Other Floor Type', ['j', 'q', 's', 'x'])
    position = st.selectbox('Position', ['j', 'o', 's', 't'])
    plan_configuration = st.selectbox(
        'Plan Configuration',
        ['a', 'c', 'd', 'f', 'm', 'n', 'o', 'q', 's', 'u'],
    )

    # Binary Features: one checkbox per superstructure material
    _heading('Superstructure Material Types')
    has_superstructure_adobe_mud = st.checkbox('Adobe/Mud')
    has_superstructure_mud_mortar_stone = st.checkbox('Mud Mortar - Stone')
    has_superstructure_stone_flag = st.checkbox('Stone')
    has_superstructure_cement_mortar_stone = st.checkbox('Cement Mortar - Stone')
    has_superstructure_mud_mortar_brick = st.checkbox('Mud Mortar - Brick')
    has_superstructure_cement_mortar_brick = st.checkbox('Cement Mortar - Brick')
    has_superstructure_timber = st.checkbox('Timber')
    has_superstructure_bamboo = st.checkbox('Bamboo')
    has_superstructure_rc_non_engineered = st.checkbox('RC Non-Engineered')
    has_superstructure_rc_engineered = st.checkbox('RC Engineered')
    has_superstructure_other = st.checkbox('Other')

    # Family Count
    count_families = _headed_slider('Number of families living in the building', 'Count Families', 1, 10)

    # Secondary Use
    _heading('Secondary Uses of the Building')
    has_secondary_use = st.checkbox('Has Secondary Use')

# Vertical breathing room after the inputs.
for _ in range(3):
    st.write("")
def predict(geo_level_1_id, geo_level_2_id, geo_level_3_id, count_floors_pre_eq, age, area_percentage, height_percentage,
            land_surface_condition, foundation_type, roof_type, ground_floor_type, other_floor_type, position, plan_configuration,
            has_superstructure_adobe_mud, has_superstructure_mud_mortar_stone, has_superstructure_stone_flag,
            has_superstructure_cement_mortar_stone, has_superstructure_mud_mortar_brick, has_superstructure_cement_mortar_brick,
            has_superstructure_timber, has_superstructure_bamboo, has_superstructure_rc_non_engineered, has_superstructure_rc_engineered,
            has_superstructure_other, count_families, has_secondary_use):
    """Score one building with the saved model and show the class probabilities.

    The arguments are coerced to plain ``int`` / ``str`` values, handed to
    ``data_preprocessing`` in the order of this signature, and the result is
    scored with the model loaded from ``saved_LGBM_model7.joblib``.

    Side effects:
        Writes an explanatory heading and a table of per-class percentages
        (rounded to two decimals, suffixed with "%") to the Streamlit page.

    Returns:
        The result of ``argmax(axis=-1)`` on the ``predict_proba`` output, i.e.
        the index of the most probable class for each scored row. Per the
        heading text, 0 is low, 1 is medium and 2 is high damage.
    """
    # Convert inputs to the model's expected format: the seven numeric values
    # become ints, the seven category codes become strs, and the checkbox
    # values plus the family count become ints.
    numeric_values = [int(value) for value in (
        geo_level_1_id, geo_level_2_id, geo_level_3_id, count_floors_pre_eq,
        age, area_percentage, height_percentage,
    )]
    category_codes = [str(value) for value in (
        land_surface_condition, foundation_type, roof_type, ground_floor_type,
        other_floor_type, position, plan_configuration,
    )]
    flags_and_counts = [int(value) for value in (
        has_superstructure_adobe_mud, has_superstructure_mud_mortar_stone, has_superstructure_stone_flag,
        has_superstructure_cement_mortar_stone, has_superstructure_mud_mortar_brick, has_superstructure_cement_mortar_brick,
        has_superstructure_timber, has_superstructure_bamboo, has_superstructure_rc_non_engineered,
        has_superstructure_rc_engineered, has_superstructure_other, count_families, has_secondary_use,
    )]

    # Prepare the input data in the correct format. The three groups are
    # unpacked in signature order, so the positional call is unchanged.
    preprocessed_data = data_preprocessing(*numeric_values, *category_codes, *flags_and_counts)

    # NOTE(review): the model is re-read from disk on every call; consider
    # caching it if prediction latency matters.
    model = joblib.load("saved_LGBM_model7.joblib")

    # predict_proba gives a 2D array with one probability per class for each
    # row, which tells the user more than the bare predicted label would.
    predictions = model.predict_proba(preprocessed_data)

    # Explain the table columns. The closing period sits inside the heading so
    # it is not rendered as a stray paragraph below it.
    st.markdown("<h4>This table shows the percentage certainty for each column. The '0' column <span style='color: green; font-weight: bold;'> is low damage </span>, the '1' column is <span style='color: gray; font-weight: bold;'>medium damage</span>, and the '2' column is <span style='color: red; font-weight: bold;'>high damage</span>.</h4>", unsafe_allow_html=True)

    # Percentages rounded to two decimals, rendered as text with a "%" suffix.
    # The suffix is applied to every cell, so the table is formatted correctly
    # whatever the number of classes or rows.
    predictions_percent = pd.DataFrame(np.around(predictions * 100, 2)).astype(str) + "%"
    st.write(predictions_percent)

    # Get the class with the highest probability
    return predictions.argmax(axis=-1)
# Make the buttons bigger
st.markdown("""
<style>
.stButton>button {
font-size: 10px;
padding: 20px 40px;
}
</style>
""", unsafe_allow_html=True)

# Run the model only when the user clicks the button, then report the most
# likely damage class in a colour-coded headline.
if st.button('Predict'):
    prediction = predict(geo_level_1_id, geo_level_2_id, geo_level_3_id, count_floors_pre_eq, age, area_percentage, height_percentage,
                         land_surface_condition, foundation_type, roof_type, ground_floor_type, other_floor_type, position, plan_configuration,
                         has_superstructure_adobe_mud, has_superstructure_mud_mortar_stone, has_superstructure_stone_flag,
                         has_superstructure_cement_mortar_stone, has_superstructure_mud_mortar_brick, has_superstructure_cement_mortar_brick,
                         has_superstructure_timber, has_superstructure_bamboo, has_superstructure_rc_non_engineered, has_superstructure_rc_engineered,
                         has_superstructure_other, count_families, has_secondary_use)
    # The messages are plain string literals: they contain no placeholders.
    if prediction == 0:
        st.markdown('## Prediction: Your building experienced <span style="color: green; font-weight: bold;">very low damage!</span> Very lucky! ', unsafe_allow_html=True)
    elif prediction == 1:
        st.markdown('## Prediction: Your building experienced <span style="color: gray; font-weight: bold;">a medium amount of damage.</span> I hope you are okay! ', unsafe_allow_html=True)
    else:
        st.markdown('## Prediction: Your building experienced <span style="color: red; font-weight: bold;">very HIGH damage!</span> Your building is most likely destroyed!', unsafe_allow_html=True)
# Load part 2: static HTML rendered after the prediction section.
# The file is read as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open('LGBM_2.html', 'r', encoding='utf-8') as file:
    html_content_2 = file.read()
components.html(html_content_2, width=None, height=6000)