-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfcc_predict_health_costs_with_regression.py
60 lines (49 loc) · 2.29 KB
/
fcc_predict_health_costs_with_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# -*- coding: utf-8 -*-
"""fcc_predict_health_costs_with_regression.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/18Cmc4q6Zuyiuqvh-tQBU5FZBWLGfJWb4
"""
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import BatchNormalization
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
# --- Load data ---------------------------------------------------------
# Read the CSV straight from its URL. The notebook's `!wget ...` line is
# IPython shell syntax and is a SyntaxError when this file is run as plain
# Python; pandas can fetch the file over HTTPS itself.
DATA_URL = 'https://cdn.freecodecamp.org/project-data/health-costs/insurance.csv'
dataset = pd.read_csv(DATA_URL)
# A bare `dataset.head()` only displays in a notebook cell; print it so the
# preview is also visible when run as a script.
print(dataset.head())

# --- Encode categorical columns ------------------------------------------
# Binary categories become 0/1 flags.
dataset['sex'] = dataset.sex.replace({'male': 1, 'female': 0})
dataset['smoker'] = dataset.smoker.replace({'yes': 1, 'no': 0})
# One-hot encode `region` into one `region_<value>` column per category.
# The previous code assigned LabelBinarizer's 2-D indicator matrix to the
# single `region` column, which (depending on the pandas version) either
# raises or silently keeps only one indicator column.
dataset = pd.get_dummies(dataset, columns=['region'], dtype='float32')

# --- Build feature matrix / target ---------------------------------------
# Select by name rather than by position so the expanded region columns are
# all included and the target can never end up among the features.
y = dataset['expenses'].values.astype('float32')
X = dataset.drop(columns='expenses').values.astype('float32')

# Hold out 20% of the rows as the final test set. Keras consumes NumPy
# arrays directly, so no explicit tensor conversion is needed (the earlier
# `tf.convert_to_tensor(...)` calls discarded their results anyway).
train_dataset, test_dataset, train_labels, test_labels = train_test_split(
    X, y, test_size=0.2)

# --- Model ------------------------------------------------------------------
# Take the input width from the data so it always matches the encoding above.
n_features = train_dataset.shape[1]

model = Sequential()
init = tf.keras.initializers.RandomUniform(minval=-0.05, maxval=0.05, seed=1)
model.add(Dense(64, activation='linear', kernel_initializer=init,
                input_shape=(n_features,)))
model.add(BatchNormalization())
model.add(Dense(32, activation='relu', kernel_initializer=init))
model.add(Dense(32, activation='linear'))
# `input_shape` is only meaningful on the first layer, so it is not repeated
# here.
model.add(Dense(16, activation='linear', kernel_initializer='RandomNormal'))
model.add(BatchNormalization())
model.add(Dense(16, activation='relu', kernel_initializer='RandomNormal'))
model.add(Dense(1, activation='linear'))

# `metrics` is documented as a list; pass one instead of a bare string.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# --- Training ---------------------------------------------------------------
# Stop on *validation* MAE. Monitoring the training metric ('mae') makes
# early stopping and `restore_best_weights` favour the most overfit epoch.
# `mode='min'` is explicit because a lower MAE is better.
es = EarlyStopping(monitor='val_mae', min_delta=0, patience=50, verbose=0,
                   mode='min', baseline=None, restore_best_weights=True)
# Carve the validation data out of the training split (Keras takes the last
# 20% of the arrays passed to `fit`) so the test set stays untouched until
# the final evaluation below.
model.fit(train_dataset, train_labels, batch_size=32, epochs=400,
          validation_split=0.2, verbose=1, callbacks=[es])

# --- Evaluation -------------------------------------------------------------
# `error` holds the values returned by `evaluate` for the compiled loss and
# metric on the held-out test set.
error = model.evaluate(test_dataset, test_labels, verbose=1)
# Keep the predictions in a variable; a bare expression is discarded when
# this file is run as a script.
predictions = model.predict(test_dataset)