-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMedical_Premium_Predicted.R
66 lines (53 loc) · 1.8 KB
/
Medical_Premium_Predicted.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Author: Suci Aulya Putri
# Script title, kept as a bare top-level string expression.
"Medical Premium Prediction"

# Import data ----
library(readr)
# Read the premium data set; the relative path is resolved against the
# current working directory.
medical_premium <- read_csv(file = "Medicalpremium.csv")
# Display the imported data for a quick visual check.
medical_premium
# 1. Split data: train / test ----
library(caTools)
# Fix the RNG seed so the random split is reproducible between runs.
set.seed(123)
# Logical mask with one entry per row: TRUE marks a training row
# (SplitRatio = 0.80 requests roughly 80% of the rows).
# It is named `split_mask` rather than `sample` so that base::sample() is not
# masked by a data object.
split_mask <- sample.split(medical_premium$PremiumPrice, SplitRatio = 0.80)
# Index with the mask directly: unlike subset(), `[` never looks the mask name
# up among the data frame's own columns.
train <- medical_premium[split_mask, ]
test <- medical_premium[!split_mask, ]
# Modeling ----
# Linear regression of PremiumPrice on every other column of `train`.
PremiumPrice_lm <- lm(formula = PremiumPrice ~ ., data = train)
# Print the coefficient table and overall fit statistics.
summary(PremiumPrice_lm)
# Diagnostic studies ----
# Residuals-vs-fitted plot, used to check the linearity, constant-variance
# and independence assumptions.
PremiumPrice_lm_res <- resid(PremiumPrice_lm)
# fitted() returns the in-sample fitted values stored on the model, so it
# always has exactly one value per residual. predict(model, train) instead
# re-scores every row of `train`; if lm() had dropped incomplete rows its
# length would no longer match the residuals and plot() would fail.
plot(
  fitted(PremiumPrice_lm), PremiumPrice_lm_res,
  ylab = "Residuals", xlab = "Fitted Values",
  main = "Premium Price"
)
# Horizontal reference line at zero residual.
abline(h = 0)
# Same residuals-vs-fitted plot, drawn with ggplot2 ----
library(ggplot2)
# Both columns come from the fitted model itself (fitted() and resid()), so
# they are guaranteed to have the same length. Using predict(model, train)
# here would break data.frame() whenever lm() dropped incomplete rows.
PremiumPrice_lm_res_df <- data.frame(
  fitted_value = fitted(PremiumPrice_lm),
  residual = resid(PremiumPrice_lm)
)
# Scatter of residuals against fitted values with a zero reference line.
ggplot(PremiumPrice_lm_res_df, aes(x = fitted_value, y = residual)) +
  geom_point() +
  geom_hline(yintercept = 0, color = "blue") +
  labs(title = "Regressing Premium Price With ggplot2")
# Normal Q-Q plot ----
# Compares the standardized residuals with normal quantiles to assess the
# normality of the residuals.
PremiumPrice_lm_stdres <- rstandard(PremiumPrice_lm)
qqnorm(
  PremiumPrice_lm_stdres,
  main = "Premium Price ",
  xlab = "Normal Scores",
  ylab = "Standardized Residuals"
)
# Add the reference line to the Q-Q plot.
qqline(PremiumPrice_lm_stdres)
# Prediction ----
# Score the held-out test rows with the fitted model.
predicted <- predict(PremiumPrice_lm, newdata = test)
# Pair each observed premium with its prediction for the evaluation step.
actual_pred <- data.frame(
  actual = test$PremiumPrice,
  predicted = predicted
)
# Display the predicted premiums.
predicted
# Model evaluation ----
# Signed prediction error for every test row (predicted minus actual).
prediction_error <- actual_pred$predicted - actual_pred$actual
# MAE: mean absolute error, in the same units as PremiumPrice.
MAE <- mean(abs(prediction_error))
MAE
# MAPE: mean absolute percentage error, reported as a fraction (it is not
# multiplied by 100). abs() wraps the whole ratio so that every term is
# non-negative whatever the sign of the actual value; previously only the
# numerator was made absolute.
MAPE <- mean(abs(prediction_error / actual_pred$actual))
MAPE