forked from freeCodeCamp/boilerplate-medical-data-visualizer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmedical_data_visualizer.py
70 lines (52 loc) · 2.52 KB
/
medical_data_visualizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Import data
df = pd.read_csv("medical_examination.csv")
#Calculate BMI
BMI = df["weight"]/((df["height"])/100)**2
# Add 'overweight' column
df["overweight"] = (BMI > 25).astype(int)
# Normalize data by making 0 always good and 1 always bad.
#If the value of 'cholesterol' or 'gluc' is 1, make the value 0. If the value is more than 1, make the value 1.
# normalize cholesterol and gluc
df["cholesterol"] = (df["cholesterol"] > 1).astype(int)
df["gluc"] = (df["gluc"] > 1).astype(int)
# Draw Categorical Plot
def draw_cat_plot():
# Create DataFrame for cat plot using `pd.melt` using just the values from 'cholesterol', 'gluc', 'smoke', 'alco', 'active', and 'overweight'.
df_cat = pd.melt(df, id_vars="cardio", value_vars=["cholesterol", "gluc", "smoke", "alco", "active", "overweight"])
# Group and reformat the data to split it by 'cardio'. Show the counts of each feature. You will have to rename one of the columns for the catplot to work correctly.
df_cat = df_cat.groupby(["cardio", "variable"])["value"].value_counts().reset_index(name="total")
# Draw the catplot with 'sns.catplot()'
catplot = sns.catplot(data=df_cat, x="variable", y="total", col="cardio", kind="bar", hue="value")
# Get the figure for the output
fig = catplot.fig
# Do not modify the next two lines
fig.savefig('catplot.png')
return fig
# Draw Heat Map
def draw_heat_map():
# Clean the data
#we are keeping the following: ap_hi >= ap_lo AND q0.025<=height<=0.975 AND q0.025<=weight<=0.975
df_heat = df[(df["ap_lo"] <= df["ap_hi"])
& (df["height"] >= df["height"].quantile(0.025))
& (df["height"] <= df["height"].quantile(0.975))
& (df["weight"] >= df["weight"].quantile(0.025))
& (df["weight"] <= df["weight"].quantile(0.975))]
# Calculate the correlation matrix
corr = df_heat.corr()
# Generate a mask for the upper triangle
# This removes the upper triangle across the correlation matrix (square is cut along the diagonal)
mask = np.triu(corr)
# Set up the matplotlib figure
fig, ax = plt.subplots(figsize=(10,10))
# Draw the heatmap with 'sns.heatmap()'
# We are plotting "corr",
# annot places the values of the correlation on each matrix element
# fmt formats the annot to have 1 decimal place
sns.heatmap(corr, annot=True, mask=mask, fmt=".1f")
# Do not modify the next two lines
fig.savefig('heatmap.png')
return fig