-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathProject
111 lines (81 loc) · 3.48 KB
/
Project
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# Load the readr, ggplot2, and dplyr packages
library(readr)
library(ggplot2)
library(dplyr)
# Worldwide cumulative cases ----
# Import the worldwide confirmed-case table from its CSV file.
confirmed_cases_worldwide <- read_csv(
  "datasets/confirmed_cases_worldwide.csv"
)

# Show the imported table.
confirmed_cases_worldwide

# Plot cum_cases against date as a line and give the y-axis a readable title.
ggplot(data = confirmed_cases_worldwide) +
  geom_line(mapping = aes(x = date, y = cum_cases)) +
  ylab("Cumulative confirmed cases")
# China vs. rest of the world ----
# Import the table that carries an is_china grouping column.
confirmed_cases_china_vs_world <- read_csv(
  "datasets/confirmed_cases_china_vs_world.csv"
)

# Show the imported table.
confirmed_cases_china_vs_world

# One line per is_china value. The aesthetics are attached to the line layer
# (not to ggplot()) so that layers added to this plot later, which supply
# their own data, do not inherit the color = is_china mapping.
plt_cum_confirmed_cases_china_vs_world <-
  ggplot(data = confirmed_cases_china_vs_world) +
  geom_line(mapping = aes(x = date, y = cum_cases, color = is_china)) +
  labs(y = "Cumulative confirmed cases")

# Render the stored plot.
plt_cum_confirmed_cases_china_vs_world
# Annotated events ----
# Three dated events; the date column is stored as class Date and the labels
# contain explicit line breaks so they wrap when drawn on the plot.
who_events <- tibble(
  date = as.Date(c("2020-01-30", "2020-03-11", "2020-02-13")),
  event = c(
    "Global health\nemergency declared",
    "Pandemic\ndeclared",
    "China reporting\nchange"
  )
)

# Show the event table.
who_events

# Overlay the events on the China-vs-world plot: a dashed vertical line at
# each event date, plus the event label drawn at y = 100000.
plt_cum_confirmed_cases_china_vs_world +
  geom_vline(
    data = who_events,
    mapping = aes(xintercept = date),
    linetype = "dashed"
  ) +
  geom_text(
    data = who_events,
    mapping = aes(x = date, label = event),
    y = 1e5
  )
# China from 15 February onward ----
# Keep only the rows labelled "China" dated 2020-02-15 or later.
china_after_feb15 <- filter(
  confirmed_cases_china_vs_world,
  is_china == "China",
  date >= as.Date("2020-02-15")
)

# Show the filtered rows.
china_after_feb15

# Line plot of cum_cases over date with a straight-line (lm) fit on top;
# se = FALSE turns off the confidence ribbon.
ggplot(data = china_after_feb15, mapping = aes(x = date, y = cum_cases)) +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(y = "Cumulative confirmed cases")
# Rest of the world ----
# Keep only the rows labelled "Not China".
not_china <- filter(confirmed_cases_china_vs_world, is_china == "Not China")

# Show the filtered rows.
not_china

# Line plot of cum_cases over date with a straight-line (lm) fit on top;
# se = FALSE turns off the confidence ribbon. Stored so it can be re-scaled.
plt_not_china_trend_lin <-
  ggplot(data = not_china, mapping = aes(x = date, y = cum_cases)) +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(y = "Cumulative confirmed cases")

# Render the plot on the default (linear) y-axis.
plt_not_china_trend_lin

# Render the same plot with a base-10 logarithmic y-axis.
plt_not_china_trend_lin + scale_y_log10()
# Cases by country ----
# Import the per-country confirmed-case table and inspect its columns.
confirmed_cases_by_country <- read_csv("datasets/confirmed_cases_by_country.csv")
glimpse(confirmed_cases_by_country)

# For each country take the largest cum_cases value as its total, then keep
# the 7 countries with the highest totals (ties at the cutoff are kept, so
# more than 7 rows are possible).
# NOTE(review): top_n() is superseded by slice_max() in dplyr >= 1.0.0; it is
# kept here so the script does not require a newer dplyr.
top_countries_by_total_cases <- confirmed_cases_by_country %>%
  group_by(country) %>%
  summarize(total_cases = max(cum_cases)) %>%
  top_n(7, total_cases)

# See the result
top_countries_by_total_cases
# Top 7 countries outside China ----
# Read in the dataset from datasets/confirmed_cases_top7_outside_china.csv
confirmed_cases_top7_outside_china <- read_csv(
  "datasets/confirmed_cases_top7_outside_china.csv"
)

# Glimpse at the contents of confirmed_cases_top7_outside_china
glimpse(confirmed_cases_top7_outside_china)

# Using confirmed_cases_top7_outside_china, draw a line plot of
# cum_cases vs. date, colored by country
ggplot(
  confirmed_cases_top7_outside_china,
  aes(x = date, y = cum_cases, color = country)
) +
  geom_line() +
  ylab("Cumulative confirmed cases")