-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsites.R
105 lines (83 loc) · 4.67 KB
/
sites.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# The following manual steps have to be performed ONCE:
# 0. Make sure you open R by clicking on one of the three files. That way the working directory is set correctly.
# 1. Install the required packages.
# 2. Download the official list of sites (http://whc.unesco.org/en/syndication) xls file and save it in the
# same folder as the three R files (the working directory)
# 3. Run the code in this file from the top through the end of Section 1.1 (i.e. stop before Section 1.2)
# 4. Uncomment and run the code in Section 1.2 to copy only the following columns into a new csv file called unesco_data.csv:
# unique_nb id_no name description justification date_inscribed secondary_dates danger_list longitude latitude category state region iso_code udnp_code transboundary
# (if you're from the US, replace write.csv2 with write.csv)
# 5. Run Section 2
# Afterwards, this file only needs to be re-run after updates to the official UNESCO list. Just run the entire file
# (without Section 1.2, which is only needed upon first usage).
# nice to know: https://en.wikipedia.org/wiki/List_of_World_Heritage_Sites_by_year_of_inscription
library(dplyr); library(reshape2); library(readxl); library(stringr)
### ------ read in and clean up data (for 2023) -----
# Load the official spreadsheet as a plain data.frame and keep only the
# columns used downstream, under the short names written to unesco_data.csv.
dat <- data.frame(read_excel("orig/whc-sites-2023.xls")) %>%
  transmute(
    unique_nb = unique_number,
    id_no = id_no,
    name = name_en,
    description = short_description_en,
    justification = justification_en,
    date_inscribed = date_inscribed,
    secondary_dates = secondary_dates,
    danger_list = danger_list,
    longitude = longitude,
    latitude = latitude,
    category = category,
    state = states_name_en,
    region = region_en,
    iso_code = iso_code,
    udnp_code = as.character(udnp_code),
    transboundary = transboundary
  )

# Strip paragraph/emphasis/italic tags and line breaks from the free-text
# columns. The tags are removed one after another, in the listed order.
dat[c("description", "justification", "name")] <- lapply(
  dat[c("description", "justification", "name")],
  function(txt) {
    for (tag in c("<p>", "</p>", "\n", "<em>", "</em>",
                  "<I>", "</I>", "<i>", "</i>")) {
      txt <- gsub(tag, "", txt)
    }
    txt
  }
)
### --- Section 1.1: Manual correction of missing/ wrong location values (based on my own research) ----
# Overwrite the coordinates of one site, selected either by a pattern matched
# against `name` (name_pattern) or by its exact `id_no` (id).
#
# Longitude and latitude are assigned column by column: assigning c(lon, lat)
# to a selection of several rows is recycled column-wise and would swap
# longitude and latitude on some of the matched rows.
# A warning is raised whenever the selector does not hit exactly one row, so
# that ambiguous or outdated selectors become visible after list updates.
# Returns the modified data frame.
fix_site_coords <- function(df, lon, lat, name_pattern = NULL, id = NULL) {
  if (is.null(id)) {
    rows <- grep(name_pattern, df$name)
    label <- paste0("name pattern '", name_pattern, "'")
  } else {
    # Exact comparison: a pattern match on the id would also hit any other
    # id that merely contains the same digits.
    rows <- which(df$id_no == id)
    label <- paste0("id_no ", id)
  }
  if (length(rows) != 1L) {
    warning("Coordinate correction for ", label, " matched ", length(rows),
            " rows (expected exactly 1)", call. = FALSE, immediate. = TRUE)
  }
  if (length(rows) > 0L) {
    df[rows, "longitude"] <- lon
    df[rows, "latitude"] <- lat
  }
  df
}

dat <- fix_site_coords(dat, 12.14, 51.5,
                       name_pattern = "Bauhaus and its Sites")
dat <- fix_site_coords(dat, 8.58, 51.8,
                       name_pattern = "Primeval Beech Forests")
dat <- fix_site_coords(dat, 8.5, 50.27,
                       name_pattern = "Frontiers of the Roman")
dat <- fix_site_coords(dat, 18.771043, 42.424884,
                       name_pattern = "Venetian Works of Defence")
dat <- fix_site_coords(dat, 9.22831, 47.72634,
                       name_pattern = "Prehistoric Pile Dwellings around the Alps")
dat <- fix_site_coords(dat, 102.240143, 2.200844,
                       name_pattern = "Melaka and George Town")
dat <- fix_site_coords(dat, 7.71924, 50.3340619, id = 1613)

# Diagnostic for interactive use: x lists every site whose coordinates,
# rounded to 3 decimals, coincide with those of at least one other site.
# Rows with a missing longitude or latitude are never reported.
# Inspect x here when running step by step; it is removed again below.
x <- local({
  lat <- round(dat[, "latitude"], 3)
  lon <- round(dat[, "longitude"], 3)
  key <- paste(lat, lon)
  shared <- !is.na(lat) & !is.na(lon) &
    (duplicated(key) | duplicated(key, fromLast = TRUE))
  out <- dat[shared, c("name", "longitude", "latitude")]
  out[order(out$latitude, out$longitude), ]
})

# NOTE(review): these two corrections follow the duplicate check, presumably
# because the check is what revealed them - confirm before reordering.
dat <- fix_site_coords(dat, 126.9941, 37.5746, name_pattern = "Jongmyo")
dat <- fix_site_coords(dat, 126.9927, 37.5823, name_pattern = "Changdeokgung")

# Leave only `dat` behind in the workspace.
rm(x, fix_site_coords)
### ------- Section 1.2: Run only upon first usage (see readme)
# dat %>%
# mutate(user1 = "",
# user2 = "",
# color = "",
# color2= "") %>%
# write.csv2("unesco_data.csv", row.names=F, na="")
##### ------ Section 2. Merging with personalized information (visiting dates) ---
# Read the previously written file and keep only the personal columns
# (user1, user2) together with the join key.
history <- read.csv2("unesco_data.csv", stringsAsFactors = FALSE) %>%
  select(id_no, user1, user2)

# Full join so that ids present only in the old file show up as extra rows
# and can be detected by the row-count check below.
unesco_data <- full_join(dat, history, by = "id_no")

if (nrow(unesco_data) == nrow(dat)) {
  unesco_data <- unesco_data %>%
    mutate(
      # Marker colour by category; any category other than "Cultural" or
      # "Natural" gets "greenyellow".
      color = ifelse(category == "Cultural", "yellow",
                     ifelse(category == "Natural", "green", "greenyellow")),
      # Same colour, except "red" when danger_list starts with "Y".
      color2 = ifelse(!is.na(danger_list) & substr(danger_list, 1, 1) == "Y",
                      "red", color)
    )
  # Overwrites the file that was read above.
  write.csv2(unesco_data, "unesco_data.csv", row.names = FALSE, na = "")
} else {
  # Nothing is written in this case. Report the ids that explain the
  # mismatch: ids known only to the old file, and ids listed more than once
  # in it (each extra occurrence adds a row to the join result).
  warning(
    "Attention: Differing number of rows (", nrow(unesco_data),
    " after merging vs. ", nrow(dat), " in the official list). ",
    "id_no only in unesco_data.csv: ",
    paste(setdiff(history$id_no, dat$id_no), collapse = ", "),
    "; id_no duplicated in unesco_data.csv: ",
    paste(unique(history$id_no[duplicated(history$id_no)]), collapse = ", "),
    call. = FALSE, immediate. = TRUE
  )
}
# ### If the number of rows differs, the following code can be used for debugging
#
# err = unesco_data$id_no[which(!(unesco_data$id_no %in% dat$id_no))]
#
# unesco_data = unesco_data %>%
# filter(id_no != err)
#
# ### after running the code again, the row number should be equal