Skip to content

Commit

Permalink
update GBIF
Browse files Browse the repository at this point in the history
  • Loading branch information
8Ginette8 committed Mar 19, 2024
1 parent 0259c9d commit f603097
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 159 deletions.
4 changes: 2 additions & 2 deletions R/get_gbif.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ get_gbif = function(sp_name = NULL,
######################################################


if (is.null(sp_name)) {
stop("'sp_name' must be provided to search for species records...")
if (is.null(sp_name)|is.na(sp_name)) {
stop("Given 'sp_name' is NA or NULL, a string must be provided...")
}


Expand Down
320 changes: 163 additions & 157 deletions R/get_status.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,183 +61,189 @@ get_status=function(sp_name = NULL,
conf_match = 80,
all = FALSE)
{
if (!search){
# Search input name via fuzzy match and direct search
bone.search = rgbif::name_backbone(sp_name,
rank = rank,
phylum = phylum,
class = class,
order = order,
family = family,
verbose = FALSE,
strict = FALSE)
} else {
# Search input name via strict match and refined search
bone.search = rgbif::name_backbone(sp_name,
verbose = TRUE,
strict = TRUE)

q.crit = !sapply(list(rank,phylum,class,order,family),is.null)

# Filter by given criterias if results
if (!bone.search$matchType[1]%in%"NONE"){
if (any(q.crit)){
id.crit = c("rank","phylum","class","order","family")[q.crit]
p.crit = unlist(list(rank,phylum,class,order,family)[q.crit])
n.test = id.crit%in%names(bone.search)
if (any(n.test)){
# selecting which
id.crit2 = id.crit[n.test]
p.crit2 = p.crit[n.test]
# Apply the rigth criterias
for (i in 1:length(id.crit2)){
bone.search = bone.search[c(bone.search[,id.crit2[i]])[[1]]%in%p.crit2[i],]
if (nrow(bone.search)==0){
bone.search = data.frame(matchType="NONE")
}
# Error message
if (is.na(sp_name)|is.null(sp_name)){
stop("Given 'sp_name' is NA or NULL, a string must be provided...")
}

# Search
if (!search){
# Search input name via fuzzy match and direct search
bone.search = rgbif::name_backbone(sp_name,
rank = rank,
phylum = phylum,
class = class,
order = order,
family = family,
verbose = FALSE,
strict = FALSE)
} else {
# Search input name via strict match and refined search
bone.search = rgbif::name_backbone(sp_name,
verbose = TRUE,
strict = TRUE)

q.crit = !sapply(list(rank,phylum,class,order,family),is.null)

# Filter by given criterias if results
if (!bone.search$matchType[1]%in%"NONE"){
if (any(q.crit)){
id.crit = c("rank","phylum","class","order","family")[q.crit]
p.crit = unlist(list(rank,phylum,class,order,family)[q.crit])
n.test = id.crit%in%names(bone.search)
if (any(n.test)){
# selecting which
id.crit2 = id.crit[n.test]
p.crit2 = p.crit[n.test]
# Apply the rigth criterias
for (i in 1:length(id.crit2)){
bone.search = bone.search[c(bone.search[,id.crit2[i]])[[1]]%in%p.crit2[i],]
if (nrow(bone.search)==0){
bone.search = data.frame(matchType="NONE")
}
}
if (!all(n.test)){
pp = paste(id.crit[!n.test],collapse=", ")
warning(paste0("'",pp,"' level(s) not available for this taxa in GBIF, could not be employed..."))
}
}
if (!all(n.test)){
pp = paste(id.crit[!n.test],collapse=", ")
warning(paste0("'",pp,"' level(s) not available for this taxa in GBIF, could not be employed..."))
}
}

# Normal procedure with or without criterias
if (nrow(bone.search)>1){
if (all(!bone.search$rank%in%c("SPECIES","SUBSPECIES","VARIETY"))){
}

# Normal procedure with or without criterias
if (nrow(bone.search)>1){
if (all(!bone.search$rank%in%c("SPECIES","SUBSPECIES","VARIETY"))){
cat("Not match found...","\n")
return(data.frame(NULL))

} else {
s.keep = bone.search[bone.search$rank%in%c("SPECIES","SUBSPECIES","VARIETY"),]
s.keep = s.keep[s.keep$status%in%c("ACCEPTED","SYNONYM"),]
s.keep = s.keep[s.keep$matchType%in%"EXACT",]
if (nrow(s.keep)==0){
cat("Not match found...","\n")
return(data.frame(NULL))

} else {
s.keep = bone.search[bone.search$rank%in%c("SPECIES","SUBSPECIES","VARIETY"),]
s.keep = s.keep[s.keep$status%in%c("ACCEPTED","SYNONYM"),]
s.keep = s.keep[s.keep$matchType%in%"EXACT",]
if (nrow(s.keep)==0){
cat("Not match found...","\n")
return(data.frame(NULL))

} else if (nrow(s.keep)>1){

# If we only find subpsecies and variety, we need to (default) prioritize
if (all(s.keep$rank%in%c("VARIETY","SUBSPECIES"))){
if ("var."%in%strsplit(sp_name," ")[[1]]){
bone.search = s.keep[s.keep$rank%in%"VARIETY",]
if (nrow(bone.search)==0){
bone.search = s.keep[s.keep$rank%in%"SUBSPECIES",]
}

} else {
} else if (nrow(s.keep)>1){

# If we only find subpsecies and variety, we need to (default) prioritize
if (all(s.keep$rank%in%c("VARIETY","SUBSPECIES"))){
if ("var."%in%strsplit(sp_name," ")[[1]]){
bone.search = s.keep[s.keep$rank%in%"VARIETY",]
if (nrow(bone.search)==0){
bone.search = s.keep[s.keep$rank%in%"SUBSPECIES",]
}

} else {
bone.search = s.keep[s.keep$rank%in%"SPECIES",]
}

coltax = c("familyKey","orderKey","classKey","phylumKey")%in%colnames(bone.search)
key.test = bone.search[,c("familyKey","orderKey","classKey","phylumKey")[coltax]]

if (any(bone.search$status%in%"ACCEPTED") & length(unique(key.test[,1]))==1){
bone.search = bone.search[bone.search$status%in%"ACCEPTED",]
bone.search = s.keep[s.keep$rank%in%"SUBSPECIES",]
}

} else {
bone.search = s.keep
bone.search = s.keep[s.keep$rank%in%"SPECIES",]
}
# If not the same species overall return NULL
s.usp = length(unique(bone.search$speciesKey))==1
if (!s.usp){
cat("No synonyms distinction could be made. Consider using phylum/class/order/family...","\n")
return(data.frame(NULL))

} else {
bone.search = bone.search[1,]
}
}
}
}

if (bone.search$matchType%in%"NONE") {
cat("No species name found...","\n")
return(data.frame(NULL))
}
coltax = c("familyKey","orderKey","classKey","phylumKey")%in%colnames(bone.search)
key.test = bone.search[,c("familyKey","orderKey","classKey","phylumKey")[coltax]]

if (bone.search$confidence[1]<conf_match) {
cat("Confidence match not high enough...","\n")
return(data.frame(NULL))
}

# Extract key of accepted name
if (bone.search$status%in%"SYNONYM") {
accep.key = bone.search$acceptedUsageKey
} else {
accep.key = bone.search$usageKey
}
if (any(bone.search$status%in%"ACCEPTED") & length(unique(key.test[,1]))==1){
bone.search = bone.search[bone.search$status%in%"ACCEPTED",]
}

# Extract accepted name and save it with its key in the prepared output
accep.name = rgbif::name_usage(accep.key,data="name")$data
syn.syn = rgbif::name_usage(accep.key,data="synonyms")$data
main.dat = rgbif::name_usage(accep.key,data="all")$data
iucn = try(rgbif::name_usage(accep.key,data="iucnRedListCategory")$data,silent=TRUE)
if (methods::is(iucn,"try-error")) {iucn = "NOT_FOUND"} else {iucn = iucn$category}

# Specific columns
c.key = suppressWarnings(c(accep.key,syn.syn$key))
c.sc = suppressWarnings(c(accep.name$scientificName,syn.syn$scientificName))
c.can = suppressWarnings(c(accep.name$canonicalName,syn.syn$canonicalName))
c.status = c("ACCEPTED",rep("SYNONYM",length(suppressWarnings(syn.syn$scientificName))))

# If missing codes, then we continue the search to find possible name correspondence
if (all) {

# Combine everything and search for related names (i.e. other string version)
all.key = suppressWarnings(c(accep.key,syn.syn$key,main.dat$key))
all.version = lapply(all.key,function(x){
out = suppressWarnings(rgbif::name_usage(x,data="related")$data$scientificName)
if (is.null(out)){
return(NULL)
} else {
return(data.frame(key=x,scientificName=out))
bone.search = s.keep
}
})

# Extract all names
a.n = suppressWarnings(accep.name[,c("key","scientificName")])
a.n$key = accep.key
c.n = suppressWarnings(main.dat[,c("key","scientificName")])
r.n = suppressWarnings(unique(do.call("rbind",all.version)))

# Conditions for synonymy
s.n = try(suppressWarnings(syn.syn[,c("key","scientificName")]),silent=TRUE)
if (class(s.n)[1]%in%"try-error") {s.n = data.frame(key=NULL,scientificName=NULL)}
all.names = rbind(a.n,s.n,c.n,r.n)

# Specific columns
c.key = all.names$key
c.sc = suppressWarnings(all.names$scientificName)
c.can = suppressWarnings(all.names$canonicalName)
c.status = c("ACCEPTED",
rep("SYNONYM",nrow(s.n)),
rep("CHILDREN",nrow(c.n)),
rep("RELATED",nrow(r.n)))
}
# If not the same species overall return NULL
s.usp = length(unique(bone.search$speciesKey))==1
if (!s.usp){
cat("No synonyms distinction could be made. Consider using phylum/class/order/family...","\n")
return(data.frame(NULL))

# Which is null in main.dat for Genus, Family, Order, Phyllum?
exist.not = c("genus","order","family","phylum")%in%names(main.dat)
main.out = data.frame(Genus=NA,Family=NA,Order=NA,Phylum=NA)
main.out[exist.not] = main.dat[,c("genus","order","family","phylum")[exist.not]]

# Extract accepted names and synonyms
out = data.frame(canonicalName = c.can,
rank = bone.search$rank,
gbif_key = c.key,
scientificName = c.sc,
gbif_status = c.status,
main.out,
IUCN_status = iucn)

return(out[!duplicated(out[,2]),])
} else {
bone.search = bone.search[1,]
}
}
}
}

if (bone.search$matchType%in%"NONE") {
cat("No species name found...","\n")
return(data.frame(NULL))
}

if (bone.search$confidence[1]<conf_match) {
cat("Confidence match not high enough...","\n")
return(data.frame(NULL))
}

# Extract key of accepted name
if (bone.search$status%in%"SYNONYM") {
accep.key = bone.search$acceptedUsageKey
} else {
accep.key = bone.search$usageKey
}

# Extract accepted name and save it with its key in the prepared output
accep.name = rgbif::name_usage(accep.key,data="name")$data
syn.syn = rgbif::name_usage(accep.key,data="synonyms")$data
main.dat = rgbif::name_usage(accep.key,data="all")$data
iucn = try(rgbif::name_usage(accep.key,data="iucnRedListCategory")$data,silent=TRUE)
if (methods::is(iucn,"try-error")) {iucn = "NOT_FOUND"} else {iucn = iucn$category}

# Specific columns
c.key = suppressWarnings(c(accep.key,syn.syn$key))
c.sc = suppressWarnings(c(accep.name$scientificName,syn.syn$scientificName))
c.can = suppressWarnings(c(accep.name$canonicalName,syn.syn$canonicalName))
c.status = c("ACCEPTED",rep("SYNONYM",length(suppressWarnings(syn.syn$scientificName))))

# If missing codes, then we continue the search to find possible name correspondence
if (all) {

# Combine everything and search for related names (i.e. other string version)
all.key = suppressWarnings(c(accep.key,syn.syn$key,main.dat$key))
all.version = lapply(all.key,function(x){
out = suppressWarnings(rgbif::name_usage(x,data="related")$data$scientificName)
if (is.null(out)){
return(NULL)
} else {
return(data.frame(key=x,scientificName=out))
}
})

# Extract all names
a.n = suppressWarnings(accep.name[,c("key","scientificName")])
a.n$key = accep.key
c.n = suppressWarnings(main.dat[,c("key","scientificName")])
r.n = suppressWarnings(unique(do.call("rbind",all.version)))

# Conditions for synonymy
s.n = try(suppressWarnings(syn.syn[,c("key","scientificName")]),silent=TRUE)
if (class(s.n)[1]%in%"try-error") {s.n = data.frame(key=NULL,scientificName=NULL)}
all.names = rbind(a.n,s.n,c.n,r.n)

# Specific columns
c.key = all.names$key
c.sc = suppressWarnings(all.names$scientificName)
c.can = suppressWarnings(all.names$canonicalName)
c.status = c("ACCEPTED",
rep("SYNONYM",nrow(s.n)),
rep("CHILDREN",nrow(c.n)),
rep("RELATED",nrow(r.n)))
}

# Which is null in main.dat for Genus, Family, Order, Phyllum?
exist.not = c("genus","order","family","phylum")%in%names(main.dat)
main.out = data.frame(Genus=NA,Family=NA,Order=NA,Phylum=NA)
main.out[exist.not] = main.dat[,c("genus","order","family","phylum")[exist.not]]

# Extract accepted names and synonyms
out = data.frame(canonicalName = c.can,
rank = bone.search$rank,
gbif_key = c.key,
scientificName = c.sc,
gbif_status = c.status,
main.out,
IUCN_status = iucn)

return(out[!duplicated(out[,2]),])
}

0 comments on commit f603097

Please sign in to comment.