From 09149eed8b8ebb7d667b9ef8bd8c2cc074b964b0 Mon Sep 17 00:00:00 2001 From: ehwenk Date: Tue, 23 Jan 2024 11:32:38 +1100 Subject: [PATCH] more changes * ensure 1-sentence function names are similar to longer descriptions * change new function descriptions file from qmd to rmd * more work formatting tables for vignettes * add missing info into vignettes; delete an unused vignette --- R/align_taxa.R | 4 ++-- R/create_species_state_origin_matrix.R | 4 ++-- R/create_taxonomic_update_lookup.R | 2 +- R/native_anywhere_in_australia.R | 2 +- R/standardise_names.R | 3 +-- R/state_diversity_counts.R | 2 +- R/strip_names.R | 4 ++-- R/update_taxonomy.R | 2 +- _pkgdown.yml | 6 ++--- .../APCalign_outputs_documentation.csv | 4 ++-- inst/extdata/match_taxa_documentation.csv | 8 +++---- .../extdata/update_taxonomy_documentation.csv | 15 +++++------- man/align_taxa.Rd | 4 ++-- man/create_species_state_origin_matrix.Rd | 4 ++-- man/create_taxonomic_update_lookup.Rd | 2 +- man/native_anywhere_in_australia.Rd | 2 +- man/standardise_names.Rd | 3 +-- man/state_diversity_counts.Rd | 2 +- man/strip_names.Rd | 2 +- man/strip_names_2.Rd | 2 +- man/update_taxonomy.Rd | 2 +- vignettes/articles/data-providers.Rmd | 12 +++++----- ...{function_notes.qmd => function_notes.Rmd} | 15 ++++++++---- vignettes/updating-taxon-names.Rmd | 23 +++++++++++++++---- 24 files changed, 72 insertions(+), 57 deletions(-) rename vignettes/articles/{function_notes.qmd => function_notes.Rmd} (95%) diff --git a/R/align_taxa.R b/R/align_taxa.R index 7036c319..158a373c 100644 --- a/R/align_taxa.R +++ b/R/align_taxa.R @@ -1,6 +1,6 @@ -#' Find taxonomic alignments for a list of names to a version of the Australian Plant Census (APC) through standardizing formatting and checking for spelling issues +#' For a list of Australian plant names, find taxonomic or scientific name alignments to the APC or APNI through standardizing formatting and fixing spelling errors #' -#' Finds taxonomic alignments in the APC or APNI. 
+#' This function finds taxonomic alignments in APC or scientific name alignments in APNI. #' It uses the internal function `match_taxa` to attempt to match input strings to taxon names in the APC/APNI. #' It sequentially searches for matches against more than 20 different string patterns, #' prioritising exact matches (to accepted names as well as synonyms, orthographic variants) over fuzzy matches. diff --git a/R/create_species_state_origin_matrix.R b/R/create_species_state_origin_matrix.R index a802e1e8..59e6f564 100644 --- a/R/create_species_state_origin_matrix.R +++ b/R/create_species_state_origin_matrix.R @@ -1,7 +1,7 @@ -#' Process geographic data and return state level species origin and diversity counts +#' Use the taxon distribution data from the APC to determine state level native and introduced origin status #' #' This function processes the geographic data available in the APC and -#' returns state level diversity for native, introduced and more complicated species origins. +#' returns state level native, introduced and more complicated origins status for all taxa. #' #' #' @family diversity methods diff --git a/R/create_taxonomic_update_lookup.R b/R/create_taxonomic_update_lookup.R index bddef021..1753781a 100644 --- a/R/create_taxonomic_update_lookup.R +++ b/R/create_taxonomic_update_lookup.R @@ -1,4 +1,4 @@ -#' Create a lookup table to help fix the taxonomy for a list of Australian plant species +#' Create a lookup table with the best-possible scientific name match for a list of Australian plant names #' #' This function takes a list of Australian plant names that need to be reconciled with current taxonomy and #' generates a lookup table of the best-possible scientific name match for each input name. 
diff --git a/R/native_anywhere_in_australia.R b/R/native_anywhere_in_australia.R index cb82f2d6..05767c8b 100644 --- a/R/native_anywhere_in_australia.R +++ b/R/native_anywhere_in_australia.R @@ -1,4 +1,4 @@ -#' Check if a vector of species are native anywhere in Australia +#' For a vector of taxon names aligned to the APC, check if the species are native anywhere in Australia #' #' This function checks if the given species is native anywhere in Australia according to the APC. #' Note that this will not detect within-Australia introductions, e.g. if a species is from Western Australia and is invasive on the east coast. diff --git a/R/standardise_names.R b/R/standardise_names.R index fe36ce9a..e1bfb4e8 100644 --- a/R/standardise_names.R +++ b/R/standardise_names.R @@ -1,7 +1,6 @@ -#' Standardise Taxon Names +#' Standardises taxon names by performing a series of text substitutions to remove common inconsistencies in taxonomic nomenclature. #' -#' Standardises taxon names by performing a series of text substitutions to remove common inconsistencies in taxonomic nomenclature. #' The function takes a character vector of taxon names as input and #' returns a character vector of taxon names using standardised taxonomic syntax as output. #' In particular it standardises taxon rank abbreviations and qualifiers (subsp., var., f.), as people use many variants of these terms.
diff --git a/R/state_diversity_counts.R b/R/state_diversity_counts.R index a9d7b434..36c169b7 100644 --- a/R/state_diversity_counts.R +++ b/R/state_diversity_counts.R @@ -1,4 +1,4 @@ -#' Calculate Australian plant state-level diversity for native, introduced, and more complicated species origins +#' For Australian states and territories, use data from the APC to calculate state-level diversity for native, introduced, and more complicated species origins #' #' This function calculates state-level diversity for native, introduced, and more complicated species origins #' based on the geographic data available in the APC. diff --git a/R/strip_names.R b/R/strip_names.R index c175aad5..488f23f8 100644 --- a/R/strip_names.R +++ b/R/strip_names.R @@ -1,4 +1,4 @@ -#' Strip taxonomic names of subtaxa designations and special characters +#' Strip taxonomic names of taxon rank abbreviations and qualifiers and special characters #' #' Given a vector of taxonomic names, this function removes subtaxa designations ("subsp.", "var.", "f.", and "ser"), #' special characters (e.g., "-", ".", "(", ")", "?"), and extra whitespace. 
The resulting vector @@ -34,7 +34,7 @@ strip_names <- function(taxon_names) { tolower() } -#' Strip taxonomic names of subtaxa designations, filled words and special characters +#' Strip taxonomic names of taxon rank abbreviations and qualifiers, filler words and special characters #' #' Given a vector of taxonomic names, this function removes subtaxa designations ("subsp.", "var.", "f.", and "ser"), #' additional filler words and characters (" x " for hybrid taxa, "sp."), diff --git a/R/update_taxonomy.R b/R/update_taxonomy.R index 7c9afe4d..f49c0f25 100644 --- a/R/update_taxonomy.R +++ b/R/update_taxonomy.R @@ -1,4 +1,4 @@ -#' Use APC and APNI to update taxonomy, replacing synonyms to current taxa where relevant +#' For a list of taxon names aligned to the APC, update the name to an accepted taxon concept per the APC and add scientific name and taxon concept metadata to names aligned to either the APC or APNI. #' #' This function uses the APC to update the taxonomy of names aligned to a taxon concept listed in the APC to the currently accepted name for the taxon concept. #' The aligned_data data frame that is input must contain 5 columns, diff --git a/_pkgdown.yml b/_pkgdown.yml index c82eaa02..2833f317 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -19,9 +19,9 @@ navbar: - text: "Data providers" - text: APC and APNI href: articles/data-providers.html - - text: "Data caching" - - text: How is APC/APNI stored in APCalign? 
- href: 'articles/caching.html' + - text: "Functions" + - text: Details on the 10 exported functions, including examples of usage + href: articles/function_notes.html - text: ------- - text: "Taxon matching" - text: Our fuzzy matching algorithm diff --git a/inst/extdata/APCalign_outputs_documentation.csv b/inst/extdata/APCalign_outputs_documentation.csv index 6ced2f53..12a48d13 100644 --- a/inst/extdata/APCalign_outputs_documentation.csv +++ b/inst/extdata/APCalign_outputs_documentation.csv @@ -2,7 +2,7 @@ variable,returned by,description original_name,default,The original plant name. aligned_name,default,The input plant name that has been aligned to a taxon name in the APC or APNI by the align_taxa function. accepted_name,default,The APC-accepted plant name when available. -suggested_name,default,The suggested plant name to use. Identical to the accepted_name when an accepted_name exists; otherwise the the suggested_name is the aligned_name. +suggested_name,default,The suggested plant name to use. Identical to the accepted_name when an accepted_name exists; otherwise the suggested_name is the aligned_name or the aligned name with an outdated genus updated. genus,default,The genus of the accepted (or suggested) name; only APC-accepted genus names are filled in. family,full,The family of the accepted (or suggested) name; only APC-accepted family names are filled in. taxon_rank,default,The taxonomic rank of the suggested (and accepted) name. @@ -18,4 +18,4 @@ taxon_ID_genus,full,An identifier for the genus; only filled in if an APC-accept scientific_name_ID,full,An identifier for the nomenclatural (not taxonomic) details of a scientific name; available for both APC and APNI names. taxonomic_status_aligned,full,The taxonomic status of the aligned name before any taxonomic updates have been applied. row_number,full,The row number of a specific original_name in the input.
-number_of_collapsed_taxa,default,The number of possible taxon names that have been collapsed when taxonomic_splits == "collapse_to_higher_taxon". +number_of_collapsed_taxa,default,"The number of possible taxon names that have been collapsed when taxonomic_splits == ""collapse_to_higher_taxon""." diff --git a/inst/extdata/match_taxa_documentation.csv b/inst/extdata/match_taxa_documentation.csv index c3e48408..0da47f8e 100644 --- a/inst/extdata/match_taxa_documentation.csv +++ b/inst/extdata/match_taxa_documentation.csv @@ -7,12 +7,12 @@ match_03a,"Detect ` -- `, `--` (intergrade taxa) and align to genus","first word match_03b,"Detect ` -- `, `--` (intergrade taxa) and align to genus","first word (""genus"")",fuzzy,APC accepted taxon concepts,genus, match_03c,"Detect ` -- `, `--` (intergrade taxa) and align to genus","first word (""genus"")",fuzzy,other APC taxon concepts,genus, match_03d,"Detect ` -- `, `--` (intergrade taxa) and align to genus","first word (""genus"")",fuzzy,APNI,genus, -match_03e,"Detect ` -- `, `--` (intergrade taxa), but fail to align to genus",NA,no match,NA,genus, +match_03e,"Detect ` -- `, `--` (intergrade taxa), but fail to align to genus",NA,no match,NA,NA, match_04a,Detect ` \` (indecision between taxa) and align to genus.,"first word (""genus"")",exact,"APC accepted taxon concepts, other APC taxon concepts, APNI",genus,Next find strings that indicate a name reflects a data collector's indecision about which of two (or more) taxa is the appropriate taxon. These names can only be aligned to a genus. 
match_04b,Detect ` \` (indecision between taxa) and align to genus.,"first word (""genus"")",fuzzy,APC accepted taxon concepts,genus, match_04c,Detect ` \` (indecision between taxa) and align to genus.,"first word (""genus"")",fuzzy,other APC taxon concepts,genus, match_04d,Detect ` \` (indecision between taxa) and align to genus.,"first word (""genus"")",fuzzy,APNI,genus, -match_04e,"Detect ` \` (indecision between taxa), but fail to align to genus",NA,no match,NA,genus, +match_04e,"Detect ` \` (indecision between taxa), but fail to align to genus",NA,no match,NA,NA, match_05a,"Detect scientific names, including authorship",original_name,exact,APC accepted taxon concepts,species/infraspecific,"Check if strings are full scientific names, including authorship." match_05b,"Detect scientific names, including authorship",original_name,exact,other APC taxon concepts,species/infraspecific, match_06a,"Detect canonical names, lacking authorship",cleaned_name,exact,APC accepted taxon concepts,species/infraspecific,"Check if strings are taxon names, lacking authorship." 
@@ -24,14 +24,14 @@ match_09a,"Detect `aff`, `affinis` (affinity to) and align to genus","first word match_09b,"Detect `aff`, `affinis` (affinity to) and align to genus","first word (""genus"")",fuzzy,APC accepted taxon concepts,genus, match_09c,"Detect `aff`, `affinis` (affinity to) and align to genus","first word (""genus"")",fuzzy,other APC taxon concepts,genus, match_09d,"Detect `aff`, `affinis` (affinity to) and align to genus","first word (""genus"")",fuzzy,APNI,genus, -match_09e,"Detect `aff`, `affinis` (affinity to), but fail to align to genus",NA,no match,NA,genus, +match_09e,"Detect `aff`, `affinis` (affinity to), but fail to align to genus",NA,no match,NA,NA, match_10a,"Detect canonical names, lacking authorship",stripped_name,imprecise fuzzy,APC accepted taxon concepts,species/infraspecific,"Further checks if strings are taxon names, lacking authorship, now with imprecise fuzzy matching" match_10b,"Detect canonical names, lacking authorship",stripped_name,imprecise fuzzy,other APC taxon concepts,species/infraspecific, match_11a,Detect ` x ` (hybrid taxon) and align to genus,"first word (""genus"")",exact,"APC accepted taxon concepts, other APC taxon concepts, APNI",genus,"Find strings that indicate a name that is a hybrid between two taxa. Such names, unless documented in APC (i.e. matches 6, 7 above) can only be aligned to genus." 
match_11b,Detect ` x ` (hybrid taxon) and align to genus,"first word (""genus"")",fuzzy,APC accepted taxon concepts,genus, match_11c,Detect ` x ` (hybrid taxon) and align to genus,"first word (""genus"")",fuzzy,other APC taxon concepts,genus, match_11d,Detect ` x ` (hybrid taxon) and align to genus,"first word (""genus"")",fuzzy,APNI,genus, -match_11e,"Detect ` x ` (hybrid taxon), but fail to align to genus",NA,no match,NA,genus, +match_11e,"Detect ` x ` (hybrid taxon), but fail to align to genus",NA,no match,NA,NA, match_12a,"Detect canonical names, by checking first three words in string",trinomial (from stripped_name_2),exact,APC accepted taxon concepts,species/infraspecific,"Check if the first three words in the name string match with a taxon name, allowing notes to be discarded. Also useful for aligning phrase names." match_12b,"Detect canonical names, by checking first three words in string",trinomial (from stripped_name_2),exact,other APC taxon concepts,species/infraspecific, match_13a,"Detect canonical names, by checking first three words in string",trinomial (from stripped_name_2),fuzzy,APC accepted taxon concepts,species/infraspecific, diff --git a/inst/extdata/update_taxonomy_documentation.csv b/inst/extdata/update_taxonomy_documentation.csv index 58f2e306..6ad2bf13 100644 --- a/inst/extdata/update_taxonomy_documentation.csv +++ b/inst/extdata/update_taxonomy_documentation.csv @@ -1,14 +1,11 @@ -function name,categories of aligned names processed:,,,,columns filled in,, -,taxonomic dataset,taxon rank,updates to aligned name,format of `suggested_name`,accepted name (& taxon_ID),genus (& taxon_ID_genus),scientific_name_ID +,categories of aligned names processed:,,,,columns filled in,, +function name,taxonomic dataset,taxon rank,updates to aligned name,format of `suggested_name`,accepted name (& taxon_ID),genus (& taxon_ID_genus),scientific_name_ID update_taxonomy_APC_genus,APC,genus,to APC accepted genus,`genus sp. 
[notes]` *,no,yes,no update_taxonomy_APNI_genus,APNI,genus,none,`genus sp. [notes]`,no,no,no update_taxonomy_APC_family,APC,family,none,`family sp. [notes]`,no,no,no update_taxonomy_APC_species_and_infraspecific_taxa,APC,species & infraspecific,,APC accepted species** name,yes,yes,yes -"taxonomic_splits = ""most_likely_species""",,,to APC accepted taxon concept,most likely APC accepted species** name [alternative possible names],yes,yes,yes -"taxonomic_splits = ""return_all""",,,to APC accepted taxon concept,all possible APC accepted species** name (extra rows added),yes,yes,yes -"taxonomic_splits = ""collapse_to_higher_taxon""",,,collapsed to APC accepted genus,`genus sp.` [collapsed names],no,yes,no +" -- taxonomic_splits = ""most_likely_species""",,,to APC accepted taxon concept,most likely APC accepted species** name [alternative possible names],yes,yes,yes +" -- taxonomic_splits = ""return_all""",,,to APC accepted taxon concept,all possible APC accepted species** name (extra rows added),yes,yes,yes +" -- taxonomic_splits = ""collapse_to_higher_taxon""",,,collapsed to APC accepted genus,`genus sp.` [collapsed names],no,yes,no update_taxonomy_APNI_species_and_infraspecific_taxa,APNI,species & infraspecific,none to species name; genus to APC accepted genus if possible,APNI listed species** name*,no,sometimes,yes -not required,(not aligned),(not aligned),none,original name,no,no,no -,,,,,,, -* genus updated to APC accepted genus if possible,,,,,,, -** species or infraspecific taxon name,,,,,,, +(names not aligned),(not aligned),(not aligned),none,original name,no,no,no diff --git a/man/align_taxa.Rd b/man/align_taxa.Rd index d4b9d7a6..2598d66c 100644 --- a/man/align_taxa.Rd +++ b/man/align_taxa.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/align_taxa.R \name{align_taxa} \alias{align_taxa} -\title{Find taxonomic alignments for a list of names to a version of the Australian Plant Census (APC) through standardizing formatting and checking for spelling issues} 
+\title{For a list of Australian plant names, find taxonomic or scientific name alignments to the APC or APNI through standardizing formatting and fixing spelling errors} \usage{ align_taxa( original_name, @@ -70,7 +70,7 @@ A tibble with columns that include original_name, aligned_name, taxonomic_datase } } \description{ -Finds taxonomic alignments in the APC or APNI. +This function finds taxonomic alignments in APC or scientific name alignments in APNI. It uses the internal function \code{match_taxa} to attempt to match input strings to taxon names in the APC/APNI. It sequentially searches for matches against more than 20 different string patterns, prioritising exact matches (to accepted names as well as synonyms, orthographic variants) over fuzzy matches. diff --git a/man/create_species_state_origin_matrix.Rd b/man/create_species_state_origin_matrix.Rd index 26a3f24c..ed019678 100644 --- a/man/create_species_state_origin_matrix.Rd +++ b/man/create_species_state_origin_matrix.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/create_species_state_origin_matrix.R \name{create_species_state_origin_matrix} \alias{create_species_state_origin_matrix} -\title{Process geographic data and return state level species origin and diversity counts} +\title{Use the taxon distribution data from the APC to determine state level native and introduced origin status} \usage{ create_species_state_origin_matrix(resources = load_taxonomic_resources()) } @@ -14,7 +14,7 @@ A tibble with columns representing each state and rows representing each species } \description{ This function processes the geographic data available in the APC and -returns state level diversity for native, introduced and more complicated species origins. +returns state level native, introduced and more complicated origins status for all taxa. 
} \examples{ \donttest{create_species_state_origin_matrix()} diff --git a/man/create_taxonomic_update_lookup.Rd b/man/create_taxonomic_update_lookup.Rd index 3d91f6a9..b72cd64e 100644 --- a/man/create_taxonomic_update_lookup.Rd +++ b/man/create_taxonomic_update_lookup.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/create_taxonomic_update_lookup.R \name{create_taxonomic_update_lookup} \alias{create_taxonomic_update_lookup} -\title{Create a lookup table to help fix the taxonomy for a list of Australian plant species} +\title{Create a lookup table with the best-possible scientific name match for a list of Australian plant names} \usage{ create_taxonomic_update_lookup( taxa, diff --git a/man/native_anywhere_in_australia.Rd b/man/native_anywhere_in_australia.Rd index a62df607..2e9e6cd4 100644 --- a/man/native_anywhere_in_australia.Rd +++ b/man/native_anywhere_in_australia.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/native_anywhere_in_australia.R \name{native_anywhere_in_australia} \alias{native_anywhere_in_australia} -\title{Check if a vector of species are native anywhere in Australia} +\title{For a vector of taxon names aligned to the APC, check if the species are native anywhere in Australia} \usage{ native_anywhere_in_australia(species, resources = load_taxonomic_resources()) } diff --git a/man/standardise_names.Rd b/man/standardise_names.Rd index ab0e494a..fc691262 100644 --- a/man/standardise_names.Rd +++ b/man/standardise_names.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/standardise_names.R \name{standardise_names} \alias{standardise_names} -\title{Standardise Taxon Names} +\title{Standardises taxon names by performing a series of text substitutions to remove common inconsistencies in taxonomic nomenclature.} \usage{ standardise_names(taxon_names) } @@ -13,7 +13,6 @@ standardise_names(taxon_names) A character vector of standardised taxon names.
} \description{ -Standardises taxon names by performing a series of text substitutions to remove common inconsistencies in taxonomic nomenclature. The function takes a character vector of taxon names as input and returns a character vector of taxon names using standardised taxonomic syntax as output. In particular it standardises taxon rank abbreviations and qualifiers (subsp., var., f.), as people use many variants of these terms. diff --git a/man/state_diversity_counts.Rd b/man/state_diversity_counts.Rd index f410d883..9f2e3f68 100644 --- a/man/state_diversity_counts.Rd +++ b/man/state_diversity_counts.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/state_diversity_counts.R \name{state_diversity_counts} \alias{state_diversity_counts} -\title{Calculate Australian plant state-level diversity for native, introduced, and more complicated species origins} +\title{For Australian states and territories, use data from the APC to calculate state-level diversity for native, introduced, and more complicated species origins} \usage{ state_diversity_counts(state, resources = load_taxonomic_resources()) } diff --git a/man/strip_names.Rd b/man/strip_names.Rd index 484bb7cf..459288c4 100644 --- a/man/strip_names.Rd +++ b/man/strip_names.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/strip_names.R \name{strip_names} \alias{strip_names} -\title{Strip taxonomic names of subtaxa designations and special characters} +\title{Strip taxonomic names of taxon rank abbreviations and qualifiers and special characters} \usage{ strip_names(taxon_names) } diff --git a/man/strip_names_2.Rd b/man/strip_names_2.Rd index b4edb4fc..2812d9bd 100644 --- a/man/strip_names_2.Rd +++ b/man/strip_names_2.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/strip_names.R \name{strip_names_2} \alias{strip_names_2} -\title{Strip taxonomic names of subtaxa designations, filled words and special characters} +\title{Strip taxonomic names of taxon rank abbreviations and qualifiers, filler words and 
special characters} \usage{ strip_names_2(taxon_names) } diff --git a/man/update_taxonomy.Rd b/man/update_taxonomy.Rd index 3cbb4929..487d23a4 100644 --- a/man/update_taxonomy.Rd +++ b/man/update_taxonomy.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/update_taxonomy.R \name{update_taxonomy} \alias{update_taxonomy} -\title{Use APC and APNI to update taxonomy, replacing synonyms to current taxa where relevant} +\title{For a list of taxon names aligned to the APC, update the name to an accepted taxon concept per the APC and add scientific name and taxon concept metadata to names aligned to either the APC or APNI.} \usage{ update_taxonomy( aligned_data, diff --git a/vignettes/articles/data-providers.Rmd b/vignettes/articles/data-providers.Rmd index 6802898a..22fda6ca 100644 --- a/vignettes/articles/data-providers.Rmd +++ b/vignettes/articles/data-providers.Rmd @@ -18,23 +18,23 @@ library(dplyr) ## Australian Plant Census (APC) -The [Australian Plant Census (APC)](https://biodiversity.org.au/nsl/services/search/taxonomy) is national database of accepted taxonomic names for [Australian vascular plants](https://bie.ala.org.au/species/NZOR-6-33408). The APC includes information on synonyms, and misapplications of them, as well as established status (native/introduced) and distribution across states and territories. +The [Australian Plant Census (APC)](https://biodiversity.org.au/nsl/services/search/taxonomy) is the national taxonomic database of accepted names for [Australian vascular plants](https://bie.ala.org.au/species/NZOR-6-33408). The APC includes information on synonyms, and misapplications of them, as well as established status (native/introduced) and distribution across states and territories. -'APCalign' will first use the APC to align your taxonomic names to ones that exist in the database. +'APCalign' will first attempt to align your plant names to scientific names that exist in the APC. 
-## Australian Plant Index (APNI) +## Australian Plant Names Index (APNI) -The [Australian Plant Index (APNI)](https://www.anbg.gov.au/apni/) is a database containing names of Australian plants and their use in scientific literature. It is primarily used by the botanical community for standardising synonyms. Importantly, APNI does not provide recommendation of taxonomy or nomenclature, this is where the APC comes in. +The [Australian Plant Names Index (APNI)](https://www.anbg.gov.au/apni/) is a database containing all names used for Australian plants in scientific literature. It is primarily used by the botanical community for standardising synonyms. Importantly, APNI does not provide recommendations of taxonomy or nomenclature; only the APC indicates which taxonomy is considered accepted. 'APCalign' uses APNI when an alignment cannot be found in the APC. ## Data standards and meta-data -Data from both APNI and APC are formatted according to the [Darwin Core standard](https://dwc.tdwg.org/terms/), a widely used format for many databases. +Data from both APNI and APC are formatted according to the [Darwin Core standard](https://dwc.tdwg.org/terms/), a widely used data standard for biodiversity data. You can find the meta-data for the APC and APNI below: - [Meta-data for APC output](https://ibis-cloud.atlassian.net/wiki/spaces/NP/pages/1154383943/NSL+Taxon+export+format) -- [Meta-data of the APNI output](https://ibis-cloud.atlassian.net/wiki/spaces/NP/pages/1154383919/NSL+Name+export+format) +- [Meta-data for APNI output](https://ibis-cloud.atlassian.net/wiki/spaces/NP/pages/1154383919/NSL+Name+export+format) For more details on APNI and APC, we recommend taking a read of [their extensive documentation](https://ibis-cloud.atlassian.net/wiki/spaces/NP/pages/1380483087/NSL+API+Documentation#1.-Introduction). 
diff --git a/vignettes/articles/function_notes.qmd b/vignettes/articles/function_notes.Rmd similarity index 95% rename from vignettes/articles/function_notes.qmd rename to vignettes/articles/function_notes.Rmd index bcc27692..d95fc85c 100644 --- a/vignettes/articles/function_notes.qmd +++ b/vignettes/articles/function_notes.Rmd @@ -1,3 +1,10 @@ +--- +title: "Function notes" +author: "Elizabeth Wenk" +date: "2024-01-22" +output: html_document +--- + # APCalign functions APCalign exports [10 functions](https://traitecoevo.github.io/APCalign/reference/index.html) to facilitate the alignment of submitted plant names to scientific names on the APC and APNI lists. They are listed in order of likelihood of use. @@ -60,9 +67,8 @@ updated_taxa <- **notes**\ - If you will be running the function `APCalign::create_taxonomic_update_lookup` many times, it is best to load the taxonomic resources separately using `resources <- load_taxonomic_resources()`, then add the argument `resources = resources`\ - The name `Banksia cerrata` does not align as the fuzzy matching algorithm does not allow the first letter of the genus and species epithet to change.\ -- It is recommended that you begin with `imprecise_fuzzy_matches = FALSE` (the default), as quite a few of the less precise fuzzy matches are likely to be erroneous. This argument should be turned on only if you plan to check all alignments manually.\ - The argument `taxonomic_splits` allows you to choose the outcome for updating the names of taxa with ambiguous taxonomic histories; this applies to scientific names that were once attached to a more broadly circumscribed taxon concept, that was then split into several more narrowly circumscribed taxon concepts, one of which retains the original name. 
There are three options: `most_likely_species` returns the name that is retained, with alternative names documented in square brackets; `return_all` adds additional rows to the output, one for each possible taxon concept; `collapse_to_higher_taxon` returns the genus with possible names in square brackets.\ -- The argument `identifier` allows you to add a fix text string to all genus- and family- level names, such as `identifier = "Royal NP"` would return \`Acacia sp. \[Royal NP\]. +- The argument `identifier` allows you to add a fixed text string to all genus- and family- level names; for example, `identifier = "Royal NP"` would return `Acacia sp. [Royal NP]`. ### align_taxa @@ -131,7 +137,7 @@ resources = load_taxonomic_resources() ### create_species_state_origin_matrix -**description**: This function processes the geographic data available in the APC and returns state level diversity for native, introduced and more complicated species origins. +**description**: This function processes the geographic data available in the APC and returns state level native, introduced and more complicated origins status for all taxa. **arguments**: @@ -211,7 +217,7 @@ taxon_names #input vector of taxon names ### strip_names_2 -**description**: Given a vector of taxonomic names, this function removes subtaxa designations ("subsp.", "var.", "f.", and "ser"), additional filler words and characters (" x " \[hybrid taxa\], "sp."), special characters (e.g., "-", ".", "(", ")", "?"), and extra whitespace. The resulting vector of names is also converted to lowercase. +**description**: Given a vector of taxonomic names, this function removes subtaxa designations ("subsp.", "var.", "f.", and "ser"), additional filler words and characters (" x " [hybrid taxa], "sp."), special characters (e.g., "-", ".", "(", ")", "?"), and extra whitespace. The resulting vector of names is also converted to lowercase.
**arguments**: @@ -220,3 +226,4 @@ taxon_names #input vector of taxon names ``` **output**: A character vector of stripped taxonomic names, with subtaxa designations, special characters, additional filler words and extra whitespace removed, and all letters converted to lowercase. + diff --git a/vignettes/updating-taxon-names.Rmd b/vignettes/updating-taxon-names.Rmd index 39f52ea9..e9194941 100644 --- a/vignettes/updating-taxon-names.Rmd +++ b/vignettes/updating-taxon-names.Rmd @@ -60,11 +60,11 @@ APCalign_outputs_documentation <- ) ``` -# Aligning taxa with APC and APNI +# Aligning taxon names with taxon concepts/names in APC and APNI -XXXX +The following table indicates the rules for each of the 51 separate algorithms sequentially applied to attempt to align each submitted name to a taxon concept in APC or scientific names in APNI. -The following +Note, if the table is truncated on your screen, use horizontal scroll to view the entire table. ```{r, results='show'} match_taxa_documentation %>% @@ -74,14 +74,27 @@ match_taxa_documentation %>% # Updating taxonomy -XXX +The following table indicates the separate functions used to: + +- update aligned names to accepted names in the APC +- add best-practice suggested names to all submitted names +- add identifiers to taxon concepts (in the APC) or scientific names (in the APC or APNI) + +Different functions are used depending on the taxon rank of the aligned name and the taxonomic dataset to which the name was aligned (APC vs APNI). + ```{r, results='show'} update_taxonomy_documentation %>% - my_kable_styling() + my_kable_styling() %>% + kableExtra::add_header_above(c(" " = 1, "categories of aligned names processed" = 4, "columns filled in" = 3)) ``` +-* genus updated to APC accepted genus if possible; ** species or infraspecific taxon name + + # Outputs of APCalign +The following columns are output by the core function `create_taxonomic_update_lookup` and the two component functions `align_taxa` and `update_taxonomy`. 
+ ```{r, results='show'} APCalign_outputs_documentation %>% my_kable_styling()