-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathregexeR.R
36 lines (32 loc) · 1.7 KB
/
regexeR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
## Regular expressions are patterns that strings are matched against. Here, we set up four:
## * =re.url= to verify properly formatted URLs
## * =re.email= to verify properly formatted email addresses
## * =re.currency= to detect dollars, pounds, and euros
## * =re.phone= to match phone numbers written as three groups (3-3-4)
library(rex)
# Building block for one host/domain label: one or more characters that are
# none of ".", "/", " " or "-".
valid_chars <- rex(except_some_of(".", "/", " ", "-"))

# URL validator assembled with the rex DSL. It is anchored with `start` and
# `end`, so the whole string has to be a URL. Quantifiers are written as direct
# calls (`at_least(x, n)`, `between(x, lo, hi)`, `zero_or_more(x)`) instead of
# being piped with `%>%`, so the pattern does not depend on a pipe operator
# being supplied by an attached package; only rex itself is loaded here.
re.url <- rex(
  start,
  # Protocol at the beginning of the URL, if it exists. Swap the commenting of
  # the next two lines if the protocol should be required.
  maybe(list("http", maybe("s")) %or% "ftp", "://"), # HTTP/FTP prefix not required in a URL
  # group(list("http", maybe("s")) %or% "ftp", "://"), # HTTP/FTP prefix required in a URL
  # user:pass authentication passed in the URL, if it exists
  maybe(non_spaces, maybe(":", zero_or_more(non_space)), "@"),
  # Host name: label characters, optionally separated by interior hyphens
  group(zero_or_more(valid_chars, zero_or_more("-")), one_or_more(valid_chars)),
  # Domain names: any number of further dot-separated labels
  zero_or_more(".", zero_or_more(valid_chars, zero_or_more("-")), one_or_more(valid_chars)),
  # Top-level domain (e.g. .com, .co, .in)
  group(".", at_least(valid_chars, 2)),
  # Server port number, if it exists: 2 to 5 digits after ":"
  maybe(":", between(digit, 2, 5)),
  # API/endpoint identifier, if it exists: "/" followed by non-space characters
  maybe("/", zero_or_more(non_space)),
  end
)
# Email matcher: local part, "@", domain, then a dot and a top-level domain of
# two or more letters, delimited by the word-boundary markers \< and \>.
# Both letter cases are listed in every character class so that lower-case and
# mixed-case addresses match even when the caller does not pass
# ignore.case = TRUE (the upper-case-only classes silently rejected them).
re.email <- "\\<[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\>"
# Currency matcher, anchored to the whole string. Two accepted shapes:
#   1. symbol-prefixed amount:  [pound|euro|dollar] digits/commas [.dd]
#   2. suffixed amount:         digits/commas [.dd] then one of
#                               p, c, m, M, K, "bn", " million", " billion"
# Fixes relative to the earlier pattern (capture-group numbering is unchanged):
#   * the decimal separator is an escaped "\\." -- a bare "." matched any
#     character, so strings such as "$100x50" were accepted;
#   * the suffixed form repeats its digit/comma class with "*" like the
#     prefixed form does -- without it exactly two leading characters were
#     required, so "5p" or "250 million" were rejected.
# The pound and euro signs are written as \u escapes so the pattern does not
# depend on the encoding this file is read with.
re.currency <- "^([\u00a3\u20ac$]([0-9]([0-9,])*)(\\.\\d{2})?|([0-9]([0-9,])*)(\\.\\d{2})?([pcmMK]|bn| [mb]illion))$" # US/UK/EURO only
# Phone-number matcher for 3-3-4 numbers such as "(555) 123-4567" or
# "555.123.4567": optional parentheses around the first three digits, then two
# more groups separated by ".", " " or "-". The last two groups also accept
# upper-case letters. The trailing ".*" swallows the rest of the string, so
#   sub(re.phone, "\\1-\\2-\\3", x)
# rewrites a match (and anything after it) to the form "555-123-4567".
# The earlier value had that replacement template (','\\1-\\2-\\3) fused onto
# the end of the pattern itself, which made it require a literal "','" followed
# by back-references and therefore never match an ordinary phone number.
re.phone <- "[(]?([0-9]{3})[)]?[. -]([A-Z0-9]{3})[. -]([A-Z0-9]{4}).*"
# Longer `regex.*` names bound to the same pattern objects as the `re.*` ones.
regex.phone <- re.phone
regex.currency <- re.currency
regex.email <- re.email
regex.url <- re.url