diff --git a/include/ada/implementation.h b/include/ada/implementation.h index 117b0ff6b..7d6893b35 100644 --- a/include/ada/implementation.h +++ b/include/ada/implementation.h @@ -11,10 +11,7 @@ #include "ada/parser.h" #include "ada/common_defs.h" -#include "ada/encoding_type.h" #include "ada/url.h" -#include "ada/state.h" -#include "ada/url_aggregator.h" #include "ada/url_pattern_regex.h" namespace ada { @@ -61,11 +58,13 @@ bool can_parse(std::string_view input, * use ada::url_pattern_regex::std_regex_provider * @return url_pattern instance */ -ada_warn_unused tl::expected parse_url_pattern( - std::variant input, - const std::string_view* base_url = nullptr, - const url_pattern_options* options = nullptr, - std::optional regex_provider = std::nullopt); +template + requires url_pattern_regex::derived_from_provider +ada_warn_unused tl::expected, errors> +parse_url_pattern(std::variant input, + const std::string_view* base_url = nullptr, + const url_pattern_options* options = nullptr, + std::optional provider = std::nullopt); /** * Computes a href string from a file path. 
The function assumes diff --git a/include/ada/parser.h b/include/ada/parser.h index 60bf69122..733636932 100644 --- a/include/ada/parser.h +++ b/include/ada/parser.h @@ -17,6 +17,8 @@ namespace ada { struct url_aggregator; struct url; +template + requires url_pattern_regex::derived_from_provider class url_pattern; struct url_pattern_options; struct url_pattern_init; @@ -52,10 +54,13 @@ extern template url_aggregator parse_url_impl( extern template url parse_url_impl(std::string_view user_input, const url* base_url); -tl::expected parse_url_pattern_impl( - std::variant input, - const std::string_view* base_url, const url_pattern_options* options, - url_pattern_regex::provider&& regex_provider); +template + requires url_pattern_regex::derived_from_provider +tl::expected, errors> +parse_url_pattern_impl(std::variant input, + const std::string_view* base_url, + const url_pattern_options* options, + regex_provider&& provider); } // namespace ada::parser diff --git a/include/ada/url_aggregator.h b/include/ada/url_aggregator.h index 66f7991c3..8a768e8af 100644 --- a/include/ada/url_aggregator.h +++ b/include/ada/url_aggregator.h @@ -222,9 +222,13 @@ struct url_aggregator : url_base { friend url_aggregator parser::parse_url_impl( std::string_view, const url_aggregator *); // url_pattern methods - friend tl::expected parse_url_pattern_impl( - std::variant input, - const std::string_view *base_url, const url_pattern_options *options); + template + requires url_pattern_regex::derived_from_provider + friend tl::expected, errors> + parse_url_pattern_impl(std::variant input, + const std::string_view *base_url, + const url_pattern_options *options); std::string buffer{}; url_components components{}; diff --git a/include/ada/url_pattern-inl.h b/include/ada/url_pattern-inl.h index 2ad9e0af4..23fb7dbf3 100644 --- a/include/ada/url_pattern-inl.h +++ b/include/ada/url_pattern-inl.h @@ -24,7 +24,10 @@ inline bool url_pattern_component_result::operator==( return input == other.input && 
groups == other.groups; } -inline std::string url_pattern_component::to_string() const { +template + requires url_pattern_regex::derived_from_provider +std::string url_pattern_component::to_string() + const { #ifdef ADA_HAS_FORMAT return std::format(R"({{"pattern": "{}", "has_regexp_groups": {}}})", pattern, has_regexp_groups ? "true" : "false" //, @@ -34,9 +37,11 @@ inline std::string url_pattern_component::to_string() const { #endif } -inline url_pattern_component_result -url_pattern_component::create_component_match_result( - std::string_view input, const std::smatch& exec_result) { +template + requires url_pattern_regex::derived_from_provider +url_pattern_component_result url_pattern_component:: + create_component_match_result(std::string_view input, + const std::smatch& exec_result) { // Let result be a new URLPatternComponentResult. // Set result["input"] to input. // Let groups be a record. @@ -70,7 +75,9 @@ url_pattern_component::create_component_match_result( return result; } -inline std::string url_pattern::to_string() const { +template + requires url_pattern_regex::derived_from_provider +std::string url_pattern::to_string() const { #ifdef ADA_HAS_FORMAT return std::format( R"({{"protocol_component": "{}", "username_component": {}, "password_component": {}, "hostname_component": {}, "port_component": {}, "pathname_component": {}, "search_component": {}, "hash_component": {}, "ignore_case": {}}})", @@ -84,42 +91,70 @@ inline std::string url_pattern::to_string() const { #endif } -inline std::string_view url_pattern::get_protocol() const ada_lifetime_bound { +template + requires url_pattern_regex::derived_from_provider +std::string_view url_pattern::get_protocol() const + ada_lifetime_bound { // Return this's associated URL pattern's protocol component's pattern string. 
return protocol_component.pattern; } -inline std::string_view url_pattern::get_username() const ada_lifetime_bound { +template + requires url_pattern_regex::derived_from_provider +std::string_view url_pattern::get_username() const + ada_lifetime_bound { // Return this's associated URL pattern's username component's pattern string. return username_component.pattern; } -inline std::string_view url_pattern::get_password() const ada_lifetime_bound { +template + requires url_pattern_regex::derived_from_provider +std::string_view url_pattern::get_password() const + ada_lifetime_bound { // Return this's associated URL pattern's password component's pattern string. return password_component.pattern; } -inline std::string_view url_pattern::get_hostname() const ada_lifetime_bound { +template + requires url_pattern_regex::derived_from_provider +std::string_view url_pattern::get_hostname() const + ada_lifetime_bound { // Return this's associated URL pattern's hostname component's pattern string. return hostname_component.pattern; } -inline std::string_view url_pattern::get_port() const ada_lifetime_bound { +template + requires url_pattern_regex::derived_from_provider +std::string_view url_pattern::get_port() const + ada_lifetime_bound { // Return this's associated URL pattern's port component's pattern string. return port_component.pattern; } -inline std::string_view url_pattern::get_pathname() const ada_lifetime_bound { +template + requires url_pattern_regex::derived_from_provider +std::string_view url_pattern::get_pathname() const + ada_lifetime_bound { // Return this's associated URL pattern's pathname component's pattern string. return pathname_component.pattern; } -inline std::string_view url_pattern::get_search() const ada_lifetime_bound { +template + requires url_pattern_regex::derived_from_provider +std::string_view url_pattern::get_search() const + ada_lifetime_bound { // Return this's associated URL pattern's search component's pattern string. 
return search_component.pattern; } -inline std::string_view url_pattern::get_hash() const ada_lifetime_bound { +template + requires url_pattern_regex::derived_from_provider +std::string_view url_pattern::get_hash() const + ada_lifetime_bound { // Return this's associated URL pattern's hash component's pattern string. return hash_component.pattern; } - -inline bool url_pattern::ignore_case() const { return ignore_case_; } - -inline bool url_pattern::has_regexp_groups() const { +template + requires url_pattern_regex::derived_from_provider +bool url_pattern::ignore_case() const { + return ignore_case_; +} +template + requires url_pattern_regex::derived_from_provider +bool url_pattern::has_regexp_groups() const { // If this's associated URL pattern's has regexp groups, then return true. return protocol_component.has_regexp_groups || username_component.has_regexp_groups || diff --git a/include/ada/url_pattern.h b/include/ada/url_pattern.h index 3a5d0fcb4..105e393a5 100644 --- a/include/ada/url_pattern.h +++ b/include/ada/url_pattern.h @@ -19,11 +19,11 @@ namespace ada { namespace parser { template + typename url_pattern_options, typename regex_provider> tl::expected parse_url_pattern_impl( std::variant input, const std::string_view* base_url, const url_pattern_options* options, - url_pattern_regex::provider&& regex_provider); + regex_provider&& provider); } // Important: C++20 allows us to use concept rather than `using` or `typedef @@ -207,19 +207,19 @@ struct url_pattern_component_result { #endif // ADA_TESTING }; +template + requires url_pattern_regex::derived_from_provider class url_pattern_component { public: url_pattern_component() = default; // This function explicitly takes a std::string because it is moved. // To avoid unnecessary copy, move each value while calling the constructor. 
- url_pattern_component(std::string&& new_pattern, std::regex&& new_regexp, - std::regex_constants::syntax_option_type new_flags, + url_pattern_component(std::string&& new_pattern, regex_type&& new_regexp, std::vector&& new_group_name_list, bool new_has_regexp_groups) : regexp(std::move(new_regexp)), pattern(std::move(new_pattern)), - flags(new_flags), group_name_list(new_group_name_list), has_regexp_groups(new_has_regexp_groups) {} @@ -227,7 +227,8 @@ class url_pattern_component { template static tl::expected compile( std::string_view input, F& encoding_callback, - url_pattern_compile_component_options& options); + url_pattern_compile_component_options& options, + const regex_provider& provider); // @see https://urlpattern.spec.whatwg.org/#create-a-component-match-result url_pattern_component_result create_component_match_result( @@ -235,9 +236,8 @@ class url_pattern_component { std::string to_string() const; - std::regex regexp{}; + regex_type regexp{}; std::string pattern{}; - std::regex_constants::syntax_option_type flags = std::regex::ECMAScript; std::vector group_name_list{}; bool has_regexp_groups = false; }; @@ -270,10 +270,13 @@ struct url_pattern_options { // defined in https://wicg.github.io/urlpattern. 
// More information about the URL Pattern syntax can be found at // https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API +template + requires url_pattern_regex::derived_from_provider class url_pattern { public: - explicit url_pattern(url_pattern_regex::provider&& regex_provider) - : regex_provider_(std::move(regex_provider)) {} + explicit url_pattern( + url_pattern_regex::provider&& new_regex_provider) + : regex_provider_(std::move(new_regex_provider)) {} /** * @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-exec @@ -294,48 +297,48 @@ class url_pattern { const url_pattern_input& input, std::string_view* base_url_string); // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-protocol - std::string_view get_protocol() const ada_lifetime_bound; + [[nodiscard]] std::string_view get_protocol() const ada_lifetime_bound; // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-username - std::string_view get_username() const ada_lifetime_bound; + [[nodiscard]] std::string_view get_username() const ada_lifetime_bound; // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-password - std::string_view get_password() const ada_lifetime_bound; + [[nodiscard]] std::string_view get_password() const ada_lifetime_bound; // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-hostname - std::string_view get_hostname() const ada_lifetime_bound; + [[nodiscard]] std::string_view get_hostname() const ada_lifetime_bound; // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-port - std::string_view get_port() const ada_lifetime_bound; + [[nodiscard]] std::string_view get_port() const ada_lifetime_bound; // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-pathname - std::string_view get_pathname() const ada_lifetime_bound; + [[nodiscard]] std::string_view get_pathname() const ada_lifetime_bound; // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-search - std::string_view get_search() const ada_lifetime_bound; + [[nodiscard]] 
std::string_view get_search() const ada_lifetime_bound; // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-hash - std::string_view get_hash() const ada_lifetime_bound; + [[nodiscard]] std::string_view get_hash() const ada_lifetime_bound; // If ignoreCase is true, the JavaScript regular expression created for each // pattern must use the `vi` flag. Otherwise, they must use the `v` flag. - bool ignore_case() const; + [[nodiscard]] bool ignore_case() const; // @see https://urlpattern.spec.whatwg.org/#url-pattern-has-regexp-groups - bool has_regexp_groups() const; + [[nodiscard]] bool has_regexp_groups() const; - std::string to_string() const; + [[nodiscard]] std::string to_string() const; - url_pattern_component protocol_component{}; - url_pattern_component username_component{}; - url_pattern_component password_component{}; - url_pattern_component hostname_component{}; - url_pattern_component port_component{}; - url_pattern_component pathname_component{}; - url_pattern_component search_component{}; - url_pattern_component hash_component{}; + url_pattern_component protocol_component{}; + url_pattern_component username_component{}; + url_pattern_component password_component{}; + url_pattern_component hostname_component{}; + url_pattern_component port_component{}; + url_pattern_component pathname_component{}; + url_pattern_component search_component{}; + url_pattern_component hash_component{}; bool ignore_case_ = false; - url_pattern_regex::provider regex_provider_; + regex_provider regex_provider_; template + typename url_pattern_options, typename regex_provider_> friend tl::expected parser::parse_url_pattern_impl( std::variant input, const std::string_view* base_url, const url_pattern_options* options, - url_pattern_regex::provider&& regex_provider); + regex_provider_&& provider); }; } // namespace ada diff --git a/include/ada/url_pattern_helpers-inl.h b/include/ada/url_pattern_helpers-inl.h index 37311bb2b..6a5cfe479 100644 --- 
a/include/ada/url_pattern_helpers-inl.h +++ b/include/ada/url_pattern_helpers-inl.h @@ -39,20 +39,26 @@ inline std::string to_string(token_type type) { } } -inline void constructor_string_parser::rewind() { +template + requires url_pattern_regex::derived_from_provider +void constructor_string_parser::rewind() { // Set parser’s token index to parser’s component start. token_index = component_start; // Set parser’s token increment to 0. token_increment = 0; } -inline bool constructor_string_parser::is_hash_prefix() { +template + requires url_pattern_regex::derived_from_provider +bool constructor_string_parser::is_hash_prefix() { // Return the result of running is a non-special pattern char given parser, // parser’s token index and "#". return is_non_special_pattern_char(token_index, "#"); } -inline bool constructor_string_parser::is_search_prefix() { +template + requires url_pattern_regex::derived_from_provider +bool constructor_string_parser::is_search_prefix() { // If result of running is a non-special pattern char given parser, parser’s // token index and "?" is true, then return true. if (is_non_special_pattern_char(token_index, "?")) { @@ -84,8 +90,10 @@ inline bool constructor_string_parser::is_search_prefix() { previous_token->type == token_type::ASTERISK); } -inline bool constructor_string_parser::is_non_special_pattern_char( - size_t index, std::string_view value) { +template + requires url_pattern_regex::derived_from_provider +bool constructor_string_parser:: + is_non_special_pattern_char(size_t index, std::string_view value) { // Let token be the result of running get a safe token given parser and index. 
auto token = get_safe_token(index); ADA_ASSERT_TRUE(token); @@ -105,7 +113,11 @@ inline bool constructor_string_parser::is_non_special_pattern_char( token->type == token_type::INVALID_CHAR; } -inline const Token* constructor_string_parser::get_safe_token(size_t index) { +template + requires url_pattern_regex::derived_from_provider +const Token* +constructor_string_parser::get_safe_token( + size_t index) { // If index is less than parser’s token list's size, then return parser’s // token list[index]. if (index < token_list.size()) [[likely]] { @@ -123,19 +135,28 @@ inline const Token* constructor_string_parser::get_safe_token(size_t index) { return &token_list.back(); } -inline bool constructor_string_parser::is_group_open() const { +template + requires url_pattern_regex::derived_from_provider +bool constructor_string_parser::is_group_open() + const { // If parser’s token list[parser’s token index]'s type is "open", then return // true. return token_list[token_index].type == token_type::OPEN; } -inline bool constructor_string_parser::is_group_close() const { +template + requires url_pattern_regex::derived_from_provider +bool constructor_string_parser::is_group_close() + const { // If parser’s token list[parser’s token index]'s type is "close", then return // true. return token_list[token_index].type == token_type::CLOSE; } -inline bool constructor_string_parser::next_is_authority_slashes() { +template + requires url_pattern_regex::derived_from_provider +bool constructor_string_parser::next_is_authority_slashes() { // If the result of running is a non-special pattern char given parser, // parser’s token index + 1, and "/" is false, then return false. 
if (!is_non_special_pattern_char(token_index + 1, "/")) { @@ -149,14 +170,19 @@ inline bool constructor_string_parser::next_is_authority_slashes() { return true; } -inline bool constructor_string_parser::is_protocol_suffix() { +template + requires url_pattern_regex::derived_from_provider +bool constructor_string_parser::is_protocol_suffix() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and ":". return is_non_special_pattern_char(token_index, ":"); } -inline void constructor_string_parser::change_state(State new_state, - size_t skip) { +template + requires url_pattern_regex::derived_from_provider +void constructor_string_parser::change_state( + State new_state, size_t skip) { // If parser’s state is not "init", not "authority", and not "done", then set // parser’s result[parser’s state] to the result of running make a component // string given parser. @@ -254,7 +280,10 @@ inline void constructor_string_parser::change_state(State new_state, token_increment = 0; } -inline std::string constructor_string_parser::make_component_string() { +template + requires url_pattern_regex::derived_from_provider +std::string +constructor_string_parser::make_component_string() { // Assert: parser’s token index is less than parser’s token list's size. ADA_ASSERT_TRUE(token_index < token_list.size()); @@ -273,37 +302,52 @@ inline std::string constructor_string_parser::make_component_string() { end_index - component_start_input_index); } -inline bool constructor_string_parser::is_an_identity_terminator() { +template + requires url_pattern_regex::derived_from_provider +bool constructor_string_parser::is_an_identity_terminator() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and "@". 
return is_non_special_pattern_char(token_index, "@"); } -inline bool constructor_string_parser::is_pathname_start() { +template + requires url_pattern_regex::derived_from_provider +bool constructor_string_parser::is_pathname_start() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and "/". return is_non_special_pattern_char(token_index, "/"); } -inline bool constructor_string_parser::is_password_prefix() { +template + requires url_pattern_regex::derived_from_provider +bool constructor_string_parser::is_password_prefix() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and ":". return is_non_special_pattern_char(token_index, ":"); } -inline bool constructor_string_parser::is_an_ipv6_open() { +template + requires url_pattern_regex::derived_from_provider +bool constructor_string_parser::is_an_ipv6_open() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and "[". return is_non_special_pattern_char(token_index, "["); } -inline bool constructor_string_parser::is_an_ipv6_close() { +template + requires url_pattern_regex::derived_from_provider +bool constructor_string_parser::is_an_ipv6_close() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and "]". return is_non_special_pattern_char(token_index, "]"); } -inline bool constructor_string_parser::is_port_prefix() { +template + requires url_pattern_regex::derived_from_provider +bool constructor_string_parser::is_port_prefix() { // Return the result of running is a non-special pattern char given parser, // parser’s token index, and ":". 
return is_non_special_pattern_char(token_index, ":"); diff --git a/include/ada/url_pattern_helpers.h b/include/ada/url_pattern_helpers.h index 4d9c29f65..6865d2332 100644 --- a/include/ada/url_pattern_helpers.h +++ b/include/ada/url_pattern_helpers.h @@ -139,6 +139,8 @@ class Tokenizer { }; // @see https://urlpattern.spec.whatwg.org/#constructor-string-parser +template + requires url_pattern_regex::derived_from_provider struct constructor_string_parser { explicit constructor_string_parser(std::string_view new_input, std::vector&& new_token_list) @@ -321,8 +323,10 @@ bool is_ipv6_address(std::string_view input) noexcept; // @see // https://urlpattern.spec.whatwg.org/#protocol-component-matches-a-special-scheme +template + requires url_pattern_regex::derived_from_provider bool protocol_component_matches_special_scheme( - ada::url_pattern_component& input); + ada::url_pattern_component& input); // @see https://urlpattern.spec.whatwg.org/#convert-a-modifier-to-a-string std::string convert_modifier_to_string(url_pattern_part_modifier modifier); diff --git a/include/ada/url_pattern_regex.h b/include/ada/url_pattern_regex.h index 725efd8fc..550c27372 100644 --- a/include/ada/url_pattern_regex.h +++ b/include/ada/url_pattern_regex.h @@ -5,23 +5,40 @@ #ifndef ADA_URL_PATTERN_REGEX_H #define ADA_URL_PATTERN_REGEX_H +#include #include namespace ada::url_pattern_regex { +template class provider { - struct type {}; - - std::optional create_regex_instance(std::string_view pattern, - bool ignore_case); - - std::optional> regex_search(std::string_view input, std::string_view pattern); + public: + using regex_type = T; + + virtual ~provider() = default; + virtual std::optional create_instance(std::string_view pattern, + bool ignore_case) = 0; + virtual std::optional> regex_search( + std::string_view input, const regex_type& pattern) = 0; + virtual bool regex_match(std::string_view input, + const regex_type& pattern) = 0; }; -class std_regex_provider : public provider { - +template 
+concept derived_from_provider = + std::is_base_of_v, derived_class>; + +class std_regex_provider : public provider { + public: + std_regex_provider() = default; + using regex_type = std::regex; + std::optional create_instance(std::string_view pattern, + bool ignore_case) override; + std::optional> regex_search( + std::string_view input, const regex_type& pattern) override; + bool regex_match(std::string_view input, const regex_type& pattern) override; }; } // namespace ada::url_pattern_regex -#endif // ADA_URL_PATTERN_REGEX_H +#endif // ADA_URL_PATTERN_REGEX_H diff --git a/src/implementation.cpp b/src/implementation.cpp index 14476f5c9..876bc1ba0 100644 --- a/src/implementation.cpp +++ b/src/implementation.cpp @@ -79,13 +79,16 @@ ada_warn_unused std::string to_string(ada::encoding_type type) { } } -ada_warn_unused tl::expected parse_url_pattern( - std::variant input, - const std::string_view* base_url, const url_pattern_options* options, - std::optional regex_provider) { - return parser::parse_url_pattern_impl( +template + requires url_pattern_regex::derived_from_provider +ada_warn_unused tl::expected, errors> +parse_url_pattern(std::variant input, + const std::string_view* base_url, + const url_pattern_options* options, + std::optional provider) { + return parser::parse_url_pattern_impl( std::move(input), base_url, options, - regex_provider.value_or(url_pattern_regex::std_regex_provider())); + provider.value_or(url_pattern_regex::std_regex_provider())); } } // namespace ada diff --git a/src/parser.cpp b/src/parser.cpp index 31a283019..e699b2536 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -898,18 +898,21 @@ result_type parse_url_impl(std::string_view user_input, return url; } -tl::expected parse_url_pattern_impl( - std::variant input, - const std::string_view* base_url, const url_pattern_options* options, - url_pattern_regex::provider&& regex_provider) { +template + requires url_pattern_regex::derived_from_provider +tl::expected, errors> 
+parse_url_pattern_impl(std::variant input, + const std::string_view* base_url, + const url_pattern_options* options, + regex_provider&& provider) { // Let init be null. url_pattern_init init; // If input is a scalar value string then: if (std::holds_alternative(input)) { // Set init to the result of running parse a constructor string given input. - auto parse_result = url_pattern_helpers::constructor_string_parser::parse( - std::get(input)); + auto parse_result = url_pattern_helpers::constructor_string_parser< + regex_provider, regex_type>::parse(std::get(input)); if (!parse_result) { ada_log("constructor_string_parser::parse failed"); return tl::unexpected(parse_result.error()); @@ -984,15 +987,16 @@ tl::expected parse_url_pattern_impl( } // Let urlPattern be a new URL pattern. - auto url_pattern_ = url_pattern(std::move(regex_provider)); + auto url_pattern_ = url_pattern(std::move(provider)); // Set urlPattern’s protocol component to the result of compiling a component // given processedInit["protocol"], canonicalize a protocol, and default // options. - auto protocol_component = url_pattern_component::compile( - processed_init->protocol.value(), - url_pattern_helpers::canonicalize_protocol, - url_pattern_compile_component_options::DEFAULT); + auto protocol_component = + url_pattern_component::compile( + processed_init->protocol.value(), + url_pattern_helpers::canonicalize_protocol, + url_pattern_compile_component_options::DEFAULT); if (!protocol_component) { ada_log("url_pattern_component::compile failed for protocol ", processed_init->protocol.value()); @@ -1003,10 +1007,11 @@ tl::expected parse_url_pattern_impl( // Set urlPattern’s username component to the result of compiling a component // given processedInit["username"], canonicalize a username, and default // options. 
- auto username_component = url_pattern_component::compile( - processed_init->username.value(), - url_pattern_helpers::canonicalize_username, - url_pattern_compile_component_options::DEFAULT); + auto username_component = + url_pattern_component::compile( + processed_init->username.value(), + url_pattern_helpers::canonicalize_username, + url_pattern_compile_component_options::DEFAULT); if (!username_component) { ada_log("url_pattern_component::compile failed for username ", processed_init->username.value()); @@ -1017,10 +1022,11 @@ tl::expected parse_url_pattern_impl( // Set urlPattern’s password component to the result of compiling a component // given processedInit["password"], canonicalize a password, and default // options. - auto password_component = url_pattern_component::compile( - processed_init->password.value(), - url_pattern_helpers::canonicalize_password, - url_pattern_compile_component_options::DEFAULT); + auto password_component = + url_pattern_component::compile( + processed_init->password.value(), + url_pattern_helpers::canonicalize_password, + url_pattern_compile_component_options::DEFAULT); if (!password_component) { ada_log("url_pattern_component::compile failed for password ", processed_init->password.value()); @@ -1039,10 +1045,11 @@ tl::expected parse_url_pattern_impl( // then set urlPattern’s hostname component to the result of compiling a // component given processedInit["hostname"], canonicalize an IPv6 hostname, // and hostname options. 
- auto hostname_component = url_pattern_component::compile( - processed_init->hostname.value(), - url_pattern_helpers::canonicalize_ipv6_hostname, - url_pattern_compile_component_options::DEFAULT); + auto hostname_component = + url_pattern_component::compile( + processed_init->hostname.value(), + url_pattern_helpers::canonicalize_ipv6_hostname, + url_pattern_compile_component_options::DEFAULT); if (!hostname_component) { ada_log("url_pattern_component::compile failed for ipv6 hostname ", processed_init->hostname.value()); @@ -1053,10 +1060,11 @@ tl::expected parse_url_pattern_impl( // Otherwise, set urlPattern’s hostname component to the result of compiling // a component given processedInit["hostname"], canonicalize a hostname, and // hostname options. - auto hostname_component = url_pattern_component::compile( - processed_init->hostname.value(), - url_pattern_helpers::canonicalize_hostname, - url_pattern_compile_component_options::HOSTNAME); + auto hostname_component = + url_pattern_component::compile( + processed_init->hostname.value(), + url_pattern_helpers::canonicalize_hostname, + url_pattern_compile_component_options::HOSTNAME); if (!hostname_component) { ada_log("url_pattern_component::compile failed for hostname ", processed_init->hostname.value()); @@ -1067,9 +1075,10 @@ tl::expected parse_url_pattern_impl( // Set urlPattern’s port component to the result of compiling a component // given processedInit["port"], canonicalize a port, and default options. 
- auto port_component = url_pattern_component::compile( - processed_init->port.value(), url_pattern_helpers::canonicalize_port, - url_pattern_compile_component_options::DEFAULT); + auto port_component = + url_pattern_component::compile( + processed_init->port.value(), url_pattern_helpers::canonicalize_port, + url_pattern_compile_component_options::DEFAULT); if (!port_component) { ada_log("url_pattern_component::compile failed for port ", processed_init->port.value()); @@ -1099,9 +1108,10 @@ tl::expected parse_url_pattern_impl( // Set urlPattern’s pathname component to the result of compiling a // component given processedInit["pathname"], canonicalize a pathname, and // pathCompileOptions. - auto pathname_component = url_pattern_component::compile( - processed_init->pathname.value(), - url_pattern_helpers::canonicalize_pathname, path_compile_options); + auto pathname_component = + url_pattern_component::compile( + processed_init->pathname.value(), + url_pattern_helpers::canonicalize_pathname, path_compile_options); if (!pathname_component) { ada_log("url_pattern_component::compile failed for pathname ", processed_init->pathname.value()); @@ -1112,9 +1122,10 @@ tl::expected parse_url_pattern_impl( // Otherwise set urlPattern’s pathname component to the result of compiling // a component given processedInit["pathname"], canonicalize an opaque // pathname, and compileOptions. 
- auto pathname_component = url_pattern_component::compile( - processed_init->pathname.value(), - url_pattern_helpers::canonicalize_opaque_pathname, compile_options); + auto pathname_component = + url_pattern_component::compile( + processed_init->pathname.value(), + url_pattern_helpers::canonicalize_opaque_pathname, compile_options); if (!pathname_component) { ada_log("url_pattern_component::compile failed for opaque pathname ", processed_init->pathname.value()); @@ -1125,9 +1136,10 @@ tl::expected parse_url_pattern_impl( // Set urlPattern’s search component to the result of compiling a component // given processedInit["search"], canonicalize a search, and compileOptions. - auto search_component = url_pattern_component::compile( - processed_init->search.value(), url_pattern_helpers::canonicalize_search, - compile_options); + auto search_component = + url_pattern_component::compile( + processed_init->search.value(), + url_pattern_helpers::canonicalize_search, compile_options); if (!search_component) { ada_log("url_pattern_component::compile failed for search ", processed_init->search.value()); @@ -1137,9 +1149,10 @@ tl::expected parse_url_pattern_impl( // Set urlPattern’s hash component to the result of compiling a component // given processedInit["hash"], canonicalize a hash, and compileOptions. 
- auto hash_component = url_pattern_component::compile( - processed_init->hash.value(), url_pattern_helpers::canonicalize_hash, - compile_options); + auto hash_component = + url_pattern_component::compile( + processed_init->hash.value(), url_pattern_helpers::canonicalize_hash, + compile_options); if (!hash_component) { ada_log("url_pattern_component::compile failed for hash ", processed_init->hash.value()); diff --git a/src/url_pattern.cpp b/src/url_pattern.cpp index 95ec41ded..25ddbb58c 100644 --- a/src/url_pattern.cpp +++ b/src/url_pattern.cpp @@ -450,10 +450,14 @@ std::string url_pattern_init::to_string() const { return answer; } +template + requires url_pattern_regex::derived_from_provider template -tl::expected url_pattern_component::compile( +tl::expected, errors> +url_pattern_component::compile( std::string_view input, F& encoding_callback, - url_pattern_compile_component_options& options) { + url_pattern_compile_component_options& options, + const regex_provider& provider) { ada_log("url_pattern_component::compile input: ", input); // Let part list be the result of running parse a pattern string given input, // options, and encoding callback. @@ -473,13 +477,6 @@ tl::expected url_pattern_component::compile( ada_log("regular expression string: ", regular_expression_string); - // Let flags be an empty string. - // If options’s ignore case is true then set flags to "vi". - // Otherwise set flags to "v" - auto flags = options.ignore_case - ? std::regex::icase | std::regex_constants::ECMAScript - : std::regex_constants::ECMAScript; - // Let pattern string be the result of running generate a pattern // string given part list and options. auto pattern_string = @@ -488,12 +485,10 @@ tl::expected url_pattern_component::compile( // Let regular expression be RegExpCreate(regular expression string, // flags). If this throws an exception, catch it, and throw a // TypeError. 
- std::regex regular_expression; - try { - regular_expression = std::regex(regular_expression_string, flags); - } catch (std::regex_error& error) { - (void)error; - ada_log("std::regex_error: ", error.what()); + auto regular_expression = + provider.create_instance(regular_expression_string, options.ignore_case); + + if (!regular_expression) { return tl::unexpected(errors::type_error); } @@ -507,20 +502,25 @@ tl::expected url_pattern_component::compile( // Return a new component whose pattern string is pattern string, regular // expression is regular expression, group name list is name list, and has // regexp groups is has regexp groups. - return url_pattern_component(std::move(pattern_string), - std::move(regular_expression), flags, - std::move(name_list), has_regexp_groups); + return url_pattern_component( + std::move(pattern_string), std::move(regular_expression), + std::move(name_list), has_regexp_groups); } -result> url_pattern::exec( - const url_pattern_input& input, std::string_view* base_url = nullptr) { +template + requires url_pattern_regex::derived_from_provider +result> +url_pattern::exec(const url_pattern_input& input, + std::string_view* base_url) { // Return the result of match given this's associated URL pattern, input, and // baseURL if given. return match(input, base_url); } -result url_pattern::test(const url_pattern_input& input, - std::string_view* base_url = nullptr) { +template + requires url_pattern_regex::derived_from_provider +result url_pattern::test( + const url_pattern_input& input, std::string_view* base_url) { // TODO: Optimization opportunity. Rather than returning `url_pattern_result` // Implement a fast path just like `can_parse()` in ada_url. 
// Let result be the result of match given this's associated URL pattern, @@ -532,7 +532,10 @@ result url_pattern::test(const url_pattern_input& input, return tl::unexpected(errors::type_error); } -result> url_pattern::match( +template + requires url_pattern_regex::derived_from_provider +result> +url_pattern::match( const url_pattern_input& input, std::string_view* base_url_string) { std::string protocol{}; std::string username{}; diff --git a/src/url_pattern_helpers.cpp b/src/url_pattern_helpers.cpp index 56927635b..37dac8938 100644 --- a/src/url_pattern_helpers.cpp +++ b/src/url_pattern_helpers.cpp @@ -189,17 +189,22 @@ std::string generate_segment_wildcard_regexp( ada_log("generate_segment_wildcard_regexp result: ", result); return result; } - +template + requires url_pattern_regex::derived_from_provider bool protocol_component_matches_special_scheme( - url_pattern_component& component) { + url_pattern_component& component) { auto regex = component.regexp; + // TODO: Use provider.regex_match return std::regex_match("http", regex) || std::regex_match("https", regex) || std::regex_match("ws", regex) || std::regex_match("wss", regex) || std::regex_match("ftp", regex); } -inline std::optional -constructor_string_parser::compute_protocol_matches_special_scheme_flag() { +template + requires url_pattern_regex::derived_from_provider +inline std::optional constructor_string_parser< + regex_provider, + regex_type>::compute_protocol_matches_special_scheme_flag() { ada_log( "constructor_string_parser::compute_protocol_matches_special_scheme_" "flag"); @@ -208,9 +213,10 @@ constructor_string_parser::compute_protocol_matches_special_scheme_flag() { auto protocol_string = make_component_string(); // Let protocol component be the result of compiling a component given // protocol string, canonicalize a protocol, and default options. 
- auto protocol_component = url_pattern_component::compile( - protocol_string, canonicalize_protocol, - url_pattern_compile_component_options::DEFAULT); + auto protocol_component = + url_pattern_component::compile( + protocol_string, canonicalize_protocol, + url_pattern_compile_component_options::DEFAULT); if (!protocol_component) { ada_log("url_pattern_component::compile failed for protocol_string ", protocol_string); @@ -470,7 +476,10 @@ tl::expected canonicalize_hash(std::string_view input) { return tl::unexpected(errors::type_error); } -tl::expected constructor_string_parser::parse( +template + requires url_pattern_regex::derived_from_provider +tl::expected +constructor_string_parser::parse( std::string_view input) { ada_log("constructor_string_parser::parse input=", input); // Let parser be a new constructor string parser whose input is input and @@ -564,7 +573,8 @@ tl::expected constructor_string_parser::parse( if (parser.is_protocol_suffix()) { // Run compute protocol matches a special scheme flag given parser. if (const auto error = - parser.compute_protocol_matches_special_scheme_flag()) { + parser.template compute_protocol_matches_special_scheme_flag< + regex_type>()) { ada_log("compute_protocol_matches_special_scheme_flag failed"); return tl::unexpected(*error); } diff --git a/src/url_pattern_regex.cpp b/src/url_pattern_regex.cpp index e69de29bb..7528e000a 100644 --- a/src/url_pattern_regex.cpp +++ b/src/url_pattern_regex.cpp @@ -0,0 +1,34 @@ +#include +#include "ada/url_pattern_regex.h" + +namespace ada::url_pattern_regex { +std::optional std_regex_provider::create_instance( + std::string_view pattern, bool ignore_case) { + // Let flags be an empty string. + // If options’s ignore case is true then set flags to "vi". + // Otherwise set flags to "v" + auto flags = ignore_case + ? 
std::regex::icase | std::regex_constants::ECMAScript + : std::regex_constants::ECMAScript; + try { + return std::regex(pattern.data(), pattern.size(), flags); + } catch (const std::regex_error& e) { + (void)e; + ada_log("std_regex_provider::create_instance failed:", e.what()); + return std::nullopt; + } +} + +std::optional> std_regex_provider::regex_search( + std::string_view input, const std::regex& pattern) { + (void)input; + (void)pattern; + return {}; +} + +bool std_regex_provider::regex_match(std::string_view input, + const std::regex& pattern) { + return std::regex_match(input.begin(), input.end(), pattern); +} + +} // namespace ada::url_pattern_regex diff --git a/tests/wpt_urlpattern_tests.cpp b/tests/wpt_urlpattern_tests.cpp index 89bea3a13..ac33279da 100644 --- a/tests/wpt_urlpattern_tests.cpp +++ b/tests/wpt_urlpattern_tests.cpp @@ -29,16 +29,20 @@ TEST(wpt_urlpattern_tests, parse_pattern_string_basic_tests) { } TEST(wpt_urlpattern_tests, compile_basic_tests) { - auto protocol_component = ada::url_pattern_component::compile( - "*", ada::url_pattern_helpers::canonicalize_protocol, - ada::url_pattern_compile_component_options::DEFAULT); + auto provider = ada::url_pattern_regex::std_regex_provider(); + auto protocol_component = ada::url_pattern_component< + ada::url_pattern_regex::std_regex_provider, + std::regex>::compile("*", ada::url_pattern_helpers::canonicalize_protocol, + ada::url_pattern_compile_component_options::DEFAULT, + provider); ASSERT_TRUE(protocol_component); } TEST(wpt_urlpattern_tests, basic_tests) { auto init = ada::url_pattern_init{}; init.pathname = "/books"; - auto url = ada::parse_url_pattern(init); + auto url = ada::parse_url_pattern(init); ASSERT_TRUE(url); ASSERT_EQ(url->get_protocol(), "*"); ASSERT_EQ(url->get_hostname(), "*"); @@ -55,8 +59,9 @@ TEST(wpt_urlpattern_tests, basic_tests) { // Tests are taken from WPT //
https://github.com/web-platform-tests/wpt/blob/0c1d19546fd4873bb9f4147f0bbf868e7b4f91b7/urlpattern/resources/urlpattern-hasregexpgroups-tests.js TEST(wpt_urlpattern_tests, has_regexp_groups) { - auto create_init = [](std::string_view component, - std::string value) -> ada::url_pattern_init { + auto create_init = [](std::string_view component, std::string value) + -> ada::url_pattern_init { if (component == "protocol") return {.protocol = value}; if (component == "username") return {.username = value}; if (component == "password") return {.password = value}; @@ -224,7 +229,7 @@ parse_pattern_field(ondemand::array& patterns) { return std::tuple(*init_str, base_url, options); } -tl::expected parse_pattern( +tl::expected, ada::errors> parse_pattern( std::variant& init_variant, std::optional& base_url, std::optional& options) {