From e2764171ccdd8c00b1341b57c454cf8f3b2ce5ab Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Sun, 13 Oct 2024 14:44:42 +0100 Subject: [PATCH] move some invalid exponent detection into rustc_session This PR allows integer and float suffixes that begin with 'e' (or 'E') to pass the lexer. Before, an 'e' that wasn't followed by a valid exponent was rejected by the lexer. This rejection still happens in the parser, but now proc macro authors have the opportunity to interpret the suffix, enabling tokens like `1em` to be used in macros. Diagnostics are also marginally improved. Exponents that contain arbitrarily long suffixes are handled without read-ahead by tracking the exponent start in case of invalid exponent, so the suffix start is correct. Also adds tests for various edge cases. Co-authored-by: Vadim Petrochenkov --- compiler/rustc_lexer/src/lib.rs | 100 +++++++++++++----- compiler/rustc_session/messages.ftl | 2 + compiler/rustc_session/src/errors.rs | 15 +++ .../ui/consts/const-eval/issue-104390.stderr | 72 ++++++------- tests/ui/consts/issue-91434.stderr | 12 +-- ...de-confusable-in-float-literal-expt.stderr | 12 +-- .../ui/lexer/custom-suffixes-exponent-like.rs | 16 +++ .../custom-suffixes-exponent-like.stderr | 34 ++++++ tests/ui/lexer/custom-suffixes.rs | 22 ++++ 9 files changed, 208 insertions(+), 77 deletions(-) create mode 100644 tests/ui/lexer/custom-suffixes-exponent-like.rs create mode 100644 tests/ui/lexer/custom-suffixes-exponent-like.stderr create mode 100644 tests/ui/lexer/custom-suffixes.rs diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index bf18845a0830d..08dc1c4a3107e 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -194,7 +194,7 @@ pub enum DocStyle { pub enum LiteralKind { /// `12_u8`, `0o100`, `0b120i99`, `1f32`. Int { base: Base, empty_int: bool }, - /// `12.34f32`, `1e3`, but not `1f32`. + /// `12.34f32`, `1e3` and `1e+`, but not `1f32` or `1em`. 
Float { base: Base, empty_exponent: bool }, /// `'a'`, `'\\'`, `'''`, `';` Char { terminated: bool }, @@ -409,8 +409,8 @@ impl Cursor<'_> { // Numeric literal. c @ '0'..='9' => { - let literal_kind = self.number(c); - let suffix_start = self.pos_within_token(); + let (literal_kind, suffix_start) = self.number(c); + let suffix_start = suffix_start.unwrap_or(self.pos_within_token()); self.eat_literal_suffix(); TokenKind::Literal { kind: literal_kind, suffix_start } } @@ -606,7 +606,9 @@ impl Cursor<'_> { } } - fn number(&mut self, first_digit: char) -> LiteralKind { + /// Parses a number and in `.1` returns the offset of the literal suffix if + /// different from the current position on return. + fn number(&mut self, first_digit: char) -> (LiteralKind, Option<u32>) { debug_assert!('0' <= self.prev() && self.prev() <= '9'); let mut base = Base::Decimal; if first_digit == '0' { @@ -616,21 +618,21 @@ impl Cursor<'_> { base = Base::Binary; self.bump(); if !self.eat_decimal_digits() { - return Int { base, empty_int: true }; + return (Int { base, empty_int: true }, None); } } 'o' => { base = Base::Octal; self.bump(); if !self.eat_decimal_digits() { - return Int { base, empty_int: true }; + return (Int { base, empty_int: true }, None); } } 'x' => { base = Base::Hexadecimal; self.bump(); if !self.eat_hexadecimal_digits() { - return Int { base, empty_int: true }; + return (Int { base, empty_int: true }, None); } } // Not a base prefix; consume additional digits. @@ -642,40 +644,79 @@ impl Cursor<'_> { '.' | 'e' | 'E' => {} // Just a 0. - _ => return Int { base, empty_int: false }, + _ => return (Int { base, empty_int: false }, None), } } else { // No base prefix, parse number in the usual way. self.eat_decimal_digits(); }; - match self.first() { + match (self.first(), self.second()) { // Don't be greedy if this is actually an // integer literal followed by field/method access or a range pattern // (`0..2` and `12.foo()`) - '.' if self.second() != '.' 
&& !is_id_start(self.second()) => { - // might have stuff after the ., and if it does, it needs to start - // with a number + ('.', second) if second != '.' && !is_id_start(second) => { self.bump(); + self.eat_decimal_digits(); + let mut empty_exponent = false; - if self.first().is_ascii_digit() { - self.eat_decimal_digits(); - match self.first() { - 'e' | 'E' => { - self.bump(); - empty_exponent = !self.eat_float_exponent(); - } - _ => (), + let suffix_start = match (self.first(), self.second()) { + ('e' | 'E', '_') => self.eat_underscore_exponent(), + ('e' | 'E', '0'..='9' | '+' | '-') => { + // Definitely an exponent (which still can be empty). + self.bump(); + empty_exponent = !self.eat_float_exponent(); + None } + _ => None, + }; + (Float { base, empty_exponent }, suffix_start) + } + ('e' | 'E', '_') => { + match self.eat_underscore_exponent() { + Some(suffix_start) => { + // The suffix begins at `e`, meaning the number is an integer. + (Int { base, empty_int: false }, Some(suffix_start)) + } + None => (Float { base, empty_exponent: false }, None), } - Float { base, empty_exponent } } - 'e' | 'E' => { + ('e' | 'E', '0'..='9' | '+' | '-') => { + // Definitely an exponent (which still can be empty). self.bump(); let empty_exponent = !self.eat_float_exponent(); - Float { base, empty_exponent } + (Float { base, empty_exponent }, None) } - _ => Int { base, empty_int: false }, + _ => (Int { base, empty_int: false }, None), + } + } + + /// Try to find and eat an exponent + /// + /// Assumes the first character is `e`/`E` and second is `_`, and consumes + /// `e`/`E` followed by all consecutive `_`s. + /// + /// Returns `Some` if no exponent was found. In this case, the suffix is partially + /// consumed, and began at the return value. 
+ fn eat_underscore_exponent(&mut self) -> Option<u32> { + debug_assert!(matches!(self.first(), 'e' | 'E')); + debug_assert!(matches!(self.second(), '_')); + let suffix_start = self.pos_within_token(); + + // check if series of `_` is ended by a digit. If yes + // include it in the number as exponent. If no include + // it in suffix. + self.bump(); + while matches!(self.first(), '_') { + self.bump(); + } + // If we find a digit, then the exponential was valid + // so the suffix will start at the cursor as usual. + if self.first().is_ascii_digit() { + self.eat_decimal_digits(); + None + } else { + Some(suffix_start) + } } @@ -924,6 +965,7 @@ impl Cursor<'_> { } } + /// Returns `true` if a digit was consumed (rather than just '_'s). fn eat_decimal_digits(&mut self) -> bool { let mut has_digits = false; loop { @@ -961,20 +1003,20 @@ impl Cursor<'_> { /// Eats the float exponent. Returns true if at least one digit was met, /// and returns false otherwise. fn eat_float_exponent(&mut self) -> bool { - debug_assert!(self.prev() == 'e' || self.prev() == 'E'); + debug_assert!(matches!(self.prev(), 'e' | 'E')); if self.first() == '-' || self.first() == '+' { self.bump(); } self.eat_decimal_digits() } - // Eats the suffix of the literal, e.g. "u8". + /// Eats the suffix of the literal, e.g. "u8". fn eat_literal_suffix(&mut self) { - self.eat_identifier(); + self.eat_identifier() } - // Eats the identifier. Note: succeeds on `_`, which isn't a valid - // identifier. + /// Eats the identifier. Note: succeeds on `_`, which isn't a valid + /// identifier. 
fn eat_identifier(&mut self) { if !is_id_start(self.first()) { return; diff --git a/compiler/rustc_session/messages.ftl b/compiler/rustc_session/messages.ftl index 74b8087e07769..3e6dd12377419 100644 --- a/compiler/rustc_session/messages.ftl +++ b/compiler/rustc_session/messages.ftl @@ -14,6 +14,8 @@ session_embed_source_insufficient_dwarf_version = `-Zembed-source=y` requires at session_embed_source_requires_debug_info = `-Zembed-source=y` requires debug information to be enabled +session_empty_float_exponent = expected at least one digit in exponent + session_expr_parentheses_needed = parentheses are required to parse this as an expression session_failed_to_create_profiler = failed to create profiler: {$err} diff --git a/compiler/rustc_session/src/errors.rs b/compiler/rustc_session/src/errors.rs index 71d8dbe44fed0..5d64a02731b36 100644 --- a/compiler/rustc_session/src/errors.rs +++ b/compiler/rustc_session/src/errors.rs @@ -377,6 +377,10 @@ pub fn report_lit_error( s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) } + fn looks_like_empty_exponent(s: &str) -> bool { + s.len() == 1 && matches!(s.chars().next(), Some('e' | 'E')) + } + // Try to lowercase the prefix if the prefix and suffix are valid. fn fix_base_capitalisation(prefix: &str, suffix: &str) -> Option<String> { let mut chars = suffix.chars(); @@ -409,6 +413,8 @@ pub fn report_lit_error( if looks_like_width_suffix(&['i', 'u'], suf) { // If it looks like a width, try to be helpful. dcx.emit_err(InvalidIntLiteralWidth { span, width: suf[1..].into() }) + } else if looks_like_empty_exponent(suf) { + dcx.emit_err(EmptyFloatExponent { span }) } else if let Some(fixed) = fix_base_capitalisation(lit.symbol.as_str(), suf) { dcx.emit_err(InvalidNumLiteralBasePrefix { span, fixed }) } else { @@ -420,6 +426,8 @@ pub fn report_lit_error( if looks_like_width_suffix(&['f'], suf) { // If it looks like a width, try to be helpful. 
dcx.emit_err(InvalidFloatLiteralWidth { span, width: suf[1..].to_string() }) + } else if looks_like_empty_exponent(suf) { + dcx.emit_err(EmptyFloatExponent { span }) } else { dcx.emit_err(InvalidFloatLiteralSuffix { span, suffix: suf.to_string() }) } @@ -489,3 +497,10 @@ pub(crate) struct SoftFloatIgnored; #[note] #[note(session_soft_float_deprecated_issue)] pub(crate) struct SoftFloatDeprecated; + +#[derive(Diagnostic)] +#[diag(session_empty_float_exponent)] +pub(crate) struct EmptyFloatExponent { + #[primary_span] + pub span: Span, +} diff --git a/tests/ui/consts/const-eval/issue-104390.stderr b/tests/ui/consts/const-eval/issue-104390.stderr index 4c425ecfc1308..55fbe1a10907c 100644 --- a/tests/ui/consts/const-eval/issue-104390.stderr +++ b/tests/ui/consts/const-eval/issue-104390.stderr @@ -1,39 +1,3 @@ -error: expected at least one digit in exponent - --> $DIR/issue-104390.rs:1:27 - | -LL | fn f1() -> impl Sized { & 2E } - | ^^ - -error: expected at least one digit in exponent - --> $DIR/issue-104390.rs:2:28 - | -LL | fn f2() -> impl Sized { && 2E } - | ^^ - -error: expected at least one digit in exponent - --> $DIR/issue-104390.rs:3:29 - | -LL | fn f3() -> impl Sized { &'a 2E } - | ^^ - -error: expected at least one digit in exponent - --> $DIR/issue-104390.rs:5:34 - | -LL | fn f4() -> impl Sized { &'static 2E } - | ^^ - -error: expected at least one digit in exponent - --> $DIR/issue-104390.rs:7:28 - | -LL | fn f5() -> impl Sized { *& 2E } - | ^^ - -error: expected at least one digit in exponent - --> $DIR/issue-104390.rs:8:29 - | -LL | fn f6() -> impl Sized { &'_ 2E } - | ^^ - error: borrow expressions cannot be annotated with lifetimes --> $DIR/issue-104390.rs:3:25 | @@ -76,5 +40,41 @@ LL - fn f6() -> impl Sized { &'_ 2E } LL + fn f6() -> impl Sized { &2E } | +error: expected at least one digit in exponent + --> $DIR/issue-104390.rs:1:27 + | +LL | fn f1() -> impl Sized { & 2E } + | ^^ + +error: expected at least one digit in exponent + --> 
$DIR/issue-104390.rs:2:28 + | +LL | fn f2() -> impl Sized { && 2E } + | ^^ + +error: expected at least one digit in exponent + --> $DIR/issue-104390.rs:3:29 + | +LL | fn f3() -> impl Sized { &'a 2E } + | ^^ + +error: expected at least one digit in exponent + --> $DIR/issue-104390.rs:5:34 + | +LL | fn f4() -> impl Sized { &'static 2E } + | ^^ + +error: expected at least one digit in exponent + --> $DIR/issue-104390.rs:7:28 + | +LL | fn f5() -> impl Sized { *& 2E } + | ^^ + +error: expected at least one digit in exponent + --> $DIR/issue-104390.rs:8:29 + | +LL | fn f6() -> impl Sized { &'_ 2E } + | ^^ + error: aborting due to 9 previous errors diff --git a/tests/ui/consts/issue-91434.stderr b/tests/ui/consts/issue-91434.stderr index 08d3ad77053d5..2e2e19958ba57 100644 --- a/tests/ui/consts/issue-91434.stderr +++ b/tests/ui/consts/issue-91434.stderr @@ -1,15 +1,15 @@ -error: expected at least one digit in exponent - --> $DIR/issue-91434.rs:2:11 - | -LL | [9; [[9E; h]]]; - | ^^ - error[E0425]: cannot find value `h` in this scope --> $DIR/issue-91434.rs:2:15 | LL | [9; [[9E; h]]]; | ^ not found in this scope +error: expected at least one digit in exponent + --> $DIR/issue-91434.rs:2:11 + | +LL | [9; [[9E; h]]]; + | ^^ + error: aborting due to 2 previous errors For more information about this error, try `rustc --explain E0425`. 
diff --git a/tests/ui/did_you_mean/issue-49746-unicode-confusable-in-float-literal-expt.stderr b/tests/ui/did_you_mean/issue-49746-unicode-confusable-in-float-literal-expt.stderr index 664bf69b9ebc5..353d5c4b5411c 100644 --- a/tests/ui/did_you_mean/issue-49746-unicode-confusable-in-float-literal-expt.stderr +++ b/tests/ui/did_you_mean/issue-49746-unicode-confusable-in-float-literal-expt.stderr @@ -1,9 +1,3 @@ -error: expected at least one digit in exponent - --> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:47 - | -LL | const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹⋅s⁻² - | ^^^^^^ - error: unknown start of token: \u{2212} --> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:53 | @@ -16,5 +10,11 @@ LL - const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹ LL + const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e-11; // m³⋅kg⁻¹⋅s⁻² | +error: expected at least one digit in exponent + --> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:47 + | +LL | const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹⋅s⁻² + | ^^^^^^ + error: aborting due to 2 previous errors diff --git a/tests/ui/lexer/custom-suffixes-exponent-like.rs b/tests/ui/lexer/custom-suffixes-exponent-like.rs new file mode 100644 index 0000000000000..1807ed7573b56 --- /dev/null +++ b/tests/ui/lexer/custom-suffixes-exponent-like.rs @@ -0,0 +1,16 @@ +const _A: f64 = 1em; + //~^ ERROR invalid suffix `em` for number literal +const _B: f64 = 1e0m; + //~^ ERROR invalid suffix `m` for float literal +const _C: f64 = 1e_______________0m; + //~^ ERROR invalid suffix `m` for float literal +const _D: f64 = 1e_______________m; + //~^ ERROR invalid suffix `e_______________m` for number literal + +// All the above patterns should not generate an error when used in a macro +macro_rules! 
do_nothing { + ($($toks:tt)*) => {}; +} +do_nothing!(1em 1e0m 1e_______________0m 1e_______________m); + +fn main() {} diff --git a/tests/ui/lexer/custom-suffixes-exponent-like.stderr b/tests/ui/lexer/custom-suffixes-exponent-like.stderr new file mode 100644 index 0000000000000..366b57a421e34 --- /dev/null +++ b/tests/ui/lexer/custom-suffixes-exponent-like.stderr @@ -0,0 +1,34 @@ +error: invalid suffix `em` for number literal + --> $DIR/custom-suffixes-exponent-like.rs:1:17 + | +LL | const _A: f64 = 1em; + | ^^^ invalid suffix `em` + | + = help: the suffix must be one of the numeric types (`u32`, `isize`, `f32`, etc.) + +error: invalid suffix `m` for float literal + --> $DIR/custom-suffixes-exponent-like.rs:3:17 + | +LL | const _B: f64 = 1e0m; + | ^^^^ invalid suffix `m` + | + = help: valid suffixes are `f32` and `f64` + +error: invalid suffix `m` for float literal + --> $DIR/custom-suffixes-exponent-like.rs:5:17 + | +LL | const _C: f64 = 1e_______________0m; + | ^^^^^^^^^^^^^^^^^^^ invalid suffix `m` + | + = help: valid suffixes are `f32` and `f64` + +error: invalid suffix `e_______________m` for number literal + --> $DIR/custom-suffixes-exponent-like.rs:7:17 + | +LL | const _D: f64 = 1e_______________m; + | ^^^^^^^^^^^^^^^^^^ invalid suffix `e_______________m` + | + = help: the suffix must be one of the numeric types (`u32`, `isize`, `f32`, etc.) + +error: aborting due to 4 previous errors + diff --git a/tests/ui/lexer/custom-suffixes.rs b/tests/ui/lexer/custom-suffixes.rs new file mode 100644 index 0000000000000..c97537b577980 --- /dev/null +++ b/tests/ui/lexer/custom-suffixes.rs @@ -0,0 +1,22 @@ +//@ check-pass + +// This tests different kinds of valid suffixes. + +fn main() { + const _A: f64 = 1.; + const _B: f64 = 1f64; + const _C: f64 = 1.0f64; + const _D: f64 = 1e6; + const _E: f64 = 1.0e9; + const _F: f64 = 1e-6; + const _G: f64 = 1.0e-6; + const _H: f64 = 1.0e06; + const _I: f64 = 1.0e+6; + // these ones are perhaps more surprising. 
+ const _J: f64 = 1.0e0________________________6; + const _K: f64 = 1.0e________________________6; + const _L: f64 = 1.0e+________________________6; + const _M: f64 = 1.0e-________________________6; + const _N: f64 = 1.0e-________________________9; + const _O: f64 = 1e_______________0f64; +}