Skip to content

Commit

Permalink
Auto merge of rust-lang#131656 - richard-uk1:move_empty_exponent_to_r…
Browse files Browse the repository at this point in the history
…ustc_session, r=<try>

move some invalid exponent detection into rustc_session

This PR moves part of the exponent checks from `rustc_lexer`/`rustc_parser` into `rustc_session`.

This change does not affect which programs are accepted by the compiler, or the diagnostics that are reported, with one main exception. That exception is that floats or ints with suffixes beginning with `e` are rejected *after* the token stream is passed to proc macros, rather than being rejected by the parser as was previously the case. This gives proc macro authors more consistent access to numeric literals: currently a proc macro can interpret `1m` or `30s` but not `7eggs` or `3em`. After this change all are handled the same. The lexer will still reject input if it contains `e` followed by a number, `+`/`-`, or `_` if they are not followed by a valid integer literal (number + `_`), but this doesn't affect macro authors who just want to access alpha suffixes.

This PR is a continuation of rust-lang#79912. It is also solving exactly the same problem as [rust-lang#111628](rust-lang#111628).

Exponents that contain arbitrarily long underscore suffixes are handled without read-ahead by tracking the exponent start in case of invalid exponent, so the suffix start is correct. This is very much an edge-case (the user would have to write something like `1e_______________23`) but nevertheless it is handled correctly.

Also adds tests for various edge cases and improves diagnostics marginally.

r: `@petrochenkov`, since they reviewed rust-lang#79912.
  • Loading branch information
bors committed Feb 28, 2025
2 parents 60493b8 + e276417 commit c0ae197
Show file tree
Hide file tree
Showing 9 changed files with 208 additions and 77 deletions.
100 changes: 71 additions & 29 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ pub enum DocStyle {
pub enum LiteralKind {
/// `12_u8`, `0o100`, `0b120i99`, `1f32`.
Int { base: Base, empty_int: bool },
/// `12.34f32`, `1e3`, but not `1f32`.
/// `12.34f32`, `1e3` and `1e+`, but not `1f32` or `1em`.
Float { base: Base, empty_exponent: bool },
/// `'a'`, `'\\'`, `'''`, `';`
Char { terminated: bool },
Expand Down Expand Up @@ -409,8 +409,8 @@ impl Cursor<'_> {

// Numeric literal.
c @ '0'..='9' => {
let literal_kind = self.number(c);
let suffix_start = self.pos_within_token();
let (literal_kind, suffix_start) = self.number(c);
let suffix_start = suffix_start.unwrap_or(self.pos_within_token());
self.eat_literal_suffix();
TokenKind::Literal { kind: literal_kind, suffix_start }
}
Expand Down Expand Up @@ -606,7 +606,9 @@ impl Cursor<'_> {
}
}

fn number(&mut self, first_digit: char) -> LiteralKind {
/// Parses a number. The second element of the returned tuple is the offset
/// of the literal suffix, if it differs from the cursor position on return.
fn number(&mut self, first_digit: char) -> (LiteralKind, Option<u32>) {
debug_assert!('0' <= self.prev() && self.prev() <= '9');
let mut base = Base::Decimal;
if first_digit == '0' {
Expand All @@ -616,21 +618,21 @@ impl Cursor<'_> {
base = Base::Binary;
self.bump();
if !self.eat_decimal_digits() {
return Int { base, empty_int: true };
return (Int { base, empty_int: true }, None);
}
}
'o' => {
base = Base::Octal;
self.bump();
if !self.eat_decimal_digits() {
return Int { base, empty_int: true };
return (Int { base, empty_int: true }, None);
}
}
'x' => {
base = Base::Hexadecimal;
self.bump();
if !self.eat_hexadecimal_digits() {
return Int { base, empty_int: true };
return (Int { base, empty_int: true }, None);
}
}
// Not a base prefix; consume additional digits.
Expand All @@ -642,40 +644,79 @@ impl Cursor<'_> {
'.' | 'e' | 'E' => {}

// Just a 0.
_ => return Int { base, empty_int: false },
_ => return (Int { base, empty_int: false }, None),
}
} else {
// No base prefix, parse number in the usual way.
self.eat_decimal_digits();
};

match self.first() {
match (self.first(), self.second()) {
// Don't be greedy if this is actually an
// integer literal followed by field/method access or a range pattern
// (`0..2` and `12.foo()`)
'.' if self.second() != '.' && !is_id_start(self.second()) => {
// might have stuff after the ., and if it does, it needs to start
// with a number
('.', second) if second != '.' && !is_id_start(second) => {
self.bump();
self.eat_decimal_digits();

let mut empty_exponent = false;
if self.first().is_ascii_digit() {
self.eat_decimal_digits();
match self.first() {
'e' | 'E' => {
self.bump();
empty_exponent = !self.eat_float_exponent();
}
_ => (),
let suffix_start = match (self.first(), self.second()) {
('e' | 'E', '_') => self.eat_underscore_exponent(),
('e' | 'E', '0'..='9' | '+' | '-') => {
// Definitely an exponent (which still can be empty).
self.bump();
empty_exponent = !self.eat_float_exponent();
None
}
_ => None,
};
(Float { base, empty_exponent }, suffix_start)
}
('e' | 'E', '_') => {
match self.eat_underscore_exponent() {
Some(suffix_start) => {
// The suffix begins at `e`, meaning the number is an integer.
(Int { base, empty_int: false }, Some(suffix_start))
}
None => (Float { base, empty_exponent: false }, None),
}
Float { base, empty_exponent }
}
'e' | 'E' => {
('e' | 'E', '0'..='9' | '+' | '-') => {
// Definitely an exponent (which still can be empty).
self.bump();
let empty_exponent = !self.eat_float_exponent();
Float { base, empty_exponent }
(Float { base, empty_exponent }, None)
}
_ => Int { base, empty_int: false },
_ => (Int { base, empty_int: false }, None),
}
}

/// Attempts to consume an exponent written as `e`/`E`, one or more `_`, and
/// then decimal digits (e.g. `e___23`).
///
/// The caller must ensure the next character is `e`/`E` and the one after
/// it is `_`. The `e`/`E` and every `_` directly following it are always
/// consumed, whether or not an exponent is found.
///
/// Returns `None` when a digit follows the underscores: the digits are
/// consumed as the exponent and the suffix starts at the cursor as usual.
///
/// Returns `Some(pos)` when no digit follows: no exponent was found, the
/// consumed characters belong to the suffix, and `pos` is the value of
/// `pos_within_token()` taken before the `e`/`E` was consumed.
fn eat_underscore_exponent(&mut self) -> Option<u32> {
    debug_assert!(matches!(self.first(), 'e' | 'E'));
    debug_assert!(matches!(self.second(), '_'));

    // Record where `e`/`E` sits before moving on, so the suffix start can
    // still be reported if this turns out not to be an exponent.
    let exponent_marker_pos = self.pos_within_token();

    // Step over `e`/`E`, then over the whole run of underscores.
    self.bump();
    while self.first() == '_' {
        self.bump();
    }

    // No digit after the underscores: everything consumed above is the
    // beginning of the suffix rather than an exponent.
    if !self.first().is_ascii_digit() {
        return Some(exponent_marker_pos);
    }

    // A digit was found, so the exponent is valid; take the rest of it.
    self.eat_decimal_digits();
    None
}

Expand Down Expand Up @@ -924,6 +965,7 @@ impl Cursor<'_> {
}
}

/// Returns `true` if a digit was consumed (rather than just '_'s).
fn eat_decimal_digits(&mut self) -> bool {
let mut has_digits = false;
loop {
Expand Down Expand Up @@ -961,20 +1003,20 @@ impl Cursor<'_> {
/// Eats the float exponent. Returns true if at least one digit was met,
/// and returns false otherwise.
fn eat_float_exponent(&mut self) -> bool {
debug_assert!(self.prev() == 'e' || self.prev() == 'E');
debug_assert!(matches!(self.prev(), 'e' | 'E'));
if self.first() == '-' || self.first() == '+' {
self.bump();
}
self.eat_decimal_digits()
}

// Eats the suffix of the literal, e.g. "u8".
/// Eats the suffix of the literal, e.g. "u8".
fn eat_literal_suffix(&mut self) {
self.eat_identifier();
self.eat_identifier()
}

// Eats the identifier. Note: succeeds on `_`, which isn't a valid
// identifier.
/// Eats the identifier. Note: succeeds on `_`, which isn't a valid
/// identifier.
fn eat_identifier(&mut self) {
if !is_id_start(self.first()) {
return;
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_session/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ session_embed_source_insufficient_dwarf_version = `-Zembed-source=y` requires at
session_embed_source_requires_debug_info = `-Zembed-source=y` requires debug information to be enabled
session_empty_float_exponent = expected at least one digit in exponent
session_expr_parentheses_needed = parentheses are required to parse this as an expression
session_failed_to_create_profiler = failed to create profiler: {$err}
Expand Down
15 changes: 15 additions & 0 deletions compiler/rustc_session/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,10 @@ pub fn report_lit_error(
s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit())
}

/// Returns `true` when the suffix is exactly `e` or `E`, i.e. an exponent
/// marker with nothing after it.
fn looks_like_empty_exponent(s: &str) -> bool {
    matches!(s, "e" | "E")
}

// Try to lowercase the prefix if the prefix and suffix are valid.
fn fix_base_capitalisation(prefix: &str, suffix: &str) -> Option<String> {
let mut chars = suffix.chars();
Expand Down Expand Up @@ -409,6 +413,8 @@ pub fn report_lit_error(
if looks_like_width_suffix(&['i', 'u'], suf) {
// If it looks like a width, try to be helpful.
dcx.emit_err(InvalidIntLiteralWidth { span, width: suf[1..].into() })
} else if looks_like_empty_exponent(suf) {
dcx.emit_err(EmptyFloatExponent { span })
} else if let Some(fixed) = fix_base_capitalisation(lit.symbol.as_str(), suf) {
dcx.emit_err(InvalidNumLiteralBasePrefix { span, fixed })
} else {
Expand All @@ -420,6 +426,8 @@ pub fn report_lit_error(
if looks_like_width_suffix(&['f'], suf) {
// If it looks like a width, try to be helpful.
dcx.emit_err(InvalidFloatLiteralWidth { span, width: suf[1..].to_string() })
} else if looks_like_empty_exponent(suf) {
dcx.emit_err(EmptyFloatExponent { span })
} else {
dcx.emit_err(InvalidFloatLiteralSuffix { span, suffix: suf.to_string() })
}
Expand Down Expand Up @@ -489,3 +497,10 @@ pub(crate) struct SoftFloatIgnored;
#[note]
#[note(session_soft_float_deprecated_issue)]
pub(crate) struct SoftFloatDeprecated;

/// Error reported for a numeric literal whose suffix is a lone `e`/`E`,
/// i.e. an exponent marker with no digits after it.
///
/// The message text comes from the `session_empty_float_exponent` Fluent
/// entry ("expected at least one digit in exponent").
#[derive(Diagnostic)]
#[diag(session_empty_float_exponent)]
pub(crate) struct EmptyFloatExponent {
// Primary location the error is reported at.
#[primary_span]
pub span: Span,
}
72 changes: 36 additions & 36 deletions tests/ui/consts/const-eval/issue-104390.stderr
Original file line number Diff line number Diff line change
@@ -1,39 +1,3 @@
error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:1:27
|
LL | fn f1() -> impl Sized { & 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:2:28
|
LL | fn f2() -> impl Sized { && 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:3:29
|
LL | fn f3() -> impl Sized { &'a 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:5:34
|
LL | fn f4() -> impl Sized { &'static 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:7:28
|
LL | fn f5() -> impl Sized { *& 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:8:29
|
LL | fn f6() -> impl Sized { &'_ 2E }
| ^^

error: borrow expressions cannot be annotated with lifetimes
--> $DIR/issue-104390.rs:3:25
|
Expand Down Expand Up @@ -76,5 +40,41 @@ LL - fn f6() -> impl Sized { &'_ 2E }
LL + fn f6() -> impl Sized { &2E }
|

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:1:27
|
LL | fn f1() -> impl Sized { & 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:2:28
|
LL | fn f2() -> impl Sized { && 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:3:29
|
LL | fn f3() -> impl Sized { &'a 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:5:34
|
LL | fn f4() -> impl Sized { &'static 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:7:28
|
LL | fn f5() -> impl Sized { *& 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:8:29
|
LL | fn f6() -> impl Sized { &'_ 2E }
| ^^

error: aborting due to 9 previous errors

12 changes: 6 additions & 6 deletions tests/ui/consts/issue-91434.stderr
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
error: expected at least one digit in exponent
--> $DIR/issue-91434.rs:2:11
|
LL | [9; [[9E; h]]];
| ^^

error[E0425]: cannot find value `h` in this scope
--> $DIR/issue-91434.rs:2:15
|
LL | [9; [[9E; h]]];
| ^ not found in this scope

error: expected at least one digit in exponent
--> $DIR/issue-91434.rs:2:11
|
LL | [9; [[9E; h]]];
| ^^

error: aborting due to 2 previous errors

For more information about this error, try `rustc --explain E0425`.
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
error: expected at least one digit in exponent
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:47
|
LL | const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹⋅s⁻²
| ^^^^^^

error: unknown start of token: \u{2212}
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:53
|
Expand All @@ -16,5 +10,11 @@ LL - const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹
LL + const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e-11; // m³⋅kg⁻¹⋅s⁻²
|

error: expected at least one digit in exponent
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:47
|
LL | const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹⋅s⁻²
| ^^^^^^

error: aborting due to 2 previous errors

16 changes: 16 additions & 0 deletions tests/ui/lexer/custom-suffixes-exponent-like.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
const _A: f64 = 1em;
//~^ ERROR invalid suffix `em` for number literal
const _B: f64 = 1e0m;
//~^ ERROR invalid suffix `m` for float literal
const _C: f64 = 1e_______________0m;
//~^ ERROR invalid suffix `m` for float literal
const _D: f64 = 1e_______________m;
//~^ ERROR invalid suffix `e_______________m` for number literal

// All the above patterns should not generate an error when used in a macro.
// `do_nothing!` accepts any sequence of token trees and expands to nothing,
// so each literal below only has to be accepted as a token.
macro_rules! do_nothing {
($($toks:tt)*) => {};
}
do_nothing!(1em 1e0m 1e_______________0m 1e_______________m);

fn main() {}
Loading

0 comments on commit c0ae197

Please sign in to comment.