From 4314dff1ff51c95c20613ac631ac21018900f7a3 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Tue, 12 Dec 2023 10:02:50 +1100
Subject: [PATCH] Delay string literal unescaping.

Currently string literals are unescaped twice:

- Once during lexing, in `cook_quoted`/`cook_c_string`/`cook_common`.
  This pass only checks for errors.

- Again in `LitKind::from_token_lit`, which is mostly called when
  lowering AST to HIR, but also in a few other places during expansion.
  This pass actually constructs the unescaped string. It also has error
  checking code, but the code handling the error cases is dead (and has
  several bugs), because the check during lexing catches all errors!

This commit removes the checking during lexing and fixes up
`LitKind::from_token_lit` so that it properly does both checking and
construction.

This is a language change: some programs now compile that previously
did not. For example, macros can now be passed "invalid" string
literals like "\a\b\c". This continues a trend of delaying semantic
error checking of literals until after expansion: e.g. #102944 did this
for some cases of numeric literals, and the detection of NUL chars in C
string literals is already delayed in this way.

XXX: have Session::report_lit_errors?
XXX: have LitKind::from_token_lit so you don't need the .0?

Things to note:

- `LitError` has a new `EscapeError` variant.

- `LitKind::from_token_lit`'s return value changed so that it can
  produce multiple errors/warnings, and also handle lexer warnings.
  The latter is annoying but necessary to preserve existing warning
  behaviour.

- `report_lit_error` becomes `report_lit_errors`, in order to handle
  multiple errors in a single string literal.

Notes about test changes:

- `tests/rustdoc-ui/ignore-block-help.rs`: this relies on a parsing
  error occurring. The existing error was an unescaping error, which is
  now delayed until after parsing, so the commit changes it to an
  "unterminated character literal" error, which still occurs during
  parsing.

- Several tests combined unescaping errors with unterminated literal
  errors. The former are now delayed, but the latter remain lexing
  errors, so the unterminated literal part had to be split into a
  separate test file; otherwise compilation would end before the other
  errors were reported.

- issue-62913.rs: the structure and output changed a bit. Issue #62913
  was about an ICE caused by an unterminated string literal, so the new
  version should be good enough.

- literals-are-validated-before-expansion.rs: this tests exactly the
  behaviour that has changed, and so was removed.
  XXX: insert a new test covering more of that

- A couple of other tests produce the same errors, just in a different
  order.
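As an illustration of the behaviour change described above, here is a
minimal sketch (not part of this patch; the macro name is illustrative)
of a program that now compiles: a macro that discards its input can be
passed a string literal with invalid escapes, because unescaping errors
are only reported when `LitKind::from_token_lit` runs, i.e. after
expansion.

    // Discards whatever tokens it is given.
    macro_rules! discard {
        ($($tt:tt)*) => {};
    }

    fn main() {
        // Previously rejected during lexing; with delayed unescaping
        // this compiles, because the literal is never lowered.
        discard!("\a\b\c");

        // Using the same literal directly still produces an unknown
        // character escape error during AST-to-HIR lowering:
        // let _ = "\a\b\c";
    }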
--- compiler/rustc_ast/src/attr/mod.rs | 3 +- compiler/rustc_ast/src/util/literal.rs | 266 ++++++++++++------ compiler/rustc_ast_lowering/src/expr.rs | 11 +- compiler/rustc_ast_lowering/src/format.rs | 4 +- compiler/rustc_ast_lowering/src/lib.rs | 2 +- compiler/rustc_builtin_macros/src/concat.rs | 65 ++--- .../rustc_builtin_macros/src/concat_bytes.rs | 16 +- compiler/rustc_expand/src/base.rs | 42 +-- compiler/rustc_expand/src/mbe/metavar_expr.rs | 2 +- compiler/rustc_lexer/src/unescape.rs | 2 +- compiler/rustc_parse/src/lexer/mod.rs | 91 +----- .../src/lexer/unescape_error_reporting.rs | 16 +- compiler/rustc_parse/src/parser/expr.rs | 37 ++- compiler/rustc_parse/src/parser/mod.rs | 2 +- compiler/rustc_parse/src/validate_attr.rs | 15 +- .../clippy_lints/src/almost_complete_range.rs | 4 +- .../clippy/clippy_lints/src/int_plus_one.rs | 2 +- .../src/literal_representation.rs | 4 +- .../clippy/clippy_lints/src/misc_early/mod.rs | 2 +- tests/rustdoc-ui/ignore-block-help.rs | 4 +- tests/rustdoc-ui/ignore-block-help.stderr | 4 +- tests/ui/fmt/format-string-error-2.stderr | 12 +- tests/ui/lexer/lex-bad-char-literals-7.rs | 3 - tests/ui/lexer/lex-bad-char-literals-7.stderr | 9 +- tests/ui/lexer/lex-bad-char-literals-8.rs | 4 + tests/ui/lexer/lex-bad-char-literals-8.stderr | 9 + tests/ui/parser/byte-literals-2.rs | 3 + tests/ui/parser/byte-literals-2.stderr | 9 + tests/ui/parser/byte-literals.rs | 1 - tests/ui/parser/byte-literals.stderr | 9 +- tests/ui/parser/byte-string-literals-2.rs | 3 + tests/ui/parser/byte-string-literals-2.stderr | 11 + tests/ui/parser/byte-string-literals.rs | 1 - tests/ui/parser/byte-string-literals.stderr | 11 +- tests/ui/parser/issues/issue-62913.rs | 9 +- tests/ui/parser/issues/issue-62913.stderr | 22 +- ...literals-are-validated-before-expansion.rs | 10 - ...rals-are-validated-before-expansion.stderr | 18 -- .../parser/raw/raw-byte-string-literals-2.rs | 3 + .../raw/raw-byte-string-literals-2.stderr | 8 + .../ui/parser/raw/raw-byte-string-literals.rs | 1 - .../raw/raw-byte-string-literals.stderr | 8 +- .../parser/unicode-control-codepoints.stderr | 168 +++++------ 43 files changed, 488 insertions(+), 438 deletions(-) create mode 100644 tests/ui/lexer/lex-bad-char-literals-8.rs create mode 100644 tests/ui/lexer/lex-bad-char-literals-8.stderr create mode 100644 tests/ui/parser/byte-literals-2.rs create mode 100644 tests/ui/parser/byte-literals-2.stderr create mode 100644 tests/ui/parser/byte-string-literals-2.rs create mode 100644 tests/ui/parser/byte-string-literals-2.stderr delete mode 100644 tests/ui/parser/macro/literals-are-validated-before-expansion.rs delete mode 100644 tests/ui/parser/macro/literals-are-validated-before-expansion.stderr create mode 100644 tests/ui/parser/raw/raw-byte-string-literals-2.rs create mode 100644 tests/ui/parser/raw/raw-byte-string-literals-2.stderr diff --git a/compiler/rustc_ast/src/attr/mod.rs b/compiler/rustc_ast/src/attr/mod.rs index 98138cedb24db..367ae9253402c 100644 --- a/compiler/rustc_ast/src/attr/mod.rs +++ b/compiler/rustc_ast/src/attr/mod.rs @@ -240,7 +240,7 @@ impl AttrArgsEq { match self { AttrArgsEq::Ast(expr) => match expr.kind { ExprKind::Lit(token_lit) => { - LitKind::from_token_lit(token_lit).ok().and_then(|lit| lit.str()) + LitKind::from_token_lit(token_lit).0.ok().and_then(|lit| lit.str()) } _ => None, }, @@ -426,6 +426,7 @@ impl MetaItemKind { ExprKind::Lit(token_lit) => { // Turn failures to `None`, we'll get parse errors elsewhere. 
MetaItemLit::from_token_lit(token_lit, expr.span) + .0 .ok() .map(|lit| MetaItemKind::NameValue(lit)) } diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index c4c85570484cf..962690df8f55d 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -3,11 +3,11 @@ use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; use crate::token::{self, Token}; use rustc_lexer::unescape::{ - byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, - Mode, + byte_from_char, unescape_c_string, unescape_literal, CStrUnit, EscapeError, Mode, }; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; +use std::ops::Range; use std::{ascii, fmt, str}; // Escapes a string, represented as a symbol. Reuses the original symbol, @@ -33,6 +33,14 @@ pub fn escape_byte_str_symbol(bytes: &[u8]) -> Symbol { #[derive(Debug)] pub enum LitError { LexerError, + EscapeError { + mode: Mode, + // Length before the string content, e.g. 1 for "a", 5 for br##"a"## + prefix_len: u32, + // The range is the byte range of the bad character, using a zero index. + range: Range, + err: EscapeError, + }, InvalidSuffix, InvalidIntSuffix, InvalidFloatSuffix, @@ -41,155 +49,247 @@ pub enum LitError { } impl LitKind { - /// Converts literal token into a semantic literal. - pub fn from_token_lit(lit: token::Lit) -> Result { + /// Converts literal token into a semantic literal. The return value has + /// two parts: + /// - The `Result` indicates success or failure. + /// - The `Vec` contains all found errors and warnings. + /// + /// If we only had to deal with errors, we could use the more obvious + /// `Result>`; on failure the caller would just + /// print errors and take the error path and stop early. But it's possible + /// to succeed without errors but with one or more warnings, and in that + /// case the caller should print the warnings, but also proceed with a + /// valid `LitKind`. This return type facilitates that. + pub fn from_token_lit(lit: token::Lit) -> (Result, Vec) { let token::Lit { kind, symbol, suffix } = lit; if suffix.is_some() && !kind.may_have_suffix() { - return Err(LitError::InvalidSuffix); + // Note: we return a single error here. We could instead continue + // processing, possibly returning multiple errors. 
+ return (Err(()), vec![LitError::InvalidSuffix]); } - Ok(match kind { + let mut errs = vec![]; + let mut has_fatal = false; + + let res = match kind { token::Bool => { assert!(symbol.is_bool_lit()); - LitKind::Bool(symbol == kw::True) + Ok(LitKind::Bool(symbol == kw::True)) } token::Byte => { - return unescape_byte(symbol.as_str()) - .map(LitKind::Byte) - .map_err(|_| LitError::LexerError); + let mode = Mode::Byte; + let mut res = None; + unescape_literal(symbol.as_str(), mode, &mut |range, unescaped_char| { + match unescaped_char { + Ok(c) => res = Some(c), + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2, // b' + range, + err, + }); + } + } + }); + if !has_fatal { Ok(LitKind::Byte(byte_from_char(res.unwrap()))) } else { Err(()) } } token::Char => { - return unescape_char(symbol.as_str()) - .map(LitKind::Char) - .map_err(|_| LitError::LexerError); + let mode = Mode::Char; + let mut res = None; + unescape_literal(symbol.as_str(), mode, &mut |range, unescaped_char| { + match unescaped_char { + Ok(c) => res = Some(c), + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 1, // ' + range, + err, + }); + } + } + }); + if !has_fatal { Ok(LitKind::Char(res.unwrap())) } else { Err(()) } } // There are some valid suffixes for integer and float literals, // so all the handling is done internally. - token::Integer => return integer_lit(symbol, suffix), - token::Float => return float_lit(symbol, suffix), + token::Integer => { + return match integer_lit(symbol, suffix) { + Ok(lit_kind) => (Ok(lit_kind), vec![]), + Err(err) => (Err(()), vec![err]), + }; + } + token::Float => { + return match float_lit(symbol, suffix) { + Ok(lit_kind) => (Ok(lit_kind), vec![]), + Err(err) => (Err(()), vec![err]), + }; + } token::Str => { // If there are no characters requiring special treatment we can // reuse the symbol from the token. Otherwise, we must generate a // new symbol because the string in the LitKind is different to the // string in the token. + let mode = Mode::Str; let s = symbol.as_str(); // Vanilla strings are so common we optimize for the common case where no chars // requiring special behaviour are present. - let symbol = if s.contains(['\\', '\r']) { + if s.contains(['\\', '\r']) { let mut buf = String::with_capacity(s.len()); - let mut error = Ok(()); // Force-inlining here is aggressive but the closure is // called on every char in the string, so it can be // hot in programs with many long strings. unescape_literal( s, - Mode::Str, + mode, &mut #[inline(always)] - |_, unescaped_char| match unescaped_char { + |range, unescaped_char| match unescaped_char { Ok(c) => buf.push(c), Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 1, // " + range, + err, + }); } }, ); - error?; - Symbol::intern(&buf) + if !has_fatal { + Ok(LitKind::Str(Symbol::intern(&buf), ast::StrStyle::Cooked)) + } else { + Err(()) + } } else { - symbol - }; - LitKind::Str(symbol, ast::StrStyle::Cooked) + Ok(LitKind::Str(symbol, ast::StrStyle::Cooked)) + } } token::StrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we // can reuse the symbol on success. 
- let mut error = Ok(()); - unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| { - match unescaped_char { - Ok(_) => {} - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } - } + let mode = Mode::RawStr; + let s = symbol.as_str(); + unescape_literal(s, mode, &mut |range, unescaped_char| match unescaped_char { + Ok(_) => {} + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2 + n as u32, // r", r#", r##", etc. + range, + err, + }); } }); - error?; - LitKind::Str(symbol, ast::StrStyle::Raw(n)) + if !has_fatal { Ok(LitKind::Str(symbol, ast::StrStyle::Raw(n))) } else { Err(()) } } token::ByteStr => { + let mode = Mode::ByteStr; let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); - unescape_literal(s, Mode::ByteStr, &mut |_, c| match c { + unescape_literal(s, mode, &mut |range, c| match c { Ok(c) => buf.push(byte_from_char(c)), Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2, // b" + range, + err, + }); } }); - error?; - LitKind::ByteStr(buf.into(), StrStyle::Cooked) + if !has_fatal { + Ok(LitKind::ByteStr(buf.into(), StrStyle::Cooked)) + } else { + Err(()) + } } token::ByteStrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we // can convert the symbol directly to a `Lrc` on success. + let mode = Mode::RawByteStr; let s = symbol.as_str(); - let mut error = Ok(()); - unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c { + unescape_literal(s, mode, &mut |range, c| match c { Ok(_) => {} Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 3 + n as u32, // br", br#", br##", etc. + range, + err, + }); } }); - LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n)) + if !has_fatal { + Ok(LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n))) + } else { + Err(()) + } } token::CStr => { + let mode = Mode::CStr; let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); - unescape_c_string(s, Mode::CStr, &mut |_span, c| match c { + unescape_c_string(s, mode, &mut |range, c| match c { Ok(CStrUnit::Byte(b)) => buf.push(b), Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8), Ok(CStrUnit::Char(c)) => { buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) } Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2, // c" + range, + err, + }); } }); - error?; - buf.push(0); - LitKind::CStr(buf.into(), StrStyle::Cooked) + if !has_fatal { + buf.push(0); + Ok(LitKind::CStr(buf.into(), StrStyle::Cooked)) + } else { + Err(()) + } } token::CStrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we - // can convert the symbol directly to a `Lrc` on success. + // can convert the symbol directly to a `Lrc` (after appending a nul char) on + // success. 
+ let mode = Mode::RawCStr; let s = symbol.as_str(); - let mut error = Ok(()); - unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c { + unescape_c_string(s, mode, &mut |range, c| match c { Ok(_) => {} Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 3 + n as u32, // cr", cr#", cr##", etc. + range, + err, + }); } }); - error?; - let mut buf = s.to_owned().into_bytes(); - buf.push(0); - LitKind::CStr(buf.into(), StrStyle::Raw(n)) + if !has_fatal { + let mut buf = s.to_owned().into_bytes(); + buf.push(0); + Ok(LitKind::CStr(buf.into(), StrStyle::Raw(n))) + } else { + Err(()) + } } - token::Err => LitKind::Err, - }) + token::Err => Ok(LitKind::Err), + }; + (res, errs) } } @@ -257,14 +357,20 @@ impl fmt::Display for LitKind { } impl MetaItemLit { - /// Converts a token literal into a meta item literal. - pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result { - Ok(MetaItemLit { - symbol: token_lit.symbol, - suffix: token_lit.suffix, - kind: LitKind::from_token_lit(token_lit)?, - span, - }) + /// Converts a token literal into a meta item literal. See + /// `LitKind::from_token_lit` for an explanation of the return type. + pub fn from_token_lit( + token_lit: token::Lit, + span: Span, + ) -> (Result, Vec) { + let (res, errs) = LitKind::from_token_lit(token_lit); + let res = match res { + Ok(kind) => { + Ok(MetaItemLit { symbol: token_lit.symbol, suffix: token_lit.suffix, kind, span }) + } + Err(()) => Err(()), + }; + (res, errs) } /// Cheaply converts a meta item literal into a token literal. @@ -290,7 +396,7 @@ impl MetaItemLit { /// Converts an arbitrary token into meta item literal. pub fn from_token(token: &Token) -> Option { token::Lit::from_token(token) - .and_then(|token_lit| MetaItemLit::from_token_lit(token_lit, token.span).ok()) + .and_then(|token_lit| MetaItemLit::from_token_lit(token_lit, token.span).0.ok()) } } diff --git a/compiler/rustc_ast_lowering/src/expr.rs b/compiler/rustc_ast_lowering/src/expr.rs index 635bc945cb1e9..f9587f66cc1cf 100644 --- a/compiler/rustc_ast_lowering/src/expr.rs +++ b/compiler/rustc_ast_lowering/src/expr.rs @@ -14,7 +14,7 @@ use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_hir as hir; use rustc_hir::def::{DefKind, Res}; use rustc_middle::span_bug; -use rustc_parse::parser::report_lit_error; +use rustc_parse::parser::report_lit_errors; use rustc_span::source_map::{respan, Spanned}; use rustc_span::symbol::{kw, sym, Ident, Symbol}; use rustc_span::DUMMY_SP; @@ -119,13 +119,12 @@ impl<'hir> LoweringContext<'_, 'hir> { hir::ExprKind::Unary(op, ohs) } ExprKind::Lit(token_lit) => { - let lit_kind = match LitKind::from_token_lit(*token_lit) { + let (result, errs) = LitKind::from_token_lit(*token_lit); + let lit_kind = match result { Ok(lit_kind) => lit_kind, - Err(err) => { - report_lit_error(&self.tcx.sess.parse_sess, err, *token_lit, e.span); - LitKind::Err - } + Err(()) => LitKind::Err, }; + report_lit_errors(&self.tcx.sess.parse_sess, errs, *token_lit, e.span); let lit = self.arena.alloc(respan(self.lower_span(e.span), lit_kind)); hir::ExprKind::Lit(lit) } diff --git a/compiler/rustc_ast_lowering/src/format.rs b/compiler/rustc_ast_lowering/src/format.rs index 6a82005c44842..1e7b7cc150935 100644 --- a/compiler/rustc_ast_lowering/src/format.rs +++ b/compiler/rustc_ast_lowering/src/format.rs @@ -127,11 +127,11 @@ fn inline_literals(mut fmt: Cow<'_, FormatArgs>) -> Cow<'_, FormatArgs> { && let 
ExprKind::Lit(lit) = arg.kind { if let token::LitKind::Str | token::LitKind::StrRaw(_) = lit.kind - && let Ok(LitKind::Str(s, _)) = LitKind::from_token_lit(lit) + && let Ok(LitKind::Str(s, _)) = LitKind::from_token_lit(lit).0 { literal = Some(s); } else if let token::LitKind::Integer = lit.kind - && let Ok(LitKind::Int(n, _)) = LitKind::from_token_lit(lit) + && let Ok(LitKind::Int(n, _)) = LitKind::from_token_lit(lit).0 { literal = Some(Symbol::intern(&n.to_string())); } diff --git a/compiler/rustc_ast_lowering/src/lib.rs b/compiler/rustc_ast_lowering/src/lib.rs index d9663d50c595c..d38bd9dd1ff84 100644 --- a/compiler/rustc_ast_lowering/src/lib.rs +++ b/compiler/rustc_ast_lowering/src/lib.rs @@ -948,7 +948,7 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> { // In valid code the value always ends up as a single literal. Otherwise, a dummy // literal suffices because the error is handled elsewhere. let lit = if let ExprKind::Lit(token_lit) = expr.kind - && let Ok(lit) = MetaItemLit::from_token_lit(token_lit, expr.span) + && let Ok(lit) = MetaItemLit::from_token_lit(token_lit, expr.span).0 { lit } else { diff --git a/compiler/rustc_builtin_macros/src/concat.rs b/compiler/rustc_builtin_macros/src/concat.rs index 6c83e8868bd31..21be6e0fdc19d 100644 --- a/compiler/rustc_builtin_macros/src/concat.rs +++ b/compiler/rustc_builtin_macros/src/concat.rs @@ -1,7 +1,7 @@ use rustc_ast as ast; use rustc_ast::tokenstream::TokenStream; use rustc_expand::base::{self, DummyResult}; -use rustc_parse::parser::report_lit_error; +use rustc_parse::parser::report_lit_errors; use rustc_span::symbol::Symbol; use crate::errors; @@ -19,48 +19,49 @@ pub fn expand_concat( let mut has_errors = false; for e in es { match e.kind { - ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { - Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => { - accumulator.push_str(s.as_str()); - } - Ok(ast::LitKind::Char(c)) => { - accumulator.push(c); - } - Ok(ast::LitKind::Int(i, _)) => { - accumulator.push_str(&i.to_string()); - } - Ok(ast::LitKind::Bool(b)) => { - accumulator.push_str(&b.to_string()); - } - Ok(ast::LitKind::CStr(..)) => { - cx.emit_err(errors::ConcatCStrLit { span: e.span }); - has_errors = true; - } - Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { - cx.emit_err(errors::ConcatBytestr { span: e.span }); - has_errors = true; - } - Ok(ast::LitKind::Err) => { - has_errors = true; - } - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span); - has_errors = true; + ast::ExprKind::Lit(token_lit) => { + let (res, errs) = ast::LitKind::from_token_lit(token_lit); + match res { + Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => { + accumulator.push_str(s.as_str()); + } + Ok(ast::LitKind::Char(c)) => { + accumulator.push(c); + } + Ok(ast::LitKind::Int(i, _)) => { + accumulator.push_str(&i.to_string()); + } + Ok(ast::LitKind::Bool(b)) => { + accumulator.push_str(&b.to_string()); + } + Ok(ast::LitKind::CStr(..)) => { + cx.emit_err(errors::ConcatCStrLit { span: e.span }); + has_errors = true; + } + Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { + cx.emit_err(errors::ConcatBytestr { span: e.span }); + has_errors = true; + } + Ok(ast::LitKind::Err) | Err(()) => { + has_errors = true; + } } - }, + report_lit_errors(&cx.sess.parse_sess, errs, token_lit, e.span); + } // We also want to allow negative numeric literals. 
ast::ExprKind::Unary(ast::UnOp::Neg, ref expr) if let ast::ExprKind::Lit(token_lit) = expr.kind => { - match ast::LitKind::from_token_lit(token_lit) { + let (res, errs) = ast::LitKind::from_token_lit(token_lit); + match res { Ok(ast::LitKind::Int(i, _)) => accumulator.push_str(&format!("-{i}")), Ok(ast::LitKind::Float(f, _)) => accumulator.push_str(&format!("-{f}")), - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span); + Err(()) => { has_errors = true; } _ => missing_literal.push(e.span), } + report_lit_errors(&cx.sess.parse_sess, errs, token_lit, e.span); } ast::ExprKind::IncludedBytes(..) => { cx.emit_err(errors::ConcatBytestr { span: e.span }); diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index 4ae328160f0c8..085207a49a5f5 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -1,7 +1,7 @@ use rustc_ast as ast; use rustc_ast::{ptr::P, tokenstream::TokenStream}; use rustc_expand::base::{self, DummyResult}; -use rustc_parse::parser::report_lit_error; +use rustc_parse::parser::report_lit_errors; use rustc_span::Span; use crate::errors; @@ -17,7 +17,8 @@ fn invalid_type_err( ConcatBytesInvalid, ConcatBytesInvalidSuggestion, ConcatBytesNonU8, ConcatBytesOob, }; let snippet = cx.sess.source_map().span_to_snippet(span).ok(); - match ast::LitKind::from_token_lit(token_lit) { + let (res, errs) = ast::LitKind::from_token_lit(token_lit); + match res { Ok(ast::LitKind::CStr(_, _)) => { // Avoid ambiguity in handling of terminal `NUL` by refusing to // concatenate C string literals as bytes. @@ -60,10 +61,9 @@ fn invalid_type_err( cx.emit_err(ConcatBytesNonU8 { span }); } Ok(ast::LitKind::ByteStr(..) 
| ast::LitKind::Byte(_)) => unreachable!(), - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, span); - } + Err(()) => {} } + report_lit_errors(&cx.sess.parse_sess, errs, token_lit, span); } fn handle_array_element( @@ -80,7 +80,7 @@ fn handle_array_element( *has_errors = true; None } - ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { + ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit).0 { Ok(ast::LitKind::Int( val, ast::LitIntType::Unsuffixed | ast::LitIntType::Unsigned(ast::UintTy::U8), @@ -141,7 +141,7 @@ pub fn expand_concat_bytes( ast::ExprKind::Repeat(expr, count) => { if let ast::ExprKind::Lit(token_lit) = count.value.kind && let Ok(ast::LitKind::Int(count_val, _)) = - ast::LitKind::from_token_lit(token_lit) + ast::LitKind::from_token_lit(token_lit).0 { if let Some(elem) = handle_array_element(cx, &mut has_errors, &mut missing_literals, expr) @@ -154,7 +154,7 @@ pub fn expand_concat_bytes( cx.emit_err(errors::ConcatBytesBadRepeat { span: count.value.span }); } } - &ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { + &ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit).0 { Ok(ast::LitKind::Byte(val)) => { accumulator.push(val); } diff --git a/compiler/rustc_expand/src/base.rs b/compiler/rustc_expand/src/base.rs index c9bbea47185b7..0371aff80dade 100644 --- a/compiler/rustc_expand/src/base.rs +++ b/compiler/rustc_expand/src/base.rs @@ -1235,26 +1235,28 @@ pub fn expr_to_spanned_string<'a>( let expr = cx.expander().fully_expand_fragment(AstFragment::Expr(expr)).make_expr(); Err(match expr.kind { - ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { - Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)), - Ok(ast::LitKind::ByteStr(..)) => { - let mut err = cx.struct_span_err(expr.span, err_msg); - let span = expr.span.shrink_to_lo(); - err.span_suggestion( - span.with_hi(span.lo() + BytePos(1)), - "consider removing the leading `b`", - "", - Applicability::MaybeIncorrect, - ); - Some((err, true)) - } - Ok(ast::LitKind::Err) => None, - Err(err) => { - parser::report_lit_error(&cx.sess.parse_sess, err, token_lit, expr.span); - None - } - _ => Some((cx.struct_span_err(expr.span, err_msg), false)), - }, + ast::ExprKind::Lit(token_lit) => { + let (lit_kind, errs) = ast::LitKind::from_token_lit(token_lit); + let res = match lit_kind { + Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)), + Ok(ast::LitKind::ByteStr(..)) => { + let mut err = cx.struct_span_err(expr.span, err_msg); + let span = expr.span.shrink_to_lo(); + err.span_suggestion( + span.with_hi(span.lo() + BytePos(1)), + "consider removing the leading `b`", + "", + Applicability::MaybeIncorrect, + ); + Some((err, true)) + } + Ok(ast::LitKind::Err) => None, + Err(()) => None, + _ => Some((cx.struct_span_err(expr.span, err_msg), false)), + }; + parser::report_lit_errors(&cx.sess.parse_sess, errs, token_lit, expr.span); + res + } ast::ExprKind::Err => None, _ => Some((cx.struct_span_err(expr.span, err_msg), false)), }) diff --git a/compiler/rustc_expand/src/mbe/metavar_expr.rs b/compiler/rustc_expand/src/mbe/metavar_expr.rs index c29edc3dc9f29..5c532693bbcb4 100644 --- a/compiler/rustc_expand/src/mbe/metavar_expr.rs +++ b/compiler/rustc_expand/src/mbe/metavar_expr.rs @@ -119,7 +119,7 @@ fn parse_depth<'sess>( .span_diagnostic .struct_span_err(span, "meta-variable expression depth must be a literal")); }; - if let Ok(lit_kind) = 
LitKind::from_token_lit(*lit) + if let Ok(lit_kind) = LitKind::from_token_lit(*lit).0 && let LitKind::Int(n_u128, LitIntType::Unsuffixed) = lit_kind && let Ok(n_usize) = usize::try_from(n_u128) { diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 7c8065f3cb9b9..4941531e3ce44 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -347,7 +347,7 @@ where // them in the range computation. while let Some(c) = chars.next() { let start = src.len() - chars.as_str().len() - c.len_utf8(); - let res = match c { + let res: Result = match c { '\\' => { match chars.clone().next() { Some('\n') => { diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index b1dc1f98777a7..40a1e8703554f 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -1,5 +1,3 @@ -use std::ops::Range; - use crate::errors; use crate::lexer::unicode_chars::UNICODE_ARRAY; use crate::make_unclosed_delims_error; @@ -8,7 +6,6 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey}; -use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::{Base, DocStyle, RawStrError}; use rustc_lexer::{Cursor, LiteralKind}; use rustc_session::lint::builtin::{ @@ -21,10 +18,10 @@ use rustc_span::{edition::Edition, BytePos, Pos, Span}; mod diagnostics; mod tokentrees; -mod unescape_error_reporting; +pub(crate) mod unescape_error_reporting; mod unicode_chars; -use unescape_error_reporting::{emit_unescape_error, escaped_char}; +use unescape_error_reporting::escaped_char; // This type is used a lot. Make sure it doesn't unintentionally get bigger. 
// @@ -409,7 +406,7 @@ impl<'a> StringReader<'a> { error_code!(E0762), ) } - self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' ' + self.cook_quoted(token::Char, start, end, 1, 1) // ' ' } rustc_lexer::LiteralKind::Byte { terminated } => { if !terminated { @@ -419,7 +416,7 @@ impl<'a> StringReader<'a> { error_code!(E0763), ) } - self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' ' + self.cook_quoted(token::Byte, start, end, 2, 1) // b' ' } rustc_lexer::LiteralKind::Str { terminated } => { if !terminated { @@ -429,7 +426,7 @@ impl<'a> StringReader<'a> { error_code!(E0765), ) } - self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " " + self.cook_quoted(token::Str, start, end, 1, 1) // " " } rustc_lexer::LiteralKind::ByteStr { terminated } => { if !terminated { @@ -439,7 +436,7 @@ impl<'a> StringReader<'a> { error_code!(E0766), ) } - self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " + self.cook_quoted(token::ByteStr, start, end, 2, 1) // b" " } rustc_lexer::LiteralKind::CStr { terminated } => { if !terminated { @@ -449,13 +446,13 @@ impl<'a> StringReader<'a> { error_code!(E0767), ) } - self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" " + self.cook_quoted(token::CStr, start, end, 2, 1) // c" " } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::StrRaw(n_hashes); - self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "## + self.cook_quoted(kind, start, end, 2 + n, 1 + n) // r##" "## } else { self.report_raw_str_error(start, 1); } @@ -464,7 +461,7 @@ impl<'a> StringReader<'a> { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::ByteStrRaw(n_hashes); - self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "## + self.cook_quoted(kind, start, end, 3 + n, 1 + n) // br##" "## } else { self.report_raw_str_error(start, 2); } @@ -473,7 +470,7 @@ impl<'a> StringReader<'a> { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::CStrRaw(n_hashes); - self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## + self.cook_quoted(kind, start, end, 3 + n, 1 + n) // cr##" "## } else { self.report_raw_str_error(start, 2); } @@ -693,82 +690,18 @@ impl<'a> StringReader<'a> { self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num }); } - fn cook_common( + fn cook_quoted( &self, kind: token::LitKind, - mode: Mode, start: BytePos, end: BytePos, prefix_len: u32, postfix_len: u32, - unescape: fn(&str, Mode, &mut dyn FnMut(Range, Result<(), EscapeError>)), ) -> (token::LitKind, Symbol) { - let mut has_fatal_err = false; let content_start = start + BytePos(prefix_len); let content_end = end - BytePos(postfix_len); let lit_content = self.str_from_to(content_start, content_end); - unescape(lit_content, mode, &mut |range, result| { - // Here we only check for errors. The actual unescaping is done later. 
- if let Err(err) = result { - let span_with_quotes = self.mk_sp(start, end); - let (start, end) = (range.start as u32, range.end as u32); - let lo = content_start + BytePos(start); - let hi = lo + BytePos(end - start); - let span = self.mk_sp(lo, hi); - if err.is_fatal() { - has_fatal_err = true; - } - emit_unescape_error( - &self.sess.span_diagnostic, - lit_content, - span_with_quotes, - span, - mode, - range, - err, - ); - } - }); - - // We normally exclude the quotes for the symbol, but for errors we - // include it because it results in clearer error messages. - if !has_fatal_err { - (kind, Symbol::intern(lit_content)) - } else { - (token::Err, self.symbol_from_to(start, end)) - } - } - - fn cook_quoted( - &self, - kind: token::LitKind, - mode: Mode, - start: BytePos, - end: BytePos, - prefix_len: u32, - postfix_len: u32, - ) -> (token::LitKind, Symbol) { - self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_literal(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) - }) - } - - fn cook_c_string( - &self, - kind: token::LitKind, - mode: Mode, - start: BytePos, - end: BytePos, - prefix_len: u32, - postfix_len: u32, - ) -> (token::LitKind, Symbol) { - self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_c_string(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) - }) + (kind, Symbol::intern(lit_content)) } } diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 65a46ec6c476b..ea8c588cc03cc 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -11,26 +11,32 @@ use crate::errors::{MoreThanOneCharNote, MoreThanOneCharSugg, NoBraceUnicodeSub, pub(crate) fn emit_unescape_error( handler: &Handler, - // interior part of the literal, without quotes + // interior part of the literal, between quotes lit: &str, // full span of the literal, including quotes and any prefix full_lit_span: Span, - // span of the error part of the literal - err_span: Span, mode: Mode, + prefix_len: u32, // range of the error inside `lit` range: Range, error: EscapeError, ) { + let (start, end) = (range.start as u32, range.end as u32); + let lo = full_lit_span.lo() + BytePos(prefix_len) + BytePos(start); + let hi = lo + BytePos(end - start); + let err_span = full_lit_span.with_lo(lo).with_hi(hi); + debug!( - "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}", - lit, full_lit_span, mode, range, error + "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}, {:?}", + lit, full_lit_span, err_span, mode, range, error ); + let last_char = || { let c = lit[range.clone()].chars().next_back().unwrap(); let span = err_span.with_lo(err_span.hi() - BytePos(c.len_utf8() as u32)); (c, span) }; + match error { EscapeError::LoneSurrogateUnicodeEscape => { handler diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 338cb209afe2f..eb72e3bdf17c3 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -8,6 +8,7 @@ use super::{ }; use crate::errors; +use crate::lexer::unescape_error_reporting::emit_unescape_error; use crate::maybe_recover_from_interpolated_ty_qpath; use ast::mut_visit::{noop_visit_expr, MutVisitor}; use ast::{CoroutineKind, GenBlockKind, Pat, Path, PathSegment}; @@ -2048,26 +2049,31 @@ impl<'a> Parser<'a> { 
let token = recovered.as_ref().unwrap_or(&self.token); match token::Lit::from_token(token) { Some(lit) => { - match MetaItemLit::from_token_lit(lit, token.span) { + let (res, errs) = MetaItemLit::from_token_lit(lit, token.span); + let (res, span) = match res { Ok(lit) => { + let span = token.uninterpolated_span(); self.bump(); - Some(lit) + (lit, span) } - Err(err) => { + Err(()) => { let span = token.uninterpolated_span(); self.bump(); - report_lit_error(self.sess, err, lit, span); // Pack possible quotes and prefixes from the original literal into // the error literal's symbol so they can be pretty-printed faithfully. let suffixless_lit = token::Lit::new(lit.kind, lit.symbol, None); let symbol = Symbol::intern(&suffixless_lit.to_string()); let lit = token::Lit::new(token::Err, symbol, lit.suffix); - Some( + ( MetaItemLit::from_token_lit(lit, span) + .0 .unwrap_or_else(|_| unreachable!()), + span, ) } - } + }; + report_lit_errors(self.sess, errs, lit, span); + Some(res) } None => None, } @@ -3659,7 +3665,13 @@ impl<'a> Parser<'a> { } } -pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { +pub fn report_lit_errors(sess: &ParseSess, errs: Vec, lit: token::Lit, span: Span) { + for err in errs { + report_lit_error(sess, err, lit, span); + } +} + +fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { // Checks if `s` looks like i32 or u1234 etc. fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) @@ -3692,6 +3704,17 @@ pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: // `LexerError` is an error, but it was already reported // by lexer, so here we don't report it the second time. LitError::LexerError => {} + LitError::EscapeError { mode, prefix_len, range, err } => { + emit_unescape_error( + &sess.span_diagnostic, + symbol.as_str(), + span, + mode, + prefix_len, + range, + err, + ); + } LitError::InvalidSuffix => { if let Some(suffix) = suffix { sess.emit_err(errors::InvalidLiteralSuffix { span, kind: kind.descr(), suffix }); diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 2acbf0e948c7b..f22d459a44028 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -11,7 +11,7 @@ mod stmt; mod ty; use crate::lexer::UnmatchedDelim; -pub use crate::parser::expr::report_lit_error; +pub use crate::parser::expr::report_lit_errors; pub use attr_wrapper::AttrWrapper; pub use diagnostics::AttemptLocalParseRecovery; pub(crate) use expr::ForbiddenLetReason; diff --git a/compiler/rustc_parse/src/validate_attr.rs b/compiler/rustc_parse/src/validate_attr.rs index 81055431f649d..a3c2782a1805b 100644 --- a/compiler/rustc_parse/src/validate_attr.rs +++ b/compiler/rustc_parse/src/validate_attr.rs @@ -1,14 +1,13 @@ //! Meta-syntax validation logic of attributes for post-expansion. 
-use crate::{errors, parse_in}; +use crate::{errors, parse_in, parser}; use rustc_ast::token::Delimiter; use rustc_ast::tokenstream::DelimSpan; -use rustc_ast::MetaItemKind; use rustc_ast::{self as ast, AttrArgs, AttrArgsEq, Attribute, DelimArgs, MetaItem}; +use rustc_ast::{LitKind, MetaItemKind, MetaItemLit}; use rustc_errors::{Applicability, FatalError, PResult}; use rustc_feature::{AttributeTemplate, BuiltinAttribute, BUILTIN_ATTRIBUTE_MAP}; -use rustc_session::errors::report_lit_error; use rustc_session::lint::builtin::ILL_FORMED_ATTRIBUTE_INPUT; use rustc_session::parse::ParseSess; use rustc_span::{sym, Span, Symbol}; @@ -52,7 +51,7 @@ pub fn parse_meta<'a>(sess: &'a ParseSess, attr: &Attribute) -> PResult<'a, Meta } AttrArgs::Eq(_, AttrArgsEq::Ast(expr)) => { if let ast::ExprKind::Lit(token_lit) = expr.kind { - let res = ast::MetaItemLit::from_token_lit(token_lit, expr.span); + let (res, errs) = ast::MetaItemLit::from_token_lit(token_lit, expr.span); let res = match res { Ok(lit) => { if token_lit.suffix.is_some() { @@ -69,17 +68,17 @@ pub fn parse_meta<'a>(sess: &'a ParseSess, attr: &Attribute) -> PResult<'a, Meta MetaItemKind::NameValue(lit) } } - Err(err) => { - report_lit_error(sess, err, token_lit, expr.span); - let lit = ast::MetaItemLit { + Err(()) => { + let lit = MetaItemLit { symbol: token_lit.symbol, suffix: token_lit.suffix, - kind: ast::LitKind::Err, + kind: LitKind::Err, span: expr.span, }; MetaItemKind::NameValue(lit) } }; + parser::report_lit_errors(sess, errs, token_lit, expr.span); res } else { // Example cases: diff --git a/src/tools/clippy/clippy_lints/src/almost_complete_range.rs b/src/tools/clippy/clippy_lints/src/almost_complete_range.rs index 57a5cd8fba818..98e3a5c5b1e21 100644 --- a/src/tools/clippy/clippy_lints/src/almost_complete_range.rs +++ b/src/tools/clippy/clippy_lints/src/almost_complete_range.rs @@ -76,8 +76,8 @@ fn check_range(cx: &EarlyContext<'_>, span: Span, start: &Expr, end: &Expr, sugg && let ExprKind::Lit(end_token_lit) = end.peel_parens().kind && matches!( ( - LitKind::from_token_lit(start_token_lit), - LitKind::from_token_lit(end_token_lit), + LitKind::from_token_lit(start_token_lit).0, + LitKind::from_token_lit(end_token_lit).0, ), ( Ok(LitKind::Byte(b'a') | LitKind::Char('a')), diff --git a/src/tools/clippy/clippy_lints/src/int_plus_one.rs b/src/tools/clippy/clippy_lints/src/int_plus_one.rs index b8e0eef7c7e9e..ad9ea6c9adac4 100644 --- a/src/tools/clippy/clippy_lints/src/int_plus_one.rs +++ b/src/tools/clippy/clippy_lints/src/int_plus_one.rs @@ -54,7 +54,7 @@ enum Side { impl IntPlusOne { #[expect(clippy::cast_sign_loss)] fn check_lit(token_lit: token::Lit, target_value: i128) -> bool { - if let Ok(LitKind::Int(value, ..)) = LitKind::from_token_lit(token_lit) { + if let Ok(LitKind::Int(value, ..)) = LitKind::from_token_lit(token_lit).0 { return value == (target_value as u128); } false diff --git a/src/tools/clippy/clippy_lints/src/literal_representation.rs b/src/tools/clippy/clippy_lints/src/literal_representation.rs index f33151cf4c591..417db31896dd7 100644 --- a/src/tools/clippy/clippy_lints/src/literal_representation.rs +++ b/src/tools/clippy/clippy_lints/src/literal_representation.rs @@ -255,7 +255,7 @@ impl LiteralDigitGrouping { fn check_lit(self, cx: &EarlyContext<'_>, lit: token::Lit, span: Span) { if let Some(src) = snippet_opt(cx, span) - && let Ok(lit_kind) = LitKind::from_token_lit(lit) + && let Ok(lit_kind) = LitKind::from_token_lit(lit).0 && let Some(mut num_lit) = NumericLiteral::from_lit_kind(&src, &lit_kind) { if 
!Self::check_for_mistyped_suffix(cx, span, &mut num_lit) { @@ -469,7 +469,7 @@ impl DecimalLiteralRepresentation { } fn check_lit(self, cx: &EarlyContext<'_>, lit: token::Lit, span: Span) { // Lint integral literals. - if let Ok(lit_kind) = LitKind::from_token_lit(lit) + if let Ok(lit_kind) = LitKind::from_token_lit(lit).0 && let LitKind::Int(val, _) = lit_kind && let Some(src) = snippet_opt(cx, span) && let Some(num_lit) = NumericLiteral::from_lit_kind(&src, &lit_kind) diff --git a/src/tools/clippy/clippy_lints/src/misc_early/mod.rs b/src/tools/clippy/clippy_lints/src/misc_early/mod.rs index abe5b00e888a4..66e6ff7c01db2 100644 --- a/src/tools/clippy/clippy_lints/src/misc_early/mod.rs +++ b/src/tools/clippy/clippy_lints/src/misc_early/mod.rs @@ -431,7 +431,7 @@ impl MiscEarlyLints { _ => return, }; - let lit_kind = LitKind::from_token_lit(lit); + let lit_kind = LitKind::from_token_lit(lit).0; if let Ok(LitKind::Int(value, lit_int_type)) = lit_kind { let suffix = match lit_int_type { LitIntType::Signed(ty) => ty.name_str(), diff --git a/tests/rustdoc-ui/ignore-block-help.rs b/tests/rustdoc-ui/ignore-block-help.rs index 86f6a2868fb56..fb27d954f9a5a 100644 --- a/tests/rustdoc-ui/ignore-block-help.rs +++ b/tests/rustdoc-ui/ignore-block-help.rs @@ -1,10 +1,10 @@ // check-pass /// ```ignore (to-prevent-tidy-error) -/// let heart = '❤️'; +/// let unterminated = ' /// ``` //~^^^ WARNING could not parse code block //~| NOTE on by default -//~| NOTE character literal may only contain one codepoint +//~| NOTE unterminated character literal //~| HELP `ignore` code blocks require valid Rust code pub struct X; diff --git a/tests/rustdoc-ui/ignore-block-help.stderr b/tests/rustdoc-ui/ignore-block-help.stderr index a30ea51dd8a7f..f5ed287a99834 100644 --- a/tests/rustdoc-ui/ignore-block-help.stderr +++ b/tests/rustdoc-ui/ignore-block-help.stderr @@ -3,7 +3,7 @@ warning: could not parse code block as Rust code | LL | /// ```ignore (to-prevent-tidy-error) | _____^ -LL | | /// let heart = '❤️'; +LL | | /// let unterminated = ' LL | | /// ``` | |_______^ | @@ -12,7 +12,7 @@ help: `ignore` code blocks require valid Rust code for syntax highlighting; mark | LL | /// ```ignore (to-prevent-tidy-error) | ^^^ - = note: error from rustc: character literal may only contain one codepoint + = note: error from rustc: unterminated character literal = note: `#[warn(rustdoc::invalid_rust_codeblocks)]` on by default warning: 1 warning emitted diff --git a/tests/ui/fmt/format-string-error-2.stderr b/tests/ui/fmt/format-string-error-2.stderr index dfd24bf60ad52..50ead59e4e911 100644 --- a/tests/ui/fmt/format-string-error-2.stderr +++ b/tests/ui/fmt/format-string-error-2.stderr @@ -1,9 +1,3 @@ -error: incorrect unicode escape sequence - --> $DIR/format-string-error-2.rs:77:20 - | -LL | println!("\x7B}\u8 {", 1); - | ^^^ help: format of unicode escape sequences uses braces: `\u{8}` - error: invalid format string: expected `'}'`, found `'a'` --> $DIR/format-string-error-2.rs:5:5 | @@ -155,6 +149,12 @@ LL | println!("\x7B}\u{8} {", 1); | = note: if you intended to print `{`, you can escape it using `{{` +error: incorrect unicode escape sequence + --> $DIR/format-string-error-2.rs:77:20 + | +LL | println!("\x7B}\u8 {", 1); + | ^^^ help: format of unicode escape sequences uses braces: `\u{8}` + error: invalid format string: unmatched `}` found --> $DIR/format-string-error-2.rs:81:21 | diff --git a/tests/ui/lexer/lex-bad-char-literals-7.rs b/tests/ui/lexer/lex-bad-char-literals-7.rs index c675df2f3ccd0..55484a610141b 100644 --- 
a/tests/ui/lexer/lex-bad-char-literals-7.rs +++ b/tests/ui/lexer/lex-bad-char-literals-7.rs @@ -7,7 +7,4 @@ fn main() { // Next two are OK, but may befool error recovery let _ = '/'; let _ = b'/'; - - let _ = ' hello // here's a comment - //~^ ERROR: unterminated character literal } diff --git a/tests/ui/lexer/lex-bad-char-literals-7.stderr b/tests/ui/lexer/lex-bad-char-literals-7.stderr index 255b9c6899999..16ba7676932fd 100644 --- a/tests/ui/lexer/lex-bad-char-literals-7.stderr +++ b/tests/ui/lexer/lex-bad-char-literals-7.stderr @@ -10,12 +10,5 @@ error: empty unicode escape LL | let _: char = '\u{}'; | ^^^^ this escape must have at least 1 hex digit -error[E0762]: unterminated character literal - --> $DIR/lex-bad-char-literals-7.rs:11:13 - | -LL | let _ = ' hello // here's a comment - | ^^^^^^^^ - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors -For more information about this error, try `rustc --explain E0762`. diff --git a/tests/ui/lexer/lex-bad-char-literals-8.rs b/tests/ui/lexer/lex-bad-char-literals-8.rs new file mode 100644 index 0000000000000..6c8cbd3a82a85 --- /dev/null +++ b/tests/ui/lexer/lex-bad-char-literals-8.rs @@ -0,0 +1,4 @@ +fn main() { + let _ = ' hello // here's a comment + //~^ ERROR: unterminated character literal +} diff --git a/tests/ui/lexer/lex-bad-char-literals-8.stderr b/tests/ui/lexer/lex-bad-char-literals-8.stderr new file mode 100644 index 0000000000000..04c95df0d0601 --- /dev/null +++ b/tests/ui/lexer/lex-bad-char-literals-8.stderr @@ -0,0 +1,9 @@ +error[E0762]: unterminated character literal + --> $DIR/lex-bad-char-literals-8.rs:2:13 + | +LL | let _ = ' hello // here's a comment + | ^^^^^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0762`. diff --git a/tests/ui/parser/byte-literals-2.rs b/tests/ui/parser/byte-literals-2.rs new file mode 100644 index 0000000000000..fb9e2ac69944a --- /dev/null +++ b/tests/ui/parser/byte-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + b'a //~ ERROR unterminated byte constant [E0763] +} diff --git a/tests/ui/parser/byte-literals-2.stderr b/tests/ui/parser/byte-literals-2.stderr new file mode 100644 index 0000000000000..f0e042ad605db --- /dev/null +++ b/tests/ui/parser/byte-literals-2.stderr @@ -0,0 +1,9 @@ +error[E0763]: unterminated byte constant + --> $DIR/byte-literals-2.rs:2:6 + | +LL | b'a + | ^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0763`. diff --git a/tests/ui/parser/byte-literals.rs b/tests/ui/parser/byte-literals.rs index 896dc1a1a5fba..963a0bb608d84 100644 --- a/tests/ui/parser/byte-literals.rs +++ b/tests/ui/parser/byte-literals.rs @@ -8,5 +8,4 @@ pub fn main() { b' '; //~ ERROR byte constant must be escaped b'''; //~ ERROR byte constant must be escaped b'é'; //~ ERROR non-ASCII character in byte literal - b'a //~ ERROR unterminated byte constant [E0763] } diff --git a/tests/ui/parser/byte-literals.stderr b/tests/ui/parser/byte-literals.stderr index 5b414c8927e2c..97805e01db49f 100644 --- a/tests/ui/parser/byte-literals.stderr +++ b/tests/ui/parser/byte-literals.stderr @@ -43,12 +43,5 @@ help: if you meant to use the unicode code point for 'é', use a \xHH escape LL | b'\xE9'; | ~~~~ -error[E0763]: unterminated byte constant - --> $DIR/byte-literals.rs:11:6 - | -LL | b'a - | ^^^^ - -error: aborting due to 7 previous errors +error: aborting due to 6 previous errors -For more information about this error, try `rustc --explain E0763`. 
diff --git a/tests/ui/parser/byte-string-literals-2.rs b/tests/ui/parser/byte-string-literals-2.rs new file mode 100644 index 0000000000000..7eb52b854e358 --- /dev/null +++ b/tests/ui/parser/byte-string-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + b"a //~ ERROR unterminated double quote byte string +} diff --git a/tests/ui/parser/byte-string-literals-2.stderr b/tests/ui/parser/byte-string-literals-2.stderr new file mode 100644 index 0000000000000..6fdb3c64ba783 --- /dev/null +++ b/tests/ui/parser/byte-string-literals-2.stderr @@ -0,0 +1,11 @@ +error[E0766]: unterminated double quote byte string + --> $DIR/byte-string-literals-2.rs:2:6 + | +LL | b"a + | ______^ +LL | | } + | |__^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0766`. diff --git a/tests/ui/parser/byte-string-literals.rs b/tests/ui/parser/byte-string-literals.rs index 30a4f50c4e40b..c14488dcb6689 100644 --- a/tests/ui/parser/byte-string-literals.rs +++ b/tests/ui/parser/byte-string-literals.rs @@ -5,5 +5,4 @@ pub fn main() { b"\x0Z"; //~ ERROR invalid character in numeric character escape: `Z` b"é"; //~ ERROR non-ASCII character in byte string literal br##"é"##; //~ ERROR non-ASCII character in raw byte string literal - b"a //~ ERROR unterminated double quote byte string } diff --git a/tests/ui/parser/byte-string-literals.stderr b/tests/ui/parser/byte-string-literals.stderr index 655b6998e85ff..2a2830c346825 100644 --- a/tests/ui/parser/byte-string-literals.stderr +++ b/tests/ui/parser/byte-string-literals.stderr @@ -37,14 +37,5 @@ error: non-ASCII character in raw byte string literal LL | br##"é"##; | ^ must be ASCII -error[E0766]: unterminated double quote byte string - --> $DIR/byte-string-literals.rs:8:6 - | -LL | b"a - | ______^ -LL | | } - | |__^ - -error: aborting due to 6 previous errors +error: aborting due to 5 previous errors -For more information about this error, try `rustc --explain E0766`. 
diff --git a/tests/ui/parser/issues/issue-62913.rs b/tests/ui/parser/issues/issue-62913.rs index a55ef5ac71030..c77ef61a97b10 100644 --- a/tests/ui/parser/issues/issue-62913.rs +++ b/tests/ui/parser/issues/issue-62913.rs @@ -1,4 +1,5 @@ -"\u\\" -//~^ ERROR incorrect unicode escape sequence -//~| ERROR invalid trailing slash in literal -//~| ERROR expected item, found `"\u\"` +fn main() { + _ = "\u\\"; + //~^ ERROR incorrect unicode escape sequence + //~| ERROR invalid trailing slash in literal +} diff --git a/tests/ui/parser/issues/issue-62913.stderr b/tests/ui/parser/issues/issue-62913.stderr index c33e46837287f..bee6dd4580037 100644 --- a/tests/ui/parser/issues/issue-62913.stderr +++ b/tests/ui/parser/issues/issue-62913.stderr @@ -1,24 +1,16 @@ error: incorrect unicode escape sequence - --> $DIR/issue-62913.rs:1:2 + --> $DIR/issue-62913.rs:2:10 | -LL | "\u\" - | ^^^ incorrect unicode escape sequence +LL | _ = "\u\"; + | ^^^ incorrect unicode escape sequence | = help: format of unicode escape sequences is `\u{...}` error: invalid trailing slash in literal - --> $DIR/issue-62913.rs:1:5 + --> $DIR/issue-62913.rs:2:13 | -LL | "\u\" - | ^ invalid trailing slash in literal +LL | _ = "\u\"; + | ^ invalid trailing slash in literal -error: expected item, found `"\u\"` - --> $DIR/issue-62913.rs:1:1 - | -LL | "\u\" - | ^^^^^^ expected item - | - = note: for a full list of items that can appear in modules, see - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors diff --git a/tests/ui/parser/macro/literals-are-validated-before-expansion.rs b/tests/ui/parser/macro/literals-are-validated-before-expansion.rs deleted file mode 100644 index c3fc754b5567f..0000000000000 --- a/tests/ui/parser/macro/literals-are-validated-before-expansion.rs +++ /dev/null @@ -1,10 +0,0 @@ -macro_rules! black_hole { - ($($tt:tt)*) => {} -} - -fn main() { - black_hole! { '\u{FFFFFF}' } - //~^ ERROR: invalid unicode character escape - black_hole! { "this is surrogate: \u{DAAA}" } - //~^ ERROR: invalid unicode character escape -} diff --git a/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr b/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr deleted file mode 100644 index e874f62497ea8..0000000000000 --- a/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr +++ /dev/null @@ -1,18 +0,0 @@ -error: invalid unicode character escape - --> $DIR/literals-are-validated-before-expansion.rs:6:20 - | -LL | black_hole! { '\u{FFFFFF}' } - | ^^^^^^^^^^ invalid escape - | - = help: unicode escape must be at most 10FFFF - -error: invalid unicode character escape - --> $DIR/literals-are-validated-before-expansion.rs:8:39 - | -LL | black_hole! 
{ "this is surrogate: \u{DAAA}" } - | ^^^^^^^^ invalid escape - | - = help: unicode escape must not be a surrogate - -error: aborting due to 2 previous errors - diff --git a/tests/ui/parser/raw/raw-byte-string-literals-2.rs b/tests/ui/parser/raw/raw-byte-string-literals-2.rs new file mode 100644 index 0000000000000..8ffda513dbf6f --- /dev/null +++ b/tests/ui/parser/raw/raw-byte-string-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation +} diff --git a/tests/ui/parser/raw/raw-byte-string-literals-2.stderr b/tests/ui/parser/raw/raw-byte-string-literals-2.stderr new file mode 100644 index 0000000000000..b4151eeef7017 --- /dev/null +++ b/tests/ui/parser/raw/raw-byte-string-literals-2.stderr @@ -0,0 +1,8 @@ +error: found invalid character; only `#` is allowed in raw string delimitation: ~ + --> $DIR/raw-byte-string-literals-2.rs:2:5 + | +LL | br##~"a"~##; + | ^^^^^ + +error: aborting due to 1 previous error + diff --git a/tests/ui/parser/raw/raw-byte-string-literals.rs b/tests/ui/parser/raw/raw-byte-string-literals.rs index 1b859fee596ad..3f91c381a9039 100644 --- a/tests/ui/parser/raw/raw-byte-string-literals.rs +++ b/tests/ui/parser/raw/raw-byte-string-literals.rs @@ -3,5 +3,4 @@ pub fn main() { br"a "; //~ ERROR bare CR not allowed in raw string br"é"; //~ ERROR non-ASCII character in raw byte string literal - br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation } diff --git a/tests/ui/parser/raw/raw-byte-string-literals.stderr b/tests/ui/parser/raw/raw-byte-string-literals.stderr index a2f27d1ed70ae..2a4073243cbca 100644 --- a/tests/ui/parser/raw/raw-byte-string-literals.stderr +++ b/tests/ui/parser/raw/raw-byte-string-literals.stderr @@ -10,11 +10,5 @@ error: non-ASCII character in raw byte string literal LL | br"é"; | ^ must be ASCII -error: found invalid character; only `#` is allowed in raw string delimitation: ~ - --> $DIR/raw-byte-string-literals.rs:6:5 - | -LL | br##~"a"~##; - | ^^^^^ - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr index fc071a9419142..806e222507f6e 100644 --- a/tests/ui/parser/unicode-control-codepoints.stderr +++ b/tests/ui/parser/unicode-control-codepoints.stderr @@ -1,87 +1,3 @@ -error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:26 - | -LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); - | ^^^^^^^^ unicode escape in byte string - | - = help: unicode escape sequences cannot be used as a byte or in a byte string - -error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:35 - | -LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); - | ^^^^^^^^ unicode escape in byte string - | - = help: unicode escape sequences cannot be used as a byte or in a byte string - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:26 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{202e}' - | -help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes - | -LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:30 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2066}' - | -help: if you meant to use the UTF-8 encoding of 
'\u{2066}', use \xHH escapes - | -LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:41 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2069}' - | -help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes - | -LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:43 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2066}' - | -help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes - | -LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:29 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{202e}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:33 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2066}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:44 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2069}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:46 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2066}' - error: unicode codepoint changing visible direction of text present in comment --> $DIR/unicode-control-codepoints.rs:2:5 | @@ -188,5 +104,89 @@ LL | | * ''); */fn bar() {} = note: if their presence wasn't intentional, you can remove them = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' +error: unicode escape in byte string + --> $DIR/unicode-control-codepoints.rs:6:26 + | +LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); + | ^^^^^^^^ unicode escape in byte string + | + = help: unicode escape sequences cannot be used as a byte or in a byte string + +error: unicode escape in byte string + --> $DIR/unicode-control-codepoints.rs:6:35 + | +LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); + | ^^^^^^^^ unicode escape in byte string + | + = help: unicode escape sequences cannot be used as a byte or in a byte string + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:26 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{202e}' + | +help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes + | +LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:30 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2066}' + | +help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes + | +LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:41 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins 
only "); + | ^ must be ASCII but is '\u{2069}' + | +help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes + | +LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:43 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2066}' + | +help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes + | +LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:29 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{202e}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:33 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2066}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:44 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2069}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:46 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2066}' + error: aborting due to 17 previous errors