diff --git a/compiler/rustc_ast/src/attr/mod.rs b/compiler/rustc_ast/src/attr/mod.rs index 851ab46345fd1..2590fbfb52062 100644 --- a/compiler/rustc_ast/src/attr/mod.rs +++ b/compiler/rustc_ast/src/attr/mod.rs @@ -240,7 +240,7 @@ impl AttrArgsEq { match self { AttrArgsEq::Ast(expr) => match expr.kind { ExprKind::Lit(token_lit) => { - LitKind::from_token_lit(token_lit).ok().and_then(|lit| lit.str()) + LitKind::from_token_lit(token_lit).0.ok().and_then(|lit| lit.str()) } _ => None, }, @@ -426,6 +426,7 @@ impl MetaItemKind { ExprKind::Lit(token_lit) => { // Turn failures to `None`, we'll get parse errors elsewhere. MetaItemLit::from_token_lit(token_lit, expr.span) + .0 .ok() .map(|lit| MetaItemKind::NameValue(lit)) } diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index c4c85570484cf..262c1bb81d06e 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -3,11 +3,11 @@ use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; use crate::token::{self, Token}; use rustc_lexer::unescape::{ - byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, - Mode, + byte_from_char, unescape_c_string, unescape_literal, CStrUnit, EscapeError, Mode, }; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; +use std::ops::Range; use std::{ascii, fmt, str}; // Escapes a string, represented as a symbol. Reuses the original symbol, @@ -33,6 +33,14 @@ pub fn escape_byte_str_symbol(bytes: &[u8]) -> Symbol { #[derive(Debug)] pub enum LitError { LexerError, + EscapeError { + mode: Mode, + // Length before the string content, e.g. 1 for "a", 5 for br##"a"## + prefix_len: u32, + // The range is the byte range of the bad character, using a zero index. + range: Range, + err: EscapeError, + }, InvalidSuffix, InvalidIntSuffix, InvalidFloatSuffix, @@ -41,155 +49,227 @@ pub enum LitError { } impl LitKind { - /// Converts literal token into a semantic literal. - pub fn from_token_lit(lit: token::Lit) -> Result { + /// Converts literal token into a semantic literal. The return value has + /// two parts: + /// - The `Result` indicates success or failure. + /// - The `Vec` contains all found errors and warnings. + /// + /// If we only had to deal with errors, we could use the more obvious + /// `Result>`; on failure the caller would just + /// print errors and take the error path and stop early. But it's possible + /// to succeed without errors but with one or more warnings, and in that + /// case the caller should print the warnings, but also proceed with a + /// valid `LitKind`. This return type facilitates that. + pub fn from_token_lit(lit: token::Lit) -> (Result, Vec) { let token::Lit { kind, symbol, suffix } = lit; if suffix.is_some() && !kind.may_have_suffix() { - return Err(LitError::InvalidSuffix); + // Note: we return a single error here. We could instead continue + // processing, possibly returning multiple errors. + return (Err(()), vec![LitError::InvalidSuffix]); } - Ok(match kind { + let mut errs = vec![]; + let mut has_fatal = false; + + let res = match kind { token::Bool => { assert!(symbol.is_bool_lit()); - LitKind::Bool(symbol == kw::True) + Ok(LitKind::Bool(symbol == kw::True)) } token::Byte => { - return unescape_byte(symbol.as_str()) - .map(LitKind::Byte) - .map_err(|_| LitError::LexerError); + let mode = Mode::Byte; + let mut res = None; + unescape_literal(symbol.as_str(), mode, &mut |range, unescaped_char| { + match unescaped_char { + Ok(c) => res = Some(c), + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { mode, prefix_len: 2, range, err }); + } + } + }); + if !has_fatal { Ok(LitKind::Byte(byte_from_char(res.unwrap()))) } else { Err(()) } } token::Char => { - return unescape_char(symbol.as_str()) - .map(LitKind::Char) - .map_err(|_| LitError::LexerError); + let mode = Mode::Char; + let mut res = None; + unescape_literal(symbol.as_str(), mode, &mut |range, unescaped_char| { + match unescaped_char { + Ok(c) => res = Some(c), + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { mode, prefix_len: 1, range, err }); + } + } + }); + if !has_fatal { Ok(LitKind::Char(res.unwrap())) } else { Err(()) } } // There are some valid suffixes for integer and float literals, // so all the handling is done internally. - token::Integer => return integer_lit(symbol, suffix), - token::Float => return float_lit(symbol, suffix), + token::Integer => { + return match integer_lit(symbol, suffix) { + Ok(lit_kind) => (Ok(lit_kind), vec![]), + Err(err) => (Err(()), vec![err]), + }; + } + token::Float => { + return match float_lit(symbol, suffix) { + Ok(lit_kind) => (Ok(lit_kind), vec![]), + Err(err) => (Err(()), vec![err]), + }; + } token::Str => { // If there are no characters requiring special treatment we can // reuse the symbol from the token. Otherwise, we must generate a // new symbol because the string in the LitKind is different to the // string in the token. + let mode = Mode::Str; let s = symbol.as_str(); // Vanilla strings are so common we optimize for the common case where no chars // requiring special behaviour are present. - let symbol = if s.contains(['\\', '\r']) { + if s.contains(['\\', '\r']) { let mut buf = String::with_capacity(s.len()); - let mut error = Ok(()); // Force-inlining here is aggressive but the closure is // called on every char in the string, so it can be // hot in programs with many long strings. unescape_literal( s, - Mode::Str, + mode, &mut #[inline(always)] - |_, unescaped_char| match unescaped_char { + |range, unescaped_char| match unescaped_char { Ok(c) => buf.push(c), Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 1, + range, + err, + }); } }, ); - error?; - Symbol::intern(&buf) + if !has_fatal { + Ok(LitKind::Str(Symbol::intern(&buf), ast::StrStyle::Cooked)) + } else { + Err(()) + } } else { - symbol - }; - LitKind::Str(symbol, ast::StrStyle::Cooked) + Ok(LitKind::Str(symbol, ast::StrStyle::Cooked)) + } } token::StrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we // can reuse the symbol on success. - let mut error = Ok(()); - unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| { - match unescaped_char { - Ok(_) => {} - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } - } + let mode = Mode::RawStr; + let s = symbol.as_str(); + unescape_literal(s, mode, &mut |range, unescaped_char| match unescaped_char { + Ok(_) => {} + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2 + n as u32, + range, + err, + }); } }); - error?; - LitKind::Str(symbol, ast::StrStyle::Raw(n)) + if !has_fatal { Ok(LitKind::Str(symbol, ast::StrStyle::Raw(n))) } else { Err(()) } } token::ByteStr => { + let mode = Mode::ByteStr; let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); - unescape_literal(s, Mode::ByteStr, &mut |_, c| match c { + unescape_literal(s, mode, &mut |range, c| match c { Ok(c) => buf.push(byte_from_char(c)), Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { mode, prefix_len: 2, range, err }); } }); - error?; - LitKind::ByteStr(buf.into(), StrStyle::Cooked) + if !has_fatal { + Ok(LitKind::ByteStr(buf.into(), StrStyle::Cooked)) + } else { + Err(()) + } } token::ByteStrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we // can convert the symbol directly to a `Lrc` on success. + let mode = Mode::RawByteStr; let s = symbol.as_str(); - let mut error = Ok(()); - unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c { + unescape_literal(s, mode, &mut |range, c| match c { Ok(_) => {} Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 3 + n as u32, + range, + err, + }); } }); - LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n)) + if !has_fatal { + Ok(LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n))) + } else { + Err(()) + } } token::CStr => { + let mode = Mode::CStr; let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); - unescape_c_string(s, Mode::CStr, &mut |_span, c| match c { + unescape_c_string(s, mode, &mut |range, c| match c { Ok(CStrUnit::Byte(b)) => buf.push(b), Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8), Ok(CStrUnit::Char(c)) => { buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) } Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { mode, prefix_len: 2, range, err }); } }); - error?; - buf.push(0); - LitKind::CStr(buf.into(), StrStyle::Cooked) + if !has_fatal { + buf.push(0); + Ok(LitKind::CStr(buf.into(), StrStyle::Cooked)) + } else { + Err(()) + } } token::CStrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we - // can convert the symbol directly to a `Lrc` on success. + // can convert the symbol directly to a `Lrc` (after appending a nul char) on + // success. + let mode = Mode::RawCStr; let s = symbol.as_str(); - let mut error = Ok(()); - unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c { + unescape_c_string(s, mode, &mut |range, c| match c { Ok(_) => {} Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 3 + n as u32, + range, + err, + }); } }); - error?; - let mut buf = s.to_owned().into_bytes(); - buf.push(0); - LitKind::CStr(buf.into(), StrStyle::Raw(n)) + if !has_fatal { + let mut buf = s.to_owned().into_bytes(); + buf.push(0); + Ok(LitKind::CStr(buf.into(), StrStyle::Raw(n))) + } else { + Err(()) + } } - token::Err => LitKind::Err, - }) + token::Err => Ok(LitKind::Err), + }; + (res, errs) } } @@ -257,14 +337,20 @@ impl fmt::Display for LitKind { } impl MetaItemLit { - /// Converts a token literal into a meta item literal. - pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result { - Ok(MetaItemLit { - symbol: token_lit.symbol, - suffix: token_lit.suffix, - kind: LitKind::from_token_lit(token_lit)?, - span, - }) + /// Converts a token literal into a meta item literal. See + /// `LitKind::from_token_lit` for an explanation of the return type. + pub fn from_token_lit( + token_lit: token::Lit, + span: Span, + ) -> (Result, Vec) { + let (res, errs) = LitKind::from_token_lit(token_lit); + let res = match res { + Ok(kind) => { + Ok(MetaItemLit { symbol: token_lit.symbol, suffix: token_lit.suffix, kind, span }) + } + Err(()) => Err(()), + }; + (res, errs) } /// Cheaply converts a meta item literal into a token literal. @@ -290,7 +376,7 @@ impl MetaItemLit { /// Converts an arbitrary token into meta item literal. pub fn from_token(token: &Token) -> Option { token::Lit::from_token(token) - .and_then(|token_lit| MetaItemLit::from_token_lit(token_lit, token.span).ok()) + .and_then(|token_lit| MetaItemLit::from_token_lit(token_lit, token.span).0.ok()) } } diff --git a/compiler/rustc_ast_lowering/src/expr.rs b/compiler/rustc_ast_lowering/src/expr.rs index 635bc945cb1e9..f9587f66cc1cf 100644 --- a/compiler/rustc_ast_lowering/src/expr.rs +++ b/compiler/rustc_ast_lowering/src/expr.rs @@ -14,7 +14,7 @@ use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_hir as hir; use rustc_hir::def::{DefKind, Res}; use rustc_middle::span_bug; -use rustc_parse::parser::report_lit_error; +use rustc_parse::parser::report_lit_errors; use rustc_span::source_map::{respan, Spanned}; use rustc_span::symbol::{kw, sym, Ident, Symbol}; use rustc_span::DUMMY_SP; @@ -119,13 +119,12 @@ impl<'hir> LoweringContext<'_, 'hir> { hir::ExprKind::Unary(op, ohs) } ExprKind::Lit(token_lit) => { - let lit_kind = match LitKind::from_token_lit(*token_lit) { + let (result, errs) = LitKind::from_token_lit(*token_lit); + let lit_kind = match result { Ok(lit_kind) => lit_kind, - Err(err) => { - report_lit_error(&self.tcx.sess.parse_sess, err, *token_lit, e.span); - LitKind::Err - } + Err(()) => LitKind::Err, }; + report_lit_errors(&self.tcx.sess.parse_sess, errs, *token_lit, e.span); let lit = self.arena.alloc(respan(self.lower_span(e.span), lit_kind)); hir::ExprKind::Lit(lit) } diff --git a/compiler/rustc_ast_lowering/src/format.rs b/compiler/rustc_ast_lowering/src/format.rs index 6a82005c44842..1e7b7cc150935 100644 --- a/compiler/rustc_ast_lowering/src/format.rs +++ b/compiler/rustc_ast_lowering/src/format.rs @@ -127,11 +127,11 @@ fn inline_literals(mut fmt: Cow<'_, FormatArgs>) -> Cow<'_, FormatArgs> { && let ExprKind::Lit(lit) = arg.kind { if let token::LitKind::Str | token::LitKind::StrRaw(_) = lit.kind - && let Ok(LitKind::Str(s, _)) = LitKind::from_token_lit(lit) + && let Ok(LitKind::Str(s, _)) = LitKind::from_token_lit(lit).0 { literal = Some(s); } else if let token::LitKind::Integer = lit.kind - && let Ok(LitKind::Int(n, _)) = LitKind::from_token_lit(lit) + && let Ok(LitKind::Int(n, _)) = LitKind::from_token_lit(lit).0 { literal = Some(Symbol::intern(&n.to_string())); } diff --git a/compiler/rustc_ast_lowering/src/lib.rs b/compiler/rustc_ast_lowering/src/lib.rs index d9663d50c595c..d38bd9dd1ff84 100644 --- a/compiler/rustc_ast_lowering/src/lib.rs +++ b/compiler/rustc_ast_lowering/src/lib.rs @@ -948,7 +948,7 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> { // In valid code the value always ends up as a single literal. Otherwise, a dummy // literal suffices because the error is handled elsewhere. let lit = if let ExprKind::Lit(token_lit) = expr.kind - && let Ok(lit) = MetaItemLit::from_token_lit(token_lit, expr.span) + && let Ok(lit) = MetaItemLit::from_token_lit(token_lit, expr.span).0 { lit } else { diff --git a/compiler/rustc_builtin_macros/src/concat.rs b/compiler/rustc_builtin_macros/src/concat.rs index 6c83e8868bd31..21be6e0fdc19d 100644 --- a/compiler/rustc_builtin_macros/src/concat.rs +++ b/compiler/rustc_builtin_macros/src/concat.rs @@ -1,7 +1,7 @@ use rustc_ast as ast; use rustc_ast::tokenstream::TokenStream; use rustc_expand::base::{self, DummyResult}; -use rustc_parse::parser::report_lit_error; +use rustc_parse::parser::report_lit_errors; use rustc_span::symbol::Symbol; use crate::errors; @@ -19,48 +19,49 @@ pub fn expand_concat( let mut has_errors = false; for e in es { match e.kind { - ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { - Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => { - accumulator.push_str(s.as_str()); - } - Ok(ast::LitKind::Char(c)) => { - accumulator.push(c); - } - Ok(ast::LitKind::Int(i, _)) => { - accumulator.push_str(&i.to_string()); - } - Ok(ast::LitKind::Bool(b)) => { - accumulator.push_str(&b.to_string()); - } - Ok(ast::LitKind::CStr(..)) => { - cx.emit_err(errors::ConcatCStrLit { span: e.span }); - has_errors = true; - } - Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { - cx.emit_err(errors::ConcatBytestr { span: e.span }); - has_errors = true; - } - Ok(ast::LitKind::Err) => { - has_errors = true; - } - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span); - has_errors = true; + ast::ExprKind::Lit(token_lit) => { + let (res, errs) = ast::LitKind::from_token_lit(token_lit); + match res { + Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => { + accumulator.push_str(s.as_str()); + } + Ok(ast::LitKind::Char(c)) => { + accumulator.push(c); + } + Ok(ast::LitKind::Int(i, _)) => { + accumulator.push_str(&i.to_string()); + } + Ok(ast::LitKind::Bool(b)) => { + accumulator.push_str(&b.to_string()); + } + Ok(ast::LitKind::CStr(..)) => { + cx.emit_err(errors::ConcatCStrLit { span: e.span }); + has_errors = true; + } + Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { + cx.emit_err(errors::ConcatBytestr { span: e.span }); + has_errors = true; + } + Ok(ast::LitKind::Err) | Err(()) => { + has_errors = true; + } } - }, + report_lit_errors(&cx.sess.parse_sess, errs, token_lit, e.span); + } // We also want to allow negative numeric literals. ast::ExprKind::Unary(ast::UnOp::Neg, ref expr) if let ast::ExprKind::Lit(token_lit) = expr.kind => { - match ast::LitKind::from_token_lit(token_lit) { + let (res, errs) = ast::LitKind::from_token_lit(token_lit); + match res { Ok(ast::LitKind::Int(i, _)) => accumulator.push_str(&format!("-{i}")), Ok(ast::LitKind::Float(f, _)) => accumulator.push_str(&format!("-{f}")), - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span); + Err(()) => { has_errors = true; } _ => missing_literal.push(e.span), } + report_lit_errors(&cx.sess.parse_sess, errs, token_lit, e.span); } ast::ExprKind::IncludedBytes(..) => { cx.emit_err(errors::ConcatBytestr { span: e.span }); diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index 4ae328160f0c8..085207a49a5f5 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -1,7 +1,7 @@ use rustc_ast as ast; use rustc_ast::{ptr::P, tokenstream::TokenStream}; use rustc_expand::base::{self, DummyResult}; -use rustc_parse::parser::report_lit_error; +use rustc_parse::parser::report_lit_errors; use rustc_span::Span; use crate::errors; @@ -17,7 +17,8 @@ fn invalid_type_err( ConcatBytesInvalid, ConcatBytesInvalidSuggestion, ConcatBytesNonU8, ConcatBytesOob, }; let snippet = cx.sess.source_map().span_to_snippet(span).ok(); - match ast::LitKind::from_token_lit(token_lit) { + let (res, errs) = ast::LitKind::from_token_lit(token_lit); + match res { Ok(ast::LitKind::CStr(_, _)) => { // Avoid ambiguity in handling of terminal `NUL` by refusing to // concatenate C string literals as bytes. @@ -60,10 +61,9 @@ fn invalid_type_err( cx.emit_err(ConcatBytesNonU8 { span }); } Ok(ast::LitKind::ByteStr(..) | ast::LitKind::Byte(_)) => unreachable!(), - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, span); - } + Err(()) => {} } + report_lit_errors(&cx.sess.parse_sess, errs, token_lit, span); } fn handle_array_element( @@ -80,7 +80,7 @@ fn handle_array_element( *has_errors = true; None } - ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { + ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit).0 { Ok(ast::LitKind::Int( val, ast::LitIntType::Unsuffixed | ast::LitIntType::Unsigned(ast::UintTy::U8), @@ -141,7 +141,7 @@ pub fn expand_concat_bytes( ast::ExprKind::Repeat(expr, count) => { if let ast::ExprKind::Lit(token_lit) = count.value.kind && let Ok(ast::LitKind::Int(count_val, _)) = - ast::LitKind::from_token_lit(token_lit) + ast::LitKind::from_token_lit(token_lit).0 { if let Some(elem) = handle_array_element(cx, &mut has_errors, &mut missing_literals, expr) @@ -154,7 +154,7 @@ pub fn expand_concat_bytes( cx.emit_err(errors::ConcatBytesBadRepeat { span: count.value.span }); } } - &ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { + &ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit).0 { Ok(ast::LitKind::Byte(val)) => { accumulator.push(val); } diff --git a/compiler/rustc_expand/src/base.rs b/compiler/rustc_expand/src/base.rs index c9bbea47185b7..0371aff80dade 100644 --- a/compiler/rustc_expand/src/base.rs +++ b/compiler/rustc_expand/src/base.rs @@ -1235,26 +1235,28 @@ pub fn expr_to_spanned_string<'a>( let expr = cx.expander().fully_expand_fragment(AstFragment::Expr(expr)).make_expr(); Err(match expr.kind { - ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { - Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)), - Ok(ast::LitKind::ByteStr(..)) => { - let mut err = cx.struct_span_err(expr.span, err_msg); - let span = expr.span.shrink_to_lo(); - err.span_suggestion( - span.with_hi(span.lo() + BytePos(1)), - "consider removing the leading `b`", - "", - Applicability::MaybeIncorrect, - ); - Some((err, true)) - } - Ok(ast::LitKind::Err) => None, - Err(err) => { - parser::report_lit_error(&cx.sess.parse_sess, err, token_lit, expr.span); - None - } - _ => Some((cx.struct_span_err(expr.span, err_msg), false)), - }, + ast::ExprKind::Lit(token_lit) => { + let (lit_kind, errs) = ast::LitKind::from_token_lit(token_lit); + let res = match lit_kind { + Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)), + Ok(ast::LitKind::ByteStr(..)) => { + let mut err = cx.struct_span_err(expr.span, err_msg); + let span = expr.span.shrink_to_lo(); + err.span_suggestion( + span.with_hi(span.lo() + BytePos(1)), + "consider removing the leading `b`", + "", + Applicability::MaybeIncorrect, + ); + Some((err, true)) + } + Ok(ast::LitKind::Err) => None, + Err(()) => None, + _ => Some((cx.struct_span_err(expr.span, err_msg), false)), + }; + parser::report_lit_errors(&cx.sess.parse_sess, errs, token_lit, expr.span); + res + } ast::ExprKind::Err => None, _ => Some((cx.struct_span_err(expr.span, err_msg), false)), }) diff --git a/compiler/rustc_expand/src/mbe/metavar_expr.rs b/compiler/rustc_expand/src/mbe/metavar_expr.rs index 7cb279a981203..4f985dd9d660b 100644 --- a/compiler/rustc_expand/src/mbe/metavar_expr.rs +++ b/compiler/rustc_expand/src/mbe/metavar_expr.rs @@ -119,7 +119,7 @@ fn parse_depth<'sess>( .span_diagnostic .struct_span_err(span, "meta-variable expression depth must be a literal")); }; - if let Ok(lit_kind) = LitKind::from_token_lit(*lit) + if let Ok(lit_kind) = LitKind::from_token_lit(*lit).0 && let LitKind::Int(n_u128, LitIntType::Unsuffixed) = lit_kind && let Ok(n_usize) = usize::try_from(n_u128) { diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 7c8065f3cb9b9..4941531e3ce44 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -347,7 +347,7 @@ where // them in the range computation. while let Some(c) = chars.next() { let start = src.len() - chars.as_str().len() - c.len_utf8(); - let res = match c { + let res: Result = match c { '\\' => { match chars.clone().next() { Some('\n') => { diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 92df2da8710da..26f6447406c22 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -1,5 +1,3 @@ -use std::ops::Range; - use crate::errors; use crate::lexer::unicode_chars::UNICODE_ARRAY; use crate::make_unclosed_delims_error; @@ -8,7 +6,6 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey}; -use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::{Base, DocStyle, RawStrError}; use rustc_lexer::{Cursor, LiteralKind}; use rustc_session::lint::builtin::{ @@ -21,10 +18,10 @@ use rustc_span::{edition::Edition, BytePos, Pos, Span}; mod diagnostics; mod tokentrees; -mod unescape_error_reporting; +pub(crate) mod unescape_error_reporting; mod unicode_chars; -use unescape_error_reporting::{emit_unescape_error, escaped_char}; +use unescape_error_reporting::escaped_char; // This type is used a lot. Make sure it doesn't unintentionally get bigger. // @@ -409,7 +406,7 @@ impl<'a> StringReader<'a> { error_code!(E0762), ) } - self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' ' + self.cook_quoted(token::Char, start, end, 1, 1) // ' ' } rustc_lexer::LiteralKind::Byte { terminated } => { if !terminated { @@ -419,7 +416,7 @@ impl<'a> StringReader<'a> { error_code!(E0763), ) } - self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' ' + self.cook_quoted(token::Byte, start, end, 2, 1) // b' ' } rustc_lexer::LiteralKind::Str { terminated } => { if !terminated { @@ -429,7 +426,7 @@ impl<'a> StringReader<'a> { error_code!(E0765), ) } - self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " " + self.cook_quoted(token::Str, start, end, 1, 1) // " " } rustc_lexer::LiteralKind::ByteStr { terminated } => { if !terminated { @@ -439,7 +436,7 @@ impl<'a> StringReader<'a> { error_code!(E0766), ) } - self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " + self.cook_quoted(token::ByteStr, start, end, 2, 1) // b" " } rustc_lexer::LiteralKind::CStr { terminated } => { if !terminated { @@ -449,13 +446,13 @@ impl<'a> StringReader<'a> { error_code!(E0767), ) } - self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" " + self.cook_quoted(token::CStr, start, end, 2, 1) // c" " } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::StrRaw(n_hashes); - self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "## + self.cook_quoted(kind, start, end, 2 + n, 1 + n) // r##" "## } else { self.report_raw_str_error(start, 1); } @@ -464,7 +461,7 @@ impl<'a> StringReader<'a> { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::ByteStrRaw(n_hashes); - self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "## + self.cook_quoted(kind, start, end, 3 + n, 1 + n) // br##" "## } else { self.report_raw_str_error(start, 2); } @@ -473,7 +470,7 @@ impl<'a> StringReader<'a> { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::CStrRaw(n_hashes); - self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## + self.cook_quoted(kind, start, end, 3 + n, 1 + n) // cr##" "## } else { self.report_raw_str_error(start, 2); } @@ -693,82 +690,18 @@ impl<'a> StringReader<'a> { self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num }); } - fn cook_common( + fn cook_quoted( &self, kind: token::LitKind, - mode: Mode, start: BytePos, end: BytePos, prefix_len: u32, postfix_len: u32, - unescape: fn(&str, Mode, &mut dyn FnMut(Range, Result<(), EscapeError>)), ) -> (token::LitKind, Symbol) { - let mut has_fatal_err = false; let content_start = start + BytePos(prefix_len); let content_end = end - BytePos(postfix_len); let lit_content = self.str_from_to(content_start, content_end); - unescape(lit_content, mode, &mut |range, result| { - // Here we only check for errors. The actual unescaping is done later. - if let Err(err) = result { - let span_with_quotes = self.mk_sp(start, end); - let (start, end) = (range.start as u32, range.end as u32); - let lo = content_start + BytePos(start); - let hi = lo + BytePos(end - start); - let span = self.mk_sp(lo, hi); - if err.is_fatal() { - has_fatal_err = true; - } - emit_unescape_error( - &self.sess.span_diagnostic, - lit_content, - span_with_quotes, - span, - mode, - range, - err, - ); - } - }); - - // We normally exclude the quotes for the symbol, but for errors we - // include it because it results in clearer error messages. - if !has_fatal_err { - (kind, Symbol::intern(lit_content)) - } else { - (token::Err, self.symbol_from_to(start, end)) - } - } - - fn cook_quoted( - &self, - kind: token::LitKind, - mode: Mode, - start: BytePos, - end: BytePos, - prefix_len: u32, - postfix_len: u32, - ) -> (token::LitKind, Symbol) { - self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_literal(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) - }) - } - - fn cook_c_string( - &self, - kind: token::LitKind, - mode: Mode, - start: BytePos, - end: BytePos, - prefix_len: u32, - postfix_len: u32, - ) -> (token::LitKind, Symbol) { - self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_c_string(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) - }) + (kind, Symbol::intern(lit_content)) } } diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 65a46ec6c476b..ea8c588cc03cc 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -11,26 +11,32 @@ use crate::errors::{MoreThanOneCharNote, MoreThanOneCharSugg, NoBraceUnicodeSub, pub(crate) fn emit_unescape_error( handler: &Handler, - // interior part of the literal, without quotes + // interior part of the literal, between quotes lit: &str, // full span of the literal, including quotes and any prefix full_lit_span: Span, - // span of the error part of the literal - err_span: Span, mode: Mode, + prefix_len: u32, // range of the error inside `lit` range: Range, error: EscapeError, ) { + let (start, end) = (range.start as u32, range.end as u32); + let lo = full_lit_span.lo() + BytePos(prefix_len) + BytePos(start); + let hi = lo + BytePos(end - start); + let err_span = full_lit_span.with_lo(lo).with_hi(hi); + debug!( - "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}", - lit, full_lit_span, mode, range, error + "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}, {:?}", + lit, full_lit_span, err_span, mode, range, error ); + let last_char = || { let c = lit[range.clone()].chars().next_back().unwrap(); let span = err_span.with_lo(err_span.hi() - BytePos(c.len_utf8() as u32)); (c, span) }; + match error { EscapeError::LoneSurrogateUnicodeEscape => { handler diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index e8e2cec62cc09..40e932ae4f57a 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -8,6 +8,7 @@ use super::{ }; use crate::errors; +use crate::lexer::unescape_error_reporting::emit_unescape_error; use crate::maybe_recover_from_interpolated_ty_qpath; use ast::mut_visit::{noop_visit_expr, MutVisitor}; use ast::{CoroutineKind, GenBlockKind, Pat, Path, PathSegment}; @@ -2048,26 +2049,31 @@ impl<'a> Parser<'a> { let token = recovered.as_ref().unwrap_or(&self.token); match token::Lit::from_token(token) { Some(lit) => { - match MetaItemLit::from_token_lit(lit, token.span) { + let (res, errs) = MetaItemLit::from_token_lit(lit, token.span); + let (res, span) = match res { Ok(lit) => { + let span = token.uninterpolated_span(); self.bump(); - Some(lit) + (lit, span) } - Err(err) => { + Err(()) => { let span = token.uninterpolated_span(); self.bump(); - report_lit_error(self.sess, err, lit, span); // Pack possible quotes and prefixes from the original literal into // the error literal's symbol so they can be pretty-printed faithfully. let suffixless_lit = token::Lit::new(lit.kind, lit.symbol, None); let symbol = Symbol::intern(&suffixless_lit.to_string()); let lit = token::Lit::new(token::Err, symbol, lit.suffix); - Some( + ( MetaItemLit::from_token_lit(lit, span) + .0 .unwrap_or_else(|_| unreachable!()), + span, ) } - } + }; + report_lit_errors(self.sess, errs, lit, span); + Some(res) } None => None, } @@ -3659,7 +3665,13 @@ impl<'a> Parser<'a> { } } -pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { +pub fn report_lit_errors(sess: &ParseSess, errs: Vec, lit: token::Lit, span: Span) { + for err in errs { + report_lit_error(sess, err, lit, span); + } +} + +fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { // Checks if `s` looks like i32 or u1234 etc. fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) @@ -3692,6 +3704,17 @@ pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: // `LexerError` is an error, but it was already reported // by lexer, so here we don't report it the second time. LitError::LexerError => {} + LitError::EscapeError { mode, prefix_len, range, err } => { + emit_unescape_error( + &sess.span_diagnostic, + symbol.as_str(), + span, + mode, + prefix_len, + range, + err, + ); + } LitError::InvalidSuffix => { if let Some(suffix) = suffix { sess.emit_err(errors::InvalidLiteralSuffix { span, kind: kind.descr(), suffix }); diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index d1fdebd90c1d5..792aaabaabd7e 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -11,7 +11,7 @@ mod stmt; mod ty; use crate::lexer::UnmatchedDelim; -pub use crate::parser::expr::report_lit_error; +pub use crate::parser::expr::report_lit_errors; pub use attr_wrapper::AttrWrapper; pub use diagnostics::AttemptLocalParseRecovery; pub(crate) use expr::ForbiddenLetReason; diff --git a/compiler/rustc_parse/src/validate_attr.rs b/compiler/rustc_parse/src/validate_attr.rs index cbe75b3dab6ee..94246445a050c 100644 --- a/compiler/rustc_parse/src/validate_attr.rs +++ b/compiler/rustc_parse/src/validate_attr.rs @@ -1,11 +1,11 @@ //! Meta-syntax validation logic of attributes for post-expansion. -use crate::{errors, parse_in}; +use crate::{errors, parse_in, parser}; use rustc_ast::token::Delimiter; use rustc_ast::tokenstream::DelimSpan; -use rustc_ast::MetaItemKind; use rustc_ast::{self as ast, AttrArgs, AttrArgsEq, Attribute, DelimArgs, MetaItem}; +use rustc_ast::{LitKind, MetaItemKind, MetaItemLit}; use rustc_ast_pretty::pprust; use rustc_errors::{Applicability, FatalError, PResult}; use rustc_feature::{AttributeTemplate, BuiltinAttribute, BUILTIN_ATTRIBUTE_MAP}; @@ -51,27 +51,46 @@ pub fn parse_meta<'a>(sess: &'a ParseSess, attr: &Attribute) -> PResult<'a, Meta MetaItemKind::List(nmis) } AttrArgs::Eq(_, AttrArgsEq::Ast(expr)) => { - if let ast::ExprKind::Lit(token_lit) = expr.kind - && let Ok(lit) = ast::MetaItemLit::from_token_lit(token_lit, expr.span) - { - if token_lit.suffix.is_some() { - let mut err = sess.span_diagnostic.struct_span_err( - expr.span, - "suffixed literals are not allowed in attributes", - ); - err.help( - "instead of using a suffixed literal (`1u8`, `1.0f32`, etc.), \ - use an unsuffixed version (`1`, `1.0`, etc.)", - ); - return Err(err); - } else { - MetaItemKind::NameValue(lit) - } + if let ast::ExprKind::Lit(token_lit) = expr.kind { + // njn: pull this change out into a precursor? + // njn: also change to "unexpected non-literal expression", + // or add a "expected a literal expression" + let (res, errs) = ast::MetaItemLit::from_token_lit(token_lit, expr.span); + let res = match res { + Ok(lit) => { + if token_lit.suffix.is_some() { + let mut err = sess.span_diagnostic.struct_span_err( + expr.span, + "suffixed literals are not allowed in attributes", + ); + err.help( + "instead of using a suffixed literal (`1u8`, `1.0f32`, etc.), \ + use an unsuffixed version (`1`, `1.0`, etc.)", + ); + return Err(err); + } else { + MetaItemKind::NameValue(lit) + } + } + Err(()) => { + let lit = MetaItemLit { + symbol: token_lit.symbol, + suffix: token_lit.suffix, + kind: LitKind::Err, + span: expr.span, + }; + MetaItemKind::NameValue(lit) + } + }; + parser::report_lit_errors(sess, errs, token_lit, expr.span); + res } else { - // The non-error case can happen with e.g. `#[foo = 1+1]`. The error case can - // happen with e.g. `#[foo = include_str!("nonexistent-file.rs")]`; in that - // case we delay the error because an earlier error will have already been - // reported. + // Example cases: + // - `#[foo = 1+1]`: results in `ast::ExprKind::BinOp`. + // - `#[foo = include_str!("nonexistent-file.rs")]`: + // results in `ast::ExprKind::Err`. In that case we delay + // the error because an earlier error will have already + // been reported. let msg = format!("unexpected expression: `{}`", pprust::expr_to_string(expr)); let mut err = sess.span_diagnostic.struct_span_err(expr.span, msg); if let ast::ExprKind::Err = expr.kind { diff --git a/src/tools/clippy/clippy_lints/src/almost_complete_range.rs b/src/tools/clippy/clippy_lints/src/almost_complete_range.rs index 57a5cd8fba818..98e3a5c5b1e21 100644 --- a/src/tools/clippy/clippy_lints/src/almost_complete_range.rs +++ b/src/tools/clippy/clippy_lints/src/almost_complete_range.rs @@ -76,8 +76,8 @@ fn check_range(cx: &EarlyContext<'_>, span: Span, start: &Expr, end: &Expr, sugg && let ExprKind::Lit(end_token_lit) = end.peel_parens().kind && matches!( ( - LitKind::from_token_lit(start_token_lit), - LitKind::from_token_lit(end_token_lit), + LitKind::from_token_lit(start_token_lit).0, + LitKind::from_token_lit(end_token_lit).0, ), ( Ok(LitKind::Byte(b'a') | LitKind::Char('a')), diff --git a/src/tools/clippy/clippy_lints/src/int_plus_one.rs b/src/tools/clippy/clippy_lints/src/int_plus_one.rs index b8e0eef7c7e9e..ad9ea6c9adac4 100644 --- a/src/tools/clippy/clippy_lints/src/int_plus_one.rs +++ b/src/tools/clippy/clippy_lints/src/int_plus_one.rs @@ -54,7 +54,7 @@ enum Side { impl IntPlusOne { #[expect(clippy::cast_sign_loss)] fn check_lit(token_lit: token::Lit, target_value: i128) -> bool { - if let Ok(LitKind::Int(value, ..)) = LitKind::from_token_lit(token_lit) { + if let Ok(LitKind::Int(value, ..)) = LitKind::from_token_lit(token_lit).0 { return value == (target_value as u128); } false diff --git a/src/tools/clippy/clippy_lints/src/literal_representation.rs b/src/tools/clippy/clippy_lints/src/literal_representation.rs index f33151cf4c591..417db31896dd7 100644 --- a/src/tools/clippy/clippy_lints/src/literal_representation.rs +++ b/src/tools/clippy/clippy_lints/src/literal_representation.rs @@ -255,7 +255,7 @@ impl LiteralDigitGrouping { fn check_lit(self, cx: &EarlyContext<'_>, lit: token::Lit, span: Span) { if let Some(src) = snippet_opt(cx, span) - && let Ok(lit_kind) = LitKind::from_token_lit(lit) + && let Ok(lit_kind) = LitKind::from_token_lit(lit).0 && let Some(mut num_lit) = NumericLiteral::from_lit_kind(&src, &lit_kind) { if !Self::check_for_mistyped_suffix(cx, span, &mut num_lit) { @@ -469,7 +469,7 @@ impl DecimalLiteralRepresentation { } fn check_lit(self, cx: &EarlyContext<'_>, lit: token::Lit, span: Span) { // Lint integral literals. - if let Ok(lit_kind) = LitKind::from_token_lit(lit) + if let Ok(lit_kind) = LitKind::from_token_lit(lit).0 && let LitKind::Int(val, _) = lit_kind && let Some(src) = snippet_opt(cx, span) && let Some(num_lit) = NumericLiteral::from_lit_kind(&src, &lit_kind) diff --git a/src/tools/clippy/clippy_lints/src/misc_early/mod.rs b/src/tools/clippy/clippy_lints/src/misc_early/mod.rs index abe5b00e888a4..66e6ff7c01db2 100644 --- a/src/tools/clippy/clippy_lints/src/misc_early/mod.rs +++ b/src/tools/clippy/clippy_lints/src/misc_early/mod.rs @@ -431,7 +431,7 @@ impl MiscEarlyLints { _ => return, }; - let lit_kind = LitKind::from_token_lit(lit); + let lit_kind = LitKind::from_token_lit(lit).0; if let Ok(LitKind::Int(value, lit_int_type)) = lit_kind { let suffix = match lit_int_type { LitIntType::Signed(ty) => ty.name_str(), diff --git a/tests/rustdoc-ui/ignore-block-help.rs b/tests/rustdoc-ui/ignore-block-help.rs index 86f6a2868fb56..fb27d954f9a5a 100644 --- a/tests/rustdoc-ui/ignore-block-help.rs +++ b/tests/rustdoc-ui/ignore-block-help.rs @@ -1,10 +1,10 @@ // check-pass /// ```ignore (to-prevent-tidy-error) -/// let heart = '❤️'; +/// let unterminated = ' /// ``` //~^^^ WARNING could not parse code block //~| NOTE on by default -//~| NOTE character literal may only contain one codepoint +//~| NOTE unterminated character literal //~| HELP `ignore` code blocks require valid Rust code pub struct X; diff --git a/tests/rustdoc-ui/ignore-block-help.stderr b/tests/rustdoc-ui/ignore-block-help.stderr index a30ea51dd8a7f..f5ed287a99834 100644 --- a/tests/rustdoc-ui/ignore-block-help.stderr +++ b/tests/rustdoc-ui/ignore-block-help.stderr @@ -3,7 +3,7 @@ warning: could not parse code block as Rust code | LL | /// ```ignore (to-prevent-tidy-error) | _____^ -LL | | /// let heart = '❤️'; +LL | | /// let unterminated = ' LL | | /// ``` | |_______^ | @@ -12,7 +12,7 @@ help: `ignore` code blocks require valid Rust code for syntax highlighting; mark | LL | /// ```ignore (to-prevent-tidy-error) | ^^^ - = note: error from rustc: character literal may only contain one codepoint + = note: error from rustc: unterminated character literal = note: `#[warn(rustdoc::invalid_rust_codeblocks)]` on by default warning: 1 warning emitted diff --git a/tests/ui/fmt/format-string-error-2.stderr b/tests/ui/fmt/format-string-error-2.stderr index dfd24bf60ad52..50ead59e4e911 100644 --- a/tests/ui/fmt/format-string-error-2.stderr +++ b/tests/ui/fmt/format-string-error-2.stderr @@ -1,9 +1,3 @@ -error: incorrect unicode escape sequence - --> $DIR/format-string-error-2.rs:77:20 - | -LL | println!("\x7B}\u8 {", 1); - | ^^^ help: format of unicode escape sequences uses braces: `\u{8}` - error: invalid format string: expected `'}'`, found `'a'` --> $DIR/format-string-error-2.rs:5:5 | @@ -155,6 +149,12 @@ LL | println!("\x7B}\u{8} {", 1); | = note: if you intended to print `{`, you can escape it using `{{` +error: incorrect unicode escape sequence + --> $DIR/format-string-error-2.rs:77:20 + | +LL | println!("\x7B}\u8 {", 1); + | ^^^ help: format of unicode escape sequences uses braces: `\u{8}` + error: invalid format string: unmatched `}` found --> $DIR/format-string-error-2.rs:81:21 | diff --git a/tests/ui/lexer/lex-bad-char-literals-7.rs b/tests/ui/lexer/lex-bad-char-literals-7.rs index c675df2f3ccd0..55484a610141b 100644 --- a/tests/ui/lexer/lex-bad-char-literals-7.rs +++ b/tests/ui/lexer/lex-bad-char-literals-7.rs @@ -7,7 +7,4 @@ fn main() { // Next two are OK, but may befool error recovery let _ = '/'; let _ = b'/'; - - let _ = ' hello // here's a comment - //~^ ERROR: unterminated character literal } diff --git a/tests/ui/lexer/lex-bad-char-literals-7.stderr b/tests/ui/lexer/lex-bad-char-literals-7.stderr index 255b9c6899999..16ba7676932fd 100644 --- a/tests/ui/lexer/lex-bad-char-literals-7.stderr +++ b/tests/ui/lexer/lex-bad-char-literals-7.stderr @@ -10,12 +10,5 @@ error: empty unicode escape LL | let _: char = '\u{}'; | ^^^^ this escape must have at least 1 hex digit -error[E0762]: unterminated character literal - --> $DIR/lex-bad-char-literals-7.rs:11:13 - | -LL | let _ = ' hello // here's a comment - | ^^^^^^^^ - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors -For more information about this error, try `rustc --explain E0762`. diff --git a/tests/ui/lexer/lex-bad-char-literals-8.rs b/tests/ui/lexer/lex-bad-char-literals-8.rs new file mode 100644 index 0000000000000..6c8cbd3a82a85 --- /dev/null +++ b/tests/ui/lexer/lex-bad-char-literals-8.rs @@ -0,0 +1,4 @@ +fn main() { + let _ = ' hello // here's a comment + //~^ ERROR: unterminated character literal +} diff --git a/tests/ui/lexer/lex-bad-char-literals-8.stderr b/tests/ui/lexer/lex-bad-char-literals-8.stderr new file mode 100644 index 0000000000000..04c95df0d0601 --- /dev/null +++ b/tests/ui/lexer/lex-bad-char-literals-8.stderr @@ -0,0 +1,9 @@ +error[E0762]: unterminated character literal + --> $DIR/lex-bad-char-literals-8.rs:2:13 + | +LL | let _ = ' hello // here's a comment + | ^^^^^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0762`. diff --git a/tests/ui/parser/bad-lit-suffixes.rs b/tests/ui/parser/bad-lit-suffixes.rs index 8cb9ef7e0c921..c614f49388574 100644 --- a/tests/ui/parser/bad-lit-suffixes.rs +++ b/tests/ui/parser/bad-lit-suffixes.rs @@ -28,11 +28,12 @@ fn main() { } #[rustc_dummy = "string"suffix] -//~^ ERROR unexpected expression: `"string"suffix` +//~^ ERROR suffixes on string literals are invalid fn f() {} #[must_use = "string"suffix] -//~^ ERROR unexpected expression: `"string"suffix` +//~^ ERROR suffixes on string literals are invalid +//~| ERROR malformed `must_use` attribute input fn g() {} #[link(name = "string"suffix)] diff --git a/tests/ui/parser/bad-lit-suffixes.stderr b/tests/ui/parser/bad-lit-suffixes.stderr index 756f99ab12c82..b5dacdf7d0d3d 100644 --- a/tests/ui/parser/bad-lit-suffixes.stderr +++ b/tests/ui/parser/bad-lit-suffixes.stderr @@ -10,26 +10,39 @@ error: suffixes on string literals are invalid LL | "C"suffix | ^^^^^^^^^ invalid suffix `suffix` -error: unexpected expression: `"string"suffix` +error: suffixes on string literals are invalid --> $DIR/bad-lit-suffixes.rs:30:17 | LL | #[rustc_dummy = "string"suffix] - | ^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^ invalid suffix `suffix` -error: unexpected expression: `"string"suffix` +error: suffixes on string literals are invalid --> $DIR/bad-lit-suffixes.rs:34:14 | LL | #[must_use = "string"suffix] - | ^^^^^^^^^^^^^^ + | ^^^^^^^^^^^^^^ invalid suffix `suffix` + +error: malformed `must_use` attribute input + --> $DIR/bad-lit-suffixes.rs:34:1 + | +LL | #[must_use = "string"suffix] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | +help: the following are the possible correct uses + | +LL | #[must_use = "reason"] + | +LL | #[must_use] + | error: suffixes on string literals are invalid - --> $DIR/bad-lit-suffixes.rs:38:15 + --> $DIR/bad-lit-suffixes.rs:39:15 | LL | #[link(name = "string"suffix)] | ^^^^^^^^^^^^^^ invalid suffix `suffix` error: invalid suffix `suffix` for number literal - --> $DIR/bad-lit-suffixes.rs:42:41 + --> $DIR/bad-lit-suffixes.rs:43:41 | LL | #[rustc_layout_scalar_valid_range_start(0suffix)] | ^^^^^^^ invalid suffix `suffix` @@ -136,5 +149,5 @@ LL | 1.0e10suffix; | = help: valid suffixes are `f32` and `f64` -error: aborting due to 20 previous errors +error: aborting due to 21 previous errors diff --git a/tests/ui/parser/byte-literals-2.rs b/tests/ui/parser/byte-literals-2.rs new file mode 100644 index 0000000000000..fb9e2ac69944a --- /dev/null +++ b/tests/ui/parser/byte-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + b'a //~ ERROR unterminated byte constant [E0763] +} diff --git a/tests/ui/parser/byte-literals-2.stderr b/tests/ui/parser/byte-literals-2.stderr new file mode 100644 index 0000000000000..f0e042ad605db --- /dev/null +++ b/tests/ui/parser/byte-literals-2.stderr @@ -0,0 +1,9 @@ +error[E0763]: unterminated byte constant + --> $DIR/byte-literals-2.rs:2:6 + | +LL | b'a + | ^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0763`. diff --git a/tests/ui/parser/byte-literals.rs b/tests/ui/parser/byte-literals.rs index 896dc1a1a5fba..963a0bb608d84 100644 --- a/tests/ui/parser/byte-literals.rs +++ b/tests/ui/parser/byte-literals.rs @@ -8,5 +8,4 @@ pub fn main() { b' '; //~ ERROR byte constant must be escaped b'''; //~ ERROR byte constant must be escaped b'é'; //~ ERROR non-ASCII character in byte literal - b'a //~ ERROR unterminated byte constant [E0763] } diff --git a/tests/ui/parser/byte-literals.stderr b/tests/ui/parser/byte-literals.stderr index 5b414c8927e2c..97805e01db49f 100644 --- a/tests/ui/parser/byte-literals.stderr +++ b/tests/ui/parser/byte-literals.stderr @@ -43,12 +43,5 @@ help: if you meant to use the unicode code point for 'é', use a \xHH escape LL | b'\xE9'; | ~~~~ -error[E0763]: unterminated byte constant - --> $DIR/byte-literals.rs:11:6 - | -LL | b'a - | ^^^^ - -error: aborting due to 7 previous errors +error: aborting due to 6 previous errors -For more information about this error, try `rustc --explain E0763`. diff --git a/tests/ui/parser/byte-string-literals-2.rs b/tests/ui/parser/byte-string-literals-2.rs new file mode 100644 index 0000000000000..7eb52b854e358 --- /dev/null +++ b/tests/ui/parser/byte-string-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + b"a //~ ERROR unterminated double quote byte string +} diff --git a/tests/ui/parser/byte-string-literals-2.stderr b/tests/ui/parser/byte-string-literals-2.stderr new file mode 100644 index 0000000000000..6fdb3c64ba783 --- /dev/null +++ b/tests/ui/parser/byte-string-literals-2.stderr @@ -0,0 +1,11 @@ +error[E0766]: unterminated double quote byte string + --> $DIR/byte-string-literals-2.rs:2:6 + | +LL | b"a + | ______^ +LL | | } + | |__^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0766`. diff --git a/tests/ui/parser/byte-string-literals.rs b/tests/ui/parser/byte-string-literals.rs index 30a4f50c4e40b..c14488dcb6689 100644 --- a/tests/ui/parser/byte-string-literals.rs +++ b/tests/ui/parser/byte-string-literals.rs @@ -5,5 +5,4 @@ pub fn main() { b"\x0Z"; //~ ERROR invalid character in numeric character escape: `Z` b"é"; //~ ERROR non-ASCII character in byte string literal br##"é"##; //~ ERROR non-ASCII character in raw byte string literal - b"a //~ ERROR unterminated double quote byte string } diff --git a/tests/ui/parser/byte-string-literals.stderr b/tests/ui/parser/byte-string-literals.stderr index 655b6998e85ff..2a2830c346825 100644 --- a/tests/ui/parser/byte-string-literals.stderr +++ b/tests/ui/parser/byte-string-literals.stderr @@ -37,14 +37,5 @@ error: non-ASCII character in raw byte string literal LL | br##"é"##; | ^ must be ASCII -error[E0766]: unterminated double quote byte string - --> $DIR/byte-string-literals.rs:8:6 - | -LL | b"a - | ______^ -LL | | } - | |__^ - -error: aborting due to 6 previous errors +error: aborting due to 5 previous errors -For more information about this error, try `rustc --explain E0766`. diff --git a/tests/ui/parser/issues/issue-104620.rs b/tests/ui/parser/issues/issue-104620.rs index f49476c44084c..fd0916b44110a 100644 --- a/tests/ui/parser/issues/issue-104620.rs +++ b/tests/ui/parser/issues/issue-104620.rs @@ -1,4 +1,4 @@ #![feature(rustc_attrs)] -#![rustc_dummy=5z] //~ ERROR unexpected expression: `5z` +#![rustc_dummy=5z] //~ ERROR invalid suffix `z` for number literal fn main() {} diff --git a/tests/ui/parser/issues/issue-104620.stderr b/tests/ui/parser/issues/issue-104620.stderr index fa20b5f8b1625..040c63a5fbfbf 100644 --- a/tests/ui/parser/issues/issue-104620.stderr +++ b/tests/ui/parser/issues/issue-104620.stderr @@ -1,8 +1,10 @@ -error: unexpected expression: `5z` +error: invalid suffix `z` for number literal --> $DIR/issue-104620.rs:3:16 | LL | #![rustc_dummy=5z] - | ^^ + | ^^ invalid suffix `z` + | + = help: the suffix must be one of the numeric types (`u32`, `isize`, `f32`, etc.) error: aborting due to 1 previous error diff --git a/tests/ui/parser/issues/issue-62913.rs b/tests/ui/parser/issues/issue-62913.rs index a55ef5ac71030..c77ef61a97b10 100644 --- a/tests/ui/parser/issues/issue-62913.rs +++ b/tests/ui/parser/issues/issue-62913.rs @@ -1,4 +1,5 @@ -"\u\\" -//~^ ERROR incorrect unicode escape sequence -//~| ERROR invalid trailing slash in literal -//~| ERROR expected item, found `"\u\"` +fn main() { + _ = "\u\\"; + //~^ ERROR incorrect unicode escape sequence + //~| ERROR invalid trailing slash in literal +} diff --git a/tests/ui/parser/issues/issue-62913.stderr b/tests/ui/parser/issues/issue-62913.stderr index c33e46837287f..bee6dd4580037 100644 --- a/tests/ui/parser/issues/issue-62913.stderr +++ b/tests/ui/parser/issues/issue-62913.stderr @@ -1,24 +1,16 @@ error: incorrect unicode escape sequence - --> $DIR/issue-62913.rs:1:2 + --> $DIR/issue-62913.rs:2:10 | -LL | "\u\" - | ^^^ incorrect unicode escape sequence +LL | _ = "\u\"; + | ^^^ incorrect unicode escape sequence | = help: format of unicode escape sequences is `\u{...}` error: invalid trailing slash in literal - --> $DIR/issue-62913.rs:1:5 + --> $DIR/issue-62913.rs:2:13 | -LL | "\u\" - | ^ invalid trailing slash in literal +LL | _ = "\u\"; + | ^ invalid trailing slash in literal -error: expected item, found `"\u\"` - --> $DIR/issue-62913.rs:1:1 - | -LL | "\u\" - | ^^^^^^ expected item - | - = note: for a full list of items that can appear in modules, see - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors diff --git a/tests/ui/parser/macro/literals-are-validated-before-expansion.rs b/tests/ui/parser/macro/literals-are-validated-before-expansion.rs deleted file mode 100644 index c3fc754b5567f..0000000000000 --- a/tests/ui/parser/macro/literals-are-validated-before-expansion.rs +++ /dev/null @@ -1,10 +0,0 @@ -macro_rules! black_hole { - ($($tt:tt)*) => {} -} - -fn main() { - black_hole! { '\u{FFFFFF}' } - //~^ ERROR: invalid unicode character escape - black_hole! { "this is surrogate: \u{DAAA}" } - //~^ ERROR: invalid unicode character escape -} diff --git a/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr b/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr deleted file mode 100644 index e874f62497ea8..0000000000000 --- a/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr +++ /dev/null @@ -1,18 +0,0 @@ -error: invalid unicode character escape - --> $DIR/literals-are-validated-before-expansion.rs:6:20 - | -LL | black_hole! { '\u{FFFFFF}' } - | ^^^^^^^^^^ invalid escape - | - = help: unicode escape must be at most 10FFFF - -error: invalid unicode character escape - --> $DIR/literals-are-validated-before-expansion.rs:8:39 - | -LL | black_hole! { "this is surrogate: \u{DAAA}" } - | ^^^^^^^^ invalid escape - | - = help: unicode escape must not be a surrogate - -error: aborting due to 2 previous errors - diff --git a/tests/ui/parser/raw/raw-byte-string-literals-2.rs b/tests/ui/parser/raw/raw-byte-string-literals-2.rs new file mode 100644 index 0000000000000..8ffda513dbf6f --- /dev/null +++ b/tests/ui/parser/raw/raw-byte-string-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation +} diff --git a/tests/ui/parser/raw/raw-byte-string-literals-2.stderr b/tests/ui/parser/raw/raw-byte-string-literals-2.stderr new file mode 100644 index 0000000000000..b4151eeef7017 --- /dev/null +++ b/tests/ui/parser/raw/raw-byte-string-literals-2.stderr @@ -0,0 +1,8 @@ +error: found invalid character; only `#` is allowed in raw string delimitation: ~ + --> $DIR/raw-byte-string-literals-2.rs:2:5 + | +LL | br##~"a"~##; + | ^^^^^ + +error: aborting due to 1 previous error + diff --git a/tests/ui/parser/raw/raw-byte-string-literals.rs b/tests/ui/parser/raw/raw-byte-string-literals.rs index 1b859fee596ad..3f91c381a9039 100644 --- a/tests/ui/parser/raw/raw-byte-string-literals.rs +++ b/tests/ui/parser/raw/raw-byte-string-literals.rs @@ -3,5 +3,4 @@ pub fn main() { br"a "; //~ ERROR bare CR not allowed in raw string br"é"; //~ ERROR non-ASCII character in raw byte string literal - br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation } diff --git a/tests/ui/parser/raw/raw-byte-string-literals.stderr b/tests/ui/parser/raw/raw-byte-string-literals.stderr index a2f27d1ed70ae..2a4073243cbca 100644 --- a/tests/ui/parser/raw/raw-byte-string-literals.stderr +++ b/tests/ui/parser/raw/raw-byte-string-literals.stderr @@ -10,11 +10,5 @@ error: non-ASCII character in raw byte string literal LL | br"é"; | ^ must be ASCII -error: found invalid character; only `#` is allowed in raw string delimitation: ~ - --> $DIR/raw-byte-string-literals.rs:6:5 - | -LL | br##~"a"~##; - | ^^^^^ - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr index fc071a9419142..806e222507f6e 100644 --- a/tests/ui/parser/unicode-control-codepoints.stderr +++ b/tests/ui/parser/unicode-control-codepoints.stderr @@ -1,87 +1,3 @@ -error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:26 - | -LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); - | ^^^^^^^^ unicode escape in byte string - | - = help: unicode escape sequences cannot be used as a byte or in a byte string - -error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:35 - | -LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); - | ^^^^^^^^ unicode escape in byte string - | - = help: unicode escape sequences cannot be used as a byte or in a byte string - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:26 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{202e}' - | -help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes - | -LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:30 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2066}' - | -help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes - | -LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:41 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2069}' - | -help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes - | -LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:43 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2066}' - | -help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes - | -LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:29 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{202e}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:33 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2066}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:44 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2069}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:46 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2066}' - error: unicode codepoint changing visible direction of text present in comment --> $DIR/unicode-control-codepoints.rs:2:5 | @@ -188,5 +104,89 @@ LL | | * ''); */fn bar() {} = note: if their presence wasn't intentional, you can remove them = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' +error: unicode escape in byte string + --> $DIR/unicode-control-codepoints.rs:6:26 + | +LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); + | ^^^^^^^^ unicode escape in byte string + | + = help: unicode escape sequences cannot be used as a byte or in a byte string + +error: unicode escape in byte string + --> $DIR/unicode-control-codepoints.rs:6:35 + | +LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); + | ^^^^^^^^ unicode escape in byte string + | + = help: unicode escape sequences cannot be used as a byte or in a byte string + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:26 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{202e}' + | +help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes + | +LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:30 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2066}' + | +help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes + | +LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:41 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2069}' + | +help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes + | +LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:43 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2066}' + | +help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes + | +LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:29 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{202e}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:33 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2066}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:44 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2069}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:46 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2066}' + error: aborting due to 17 previous errors