From ad062b72c004078ca97aaa0e9afe5e1f67ae36eb Mon Sep 17 00:00:00 2001 From: andrew <> Date: Fri, 31 Jan 2025 02:20:31 +0900 Subject: [PATCH 1/3] New macro formatting getting cluse from whitespace --- crates/genemichaels-lib/src/sg_general.rs | 136 ++++++++++----------- crates/genemichaels-lib/tests/roundtrip.rs | 11 +- 2 files changed, 74 insertions(+), 73 deletions(-) diff --git a/crates/genemichaels-lib/src/sg_general.rs b/crates/genemichaels-lib/src/sg_general.rs index 4e4e054..21abc44 100644 --- a/crates/genemichaels-lib/src/sg_general.rs +++ b/crates/genemichaels-lib/src/sg_general.rs @@ -321,6 +321,7 @@ pub(crate) fn append_macro_body( sg: &mut SplitGroupBuilder, tokens: TokenStream, ) { + // Try to parse entire macro like a function call if let Ok(exprs) = syn::parse2::(quote!{ f(#tokens) }) { @@ -331,6 +332,8 @@ pub(crate) fn append_macro_body( return; } } + + // Try to parse entire macro like a block if let Ok(block) = syn::parse2::(quote!{ { #tokens @@ -348,27 +351,17 @@ pub(crate) fn append_macro_body( } } - #[derive(PartialEq)] - enum ConsecMode { - // Start, joining punct (.) - ConnectForward, - // Idents, literals - NoConnect, - // Other punctuation - Punct, - } - - // Split token stream into "expressions" using `;` and `,` and then try to - // re-evaluate each expression to use normal formatting. + // Split token stream into "expressions" (/substream) using `;` and `,` and then + // try to format each expression. let mut substreams: Vec<(Vec, Option)> = vec![]; { let mut top = vec![]; for t in tokens { let (push, break_) = match &t { - proc_macro2::TokenTree::Punct(p) if matches!(p.as_char(), ';' | ',') => { + TokenTree::Punct(p) if matches!(p.as_char(), ';' | ',') => { (false, Some(Some(p.clone()))) }, - proc_macro2::TokenTree::Group(g) if matches!(g.delimiter(), proc_macro2::Delimiter::Brace) => { + TokenTree::Group(g) if matches!(g.delimiter(), proc_macro2::Delimiter::Brace) => { (true, Some(None)) }, _ => { @@ -403,6 +396,8 @@ pub(crate) fn append_macro_body( } let tokens = TokenStream::from_iter(sub.0); let punct = sub.1; + + // Try to parse current expression/substream as a function call if let Ok(exprs) = syn::parse2::(quote!{ f(#tokens #punct) }) { @@ -420,6 +415,8 @@ pub(crate) fn append_macro_body( break 'nextsub; } } + + // Try to parse current expression/substream as a block if let Ok(block) = syn::parse2::(quote!{ { #tokens #punct @@ -436,11 +433,37 @@ pub(crate) fn append_macro_body( break 'nextsub; } } + + // Freeform formatting { - let mut mode = ConsecMode::ConnectForward; + /// Identify punctuation that connects things tightly + fn is_pull_next_punct(p: &Punct) -> bool { + return match p.as_char() { + '.' => true, + '$' => true, + '`' => true, + '#' => true, + _ => false, + }; + } + + // With exceptions, the default heterogenous adjacent token tree behavior is to + // push. For punctuation-adjacent, it depends on the punctuation type. + fn is_hetero_push(prev: &Option) -> bool { + return match &prev { + Some(prev) => match prev { + TokenTree::Group(_) => false, + TokenTree::Ident(_) | TokenTree::Literal(_) => true, + TokenTree::Punct(punct) => !is_pull_next_punct(&punct), + }, + None => false, + }; + } + + let mut previous: Option = None; for t in tokens { - match t { - proc_macro2::TokenTree::Group(g) => { + match &t { + TokenTree::Group(g) => { append_whitespace(out, base_indent, sg, g.span_open().start()); sg.child({ let mut sg = new_sg(out); @@ -454,11 +477,8 @@ pub(crate) fn append_macro_body( }), g.stream()); }, proc_macro2::Delimiter::Brace => { - match mode { - ConsecMode::ConnectForward => { }, - _ => { - sg.seg(out, " "); - }, + if is_hetero_push(&previous) { + sg.seg(out, " "); } append_macro_body_bracketed(out, &indent, &mut sg, &MacroDelimiter::Brace({ let mut delim = Brace::default(); @@ -480,66 +500,42 @@ pub(crate) fn append_macro_body( } sg.build(out) }); - mode = ConsecMode::NoConnect; }, - proc_macro2::TokenTree::Ident(i) => { - match mode { - ConsecMode::ConnectForward => { }, - ConsecMode::NoConnect | ConsecMode::Punct => { - sg.seg(out, " "); - }, + TokenTree::Ident(i) => { + if is_hetero_push(&previous) { + sg.seg(out, " "); } append_whitespace(out, base_indent, sg, i.span().start()); sg.seg(out, &i.to_string()); - mode = ConsecMode::NoConnect; }, - proc_macro2::TokenTree::Punct(p) => match p.as_char() { - '\'' | '$' | '#' => { - match mode { - ConsecMode::ConnectForward => { }, - ConsecMode::NoConnect | ConsecMode::Punct => { - sg.seg(out, " "); - }, - } - append_whitespace(out, base_indent, sg, p.span().start()); - sg.seg(out, &p.to_string()); - mode = ConsecMode::ConnectForward; - }, - ':' => { - append_whitespace(out, base_indent, sg, p.span().start()); - sg.seg(out, &p.to_string()); - mode = ConsecMode::Punct; - }, - '.' => { - append_whitespace(out, base_indent, sg, p.span().start()); - sg.seg(out, &p.to_string()); - mode = ConsecMode::ConnectForward; - }, - _ => { - match mode { - ConsecMode::ConnectForward => { }, - ConsecMode::NoConnect => { - sg.seg(out, " "); + TokenTree::Punct(p) => { + if match &previous { + Some(previous) => match previous { + TokenTree::Group(_) | + TokenTree::Ident(_) | + TokenTree::Literal(_) => match p.as_char() { + ':' => false, + '*' => false, + _ => true, }, - ConsecMode::Punct => { }, - } - append_whitespace(out, base_indent, sg, p.span().start()); - sg.seg(out, &p.to_string()); - mode = ConsecMode::Punct; - }, - }, - proc_macro2::TokenTree::Literal(l) => { - match mode { - ConsecMode::ConnectForward => { }, - ConsecMode::NoConnect | ConsecMode::Punct => { - sg.seg(out, " "); + TokenTree::Punct(prev_p) => prev_p.span().end() != p.span().start(), }, + None => false, + } { + sg.seg(out, " "); + } + append_whitespace(out, base_indent, sg, p.span().start()); + sg.seg(out, &p.to_string()); + }, + TokenTree::Literal(l) => { + if is_hetero_push(&previous) { + sg.seg(out, " "); } append_whitespace(out, base_indent, sg, l.span().start()); sg.seg(out, &l.to_string()); - mode = ConsecMode::NoConnect; }, } + previous = Some(t); } if let Some(suf) = punct { append_whitespace(out, base_indent, sg, suf.span().start()); diff --git a/crates/genemichaels-lib/tests/roundtrip.rs b/crates/genemichaels-lib/tests/roundtrip.rs index faeec93..7251a8e 100644 --- a/crates/genemichaels-lib/tests/roundtrip.rs +++ b/crates/genemichaels-lib/tests/roundtrip.rs @@ -1,5 +1,4 @@ #![cfg(test)] - use genemichaels_lib::{ format_str, FormatConfig, @@ -105,8 +104,8 @@ fn rt_pat_field1() { #[test] fn rt_macro1() { - rt(r#"macro_rules! err(($l: expr, $($args: tt) *) => { - log!($l, slog::Level::Error, "", $($args) *) + rt(r#"macro_rules! err(($l: expr, $($args: tt)*) => { + log!($l, slog::Level::Error, "", $($args)*) }); "#); } @@ -137,6 +136,12 @@ fn rt_macro_star_equal() { "#); } +#[test] +fn rt_macro_star_equal_gt() { + rt(r#"x!(a* => b); +"#); +} + #[test] fn rt_comments_end() { rt(r#"const X: i32 = 7; From 847c8ca5d39395b14e494cea6b3f15d37f17d259 Mon Sep 17 00:00:00 2001 From: andrew <> Date: Fri, 31 Jan 2025 02:33:03 +0900 Subject: [PATCH 2/3] Revert pulling on star - could be multiplication --- crates/genemichaels-lib/src/sg_general.rs | 1 - crates/genemichaels-lib/tests/roundtrip.rs | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/genemichaels-lib/src/sg_general.rs b/crates/genemichaels-lib/src/sg_general.rs index 21abc44..9496ae9 100644 --- a/crates/genemichaels-lib/src/sg_general.rs +++ b/crates/genemichaels-lib/src/sg_general.rs @@ -515,7 +515,6 @@ pub(crate) fn append_macro_body( TokenTree::Ident(_) | TokenTree::Literal(_) => match p.as_char() { ':' => false, - '*' => false, _ => true, }, TokenTree::Punct(prev_p) => prev_p.span().end() != p.span().start(), diff --git a/crates/genemichaels-lib/tests/roundtrip.rs b/crates/genemichaels-lib/tests/roundtrip.rs index 7251a8e..a72ed99 100644 --- a/crates/genemichaels-lib/tests/roundtrip.rs +++ b/crates/genemichaels-lib/tests/roundtrip.rs @@ -104,8 +104,8 @@ fn rt_pat_field1() { #[test] fn rt_macro1() { - rt(r#"macro_rules! err(($l: expr, $($args: tt)*) => { - log!($l, slog::Level::Error, "", $($args)*) + rt(r#"macro_rules! err(($l: expr, $($args: tt) *) => { + log!($l, slog::Level::Error, "", $($args) *) }); "#); } @@ -138,7 +138,7 @@ fn rt_macro_star_equal() { #[test] fn rt_macro_star_equal_gt() { - rt(r#"x!(a* => b); + rt(r#"x!(a * => b); "#); } From 936a41e0016f2fc50be11e0873052d21b7e708ab Mon Sep 17 00:00:00 2001 From: andrew <> Date: Fri, 31 Jan 2025 22:30:22 +0900 Subject: [PATCH 3/3] Fix shebang check, a few more comments, tweaking macro style to reduce codebase changes --- crates/genemichaels-lib/src/lib.rs | 2 +- crates/genemichaels-lib/src/sg_general.rs | 13 +++++++------ crates/genemichaels-lib/src/whitespace.rs | 10 +++++++++- crates/genemichaels-lib/tests/roundtrip.rs | 17 ++++++++++++++++- 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/crates/genemichaels-lib/src/lib.rs b/crates/genemichaels-lib/src/lib.rs index 11d2abc..cb33ddb 100644 --- a/crates/genemichaels-lib/src/lib.rs +++ b/crates/genemichaels-lib/src/lib.rs @@ -503,7 +503,7 @@ pub fn format_str(source: &str, config: &FormatConfig) -> Result o + 1, None => source.len(), diff --git a/crates/genemichaels-lib/src/sg_general.rs b/crates/genemichaels-lib/src/sg_general.rs index 9496ae9..86d213a 100644 --- a/crates/genemichaels-lib/src/sg_general.rs +++ b/crates/genemichaels-lib/src/sg_general.rs @@ -440,8 +440,8 @@ pub(crate) fn append_macro_body( fn is_pull_next_punct(p: &Punct) -> bool { return match p.as_char() { '.' => true, + '\'' => true, '$' => true, - '`' => true, '#' => true, _ => false, }; @@ -449,10 +449,10 @@ pub(crate) fn append_macro_body( // With exceptions, the default heterogenous adjacent token tree behavior is to // push. For punctuation-adjacent, it depends on the punctuation type. - fn is_hetero_push(prev: &Option) -> bool { + fn is_hetero_push_next(prev: &Option) -> bool { return match &prev { Some(prev) => match prev { - TokenTree::Group(_) => false, + TokenTree::Group(_) => true, TokenTree::Ident(_) | TokenTree::Literal(_) => true, TokenTree::Punct(punct) => !is_pull_next_punct(&punct), }, @@ -477,7 +477,7 @@ pub(crate) fn append_macro_body( }), g.stream()); }, proc_macro2::Delimiter::Brace => { - if is_hetero_push(&previous) { + if is_hetero_push_next(&previous) { sg.seg(out, " "); } append_macro_body_bracketed(out, &indent, &mut sg, &MacroDelimiter::Brace({ @@ -502,7 +502,7 @@ pub(crate) fn append_macro_body( }); }, TokenTree::Ident(i) => { - if is_hetero_push(&previous) { + if is_hetero_push_next(&previous) { sg.seg(out, " "); } append_whitespace(out, base_indent, sg, i.span().start()); @@ -515,6 +515,7 @@ pub(crate) fn append_macro_body( TokenTree::Ident(_) | TokenTree::Literal(_) => match p.as_char() { ':' => false, + '.' => false, _ => true, }, TokenTree::Punct(prev_p) => prev_p.span().end() != p.span().start(), @@ -527,7 +528,7 @@ pub(crate) fn append_macro_body( sg.seg(out, &p.to_string()); }, TokenTree::Literal(l) => { - if is_hetero_push(&previous) { + if is_hetero_push_next(&previous) { sg.seg(out, " "); } append_whitespace(out, base_indent, sg, l.span().start()); diff --git a/crates/genemichaels-lib/src/whitespace.rs b/crates/genemichaels-lib/src/whitespace.rs index e766bed..dbbafdb 100644 --- a/crates/genemichaels-lib/src/whitespace.rs +++ b/crates/genemichaels-lib/src/whitespace.rs @@ -39,6 +39,9 @@ fn unicode_len(text: &str) -> VisualLen { VisualLen(text.chars().count()) } +/// Identifies the start/stop locations of whitespace in a chunk of source. +/// Whitespace is grouped runs, but the `keep_max_blank_lines` parameter allows +/// splitting the groups. pub fn extract_whitespaces( keep_max_blank_lines: usize, source: &str, @@ -374,7 +377,12 @@ pub fn extract_whitespaces( ).map_err( |e| loga::err_with( "Error undoing syn parse transformations", - ea!(line = e.span().start().line, column = e.span().start().column, error = e.to_string()), + ea!( + line = e.span().start().line, + column = e.span().start().column, + error = e.to_string(), + source = source.lines().skip(e.span().start().line - 1).next().unwrap() + ), ), )?, ); diff --git a/crates/genemichaels-lib/tests/roundtrip.rs b/crates/genemichaels-lib/tests/roundtrip.rs index a72ed99..9de4f35 100644 --- a/crates/genemichaels-lib/tests/roundtrip.rs +++ b/crates/genemichaels-lib/tests/roundtrip.rs @@ -425,7 +425,22 @@ fn rt_self_type() { #[test] fn rt_skip_shebang() { - rt(r#"#!#[cfg(test)] + rt(r#"#!/bin/bash fn main() { } "#); } + +#[test] +fn rt_dontskip_modattrs() { + rt( + r#"#![allow( + clippy::too_many_arguments, + clippy::field_reassign_with_default, + clippy::never_loop, + clippy::derive_hash_xor_eq +)] + +fn main() { } +"#, + ); +}