diff --git a/CHANGELOG.md b/CHANGELOG.md index 4423b8f..3aedcf6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ ## Unreleased +## `v0.12.2` + + * `Checkpoint`s for the `GreenNodeBuilder` can now be used across node boundaries, meaning you can use them to wrap (finished) nodes in addition to just tokens. + * A new method `Checkpoint::revert_to` has been added which resets a `GreenNodeBuilder` to the state it was in when the checkpoint was taken, allowing a parser to backtrack to the checkpoint. + +## `v0.12.1` + * Implement `Hash` and `Eq` for `ResolvedNode` and `ResolvedToken` ## `v0.12.0` diff --git a/Cargo.toml b/Cargo.toml index 3b7c718..c5ff1d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ resolver = "2" [workspace.package] edition = "2021" -version = "0.12.1" # when updating, also update `#![doc(html_root_url)]` and any inter-crate dependencies (such as `cstree`'s dependency on `cstree-derive`) +version = "0.12.2" # when updating, also update `#![doc(html_root_url)]` and any inter-crate dependencies (such as `cstree`'s dependency on `cstree-derive`) authors = [ "Domenic Quirl ", "Aleksey Kladov ", diff --git a/cstree/Cargo.toml b/cstree/Cargo.toml index 56ec836..08cc158 100644 --- a/cstree/Cargo.toml +++ b/cstree/Cargo.toml @@ -25,7 +25,7 @@ indexmap = "2.4.0" [dependencies.cstree_derive] path = "../cstree-derive" -version = "0.12.1" # must match the `cstree` version in the virtual workspace manifest +version = "0.12.2" # must match the `cstree` version in the virtual workspace manifest optional = true [dependencies.lasso] diff --git a/cstree/src/green/builder.rs b/cstree/src/green/builder.rs index cf4e967..2a6636b 100644 --- a/cstree/src/green/builder.rs +++ b/cstree/src/green/builder.rs @@ -242,7 +242,10 @@ where /// A checkpoint for maybe wrapping a node. See [`GreenNodeBuilder::checkpoint`] for details. #[derive(Clone, Copy, Debug)] -pub struct Checkpoint(usize); +pub struct Checkpoint { + parent_idx: usize, + child_idx: usize, +} /// A builder for green trees. /// Construct with [`new`](GreenNodeBuilder::new), [`with_cache`](GreenNodeBuilder::with_cache), or @@ -442,10 +445,12 @@ where self.children.push(node.into()); } - /// Prepare for maybe wrapping the next node with a surrounding node. + /// Take a snapshot of the builder's state, which can be used to retroactively insert surrounding nodes by calling + /// [`start_node_at`](GreenNodeBuilder::start_node_at), or for backtracking by allowing to + /// [`revert_to`](GreenNodeBuilder::revert_to) the returned checkpoint. /// - /// The way wrapping works is that you first get a checkpoint, then you add nodes and tokens as - /// normal, and then you *maybe* call [`start_node_at`](GreenNodeBuilder::start_node_at). + /// Checkpoints are _invalidated_ and can no longer be used if nodes older than (started before) the checkpoint are + /// finished, and also if the builder is reverted past them to an even earlier checkpoint. /// /// # Examples /// ``` @@ -469,27 +474,144 @@ where /// ``` #[inline] pub fn checkpoint(&self) -> Checkpoint { - Checkpoint(self.children.len()) + Checkpoint { + parent_idx: self.parents.len(), + child_idx: self.children.len(), + } } - /// Wrap the previous branch marked by [`checkpoint`](GreenNodeBuilder::checkpoint) in a new - /// branch and make it current. + /// Restore the builder's state to when the [`Checkpoint`] was taken. + /// + /// This is useful for backtracking parsers. It will delete any nodes that were started but not finished since the + /// checkpoint was taken, as well as any tokens that were recorded since. Checkpoints can be nested, but reverting + /// to a checkpoint invalidates all other checkpoints that were taken after it. + /// + /// ## Panics + /// + /// When using checkpoints, calls to [`start_node`](GreenNodeBuilder::start_node) and + /// [`finish_node`](GreenNodeBuilder::finish_node) must always be balanced in between the call + /// to [`checkpoint`](GreenNodeBuilder::checkpoint) and a call to either `revert_to` or + /// [`start_node_at`](GreenNodeBuilder::start_node_at). + /// Since it is impossible to revert the finishing of any node started before taking the checkpoint, this function + /// will panic if it detects that this is the case. + /// + /// Example: + /// ```rust + /// # use cstree::testing::*; + /// # use cstree::build::GreenNodeBuilder; + /// # struct Parser; + /// # impl Parser { + /// # fn peek(&self) -> Option { None } + /// # fn parse_expr(&mut self) {} + /// # } + /// # let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); + /// # let mut parser = Parser; + /// let checkpoint = builder.checkpoint(); + /// parser.parse_expr(); + /// if let Some(Plus) = parser.peek() { + /// // 1 + 2 = Add(1, 2) + /// builder.start_node_at(checkpoint, Operation); + /// parser.parse_expr(); + /// builder.finish_node(); + /// } else { + /// builder.revert_to(checkpoint); + /// } + /// ``` + pub fn revert_to(&mut self, checkpoint: Checkpoint) { + let Checkpoint { parent_idx, child_idx } = checkpoint; + // NOTE: This doesn't catch scenarios where we've read more tokens since the previous revert + // (see test `misuse3`), but it'll catch simple cases of reverting to the past and then + // trying to revert to the future (see tests `misuse` and `misuse2`). + // + // See `start_node_at` for a general explanation of the conditions here. As opposed to `start_node_at`, + // unfinished nodes are fine here (since we're going to revert them), so a `parent_idx` less than + // `self.parents.len()` is valid. + assert!( + parent_idx <= self.parents.len(), + "checkpoint no longer valid, was `finish_node` called early or did you already `revert_to`?" + ); + assert!( + child_idx <= self.children.len(), + "checkpoint no longer valid after reverting to an earlier checkpoint" + ); + if let Some(&(_, first_child)) = self.parents.last() { + assert!( + child_idx >= first_child, + "checkpoint no longer valid, was an unmatched start_node_at called?" + ); + } + + self.parents.truncate(parent_idx); + self.children.truncate(child_idx); + } + + /// Start a node at the given [`checkpoint`](GreenNodeBuilder::checkpoint), wrapping all nodes + /// and tokens created since the checkpoint was taken. + /// + /// ## Panics + /// + /// When using checkpoints, calls to [`start_node`](GreenNodeBuilder::start_node) and + /// [`finish_node`](GreenNodeBuilder::finish_node) must always be balanced in between the call + /// to [`checkpoint`](GreenNodeBuilder::checkpoint) and a call to either + /// [`revert_to`](GreenNodeBuilder::revert_to) or `start_node_at`: + /// + /// It is impossible to start a node retroactively if one or more nodes started before taking + /// the checkpoint have already been finished when trying to call this function. This function + /// will panic if it detects that this is the case. + /// + /// This function will also panic if one or more nodes were started _after_ the checkpoint was + /// taken and were not yet finished, which would leave these nodes simultaneously inside and + /// outside the newly started node (neither node would contain the other). #[inline] pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: S) { - let Checkpoint(checkpoint) = checkpoint; + let Checkpoint { parent_idx, child_idx } = checkpoint; + // NOTE: Due to the requirement to balance `start_node` and `finish_node` in between the calls to `checkpoint` + // and this function, there must be exactly the same number of `self.parents` now as there were when the + // checkpoint was created (`start_node` creates a new parent and `finish_node` moves it to `self.children`). + // + // We assert both inequalities separately to give better error messages: if the checkpoint points past the end + // of `self.parents`, this indicates that a node outside the scope of the checkpoint was already finished (or + // reverted), while there being parents past the checkpointed index indicates that new nodes were started that + // were left unfinished. + assert!( + parent_idx <= self.parents.len(), + "checkpoint no longer valid, was `finish_node` called early or did you already `revert_to`?" + ); + assert!( + parent_idx >= self.parents.len(), + "checkpoint contains one or more unfinished nodes" + ); + + // NOTE: The checkpoint can only point past the end of children if there were children on the stack when it was + // taken that got removed before it was used. Since any nodes started after taking the checkpoint will + // point to a `first_child` >= the checkpoint's `child_idx`, they cannot remove such children when they + // are finished. The are only two ways to remove them: + // 1. to finish a node which was already started at the time of taking the checkpoint and which may thus point + // to an earlier child as its `first_child`, or + // 2. to `revert_to` an earlier checkpoint before trying to start a new node at this one. + // + // Since we require calls to `start_node` and `finish_node` to be balanced between taking and using the + // checkpoint, (1) will be caught by the assert against the `parent_idx` above. This will also catch (2) in + // cases where at least one node is started between the earlier checkpoint and this one. However, it may be the + // case that only tokens are recorded in between the two, in which case we need to check the `child_idx` too. assert!( - checkpoint <= self.children.len(), - "checkpoint no longer valid, was finish_node called early?" + child_idx <= self.children.len(), + "checkpoint no longer valid after reverting to an earlier checkpoint" ); + // NOTE: When `start_node` is called after `checkpoint`, the new node gets pushed to `parents`. + // However, `start_node` and `finish_node` must be balanced inbetween the calls to `checkpoint` and + // `start_node_at`, and `finish_node` causes the topmost parent to become a child instead. + // Therefore, the topmost parent _at the time of calling `start_node_at`_ must still be pointing + // "behind" the checkpoint in the list of children. if let Some(&(_, first_child)) = self.parents.last() { assert!( - checkpoint >= first_child, - "checkpoint no longer valid, was an unmatched start_node_at called?" + child_idx >= first_child, + "checkpoint no longer valid, was an unmatched `start_node` or `start_node_at` called?" ); } - self.parents.push((kind, checkpoint)); + self.parents.push((kind, child_idx)); } /// Complete building the tree. diff --git a/cstree/src/lib.rs b/cstree/src/lib.rs index 7b789a6..c79e70f 100644 --- a/cstree/src/lib.rs +++ b/cstree/src/lib.rs @@ -93,7 +93,7 @@ )] #![warn(missing_docs)] // Docs.rs -#![doc(html_root_url = "https://docs.rs/cstree/0.12.1")] +#![doc(html_root_url = "https://docs.rs/cstree/0.12.2")] #![cfg_attr(doc_cfg, feature(doc_cfg))] pub mod getting_started; diff --git a/cstree/tests/it/main.rs b/cstree/tests/it/main.rs index a952a23..024bc73 100644 --- a/cstree/tests/it/main.rs +++ b/cstree/tests/it/main.rs @@ -1,5 +1,6 @@ mod basic; mod regressions; +mod rollback; mod sendsync; #[cfg(feature = "serialize")] mod serde; @@ -7,7 +8,8 @@ mod serde; use cstree::{ build::{GreenNodeBuilder, NodeCache}, green::GreenNode, - interning::Interner, + interning::{Interner, Resolver}, + util::NodeOrToken, RawSyntaxKind, Syntax, }; @@ -78,3 +80,34 @@ where } from } + +#[track_caller] +pub fn assert_tree_eq( + (left, left_res): (&SyntaxNode, &impl Resolver), + (right, right_res): (&SyntaxNode, &impl Resolver), +) { + if left.green() == right.green() { + return; + } + + if left.kind() != right.kind() || left.children_with_tokens().len() != right.children_with_tokens().len() { + panic!("{} !=\n{}", left.debug(left_res, true), right.debug(right_res, true)) + } + + for elem in left.children_with_tokens().zip(right.children_with_tokens()) { + match elem { + (NodeOrToken::Node(ln), NodeOrToken::Node(rn)) => assert_tree_eq((ln, left_res), (rn, right_res)), + (NodeOrToken::Node(n), NodeOrToken::Token(t)) => { + panic!("{} != {}", n.debug(left_res, true), t.debug(right_res)) + } + (NodeOrToken::Token(t), NodeOrToken::Node(n)) => { + panic!("{} != {}", t.debug(left_res), n.debug(right_res, true)) + } + (NodeOrToken::Token(lt), NodeOrToken::Token(rt)) => { + if lt.syntax_kind() != rt.syntax_kind() || lt.resolve_text(left_res) != rt.resolve_text(right_res) { + panic!("{} != {}", lt.debug(left_res), rt.debug(right_res)) + } + } + } + } +} diff --git a/cstree/tests/it/rollback.rs b/cstree/tests/it/rollback.rs new file mode 100644 index 0000000..c826ce5 --- /dev/null +++ b/cstree/tests/it/rollback.rs @@ -0,0 +1,306 @@ +use super::*; +use cstree::interning::Resolver; + +type GreenNodeBuilder<'cache, 'interner> = cstree::build::GreenNodeBuilder<'cache, 'interner, SyntaxKind>; + +fn with_builder(f: impl FnOnce(&mut GreenNodeBuilder)) -> (SyntaxNode, impl Resolver) { + let mut builder = GreenNodeBuilder::new(); + f(&mut builder); + let (node, cache) = builder.finish(); + (SyntaxNode::new_root(node), cache.unwrap().into_interner().unwrap()) +} + +#[test] +#[should_panic = "`left == right` failed"] +fn comparison_works() { + let (first, res1) = with_builder(|_| {}); + let (second, res2) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + builder.token(SyntaxKind(1), "hi"); + builder.finish_node(); + }); + assert_tree_eq((&first, &res1), (&second, &res2)); +} + +#[test] +fn no_rollback_token() { + let (first, res1) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + builder.token(SyntaxKind(1), "hi"); + builder.finish_node(); + }); + let (second, res2) = with_builder(|builder| { + let checkpoint = builder.checkpoint(); + builder.token(SyntaxKind(1), "hi"); + builder.start_node_at(checkpoint, SyntaxKind(0)); + builder.finish_node(); + }); + assert_tree_eq((&first, &res1), (&second, &res2)); +} + +#[test] +fn no_rollback_node() { + let (first, res1) = with_builder(|builder| { + builder.start_node(SyntaxKind(2)); + builder.start_node(SyntaxKind(0)); + builder.token(SyntaxKind(1), "hi"); + builder.finish_node(); + builder.finish_node(); + }); + let (second, res2) = with_builder(|builder| { + let checkpoint = builder.checkpoint(); + builder.start_node(SyntaxKind(0)); + builder.token(SyntaxKind(1), "hi"); + builder.finish_node(); + builder.start_node_at(checkpoint, SyntaxKind(2)); + builder.finish_node(); + }); + assert_tree_eq((&first, &res1), (&second, &res2)); +} + +#[test] +#[should_panic = "unfinished nodes"] +fn no_rollback_unfinished_node() { + let (second, res2) = with_builder(|builder| { + let checkpoint = builder.checkpoint(); + builder.start_node(SyntaxKind(0)); + builder.token(SyntaxKind(1), "hi"); + builder.start_node_at(checkpoint, SyntaxKind(2)); + builder.finish_node(); + builder.finish_node(); + }); + println!("{}", second.debug(&res2, true)); +} + +#[test] +fn simple() { + let (first, res1) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + builder.finish_node(); + }); + let (second, res2) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + + // Add a token, then remove it. + let initial = builder.checkpoint(); + builder.token(SyntaxKind(1), "hi"); + builder.revert_to(initial); + + builder.finish_node(); + }); + assert_tree_eq((&first, &res1), (&second, &res2)); +} + +#[test] +fn nested() { + let (first, res1) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + builder.finish_node(); + }); + + let (second, res2) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + // Add two tokens, then remove both. + let initial = builder.checkpoint(); + builder.token(SyntaxKind(1), "hi"); + builder.token(SyntaxKind(2), "hello"); + builder.revert_to(initial); + + builder.finish_node(); + }); + + let (third, res3) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + + // Add two tokens, then remove one after the other. + let initial = builder.checkpoint(); + builder.token(SyntaxKind(1), "hi"); + let second = builder.checkpoint(); + builder.token(SyntaxKind(2), "hello"); + builder.revert_to(second); + builder.revert_to(initial); + + builder.finish_node(); + }); + + assert_tree_eq((&first, &res1), (&second, &res2)); + assert_tree_eq((&first, &res1), (&third, &res3)); +} + +#[test] +fn unfinished_node() { + let (first, res1) = with_builder(|builder| { + builder.start_node(SyntaxKind(2)); + builder.finish_node(); + }); + let (second, res2) = with_builder(|builder| { + builder.start_node(SyntaxKind(2)); + let checkpoint = builder.checkpoint(); + builder.start_node(SyntaxKind(0)); + builder.token(SyntaxKind(1), "hi"); + builder.revert_to(checkpoint); + builder.finish_node(); + }); + assert_tree_eq((&first, &res1), (&second, &res2)); +} + +#[test] +#[should_panic = "checkpoint no longer valid after reverting to an earlier checkpoint"] +fn misuse() { + let (first, res1) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + builder.finish_node(); + }); + let (second, res2) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + + // Add two tokens, but remove them in the wrong order. + let initial = builder.checkpoint(); + builder.token(SyntaxKind(1), "hi"); + let new = builder.checkpoint(); + builder.token(SyntaxKind(2), "hello"); + builder.revert_to(initial); + builder.revert_to(new); + + builder.finish_node(); + }); + + assert_tree_eq((&first, &res1), (&second, &res2)); +} + +#[test] +#[should_panic = "did you already `revert_to`?"] +fn misuse2() { + with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + + // Take two snapshots across a node boundary, but revert them in the wrong order. + let initial = builder.checkpoint(); + builder.start_node(SyntaxKind(3)); + builder.token(SyntaxKind(1), "hi"); + let new = builder.checkpoint(); + builder.token(SyntaxKind(2), "hello"); + builder.revert_to(initial); + builder.revert_to(new); + + builder.finish_node(); + }); +} + +#[test] +fn misuse3() { + let (first, res1) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + builder.token(SyntaxKind(3), "no"); + builder.finish_node(); + }); + + let (second, res2) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + + // Add two tokens, revert to the initial state, add three tokens, and try to revert to an earlier checkpoint. + let initial = builder.checkpoint(); + builder.token(SyntaxKind(1), "hi"); + let new = builder.checkpoint(); + builder.token(SyntaxKind(2), "hello"); + builder.revert_to(initial); + + // This is wrong, but there's not a whole lot the library can do about it. + builder.token(SyntaxKind(3), "no"); + builder.token(SyntaxKind(4), "bad"); + builder.token(SyntaxKind(4), "wrong"); + builder.revert_to(new); + + builder.finish_node(); + }); + + assert_tree_eq((&first, &res1), (&second, &res2)); +} + +#[test] +#[should_panic = "was `finish_node` called early or did you already `revert_to`"] +fn misuse_combined() { + with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + + // Take two snapshots across a node boundary, revert to the earlier one but then try to start a node at the + // later one. + let initial = builder.checkpoint(); + builder.start_node(SyntaxKind(3)); + builder.token(SyntaxKind(1), "hi"); + let new = builder.checkpoint(); + builder.token(SyntaxKind(2), "hello"); + builder.revert_to(initial); + builder.start_node_at(new, SyntaxKind(4)); + + builder.finish_node(); + }); +} + +#[test] +#[should_panic = "reverting to an earlier checkpoint"] +fn misuse_combined2() { + with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + + // Take two snapshots with only tokens between them, revert to the earlier one but then try to start a node at + // the later one. + let initial = builder.checkpoint(); + builder.token(SyntaxKind(1), "hi"); + let new = builder.checkpoint(); + builder.token(SyntaxKind(2), "hello"); + builder.revert_to(initial); + builder.start_node_at(new, SyntaxKind(3)); + + builder.finish_node(); + }); +} + +#[test] +fn revert_then_start() { + let (first, res1) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + builder.start_node(SyntaxKind(3)); + builder.token(SyntaxKind(2), "hello"); + builder.finish_node(); + builder.finish_node(); + }); + let (second, res2) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + + // Take two snapshots with only tokens between them, revert to the earlier one but then try to start a node at + // the later one. + let initial = builder.checkpoint(); + builder.token(SyntaxKind(1), "hi"); + builder.revert_to(initial); + builder.start_node_at(initial, SyntaxKind(3)); + builder.token(SyntaxKind(2), "hello"); + builder.finish_node(); + + builder.finish_node(); + }); + assert_tree_eq((&first, &res1), (&second, &res2)); +} + +#[test] +fn start_then_revert() { + let (first, res1) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + builder.token(SyntaxKind(2), "hello"); + builder.finish_node(); + }); + let (second, res2) = with_builder(|builder| { + builder.start_node(SyntaxKind(0)); + + // Take two snapshots with only tokens between them, revert to the earlier one but then try to start a node at + // the later one. + let initial = builder.checkpoint(); + builder.token(SyntaxKind(1), "hi"); + builder.start_node_at(initial, SyntaxKind(3)); + builder.revert_to(initial); + builder.token(SyntaxKind(2), "hello"); + + builder.finish_node(); + }); + assert_tree_eq((&first, &res1), (&second, &res2)); +} diff --git a/rustfmt.toml b/rustfmt.toml index 2351305..e193002 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,15 +1,15 @@ unstable_features = true -edition = "2018" +edition = "2021" -max_width = 120 +max_width = 120 comment_width = 120 wrap_comments = true format_code_in_doc_comments = true -format_macro_matchers = true +format_macro_matchers = true -imports_granularity= "Crate" +imports_granularity = "Crate" reorder_impl_items = true