From d5ef2b18b1e55ef986096a2cc6abe6a0b5f6213e Mon Sep 17 00:00:00 2001 From: never Date: Wed, 15 Nov 2023 18:02:24 +0800 Subject: [PATCH] feat: support incremental resolver Signed-off-by: never --- kclvm/ast/src/ast.rs | 12 ++ kclvm/query/src/query.rs | 1 + kclvm/runner/src/lib.rs | 2 +- kclvm/sema/src/advanced_resolver/mod.rs | 6 +- kclvm/sema/src/advanced_resolver/node.rs | 29 +++- kclvm/sema/src/pre_process/identifier.rs | 8 +- kclvm/sema/src/resolver/cache.rs | 209 +++++++++++++++++++++++ kclvm/sema/src/resolver/mod.rs | 17 +- kclvm/sema/src/resolver/tests.rs | 33 ++++ kclvm/tools/src/LSP/src/util.rs | 1 + kclvm/tools/src/lint/mod.rs | 1 + 11 files changed, 305 insertions(+), 14 deletions(-) create mode 100644 kclvm/sema/src/resolver/cache.rs diff --git a/kclvm/ast/src/ast.rs b/kclvm/ast/src/ast.rs index aa41bfff8..9bc80bd7a 100644 --- a/kclvm/ast/src/ast.rs +++ b/kclvm/ast/src/ast.rs @@ -162,6 +162,18 @@ impl Node { } } + pub fn node_with_pos_and_id(node: T, pos: PosTuple, id: AstIndex) -> Self { + Self { + id, + node, + filename: pos.0.clone(), + line: pos.1, + column: pos.2, + end_line: pos.3, + end_column: pos.4, + } + } + pub fn node_with_pos(node: T, pos: PosTuple) -> Self { Self { id: AstIndex::default(), diff --git a/kclvm/query/src/query.rs b/kclvm/query/src/query.rs index 7cfca5e84..8d6c2b63f 100644 --- a/kclvm/query/src/query.rs +++ b/kclvm/query/src/query.rs @@ -128,6 +128,7 @@ fn resolve_file(file: &str, code: Option<&str>) -> Result>> { resolve_val: true, ..Default::default() }, + None, ); match scope.main_scope() { Some(scope) => Ok(scope.clone()), diff --git a/kclvm/runner/src/lib.rs b/kclvm/runner/src/lib.rs index 7c86b07a5..abe10fab8 100644 --- a/kclvm/runner/src/lib.rs +++ b/kclvm/runner/src/lib.rs @@ -204,7 +204,7 @@ pub fn execute( let mut resolve_opts = Options::default(); resolve_opts.merge_program = false; // Resolve ast - let scope = resolve_program_with_opts(&mut program, resolve_opts); + let scope = resolve_program_with_opts(&mut program, resolve_opts, None); emit_compile_diag_to_string(sess, &scope, args.compile_only)?; return Ok("".to_string()); } diff --git a/kclvm/sema/src/advanced_resolver/mod.rs b/kclvm/sema/src/advanced_resolver/mod.rs index c33e60bfa..e55c797df 100644 --- a/kclvm/sema/src/advanced_resolver/mod.rs +++ b/kclvm/sema/src/advanced_resolver/mod.rs @@ -106,10 +106,13 @@ impl<'ctx> AdvancedResolver<'ctx> { for (name, modules) in advanced_resolver.ctx.program.pkgs.iter() { advanced_resolver.ctx.current_pkgpath = Some(name.clone()); if let Some(pkg_info) = advanced_resolver.gs.get_packages().get_package_info(name) { - advanced_resolver.enter_root_scope(name.clone(), pkg_info.pkg_filepath.clone()); if modules.is_empty() { continue; } + if !advanced_resolver.ctx.scopes.is_empty() { + advanced_resolver.ctx.scopes.clear(); + } + advanced_resolver.enter_root_scope(name.clone(), pkg_info.pkg_filepath.clone()); for module in modules.iter() { advanced_resolver.ctx.current_filename = Some(module.filename.clone()); advanced_resolver.walk_module(module); @@ -136,7 +139,6 @@ impl<'ctx> AdvancedResolver<'ctx> { fn enter_local_scope(&mut self, filepath: &str, start: Position, end: Position) { let parent = *self.ctx.scopes.last().unwrap(); - let local_scope = LocalSymbolScope::new(parent, start, end); let scope_ref = self.gs.get_scopes_mut().alloc_local_scope(local_scope); diff --git a/kclvm/sema/src/advanced_resolver/node.rs b/kclvm/sema/src/advanced_resolver/node.rs index a3fac45c1..e8f1851b7 100644 --- a/kclvm/sema/src/advanced_resolver/node.rs +++ b/kclvm/sema/src/advanced_resolver/node.rs @@ -136,11 +136,17 @@ impl<'ctx> MutSelfTypedResultWalker<'ctx> for AdvancedResolver<'ctx> { } fn walk_schema_stmt(&mut self, schema_stmt: &'ctx ast::SchemaStmt) -> Self::Result { - let schema_ty = self.ctx.node_ty_map.get(&schema_stmt.name.id)?.clone(); + let schema_ty = self + .ctx + .node_ty_map + .get(&schema_stmt.name.id) + .unwrap() + .clone(); let schema_symbol = self .gs .get_symbols() - .get_type_symbol(&schema_ty, self.get_current_module_info())?; + .get_type_symbol(&schema_ty, self.get_current_module_info()) + .unwrap(); if self .gs @@ -198,24 +204,29 @@ impl<'ctx> MutSelfTypedResultWalker<'ctx> for AdvancedResolver<'ctx> { self.gs .get_symbols_mut() .schemas - .get_mut(schema_symbol.get_id())? + .get_mut(schema_symbol.get_id()) + .unwrap() .parent_schema = self.walk_identifier_expr(parent); } if let Some(for_host) = &schema_stmt.for_host_name { self.gs .get_symbols_mut() .schemas - .get_mut(schema_symbol.get_id())? + .get_mut(schema_symbol.get_id()) + .unwrap() .for_host = self.walk_identifier_expr(for_host); } let mut mixins = vec![]; for mixin in schema_stmt.mixins.iter() { - mixins.push(self.walk_identifier_expr(mixin)?); + if let Some(mixin) = self.walk_identifier_expr(mixin) { + mixins.push(mixin); + } } self.gs .get_symbols_mut() .schemas - .get_mut(schema_symbol.get_id())? + .get_mut(schema_symbol.get_id()) + .unwrap() .mixins = mixins; if let Some(args) = &schema_stmt.args { @@ -254,12 +265,14 @@ impl<'ctx> MutSelfTypedResultWalker<'ctx> for AdvancedResolver<'ctx> { let name = self .gs .get_symbols() - .get_symbol(attribute_symbol)? + .get_symbol(attribute_symbol) + .unwrap() .get_name(); self.gs .get_symbols_mut() .schemas - .get_mut(schema_symbol.get_id())? + .get_mut(schema_symbol.get_id()) + .unwrap() .attributes .insert(name, attribute_symbol); } diff --git a/kclvm/sema/src/pre_process/identifier.rs b/kclvm/sema/src/pre_process/identifier.rs index 9ac00c454..4f25bcf1f 100644 --- a/kclvm/sema/src/pre_process/identifier.rs +++ b/kclvm/sema/src/pre_process/identifier.rs @@ -163,7 +163,13 @@ impl<'ctx> MutSelfMutWalker<'ctx> for RawIdentifierTransformer { identifier.names = identifier .names .iter() - .map(|name| Node::node_with_pos(remove_raw_ident_prefix(&name.node), name.pos())) + .map(|name| { + Node::node_with_pos_and_id( + remove_raw_ident_prefix(&name.node), + name.pos(), + name.id.clone(), + ) + }) .collect::>>(); } fn walk_schema_attr(&mut self, schema_attr: &'ctx mut ast::SchemaAttr) { diff --git a/kclvm/sema/src/resolver/cache.rs b/kclvm/sema/src/resolver/cache.rs new file mode 100644 index 000000000..f11c4d602 --- /dev/null +++ b/kclvm/sema/src/resolver/cache.rs @@ -0,0 +1,209 @@ +use std::{ + cell::RefCell, + collections::{HashMap, HashSet, VecDeque}, + rc::Rc, +}; + +use indexmap::IndexMap; + +use crate::ty::TypeRef; + +use super::scope::{ProgramScope, Scope}; +use kclvm_ast::ast; + +/// For CachedScope, we assume that all changed files must be located in kclvm_ast::MAIN_PKG , +/// if this is not the case, please clear the cache directly +#[derive(Debug, Clone, Default)] +pub struct CachedScope { + pub program_root: String, + pub scope_map: IndexMap>>, + pub node_ty_map: IndexMap, + dependency_graph: DependencyGraph, +} +#[derive(Debug, Clone, Default)] + +struct DependencyGraph { + /// map filename to pkgpath + pub module_map: HashMap>, + /// map pkgpath to node + pub node_map: HashMap, +} + +impl DependencyGraph { + pub fn clear(&mut self) { + self.module_map.clear(); + self.node_map.clear(); + } + + pub fn update(&mut self, program: &ast::Program) -> Result, String> { + let mut new_modules = HashMap::new(); + for (pkgpath, modules) in program.pkgs.iter() { + if pkgpath == kclvm_ast::MAIN_PKG { + continue; + } + if !self.node_map.contains_key(pkgpath) { + self.node_map.insert( + pkgpath.to_string(), + DependencyNode { + pkgpath: pkgpath.to_string(), + parents: HashSet::new(), + children: HashSet::new(), + }, + ); + } + for module in modules { + if !self.module_map.contains_key(&module.filename) { + new_modules.insert(module.filename.to_string(), module); + self.module_map + .insert(module.filename.to_string(), HashSet::new()); + } + self.module_map + .get_mut(&module.filename) + .unwrap() + .insert(pkgpath.to_string()); + } + } + + for new_module in new_modules.values() { + self.add_new_module(new_module); + } + let mut invalidated_set = HashSet::new(); + if let Some(main_modules) = program.pkgs.get(kclvm_ast::MAIN_PKG) { + for module in main_modules { + let result = self.invalidate_module(module)?; + let _ = result.into_iter().map(|pkg| invalidated_set.insert(pkg)); + self.remove_dependency_from_pkg(&module.filename); + self.add_new_module(module); + } + } + Ok(invalidated_set) + } + + fn add_new_module(&mut self, new_module: &ast::Module) { + let module_file = new_module.filename.clone(); + if let Some(pkgpaths) = self.module_map.get(&module_file) { + for stmt in &new_module.body { + if let ast::Stmt::Import(import_stmt) = &stmt.node { + let parent_pkg = &import_stmt.path; + if let Some(parent_node) = self.node_map.get_mut(parent_pkg) { + parent_node.children.insert(new_module.filename.clone()); + } + for pkgpath in pkgpaths { + let cur_node = self.node_map.get_mut(pkgpath).unwrap(); + cur_node.parents.insert(parent_pkg.clone()); + } + } + } + } + } + + fn invalidate_module( + &mut self, + changed_module: &ast::Module, + ) -> Result, String> { + let module_file = changed_module.filename.clone(); + let mut invalidated_set = HashSet::new(); + if let Some(pkgpaths) = self.module_map.get(&module_file).cloned() { + let mut pkg_queue = VecDeque::new(); + for pkgpath in pkgpaths.iter() { + invalidated_set.insert(pkgpath.clone()); + pkg_queue.push_back(self.node_map.get(pkgpath)); + } + + let mut old_size = 0; + while old_size < invalidated_set.len() { + old_size = invalidated_set.len(); + let cur_node = loop { + match pkg_queue.pop_front() { + Some(cur_node) => match cur_node { + None => continue, + Some(cur_node) => { + if invalidated_set.contains(&cur_node.pkgpath) { + continue; + } + invalidated_set.insert(cur_node.pkgpath.clone()); + break Some(cur_node); + } + }, + None => break None, + } + }; + if let Some(cur_node) = cur_node { + for child in cur_node.children.iter() { + if let Some(child_pkgs) = self.module_map.get(child) { + for child_pkg in child_pkgs { + if invalidated_set.contains(child_pkg) { + continue; + } + pkg_queue.push_back(self.node_map.get(child_pkg)); + } + } + } + } + } + }; + Ok(invalidated_set) + } + + fn remove_dependency_from_pkg(&mut self, filename: &str) { + if let Some(pkgpaths) = self.module_map.get(filename).cloned() { + for pkgpath in pkgpaths { + if let Some(node) = self.node_map.get(&pkgpath).cloned() { + for parent in node.parents { + if let Some(parent_node) = self.node_map.get_mut(&parent) { + parent_node.children.remove(filename); + } + } + } + } + } + } +} +#[derive(Debug, Clone, Default)] +struct DependencyNode { + pkgpath: String, + //the pkgpath which is imported by this pkg + parents: HashSet, + //the files which import this pkg + children: HashSet, +} + +impl CachedScope { + pub fn new(scope: &ProgramScope, program: &ast::Program) -> Self { + let mut cached_scope = Self { + program_root: program.root.to_string(), + scope_map: scope.scope_map.clone(), + node_ty_map: scope.node_ty_map.clone(), + dependency_graph: DependencyGraph::default(), + }; + let invalidated_pkgs = cached_scope.dependency_graph.update(program); + cached_scope.invalidte_cache(invalidated_pkgs.as_ref()); + cached_scope + } + + pub fn clear(&mut self) { + self.scope_map.clear(); + self.node_ty_map.clear(); + self.dependency_graph.clear(); + } + + pub fn invalidte_cache(&mut self, invalidated_pkgs: Result<&HashSet, &String>) { + match invalidated_pkgs { + Ok(invalidated_pkgs) => { + for invalidated_pkg in invalidated_pkgs.iter() { + self.scope_map.remove(invalidated_pkg); + } + } + Err(_) => self.clear(), + } + } + + pub fn update(&mut self, program: &ast::Program) { + if self.program_root != program.root { + self.clear(); + self.program_root = program.root.clone(); + } + let invalidated_pkgs = self.dependency_graph.update(program); + self.invalidte_cache(invalidated_pkgs.as_ref()); + } +} diff --git a/kclvm/sema/src/resolver/mod.rs b/kclvm/sema/src/resolver/mod.rs index 23ccb902a..2dd115140 100644 --- a/kclvm/sema/src/resolver/mod.rs +++ b/kclvm/sema/src/resolver/mod.rs @@ -1,5 +1,6 @@ mod arg; mod attr; +pub mod cache; mod calculation; mod config; pub mod doc; @@ -35,6 +36,7 @@ use kclvm_ast::ast::AstIndex; use kclvm_ast::ast::Program; use kclvm_error::*; +use self::cache::CachedScope; use self::scope::{builtin_scope, ProgramScope}; /// Resolver is responsible for program semantic checking, mainly @@ -167,15 +169,26 @@ impl Default for Options { /// Resolve program with default options. #[inline] pub fn resolve_program(program: &mut Program) -> ProgramScope { - resolve_program_with_opts(program, Options::default()) + resolve_program_with_opts(program, Options::default(), None) } /// Resolve program with options. See [Options] -pub fn resolve_program_with_opts(program: &mut Program, opts: Options) -> ProgramScope { +pub fn resolve_program_with_opts( + program: &mut Program, + opts: Options, + cached_scope: Option, +) -> ProgramScope { pre_process_program(program, &opts); let mut resolver = Resolver::new(program, opts.clone()); + if let Some(mut cached_scope) = cached_scope { + cached_scope.update(program); + resolver.scope_map = cached_scope.scope_map; + resolver.scope_map.remove(kclvm_ast::MAIN_PKG); + resolver.node_ty_map = cached_scope.node_ty_map + } resolver.resolve_import(); let scope = resolver.check_and_lint(kclvm_ast::MAIN_PKG); + if opts.type_alise { let type_alias_mapping = resolver.ctx.type_alias_mapping.clone(); process_program_type_alias(program, type_alias_mapping); diff --git a/kclvm/sema/src/resolver/tests.rs b/kclvm/sema/src/resolver/tests.rs index 0bab904f9..2890f4984 100644 --- a/kclvm/sema/src/resolver/tests.rs +++ b/kclvm/sema/src/resolver/tests.rs @@ -2,7 +2,9 @@ use super::Options; use super::Resolver; use crate::builtin::BUILTIN_FUNCTION_NAMES; use crate::pre_process::pre_process_program; +use crate::resolver::cache::CachedScope; use crate::resolver::resolve_program; +use crate::resolver::resolve_program_with_opts; use crate::resolver::scope::*; use crate::ty::{Type, TypeKind}; use kclvm_ast::ast; @@ -45,6 +47,37 @@ fn test_resolve_program() { assert!(main_scope.lookup("print").is_none()); } +#[test] +fn test_resolve_program_with_cache() { + let mut program = parse_program("./src/resolver/test_data/assign.k").unwrap(); + + let scope = resolve_program_with_opts( + &mut program, + Options { + merge_program: false, + type_alise: false, + ..Default::default() + }, + None, + ); + let cached_scope = CachedScope::new(&scope, &program); + let scope = resolve_program_with_opts( + &mut program, + Options { + merge_program: false, + type_alise: false, + ..Default::default() + }, + Some(cached_scope), + ); + assert_eq!(scope.pkgpaths(), vec!["__main__".to_string()]); + let main_scope = scope.main_scope().unwrap(); + let main_scope = main_scope.borrow_mut(); + assert!(main_scope.lookup("a").is_some()); + assert!(main_scope.lookup("b").is_some()); + assert!(main_scope.lookup("print").is_none()); +} + #[test] fn test_pkg_init_in_schema_resolve() { let sess = Arc::new(ParseSession::default()); diff --git a/kclvm/tools/src/LSP/src/util.rs b/kclvm/tools/src/LSP/src/util.rs index ebf113a90..3fab6c3cb 100644 --- a/kclvm/tools/src/LSP/src/util.rs +++ b/kclvm/tools/src/LSP/src/util.rs @@ -91,6 +91,7 @@ pub(crate) fn parse_param_and_compile( type_alise: false, ..Default::default() }, + None, ); let gs = GlobalState::default(); diff --git a/kclvm/tools/src/lint/mod.rs b/kclvm/tools/src/lint/mod.rs index 6d4cdc719..c2340d8d4 100644 --- a/kclvm/tools/src/lint/mod.rs +++ b/kclvm/tools/src/lint/mod.rs @@ -88,6 +88,7 @@ pub fn lint_files( merge_program: false, ..Default::default() }, + None, ) .handler .diagnostics,