From dd8ee8a3f9c01368877bf31f13339cf631659860 Mon Sep 17 00:00:00 2001 From: Henner Zeller Date: Sat, 27 Jan 2024 18:35:30 -0800 Subject: [PATCH 1/3] LSP parse buffer: keep old parses alive whlie calling change listener. When updating the parse buffers, make sure the old ones don't disappear while we still call the callbacks for everyone interested in the changes. They might've hold on to some data and expect the old one to be still viable when they get the update call. Add a unit test. Issues: #2078 While at it: update and refine documentation. --- verilog/tools/ls/BUILD | 11 ++ verilog/tools/ls/lsp-parse-buffer.cc | 21 ++-- verilog/tools/ls/lsp-parse-buffer.h | 29 +++--- verilog/tools/ls/lsp-parse-buffer_test.cc | 120 ++++++++++++++++++++++ 4 files changed, 163 insertions(+), 18 deletions(-) create mode 100644 verilog/tools/ls/lsp-parse-buffer_test.cc diff --git a/verilog/tools/ls/BUILD b/verilog/tools/ls/BUILD index 286c052ac..9498fd529 100644 --- a/verilog/tools/ls/BUILD +++ b/verilog/tools/ls/BUILD @@ -52,6 +52,17 @@ cc_library( ], ) +cc_test( + name = "lsp-parse-buffer_test", + srcs = ["lsp-parse-buffer_test.cc"], + deps = [ + ":lsp-parse-buffer", + "//common/lsp:lsp-text-buffer", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "verible-lsp-adapter", srcs = ["verible-lsp-adapter.cc"], diff --git a/verilog/tools/ls/lsp-parse-buffer.cc b/verilog/tools/ls/lsp-parse-buffer.cc index 4359b1be9..d93c387bf 100644 --- a/verilog/tools/ls/lsp-parse-buffer.cc +++ b/verilog/tools/ls/lsp-parse-buffer.cc @@ -50,20 +50,19 @@ ParsedBuffer::ParsedBuffer(int64_t version, absl::string_view uri, content, uri)) { VLOG(1) << "Analyzed " << uri << " lex:" << parser_->LexStatus() << "; parser:" << parser_->ParseStatus() << std::endl; - // TODO(hzeller): we should use a filename not URI; strip prefix. + // TODO(hzeller): should we use a filename not URI ? if (auto lint_result = RunLinter(uri, *parser_); lint_result.ok()) { lint_statuses_ = std::move(lint_result.value()); } } -void BufferTracker::Update(const std::string &filename, +void BufferTracker::Update(const std::string &uri, const verible::lsp::EditTextBuffer &txt) { if (current_ && current_->version() == txt.last_global_version()) { return; // Nothing to do (we don't really expect this to happen) } - txt.RequestContent([&txt, &filename, this](absl::string_view content) { - current_ = std::make_shared(txt.last_global_version(), - filename, content); + txt.RequestContent([&txt, &uri, this](absl::string_view content) { + current_.reset(new ParsedBuffer(txt.last_global_version(), uri, content)); }); if (current_->parsed_successfully()) { last_good_ = current_; @@ -74,9 +73,19 @@ verible::lsp::BufferCollection::UriBufferCallback BufferTrackerContainer::GetSubscriptionCallback() { return [this](const std::string &uri, const verible::lsp::EditTextBuffer *txt) { + // The Update() might replace, thus discard, old parsed buffers. + // However, the change listeners we're about to inform might + // expect them to be still alive while the update takes place. + // So hold on to them here until all updates are performed. + // (this copy is cheap as it is just reference counted pointers). + BufferTracker remember_previous; + if (const BufferTracker *tracker = FindBufferTrackerOrNull(uri)) { + remember_previous = *tracker; + } + if (txt) { const BufferTracker *tracker = Update(uri, *txt); - // Now inform our listeners. + // Updated current() and last_good(); Now inform our listeners. for (const auto &change_listener : change_listeners_) { change_listener(uri, tracker); } diff --git a/verilog/tools/ls/lsp-parse-buffer.h b/verilog/tools/ls/lsp-parse-buffer.h index 4e263ba4a..945267ee0 100644 --- a/verilog/tools/ls/lsp-parse-buffer.h +++ b/verilog/tools/ls/lsp-parse-buffer.h @@ -63,13 +63,16 @@ class ParsedBuffer { std::vector lint_statuses_; }; -// A buffer tracker tracks the EditTextBuffer content and keeps up to -// two versions of ParsedBuffers - the latest, that might have parse errors, -// and the last known good that parsed without errors (if available). +// A buffer tracker tracks of a single file EditTextBuffer content and stores +// a parsed version. +// It keeps up to two versions of ParsedBuffers - the latest, that might have +// parse errors, and the last known good that parsed without errors +// (if available). class BufferTracker { public: - void Update(const std::string &filename, - const verible::lsp::EditTextBuffer &txt); + // Update with a changed text buffer from the LSP subsystem. Triggers + // re-parsing and updating our current() and potentially last_good(). + void Update(const std::string &uri, const verible::lsp::EditTextBuffer &txt); // --- // Thread guarantee for the following functions. @@ -80,10 +83,10 @@ class BufferTracker { // --- // Get the current ParsedBuffer from the last text update we received - // from the editor. This can be nullptr if it could not be parsed. + // from the editor. // - // Use in operations that only really makes sense on the latest view and - // only if it was parseable, e.g. suggesting edits. + // Use in operations that only really makes sense on the latest view, + // e.g. suggesting edits. std::shared_ptr current() const { return current_; } // Get the ParsedBuffer that represents that last time we were able to @@ -91,8 +94,9 @@ class BufferTracker { // as current() if the last text update was fully parseable, or nullptr // if we never received a buffer that was parseable. // - // Use in operations that focus on returning something even it it is slightly - // outdated, e.g. finding a particular symbol. + // Use in operations that focus on returning something that requires a + // valid parsed file even it it is slightly outdated, e.g. finding + // a particular symbol. std::shared_ptr last_good() const { return last_good_; } private: @@ -107,9 +111,10 @@ class BufferTracker { std::shared_ptr last_good_; }; -// Container holding all buffer trackers keyed by file uri. +// Container holding a buffer tracker per file uri. // This is the correspondent to verible::lsp::BufferCollection that -// internally stores +// internally stores file content by uri. Here we keep parsed files per uri, +// whenever we're informed of a change in the buffer collection. class BufferTrackerContainer { public: // Return a callback that allows to subscribe to an lsp::BufferCollection diff --git a/verilog/tools/ls/lsp-parse-buffer_test.cc b/verilog/tools/ls/lsp-parse-buffer_test.cc new file mode 100644 index 000000000..2bb3dc15a --- /dev/null +++ b/verilog/tools/ls/lsp-parse-buffer_test.cc @@ -0,0 +1,120 @@ +// Copyright 2024 The Verible Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "verilog/tools/ls/lsp-parse-buffer.h" + +#include "common/lsp/lsp-text-buffer.h" +#include "gtest/gtest.h" + +namespace verilog { +namespace { +TEST(BufferTraccker, ParseGoodDocument) { + verible::lsp::EditTextBuffer good_document("module foo(); endmodule"); + BufferTracker tracker; + EXPECT_EQ(tracker.current().get(), nullptr); + tracker.Update("foo.sv", good_document); + + ASSERT_NE(tracker.current().get(), nullptr); + + // Successfully parsed, so last_good() == current() + EXPECT_EQ(tracker.last_good().get(), tracker.current().get()); +} + +TEST(BufferTraccker, ParseParseErrorDocument) { + verible::lsp::EditTextBuffer bad_document("moduleinvalid foo(); endmodule"); + BufferTracker tracker; + tracker.Update("foo.sv", bad_document); + + ASSERT_NE(tracker.current().get(), nullptr); // a current document, maybe bad + + // Not successfully parsed, so last_good() is still null. + ASSERT_EQ(tracker.last_good().get(), nullptr); +} + +TEST(BufferTrackerConatainer, PopulateBufferCollection) { + BufferTrackerContainer container; + auto feed_callback = container.GetSubscriptionCallback(); + + verible::lsp::EditTextBuffer foo_doc("module foo(); endmodule"); + feed_callback("foo.sv", &foo_doc); + const BufferTracker *tracker = container.FindBufferTrackerOrNull("foo.sv"); + ASSERT_NE(tracker, nullptr); + EXPECT_NE(tracker->last_good().get(), nullptr); + + verible::lsp::EditTextBuffer bar_doc("modulebroken bar(); endmodule"); + feed_callback("bar.sv", &bar_doc); + tracker = container.FindBufferTrackerOrNull("bar.sv"); + ASSERT_NE(tracker, nullptr); + EXPECT_NE(tracker->current().get(), nullptr); // Current always there + EXPECT_EQ(tracker->last_good().get(), nullptr); // Was a bad parse + + feed_callback("bar.sv", nullptr); // Remove. + tracker = container.FindBufferTrackerOrNull("bar.sv"); + EXPECT_EQ(tracker, nullptr); +} + +TEST(BufferTrackerConatainer, ParseUpdateNotification) { + BufferTrackerContainer container; + + const verible::TextStructureView *last_text_structure = nullptr; + int update_remove_count = 0; // track these + container.AddChangeListener([&update_remove_count, &last_text_structure]( + const std::string &, + const BufferTracker *tracker) { + if (tracker != nullptr) { + ++update_remove_count; + } else { + --update_remove_count; + }; + + // attempt to access the previous text structure if it was not null to + // make sure it was not deleted. If it was, then this UB might only be + // detected in the ASAN test. + if (last_text_structure) { + // Accessing the content and see if it is alive and well, not corrupted. + EXPECT_TRUE(absl::StartsWith(last_text_structure->Contents(), "module")); + } + + // Remember current text structure for last time. + if (tracker) { + last_text_structure = &tracker->current()->parser().Data(); + } else { + last_text_structure = nullptr; + } + }); + + EXPECT_EQ(update_remove_count, 0); + + auto feed_callback = container.GetSubscriptionCallback(); + // Put one document in there. + verible::lsp::EditTextBuffer foo_doc("module foo(); endmodule"); + feed_callback("foo.sv", &foo_doc); + + EXPECT_EQ(update_remove_count, 1); + + const BufferTracker *tracker = container.FindBufferTrackerOrNull("foo.sv"); + ASSERT_NE(tracker, nullptr); + EXPECT_NE(tracker->last_good().get(), nullptr); + + verible::lsp::EditTextBuffer updated_foo_doc("module foobar(); endmodule"); + feed_callback("foo.sv", &updated_foo_doc); + EXPECT_EQ(update_remove_count, 2); + + // Remove doc + feed_callback("foo.sv", nullptr); + EXPECT_EQ(update_remove_count, 1); +} + +} // namespace +} // namespace verilog From 30a413a4988e155623fea4eab672d88d706f182e Mon Sep 17 00:00:00 2001 From: Henner Zeller Date: Sat, 27 Jan 2024 18:39:00 -0800 Subject: [PATCH 2/3] Provide erase() for interval_set and string_memory_map. ... needed later in verilog_project to unregeister string-view ranges. Issues: #2078 --- common/strings/string_memory_map.h | 3 +++ common/strings/string_memory_map_test.cc | 22 ++++++++++++++++++++++ common/util/interval_set.h | 3 +++ common/util/interval_set_test.cc | 20 ++++++++++++++++++++ 4 files changed, 48 insertions(+) diff --git a/common/strings/string_memory_map.h b/common/strings/string_memory_map.h index 350e37a5a..75f270750 100644 --- a/common/strings/string_memory_map.h +++ b/common/strings/string_memory_map.h @@ -65,6 +65,9 @@ class StringViewSuperRangeMap { return string_map_.find({substring.begin(), substring.end()}); } + // Erase given range. + const_iterator erase(const_iterator pos) { return string_map_.erase(pos); } + // Similar to find(), but asserts that a superstring range is found, // and converts the result directly to a string_view. absl::string_view must_find(absl::string_view substring) const { diff --git a/common/strings/string_memory_map_test.cc b/common/strings/string_memory_map_test.cc index a4e053210..4e3cde726 100644 --- a/common/strings/string_memory_map_test.cc +++ b/common/strings/string_memory_map_test.cc @@ -106,6 +106,28 @@ TEST(StringViewSuperRangeMapTest, TwoStrings) { }); } +TEST(StringViewSuperRangeMapTest, EraseString) { + constexpr absl::string_view text1("onestring"); + constexpr absl::string_view text2("another"); + StringViewSuperRangeMap svmap; + svmap.must_emplace(text1); + svmap.must_emplace(text2); + + auto found = svmap.find(text1); + EXPECT_NE(found, svmap.end()); + svmap.erase(found); + + // should be gone now + found = svmap.find(text1); + EXPECT_EQ(found, svmap.end()); + + found = svmap.find(text2); + EXPECT_NE(found, svmap.end()); + svmap.erase(found); + + EXPECT_TRUE(svmap.empty()); +} + // Function to get the owned address range of the underlying string. static absl::string_view StringViewKey( const std::unique_ptr &owned) { diff --git a/common/util/interval_set.h b/common/util/interval_set.h index 85f9c9a42..b39348e7c 100644 --- a/common/util/interval_set.h +++ b/common/util/interval_set.h @@ -625,6 +625,9 @@ class DisjointIntervalSet : private internal::IntervalSetImpl { return p; } + // Erase given interval. + const_iterator erase(const_iterator pos) { return intervals_.erase(pos); } + // Same as emplace(), but fails fatally if emplacement fails, // and only returns the iterator to the new map entry (which should have // consumed 'value'). diff --git a/common/util/interval_set_test.cc b/common/util/interval_set_test.cc index b4345a471..2f56ef1ef 100644 --- a/common/util/interval_set_test.cc +++ b/common/util/interval_set_test.cc @@ -1123,6 +1123,26 @@ TEST(DisjointIntervalSetTest, MustEmplaceOverlapsUpper) { EXPECT_DEATH(iset.must_emplace(45, 55), "Failed to emplace"); } +TEST(DisjointIntervalSetTest, EraseRange) { + IntIntervalSet iset; + iset.must_emplace(30, 40); + iset.must_emplace(50, 60); + + auto found = iset.find(35); + EXPECT_NE(found, iset.end()); + iset.erase(found); + + // ... should be gone now. + found = iset.find(35); + EXPECT_EQ(found, iset.end()); + + found = iset.find(55); + EXPECT_NE(found, iset.end()); + iset.erase(found); + + EXPECT_TRUE(iset.empty()); +} + TEST(DisjointIntervalMapTest, FindInterval) { IntIntervalSet iset; iset.must_emplace(20, 25); From 3afdb923501b897c037cf8afb957c8f28c55a6cb Mon Sep 17 00:00:00 2001 From: Henner Zeller Date: Sat, 27 Jan 2024 18:40:26 -0800 Subject: [PATCH 3/3] Unregister string view ranges on removed files. When files are updated and old ones removed, make sure that the corresponding index for the substring ranges is also updated. Move the data and logic needed for the substring index into its own class ContentToFileIndex. Since we _can_ removed string ranges now, we don't really have to choose anymore if we want to populate string view maps; still providing this feature for now, but calling it provide_lookup_file_origin, as this is what it provides. Make ParsedVerilogSourceFile take an Analyzer instead of a TextStructureView to better emphasize that this is essentially the same as the other VerilogSourceFiles, just with a non-owned Analyzer. This also allows to return the parse absl::Status instead of a potentially non-truthful ok(). Updating plumbing needed for the passing-an-analyzer. Add test for updating the project and verify that no stale string view index is kept (the original issue in #2078). Fixes: #2078 --- verilog/analysis/verilog_project.cc | 156 +++++++++--------- verilog/analysis/verilog_project.h | 135 +++++++++------ verilog/analysis/verilog_project_test.cc | 75 ++++++++- .../indexing_facts_tree_extractor_test.cc | 2 +- .../tools/kythe/verilog_kythe_extractor.cc | 2 +- verilog/tools/ls/autoexpand_test.cc | 2 +- verilog/tools/ls/symbol-table-handler.cc | 4 +- verilog/tools/ls/symbol-table-handler.h | 2 +- verilog/tools/ls/verilog-language-server.cc | 2 +- 9 files changed, 238 insertions(+), 142 deletions(-) diff --git a/verilog/analysis/verilog_project.cc b/verilog/analysis/verilog_project.cc index 5789defc7..1ae3aa4cf 100644 --- a/verilog/analysis/verilog_project.cc +++ b/verilog/analysis/verilog_project.cc @@ -42,7 +42,7 @@ static constexpr verilog::VerilogPreprocess::Config kPreprocessConfig{ }; VerilogSourceFile::VerilogSourceFile(absl::string_view referenced_path, - const absl::Status& status) + const absl::Status &status) : referenced_path_(referenced_path), status_(status) {} absl::Status VerilogSourceFile::Open() { @@ -89,7 +89,7 @@ absl::Status VerilogSourceFile::Parse() { return status_; } -const verible::TextStructureView* VerilogSourceFile::GetTextStructure() const { +const verible::TextStructureView *VerilogSourceFile::GetTextStructure() const { if (analyzed_structure_ == nullptr) return nullptr; return &analyzed_structure_->Data(); } @@ -101,8 +101,8 @@ std::vector VerilogSourceFile::ErrorMessages() const { return result; } -std::ostream& operator<<(std::ostream& stream, - const VerilogSourceFile& source) { +std::ostream &operator<<(std::ostream &stream, + const VerilogSourceFile &source) { stream << "referenced path: " << source.ReferencedPath() << std::endl; stream << "resolved path: " << source.ResolvedPath() << std::endl; stream << "corpus: " << source.Corpus() << std::endl; @@ -110,36 +110,48 @@ std::ostream& operator<<(std::ostream& stream, stream << "status: " << (status.ok() ? "ok" : status.message()) << std::endl; const auto content = source.GetContent(); stream << "have content? " << (!content.empty() ? "yes" : "no") << std::endl; - const auto* text_structure = source.GetTextStructure(); + const auto *text_structure = source.GetTextStructure(); stream << "have text structure? " << (text_structure ? "yes" : "no") << std::endl; return stream; } -absl::Status InMemoryVerilogSourceFile::Open() { - processing_state_ = ProcessingState::kOpened; - status_ = absl::OkStatus(); - return status_; +void VerilogProject::ContentToFileIndex::Register( + const VerilogSourceFile *file) { + CHECK(file); + const absl::string_view content = file->GetContent(); + string_view_map_.must_emplace(content); + const auto map_inserted = + buffer_to_analyzer_map_.emplace(content.begin(), file); + CHECK(map_inserted.second); } -absl::Status ParsedVerilogSourceFile::Open() { - processing_state_ = ProcessingState::kOpened; - status_ = absl::OkStatus(); - return status_; +void VerilogProject::ContentToFileIndex::Unregister( + const VerilogSourceFile *file) { + CHECK(file); + const absl::string_view content = file->GetContent(); + auto full_content_found = string_view_map_.find(content); + CHECK(full_content_found != string_view_map_.end()); + string_view_map_.erase(full_content_found); + buffer_to_analyzer_map_.erase(content.begin()); } -absl::Status ParsedVerilogSourceFile::Parse() { - processing_state_ = ProcessingState::kParsed; - status_ = absl::OkStatus(); - return status_; -} +const VerilogSourceFile *VerilogProject::ContentToFileIndex::Lookup( + absl::string_view content_substring) const { + // Look for corresponding source text (superstring) buffer start. + const auto found_superstring = string_view_map_.find(content_substring); + if (found_superstring == string_view_map_.end()) return nullptr; + const absl::string_view::const_iterator buffer_start = + found_superstring->first; -const verible::TextStructureView* ParsedVerilogSourceFile::GetTextStructure() - const { - return text_structure_; + // Reverse-lookup originating file based on buffer start. + const auto found_file = buffer_to_analyzer_map_.find(buffer_start); + if (found_file == buffer_to_analyzer_map_.end()) return nullptr; + + return found_file->second; } -absl::StatusOr VerilogProject::OpenFile( +absl::StatusOr VerilogProject::OpenFile( absl::string_view referenced_filename, absl::string_view resolved_filename, absl::string_view corpus) { const auto inserted = files_.emplace( @@ -147,44 +159,37 @@ absl::StatusOr VerilogProject::OpenFile( referenced_filename, resolved_filename, corpus)); CHECK(inserted.second); // otherwise, would have already returned above const auto file_iter = inserted.first; - VerilogSourceFile& file(*file_iter->second); + VerilogSourceFile &file(*file_iter->second); // Read the file's contents. if (absl::Status status = file.Open(); !status.ok()) { return status; } - // NOTE: string view maps don't support removal operation. The following block - // is valid only if files won't be removed from the project. - if (populate_string_maps_) { - const absl::string_view contents(file.GetContent()); - - // Register the file's contents range in string_view_map_. - string_view_map_.must_emplace(contents); - - // Map the start of the file's contents to its corresponding owner - // VerilogSourceFile. - const auto map_inserted = - buffer_to_analyzer_map_.emplace(contents.begin(), file_iter); - CHECK(map_inserted.second); - } + if (content_index_) content_index_->Register(&file); return &file; } +bool VerilogProject::RemoveByName(const std::string &filename) { + NameToFileMap::const_iterator found = files_.find(filename); + if (found == files_.end()) return false; + if (content_index_) content_index_->Unregister(found->second.get()); + files_.erase(found); + return true; +} + +// TODO: explain better in the header what happens with includes. bool VerilogProject::RemoveRegisteredFile( absl::string_view referenced_filename) { - CHECK(!populate_string_maps_) - << "Removing of files added to string maps is not supported! Disable " - "populating string maps."; - if (files_.erase(std::string(referenced_filename)) == 1) { + if (RemoveByName(std::string(referenced_filename))) { LOG(INFO) << "Removed " << referenced_filename << " from the project."; return true; } - for (const auto& include_path : include_paths_) { + for (const auto &include_path : include_paths_) { const std::string resolved_filename = verible::file::JoinPath(include_path, referenced_filename); - if (files_.erase(resolved_filename) == 1) { + if (RemoveByName(resolved_filename)) { LOG(INFO) << "Removed " << resolved_filename << " from the project."; return true; } @@ -204,28 +209,39 @@ std::string VerilogProject::GetRelativePathToSource( } void VerilogProject::UpdateFileContents( - absl::string_view path, const verible::TextStructureView* updatedtext) { - std::string projectpath = GetRelativePathToSource(path); + absl::string_view path, const verilog::VerilogAnalyzer *parsed) { + constexpr absl::string_view kCorpus = ""; + const std::string projectpath = GetRelativePathToSource(path); // If we get a non-null parsed file, use that, otherwise fall back to // file based loading. - std::unique_ptr contents = nullptr; - if (updatedtext) { - contents = std::make_unique( - projectpath, path, updatedtext, /*corpus=*/""); + std::unique_ptr source_file; + bool do_register_content = true; + if (parsed) { + source_file = std::make_unique(projectpath, path, + *parsed, kCorpus); } else { - contents = std::make_unique(projectpath, path, ""); + source_file = + std::make_unique(projectpath, path, kCorpus); + do_register_content = source_file->Open().ok(); } - auto fileptr = files_.find(projectpath); - if (fileptr == files_.end()) { - files_.emplace(projectpath, std::move(contents)); + if (content_index_ && do_register_content) { + content_index_->Register(source_file.get()); + } + + auto previously_existing = files_.find(projectpath); + if (previously_existing == files_.end()) { + files_.emplace(projectpath, std::move(source_file)); } else { - fileptr->second = std::move(contents); + if (content_index_) { + content_index_->Unregister(previously_existing->second.get()); + } + previously_existing->second = std::move(source_file); } } -VerilogSourceFile* VerilogProject::LookupRegisteredFileInternal( +VerilogSourceFile *VerilogProject::LookupRegisteredFileInternal( absl::string_view referenced_filename) const { const auto opened_file = FindOpenedFile(referenced_filename); if (opened_file) { @@ -236,7 +252,7 @@ VerilogSourceFile* VerilogProject::LookupRegisteredFileInternal( } // Check if this is already opened include file - for (const auto& include_path : include_paths_) { + for (const auto &include_path : include_paths_) { const std::string resolved_filename = verible::file::JoinPath(include_path, referenced_filename); const auto opened_file = FindOpenedFile(resolved_filename); @@ -247,7 +263,7 @@ VerilogSourceFile* VerilogProject::LookupRegisteredFileInternal( return nullptr; } -absl::optional> +absl::optional> VerilogProject::FindOpenedFile(absl::string_view filename) const { const auto found = files_.find(filename); if (found != files_.end()) { @@ -259,7 +275,7 @@ VerilogProject::FindOpenedFile(absl::string_view filename) const { return absl::nullopt; } -absl::StatusOr VerilogProject::OpenTranslationUnit( +absl::StatusOr VerilogProject::OpenTranslationUnit( absl::string_view referenced_filename) { // Check for a pre-existing entry to avoid duplicate files. { @@ -290,7 +306,7 @@ absl::Status VerilogProject::IncludeFileNotFoundError( "' among the included paths: ", absl::StrJoin(include_paths_, ", "))); } -absl::StatusOr VerilogProject::OpenIncludedFile( +absl::StatusOr VerilogProject::OpenIncludedFile( absl::string_view referenced_filename) { VLOG(1) << __FUNCTION__ << ", referenced: " << referenced_filename; // Check for a pre-existing entry to avoid duplicate files. @@ -302,7 +318,7 @@ absl::StatusOr VerilogProject::OpenIncludedFile( } // Check if this is already opened include file - for (const auto& include_path : include_paths_) { + for (const auto &include_path : include_paths_) { const std::string resolved_filename = verible::file::JoinPath(include_path, referenced_filename); const auto opened_file = FindOpenedFile(resolved_filename); @@ -312,7 +328,7 @@ absl::StatusOr VerilogProject::OpenIncludedFile( } // Locate the file among the base paths. - for (const auto& include_path : include_paths_) { + for (const auto &include_path : include_paths_) { const std::string resolved_filename = verible::file::JoinPath(include_path, referenced_filename); if (verible::file::FileExists(resolved_filename).ok()) { @@ -349,22 +365,10 @@ void VerilogProject::AddVirtualFile(absl::string_view resolved_filename, } } -const VerilogSourceFile* VerilogProject::LookupFileOrigin( +const VerilogSourceFile *VerilogProject::LookupFileOrigin( absl::string_view content_substring) const { - CHECK(populate_string_maps_) - << "Populating string maps must be enabled for LookupFileOrigin!"; - // Look for corresponding source text (superstring) buffer start. - const auto found_superstring = string_view_map_.find(content_substring); - if (found_superstring == string_view_map_.end()) return nullptr; - const absl::string_view::const_iterator buffer_start = - found_superstring->first; - - // Reverse-lookup originating file based on buffer start. - const auto found_file = buffer_to_analyzer_map_.find(buffer_start); - if (found_file == buffer_to_analyzer_map_.end()) return nullptr; - - const VerilogSourceFile* file = found_file->second->second.get(); - return file; + CHECK(content_index_) << "LookupFileOrigin() not enabled in constructor"; + return content_index_->Lookup(content_substring); } } // namespace verilog diff --git a/verilog/analysis/verilog_project.h b/verilog/analysis/verilog_project.h index 3f1aca422..e17c4f243 100644 --- a/verilog/analysis/verilog_project.h +++ b/verilog/analysis/verilog_project.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -177,6 +178,8 @@ class InMemoryVerilogSourceFile final : public VerilogSourceFile { absl::string_view corpus = "") : VerilogSourceFile(filename, filename, corpus) { content_ = std::move(content); + processing_state_ = ProcessingState::kOpened; // Advance state + status_ = absl::OkStatus(); } // Legacy @@ -187,77 +190,78 @@ class InMemoryVerilogSourceFile final : public VerilogSourceFile { filename, std::make_shared(contents), corpus) {} - // Load text into analyzer structure without actually opening a file. - absl::Status Open() final; + // Nothing to do, content already loaded. + absl::Status Open() final { return absl::OkStatus(); } }; -// Source file that was already parsed and got its own TextStructure. +// Source file that was already parsed. // Doesn't require file-system access, nor create temporary files. +// TODO: reorg class hierachy. The base class contains an owened analyzed +// structure; simplify to only track ownership in one place. class ParsedVerilogSourceFile final : public VerilogSourceFile { public: - // this constructor is used for updating file contents in the language server - // it sets referenced_path and resolved_path based on URI from language server + // Construct with an already existing VerilogAnalyzer that already + // parsed its content. + // Ownership if "analyzer" is not taken over, must outlive this objet. ParsedVerilogSourceFile(absl::string_view referenced_path, absl::string_view resolved_path, - const verible::TextStructureView *text_structure, + const verilog::VerilogAnalyzer &analyzer, absl::string_view corpus = "") : VerilogSourceFile(referenced_path, resolved_path, corpus), - text_structure_(text_structure) {} - - // filename can be fake, it is not used to open any file. - // text_structure is a pointer to a TextStructureView object of - // already parsed file. Current implementation does _not_ make a - // copy of it and expects it will be available for the lifetime of - // object of this class. - ParsedVerilogSourceFile(absl::string_view filename, - const verible::TextStructureView *text_structure, - absl::string_view corpus = "") - : VerilogSourceFile(filename, filename, corpus), - text_structure_(text_structure) {} + not_owned_analyzer_(&analyzer) { + processing_state_ = ProcessingState::kParsed; // Advance to full parsed. + status_ = analyzer.ParseStatus(); + } // Do nothing (file contents already loaded) - absl::Status Open() final; + absl::Status Open() final { return absl::OkStatus(); } // Do nothing (contents already parsed) - absl::Status Parse() final; + absl::Status Parse() final { return status_; } // Return TextStructureView provided previously in constructor - const verible::TextStructureView *GetTextStructure() const final; + const verible::TextStructureView *GetTextStructure() const final { + return ¬_owned_analyzer_->Data(); + } + // Return string-view content range of text structure. absl::string_view GetContent() const final { - return text_structure_->Contents(); + return not_owned_analyzer_->Data().Contents(); } private: - const verible::TextStructureView *const text_structure_; + // TODO: make some sort of owned/non-owned version so that we don't have + // two different types. + const verilog::VerilogAnalyzer *const not_owned_analyzer_; }; // VerilogProject represents a set of files as a cohesive unit of compilation. // Files can include top-level translation units and preprocessor included // files. This is responsible for owning string memory that corresponds -// to files' contents. +// to files' contents and analysis results. class VerilogProject { // Collection of per-file metadata and analyzer objects // key: referenced file name (as opposed to resolved filename) - using file_set_type = + using NameToFileMap = std::map, VerilogSourceFile::Less>; public: - using iterator = file_set_type::iterator; - using const_iterator = file_set_type::const_iterator; + using iterator = NameToFileMap::iterator; + using const_iterator = NameToFileMap::const_iterator; - // Constructor. Note that `populate_string_maps` (populating internal string - // view maps) fragments the class's usage. Enabling it prevents removing files - // from the project (which is required for Kythe facts extraction). + // Construct VerilogProject with a choice of allowing to look up file + // origin. VerilogProject(absl::string_view root, const std::vector &include_paths, absl::string_view corpus = "", - bool populate_string_maps = true) + bool provide_lookup_file_origin = true) : translation_unit_root_(root), - include_paths_(include_paths), corpus_(corpus), - populate_string_maps_(populate_string_maps) {} + include_paths_(include_paths), + content_index_(provide_lookup_file_origin + ? std::make_optional() + : std::nullopt) {} VerilogProject(const VerilogProject &) = delete; VerilogProject(VerilogProject &&) = delete; @@ -289,7 +293,7 @@ class VerilogProject { absl::string_view referenced_filename); // Adds an already opened file by directly passing its content. - // TODO(refactor): is this needed ? + // This is needed in external kythe backends. void AddVirtualFile(absl::string_view resolved_filename, absl::string_view content); @@ -317,9 +321,15 @@ class VerilogProject { // Returns relative path to the VerilogProject std::string GetRelativePathToSource(absl::string_view absolute_filepath); - // Updates file from external source, e.g. Language Server + // Updates file from external source with an already parsed content. + // (e.g. Language Server). + // "parsed" needs to be a VerilogAnalyzer that is valid until the given + // path is removed. + // If "parsed" is nullptr, the old parsed file is removed and replaced + // with a standard VerilogSourceFile, reading from a filesystem. + // (TODO: this is a fairly specific functionality; make this composed). void UpdateFileContents(absl::string_view path, - const verible::TextStructureView *updatedtext); + const verilog::VerilogAnalyzer *parsed); // Adds include directory to the project void AddIncludePath(absl::string_view includepath) { @@ -348,37 +358,52 @@ class VerilogProject { absl::optional> FindOpenedFile( absl::string_view filename) const; + // Attempt to remove file and metadata if it exists. Return 'true' on success. + bool RemoveByName(const std::string &filename); + // The path from which top-level translation units are referenced relatively // (often from a file list). This path can be relative or absolute. // Default: the working directory of the invoking process. const std::string translation_unit_root_ = "."; - // The sequence of directories from which to search for `included files. - // These can be absolute, or relative to the process's working directory. - std::vector include_paths_; - // The corpus to which this project belongs (e.g., // 'github.com/chipsalliance/verible'). const std::string corpus_; - // If true, opening a file will add its contents to string_view_map_ and - // buffer_to_analyzer_map_. NOTE: string view maps don't support removal - // operation. Setting this option prevents removing of the files from the - // project (removing the files leads to undefined behavior). - const bool populate_string_maps_; + // The sequence of directories from which to search for `included files. + // These can be absolute, or relative to the process's working directory. + std::vector include_paths_; // Set of opened files, keyed by referenced (not resolved) filename. - file_set_type files_; - - // Maps any string_view (substring) to its full source file text - // (superstring). - verible::StringViewSuperRangeMap string_view_map_; + NameToFileMap files_; + + // Index files by content substrings. Any substring in any of the files + // allows to look up the corresponding VerilogSourceFile object. + class ContentToFileIndex { + public: + // Put content range of file into index. + void Register(const VerilogSourceFile *file); + + // Remove given file from the index. + void Unregister(const VerilogSourceFile *file); + + // Given a memory subrange of any of the indexed files, return the + // corresponding file or nullptr if none of the files contains that range. + const VerilogSourceFile *Lookup(absl::string_view content_substring) const; + + private: + // Maps any string_view (substring) to its full source file text + // (superstring). + verible::StringViewSuperRangeMap string_view_map_; + + // Maps start of text buffer to its corresponding analyzer object. + // key: the starting address of a string buffer belonging to an opened file. + // This can come from the .begin() of any entry in string_view_map_. + std::map + buffer_to_analyzer_map_; + }; - // Maps start of text buffer to its corresponding analyzer object. - // key: the starting address of a string buffer belonging to an opened file. - // This can come from the .begin() of any entry in string_view_map_. - std::map - buffer_to_analyzer_map_; + std::optional content_index_; }; } // namespace verilog diff --git a/verilog/analysis/verilog_project_test.cc b/verilog/analysis/verilog_project_test.cc index 13eeace0f..0cb2828a9 100644 --- a/verilog/analysis/verilog_project_test.cc +++ b/verilog/analysis/verilog_project_test.cc @@ -206,21 +206,20 @@ TEST(InMemoryVerilogSourceFileTest, ParseInvalidFile) { EXPECT_EQ(&text_structure->SyntaxTree(), tree); } -TEST(ParsedVerilogSourceFileTest, ParseValidFile) { +TEST(ParsedVerilogSourceFileTest, PreparsedValidFile) { constexpr absl::string_view text("localparam int p = 1;\n"); std::unique_ptr analyzed_structure = std::make_unique(text, "internal"); absl::Status status = analyzed_structure->Analyze(); EXPECT_TRUE(status.ok()); - const TextStructureView &input_text_structure = analyzed_structure->Data(); - ParsedVerilogSourceFile file("internal", &input_text_structure); + ParsedVerilogSourceFile file("internal", "resolved", *analyzed_structure); // Parse automatically opens. EXPECT_TRUE(file.Parse().ok()); EXPECT_TRUE(file.Status().ok()); const TextStructureView *text_structure = ABSL_DIE_IF_NULL(file.GetTextStructure()); - EXPECT_EQ(&input_text_structure, text_structure); + EXPECT_EQ(&analyzed_structure->Data(), text_structure); const absl::string_view owned_string_range(text_structure->Contents()); EXPECT_EQ(owned_string_range, text); const auto *tokens = &text_structure->TokenStream(); @@ -236,6 +235,24 @@ TEST(ParsedVerilogSourceFileTest, ParseValidFile) { EXPECT_EQ(&text_structure->SyntaxTree(), tree); } +TEST(ParsedVerilogSourceFileTest, PreparsedInvalidValidFile) { + constexpr absl::string_view text("localp_TYPO_aram int p = 1;\n"); + std::unique_ptr analyzed_structure = + std::make_unique(text, "internal"); + absl::Status status = analyzed_structure->Analyze(); + EXPECT_FALSE(status.ok()); + + ParsedVerilogSourceFile file("internal", "resolved", *analyzed_structure); + EXPECT_TRUE(file.Open().ok()); // Already successfully implicitly opened. + EXPECT_FALSE(file.Parse().ok()); // We expect the same parse failure. + EXPECT_FALSE(file.Status().ok()); + const TextStructureView *text_structure = + ABSL_DIE_IF_NULL(file.GetTextStructure()); + EXPECT_EQ(&analyzed_structure->Data(), text_structure); + const absl::string_view owned_string_range(text_structure->Contents()); + EXPECT_EQ(owned_string_range, text); +} + TEST(VerilogProjectTest, NonexistentTranslationUnit) { const auto tempdir = ::testing::TempDir(); VerilogProject project(tempdir, {tempdir}); @@ -266,6 +283,56 @@ TEST(VerilogProjectTest, NonexistentFileLookup) { } } +TEST(VerilogProjectTest, UpdateFileContents) { + // The update file content is used typically by the language server. + // By default, we load a file from the filesystem unless we get it from the + // editor, which then overrides it. If removed, we should fall back to file + // contents. + const auto tempdir = ::testing::TempDir(); + const std::string project_root_dir = JoinPath(tempdir, "srcs"); + EXPECT_TRUE(CreateDir(project_root_dir).ok()); + + // root is director with sources. + VerilogProject project(project_root_dir, {}); + + // Prepare file to be auto-loaded later + constexpr absl::string_view file_content("module foo();\nendmodule\n"); + const ScopedTestFile tf(project_root_dir, file_content); + const absl::string_view reference_name = Basename(tf.filename()); + + VerilogSourceFile *from_file; + absl::string_view search_substring; + + // Push a local analyzed name under the name of the file. + constexpr absl::string_view external_content("localparam int p = 1;\n"); + std::unique_ptr analyzed_structure = + std::make_unique(external_content, "internal"); + project.UpdateFileContents(tf.filename(), analyzed_structure.get()); + + // Look up the file and see that content is the external content + from_file = *project.OpenTranslationUnit(reference_name); + EXPECT_EQ(from_file->GetContent(), external_content); + + // ... and we find our file given the substring. + search_substring = from_file->GetContent().substr(5); + EXPECT_EQ(project.LookupFileOrigin(search_substring), from_file); + + // Updating with empty content, i.e. removing the memory virtual file + // force reading from file. + project.UpdateFileContents(tf.filename(), nullptr); + + // Should be file content. + from_file = *project.OpenTranslationUnit(reference_name); + EXPECT_EQ(from_file->GetContent(), file_content); + + // Looking up by the old substring should not find anything anymore. + EXPECT_EQ(project.LookupFileOrigin(search_substring), nullptr); + + // But we find our file from our substring. + search_substring = from_file->GetContent().substr(5); + EXPECT_EQ(project.LookupFileOrigin(search_substring), from_file); +} + TEST(VerilogProjectTest, LookupFileOriginTest) { const auto tempdir = ::testing::TempDir(); const std::string sources_dir = JoinPath(tempdir, "srcs"); diff --git a/verilog/tools/kythe/indexing_facts_tree_extractor_test.cc b/verilog/tools/kythe/indexing_facts_tree_extractor_test.cc index 7cf158702..5388042e3 100644 --- a/verilog/tools/kythe/indexing_facts_tree_extractor_test.cc +++ b/verilog/tools/kythe/indexing_facts_tree_extractor_test.cc @@ -101,7 +101,7 @@ class SimpleTestProject : public TempDir, public VerilogProject { explicit SimpleTestProject(absl::string_view code_text, const std::vector &include_paths = {}) : VerilogProject(temp_dir_, include_paths, /*corpus=*/"unittest", - /*populate_string_maps=*/false), + /*provide_lookup_file_origin=*/false), code_text_(code_text), translation_unit_(code_text, temp_dir_, [this](absl::string_view full_file_name) diff --git a/verilog/tools/kythe/verilog_kythe_extractor.cc b/verilog/tools/kythe/verilog_kythe_extractor.cc index 17cdf487b..ccd597e4b 100644 --- a/verilog/tools/kythe/verilog_kythe_extractor.cc +++ b/verilog/tools/kythe/verilog_kythe_extractor.cc @@ -211,7 +211,7 @@ Output: Produces Indexing Facts for kythe (http://kythe.io). file_list.preprocessing.include_dirs.end()); verilog::VerilogProject project(file_list_root, include_dir_paths, absl::GetFlag(FLAGS_verilog_project_name), - /*populate_string_maps=*/false); + /*provide_lookup_file_origin=*/false); const std::vector errors( verilog::kythe::ExtractTranslationUnits(file_list_path, &project, diff --git a/verilog/tools/ls/autoexpand_test.cc b/verilog/tools/ls/autoexpand_test.cc index 12144d042..128b8f15b 100644 --- a/verilog/tools/ls/autoexpand_test.cc +++ b/verilog/tools/ls/autoexpand_test.cc @@ -142,7 +142,7 @@ void TestTextEditsWithProject( SymbolTableHandler symbol_table_handler; symbol_table_handler.SetProject(proj); symbol_table_handler.UpdateFileContent(TESTED_FILENAME, - &tracker.current()->parser().Data()); + &tracker.current()->parser()); symbol_table_handler.BuildProjectSymbolTable(); // Run the tested edit function std::vector edits = run.edit_fn(&symbol_table_handler, &tracker); diff --git a/verilog/tools/ls/symbol-table-handler.cc b/verilog/tools/ls/symbol-table-handler.cc index f9a89d100..ca6eb83a3 100644 --- a/verilog/tools/ls/symbol-table-handler.cc +++ b/verilog/tools/ls/symbol-table-handler.cc @@ -365,9 +365,9 @@ void SymbolTableHandler::CollectReferences( } void SymbolTableHandler::UpdateFileContent( - absl::string_view path, const verible::TextStructureView *content) { + absl::string_view path, const verilog::VerilogAnalyzer *parsed) { files_dirty_ = true; - curr_project_->UpdateFileContents(path, content); + curr_project_->UpdateFileContents(path, parsed); } }; // namespace verilog diff --git a/verilog/tools/ls/symbol-table-handler.h b/verilog/tools/ls/symbol-table-handler.h index 11cab5cae..c7f205e43 100644 --- a/verilog/tools/ls/symbol-table-handler.h +++ b/verilog/tools/ls/symbol-table-handler.h @@ -70,7 +70,7 @@ class SymbolTableHandler { // Provide new parsed content for the given path. If "content" is nullptr, // opens the given file instead. void UpdateFileContent(absl::string_view path, - const verible::TextStructureView *content); + const verilog::VerilogAnalyzer *parsed); // Creates a symbol table for entire project (public: needed in unit-test) std::vector BuildProjectSymbolTable(); diff --git a/verilog/tools/ls/verilog-language-server.cc b/verilog/tools/ls/verilog-language-server.cc index b8e141cb7..af8a492e8 100644 --- a/verilog/tools/ls/verilog-language-server.cc +++ b/verilog/tools/ls/verilog-language-server.cc @@ -253,7 +253,7 @@ void VerilogLanguageServer::UpdateEditedFileInProject( } if (!buffer_tracker->last_good()) return; symbol_table_handler_.UpdateFileContent( - path, &buffer_tracker->last_good()->parser().Data()); + path, &buffer_tracker->last_good()->parser()); VLOG(1) << "Updated file: " << uri << " (" << path << ")"; }