From d4a8c9a2c883762021fb19afb5506bd94e6c6426 Mon Sep 17 00:00:00 2001 From: HuangYi Date: Fri, 12 Jan 2024 14:17:58 +0800 Subject: [PATCH 1/5] Problem: memiavl background snapshot rewriting panic when shutdown Solution: - gracefully cancel the task when shutdown --- CHANGELOG.md | 4 ++++ memiavl/db.go | 25 +++++++++++++++++++++---- memiavl/import.go | 3 ++- memiavl/multitree.go | 4 ++-- memiavl/snapshot.go | 25 +++++++++++++++++++++---- 5 files changed, 50 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b59b5fade7..eab1081f5a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## UNRELEASED + +- [#]() memiavl cancel background snapshot rewriting when graceful shutdown. + *January 5, 2024* ## v1.1.0-rc2 diff --git a/memiavl/db.go b/memiavl/db.go index cdfac6de5f..0ec6d5bee1 100644 --- a/memiavl/db.go +++ b/memiavl/db.go @@ -1,6 +1,7 @@ package memiavl import ( + "context" "errors" "fmt" "os" @@ -50,6 +51,9 @@ type DB struct { // result channel of snapshot rewrite goroutine snapshotRewriteChan chan snapshotResult + // context cancel function to cancel the snapshot rewrite goroutine + snapshotRewriteCancel context.CancelFunc + // the number of old snapshots to keep (excluding the latest one) snapshotKeepRecent uint32 // block interval to take a new snapshot @@ -414,6 +418,7 @@ func (db *DB) checkBackgroundSnapshotRewrite() error { select { case result := <-db.snapshotRewriteChan: db.snapshotRewriteChan = nil + db.snapshotRewriteCancel = nil if result.mtree == nil { // background snapshot rewrite failed @@ -628,7 +633,7 @@ func (db *DB) copy(cacheSize int) *DB { } // RewriteSnapshot writes the current version of memiavl into a snapshot, and update the `current` symlink. -func (db *DB) RewriteSnapshot() error { +func (db *DB) RewriteSnapshot(ctx context.Context) error { db.mtx.Lock() defer db.mtx.Unlock() @@ -639,7 +644,7 @@ func (db *DB) RewriteSnapshot() error { snapshotDir := snapshotName(db.lastCommitInfo.Version) tmpDir := snapshotDir + TmpSuffix path := filepath.Join(db.dir, tmpDir) - if err := db.MultiTree.WriteSnapshot(path, db.snapshotWriterPool); err != nil { + if err := db.MultiTree.WriteSnapshot(ctx, path, db.snapshotWriterPool); err != nil { return errors.Join(err, os.RemoveAll(path)) } if err := os.Rename(path, filepath.Join(db.dir, snapshotDir)); err != nil { @@ -707,8 +712,11 @@ func (db *DB) rewriteSnapshotBackground() error { return errors.New("there's another ongoing snapshot rewriting process") } + ctx, cancel := context.WithCancel(context.Background()) + ch := make(chan snapshotResult) db.snapshotRewriteChan = ch + db.snapshotRewriteCancel = cancel cloned := db.copy(0) wal := db.wal @@ -716,7 +724,7 @@ func (db *DB) rewriteSnapshotBackground() error { defer close(ch) cloned.logger.Info("start rewriting snapshot", "version", cloned.Version()) - if err := cloned.RewriteSnapshot(); err != nil { + if err := cloned.RewriteSnapshot(ctx); err != nil { ch <- snapshotResult{err: err} return } @@ -746,7 +754,9 @@ func (db *DB) Close() error { defer db.mtx.Unlock() errs := []error{ - db.waitAsyncCommit(), db.MultiTree.Close(), db.wal.Close(), + db.waitAsyncCommit(), + db.MultiTree.Close(), + db.wal.Close(), } db.wal = nil @@ -755,6 +765,13 @@ func (db *DB) Close() error { db.fileLock = nil } + if db.snapshotRewriteChan != nil { + db.snapshotRewriteCancel() + <-db.snapshotRewriteChan + db.snapshotRewriteChan = nil + db.snapshotRewriteCancel = nil + } + return errors.Join(errs...) } diff --git a/memiavl/import.go b/memiavl/import.go index 5634f54d48..5f418c8f7c 100644 --- a/memiavl/import.go +++ b/memiavl/import.go @@ -1,6 +1,7 @@ package memiavl import ( + "context" "errors" "fmt" "math" @@ -133,7 +134,7 @@ func doImport(dir string, version int64, nodes <-chan *ExportNode) (returnErr er return errors.New("version overflows uint32") } - return writeSnapshot(dir, uint32(version), func(w *snapshotWriter) (uint32, error) { + return writeSnapshot(context.Background(), dir, uint32(version), func(w *snapshotWriter) (uint32, error) { i := &importer{ snapshotWriter: *w, } diff --git a/memiavl/multitree.go b/memiavl/multitree.go index 789f8d2639..78e1241def 100644 --- a/memiavl/multitree.go +++ b/memiavl/multitree.go @@ -357,7 +357,7 @@ func (t *MultiTree) CatchupWAL(wal *wal.Log, endVersion int64) error { return nil } -func (t *MultiTree) WriteSnapshot(dir string, wp *pond.WorkerPool) error { +func (t *MultiTree) WriteSnapshot(ctx context.Context, dir string, wp *pond.WorkerPool) error { if err := os.MkdirAll(dir, os.ModePerm); err != nil { return err } @@ -368,7 +368,7 @@ func (t *MultiTree) WriteSnapshot(dir string, wp *pond.WorkerPool) error { for _, entry := range t.trees { tree, name := entry.Tree, entry.Name group.Submit(func() error { - return tree.WriteSnapshot(filepath.Join(dir, name)) + return tree.WriteSnapshot(ctx, filepath.Join(dir, name)) }) } diff --git a/memiavl/snapshot.go b/memiavl/snapshot.go index 12ca196496..101b79f9ca 100644 --- a/memiavl/snapshot.go +++ b/memiavl/snapshot.go @@ -2,6 +2,7 @@ package memiavl import ( "bufio" + "context" "encoding/binary" "errors" "fmt" @@ -24,6 +25,9 @@ const ( FileNameLeaves = "leaves" FileNameKVs = "kvs" FileNameMetadata = "metadata" + + // check for cancel every 1000 leaves + CancelCheckInterval = 1000 ) // Snapshot manage the lifecycle of mmap-ed files for the snapshot, @@ -348,8 +352,8 @@ func (snapshot *Snapshot) export(callback func(*ExportNode) bool) { } // WriteSnapshot save the IAVL tree to a new snapshot directory. -func (t *Tree) WriteSnapshot(snapshotDir string) error { - return writeSnapshot(snapshotDir, t.version, func(w *snapshotWriter) (uint32, error) { +func (t *Tree) WriteSnapshot(ctx context.Context, snapshotDir string) error { + return writeSnapshot(ctx, snapshotDir, t.version, func(w *snapshotWriter) (uint32, error) { if t.root == nil { return 0, nil } else { @@ -362,6 +366,7 @@ func (t *Tree) WriteSnapshot(snapshotDir string) error { } func writeSnapshot( + ctx context.Context, dir string, version uint32, doWrite func(*snapshotWriter) (uint32, error), ) (returnErr error) { @@ -407,7 +412,7 @@ func writeSnapshot( leavesWriter := bufio.NewWriter(fpLeaves) kvsWriter := bufio.NewWriter(fpKVs) - w := newSnapshotWriter(nodesWriter, leavesWriter, kvsWriter) + w := newSnapshotWriter(ctx, nodesWriter, leavesWriter, kvsWriter) leaves, err := doWrite(w) if err != nil { return err @@ -460,6 +465,9 @@ func writeSnapshot( } type snapshotWriter struct { + // context for cancel the writing process + ctx context.Context + nodesWriter, leavesWriter, kvWriter io.Writer // count how many nodes have been written @@ -469,8 +477,9 @@ type snapshotWriter struct { kvsOffset uint64 } -func newSnapshotWriter(nodesWriter, leavesWriter, kvsWriter io.Writer) *snapshotWriter { +func newSnapshotWriter(ctx context.Context, nodesWriter, leavesWriter, kvsWriter io.Writer) *snapshotWriter { return &snapshotWriter{ + ctx: ctx, nodesWriter: nodesWriter, leavesWriter: leavesWriter, kvWriter: kvsWriter, @@ -502,6 +511,14 @@ func (w *snapshotWriter) writeKeyValue(key, value []byte) error { } func (w *snapshotWriter) writeLeaf(version uint32, key, value, hash []byte) error { + if w.leafCounter%CancelCheckInterval == 0 { + select { + case <-w.ctx.Done(): + return w.ctx.Err() + default: + } + } + var buf [SizeLeafWithoutHash]byte binary.LittleEndian.PutUint32(buf[OffsetLeafVersion:], version) binary.LittleEndian.PutUint32(buf[OffsetLeafKeyLen:], uint32(len(key))) From f6ec99ec64300e743115649a0d6200203f150043 Mon Sep 17 00:00:00 2001 From: yihuang Date: Fri, 12 Jan 2024 14:19:17 +0800 Subject: [PATCH 2/5] Update CHANGELOG.md Signed-off-by: yihuang --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eab1081f5a..cd1fe9db29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## UNRELEASED -- [#]() memiavl cancel background snapshot rewriting when graceful shutdown. +- [#1292](https://github.com/crypto-org-chain/cronos/pull/1292) memiavl cancel background snapshot rewriting when graceful shutdown. *January 5, 2024* From 3b7b39ce5d723acbea9bbe34b5c31b28fa80d75d Mon Sep 17 00:00:00 2001 From: HuangYi Date: Fri, 12 Jan 2024 14:23:09 +0800 Subject: [PATCH 3/5] close order --- memiavl/db.go | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/memiavl/db.go b/memiavl/db.go index 0ec6d5bee1..e237ad32fa 100644 --- a/memiavl/db.go +++ b/memiavl/db.go @@ -753,11 +753,20 @@ func (db *DB) Close() error { db.mtx.Lock() defer db.mtx.Unlock() - errs := []error{ - db.waitAsyncCommit(), + errs := []error{db.waitAsyncCommit()} + + if db.snapshotRewriteChan != nil { + db.snapshotRewriteCancel() + <-db.snapshotRewriteChan + db.snapshotRewriteChan = nil + db.snapshotRewriteCancel = nil + } + + errs = append(errs, db.MultiTree.Close(), db.wal.Close(), - } + ) + db.wal = nil if db.fileLock != nil { @@ -765,13 +774,6 @@ func (db *DB) Close() error { db.fileLock = nil } - if db.snapshotRewriteChan != nil { - db.snapshotRewriteCancel() - <-db.snapshotRewriteChan - db.snapshotRewriteChan = nil - db.snapshotRewriteCancel = nil - } - return errors.Join(errs...) } From 8a416523d63dade6ae77ea68052315e88913037f Mon Sep 17 00:00:00 2001 From: HuangYi Date: Fri, 12 Jan 2024 14:31:26 +0800 Subject: [PATCH 4/5] not break the old api --- memiavl/db.go | 20 +++++++++++++++----- memiavl/multitree.go | 8 ++++++-- memiavl/snapshot.go | 8 ++++++-- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/memiavl/db.go b/memiavl/db.go index e237ad32fa..06ff5a711f 100644 --- a/memiavl/db.go +++ b/memiavl/db.go @@ -633,7 +633,12 @@ func (db *DB) copy(cacheSize int) *DB { } // RewriteSnapshot writes the current version of memiavl into a snapshot, and update the `current` symlink. -func (db *DB) RewriteSnapshot(ctx context.Context) error { +func (db *DB) RewriteSnapshot() error { + return db.RewriteSnapshotWithContext(context.Background()) +} + +// RewriteSnapshotWithContext writes the current version of memiavl into a snapshot, and update the `current` symlink. +func (db *DB) RewriteSnapshotWithContext(ctx context.Context) error { db.mtx.Lock() defer db.mtx.Unlock() @@ -644,7 +649,7 @@ func (db *DB) RewriteSnapshot(ctx context.Context) error { snapshotDir := snapshotName(db.lastCommitInfo.Version) tmpDir := snapshotDir + TmpSuffix path := filepath.Join(db.dir, tmpDir) - if err := db.MultiTree.WriteSnapshot(ctx, path, db.snapshotWriterPool); err != nil { + if err := db.MultiTree.WriteSnapshotWithContext(ctx, path, db.snapshotWriterPool); err != nil { return errors.Join(err, os.RemoveAll(path)) } if err := os.Rename(path, filepath.Join(db.dir, snapshotDir)); err != nil { @@ -724,7 +729,7 @@ func (db *DB) rewriteSnapshotBackground() error { defer close(ch) cloned.logger.Info("start rewriting snapshot", "version", cloned.Version()) - if err := cloned.RewriteSnapshot(ctx); err != nil { + if err := cloned.RewriteSnapshotWithContext(ctx); err != nil { ch <- snapshotResult{err: err} return } @@ -832,11 +837,16 @@ func (db *DB) UpdateCommitInfo() { } // WriteSnapshot wraps MultiTree.WriteSnapshot to add a lock. -func (db *DB) WriteSnapshot(dir string) error { +func (db *DB) WriteSnapshot(ctx context.Context, dir string) error { + return db.WriteSnapshotWithContext(ctx, dir) +} + +// WriteSnapshotWithContext wraps MultiTree.WriteSnapshotWithContext to add a lock. +func (db *DB) WriteSnapshotWithContext(ctx context.Context, dir string) error { db.mtx.Lock() defer db.mtx.Unlock() - return db.MultiTree.WriteSnapshot(dir, db.snapshotWriterPool) + return db.MultiTree.WriteSnapshotWithContext(ctx, dir, db.snapshotWriterPool) } func snapshotName(version int64) string { diff --git a/memiavl/multitree.go b/memiavl/multitree.go index 78e1241def..f05cb963b1 100644 --- a/memiavl/multitree.go +++ b/memiavl/multitree.go @@ -357,7 +357,11 @@ func (t *MultiTree) CatchupWAL(wal *wal.Log, endVersion int64) error { return nil } -func (t *MultiTree) WriteSnapshot(ctx context.Context, dir string, wp *pond.WorkerPool) error { +func (t *MultiTree) WriteSnapshot(dir string, wp *pond.WorkerPool) error { + return t.WriteSnapshotWithContext(context.Background(), dir, wp) +} + +func (t *MultiTree) WriteSnapshotWithContext(ctx context.Context, dir string, wp *pond.WorkerPool) error { if err := os.MkdirAll(dir, os.ModePerm); err != nil { return err } @@ -368,7 +372,7 @@ func (t *MultiTree) WriteSnapshot(ctx context.Context, dir string, wp *pond.Work for _, entry := range t.trees { tree, name := entry.Tree, entry.Name group.Submit(func() error { - return tree.WriteSnapshot(ctx, filepath.Join(dir, name)) + return tree.WriteSnapshotWithContext(ctx, filepath.Join(dir, name)) }) } diff --git a/memiavl/snapshot.go b/memiavl/snapshot.go index 101b79f9ca..2c063deb71 100644 --- a/memiavl/snapshot.go +++ b/memiavl/snapshot.go @@ -351,8 +351,12 @@ func (snapshot *Snapshot) export(callback func(*ExportNode) bool) { } } -// WriteSnapshot save the IAVL tree to a new snapshot directory. -func (t *Tree) WriteSnapshot(ctx context.Context, snapshotDir string) error { +func (t *Tree) WriteSnapshot(snapshotDir string) error { + return t.WriteSnapshotWithContext(context.Background(), snapshotDir) +} + +// WriteSnapshotWithContext save the IAVL tree to a new snapshot directory. +func (t *Tree) WriteSnapshotWithContext(ctx context.Context, snapshotDir string) error { return writeSnapshot(ctx, snapshotDir, t.version, func(w *snapshotWriter) (uint32, error) { if t.root == nil { return 0, nil From fe2dadf41a81fea501b1ea77872714c71668257b Mon Sep 17 00:00:00 2001 From: yihuang Date: Fri, 12 Jan 2024 14:33:23 +0800 Subject: [PATCH 5/5] Update memiavl/db.go Signed-off-by: yihuang --- memiavl/db.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/memiavl/db.go b/memiavl/db.go index 06ff5a711f..6739576722 100644 --- a/memiavl/db.go +++ b/memiavl/db.go @@ -837,8 +837,8 @@ func (db *DB) UpdateCommitInfo() { } // WriteSnapshot wraps MultiTree.WriteSnapshot to add a lock. -func (db *DB) WriteSnapshot(ctx context.Context, dir string) error { - return db.WriteSnapshotWithContext(ctx, dir) +func (db *DB) WriteSnapshot(dir string) error { + return db.WriteSnapshotWithContext(context.Background(), dir) } // WriteSnapshotWithContext wraps MultiTree.WriteSnapshotWithContext to add a lock.