From c6f44f1cc7c760ac17e6441f7d4de26aeb741b9d Mon Sep 17 00:00:00 2001 From: Alexandru Murtaza Date: Thu, 27 Feb 2025 01:44:11 +0200 Subject: [PATCH 01/11] move typed_store into common --- Cargo.lock | 1 + client/blockchain-service/src/handler.rs | 2 +- client/blockchain-service/src/lib.rs | 1 - client/blockchain-service/src/state.rs | 10 +- client/blockchain-service/src/utils.rs | 2 +- client/common/Cargo.toml | 1 + client/common/src/lib.rs | 1 + .../src/typed_store.rs | 283 ++++++++++++++++-- 8 files changed, 267 insertions(+), 34 deletions(-) rename client/{blockchain-service => common}/src/typed_store.rs (57%) diff --git a/Cargo.lock b/Cargo.lock index 77bfb9a6f..9812a796d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13752,6 +13752,7 @@ dependencies = [ "pallet-storage-providers", "parity-scale-codec", "polkadot-primitives 16.0.0", + "rocksdb", "sc-client-api", "sc-executor", "sc-network", diff --git a/client/blockchain-service/src/handler.rs b/client/blockchain-service/src/handler.rs index e59c49970..49343d278 100644 --- a/client/blockchain-service/src/handler.rs +++ b/client/blockchain-service/src/handler.rs @@ -39,6 +39,7 @@ use pallet_storage_providers_runtime_api::{ use shc_actors_framework::actor::{Actor, ActorEventLoop}; use shc_common::{ blockchain_utils::{convert_raw_multiaddresses_to_multiaddr, get_events_at_block}, + typed_store::{CFDequeAPI, ProvidesTypedDbSingleAccess}, types::{ BlockNumber, EitherBucketOrBspId, Fingerprint, ParachainClient, StorageProviderId, TickNumber, BCSV_KEY_TYPE, @@ -63,7 +64,6 @@ use crate::{ OngoingProcessStopStoringForInsolventUserRequestCf, }, transaction::SubmittedTransaction, - typed_store::{CFDequeAPI, ProvidesTypedDbSingleAccess}, types::{ ForestStorageSnapshotInfo, MinimalBlockInfo, NewBlockNotificationKind, StopStoringForInsolventUserRequest, SubmitProofRequest, diff --git a/client/blockchain-service/src/lib.rs b/client/blockchain-service/src/lib.rs index ed9d5322a..4ede59cd1 100644 --- a/client/blockchain-service/src/lib.rs +++ b/client/blockchain-service/src/lib.rs @@ -3,7 +3,6 @@ pub mod events; pub mod handler; pub mod state; pub mod transaction; -pub mod typed_store; pub mod types; pub mod utils; diff --git a/client/blockchain-service/src/state.rs b/client/blockchain-service/src/state.rs index b36f97a22..643edb597 100644 --- a/client/blockchain-service/src/state.rs +++ b/client/blockchain-service/src/state.rs @@ -2,16 +2,16 @@ use std::path::PathBuf; use log::info; use rocksdb::{ColumnFamilyDescriptor, Options, DB}; +use shc_common::typed_store::{ + BufferedWriteSupport, CFDequeAPI, ProvidesDbContext, ProvidesTypedDbAccess, + ProvidesTypedDbSingleAccess, ScaleEncodedCf, SingleScaleEncodedValueCf, TypedCf, + TypedDbContext, TypedRocksDB, +}; use shc_common::types::BlockNumber; use crate::events::{ProcessFileDeletionRequestData, ProcessMspRespondStoringRequestData}; use crate::{ events::{ProcessConfirmStoringRequestData, ProcessStopStoringForInsolventUserRequestData}, - typed_store::{ - BufferedWriteSupport, CFDequeAPI, ProvidesDbContext, ProvidesTypedDbAccess, - ProvidesTypedDbSingleAccess, ScaleEncodedCf, SingleScaleEncodedValueCf, TypedCf, - TypedDbContext, TypedRocksDB, - }, types::{ ConfirmStoringRequest, FileDeletionRequest, RespondStorageRequest, StopStoringForInsolventUserRequest, diff --git a/client/blockchain-service/src/utils.rs b/client/blockchain-service/src/utils.rs index 38b984f6f..9e98b1026 100644 --- a/client/blockchain-service/src/utils.rs +++ b/client/blockchain-service/src/utils.rs @@ -16,6 +16,7 @@ use 
shc_actors_framework::actor::Actor; use shc_common::{ blockchain_utils::get_events_at_block, consts::CURRENT_FOREST_KEY, + typed_store::{CFDequeAPI, ProvidesTypedDbSingleAccess}, types::{ BlockNumber, ForestRoot, ParachainClient, ProofsDealerProviderId, StorageProviderId, TrieAddMutation, TrieMutation, TrieRemoveMutation, BCSV_KEY_TYPE, @@ -51,7 +52,6 @@ use crate::{ OngoingProcessMspRespondStorageRequestCf, OngoingProcessStopStoringForInsolventUserRequestCf, }, - typed_store::{CFDequeAPI, ProvidesTypedDbSingleAccess}, types::{Extrinsic, MinimalBlockInfo, NewBlockNotificationKind, SendExtrinsicOptions, Tip}, BlockchainService, }; diff --git a/client/common/Cargo.toml b/client/common/Cargo.toml index 4a58500bf..cf91b6c80 100644 --- a/client/common/Cargo.toml +++ b/client/common/Cargo.toml @@ -18,6 +18,7 @@ workspace = true anyhow = { workspace = true } bincode = { workspace = true } codec = { workspace = true } +rocksdb = { workspace = true } serde = { workspace = true, default-features = true } trie-db = { workspace = true } lazy-static = { workspace = true } diff --git a/client/common/src/lib.rs b/client/common/src/lib.rs index bf29d613f..98b76e43d 100644 --- a/client/common/src/lib.rs +++ b/client/common/src/lib.rs @@ -1,3 +1,4 @@ pub mod blockchain_utils; pub mod consts; +pub mod typed_store; pub mod types; diff --git a/client/blockchain-service/src/typed_store.rs b/client/common/src/typed_store.rs similarity index 57% rename from client/blockchain-service/src/typed_store.rs rename to client/common/src/typed_store.rs index ab4e080da..6d1368fd8 100644 --- a/client/blockchain-service/src/typed_store.rs +++ b/client/common/src/typed_store.rs @@ -1,9 +1,13 @@ use codec::{Decode, Encode}; -use rocksdb::{AsColumnFamilyRef, ColumnFamily, DBPinnableSlice, IteratorMode, WriteBatch, DB}; +use rocksdb::{ + AsColumnFamilyRef, ColumnFamily, DBPinnableSlice, Direction, IteratorMode, ReadOptions, + WriteBatch, DB, +}; use std::{ cell::{Ref, RefCell}, collections::BTreeMap, marker::PhantomData, + ops::RangeBounds, }; pub trait DbCodec { @@ -138,6 +142,14 @@ pub trait ReadableRocks { cf: &impl AsColumnFamilyRef, mode: IteratorMode, ) -> impl Iterator, Box<[u8]>)> + 'a; + + /// Gets an iterator over the column family with custom read options. + fn iterator_cf_opt<'a>( + &'a self, + cf: &impl AsColumnFamilyRef, + mode: IteratorMode, + read_opts: ReadOptions, + ) -> impl Iterator, Box<[u8]>)> + 'a; } /// A write-supporting interface of a RocksDB database. @@ -230,6 +242,7 @@ impl<'r, R: WriteableRocks> Drop for BufferedWriteSupport<'r, R> { } } +/// Implementation specific to BufferedWriteSupport impl<'r, R: WriteableRocks> TypedDbContext<'r, R, BufferedWriteSupport<'r, R>> { /// Explicitly flushes the current contents of the write buffer and clears the associated /// overlay. @@ -284,31 +297,212 @@ impl<'r, 'o, 'w, CF: TypedCf, R: ReadableRocks, W: WriteSupport> TypedCfApi<'r, } } -impl<'r, 'o, 'w, CF: TypedCf, R: WriteableRocks> - TypedCfApi<'r, 'o, 'w, CF, R, BufferedWriteSupport<'r, R>> -{ - /// Upserts the new value at the given key. - pub fn put(&self, key: &CF::Key, value: &CF::Value) { - let raw_key = CF::KeyCodec::encode(key); - let raw_value = CF::ValueCodec::encode(value); - self.cf_overlay.put(raw_key.clone(), raw_value.clone()); - self.write_support - .buffer - .put(self.cf.handle, raw_key, raw_value); - } - - /// Deletes the entry of the given key. 
- pub fn delete(&self, key: &CF::Key) { - let raw_key = CF::KeyCodec::encode(key); - self.cf_overlay.delete(raw_key.clone()); - self.write_support.buffer.delete(self.cf.handle, raw_key); - } - - /// Iterates over the column family. This only supports `Start` mode and does not take the overlay into account. - pub fn iterate_without_overlay(&'r self) -> impl Iterator + 'r { - self.rocks - .iterator_cf(self.cf.handle, IteratorMode::Start) - .map(|(key, value)| (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value))) +impl<'r, 'o, 'w, CF: TypedCf, R: ReadableRocks, W: WriteSupport> TypedCfApi<'r, 'o, 'w, CF, R, W> { + /// Iterates over a range of keys in the column family using Rust's range syntax. + /// This provides an ergonomic way to express all possible range queries. + /// + /// The method supports all standard Rust range types: + /// + /// # Examples: + /// - `iterate_with_range(key1..key2)` - Iterate from key1 to key2 (exclusive) + /// - `iterate_with_range(key1..=key2)` - Iterate from key1 to key2 (inclusive) + /// - `iterate_with_range(key1..)` - Iterate from key1 to the end + /// - `iterate_with_range(..key2)` - Iterate from the beginning to key2 (exclusive) + /// - `iterate_with_range(..=key2)` - Iterate from the beginning to key2 (inclusive) + /// - `iterate_with_range(..)` - Iterate over all keys + /// + /// For reverse iteration, you can compare the keys manually and use the appropriate range: + /// - To iterate from key1 to key2 in reverse (where key1 > key2): `iterate_with_range(key1..=key2)` + /// - To iterate from a key backwards to the beginning: `iterate_with_range(..=key)` + /// + /// The direction is automatically determined based on the comparison of the encoded keys. + pub fn iterate_with_range( + &'r self, + range: Range, + ) -> Box + 'r> + where + Range: RangeBounds, + { + use std::ops::Bound; + + match (range.start_bound(), range.end_bound()) { + (Bound::Included(start), Bound::Excluded(end)) => { + // Range: start..end + let from_encoded = CF::KeyCodec::encode(start); + let to_encoded = CF::KeyCodec::encode(end); + + // Compare the encoded bytes to determine direction + let direction = if from_encoded > to_encoded { + Direction::Reverse + } else { + Direction::Forward + }; + + let mut read_opts = ReadOptions::default(); + + // Set bounds based on direction + match direction { + Direction::Forward => { + read_opts.set_iterate_lower_bound(from_encoded.clone()); + read_opts.set_iterate_upper_bound(to_encoded); + + Box::new( + self.rocks + .iterator_cf_opt( + self.cf.handle, + IteratorMode::From(from_encoded.as_slice(), direction), + read_opts, + ) + .map(|(key, value)| { + (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) + }), + ) + } + Direction::Reverse => { + // For reverse iteration, we need to swap the bounds + read_opts.set_iterate_lower_bound(to_encoded); + read_opts.set_iterate_upper_bound(from_encoded.clone()); + + Box::new( + self.rocks + .iterator_cf_opt( + self.cf.handle, + IteratorMode::From(from_encoded.as_slice(), direction), + read_opts, + ) + .map(|(key, value)| { + (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) + }), + ) + } + } + } + (Bound::Included(start), Bound::Included(end)) => { + // Range: start..=end + let from_encoded = CF::KeyCodec::encode(start); + let to_encoded = CF::KeyCodec::encode(end); + + // Compare the encoded bytes to determine direction + let direction = if from_encoded > to_encoded { + Direction::Reverse + } else { + Direction::Forward + }; + + let mut read_opts = ReadOptions::default(); + + match 
direction { + Direction::Forward => { + // We need to handle this specially since RocksDB's upper bound is exclusive + read_opts.set_iterate_lower_bound(from_encoded.clone()); + + // For inclusive end, we need to make the upper bound the next possible key + let mut end_bytes = to_encoded.clone(); + // Add a byte to make it the next possible key (to simulate inclusive end) + end_bytes.push(0); + read_opts.set_iterate_upper_bound(end_bytes); + + Box::new( + self.rocks + .iterator_cf_opt( + self.cf.handle, + IteratorMode::From(from_encoded.as_slice(), direction), + read_opts, + ) + .map(|(key, value)| { + (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) + }), + ) + } + Direction::Reverse => { + // For reverse iteration with inclusive end, we need to swap the bounds + read_opts.set_iterate_lower_bound(to_encoded.clone()); + read_opts.set_iterate_upper_bound(from_encoded.clone()); + + Box::new( + self.rocks + .iterator_cf_opt( + self.cf.handle, + IteratorMode::From(from_encoded.as_slice(), direction), + read_opts, + ) + .map(|(key, value)| { + (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) + }), + ) + } + } + } + (Bound::Included(start), Bound::Unbounded) => { + // Range: start.. + let from_encoded = CF::KeyCodec::encode(start); + let mut read_opts = ReadOptions::default(); + read_opts.set_iterate_lower_bound(from_encoded.clone()); + + Box::new( + self.rocks + .iterator_cf_opt( + self.cf.handle, + IteratorMode::From(from_encoded.as_slice(), Direction::Forward), + read_opts, + ) + .map(|(key, value)| { + (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) + }), + ) + } + (Bound::Unbounded, Bound::Excluded(end)) => { + // Range: ..end + let to_encoded = CF::KeyCodec::encode(end); + let mut read_opts = ReadOptions::default(); + read_opts.set_iterate_upper_bound(to_encoded); + + Box::new( + self.rocks + .iterator_cf_opt(self.cf.handle, IteratorMode::Start, read_opts) + .map(|(key, value)| { + (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) + }), + ) + } + (Bound::Unbounded, Bound::Included(end)) => { + // Range: ..=end + // Similar to start..=end, but from the beginning + let end_encoded = CF::KeyCodec::encode(end); + let mut end_bytes = end_encoded.clone(); + // Add a byte to make it the next possible key (to simulate inclusive end) + end_bytes.push(0); + + let mut read_opts = ReadOptions::default(); + read_opts.set_iterate_upper_bound(end_bytes); + + Box::new( + self.rocks + .iterator_cf_opt(self.cf.handle, IteratorMode::Start, read_opts) + .map(|(key, value)| { + (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) + }), + ) + } + (Bound::Unbounded, Bound::Unbounded) => { + // Range: .. 
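+ // No bounds are configured in this arm, so the iterator below starts at the beginning of the column family and visits every entry.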
+ Box::new( + self.rocks + .iterator_cf_opt( + self.cf.handle, + IteratorMode::Start, + ReadOptions::default(), + ) + .map(|(key, value)| { + (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) + }), + ) + } + _ => { + // This should never happen with Rust's range types + panic!("Unsupported range bounds") + } + } } } @@ -354,6 +548,17 @@ impl ReadableRocks for TypedRocksDB { .iterator_cf(cf, mode) .map(|result| result.expect("DB iterator")) } + + fn iterator_cf_opt<'a>( + &'a self, + cf: &impl AsColumnFamilyRef, + mode: IteratorMode, + read_opts: ReadOptions, + ) -> impl Iterator, Box<[u8]>)> + 'a { + self.db + .iterator_cf_opt(cf, read_opts, mode) + .map(|result| result.expect("DB iterator")) + } } impl WriteableRocks for TypedRocksDB { @@ -539,3 +744,29 @@ pub trait CFDequeAPI: ProvidesTypedDbSingleAccess { self.right_index() - self.left_index() } } + +// Add implementation for TypedCfApi with BufferedWriteSupport +impl<'r, 'o, 'w, CF: TypedCf, R: ReadableRocks> + TypedCfApi<'r, 'o, 'w, CF, R, BufferedWriteSupport<'r, R>> +where + R: WriteableRocks, +{ + /// Updates the key with a value. + pub fn put(&self, key: &CF::Key, value: &CF::Value) { + let key_bytes = CF::KeyCodec::encode(key); + let value_bytes = CF::ValueCodec::encode(value); + self.write_support + .buffer + .put(self.cf.handle, key_bytes.clone(), value_bytes.clone()); + self.cf_overlay.put(key_bytes, value_bytes); + } + + /// Deletes the key. + pub fn delete(&self, key: &CF::Key) { + let key_bytes = CF::KeyCodec::encode(key); + self.write_support + .buffer + .delete(self.cf.handle, key_bytes.clone()); + self.cf_overlay.delete(key_bytes); + } +} From a2d9033225230abe89d9e8dad2fdf63f66396798 Mon Sep 17 00:00:00 2001 From: Alexandru Murtaza Date: Thu, 27 Feb 2025 20:16:00 +0200 Subject: [PATCH 02/11] add CFHashSetAPI --- client/common/src/typed_store.rs | 482 ++++++++++++++++++++++++------- 1 file changed, 381 insertions(+), 101 deletions(-) diff --git a/client/common/src/typed_store.rs b/client/common/src/typed_store.rs index 6d1368fd8..4727a2096 100644 --- a/client/common/src/typed_store.rs +++ b/client/common/src/typed_store.rs @@ -331,119 +331,65 @@ impl<'r, 'o, 'w, CF: TypedCf, R: ReadableRocks, W: WriteSupport> TypedCfApi<'r, let from_encoded = CF::KeyCodec::encode(start); let to_encoded = CF::KeyCodec::encode(end); - // Compare the encoded bytes to determine direction - let direction = if from_encoded > to_encoded { - Direction::Reverse - } else { - Direction::Forward - }; - let mut read_opts = ReadOptions::default(); + read_opts.set_iterate_lower_bound(from_encoded.clone()); + read_opts.set_iterate_upper_bound(to_encoded); - // Set bounds based on direction - match direction { - Direction::Forward => { - read_opts.set_iterate_lower_bound(from_encoded.clone()); - read_opts.set_iterate_upper_bound(to_encoded); - - Box::new( - self.rocks - .iterator_cf_opt( - self.cf.handle, - IteratorMode::From(from_encoded.as_slice(), direction), - read_opts, - ) - .map(|(key, value)| { - (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) - }), - ) - } - Direction::Reverse => { - // For reverse iteration, we need to swap the bounds - read_opts.set_iterate_lower_bound(to_encoded); - read_opts.set_iterate_upper_bound(from_encoded.clone()); - - Box::new( - self.rocks - .iterator_cf_opt( - self.cf.handle, - IteratorMode::From(from_encoded.as_slice(), direction), - read_opts, - ) - .map(|(key, value)| { - (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) - }), + // Clone the slice to extend its lifetime + let 
from_slice = from_encoded.clone(); + Box::new( + self.rocks + .iterator_cf_opt( + self.cf.handle, + IteratorMode::From(&from_slice, Direction::Forward), + read_opts, ) - } - } + .map(|(key, value)| { + (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) + }), + ) } (Bound::Included(start), Bound::Included(end)) => { // Range: start..=end let from_encoded = CF::KeyCodec::encode(start); let to_encoded = CF::KeyCodec::encode(end); - // Compare the encoded bytes to determine direction - let direction = if from_encoded > to_encoded { - Direction::Reverse - } else { - Direction::Forward - }; - let mut read_opts = ReadOptions::default(); + read_opts.set_iterate_lower_bound(from_encoded.clone()); - match direction { - Direction::Forward => { - // We need to handle this specially since RocksDB's upper bound is exclusive - read_opts.set_iterate_lower_bound(from_encoded.clone()); - - // For inclusive end, we need to make the upper bound the next possible key - let mut end_bytes = to_encoded.clone(); - // Add a byte to make it the next possible key (to simulate inclusive end) - end_bytes.push(0); - read_opts.set_iterate_upper_bound(end_bytes); - - Box::new( - self.rocks - .iterator_cf_opt( - self.cf.handle, - IteratorMode::From(from_encoded.as_slice(), direction), - read_opts, - ) - .map(|(key, value)| { - (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) - }), - ) - } - Direction::Reverse => { - // For reverse iteration with inclusive end, we need to swap the bounds - read_opts.set_iterate_lower_bound(to_encoded.clone()); - read_opts.set_iterate_upper_bound(from_encoded.clone()); - - Box::new( - self.rocks - .iterator_cf_opt( - self.cf.handle, - IteratorMode::From(from_encoded.as_slice(), direction), - read_opts, - ) - .map(|(key, value)| { - (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) - }), + // For inclusive end, we need to make the upper bound the next possible key + let mut end_bytes = to_encoded.clone(); + end_bytes.push(0); + read_opts.set_iterate_upper_bound(end_bytes); + + // Clone the slice to extend its lifetime + let from_slice = from_encoded.clone(); + Box::new( + self.rocks + .iterator_cf_opt( + self.cf.handle, + IteratorMode::From(&from_slice, Direction::Forward), + read_opts, ) - } - } + .map(|(key, value)| { + (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) + }), + ) } (Bound::Included(start), Bound::Unbounded) => { // Range: start.. 
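+ // Only a lower bound is needed here: iteration runs forward from `start` through the end of the column family.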
let from_encoded = CF::KeyCodec::encode(start); + let mut read_opts = ReadOptions::default(); read_opts.set_iterate_lower_bound(from_encoded.clone()); + // Clone the slice to extend its lifetime + let from_slice = from_encoded.clone(); Box::new( self.rocks .iterator_cf_opt( self.cf.handle, - IteratorMode::From(from_encoded.as_slice(), Direction::Forward), + IteratorMode::From(&from_slice, Direction::Forward), read_opts, ) .map(|(key, value)| { @@ -451,10 +397,15 @@ impl<'r, 'o, 'w, CF: TypedCf, R: ReadableRocks, W: WriteSupport> TypedCfApi<'r, }), ) } - (Bound::Unbounded, Bound::Excluded(end)) => { - // Range: ..end + (Bound::Excluded(start), Bound::Excluded(end)) => { + // Range: start+1..end + let from_encoded = CF::KeyCodec::encode(start); let to_encoded = CF::KeyCodec::encode(end); + let mut read_opts = ReadOptions::default(); + // We need to find the next key after 'start' to implement 'Excluded' semantics + // For simplicity, we'll just use the same lower bound and filter in the iterator + read_opts.set_iterate_lower_bound(from_encoded.clone()); read_opts.set_iterate_upper_bound(to_encoded); Box::new( @@ -465,14 +416,62 @@ impl<'r, 'o, 'w, CF: TypedCf, R: ReadableRocks, W: WriteSupport> TypedCfApi<'r, }), ) } + (Bound::Excluded(start), Bound::Included(end)) => { + // Range: start+1..=end + let from_encoded = CF::KeyCodec::encode(start); + let to_encoded = CF::KeyCodec::encode(end); + + let mut read_opts = ReadOptions::default(); + read_opts.set_iterate_lower_bound(from_encoded.clone()); + + let mut end_bytes = to_encoded.clone(); + end_bytes.push(0); + read_opts.set_iterate_upper_bound(end_bytes); + + Box::new( + self.rocks + .iterator_cf_opt(self.cf.handle, IteratorMode::Start, read_opts) + .map(|(key, value)| { + (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) + }), + ) + } + (Bound::Excluded(start), Bound::Unbounded) => { + // Range: start+1.. + let start_bytes = CF::KeyCodec::encode(start); + + let mut read_opts = ReadOptions::default(); + read_opts.set_iterate_lower_bound(start_bytes.clone()); + + Box::new( + self.rocks + .iterator_cf_opt( + self.cf.handle, + IteratorMode::From(&start_bytes, Direction::Forward), + read_opts, + ) + .map(|(key, value)| { + (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) + }), + ) + } (Bound::Unbounded, Bound::Included(end)) => { // Range: ..=end - // Similar to start..=end, but from the beginning - let end_encoded = CF::KeyCodec::encode(end); - let mut end_bytes = end_encoded.clone(); - // Add a byte to make it the next possible key (to simulate inclusive end) - end_bytes.push(0); + let end_bytes = CF::KeyCodec::encode(end); + let mut read_opts = ReadOptions::default(); + read_opts.set_iterate_upper_bound(end_bytes); + Box::new( + self.rocks + .iterator_cf_opt(self.cf.handle, IteratorMode::Start, read_opts) + .map(|(key, value)| { + (CF::KeyCodec::decode(&key), CF::ValueCodec::decode(&value)) + }), + ) + } + (Bound::Unbounded, Bound::Excluded(end)) => { + // Range: ..end + let end_bytes = CF::KeyCodec::encode(end); let mut read_opts = ReadOptions::default(); read_opts.set_iterate_upper_bound(end_bytes); @@ -498,10 +497,6 @@ impl<'r, 'o, 'w, CF: TypedCf, R: ReadableRocks, W: WriteSupport> TypedCfApi<'r, }), ) } - _ => { - // This should never happen with Rust's range types - panic!("Unsupported range bounds") - } } } } @@ -526,6 +521,14 @@ pub struct TypedRocksDB { pub db: DB, } +impl TypedRocksDB { + /// Opens a RocksDB database with default options at the given path. 
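+ /// Note: this opens only the default column family. A database that defines the typed column families used in this module must instead be opened with `DB::open_cf_descriptors`, as the tests in this file do.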
+ pub fn open_default(path: &str) -> Result { + let db = DB::open_default(path)?; + Ok(Self { db }) + } +} + impl ReadableRocks for TypedRocksDB { fn cf_handle(&self, name: &str) -> &ColumnFamily { self.db.cf_handle(name).expect(name) @@ -770,3 +773,280 @@ where self.cf_overlay.delete(key_bytes); } } + +/// A trait for a hashset-like structure on top of RocksDB. +/// This trait provides common operations for working with sets of keys. +pub trait CFHashSetAPI: ProvidesTypedDbAccess { + /// The type of the key stored in the hashset. + type Value: Encode + Decode; + + /// The column family used to store the hashset. + type SetCF: Default + TypedCf; + + /// Checks if the hashset contains the given key. + fn contains(&self, key: &Self::Value) -> bool { + self.db_context() + .cf(&Self::SetCF::default()) + .get(key) + .is_some() + } + + /// Inserts a key into the hashset. + /// Returns true if the key was not present in the set. + fn insert(&mut self, key: &Self::Value) -> bool { + let was_present = self.contains(key); + if !was_present { + self.db_context().cf(&Self::SetCF::default()).put(key, &()); + } + !was_present + } + + /// Removes a key from the hashset. + /// Returns true if the key was present in the set. + fn remove(&mut self, key: &Self::Value) -> bool { + let was_present = self.contains(key); + if was_present { + self.db_context().cf(&Self::SetCF::default()).delete(key); + } + was_present + } + + /// Returns all keys in the hashset as a vector, in order. + fn keys(&self) -> Vec { + self.db_context() + .cf(&Self::SetCF::default()) + .iterate_with_range(..) + .map(|(key, _)| key) + .collect() + } + + /// Returns keys in the given range as a vector, in order. + fn keys_in_range>(&self, range: R) -> Vec { + self.db_context() + .cf(&Self::SetCF::default()) + .iterate_with_range(range) + .map(|(key, _)| key) + .collect() + } + + /// Performs an operation on each key in the hashset. + /// This method iterates over the keys without collecting them all into memory. + fn for_each(&self, mut f: F) + where + F: FnMut(&Self::Value), + { + for (key, _) in self + .db_context() + .cf(&Self::SetCF::default()) + .iterate_with_range(..) + { + f(&key); + } + } + + /// Performs an operation on each key in the given range. + /// This method iterates over the keys without collecting them all into memory. + fn for_each_in_range(&self, range: R, mut f: F) + where + R: RangeBounds, + F: FnMut(&Self::Value), + { + for (key, _) in self + .db_context() + .cf(&Self::SetCF::default()) + .iterate_with_range(range) + { + f(&key); + } + } + + /// Clears all keys from the hashset. + fn clear(&mut self) { + let keys: Vec = self.keys(); + for key in keys { + self.remove(&key); + } + } + + /// Returns the number of keys in the hashset. + fn len(&self) -> usize { + self.keys().len() + } + + /// Returns true if the hashset is empty. 
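+ /// Note: `len()` above collects every key via `keys()`, so both `len()` and `is_empty()` currently scan the whole column family.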
+ fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +// Example implementations for string and integer hashsets +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + // Define a column family for user IDs + #[derive(Default, Clone)] + struct UserIdSetCF; + + impl ScaleEncodedCf for UserIdSetCF { + type Key = u64; + type Value = (); + const SCALE_ENCODED_NAME: &'static str = "user_id_set"; + } + + // Define a struct that will implement CFHashSetAPI + struct UserIdSet<'a> { + db_context: TypedDbContext<'a, TypedRocksDB, BufferedWriteSupport<'a, TypedRocksDB>>, + } + + // Implement ProvidesDbContext (required for CFHashSetAPI) + impl<'a> ProvidesDbContext for UserIdSet<'a> { + fn db_context(&self) -> &TypedDbContext> { + &self.db_context + } + } + + // Implement ProvidesTypedDbAccess (required for CFHashSetAPI) + impl<'a> ProvidesTypedDbAccess for UserIdSet<'a> {} + + // Implement CFHashSetAPI + impl<'a> CFHashSetAPI for UserIdSet<'a> { + type Value = u64; + type SetCF = UserIdSetCF; + } + + #[test] + fn test_hashset_operations() { + // Create a temporary directory for the test + let temp_dir = format!("/tmp/rocksdb_test_{}", std::process::id()); + let _ = fs::remove_dir_all(&temp_dir); // Clean up any previous test data + fs::create_dir_all(&temp_dir).expect("Failed to create temp directory"); + + // Set up RocksDB with the required column family + let cf_name = UserIdSetCF::SCALE_ENCODED_NAME; + let mut db_opts = rocksdb::Options::default(); + db_opts.create_if_missing(true); + db_opts.create_missing_column_families(true); + + let cf_opts = rocksdb::Options::default(); + let cf_descriptor = rocksdb::ColumnFamilyDescriptor::new(cf_name, cf_opts); + + let db = rocksdb::DB::open_cf_descriptors(&db_opts, &temp_dir, vec![cf_descriptor]) + .expect("Failed to open database"); + let typed_db = TypedRocksDB { db }; + + // Create a write batch and context + let write_support = BufferedWriteSupport::new(&typed_db); + let db_context = TypedDbContext::new(&typed_db, write_support); + + // Create the hashset with the context + let mut user_set = UserIdSet { db_context }; + + // Test initial state + assert!(user_set.is_empty(), "New hashset should be empty"); + assert_eq!(user_set.len(), 0, "New hashset should have length 0"); + + // Test insert + assert!( + user_set.insert(&123), + "Insert should return true for new key" + ); + assert!( + !user_set.insert(&123), + "Insert should return false for existing key" + ); + assert!( + user_set.insert(&456), + "Insert should return true for new key" + ); + assert!( + user_set.insert(&789), + "Insert should return true for new key" + ); + + // Flush the changes to make them visible + user_set.db_context().flush(); + + // Test contains + assert!( + user_set.contains(&123), + "Hashset should contain inserted key" + ); + assert!( + user_set.contains(&456), + "Hashset should contain inserted key" + ); + assert!( + user_set.contains(&789), + "Hashset should contain inserted key" + ); + assert!( + !user_set.contains(&999), + "Hashset should not contain non-inserted key" + ); + + // Test keys + let keys = user_set.keys(); + assert_eq!(keys.len(), 3, "Hashset should have 3 keys"); + assert!(keys.contains(&123), "Keys should contain 123"); + assert!(keys.contains(&456), "Keys should contain 456"); + assert!(keys.contains(&789), "Keys should contain 789"); + + // Test keys_in_range + let range_keys = user_set.keys_in_range(100..500); + assert_eq!(range_keys.len(), 2, "Range query should return 2 keys"); + assert!(range_keys.contains(&123), "Range keys should 
contain 123"); + assert!(range_keys.contains(&456), "Range keys should contain 456"); + assert!( + !range_keys.contains(&789), + "Range keys should not contain 789" + ); + + // Test remove + assert!( + user_set.remove(&123), + "Remove should return true for existing key" + ); + + // Flush the changes to make them visible + user_set.db_context().flush(); + + assert!( + !user_set.remove(&123), + "Remove should return false for non-existing key" + ); + assert!( + !user_set.contains(&123), + "Hashset should not contain removed key" + ); + + // Test for_each + let mut count = 0; + let mut sum = 0; + user_set.for_each(|key| { + count += 1; + sum += key; + }); + assert_eq!(count, 2, "for_each should iterate over 2 keys"); + assert_eq!(sum, 456 + 789, "Sum of remaining keys should be 456 + 789"); + + // Test clear + user_set.clear(); + + // Flush the changes to make them visible + user_set.db_context().flush(); + + assert!(user_set.is_empty(), "Hashset should be empty after clear"); + assert_eq!( + user_set.len(), + 0, + "Hashset should have length 0 after clear" + ); + + // Clean up + drop(user_set); + drop(typed_db); + let _ = fs::remove_dir_all(&temp_dir); + } +} From 28ff986b946c0b293d913cb60b6bcae99cd89b52 Mon Sep 17 00:00:00 2001 From: Alexandru Murtaza Date: Fri, 28 Feb 2025 15:27:00 +0200 Subject: [PATCH 03/11] add CFRangeMapAPI --- Cargo.lock | 1 + client/common/Cargo.toml | 3 + client/common/src/typed_store.rs | 1079 ++++++++++++++++++++++++++---- 3 files changed, 941 insertions(+), 142 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9812a796d..c0f0936d5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13770,6 +13770,7 @@ dependencies = [ "sp-std 14.0.0 (git+https://github.com/paritytech/polkadot-sdk.git?branch=stable2409)", "sp-trie 29.0.0", "storage-hub-runtime", + "tempfile", "thiserror 1.0.69", "trie-db 0.29.1", ] diff --git a/client/common/Cargo.toml b/client/common/Cargo.toml index cf91b6c80..d46c28a16 100644 --- a/client/common/Cargo.toml +++ b/client/common/Cargo.toml @@ -60,6 +60,9 @@ pallet-payment-streams = { workspace = true } pallet-proofs-dealer = { workspace = true } pallet-storage-providers = { workspace = true } +[dev-dependencies] +tempfile = "3.8" + [features] default = ["std"] std = [ diff --git a/client/common/src/typed_store.rs b/client/common/src/typed_store.rs index 4727a2096..987620b6d 100644 --- a/client/common/src/typed_store.rs +++ b/client/common/src/typed_store.rs @@ -862,7 +862,7 @@ pub trait CFHashSetAPI: ProvidesTypedDbAccess { /// Clears all keys from the hashset. fn clear(&mut self) { - let keys: Vec = self.keys(); + let keys: Vec<_> = self.keys(); for key in keys { self.remove(&key); } @@ -879,174 +879,969 @@ pub trait CFHashSetAPI: ProvidesTypedDbAccess { } } -// Example implementations for string and integer hashsets +/// A composite key that combines a primary key and a value for efficient range queries +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct CompositeKey { + pub key: K, + pub value: V, +} + +impl Encode for CompositeKey { + fn encode(&self) -> Vec { + // Encode key and value separately, then concatenate + let mut result = self.key.encode(); + result.extend(self.value.encode()); + result + } +} + +impl Decode for CompositeKey { + fn decode(input: &mut I) -> Result { + // This is a simplified implementation that assumes we can decode the key and value + // from the input stream. In a real implementation, we would need to know the boundary + // between the key and value. 
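+ // Note: SCALE decoding is sequential, so `K::decode` consumes exactly the bytes belonging to the key and leaves the remainder of the input for `V::decode`.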
+ let key = K::decode(input)?; + let value = V::decode(input)?; + Ok(CompositeKey { key, value }) + } +} + +/// A trait for a hashmap-like structure on top of RocksDB that supports efficient range queries within keys. +/// This implementation uses a composite key approach where the key and value are combined into a single key, +/// and the actual value stored is empty (unit type). +pub trait CFRangeMapAPI: ProvidesTypedDbAccess { + /// The type of the key stored in the hashmap. + type Key: Encode + Decode + Clone + PartialEq + Eq + PartialOrd + Ord; + + /// The type of the value elements stored for each key. + /// The Default trait is required for creating empty values in range queries. + type Value: Encode + Decode + Clone + PartialEq + Eq + PartialOrd + Ord + Default; + + /// The column family used to store the hashmap. + type MapCF: Default + TypedCf, Value = ()>; + + /// Checks if the hashmap contains the given key. + fn contains_key(&self, key: &Self::Key) -> bool { + // Create a range that only includes the specific key + let start = CompositeKey { + key: key.clone(), + value: Self::Value::default(), + }; + let mut end_key = key.clone(); + // This assumes we can increment the last byte to get the next key + let end_bytes = end_key.encode(); + if let Some(last) = end_bytes.last().cloned() { + let mut new_end = end_bytes.clone(); + *new_end.last_mut().unwrap() = last.wrapping_add(1); + end_key = Self::Key::decode(&mut &new_end[..]).unwrap_or(end_key); + } + let end = CompositeKey { + key: end_key, + value: Self::Value::default(), + }; + + // Check if there's at least one entry for this key + self.db_context() + .cf(&Self::MapCF::default()) + .iterate_with_range(start..end) + .next() + .is_some() + } + + /// Checks if the hashmap contains the given key-value pair. + fn contains(&self, key: &Self::Key, value: &Self::Value) -> bool { + let composite_key = CompositeKey { + key: key.clone(), + value: value.clone(), + }; + self.db_context() + .cf(&Self::MapCF::default()) + .get(&composite_key) + .is_some() + } + + /// Inserts a key-value pair into the hashmap. + /// Returns true if the pair was not present in the map. + fn insert(&self, key: &Self::Key, value: Self::Value) -> bool { + let composite_key = CompositeKey { + key: key.clone(), + value, + }; + if self + .db_context() + .cf(&Self::MapCF::default()) + .get(&composite_key) + .is_some() + { + return false; + } + self.db_context() + .cf(&Self::MapCF::default()) + .put(&composite_key, &()); + true + } + + /// Removes a specific key-value pair from the hashmap. + /// Returns true if the pair was found and removed. + fn remove(&self, key: &Self::Key, value: &Self::Value) -> bool { + let composite_key = CompositeKey { + key: key.clone(), + value: value.clone(), + }; + if self + .db_context() + .cf(&Self::MapCF::default()) + .get(&composite_key) + .is_none() + { + return false; + } + self.db_context() + .cf(&Self::MapCF::default()) + .delete(&composite_key); + true + } + + /// Removes all values for a specific key. + /// Returns the number of values removed. 
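+ /// Deletions are performed in batches (see `BATCH_SIZE` below), and the write buffer is flushed after each batch so the removals become visible to subsequent reads.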
+ fn remove_key(&self, key: &Self::Key) -> usize { + let start = CompositeKey { + key: key.clone(), + value: Self::Value::default(), + }; + let mut end_key = key.clone(); + // This assumes we can increment the last byte to get the next key + let end_bytes = end_key.encode(); + if let Some(last) = end_bytes.last().cloned() { + let mut new_end = end_bytes.clone(); + *new_end.last_mut().unwrap() = last.wrapping_add(1); + end_key = Self::Key::decode(&mut &new_end[..]).unwrap_or(end_key); + } + let end = CompositeKey { + key: end_key, + value: Self::Value::default(), + }; + + // Use a batched approach to avoid loading everything into memory at once + const BATCH_SIZE: usize = 1000; + let mut total_removed = 0; + + loop { + let values: Vec<_> = self + .db_context() + .cf(&Self::MapCF::default()) + .iterate_with_range(start.clone()..end.clone()) + .take(BATCH_SIZE) + .map(|(k, _)| k) + .collect(); + + if values.is_empty() { + break; + } + + let batch_size = values.len(); + for composite_key in values { + self.db_context() + .cf(&Self::MapCF::default()) + .delete(&composite_key); + } + + total_removed += batch_size; + + // Flush after each batch to ensure changes are visible + self.db_context().flush(); + } + + total_removed + } + + /// Returns all keys in the hashmap. + fn keys(&self) -> Vec { + let mut result = Vec::new(); + let mut current_key: Option = None; + + for (composite_key, _) in self + .db_context() + .cf(&Self::MapCF::default()) + .iterate_with_range(..) + { + if current_key.as_ref() != Some(&composite_key.key) { + current_key = Some(composite_key.key.clone()); + result.push(composite_key.key); + } + } + + result + } + + /// Returns all values for a specific key. + fn values_for_key(&self, key: &Self::Key) -> Vec { + // Create a range that only includes the specific key + let start = CompositeKey { + key: key.clone(), + value: Self::Value::default(), + }; + let mut end_key = key.clone(); + // This assumes we can increment the last byte to get the next key + let end_bytes = end_key.encode(); + if let Some(last) = end_bytes.last().cloned() { + let mut new_end = end_bytes.clone(); + *new_end.last_mut().unwrap() = last.wrapping_add(1); + end_key = Self::Key::decode(&mut &new_end[..]).unwrap_or(end_key); + } + let end = CompositeKey { + key: end_key, + value: Self::Value::default(), + }; + + // Only iterate over the range for this specific key + self.db_context() + .cf(&Self::MapCF::default()) + .iterate_with_range(start..end) + .map(|(composite_key, _)| composite_key.value) + .collect() + } + + /// Returns values for a specific key within a given range. + /// Uses the streaming iterator internally for efficiency. 
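+ /// A hypothetical usage sketch, assuming a concrete implementor of this trait such as the
+ /// `TestRangeMap` defined in the tests below:
+ ///
+ /// ```ignore
+ /// // All values stored under `key` between 10 and 20, inclusive.
+ /// let values = map.values_in_range(&key, TestValue(10)..=TestValue(20));
+ /// ```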
+ fn values_in_range(&self, key: &Self::Key, range: R) -> Vec + where + R: RangeBounds, + { + use std::ops::Bound; + + let start_bound = match range.start_bound() { + Bound::Included(v) => Bound::Included(CompositeKey { + key: key.clone(), + value: v.clone(), + }), + Bound::Excluded(v) => Bound::Excluded(CompositeKey { + key: key.clone(), + value: v.clone(), + }), + Bound::Unbounded => Bound::Included(CompositeKey { + key: key.clone(), + value: Self::Value::default(), + }), + }; + + let mut end_key = key.clone(); + let end_bytes = end_key.encode(); + if let Some(last) = end_bytes.last().cloned() { + let mut new_end = end_bytes.clone(); + *new_end.last_mut().unwrap() = last.wrapping_add(1); + end_key = Self::Key::decode(&mut &new_end[..]).unwrap_or(end_key); + } + + let end_bound = match range.end_bound() { + Bound::Included(v) => Bound::Included(CompositeKey { + key: key.clone(), + value: v.clone(), + }), + Bound::Excluded(v) => Bound::Excluded(CompositeKey { + key: key.clone(), + value: v.clone(), + }), + Bound::Unbounded => Bound::Excluded(CompositeKey { + key: end_key, + value: Self::Value::default(), + }), + }; + + let range = (start_bound, end_bound); + + self.db_context() + .cf(&Self::MapCF::default()) + .iterate_with_range(range) + .map(|(k, _)| k.value) + .collect() + } + + /// Performs an operation on each value for a specific key. + fn for_each_value(&self, key: &Self::Key, mut f: F) + where + F: FnMut(&Self::Value), + { + let start = CompositeKey { + key: key.clone(), + value: Self::Value::default(), + }; + let mut end_key = key.clone(); + // This assumes we can increment the last byte to get the next key + let end_bytes = end_key.encode(); + if let Some(last) = end_bytes.last().cloned() { + let mut new_end = end_bytes.clone(); + *new_end.last_mut().unwrap() = last.wrapping_add(1); + end_key = Self::Key::decode(&mut &new_end[..]).unwrap_or(end_key); + } + let end = CompositeKey { + key: end_key, + value: Self::Value::default(), + }; + + for (composite_key, _) in self + .db_context() + .cf(&Self::MapCF::default()) + .iterate_with_range(start..end) + { + f(&composite_key.value); + } + } + + /// Performs an operation on each value for a specific key within a given range. 
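+ /// Unlike `values_in_range`, nothing is collected into a `Vec`; the closure is invoked for each matching value while streaming over the range.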
+ fn for_each_value_in_range(&self, key: &Self::Key, range: R, mut f: F) + where + R: RangeBounds, + F: FnMut(&Self::Value), + { + use std::ops::Bound; + + let start_bound = match range.start_bound() { + Bound::Included(v) => Bound::Included(CompositeKey { + key: key.clone(), + value: v.clone(), + }), + Bound::Excluded(v) => Bound::Excluded(CompositeKey { + key: key.clone(), + value: v.clone(), + }), + Bound::Unbounded => Bound::Included(CompositeKey { + key: key.clone(), + value: Self::Value::default(), + }), + }; + + let mut end_key = key.clone(); + let end_bytes = end_key.encode(); + if let Some(last) = end_bytes.last().cloned() { + let mut new_end = end_bytes.clone(); + *new_end.last_mut().unwrap() = last.wrapping_add(1); + end_key = Self::Key::decode(&mut &new_end[..]).unwrap_or(end_key); + } + + let end_bound = match range.end_bound() { + Bound::Included(v) => Bound::Included(CompositeKey { + key: key.clone(), + value: v.clone(), + }), + Bound::Excluded(v) => Bound::Excluded(CompositeKey { + key: key.clone(), + value: v.clone(), + }), + Bound::Unbounded => Bound::Excluded(CompositeKey { + key: end_key, + value: Self::Value::default(), + }), + }; + + let range = (start_bound, end_bound); + + for (composite_key, _) in self + .db_context() + .cf(&Self::MapCF::default()) + .iterate_with_range(range) + { + f(&composite_key.value); + } + } + + /// Returns the number of keys in the hashmap. + fn len(&self) -> usize { + let mut count = 0; + let mut current_key: Option = None; + + // Count unique keys without collecting them + for (composite_key, _) in self + .db_context() + .cf(&Self::MapCF::default()) + .iterate_with_range(..) + { + if current_key.as_ref() != Some(&composite_key.key) { + current_key = Some(composite_key.key.clone()); + count += 1; + } + } + + count + } + + /// Returns true if the hashmap is empty. + fn is_empty(&self) -> bool { + self.db_context() + .cf(&Self::MapCF::default()) + .iterate_with_range(..) + .next() + .is_none() + } + + /// Returns the number of values for a specific key. + fn values_len(&self, key: &Self::Key) -> usize { + // Create a range that only includes the specific key + let start = CompositeKey { + key: key.clone(), + value: Self::Value::default(), + }; + let mut end_key = key.clone(); + // This assumes we can increment the last byte to get the next key + let end_bytes = end_key.encode(); + if let Some(last) = end_bytes.last().cloned() { + let mut new_end = end_bytes.clone(); + *new_end.last_mut().unwrap() = last.wrapping_add(1); + end_key = Self::Key::decode(&mut &new_end[..]).unwrap_or(end_key); + } + let end = CompositeKey { + key: end_key, + value: Self::Value::default(), + }; + + // Count values without collecting them + self.db_context() + .cf(&Self::MapCF::default()) + .iterate_with_range(start..end) + .count() + } + + /// Clears all key-value pairs from the hashmap. + fn clear(&self) { + // Use a batched approach to avoid loading everything into memory at once + const BATCH_SIZE: usize = 1000; + + loop { + let keys: Vec<_> = self + .db_context() + .cf(&Self::MapCF::default()) + .iterate_with_range(..) + .take(BATCH_SIZE) + .map(|(k, _)| k) + .collect(); + + if keys.is_empty() { + break; + } + + for composite_key in keys { + self.db_context() + .cf(&Self::MapCF::default()) + .delete(&composite_key); + } + + // Flush after each batch to ensure changes are visible + self.db_context().flush(); + } + } + + /// Performs an operation on each unique key in the hashmap. + /// This is more efficient than collecting all keys and then iterating. 
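+ /// Keys are visited in their SCALE-encoded byte order, and consecutive composite entries sharing the same key are collapsed so each key is reported exactly once.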
+ fn for_each_key(&self, mut f: F) + where + F: FnMut(&Self::Key), + { + let mut current_key: Option = None; + + for (composite_key, _) in self + .db_context() + .cf(&Self::MapCF::default()) + .iterate_with_range(..) + { + if current_key.as_ref() != Some(&composite_key.key) { + current_key = Some(composite_key.key.clone()); + f(&composite_key.key); + } + } + } +} + #[cfg(test)] mod tests { use super::*; - use std::fs; + use codec::{Decode, Encode}; + use rocksdb::{ColumnFamilyDescriptor, Options}; + use tempfile::tempdir; + + // Define test types for our RangeMap + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Encode, Decode)] + struct TestKey(u32); + + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] + struct TestValue(u32); + + impl Default for TestValue { + // This implementation is required for the CFRangeMapAPI trait + // which uses Default::default() for empty values in range queries + fn default() -> Self { + TestValue(0) + } + } + + impl Encode for TestValue { + fn encode(&self) -> Vec { + self.0.encode() + } + } + + impl Decode for TestValue { + fn decode(input: &mut I) -> Result { + let value = u32::decode(input)?; + Ok(TestValue(value)) + } + } + + // Define column family for our test + struct TestRangeMapCF; - // Define a column family for user IDs - #[derive(Default, Clone)] - struct UserIdSetCF; + impl Default for TestRangeMapCF { + fn default() -> Self { + Self + } + } - impl ScaleEncodedCf for UserIdSetCF { - type Key = u64; + impl TypedCf for TestRangeMapCF { + type Key = CompositeKey; type Value = (); - const SCALE_ENCODED_NAME: &'static str = "user_id_set"; + + type KeyCodec = ScaleDbCodec; + type ValueCodec = ScaleDbCodec; + + const NAME: &'static str = "test_range_map_cf"; } - // Define a struct that will implement CFHashSetAPI - struct UserIdSet<'a> { - db_context: TypedDbContext<'a, TypedRocksDB, BufferedWriteSupport<'a, TypedRocksDB>>, + // Define our test struct that implements CFRangeMapAPI + struct TestRangeMap<'db> { + db_context: TypedDbContext<'db, TypedRocksDB, BufferedWriteSupport<'db, TypedRocksDB>>, } - // Implement ProvidesDbContext (required for CFHashSetAPI) - impl<'a> ProvidesDbContext for UserIdSet<'a> { + impl<'db> ProvidesDbContext for TestRangeMap<'db> { fn db_context(&self) -> &TypedDbContext> { &self.db_context } } - // Implement ProvidesTypedDbAccess (required for CFHashSetAPI) - impl<'a> ProvidesTypedDbAccess for UserIdSet<'a> {} + impl<'db> ProvidesTypedDbAccess for TestRangeMap<'db> {} + + impl<'db> CFRangeMapAPI for TestRangeMap<'db> { + type Key = TestKey; + type Value = TestValue; + type MapCF = TestRangeMapCF; + } + + // Helper function to create a test database and range map + fn setup_test_range_map() -> (tempfile::TempDir, TestRangeMap<'static>) { + // Create a temporary directory for the database + let temp_dir = tempdir().unwrap(); + let path = temp_dir.path().to_str().unwrap(); - // Implement CFHashSetAPI - impl<'a> CFHashSetAPI for UserIdSet<'a> { - type Value = u64; - type SetCF = UserIdSetCF; + // Open the database with the test column family + let mut opts = Options::default(); + opts.create_if_missing(true); + opts.create_missing_column_families(true); + + let cf_descriptor = ColumnFamilyDescriptor::new(TestRangeMapCF::NAME, Options::default()); + let db = DB::open_cf_descriptors(&opts, path, vec![cf_descriptor]).unwrap(); + + // We need to leak the DB to extend its lifetime + let db_box = Box::new(TypedRocksDB { db }); + let typed_db = Box::leak(db_box); + + // Create a buffered write support + let 
write_support = BufferedWriteSupport::new(typed_db); + + // Create a database context + let db_context = TypedDbContext::new(typed_db, write_support); + + // Create our test range map + let range_map = TestRangeMap { db_context }; + + (temp_dir, range_map) } #[test] - fn test_hashset_operations() { - // Create a temporary directory for the test - let temp_dir = format!("/tmp/rocksdb_test_{}", std::process::id()); - let _ = fs::remove_dir_all(&temp_dir); // Clean up any previous test data - fs::create_dir_all(&temp_dir).expect("Failed to create temp directory"); - - // Set up RocksDB with the required column family - let cf_name = UserIdSetCF::SCALE_ENCODED_NAME; - let mut db_opts = rocksdb::Options::default(); - db_opts.create_if_missing(true); - db_opts.create_missing_column_families(true); - - let cf_opts = rocksdb::Options::default(); - let cf_descriptor = rocksdb::ColumnFamilyDescriptor::new(cf_name, cf_opts); - - let db = rocksdb::DB::open_cf_descriptors(&db_opts, &temp_dir, vec![cf_descriptor]) - .expect("Failed to open database"); - let typed_db = TypedRocksDB { db }; - - // Create a write batch and context - let write_support = BufferedWriteSupport::new(&typed_db); - let db_context = TypedDbContext::new(&typed_db, write_support); - - // Create the hashset with the context - let mut user_set = UserIdSet { db_context }; - - // Test initial state - assert!(user_set.is_empty(), "New hashset should be empty"); - assert_eq!(user_set.len(), 0, "New hashset should have length 0"); - - // Test insert - assert!( - user_set.insert(&123), - "Insert should return true for new key" - ); - assert!( - !user_set.insert(&123), - "Insert should return false for existing key" - ); - assert!( - user_set.insert(&456), - "Insert should return true for new key" - ); - assert!( - user_set.insert(&789), - "Insert should return true for new key" - ); + fn test_range_map_basic_operations() { + let (_temp_dir, range_map) = setup_test_range_map(); + + // Test basic operations + let key1 = TestKey(1); + let value1 = TestValue(10); + let value2 = TestValue(20); + let value3 = TestValue(30); + + // Initially the map should be empty + assert!(range_map.is_empty()); + assert_eq!(range_map.len(), 0); + + // Insert some values + assert!(range_map.insert(&key1, value1.clone())); + assert!(range_map.insert(&key1, value2.clone())); + assert!(range_map.insert(&key1, value3.clone())); // Flush the changes to make them visible - user_set.db_context().flush(); - - // Test contains - assert!( - user_set.contains(&123), - "Hashset should contain inserted key" - ); - assert!( - user_set.contains(&456), - "Hashset should contain inserted key" - ); - assert!( - user_set.contains(&789), - "Hashset should contain inserted key" - ); - assert!( - !user_set.contains(&999), - "Hashset should not contain non-inserted key" - ); - - // Test keys - let keys = user_set.keys(); - assert_eq!(keys.len(), 3, "Hashset should have 3 keys"); - assert!(keys.contains(&123), "Keys should contain 123"); - assert!(keys.contains(&456), "Keys should contain 456"); - assert!(keys.contains(&789), "Keys should contain 789"); - - // Test keys_in_range - let range_keys = user_set.keys_in_range(100..500); - assert_eq!(range_keys.len(), 2, "Range query should return 2 keys"); - assert!(range_keys.contains(&123), "Range keys should contain 123"); - assert!(range_keys.contains(&456), "Range keys should contain 456"); - assert!( - !range_keys.contains(&789), - "Range keys should not contain 789" - ); - - // Test remove - assert!( - user_set.remove(&123), - "Remove 
should return true for existing key" - ); + range_map.db_context.flush(); + + // Check contains + assert!(range_map.contains_key(&key1)); + assert!(range_map.contains(&key1, &value1)); + assert!(range_map.contains(&key1, &value2)); + assert!(range_map.contains(&key1, &value3)); + + // Check values for key + let values = range_map.values_for_key(&key1); + assert_eq!(values.len(), 3); + assert!(values.contains(&value1)); + assert!(values.contains(&value2)); + assert!(values.contains(&value3)); + + // Test range queries + let values_range = range_map.values_in_range(&key1, value1.clone()..value3.clone()); + assert_eq!(values_range.len(), 2); + assert!(values_range.contains(&value1)); + assert!(values_range.contains(&value2)); + + // Test removal + assert!(range_map.remove(&key1, &value2)); + range_map.db_context.flush(); + assert!(!range_map.contains(&key1, &value2)); + + // Check values after removal + let values = range_map.values_for_key(&key1); + assert_eq!(values.len(), 2); + assert!(values.contains(&value1)); + assert!(values.contains(&value3)); + + // Test remove_key + assert_eq!(range_map.remove_key(&key1), 2); + range_map.db_context.flush(); + assert!(!range_map.contains_key(&key1)); + assert!(range_map.is_empty()); + } + + #[test] + fn test_range_map_multiple_keys() { + let (_temp_dir, range_map) = setup_test_range_map(); + + // Test with multiple keys + let key1 = TestKey(1); + let key2 = TestKey(2); + + // Insert values for key1 + range_map.insert(&key1, TestValue(10)); + range_map.insert(&key1, TestValue(20)); + + // Insert values for key2 + range_map.insert(&key2, TestValue(30)); + range_map.insert(&key2, TestValue(40)); // Flush the changes to make them visible - user_set.db_context().flush(); - - assert!( - !user_set.remove(&123), - "Remove should return false for non-existing key" - ); - assert!( - !user_set.contains(&123), - "Hashset should not contain removed key" - ); - - // Test for_each + range_map.db_context.flush(); + + // Check keys + let keys = range_map.keys(); + assert_eq!(keys.len(), 2); + assert!(keys.contains(&key1)); + assert!(keys.contains(&key2)); + + // Check values for each key + let values1 = range_map.values_for_key(&key1); + assert_eq!(values1.len(), 2); + assert!(values1.contains(&TestValue(10))); + assert!(values1.contains(&TestValue(20))); + + let values2 = range_map.values_for_key(&key2); + assert_eq!(values2.len(), 2); + assert!(values2.contains(&TestValue(30))); + assert!(values2.contains(&TestValue(40))); + + // Test for_each_value let mut count = 0; - let mut sum = 0; - user_set.for_each(|key| { - count += 1; - sum += key; + range_map.for_each_value(&key1, |_| count += 1); + assert_eq!(count, 2); + + // Test for_each_value_in_range + let mut values = Vec::new(); + range_map.for_each_value_in_range(&key2, TestValue(30)..=TestValue(40), |v| { + values.push(v.clone()) }); - assert_eq!(count, 2, "for_each should iterate over 2 keys"); - assert_eq!(sum, 456 + 789, "Sum of remaining keys should be 456 + 789"); + assert_eq!(values.len(), 2); // Test clear - user_set.clear(); + range_map.clear(); + range_map.db_context.flush(); + assert!(range_map.is_empty()); + assert_eq!(range_map.len(), 0); + } - // Flush the changes to make them visible - user_set.db_context().flush(); - - assert!(user_set.is_empty(), "Hashset should be empty after clear"); - assert_eq!( - user_set.len(), - 0, - "Hashset should have length 0 after clear" - ); - - // Clean up - drop(user_set); - drop(typed_db); - let _ = fs::remove_dir_all(&temp_dir); + #[test] + fn 
test_range_map_duplicate_inserts() { + let (_temp_dir, range_map) = setup_test_range_map(); + + let key = TestKey(1); + let value = TestValue(10); + + // First insert should succeed + assert!(range_map.insert(&key, value.clone())); + range_map.db_context.flush(); + + // Second insert of the same key-value pair should fail + assert!(!range_map.insert(&key, value.clone())); + range_map.db_context.flush(); + + // Check that there's still only one value + assert_eq!(range_map.values_for_key(&key).len(), 1); + } + + #[test] + fn test_range_map_empty_operations() { + let (_temp_dir, range_map) = setup_test_range_map(); + + let key = TestKey(1); + let value = TestValue(10); + + // Operations on empty map + assert!(!range_map.contains_key(&key)); + assert!(!range_map.contains(&key, &value)); + assert!(range_map.values_for_key(&key).is_empty()); + assert!(range_map + .values_in_range(&key, TestValue(0)..TestValue(100)) + .is_empty()); + + // Removing from empty map + assert!(!range_map.remove(&key, &value)); + assert_eq!(range_map.remove_key(&key), 0); + + // Empty map properties + assert!(range_map.is_empty()); + assert_eq!(range_map.len(), 0); + assert_eq!(range_map.values_len(&key), 0); + + // for_each on empty map + let mut count = 0; + range_map.for_each_value(&key, |_| count += 1); + assert_eq!(count, 0); + + // for_each_in_range on empty map + let mut count = 0; + range_map.for_each_value_in_range(&key, TestValue(0)..TestValue(100), |_| count += 1); + assert_eq!(count, 0); + } + + #[test] + fn test_range_map_range_queries() { + let (_temp_dir, range_map) = setup_test_range_map(); + + let key = TestKey(1); + + // Insert values 10 through 100 in steps of 10 + for i in 1..=10 { + range_map.insert(&key, TestValue(i * 10)); + } + range_map.db_context.flush(); + + // Test inclusive range + let values = range_map.values_in_range(&key, TestValue(20)..=TestValue(50)); + assert_eq!(values.len(), 4); // 20, 30, 40, 50 + assert!(values.contains(&TestValue(20))); + assert!(values.contains(&TestValue(30))); + assert!(values.contains(&TestValue(40))); + assert!(values.contains(&TestValue(50))); + + // Test exclusive range + let values = range_map.values_in_range(&key, TestValue(20)..TestValue(50)); + assert_eq!(values.len(), 3); // 20, 30, 40 + assert!(values.contains(&TestValue(20))); + assert!(values.contains(&TestValue(30))); + assert!(values.contains(&TestValue(40))); + assert!(!values.contains(&TestValue(50))); + + // Test unbounded start + let values = range_map.values_in_range(&key, ..TestValue(30)); + assert_eq!(values.len(), 2); // 10, 20 + assert!(values.contains(&TestValue(10))); + assert!(values.contains(&TestValue(20))); + assert!(!values.contains(&TestValue(30))); // Exclusive upper bound + + // Test inclusive unbounded start + let values = range_map.values_in_range(&key, ..=TestValue(30)); + assert_eq!(values.len(), 3); // 10, 20, 30 + assert!(values.contains(&TestValue(10))); + assert!(values.contains(&TestValue(20))); + assert!(values.contains(&TestValue(30))); + + // Test unbounded end + let values = range_map.values_in_range(&key, TestValue(80)..); + assert_eq!(values.len(), 3); // 80, 90, 100 + assert!(values.contains(&TestValue(80))); + assert!(values.contains(&TestValue(90))); + assert!(values.contains(&TestValue(100))); + + // Test full range + let values = range_map.values_in_range(&key, ..); + assert_eq!(values.len(), 10); // All values + + // Test for_each_value_in_range with different range types + let mut values = Vec::new(); + range_map.for_each_value_in_range(&key, 
TestValue(60)..=TestValue(80), |v| { + values.push(v.clone()) + }); + assert_eq!(values.len(), 3); // 60, 70, 80 + assert!(values.contains(&TestValue(60))); + assert!(values.contains(&TestValue(70))); + assert!(values.contains(&TestValue(80))); + } + + #[test] + fn test_range_map_edge_cases() { + let (_temp_dir, range_map) = setup_test_range_map(); + + // Test with minimum and maximum values + let key = TestKey(u32::MAX); + let min_value = TestValue(u32::MIN); + let max_value = TestValue(u32::MAX); + + range_map.insert(&key, min_value.clone()); + range_map.insert(&key, max_value.clone()); + range_map.db_context.flush(); + + assert!(range_map.contains(&key, &min_value)); + assert!(range_map.contains(&key, &max_value)); + + // Test with adjacent keys + let key1 = TestKey(100); + let key2 = TestKey(101); + + range_map.insert(&key1, TestValue(1)); + range_map.insert(&key2, TestValue(2)); + range_map.db_context.flush(); + + assert_eq!(range_map.values_for_key(&key1).len(), 1); + assert_eq!(range_map.values_for_key(&key2).len(), 1); + + // Test removing non-existent values + assert!(!range_map.remove(&key1, &TestValue(999))); + assert!(!range_map.remove(&TestKey(999), &TestValue(1))); + + // Test with empty range + let values = range_map.values_in_range(&key1, TestValue(2)..TestValue(1)); + assert!(values.is_empty()); + + // Test with point range (single value) + let values = range_map.values_in_range(&key1, TestValue(1)..=TestValue(1)); + assert_eq!(values.len(), 1); + assert!(values.contains(&TestValue(1))); + } + + #[test] + fn test_range_map_persistence() { + // This test verifies that data persists across database connections + let temp_dir = tempdir().unwrap(); + let path = temp_dir.path().to_str().unwrap(); + + // First connection + { + let mut opts = Options::default(); + opts.create_if_missing(true); + opts.create_missing_column_families(true); + + let cf_descriptor = + ColumnFamilyDescriptor::new(TestRangeMapCF::NAME, Options::default()); + let db = DB::open_cf_descriptors(&opts, path, vec![cf_descriptor]).unwrap(); + let typed_db = TypedRocksDB { db }; + + let write_support = BufferedWriteSupport::new(&typed_db); + let db_context = TypedDbContext::new(&typed_db, write_support); + let range_map = TestRangeMap { db_context }; + + // Insert some data + range_map.insert(&TestKey(1), TestValue(10)); + range_map.insert(&TestKey(1), TestValue(20)); + range_map.insert(&TestKey(2), TestValue(30)); + + // Flush to ensure data is written to disk + range_map.db_context.flush(); + + // DB will be closed when it goes out of scope + } + + // Second connection + { + let opts = Options::default(); + let cf_descriptor = + ColumnFamilyDescriptor::new(TestRangeMapCF::NAME, Options::default()); + let db = DB::open_cf_descriptors(&opts, path, vec![cf_descriptor]).unwrap(); + let typed_db = TypedRocksDB { db }; + + let write_support = BufferedWriteSupport::new(&typed_db); + let db_context = TypedDbContext::new(&typed_db, write_support); + let range_map = TestRangeMap { db_context }; + + // Verify data persisted + assert!(range_map.contains(&TestKey(1), &TestValue(10))); + assert!(range_map.contains(&TestKey(1), &TestValue(20))); + assert!(range_map.contains(&TestKey(2), &TestValue(30))); + + // Verify counts + assert_eq!(range_map.values_for_key(&TestKey(1)).len(), 2); + assert_eq!(range_map.values_for_key(&TestKey(2)).len(), 1); + + // Add more data + range_map.insert(&TestKey(3), TestValue(40)); + range_map.db_context.flush(); + } + + // Third connection to verify second connection's changes + { + let 
opts = Options::default(); + let cf_descriptor = + ColumnFamilyDescriptor::new(TestRangeMapCF::NAME, Options::default()); + let db = DB::open_cf_descriptors(&opts, path, vec![cf_descriptor]).unwrap(); + let typed_db = TypedRocksDB { db }; + + let write_support = BufferedWriteSupport::new(&typed_db); + let db_context = TypedDbContext::new(&typed_db, write_support); + let range_map = TestRangeMap { db_context }; + + // Verify all data including the new addition + assert!(range_map.contains(&TestKey(1), &TestValue(10))); + assert!(range_map.contains(&TestKey(1), &TestValue(20))); + assert!(range_map.contains(&TestKey(2), &TestValue(30))); + assert!(range_map.contains(&TestKey(3), &TestValue(40))); + + // Clean up + range_map.clear(); + range_map.db_context.flush(); + assert!(range_map.is_empty()); + } + } + + #[test] + fn test_range_map_concurrent_operations() { + let (_temp_dir, range_map) = setup_test_range_map(); + + // Insert a large number of values + for i in 0..100 { + range_map.insert(&TestKey(i), TestValue(i)); + } + range_map.db_context.flush(); + + // Verify all values were inserted + for i in 0..100 { + assert!(range_map.contains(&TestKey(i), &TestValue(i))); + } + + // Remove every other value + for i in (0..100).step_by(2) { + assert!(range_map.remove(&TestKey(i), &TestValue(i))); + } + range_map.db_context.flush(); + + // Verify correct values remain + for i in 0..100 { + if i % 2 == 0 { + assert!(!range_map.contains(&TestKey(i), &TestValue(i))); + } else { + assert!(range_map.contains(&TestKey(i), &TestValue(i))); + } + } + + // Count remaining keys + let keys = range_map.keys(); + assert_eq!(keys.len(), 50); // Only odd-numbered keys remain } } From a38c585de55a44d59f0dca4ebefc4cc004f2daa0 Mon Sep 17 00:00:00 2001 From: Alexandru Murtaza Date: Tue, 4 Mar 2025 04:26:57 +0200 Subject: [PATCH 04/11] extract BspManager and make it persistent --- Cargo.lock | 1 + node/Cargo.toml | 1 + node/src/service.rs | 7 +- node/src/services/bsp_peer_manager.rs | 607 ++++++++++++++++++ node/src/services/builder.rs | 38 +- node/src/services/handler.rs | 6 + node/src/services/mod.rs | 1 + node/src/tasks/msp_move_bucket.rs | 280 +------- .../integration/msp/move-bucket.test.ts | 12 +- 9 files changed, 685 insertions(+), 268 deletions(-) create mode 100644 node/src/services/bsp_peer_manager.rs diff --git a/Cargo.lock b/Cargo.lock index c95fde8ea..746a22f7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -16212,6 +16212,7 @@ dependencies = [ "polkadot-runtime-common 17.0.0", "priority-queue", "rand", + "rocksdb", "sc-basic-authorship", "sc-chain-spec", "sc-cli", diff --git a/node/Cargo.toml b/node/Cargo.toml index 0990d3eee..a6511b358 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -32,6 +32,7 @@ rand = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } toml = { workspace = true } +rocksdb = { workspace = true } # Local pallet-file-system = { workspace = true } diff --git a/node/src/service.rs b/node/src/service.rs index 087a1ffc5..6fba6fa46 100644 --- a/node/src/service.rs +++ b/node/src/service.rs @@ -280,16 +280,21 @@ where StorageHubBuilder: StorageLayerBuilder + Buildable<(R, S)>, StorageHubHandler<(R, S)>: RunnableTasks, { + let rocks_db_path = rocksdb_root_path.into(); + // Spawn the Blockchain Service if node is running as a Storage Provider sh_builder .with_blockchain( client.clone(), keystore.clone(), Arc::new(rpc_handlers), - rocksdb_root_path, + rocks_db_path.clone(), ) .await; + // Initialize the BSP peer manager + 
sh_builder.with_peer_manager(rocks_db_path.clone()); + // Build the StorageHubHandler let mut sh_handler = sh_builder.build(); diff --git a/node/src/services/bsp_peer_manager.rs b/node/src/services/bsp_peer_manager.rs new file mode 100644 index 000000000..28cf144fd --- /dev/null +++ b/node/src/services/bsp_peer_manager.rs @@ -0,0 +1,607 @@ +use std::{ + collections::{HashMap, HashSet}, + path::PathBuf, + time::Instant, +}; + +use anyhow::anyhow; +use codec::{Decode, Encode}; +use log::*; +use ordered_float::OrderedFloat; +use priority_queue::PriorityQueue; +use rand::{prelude::SliceRandom, thread_rng}; +use rocksdb::{ColumnFamilyDescriptor, Options, DB}; +use sc_network::PeerId; +use sp_core::H256; + +use shc_common::typed_store::{ + BufferedWriteSupport, ProvidesDbContext, ProvidesTypedDbAccess, ProvidesTypedDbSingleAccess, + ScaleEncodedCf, SingleScaleEncodedValueCf, TypedCf, TypedDbContext, TypedRocksDB, +}; +use tokio::sync::RwLock; + +// PeerId wrapper that implements Encode/Decode +#[derive(Clone, Debug, Eq, PartialEq, Encode, Decode, Hash)] +pub struct EncodablePeerId(Vec); + +impl From for EncodablePeerId { + fn from(peer_id: PeerId) -> Self { + Self(peer_id.to_bytes()) + } +} + +impl From for PeerId { + fn from(encodable_id: EncodablePeerId) -> Self { + PeerId::from_bytes(&encodable_id.0).expect("Valid peer ID bytes") + } +} + +// Column family definitions with proper types + +#[derive(Default, Clone)] +pub struct PeerIdCf; +impl ScaleEncodedCf for PeerIdCf { + type Key = EncodablePeerId; + type Value = PersistentBspPeerStats; + + const SCALE_ENCODED_NAME: &'static str = "bsp_peer_stats"; +} + +#[derive(Default, Clone)] +pub struct PeerFileKeyCf; +impl ScaleEncodedCf for PeerFileKeyCf { + type Key = (EncodablePeerId, H256); // Composite key of PeerId and file_key + type Value = (); // No value needed, just tracking the association + + const SCALE_ENCODED_NAME: &'static str = "bsp_peer_file_keys"; +} + +#[derive(Default)] +pub struct LastUpdateTimeCf; +impl SingleScaleEncodedValueCf for LastUpdateTimeCf { + type Value = u64; // Timestamp in millis + + const SINGLE_SCALE_ENCODED_VALUE_NAME: &'static str = "bsp_peer_last_update"; +} + +const ALL_COLUMN_FAMILIES: [&str; 3] = + [PeerIdCf::NAME, PeerFileKeyCf::NAME, LastUpdateTimeCf::NAME]; + +/// Version of BspPeerStats that can be persisted with SCALE codec +#[derive(Debug, Clone, Encode, Decode)] +pub struct PersistentBspPeerStats { + /// The number of successful downloads for the peer + pub successful_downloads: u64, + /// The number of failed downloads for the peer + pub failed_downloads: u64, + /// The total number of bytes downloaded for the peer + pub total_bytes_downloaded: u64, + /// The total download time for the peer + pub total_download_time_ms: u64, + /// Timestamp (in millis since UNIX epoch) of the last successful download + pub last_success_time_millis: Option, +} + +impl From<&BspPeerStats> for PersistentBspPeerStats { + fn from(stats: &BspPeerStats) -> Self { + Self { + successful_downloads: stats.successful_downloads, + failed_downloads: stats.failed_downloads, + total_bytes_downloaded: stats.total_bytes_downloaded, + total_download_time_ms: stats.total_download_time_ms, + last_success_time_millis: stats + .last_success_time + .map(|time| time.elapsed().as_millis().try_into().unwrap_or(u64::MAX)), + } + } +} + +/// Statistics about a BSP peer's performance +#[derive(Debug, Clone)] +pub struct BspPeerStats { + /// The number of successful downloads for the peer + pub successful_downloads: u64, + /// The number of 
failed downloads for the peer + pub failed_downloads: u64, + /// The total number of bytes downloaded for the peer + pub total_bytes_downloaded: u64, + /// The total download time for the peer + pub total_download_time_ms: u64, + /// The time of the last successful download for the peer + pub last_success_time: Option, + /// The set of file keys that the peer can provide + pub file_keys: HashSet, +} + +impl BspPeerStats { + fn new() -> Self { + Self { + successful_downloads: 0, + failed_downloads: 0, + total_bytes_downloaded: 0, + total_download_time_ms: 0, + last_success_time: None, + file_keys: HashSet::new(), + } + } + + fn from_persistent(persistent: PersistentBspPeerStats, file_keys: HashSet) -> Self { + Self { + successful_downloads: persistent.successful_downloads, + failed_downloads: persistent.failed_downloads, + total_bytes_downloaded: persistent.total_bytes_downloaded, + total_download_time_ms: persistent.total_download_time_ms, + last_success_time: persistent.last_success_time_millis.map(|_| Instant::now()), + file_keys, + } + } + + /// Record a successful download and update the stats + fn add_success(&mut self, bytes_downloaded: u64, download_time_ms: u64) { + self.successful_downloads += 1; + self.total_bytes_downloaded += bytes_downloaded; + self.total_download_time_ms += download_time_ms; + self.last_success_time = Some(Instant::now()); + } + + /// Record a failed download attempt + fn add_failure(&mut self) { + self.failed_downloads += 1; + } + + /// Calculate the success rate (0.0 to 1.0) + fn get_success_rate(&self) -> f64 { + let total = self.successful_downloads + self.failed_downloads; + if total == 0 { + return 0.0; + } + self.successful_downloads as f64 / total as f64 + } + + /// Calculate average download speed in bytes/second + fn get_average_speed_bytes_per_sec(&self) -> f64 { + if self.total_download_time_ms == 0 { + return 0.0; + } + (self.total_bytes_downloaded as f64 * 1000.0) / self.total_download_time_ms as f64 + } + + /// Calculate an overall score for this peer (0.0 to 1.0) + /// The score is a weighted combination of success rate and speed + fn get_score(&self) -> f64 { + // Weight success rate (70%) and speed (30%) + let success_weight = 0.7; + let speed_weight = 0.3; + + let success_score = self.get_success_rate(); + let speed_score = if self.successful_downloads == 0 { + 0.0 + } else { + // Normalize speed score (50MB/s is considered 1.0) + let max_speed = 50.0 * 1024.0 * 1024.0; + (self.get_average_speed_bytes_per_sec() / max_speed).min(1.0) + }; + + (success_score * success_weight) + (speed_score * speed_weight) + } + + /// Add a file key that this peer can provide + fn add_file_key(&mut self, file_key: H256) -> bool { + self.file_keys.insert(file_key) + } +} + +/// Persistent storage for the BSP peer manager +pub struct BspPeerManagerStore { + /// The RocksDB database. 
+ rocks: TypedRocksDB, +} + +impl BspPeerManagerStore { + pub fn new(db_path: PathBuf) -> anyhow::Result { + // Ensure the directory exists + std::fs::create_dir_all(&db_path)?; + + let db_path_str = db_path.to_str().expect("Failed to convert path to string"); + info!("BSP peer manager DB path: {}", db_path_str); + + // Setup RocksDB + let mut db_opts = Options::default(); + db_opts.create_if_missing(true); + db_opts.create_missing_column_families(true); + + let column_families: Vec = ALL_COLUMN_FAMILIES + .iter() + .map(|cf| ColumnFamilyDescriptor::new(cf.to_string(), Options::default())) + .collect(); + + let db = DB::open_cf_descriptors(&db_opts, db_path_str, column_families) + .map_err(|e| anyhow!("Failed to open BSP peer manager database: {}", e))?; + + Ok(Self { + rocks: TypedRocksDB { db }, + }) + } + + /// Starts a read/write interaction with the DB through typed APIs + pub fn open_rw_context(&self) -> BspPeerManagerRwContext<'_> { + BspPeerManagerRwContext::new(TypedDbContext::new( + &self.rocks, + BufferedWriteSupport::new(&self.rocks), + )) + } +} + +/// Read/write context for the BSP peer manager store +pub struct BspPeerManagerRwContext<'a> { + /// The RocksDB database context + db_context: TypedDbContext<'a, TypedRocksDB, BufferedWriteSupport<'a, TypedRocksDB>>, +} + +impl<'a> BspPeerManagerRwContext<'a> { + pub fn new( + db_context: TypedDbContext<'a, TypedRocksDB, BufferedWriteSupport<'a, TypedRocksDB>>, + ) -> Self { + Self { db_context } + } + + /// Commits the changes to the database + pub fn commit(self) { + self.db_context.flush(); + } +} + +impl<'a> ProvidesDbContext for BspPeerManagerRwContext<'a> { + fn db_context(&self) -> &TypedDbContext> { + &self.db_context + } +} + +impl<'a> ProvidesTypedDbAccess for BspPeerManagerRwContext<'a> {} +impl<'a> ProvidesTypedDbSingleAccess for BspPeerManagerRwContext<'a> {} + +/// Thread-safe persistent BSP peer manager +/// +/// This service tracks BSP peer performance metrics and provides methods to: +/// - Record successful and failed download attempts +/// - Select the best peers for downloading specific files +/// - Persist performance data across node restarts +/// +/// All operations are thread-safe, use interior mutability, and changes are +/// immediately persisted to the database. 
+pub struct BspPeerManager { + /// Inner state protected by a read-write lock + inner: RwLock, + /// Database state store + store: BspPeerManagerStore, +} + +/// Inner state of the BSP peer manager +struct BspPeerManagerInner { + /// Performance stats for each peer + peers: HashMap, + /// Priority queues for each file, mapping peers to their scores + peer_queues: HashMap>>, +} + +impl BspPeerManager { + /// Create a new BspPeerManager with persistent storage + pub fn new(db_path: PathBuf) -> anyhow::Result { + let store = BspPeerManagerStore::new(db_path)?; + + // Load existing data + let (peers, peer_queues) = Self::load_from_db(&store)?; + + Ok(Self { + inner: RwLock::new(BspPeerManagerInner { peers, peer_queues }), + store, + }) + } + + /// Load peer stats from the database + fn load_from_db( + store: &BspPeerManagerStore, + ) -> anyhow::Result<( + HashMap, + HashMap>>, + )> { + let rw_context = store.open_rw_context(); + + // Load all peer stats + let mut peers = HashMap::new(); + let mut peer_file_keys: HashMap> = HashMap::new(); + + // Load peer stats + { + let cf = rw_context.db_context().cf(&PeerIdCf::default()); + let mut iter = cf.iterate_with_range(..); + while let Some((encodable_id, stats)) = iter.next() { + let peer_id = PeerId::from(encodable_id); + peer_file_keys.entry(peer_id).or_insert_with(HashSet::new); + peers.insert( + peer_id, + BspPeerStats::from_persistent(stats, HashSet::new()), + ); + } + } + + // Load file keys + { + let cf = rw_context.db_context().cf(&PeerFileKeyCf::default()); + let mut iter = cf.iterate_with_range(..); + while let Some(((encodable_id, file_key), _)) = iter.next() { + let peer_id = PeerId::from(encodable_id); + if let Some(file_keys) = peer_file_keys.get_mut(&peer_id) { + file_keys.insert(file_key); + } + } + } + + // Combine stats with file keys + for (peer_id, file_keys) in peer_file_keys { + if let Some(stats) = peers.get_mut(&peer_id) { + stats.file_keys = file_keys; + } + } + + // Build priority queues + let mut peer_queues = HashMap::new(); + for (peer_id, stats) in &peers { + for file_key in &stats.file_keys { + let queue = peer_queues + .entry(*file_key) + .or_insert_with(PriorityQueue::new); + queue.push(*peer_id, OrderedFloat::from(stats.get_score())); + } + } + + info!("Loaded {} BSP peers from database", peers.len()); + Ok((peers, peer_queues)) + } + + /// Register a BSP peer for a specific file + pub async fn add_peer(&self, peer_id: PeerId, file_key: H256) { + // Update in-memory state first + let add_to_db = { + let mut inner = self.inner.write().await; + + let stats = inner.peers.entry(peer_id).or_insert_with(BspPeerStats::new); + + // Check if this is a new file key for this peer + let is_new_file_key = stats.add_file_key(file_key); + + // Store the score outside the borrow if we need to update the queue + let score = if is_new_file_key { + stats.get_score() + } else { + 0.0 // Default value, won't be used + }; + + // Add to priority queue if it's a new file key + if is_new_file_key { + let queue = inner + .peer_queues + .entry(file_key) + .or_insert_with(PriorityQueue::new); + queue.push(peer_id, OrderedFloat::from(score)); + } + + is_new_file_key + }; + + // Only update the database if we added a new file key + if add_to_db { + let rw_context = self.store.open_rw_context(); + + // Store the file key association using the typed API + let encodable_id = EncodablePeerId::from(peer_id); + rw_context + .db_context() + .cf(&PeerFileKeyCf::default()) + .put(&(encodable_id.clone(), file_key), &()); + + // Update peer stats + { + 
let inner = self.inner.read().await; + if let Some(stats) = inner.peers.get(&peer_id) { + let cf = rw_context.db_context().cf(&PeerIdCf::default()); + cf.put(&encodable_id, &PersistentBspPeerStats::from(stats)); + } + } + + // Update timestamp + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64; + rw_context + .access_value(&LastUpdateTimeCf::default()) + .write(&now); + + // Commit all changes + rw_context.commit(); + } + } + + /// Record a successful download + pub async fn record_success( + &self, + peer_id: PeerId, + bytes_downloaded: u64, + download_time_ms: u64, + ) { + // Update in-memory state first + let should_update_db = { + let mut inner = self.inner.write().await; + + // First, get and update the stats + let peer_exists = if let Some(stats) = inner.peers.get_mut(&peer_id) { + stats.add_success(bytes_downloaded, download_time_ms); + + // Store what we need outside the borrow + let new_score = stats.get_score(); + let file_keys: Vec = stats.file_keys.iter().cloned().collect(); + + // Now update all queues (using a separate borrow) + for file_key in file_keys { + if let Some(queue) = inner.peer_queues.get_mut(&file_key) { + queue.change_priority(&peer_id, OrderedFloat::from(new_score)); + } + } + + true + } else { + false + }; + + peer_exists + }; + + // Update the database if we updated the in-memory state + if should_update_db { + // Get the updated stats to save + let stats_data = { + let inner = self.inner.read().await; + inner.peers.get(&peer_id).map(PersistentBspPeerStats::from) + }; + + // Save the updated stats using the typed API + if let Some(stats_data) = stats_data { + let rw_context = self.store.open_rw_context(); + let encodable_id = EncodablePeerId::from(peer_id); + + // Store updated stats + rw_context + .db_context() + .cf(&PeerIdCf::default()) + .put(&encodable_id, &stats_data); + + // Update timestamp + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64; + rw_context + .access_value(&LastUpdateTimeCf::default()) + .write(&now); + + // Commit all changes + rw_context.commit(); + } + } + } + + /// Record a failed download attempt + pub async fn record_failure(&self, peer_id: PeerId) { + // Update in-memory state first + let should_update_db = { + let mut inner = self.inner.write().await; + + // First, get and update the stats + let peer_exists = if let Some(stats) = inner.peers.get_mut(&peer_id) { + stats.add_failure(); + + // Store what we need outside the borrow + let new_score = stats.get_score(); + let file_keys: Vec = stats.file_keys.iter().cloned().collect(); + + // Now update all queues (using a separate borrow) + for file_key in file_keys { + if let Some(queue) = inner.peer_queues.get_mut(&file_key) { + queue.change_priority(&peer_id, OrderedFloat::from(new_score)); + } + } + + true + } else { + false + }; + + peer_exists + }; + + // Update the database if we updated the in-memory state + if should_update_db { + // Get the updated stats to save + let stats_data = { + let inner = self.inner.read().await; + inner.peers.get(&peer_id).map(PersistentBspPeerStats::from) + }; + + // Save the updated stats using the typed API + if let Some(stats_data) = stats_data { + let rw_context = self.store.open_rw_context(); + let encodable_id = EncodablePeerId::from(peer_id); + + // Store updated stats + rw_context + .db_context() + .cf(&PeerIdCf::default()) + .put(&encodable_id, &stats_data); + + // Update timestamp + let now 
= std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64; + rw_context + .access_value(&LastUpdateTimeCf::default()) + .write(&now); + + // Commit all changes + rw_context.commit(); + } + } + } + + /// Select the best peers for downloading a specific file + /// + /// Returns a list of peers in preferred order for download attempts. + /// The selection combines the best performing peers with some random peers + /// to ensure both performance and network health. + /// + /// # Arguments + /// * `count_best` - Number of top-performing peers to select + /// * `count_random` - Number of additional random peers to select + /// * `file_key` - The file to download + pub async fn select_peers( + &self, + count_best: usize, + count_random: usize, + file_key: &H256, + ) -> Vec { + let inner = self.inner.read().await; + + let queue = match inner.peer_queues.get(file_key) { + Some(queue) => queue, + None => return Vec::new(), + }; + + let mut selected_peers = Vec::with_capacity(count_best + count_random); + let mut queue_clone = queue.clone(); + + // Get top performers first + let actual_best_count = count_best.min(queue_clone.len()); + for _ in 0..actual_best_count { + if let Some((peer_id, _)) = queue_clone.pop() { + selected_peers.push(peer_id); + } + } + + // Add some random peers for diversity + if count_random > 0 && !queue_clone.is_empty() { + let remaining_peers: Vec<_> = queue_clone + .into_iter() + .map(|(peer_id, _)| peer_id) + .collect(); + + let mut remaining_peers = remaining_peers; + remaining_peers.shuffle(&mut thread_rng()); + + let actual_random_count = count_random.min(remaining_peers.len()); + selected_peers.extend_from_slice(&remaining_peers[0..actual_random_count]); + } + + selected_peers + } +} diff --git a/node/src/services/builder.rs b/node/src/services/builder.rs index 2aba52702..78bb0cf79 100644 --- a/node/src/services/builder.rs +++ b/node/src/services/builder.rs @@ -1,4 +1,5 @@ use async_channel::Receiver; +use log::*; use sc_network::{config::IncomingRequest, service::traits::NetworkService, ProtocolName}; use sc_service::RpcHandlers; use shc_indexer_db::DbPool; @@ -19,6 +20,7 @@ use shc_rpc::StorageHubClientRpcConfig; const DEFAULT_EXTRINSIC_RETRY_TIMEOUT_SECONDS: u64 = 60; use super::{ + bsp_peer_manager::BspPeerManager, handler::{ProviderConfig, StorageHubHandler}, types::{ BspForestStorageHandlerT, BspProvider, InMemoryStorageLayer, MspForestStorageHandlerT, @@ -47,6 +49,7 @@ where extrinsic_retry_timeout: u64, indexer_db_pool: Option, notify_period: Option, + peer_manager: Option>, } /// Common components to build for any given configuration of [`ShRole`] and [`ShStorageLayer`]. @@ -66,6 +69,7 @@ where extrinsic_retry_timeout: DEFAULT_EXTRINSIC_RETRY_TIMEOUT_SECONDS, indexer_db_pool: None, notify_period: None, + peer_manager: None, } } @@ -169,6 +173,28 @@ where self } + /// Initialize the BSP peer manager for tracking peer performance + pub fn with_peer_manager(&mut self, rocks_db_path: PathBuf) -> &mut Self { + let mut peer_db_path = rocks_db_path; + peer_db_path.push("bsp_peer_manager"); + + // Create directory if it doesn't exist + if let Err(e) = std::fs::create_dir_all(&peer_db_path) { + warn!( + "Failed to create directory for BSP peer manager at {:?}: {}. Will continue without peer manager.", + peer_db_path, e + ); + return self; + } + + let manager = BspPeerManager::new(peer_db_path) + .expect("Failed to initialize BSP peer manager. 
This is a critical component and the node cannot function without it."); + + info!("Successfully initialized BSP peer manager"); + self.peer_manager = Some(Arc::new(manager)); + self + } + /// Create the RPC configuration needed to initialise the RPC methods of the StorageHub client. /// /// This method is meant to be called after the Storage Layer has been set up. @@ -304,6 +330,7 @@ where extrinsic_retry_timeout: self.extrinsic_retry_timeout, }, self.indexer_db_pool.clone(), + self.peer_manager.expect("Peer Manager not set"), ) } } @@ -340,6 +367,7 @@ where extrinsic_retry_timeout: self.extrinsic_retry_timeout, }, self.indexer_db_pool.clone(), + self.peer_manager.expect("Peer Manager not set"), ) } } @@ -368,14 +396,16 @@ where .as_ref() .expect("File Storage not set.") .clone(), - // Not used by the user role - <(UserRole, NoStorageLayer) as ShNodeType>::FSH::new(), - // Not used by the user role + self.forest_storage_handler + .as_ref() + .expect("Forest Storage Handler not set.") + .clone(), ProviderConfig { - capacity_config: CapacityConfig::new(0, 0), + capacity_config: self.capacity_config.expect("Capacity Config not set"), extrinsic_retry_timeout: self.extrinsic_retry_timeout, }, self.indexer_db_pool.clone(), + self.peer_manager.expect("Peer Manager not set"), ) } } diff --git a/node/src/services/handler.rs b/node/src/services/handler.rs index 9c0380109..11e71cd43 100644 --- a/node/src/services/handler.rs +++ b/node/src/services/handler.rs @@ -29,6 +29,7 @@ use shc_forest_manager::traits::ForestStorageHandler; use shc_indexer_db::DbPool; use crate::{ + services::bsp_peer_manager::BspPeerManager, services::types::{ BspForestStorageHandlerT, BspProvider, MspForestStorageHandlerT, MspProvider, ShNodeType, ShStorageLayer, UserRole, @@ -75,6 +76,8 @@ where pub provider_config: ProviderConfig, /// The indexer database pool. pub indexer_db_pool: Option, + /// The BSP peer manager for tracking peer performance. 
+ pub peer_manager: Arc, } impl Clone for StorageHubHandler @@ -90,6 +93,7 @@ where forest_storage_handler: self.forest_storage_handler.clone(), provider_config: self.provider_config.clone(), indexer_db_pool: self.indexer_db_pool.clone(), + peer_manager: self.peer_manager.clone(), } } } @@ -106,6 +110,7 @@ where forest_storage_handler: NT::FSH, provider_config: ProviderConfig, indexer_db_pool: Option, + peer_manager: Arc, ) -> Self { Self { task_spawner, @@ -115,6 +120,7 @@ where forest_storage_handler, provider_config, indexer_db_pool, + peer_manager, } } } diff --git a/node/src/services/mod.rs b/node/src/services/mod.rs index 6b27b438c..650346470 100644 --- a/node/src/services/mod.rs +++ b/node/src/services/mod.rs @@ -1,3 +1,4 @@ +pub mod bsp_peer_manager; pub mod builder; pub mod forest_storage; pub mod handler; diff --git a/node/src/tasks/msp_move_bucket.rs b/node/src/tasks/msp_move_bucket.rs index 614ec5ab1..b47cd866f 100644 --- a/node/src/tasks/msp_move_bucket.rs +++ b/node/src/tasks/msp_move_bucket.rs @@ -1,15 +1,13 @@ use anyhow::anyhow; use codec::Decode; use futures::future::join_all; -use ordered_float::OrderedFloat; -use priority_queue::PriorityQueue; use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng}; use std::{ - collections::{HashMap, HashSet}, + collections::HashSet, sync::{Arc, Mutex}, time::Duration, }; -use tokio::sync::{RwLock, Semaphore}; +use tokio::sync::Semaphore; use sc_network::PeerId; use sc_tracing::tracing::*; @@ -36,6 +34,7 @@ use shp_constants::FILE_CHUNK_SIZE; use shp_file_metadata::{Chunk, ChunkId, Leaf as ProvenLeaf}; use crate::services::{ + bsp_peer_manager::BspPeerManager, handler::StorageHubHandler, types::{MspForestStorageHandlerT, ShNodeType}, }; @@ -95,7 +94,6 @@ where NT::FSH: MspForestStorageHandlerT, { storage_hub_handler: StorageHubHandler, - peer_manager: Arc>, pending_bucket_id: Option, file_storage_inserted_file_keys: Vec, } @@ -108,7 +106,6 @@ where fn clone(&self) -> MspRespondMoveBucketTask { Self { storage_hub_handler: self.storage_hub_handler.clone(), - peer_manager: self.peer_manager.clone(), pending_bucket_id: self.pending_bucket_id.clone(), file_storage_inserted_file_keys: self.file_storage_inserted_file_keys.clone(), } @@ -123,7 +120,6 @@ where pub fn new(storage_hub_handler: StorageHubHandler) -> Self { Self { storage_hub_handler, - peer_manager: Arc::new(RwLock::new(BspPeerManager::new())), pending_bucket_id: None, file_storage_inserted_file_keys: Vec::new(), } @@ -237,11 +233,11 @@ where } // Register BSP peers for file transfer - { - let mut peer_manager = task.peer_manager.write().await; - for &peer_id in &bsp_peer_ids { - peer_manager.add_peer(peer_id, file_key); - } + for &peer_id in &bsp_peer_ids { + task.storage_hub_handler + .peer_manager + .add_peer(peer_id, file_key) + .await; } // Download file using existing download_file method @@ -618,7 +614,7 @@ where chunk_batch: &HashSet, peer_id: PeerId, download_request: RemoteDownloadDataResponse, - peer_manager: &Arc>, + peer_manager: &Arc, batch_size_bytes: u64, start_time: std::time::Instant, ) -> Result { @@ -628,8 +624,7 @@ where // Verify fingerprint let expected_fingerprint = file_metadata.fingerprint(); if file_key_proof.file_metadata.fingerprint() != expected_fingerprint { - let mut peer_manager = peer_manager.write().await; - peer_manager.record_failure(peer_id); + peer_manager.record_failure(peer_id).await; return Err(anyhow!( "Fingerprint mismatch. 
Expected: {:?}, got: {:?}", expected_fingerprint, @@ -642,8 +637,7 @@ where .map_err(|e| anyhow!("Failed to get proven data: {:?}", e))?; if proven.len() != chunk_batch.len() { - let mut peer_manager = peer_manager.write().await; - peer_manager.record_failure(peer_id); + peer_manager.record_failure(peer_id).await; return Err(anyhow!( "Expected {} proven chunks but got {}", chunk_batch.len(), @@ -658,8 +652,9 @@ where } let download_time = start_time.elapsed(); - let mut peer_manager = peer_manager.write().await; - peer_manager.record_success(peer_id, batch_size_bytes, download_time.as_millis() as u64); + peer_manager + .record_success(peer_id, batch_size_bytes, download_time.as_millis() as u64) + .await; Ok(true) } @@ -705,7 +700,7 @@ where file_metadata: &FileMetadata, chunk_batch: &HashSet, bucket: &BucketId, - peer_manager: &Arc>, + peer_manager: &Arc, batch_size_bytes: u64, ) -> Result { for attempt in 0..=DOWNLOAD_RETRY_ATTEMPTS { @@ -753,8 +748,7 @@ where continue; } Err(e) => { - let mut peer_manager = peer_manager.write().await; - peer_manager.record_failure(peer_id); + peer_manager.record_failure(peer_id).await; return Err(e); } } @@ -770,8 +764,7 @@ where continue; } Err(e) => { - let mut peer_manager = peer_manager.write().await; - peer_manager.record_failure(peer_id); + peer_manager.record_failure(peer_id).await; return Err(anyhow!( "Download request failed after {} attempts to peer {:?}: {:?}", DOWNLOAD_RETRY_ATTEMPTS + 1, @@ -829,7 +822,7 @@ where // Create semaphore for chunk-level parallelism let chunk_semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_CHUNKS_PER_FILE)); - let peer_manager = Arc::clone(&self.peer_manager); + let peer_manager = Arc::clone(&self.storage_hub_handler.peer_manager); let chunk_tasks: Vec<_> = (0..chunks_count) .step_by(MAX_CHUNKS_PER_REQUEST) @@ -850,19 +843,13 @@ where let batch_size_bytes = chunk_batch.len() as u64 * FILE_CHUNK_SIZE as u64; // Get the best performing peers for this request and shuffle them - let selected_peers = { - let peer_manager = peer_manager.read().await; - let mut peers = peer_manager.select_peers( - 2, - CHUNK_REQUEST_PEER_RETRY_ATTEMPTS - 2, - &file_key, - ); - peers.shuffle(&mut *GLOBAL_RNG.lock().unwrap()); - peers - }; + let mut peers = peer_manager + .select_peers(2, CHUNK_REQUEST_PEER_RETRY_ATTEMPTS - 2, &file_key) + .await; + peers.shuffle(&mut *GLOBAL_RNG.lock().unwrap()); // Try each selected peer - for peer_id in selected_peers { + for peer_id in peers { match task .try_download_chunk_batch( peer_id, @@ -939,224 +926,3 @@ where Ok(()) } } - -/// Tracks performance metrics for a BSP peer. -/// This struct is used to track the performance metrics for a BSP peer. -/// It is used to select the best performing peers for a given file. -#[derive(Debug, Clone)] -struct BspPeerStats { - /// The number of successful downloads for the peer - pub successful_downloads: u64, - - /// The number of failed downloads for the peer - pub failed_downloads: u64, - - /// The total number of bytes downloaded for the peer - pub total_bytes_downloaded: u64, - - /// The total download time for the peer - pub total_download_time_ms: u64, - - /// The time of the last successful download for the peer - pub last_success_time: Option, - - /// The set of file keys that the peer can provide. This is used to - /// update the right priority queue in [`BspPeerManager::peer_queues`]. 
- pub file_keys: HashSet, -} - -impl BspPeerStats { - fn new() -> Self { - Self { - successful_downloads: 0, - failed_downloads: 0, - total_bytes_downloaded: 0, - total_download_time_ms: 0, - last_success_time: None, - file_keys: HashSet::new(), - } - } - - fn record_success(&mut self, bytes_downloaded: u64, download_time_ms: u64) { - self.successful_downloads += 1; - self.total_bytes_downloaded += bytes_downloaded; - self.total_download_time_ms += download_time_ms; - self.last_success_time = Some(std::time::Instant::now()); - } - - fn record_failure(&mut self) { - self.failed_downloads += 1; - } - - fn get_success_rate(&self) -> f64 { - let total = self.successful_downloads + self.failed_downloads; - if total == 0 { - return 0.0; - } - self.successful_downloads as f64 / total as f64 - } - - fn get_average_speed_bytes_per_sec(&self) -> f64 { - if self.total_download_time_ms == 0 { - return 0.0; - } - (self.total_bytes_downloaded as f64 * 1000.0) / self.total_download_time_ms as f64 - } - - /// Calculates a score for the peer based on its success rate and average speed - /// - /// The score is a weighted combination of the peer's success rate and average speed. - /// The success rate is weighted more heavily (70%) compared to the average speed (30%). - fn get_score(&self) -> f64 { - // Combine success rate and speed into a single score - // Weight success rate more heavily (70%) compared to speed (30%) - let success_weight = 0.7; - let speed_weight = 0.3; - - let success_score = self.get_success_rate(); - let speed_score = if self.successful_downloads == 0 { - 0.0 - } else { - // Normalize speed score between 0 and 1 - // Using 30MB/s as a reference for max speed - let max_speed = 50.0 * 1024.0 * 1024.0; - (self.get_average_speed_bytes_per_sec() / max_speed).min(1.0) - }; - - (success_score * success_weight) + (speed_score * speed_weight) - } - - /// Register that this peer is requested for a file key - fn add_file_key(&mut self, file_key: H256) { - self.file_keys.insert(file_key); - } -} - -/// Manages BSP peer selection and performance tracking -/// -/// This struct maintains performance metrics for each peer and provides improved peer selection -/// based on historical performance. It uses a priority queue system to rank peers based on their -/// performance scores, which are calculated using a weighted combination of success rate (70%) and -/// download speed (30%). 
-/// -/// # Peer Selection Strategy -/// - Selects a mix of best-performing peers and random peers for each request -/// - Uses priority queues to maintain peer rankings per file -/// - Implements a hybrid selection approach: -/// - Best performers: Selected based on weighted scoring -/// - Random selection: Ensures network diversity and prevents starvation -/// -/// # Performance Tracking -/// - Tracks success/failure rates -/// - Monitors download speeds -/// - Records total bytes transferred -#[derive(Debug)] -struct BspPeerManager { - peers: HashMap, - // Map from file_key to a priority queue of peers sorted by score - peer_queues: HashMap>>, -} - -impl BspPeerManager { - /// Creates a new BspPeerManager with empty peer stats and queues - fn new() -> Self { - Self { - peers: HashMap::new(), - peer_queues: HashMap::new(), - } - } - - /// Registers a new peer for a specific file and initializes its performance tracking - fn add_peer(&mut self, peer_id: PeerId, file_key: H256) { - let stats = self - .peers - .entry(peer_id) - .or_insert_with(|| BspPeerStats::new()); - stats.add_file_key(file_key); - - // Add to the priority queue for this file key - let queue = self - .peer_queues - .entry(file_key) - .or_insert_with(PriorityQueue::new); - queue.push(peer_id, OrderedFloat::from(stats.get_score())); - } - - /// Records a successful download attempt and updates peer's performance metrics - fn record_success(&mut self, peer_id: PeerId, bytes_downloaded: u64, download_time_ms: u64) { - if let Some(stats) = self.peers.get_mut(&peer_id) { - stats.record_success(bytes_downloaded, download_time_ms); - let new_score = stats.get_score(); - - // Update scores in all queues containing this peer - for file_key in stats.file_keys.iter() { - if let Some(queue) = self.peer_queues.get_mut(file_key) { - queue.change_priority(&peer_id, OrderedFloat::from(new_score)); - } - } - } - } - - /// Records a failed download attempt and updates peer's performance metrics - fn record_failure(&mut self, peer_id: PeerId) { - if let Some(stats) = self.peers.get_mut(&peer_id) { - stats.record_failure(); - let new_score = stats.get_score(); - - // Update scores in all queues containing this peer - for file_key in stats.file_keys.iter() { - if let Some(queue) = self.peer_queues.get_mut(file_key) { - queue.change_priority(&peer_id, OrderedFloat::from(new_score)); - } - } - } - } - - /// Selects a list of peers for downloading chunks of a specific file - /// - /// # Arguments - /// - `count_best` - Number of top-performing peers to select based on scores - /// - `count_random` - Number of additional random peers to select for diversity - /// - `file_key` - The file key for which peers are being selected - /// - /// # Selection Strategy - /// 1. First selects the top `count_best` peers based on their performance scores - /// 2. Then randomly selects `count_random` additional peers from the remaining pool - /// 3. Returns a combined list of selected peers in a randomized order - /// - /// This hybrid approach ensures both performance (by selecting proven peers) and - /// network health (by giving chances to other peers through random selection). 
- fn select_peers(&self, count_best: usize, count_random: usize, file_key: &H256) -> Vec { - let queue = match self.peer_queues.get(file_key) { - Some(queue) => queue, - None => return Vec::new(), - }; - - let mut selected_peers = Vec::with_capacity(count_best + count_random); - let mut queue_clone = queue.clone(); - - // Extract top count_best peers - let actual_best_count = count_best.min(queue_clone.len()); - for _ in 0..actual_best_count { - if let Some((peer_id, _)) = queue_clone.pop() { - selected_peers.push(peer_id); - } - } - - // Randomly select additional peers from the remaining pool - if count_random > 0 && !queue_clone.is_empty() { - use rand::seq::SliceRandom; - let remaining_peers: Vec<_> = queue_clone - .into_iter() - .map(|(peer_id, _)| peer_id) - .collect(); - let mut remaining_peers = remaining_peers; - remaining_peers.shuffle(&mut *GLOBAL_RNG.lock().unwrap()); - - let actual_random_count = count_random.min(remaining_peers.len()); - selected_peers.extend(remaining_peers.iter().take(actual_random_count)); - } - - selected_peers - } -} diff --git a/test/suites/integration/msp/move-bucket.test.ts b/test/suites/integration/msp/move-bucket.test.ts index 1edf31e83..73b4440da 100644 --- a/test/suites/integration/msp/move-bucket.test.ts +++ b/test/suites/integration/msp/move-bucket.test.ts @@ -42,7 +42,7 @@ describeMspNet( } }); - it("postgres DB is ready", async () => { + it.only("postgres DB is ready", async () => { await userApi.docker.waitForLog({ containerName: "docker-sh-postgres-1", searchString: "database system is ready to accept connections", @@ -50,7 +50,7 @@ describeMspNet( }); }); - it("Network launches and can be queried", async () => { + it.only("Network launches and can be queried", async () => { const userNodePeerId = await userApi.rpc.system.localPeerId(); strictEqual(userNodePeerId.toString(), userApi.shConsts.NODE_INFOS.user.expectedPeerId); @@ -58,7 +58,7 @@ describeMspNet( strictEqual(mspNodePeerId.toString(), userApi.shConsts.NODE_INFOS.msp1.expectedPeerId); }); - it("Add 2 more BSPs (3 total) and set the replication target to 2", async () => { + it.only("Add 2 more BSPs (3 total) and set the replication target to 2", async () => { // Replicate to 2 BSPs, 5 blocks to maxthreshold const maxReplicationTargetRuntimeParameter = { RuntimeConfig: { @@ -104,7 +104,7 @@ describeMspNet( }); }); - it("User submits 3 storage requests in the same bucket for first MSP", async () => { + it.only("User submits 3 storage requests in the same bucket for first MSP", async () => { // Get value propositions form the MSP to use, and use the first one (can be any). const valueProps = await userApi.call.storageProvidersApi.queryValuePropositionsForMsp( userApi.shConsts.DUMMY_MSP_ID @@ -150,7 +150,7 @@ describeMspNet( await userApi.block.seal({ calls: txs, signer: shUser }); }); - it("MSP 1 receives files from user and accepts them", async () => { + it.only("MSP 1 receives files from user and accepts them", async () => { const originalRoot = await msp1Api.rpc.storagehubclient.getForestRoot(bucketId); // Get the events of the storage requests to extract the file keys and check @@ -307,7 +307,7 @@ describeMspNet( } }); - it("User moves bucket to second MSP", async () => { + it.only("User moves bucket to second MSP", async () => { // Get the value propositions of the second MSP to use, and use the first one (can be any). 
const valueProps = await userApi.call.storageProvidersApi.queryValuePropositionsForMsp( userApi.shConsts.DUMMY_MSP_ID_2 From c7e1b111faf70a351df98b1def741c2f2e1e7577 Mon Sep 17 00:00:00 2001 From: Alexandru Murtaza Date: Wed, 5 Mar 2025 16:41:53 +0200 Subject: [PATCH 05/11] add FileDownloadManager; move limits and rate-limit semaphores to it --- node/src/services/file_download_manager.rs | 99 ++++++++++++++++++++++ node/src/services/handler.rs | 15 +++- node/src/services/mod.rs | 1 + node/src/tasks/msp_move_bucket.rs | 68 +++++++++------ 4 files changed, 153 insertions(+), 30 deletions(-) create mode 100644 node/src/services/file_download_manager.rs diff --git a/node/src/services/file_download_manager.rs b/node/src/services/file_download_manager.rs new file mode 100644 index 000000000..bdb2dd5a2 --- /dev/null +++ b/node/src/services/file_download_manager.rs @@ -0,0 +1,99 @@ +use std::sync::Arc; +use tokio::sync::Semaphore; + +/// Constants for file download rate-limiting +pub struct FileDownloadLimits { + /// Maximum number of files to download in parallel + pub max_concurrent_file_downloads: usize, + /// Maximum number of chunks requests to do in parallel per file + pub max_concurrent_chunks_per_file: usize, + /// Maximum number of chunks to request in a single network request + pub max_chunks_per_request: usize, + /// Number of peers to select for each chunk download attempt + pub chunk_request_peer_retry_attempts: usize, + /// Number of retries per peer for a single chunk request + pub download_retry_attempts: usize, +} + +impl Default for FileDownloadLimits { + fn default() -> Self { + Self { + max_concurrent_file_downloads: 10, + max_concurrent_chunks_per_file: 5, + max_chunks_per_request: 10, + chunk_request_peer_retry_attempts: 5, + download_retry_attempts: 2, + } + } +} + +/// Manages file downloads with rate limiting +pub struct FileDownloadManager { + /// Configuration for download limits + limits: FileDownloadLimits, + /// Semaphore for controlling file-level parallelism + file_semaphore: Arc, +} + +impl FileDownloadManager { + /// Creates a new FileDownloadManager with default limits + pub fn new() -> Self { + Self::with_limits(FileDownloadLimits::default()) + } + + /// Creates a new FileDownloadManager with custom limits + pub fn with_limits(limits: FileDownloadLimits) -> Self { + let file_semaphore = Arc::new(Semaphore::new(limits.max_concurrent_file_downloads)); + + Self { + limits, + file_semaphore, + } + } + + /// Get a reference to the file semaphore for file-level parallelism + pub fn file_semaphore(&self) -> Arc { + Arc::clone(&self.file_semaphore) + } + + /// Create a new chunk semaphore for chunk-level parallelism within a file + pub fn new_chunk_semaphore(&self) -> Arc { + Arc::new(Semaphore::new(self.limits.max_concurrent_chunks_per_file)) + } + + /// Get the maximum number of chunks to request in a single batch + pub fn max_chunks_per_request(&self) -> usize { + self.limits.max_chunks_per_request + } + + /// Get the number of peer retry attempts for chunk downloads + pub fn chunk_request_peer_retry_attempts(&self) -> usize { + self.limits.chunk_request_peer_retry_attempts + } + + /// Get the number of download retry attempts per peer + pub fn download_retry_attempts(&self) -> usize { + self.limits.download_retry_attempts + } +} + +impl Clone for FileDownloadManager { + fn clone(&self) -> Self { + Self { + limits: self.limits.clone(), + file_semaphore: Arc::clone(&self.file_semaphore), + } + } +} + +impl Clone for FileDownloadLimits { + fn clone(&self) -> Self { + 
Self { + max_concurrent_file_downloads: self.max_concurrent_file_downloads, + max_concurrent_chunks_per_file: self.max_concurrent_chunks_per_file, + max_chunks_per_request: self.max_chunks_per_request, + chunk_request_peer_retry_attempts: self.chunk_request_peer_retry_attempts, + download_retry_attempts: self.download_retry_attempts, + } + } +} diff --git a/node/src/services/handler.rs b/node/src/services/handler.rs index 11e71cd43..8c375a85d 100644 --- a/node/src/services/handler.rs +++ b/node/src/services/handler.rs @@ -29,10 +29,13 @@ use shc_forest_manager::traits::ForestStorageHandler; use shc_indexer_db::DbPool; use crate::{ - services::bsp_peer_manager::BspPeerManager, - services::types::{ - BspForestStorageHandlerT, BspProvider, MspForestStorageHandlerT, MspProvider, ShNodeType, - ShStorageLayer, UserRole, + services::{ + bsp_peer_manager::BspPeerManager, + file_download_manager::FileDownloadManager, + types::{ + BspForestStorageHandlerT, BspProvider, MspForestStorageHandlerT, MspProvider, + ShNodeType, ShStorageLayer, UserRole, + }, }, tasks::{ bsp_charge_fees::BspChargeFeesTask, bsp_delete_file::BspDeleteFileTask, @@ -78,6 +81,8 @@ where pub indexer_db_pool: Option, /// The BSP peer manager for tracking peer performance. pub peer_manager: Arc, + /// The file download manager for rate-limiting downloads. + pub file_download_manager: Arc, } impl Clone for StorageHubHandler @@ -94,6 +99,7 @@ where provider_config: self.provider_config.clone(), indexer_db_pool: self.indexer_db_pool.clone(), peer_manager: self.peer_manager.clone(), + file_download_manager: self.file_download_manager.clone(), } } } @@ -121,6 +127,7 @@ where provider_config, indexer_db_pool, peer_manager, + file_download_manager: Arc::new(FileDownloadManager::new()), } } } diff --git a/node/src/services/mod.rs b/node/src/services/mod.rs index 650346470..4ed8f4790 100644 --- a/node/src/services/mod.rs +++ b/node/src/services/mod.rs @@ -1,5 +1,6 @@ pub mod bsp_peer_manager; pub mod builder; +pub mod file_download_manager; pub mod forest_storage; pub mod handler; pub mod types; diff --git a/node/src/tasks/msp_move_bucket.rs b/node/src/tasks/msp_move_bucket.rs index b47cd866f..099f513a0 100644 --- a/node/src/tasks/msp_move_bucket.rs +++ b/node/src/tasks/msp_move_bucket.rs @@ -7,7 +7,6 @@ use std::{ sync::{Arc, Mutex}, time::Duration, }; -use tokio::sync::Semaphore; use sc_network::PeerId; use sc_tracing::tracing::*; @@ -45,17 +44,6 @@ lazy_static::lazy_static! { const LOG_TARGET: &str = "msp-move-bucket-task"; -/// Maximum number of files to download in parallel -const MAX_CONCURRENT_FILE_DOWNLOADS: usize = 10; -/// Maximum number of chunks requests to do in parallel per file -const MAX_CONCURRENT_CHUNKS_PER_FILE: usize = 5; -/// Maximum number of chunks to request in a single network request -const MAX_CHUNKS_PER_REQUEST: usize = 10; -/// Number of peers to select for each chunk download attempt (2 best + 3 random) -const CHUNK_REQUEST_PEER_RETRY_ATTEMPTS: usize = 5; -/// Number of retries per peer for a single chunk request -const DOWNLOAD_RETRY_ATTEMPTS: usize = 2; - /// [`MspRespondMoveBucketTask`] handles bucket move requests between MSPs. 
/// /// # Event Handling @@ -195,8 +183,11 @@ where .get_or_create(&bucket) .await; - // Create semaphore for file-level parallelism - let file_semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_FILE_DOWNLOADS)); + // Get the file semaphore from the download manager + let file_semaphore = self + .storage_hub_handler + .file_download_manager + .file_semaphore(); let file_tasks: Vec<_> = files .into_iter() .map(|file| { @@ -703,14 +694,21 @@ where peer_manager: &Arc, batch_size_bytes: u64, ) -> Result { - for attempt in 0..=DOWNLOAD_RETRY_ATTEMPTS { + // Get retry attempts from the FileDownloadManager + let download_retry_attempts = self + .storage_hub_handler + .file_download_manager + .download_retry_attempts(); + + // Retry the download up to the configured number of times + for attempt in 0..=download_retry_attempts { if attempt > 0 { warn!( target: LOG_TARGET, "Retrying download with peer {:?} (attempt {}/{})", peer_id, attempt + 1, - DOWNLOAD_RETRY_ATTEMPTS + 1 + download_retry_attempts + 1 ); } @@ -737,7 +735,7 @@ where .await { Ok(success) => return Ok(success), - Err(e) if attempt < DOWNLOAD_RETRY_ATTEMPTS => { + Err(e) if attempt < download_retry_attempts => { warn!( target: LOG_TARGET, "Download attempt {} failed for peer {:?}: {:?}", @@ -753,7 +751,7 @@ where } } } - Err(e) if attempt < DOWNLOAD_RETRY_ATTEMPTS => { + Err(e) if attempt < download_retry_attempts => { warn!( target: LOG_TARGET, "Download attempt {} failed for peer {:?}: {:?}", @@ -767,7 +765,7 @@ where peer_manager.record_failure(peer_id).await; return Err(anyhow!( "Download request failed after {} attempts to peer {:?}: {:?}", - DOWNLOAD_RETRY_ATTEMPTS + 1, + download_retry_attempts + 1, peer_id, e )); @@ -779,8 +777,12 @@ where } /// Creates a batch of chunk IDs to request together - fn create_chunk_batch(chunk_start: u64, chunks_count: u64) -> HashSet { - let chunk_end = std::cmp::min(chunk_start + (MAX_CHUNKS_PER_REQUEST as u64), chunks_count); + fn create_chunk_batch( + chunk_start: u64, + chunks_count: u64, + max_chunks_per_request: usize, + ) -> HashSet { + let chunk_end = std::cmp::min(chunk_start + (max_chunks_per_request as u64), chunks_count); (chunk_start..chunk_end).map(ChunkId::new).collect() } @@ -820,12 +822,16 @@ where "MSP: downloading file {:?}", file_key, ); - // Create semaphore for chunk-level parallelism - let chunk_semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_CHUNKS_PER_FILE)); + // Get a new chunk semaphore from the download manager + let chunk_semaphore = self + .storage_hub_handler + .file_download_manager + .new_chunk_semaphore(); let peer_manager = Arc::clone(&self.storage_hub_handler.peer_manager); + let download_manager = self.storage_hub_handler.file_download_manager.clone(); let chunk_tasks: Vec<_> = (0..chunks_count) - .step_by(MAX_CHUNKS_PER_REQUEST) + .step_by(download_manager.max_chunks_per_request()) .map(|chunk_start| { let semaphore = Arc::clone(&chunk_semaphore); let task = self.clone(); @@ -839,12 +845,22 @@ where .await .map_err(|e| anyhow!("Failed to acquire chunk semaphore: {:?}", e))?; - let chunk_batch = Self::create_chunk_batch(chunk_start, chunks_count); + let chunk_batch = Self::create_chunk_batch( + chunk_start, + chunks_count, + task.storage_hub_handler + .file_download_manager + .max_chunks_per_request(), + ); let batch_size_bytes = chunk_batch.len() as u64 * FILE_CHUNK_SIZE as u64; // Get the best performing peers for this request and shuffle them + let peer_retry_attempts = task + .storage_hub_handler + .file_download_manager + 
.chunk_request_peer_retry_attempts(); let mut peers = peer_manager - .select_peers(2, CHUNK_REQUEST_PEER_RETRY_ATTEMPTS - 2, &file_key) + .select_peers(2, peer_retry_attempts - 2, &file_key) .await; peers.shuffle(&mut *GLOBAL_RNG.lock().unwrap()); From 9295f9d96b8dcae5630bea48301abe835c90e5ae Mon Sep 17 00:00:00 2001 From: Alexandru Murtaza Date: Thu, 6 Mar 2025 01:25:37 +0200 Subject: [PATCH 06/11] move download logic into FileDownloadManager --- node/src/services/file_download_manager.rs | 488 +++++++++++++++++++-- node/src/services/handler.rs | 5 +- node/src/tasks/msp_move_bucket.rs | 448 ++----------------- 3 files changed, 502 insertions(+), 439 deletions(-) diff --git a/node/src/services/file_download_manager.rs b/node/src/services/file_download_manager.rs index bdb2dd5a2..eda5cd6bd 100644 --- a/node/src/services/file_download_manager.rs +++ b/node/src/services/file_download_manager.rs @@ -1,7 +1,37 @@ +use anyhow::{anyhow, Result}; +use codec::Decode; +use futures::future::join_all; +use log::*; +use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng}; +use sc_network::PeerId; +use shc_common::types::{ + BucketId, Chunk, ChunkId, FileKeyProof, FileMetadata, HashT, Proven, + StorageProofsMerkleTrieLayout, +}; +use shc_file_manager::traits::FileStorage; +use shc_file_transfer_service::{ + commands::FileTransferServiceInterface, schema::v1::provider::RemoteDownloadDataResponse, +}; +use sp_core::H256; +use std::collections::HashSet; use std::sync::Arc; -use tokio::sync::Semaphore; +use std::time::Duration; +use tokio::sync::{RwLock, Semaphore}; -/// Constants for file download rate-limiting +use super::bsp_peer_manager::BspPeerManager; + +const LOG_TARGET: &str = "file_download_manager"; + +/// Constants for file download and operation rate-limiting +const MAX_CONCURRENT_FILE_DOWNLOADS: usize = 10; +const MAX_CONCURRENT_CHUNKS_PER_FILE: usize = 5; +const MAX_CHUNKS_PER_REQUEST: usize = 10; +const CHUNK_REQUEST_PEER_RETRY_ATTEMPTS: usize = 5; +const DOWNLOAD_RETRY_ATTEMPTS: usize = 2; +const BEST_PEERS_TO_SELECT: usize = 2; +const RANDOM_PEERS_TO_SELECT: usize = 3; + +/// Configuration for file download limits and parallelism settings pub struct FileDownloadLimits { /// Maximum number of files to download in parallel pub max_concurrent_file_downloads: usize, @@ -13,41 +43,89 @@ pub struct FileDownloadLimits { pub chunk_request_peer_retry_attempts: usize, /// Number of retries per peer for a single chunk request pub download_retry_attempts: usize, + /// Number of best performing peers to select + pub best_peers_to_select: usize, + /// Number of random peers to select + pub random_peers_to_select: usize, } impl Default for FileDownloadLimits { fn default() -> Self { Self { - max_concurrent_file_downloads: 10, - max_concurrent_chunks_per_file: 5, - max_chunks_per_request: 10, - chunk_request_peer_retry_attempts: 5, - download_retry_attempts: 2, + max_concurrent_file_downloads: MAX_CONCURRENT_FILE_DOWNLOADS, + max_concurrent_chunks_per_file: MAX_CONCURRENT_CHUNKS_PER_FILE, + max_chunks_per_request: MAX_CHUNKS_PER_REQUEST, + chunk_request_peer_retry_attempts: CHUNK_REQUEST_PEER_RETRY_ATTEMPTS, + download_retry_attempts: DOWNLOAD_RETRY_ATTEMPTS, + best_peers_to_select: BEST_PEERS_TO_SELECT, + random_peers_to_select: RANDOM_PEERS_TO_SELECT, } } } -/// Manages file downloads with rate limiting +impl Clone for FileDownloadLimits { + fn clone(&self) -> Self { + Self { + max_concurrent_file_downloads: self.max_concurrent_file_downloads, + max_concurrent_chunks_per_file: 
self.max_concurrent_chunks_per_file, + max_chunks_per_request: self.max_chunks_per_request, + chunk_request_peer_retry_attempts: self.chunk_request_peer_retry_attempts, + download_retry_attempts: self.download_retry_attempts, + best_peers_to_select: self.best_peers_to_select, + random_peers_to_select: self.random_peers_to_select, + } + } +} + +/// Manages file downloads and operations with rate limiting +/// +/// # Parallelism Implementation +/// The download process uses a multi-level parallelism approach: +/// +/// 1. File-Level Parallelism: +/// - Multiple files can be downloaded simultaneously +/// - Controlled by a top-level semaphore to prevent system overload +/// +/// 2. Chunk-Level Parallelism: +/// - For each file, multiple chunk batches can be downloaded in parallel +/// - Each chunk download is managed by a separate task +/// - Chunk downloads are batched (multiple chunks per request) for efficiency +/// +/// 3. Peer Selection and Retry Strategy: +/// - For each chunk batch: +/// - Selects peers (best performing + random) +/// - Tries each selected peer multiple times +/// - First successful download stops the retry process pub struct FileDownloadManager { /// Configuration for download limits - limits: FileDownloadLimits, + pub limits: FileDownloadLimits, /// Semaphore for controlling file-level parallelism file_semaphore: Arc, + /// BSP peer manager for tracking and selecting peers + peer_manager: Arc, } impl FileDownloadManager { - /// Creates a new FileDownloadManager with default limits - pub fn new() -> Self { - Self::with_limits(FileDownloadLimits::default()) + /// Create a new FileDownloadManager with default limits + /// + /// # Arguments + /// * `peer_manager` - The peer manager to use for peer selection and tracking + pub fn new(peer_manager: Arc) -> Self { + Self::with_limits(FileDownloadLimits::default(), peer_manager) } - /// Creates a new FileDownloadManager with custom limits - pub fn with_limits(limits: FileDownloadLimits) -> Self { + /// Create a new FileDownloadManager with specified limits + /// + /// # Arguments + /// * `limits` - The download limits to use + /// * `peer_manager` - The peer manager to use for peer selection and tracking + pub fn with_limits(limits: FileDownloadLimits, peer_manager: Arc) -> Self { let file_semaphore = Arc::new(Semaphore::new(limits.max_concurrent_file_downloads)); Self { limits, file_semaphore, + peer_manager, } } @@ -61,39 +139,379 @@ impl FileDownloadManager { Arc::new(Semaphore::new(self.limits.max_concurrent_chunks_per_file)) } - /// Get the maximum number of chunks to request in a single batch - pub fn max_chunks_per_request(&self) -> usize { - self.limits.max_chunks_per_request + /// Create a batch of chunks to download + pub fn create_chunk_batch(&self, chunk_start: u64, chunks_count: u64) -> HashSet { + let chunk_end = std::cmp::min( + chunk_start + (self.limits.max_chunks_per_request as u64), + chunks_count, + ); + (chunk_start..chunk_end).map(ChunkId::new).collect() } - /// Get the number of peer retry attempts for chunk downloads - pub fn chunk_request_peer_retry_attempts(&self) -> usize { - self.limits.chunk_request_peer_retry_attempts + /// Process a single proven chunk, writing it to file storage + async fn process_proven_chunk( + &self, + file_key: H256, + proven_chunk: Proven, + file_storage: &mut FS, + ) -> Result<()> + where + FS: FileStorage + Send + Sync, + { + // Handle the proven chunk based on its variant + match proven_chunk { + Proven::ExactKey(leaf) => { + let chunk_id = leaf.key; + let chunk_data = 
leaf.data; + let chunk_idx = chunk_id.as_u64(); + + // Chunk size has already been validated in process_chunk_download_response + file_storage + .write_chunk(&file_key, &chunk_id, &chunk_data) + .map_err(|error| anyhow!("Failed to write chunk {}: {:?}", chunk_idx, error))?; + + Ok(()) + } + unexpected => { + warn!( + target: LOG_TARGET, + "Unexpected Proven variant encountered: {:?}", unexpected + ); + Err(anyhow!( + "Unexpected Proven variant: only ExactKey is supported for file chunks" + )) + } + } } - /// Get the number of download retry attempts per peer - pub fn download_retry_attempts(&self) -> usize { - self.limits.download_retry_attempts + /// Extract proven chunks from a download response + fn extract_chunks_from_response( + response: &RemoteDownloadDataResponse, + ) -> Result>> { + // Access the file_key_proof field + let file_key_proof_bytes = &response.file_key_proof; + + // Decode the file key proof + let file_key_proof = FileKeyProof::decode(&mut file_key_proof_bytes.as_slice()) + .map_err(|e| anyhow!("Failed to decode FileKeyProof: {:?}", e))?; + + // Extract proven chunks from the proof using the correct layout type + let proven_leaves = file_key_proof + .proven::() + .map_err(|e| anyhow!("Failed to extract chunks from proof: {:?}", e))?; + + // Convert Leaf to Proven + let proven_chunks = proven_leaves + .into_iter() + .map(|leaf| Proven::new_exact_key(leaf.key, leaf.data)) + .collect(); + + Ok(proven_chunks) } -} -impl Clone for FileDownloadManager { - fn clone(&self) -> Self { - Self { - limits: self.limits.clone(), - file_semaphore: Arc::clone(&self.file_semaphore), + /// Process the response from a chunk download request + async fn process_chunk_download_response( + &self, + file_key: H256, + file_metadata: &FileMetadata, + chunk_batch: &HashSet, + peer_id: PeerId, + download_request: RemoteDownloadDataResponse, + start_time: std::time::Instant, + file_storage: &mut FS, + ) -> Result + where + FS: FileStorage + Send + Sync, + { + let elapsed = start_time.elapsed(); + + let chunks = Self::extract_chunks_from_response(&download_request)?; + + if chunks.is_empty() { + return Err(anyhow!("No chunks in response from peer {:?}", peer_id)); + } + + let mut total_bytes = 0; + let mut processed_chunks = 0; + + for proven_chunk in chunks { + // Validate the chunk before processing it + if let Proven::ExactKey(leaf) = &proven_chunk { + let chunk_id = &leaf.key; + let chunk_data = &leaf.data; + let chunk_idx = chunk_id.as_u64(); + + // Validate chunk size using is_valid_chunk_size + if !file_metadata.is_valid_chunk_size(chunk_idx, chunk_data.len()) { + let expected_size = file_metadata.chunk_size_at(chunk_idx); + return Err(anyhow!( + "Invalid chunk size for chunk {}: Expected: {}, got: {}", + chunk_idx, + expected_size, + chunk_data.len() + )); + } + + total_bytes += chunk_data.len(); + + // Only process chunks that were requested + if !chunk_batch.contains(chunk_id) { + warn!( + target: LOG_TARGET, + "Received chunk {:?} that was not requested from peer {:?}", chunk_id, peer_id + ); + continue; + } + } else { + warn!( + target: LOG_TARGET, + "Unexpected Proven variant encountered: {:?}", proven_chunk + ); + continue; + } + + self.process_proven_chunk(file_key, proven_chunk, file_storage) + .await?; + processed_chunks += 1; + } + + info!( + target: LOG_TARGET, + "Downloaded {} chunks ({} bytes) from peer {:?} in {:?} ({:.2} MB/s)", + processed_chunks, + total_bytes, + peer_id, + elapsed, + (total_bytes as f64 / 1024.0 / 1024.0) / elapsed.as_secs_f64() + ); + + self.peer_manager + 
.record_success(peer_id, total_bytes as u64, elapsed.as_millis() as u64) + .await; + Ok(true) + } + + /// Attempts to download a batch of chunks from a specific peer with retries + pub async fn try_download_chunk_batch( + &self, + peer_id: PeerId, + file_key: H256, + file_metadata: &FileMetadata, + chunk_batch: &HashSet, + bucket: &BucketId, + file_transfer: &FT, + file_storage: &mut FS, + ) -> Result + where + FT: FileTransferServiceInterface + Send + Sync, + FS: FileStorage + Send + Sync, + { + // Retry the download up to the configured number of times + for attempt in 0..=self.limits.download_retry_attempts { + if attempt > 0 { + warn!( + target: LOG_TARGET, + "Retrying download from peer {:?} (attempt {}/{})", + peer_id, + attempt + 1, + self.limits.download_retry_attempts + 1 + ); + } + + let start_time = std::time::Instant::now(); + + match file_transfer + .download_request(peer_id, file_key.into(), chunk_batch.clone(), Some(*bucket)) + .await + { + Ok(response) => { + return self + .process_chunk_download_response( + file_key, + file_metadata, + chunk_batch, + peer_id, + response, + start_time, + file_storage, + ) + .await; + } + Err(e) => { + warn!( + target: LOG_TARGET, + "Download attempt {} failed with peer {:?}: {:?}", + attempt + 1, + peer_id, + e + ); + + if attempt == self.limits.download_retry_attempts { + self.peer_manager.record_failure(peer_id).await; + return Err(anyhow!( + "Failed to download after {} attempts: {:?}", + attempt + 1, + e + )); + } + } + } + + // Delay before retry + tokio::time::sleep(Duration::from_millis(500)).await; + } + + // This should not be reachable due to the return in the loop + Err(anyhow!("Failed to download chunk batch after all attempts")) + } + + /// Downloads a file by breaking it into chunk batches and downloading them in parallel + pub async fn download_file( + &self, + file_metadata: FileMetadata, + bucket: BucketId, + file_transfer: FT, + file_storage: Arc>, + ) -> Result<()> + where + FT: FileTransferServiceInterface + Send + Sync + Clone + 'static, + FS: FileStorage + Send + Sync + 'static, + { + // Acquire the file semaphore permit + let semaphore = self.file_semaphore(); + let _permit = semaphore + .acquire() + .await + .map_err(|e| anyhow!("Failed to acquire file semaphore: {:?}", e))?; + + let file_key = file_metadata.file_key::>(); + let chunks_count = file_metadata.chunks_count(); + + info!( + target: LOG_TARGET, + "Downloading file {:?} with {} chunks", file_key, chunks_count + ); + + // Create a new chunk semaphore for this file + let chunk_semaphore = self.new_chunk_semaphore(); + let manager = self.clone(); + + // Create tasks for chunk batches + let chunk_tasks: Vec<_> = (0..chunks_count) + .step_by(self.limits.max_chunks_per_request) + .map(|chunk_start| { + let semaphore = Arc::clone(&chunk_semaphore); + let file_metadata = file_metadata.clone(); + let bucket = bucket.clone(); + let file_transfer = file_transfer.clone(); + let file_storage = Arc::clone(&file_storage); + let file_key = file_key; + let manager = manager.clone(); + + tokio::spawn(async move { + // Acquire semaphore permit for this chunk batch + let _permit = semaphore + .acquire() + .await + .map_err(|e| anyhow!("Failed to acquire chunk semaphore: {:?}", e))?; + + // Create chunk batch + let chunk_batch = manager.create_chunk_batch(chunk_start, chunks_count); + + // Get peers to try for this download + let mut peers = manager + .peer_manager + .select_peers( + manager.limits.best_peers_to_select, + manager.limits.random_peers_to_select, + &file_key, + ) 
+ .await; + + // Shuffle peers for randomization using a thread-safe RNG + let mut rng = StdRng::from_entropy(); + peers.shuffle(&mut rng); + + // Try each selected peer + for peer_id in peers { + let download_result = { + let mut file_storage_guard = file_storage.write().await; + manager + .try_download_chunk_batch( + peer_id, + file_key, + &file_metadata, + &chunk_batch, + &bucket, + &file_transfer, + &mut *file_storage_guard, + ) + .await + }; + + match download_result { + Ok(_) => return Ok(()), + Err(e) => { + warn!( + target: LOG_TARGET, + "Failed to download chunk batch from peer {:?}: {:?}", + peer_id, + e + ); + // Try next peer + continue; + } + } + } + + // All peers failed + Err(anyhow!( + "Failed to download chunk batch starting at {} after trying all peers", + chunk_start + )) + }) + }) + .collect(); + + // Wait for all chunk tasks to complete + let results = join_all(chunk_tasks).await; + + // Process results and check for errors + let mut errors = Vec::new(); + for (i, result) in results.into_iter().enumerate() { + match result { + Ok(Ok(_)) => {} + Ok(Err(e)) => { + errors.push(format!("Chunk task {} failed: {:?}", i, e)); + } + Err(e) => { + errors.push(format!("Chunk task {} panicked: {:?}", i, e)); + } + } + } + + if !errors.is_empty() { + Err(anyhow!( + "Failed to download file {:?}: {}", + file_key, + errors.join(", ") + )) + } else { + info!( + target: LOG_TARGET, + "Successfully downloaded all chunks for file {:?}", file_key + ); + Ok(()) } } } -impl Clone for FileDownloadLimits { +impl Clone for FileDownloadManager { fn clone(&self) -> Self { Self { - max_concurrent_file_downloads: self.max_concurrent_file_downloads, - max_concurrent_chunks_per_file: self.max_concurrent_chunks_per_file, - max_chunks_per_request: self.max_chunks_per_request, - chunk_request_peer_retry_attempts: self.chunk_request_peer_retry_attempts, - download_retry_attempts: self.download_retry_attempts, + limits: self.limits.clone(), + file_semaphore: Arc::clone(&self.file_semaphore), + peer_manager: Arc::clone(&self.peer_manager), } } } diff --git a/node/src/services/handler.rs b/node/src/services/handler.rs index 8c375a85d..e817a5ab0 100644 --- a/node/src/services/handler.rs +++ b/node/src/services/handler.rs @@ -118,6 +118,9 @@ where indexer_db_pool: Option, peer_manager: Arc, ) -> Self { + // Create a FileDownloadManager with the peer manager already initialized + let file_download_manager = Arc::new(FileDownloadManager::new(Arc::clone(&peer_manager))); + Self { task_spawner, file_transfer, @@ -127,7 +130,7 @@ where provider_config, indexer_db_pool, peer_manager, - file_download_manager: Arc::new(FileDownloadManager::new()), + file_download_manager, } } } diff --git a/node/src/tasks/msp_move_bucket.rs b/node/src/tasks/msp_move_bucket.rs index 099f513a0..56da59d13 100644 --- a/node/src/tasks/msp_move_bucket.rs +++ b/node/src/tasks/msp_move_bucket.rs @@ -1,14 +1,11 @@ use anyhow::anyhow; -use codec::Decode; use futures::future::join_all; -use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng}; +use rand::{rngs::StdRng, SeedableRng}; use std::{ - collections::HashSet, sync::{Arc, Mutex}, time::Duration, }; -use sc_network::PeerId; use sc_tracing::tracing::*; use sp_core::H256; @@ -21,61 +18,25 @@ use shc_blockchain_service::{ types::RetryStrategy, }; use shc_common::types::{ - BucketId, FileKeyProof, FileMetadata, HashT, ProviderId, StorageProofsMerkleTrieLayout, - StorageProviderId, + BucketId, HashT, ProviderId, StorageProofsMerkleTrieLayout, StorageProviderId, }; use 
shc_file_manager::traits::FileStorage; -use shc_file_transfer_service::{ - commands::FileTransferServiceInterface, schema::v1::provider::RemoteDownloadDataResponse, -}; use shc_forest_manager::traits::{ForestStorage, ForestStorageHandler}; -use shp_constants::FILE_CHUNK_SIZE; -use shp_file_metadata::{Chunk, ChunkId, Leaf as ProvenLeaf}; use crate::services::{ - bsp_peer_manager::BspPeerManager, handler::StorageHubHandler, types::{MspForestStorageHandlerT, ShNodeType}, }; +// Constants +const LOG_TARGET: &str = "storage-hub::msp-move-bucket"; lazy_static::lazy_static! { + // A global RNG available for peer selection static ref GLOBAL_RNG: Mutex = Mutex::new(StdRng::from_entropy()); } -const LOG_TARGET: &str = "msp-move-bucket-task"; - -/// [`MspRespondMoveBucketTask`] handles bucket move requests between MSPs. -/// -/// # Event Handling -/// This task handles both: -/// - [`MoveBucketRequestedForMsp`] event which is emitted when a user requests to move their bucket -/// - [`StartMovedBucketDownload`] event which is emitted when a bucket move is confirmed -/// -/// # Lifecycle -/// 1. When a move bucket request is received: -/// - Verifies that indexer is enabled and accessible -/// - Checks if there is sufficient storage capacity via [`MspMoveBucketTask::check_and_increase_capacity`] -/// - Validates that all files in the bucket can be handled -/// - Inserts file metadata into local storage and forest storage -/// - Verifies BSP peer IDs are available for each file -/// -/// 2. If all validations pass: -/// - Accepts the move request via [`MspMoveBucketTask::accept_bucket_move`] -/// - Downloads all files in parallel using [`MspMoveBucketTask::download_file`] with controlled concurrency -/// - Updates local forest root to match on-chain state -/// -/// 3. If any validation fails: -/// - Rejects the move request via [`MspMoveBucketTask::reject_bucket_move`] -/// - Cleans up any partially inserted file metadata -/// - Removes any created forest storage -/// -/// # Error Handling -/// The task will reject the bucket move request if: -/// - Indexer is disabled or inaccessible -/// - Insufficient storage capacity and unable to increase -/// - File metadata insertion fails -/// - BSP peer IDs are unavailable -/// - Database connection issues occur +/// Handles requests for MSP (Main Storage Provider) to respond to bucket move requests. +/// Downloads bucket files from BSPs (Backup Storage Providers). 
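For reference, the two-level rate limiting that the FileDownloadManager doc comment above describes comes down to a shared tokio semaphore gating whole files plus a per-file semaphore gating chunk batches. A minimal, self-contained sketch of that pattern (illustrative only, not part of the patch: the function is invented, though the permit counts mirror the defaults above):

use std::sync::Arc;
use tokio::sync::Semaphore;

// Invented constants mirroring the default limits above.
const MAX_FILES: usize = 10;
const MAX_CHUNK_BATCHES_PER_FILE: usize = 5;

async fn download_all(files: Vec<u64>) {
    // One permit per concurrently downloading file.
    let file_semaphore = Arc::new(Semaphore::new(MAX_FILES));
    let mut tasks = Vec::new();
    for file in files {
        let file_semaphore = Arc::clone(&file_semaphore);
        tasks.push(tokio::spawn(async move {
            // Hold a file-level permit for the whole file download.
            let _file_permit = file_semaphore.acquire().await.expect("semaphore closed");
            // Each file gets its own chunk-level semaphore, so one slow file
            // cannot eat the chunk parallelism of the others.
            let chunk_semaphore = Arc::new(Semaphore::new(MAX_CHUNK_BATCHES_PER_FILE));
            let mut batch_tasks = Vec::new();
            for batch in 0..8u64 {
                let chunk_semaphore = Arc::clone(&chunk_semaphore);
                batch_tasks.push(tokio::spawn(async move {
                    let _chunk_permit = chunk_semaphore.acquire().await.expect("semaphore closed");
                    // A real implementation would pick a peer and request this
                    // batch of chunks here.
                    let _ = (file, batch);
                }));
            }
            for task in batch_tasks {
                let _ = task.await;
            }
        }));
    }
    for task in tasks {
        let _ = task.await;
    }
}

In the actual manager the outer permit is taken inside download_file, and each chunk batch additionally goes through peer selection and per-peer retries before a request is sent.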
pub struct MspRespondMoveBucketTask where NT: ShNodeType + 'static, @@ -92,9 +53,9 @@ where NT::FSH: MspForestStorageHandlerT, { fn clone(&self) -> MspRespondMoveBucketTask { - Self { + MspRespondMoveBucketTask { storage_hub_handler: self.storage_hub_handler.clone(), - pending_bucket_id: self.pending_bucket_id.clone(), + pending_bucket_id: self.pending_bucket_id, file_storage_inserted_file_keys: self.file_storage_inserted_file_keys.clone(), } } @@ -201,37 +162,7 @@ where .await .map_err(|e| anyhow!("Failed to acquire file semaphore: {:?}", e))?; - let file_metadata = file - .to_file_metadata(bucket_id.as_ref().to_vec()) - .map_err(|e| anyhow!("Failed to convert file to file metadata: {:?}", e))?; - let file_key = file_metadata.file_key::>(); - - // Get BSP peer IDs and register them - let bsp_peer_ids = file - .get_bsp_peer_ids( - &mut task - .storage_hub_handler - .indexer_db_pool - .as_ref() - .unwrap() - .get() - .await?, - ) - .await?; - - if bsp_peer_ids.is_empty() { - return Err(anyhow!("No BSP peer IDs found for file {:?}", file_key)); - } - - // Register BSP peers for file transfer - for &peer_id in &bsp_peer_ids { - task.storage_hub_handler - .peer_manager - .add_peer(peer_id, file_key) - .await; - } - - // Download file using existing download_file method + // Download file using the simplified download method task.download_file(&file, &bucket_id).await }) }) @@ -597,215 +528,12 @@ where Ok(()) } - /// Processes a single chunk download response - async fn process_chunk_download_response( - &self, - file_key: H256, - file_metadata: &FileMetadata, - chunk_batch: &HashSet, - peer_id: PeerId, - download_request: RemoteDownloadDataResponse, - peer_manager: &Arc, - batch_size_bytes: u64, - start_time: std::time::Instant, - ) -> Result { - let file_key_proof = FileKeyProof::decode(&mut download_request.file_key_proof.as_ref()) - .map_err(|e| anyhow!("Failed to decode file key proof: {:?}", e))?; - - // Verify fingerprint - let expected_fingerprint = file_metadata.fingerprint(); - if file_key_proof.file_metadata.fingerprint() != expected_fingerprint { - peer_manager.record_failure(peer_id).await; - return Err(anyhow!( - "Fingerprint mismatch. 
Expected: {:?}, got: {:?}", - expected_fingerprint, - file_key_proof.file_metadata.fingerprint() - )); - } - - let proven = file_key_proof - .proven::() - .map_err(|e| anyhow!("Failed to get proven data: {:?}", e))?; - - if proven.len() != chunk_batch.len() { - peer_manager.record_failure(peer_id).await; - return Err(anyhow!( - "Expected {} proven chunks but got {}", - chunk_batch.len(), - proven.len() - )); - } - - // Process each proven chunk - for proven_chunk in proven { - self.process_proven_chunk(file_key, file_metadata, proven_chunk) - .await?; - } - - let download_time = start_time.elapsed(); - peer_manager - .record_success(peer_id, batch_size_bytes, download_time.as_millis() as u64) - .await; - - Ok(true) - } - - /// Processes a single proven chunk - async fn process_proven_chunk( - &self, - file_key: H256, - file_metadata: &FileMetadata, - proven_chunk: ProvenLeaf, - ) -> Result<(), anyhow::Error> { - let chunk_id = proven_chunk.key; - let chunk_data = proven_chunk.data; - - // Validate chunk size - let chunk_idx = chunk_id.as_u64(); - let expected_chunk_size = file_metadata.chunk_size_at(chunk_idx); - - if chunk_data.len() != expected_chunk_size { - return Err(anyhow!( - "Invalid chunk size for chunk {}: Expected: {}, got: {}", - chunk_idx, - expected_chunk_size, - chunk_data.len() - )); - } - - self.storage_hub_handler - .file_storage - .write() - .await - .write_chunk(&file_key, &chunk_id, &chunk_data) - .map_err(|error| anyhow!("Failed to write chunk {}: {:?}", chunk_idx, error))?; - - Ok(()) - } - - /// Attempts to download a batch of chunks from a specific peer - async fn try_download_chunk_batch( - &self, - peer_id: PeerId, - file_key: H256, - file_metadata: &FileMetadata, - chunk_batch: &HashSet, - bucket: &BucketId, - peer_manager: &Arc, - batch_size_bytes: u64, - ) -> Result { - // Get retry attempts from the FileDownloadManager - let download_retry_attempts = self - .storage_hub_handler - .file_download_manager - .download_retry_attempts(); - - // Retry the download up to the configured number of times - for attempt in 0..=download_retry_attempts { - if attempt > 0 { - warn!( - target: LOG_TARGET, - "Retrying download with peer {:?} (attempt {}/{})", - peer_id, - attempt + 1, - download_retry_attempts + 1 - ); - } - - let start_time = std::time::Instant::now(); - - match self - .storage_hub_handler - .file_transfer - .download_request(peer_id, file_key.into(), chunk_batch.clone(), Some(*bucket)) - .await - { - Ok(download_request) => { - match self - .process_chunk_download_response( - file_key, - file_metadata, - chunk_batch, - peer_id, - download_request, - peer_manager, - batch_size_bytes, - start_time, - ) - .await - { - Ok(success) => return Ok(success), - Err(e) if attempt < download_retry_attempts => { - warn!( - target: LOG_TARGET, - "Download attempt {} failed for peer {:?}: {:?}", - attempt + 1, - peer_id, - e - ); - continue; - } - Err(e) => { - peer_manager.record_failure(peer_id).await; - return Err(e); - } - } - } - Err(e) if attempt < download_retry_attempts => { - warn!( - target: LOG_TARGET, - "Download attempt {} failed for peer {:?}: {:?}", - attempt + 1, - peer_id, - e - ); - continue; - } - Err(e) => { - peer_manager.record_failure(peer_id).await; - return Err(anyhow!( - "Download request failed after {} attempts to peer {:?}: {:?}", - download_retry_attempts + 1, - peer_id, - e - )); - } - } - } - - Ok(false) - } - - /// Creates a batch of chunk IDs to request together - fn create_chunk_batch( - chunk_start: u64, - chunks_count: u64, - 
max_chunks_per_request: usize, - ) -> HashSet { - let chunk_end = std::cmp::min(chunk_start + (max_chunks_per_request as u64), chunks_count); - (chunk_start..chunk_end).map(ChunkId::new).collect() - } - - /// Downloads a file from BSPs (Backup Storage Providers) chunk by chunk. - /// - /// # Parallelism Implementation - /// The download process uses a multi-level parallelism approach: - /// - /// 1. File-Level Parallelism: - /// - Up to [`MAX_CONCURRENT_FILE_DOWNLOADS`] files can be downloaded simultaneously - /// - Controlled by a top-level semaphore to prevent system overload + /// Downloads a file from BSPs (Backup Storage Providers). /// - /// 2. Chunk-Level Parallelism: - /// - For each file, up to [`MAX_CONCURRENT_CHUNKS_PER_FILE`] chunk batches can be downloaded in parallel - /// - Each chunk download is managed by a separate task - /// - Chunk downloads are batched ([`MAX_CHUNKS_PER_REQUEST`] chunks per request) for efficiency - /// - /// 3. Peer Selection and Retry Strategy: - /// - For each chunk batch: - /// - Selects [`CHUNK_REQUEST_PEER_RETRY_ATTEMPTS`] peers (2 best performing + remaining random) - /// - Tries each selected peer up to [`DOWNLOAD_RETRY_ATTEMPTS`] times - /// - First successful download stops the retry process - /// - Total retry attempts per chunk = [`CHUNK_REQUEST_PEER_RETRY_ATTEMPTS`] * [`DOWNLOAD_RETRY_ATTEMPTS`] + /// Uses the FileDownloadManager which implements multi-level parallelism: + /// - File-level parallelism: Multiple files can be downloaded simultaneously + /// - Chunk-level parallelism: For each file, multiple chunk batches are downloaded in parallel + /// - Peer selection and retry strategy: Selects and tries multiple peers async fn download_file( &self, file: &shc_indexer_db::models::File, @@ -815,130 +543,44 @@ where .to_file_metadata(bucket.as_ref().to_vec()) .map_err(|e| anyhow!("Failed to convert file to file metadata: {:?}", e))?; let file_key = file_metadata.file_key::>(); - let chunks_count = file_metadata.chunks_count(); info!( target: LOG_TARGET, - "MSP: downloading file {:?}", file_key, + "Starting file download for file_key: {:?}", file_key ); - // Get a new chunk semaphore from the download manager - let chunk_semaphore = self - .storage_hub_handler - .file_download_manager - .new_chunk_semaphore(); - let peer_manager = Arc::clone(&self.storage_hub_handler.peer_manager); - let download_manager = self.storage_hub_handler.file_download_manager.clone(); - - let chunk_tasks: Vec<_> = (0..chunks_count) - .step_by(download_manager.max_chunks_per_request()) - .map(|chunk_start| { - let semaphore = Arc::clone(&chunk_semaphore); - let task = self.clone(); - let file_metadata = file_metadata.clone(); - let bucket = *bucket; - let peer_manager = Arc::clone(&peer_manager); - - tokio::spawn(async move { - let _permit = semaphore - .acquire() - .await - .map_err(|e| anyhow!("Failed to acquire chunk semaphore: {:?}", e))?; - - let chunk_batch = Self::create_chunk_batch( - chunk_start, - chunks_count, - task.storage_hub_handler - .file_download_manager - .max_chunks_per_request(), - ); - let batch_size_bytes = chunk_batch.len() as u64 * FILE_CHUNK_SIZE as u64; - - // Get the best performing peers for this request and shuffle them - let peer_retry_attempts = task - .storage_hub_handler - .file_download_manager - .chunk_request_peer_retry_attempts(); - let mut peers = peer_manager - .select_peers(2, peer_retry_attempts - 2, &file_key) - .await; - peers.shuffle(&mut *GLOBAL_RNG.lock().unwrap()); - - // Try each selected peer - for peer_id in peers { 
- match task - .try_download_chunk_batch( - peer_id, - file_key, - &file_metadata, - &chunk_batch, - &bucket, - &peer_manager, - batch_size_bytes, - ) - .await - { - Ok(true) => return Ok(()), - Ok(false) | Err(_) => continue, - } - } - - Err(anyhow!( - "Failed to download chunk {} after all retries", - chunk_start - )) - }) - }) - .collect(); - - // Wait for all downloads to complete and collect results - let results = join_all(chunk_tasks).await; + // Register BSP peers with the peer manager for this file + let bsp_peer_ids = file + .get_bsp_peer_ids( + &mut self + .storage_hub_handler + .indexer_db_pool + .as_ref() + .unwrap() + .get() + .await?, + ) + .await?; - // Process results and count failures - let mut failed_downloads = 0; - for result in results { - match result { - Ok(download_result) => { - if let Err(e) = download_result { - error!( - target: LOG_TARGET, - "File download chunk task failed: {:?}", e - ); - failed_downloads += 1; - } - } - Err(e) => { - error!( - target: LOG_TARGET, - "File download chunk task panicked: {:?}", e - ); - failed_downloads += 1; - } - } + for &peer_id in &bsp_peer_ids { + self.storage_hub_handler + .peer_manager + .add_peer(peer_id, file_key) + .await; } - // Log summary of download results - if failed_downloads > 0 { - error!( - target: LOG_TARGET, - "Failed to download {}/{} chunks for file {:?}", - failed_downloads, - chunks_count, - file_key - ); - return Err(anyhow!( - "Failed to download all chunks for file {:?}", - file_key - )); - } else { - info!( - target: LOG_TARGET, - "Successfully downloaded {} chunks for file {:?}", - chunks_count, - file_key - ); - } + // Get the file storage reference + let file_storage = self.storage_hub_handler.file_storage.clone(); - Ok(()) + // Use the simplified FileDownloadManager interface + self.storage_hub_handler + .file_download_manager + .download_file( + file_metadata, + *bucket, + self.storage_hub_handler.file_transfer.clone(), + file_storage, + ) + .await } } From 9aee4e4bcdf89904696bfb6e30de2734873513d9 Mon Sep 17 00:00:00 2001 From: Alexandru Murtaza Date: Thu, 6 Mar 2025 02:04:17 +0200 Subject: [PATCH 07/11] remove .only --- test/suites/integration/msp/move-bucket.test.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/suites/integration/msp/move-bucket.test.ts b/test/suites/integration/msp/move-bucket.test.ts index 73b4440da..81452a3a1 100644 --- a/test/suites/integration/msp/move-bucket.test.ts +++ b/test/suites/integration/msp/move-bucket.test.ts @@ -42,7 +42,7 @@ describeMspNet( } }); - it.only("postgres DB is ready", async () => { + it("postgres DB is ready", async () => { await userApi.docker.waitForLog({ containerName: "docker-sh-postgres-1", searchString: "database system is ready to accept connections", @@ -50,7 +50,7 @@ describeMspNet( }); }); - it.only("Network launches and can be queried", async () => { + it("Network launches and can be queried", async () => { const userNodePeerId = await userApi.rpc.system.localPeerId(); strictEqual(userNodePeerId.toString(), userApi.shConsts.NODE_INFOS.user.expectedPeerId); @@ -58,7 +58,7 @@ describeMspNet( strictEqual(mspNodePeerId.toString(), userApi.shConsts.NODE_INFOS.msp1.expectedPeerId); }); - it.only("Add 2 more BSPs (3 total) and set the replication target to 2", async () => { + it("Add 2 more BSPs (3 total) and set the replication target to 2", async () => { // Replicate to 2 BSPs, 5 blocks to maxthreshold const maxReplicationTargetRuntimeParameter = { RuntimeConfig: { @@ -104,7 +104,7 @@ describeMspNet( 
}); }); - it.only("User submits 3 storage requests in the same bucket for first MSP", async () => { + it("User submits 3 storage requests in the same bucket for first MSP", async () => { // Get value propositions form the MSP to use, and use the first one (can be any). const valueProps = await userApi.call.storageProvidersApi.queryValuePropositionsForMsp( userApi.shConsts.DUMMY_MSP_ID @@ -150,7 +150,7 @@ describeMspNet( await userApi.block.seal({ calls: txs, signer: shUser }); }); - it.only("MSP 1 receives files from user and accepts them", async () => { + it("MSP 1 receives files from user and accepts them", async () => { const originalRoot = await msp1Api.rpc.storagehubclient.getForestRoot(bucketId); // Get the events of the storage requests to extract the file keys and check From 718a945b231ac5afd61648c1147c33a8fbbfa873 Mon Sep 17 00:00:00 2001 From: Alexandru Murtaza Date: Thu, 6 Mar 2025 13:09:31 +0200 Subject: [PATCH 08/11] fix lint --- test/suites/integration/msp/move-bucket.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/suites/integration/msp/move-bucket.test.ts b/test/suites/integration/msp/move-bucket.test.ts index 81452a3a1..1edf31e83 100644 --- a/test/suites/integration/msp/move-bucket.test.ts +++ b/test/suites/integration/msp/move-bucket.test.ts @@ -307,7 +307,7 @@ describeMspNet( } }); - it.only("User moves bucket to second MSP", async () => { + it("User moves bucket to second MSP", async () => { // Get the value propositions of the second MSP to use, and use the first one (can be any). const valueProps = await userApi.call.storageProvidersApi.queryValuePropositionsForMsp( userApi.shConsts.DUMMY_MSP_ID_2 From e427e673190495ae4b7cec17cb35d8ce836e2c62 Mon Sep 17 00:00:00 2001 From: Alexandru Murtaza Date: Fri, 7 Mar 2025 01:30:54 +0200 Subject: [PATCH 09/11] add download persistent state to be used by FileDownloadManager --- node/src/services/download_state_store.rs | 262 +++++++++++++++++++++ node/src/services/file_download_manager.rs | 135 ++++++++--- node/src/services/handler.rs | 9 +- node/src/services/mod.rs | 1 + primitives/file-metadata/src/lib.rs | 6 + 5 files changed, 376 insertions(+), 37 deletions(-) create mode 100644 node/src/services/download_state_store.rs diff --git a/node/src/services/download_state_store.rs b/node/src/services/download_state_store.rs new file mode 100644 index 000000000..8238fd972 --- /dev/null +++ b/node/src/services/download_state_store.rs @@ -0,0 +1,262 @@ +use anyhow::Result; +use log::*; +use rocksdb::{ColumnFamilyDescriptor, Options, DB}; +use std::path::PathBuf; + +use shc_common::{ + typed_store::{ + BufferedWriteSupport, CFRangeMapAPI, CompositeKey, ProvidesDbContext, + ProvidesTypedDbAccess, ScaleDbCodec, ScaleEncodedCf, TypedCf, TypedDbContext, TypedRocksDB, + }, + types::FileMetadata, +}; +use shp_file_metadata::ChunkId; + +use sp_core::H256; + +const LOG_TARGET: &str = "download_state_store"; + +// Column family definitions +/// Column family that tracks missing chunks for files being downloaded. +/// +/// This CF implements a range map pattern where: +/// - The primary key is a file key (H256 hash) +/// - The values are ChunkIds representing chunks that still need to be downloaded +/// +/// When a file download starts, all chunks are added to this CF. +/// As chunks are successfully downloaded, they are removed from the CF. +/// When all chunks for a file are downloaded, no entries with that file key remain. 
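Together with the FileMetadataCf below and the MissingChunksMap API further down in this file, this column family is what makes downloads resumable: metadata and the full chunk set are persisted when a download starts, chunks are removed as they are written, and an empty set means the file is complete. A condensed usage sketch built only from APIs introduced in this patch (the path and the calling function are illustrative, not the real call sites):

use anyhow::Result;
use std::path::PathBuf;

use shc_common::types::FileMetadata;
use sp_core::H256;

use crate::services::download_state_store::DownloadStateStore;

fn resumable_download_sketch(file_key: H256, metadata: &FileMetadata) -> Result<()> {
    let store = DownloadStateStore::new(PathBuf::from("/tmp/storagehub-example"))?;
    let ctx = store.open_rw_context();

    // First time we see this file: persist its metadata and mark every chunk missing.
    if ctx.get_file_metadata(&file_key).is_none() {
        ctx.store_file_metadata(&file_key, metadata);
        ctx.missing_chunks_map().initialize_file(metadata);
    }

    // On resume, only the chunks still recorded as missing have to be fetched.
    for chunk_id in ctx.missing_chunks_map().get_missing_chunks(&file_key) {
        // ... fetch the chunk from a peer and write it to file storage here ...
        ctx.missing_chunks_map().mark_chunk_downloaded(&file_key, chunk_id);
    }

    // No missing chunks left means the download finished; drop the metadata.
    if ctx.missing_chunks_map().is_file_complete(&file_key) {
        ctx.delete_file_metadata(&file_key);
    }
    ctx.commit();
    Ok(())
}

FileDownloadManager::download_file follows this same sequence, so after a restart it simply re-enters the loop with whatever chunks are still recorded as missing.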
+pub struct MissingChunksCf; + +impl Default for MissingChunksCf { + fn default() -> Self { + Self + } +} + +impl ScaleEncodedCf for MissingChunksCf { + type Key = H256; // File key + type Value = ChunkId; // Chunk ID + + const SCALE_ENCODED_NAME: &'static str = "missing_chunks"; +} + +/// A separate column family for the composite key implementation +pub struct MissingChunksCompositeCf; + +impl Default for MissingChunksCompositeCf { + fn default() -> Self { + Self + } +} + +impl TypedCf for MissingChunksCompositeCf { + type Key = CompositeKey; + type Value = (); + type KeyCodec = ScaleDbCodec; + type ValueCodec = ScaleDbCodec; + + const NAME: &'static str = "missing_chunks"; +} + +/// Column family that stores file metadata for files being downloaded. +/// +/// This CF uses a simple key-value structure where: +/// - The key is a file key (H256 hash) +/// - The value is the complete FileMetadata for that file +/// +/// File metadata is stored when a download begins and is used to validate +/// downloaded chunks and provide information about the file (size, owner, etc.). +/// It is removed when a download is completed or cancelled. +pub struct FileMetadataCf; + +impl Default for FileMetadataCf { + fn default() -> Self { + Self + } +} + +impl ScaleEncodedCf for FileMetadataCf { + type Key = H256; // File key + type Value = FileMetadata; // Original file metadata + + const SCALE_ENCODED_NAME: &'static str = "file_metadata"; +} + +// List of all column families used by the download state store +const ALL_COLUMN_FAMILIES: &[&str] = &[ + MissingChunksCompositeCf::NAME, + FileMetadataCf::SCALE_ENCODED_NAME, +]; + +/// Persistent store for file download state using RocksDB. +/// +/// This store manages two main pieces of download state: +/// 1. Missing chunks for each file (which chunks still need to be downloaded) +/// 2. File metadata for each file being downloaded +/// +/// The store uses separate column families to store different types of data +/// and provides a context-based API for reading and writing data. +pub struct DownloadStateStore { + /// The RocksDB database + rocks: TypedRocksDB, +} + +impl DownloadStateStore { + pub fn new(root_path: PathBuf) -> Result { + let mut path = root_path; + path.push("storagehub/download_state/"); + + let db_path_str = path.to_str().expect("Failed to convert path to string"); + info!(target: LOG_TARGET, "Download state store path: {}", db_path_str); + std::fs::create_dir_all(&db_path_str).expect("Failed to create directory"); + + let mut db_opts = Options::default(); + db_opts.create_if_missing(true); + db_opts.create_missing_column_families(true); + + let column_families: Vec = ALL_COLUMN_FAMILIES + .iter() + .map(|cf| ColumnFamilyDescriptor::new(cf.to_string(), Options::default())) + .collect(); + + let db = DB::open_cf_descriptors(&db_opts, db_path_str, column_families)?; + + Ok(DownloadStateStore { + rocks: TypedRocksDB { db }, + }) + } + + /// Starts a read/write interaction with the DB + pub fn open_rw_context(&self) -> DownloadStateStoreRwContext<'_> { + DownloadStateStoreRwContext::new(TypedDbContext::new( + &self.rocks, + BufferedWriteSupport::new(&self.rocks), + )) + } +} + +/// Read/write transaction context for interacting with the download state store. 
+/// +/// This context manages a transaction with the underlying RocksDB database and +/// provides methods to access the different components of the download state: +/// - Missing chunks map for tracking which chunks need to be downloaded +/// - File metadata for storing and retrieving metadata about files being downloaded +/// +/// Changes are not persisted until the `commit()` method is called, which flushes +/// all pending changes to the database. +pub struct DownloadStateStoreRwContext<'a> { + /// The RocksDB database context + db_context: TypedDbContext<'a, TypedRocksDB, BufferedWriteSupport<'a, TypedRocksDB>>, +} + +impl<'a> DownloadStateStoreRwContext<'a> { + pub fn new( + db_context: TypedDbContext<'a, TypedRocksDB, BufferedWriteSupport<'a, TypedRocksDB>>, + ) -> Self { + Self { db_context } + } + + pub fn missing_chunks_map(&'a self) -> MissingChunksMap<'a> { + MissingChunksMap { + db_context: &self.db_context, + } + } + + pub fn commit(self) { + self.db_context.flush(); + } +} + +impl<'a> ProvidesDbContext for DownloadStateStoreRwContext<'a> { + fn db_context(&self) -> &TypedDbContext> { + &self.db_context + } +} + +impl<'a> ProvidesTypedDbAccess for DownloadStateStoreRwContext<'a> {} + +/// Map-like interface for tracking missing chunks per file. +/// +/// This structure provides methods to: +/// - Initialize a file's missing chunks +/// - Mark chunks as downloaded +/// - Check if a file download is complete +/// - Get a list of missing chunks for a file +/// +/// It uses the MissingChunksCf column family as its backing storage. +pub struct MissingChunksMap<'a> { + db_context: &'a TypedDbContext<'a, TypedRocksDB, BufferedWriteSupport<'a, TypedRocksDB>>, +} + +impl<'a> ProvidesDbContext for MissingChunksMap<'a> { + fn db_context(&self) -> &TypedDbContext> { + self.db_context + } +} + +impl<'a> ProvidesTypedDbAccess for MissingChunksMap<'a> {} + +impl<'a> CFRangeMapAPI for MissingChunksMap<'a> { + type Key = H256; // File key + type Value = ChunkId; // Chunk ID + type MapCF = MissingChunksCompositeCf; +} + +impl<'a> MissingChunksMap<'a> { + // Initialize missing chunks for a file + pub fn initialize_file(&self, metadata: &FileMetadata) { + let file_key = metadata.file_key::(); + let file_key = H256::from_slice(file_key.as_ref()); + let chunks_count = metadata.chunks_count(); + + // Remove any existing chunks first (clean state) + self.remove_key(&file_key); + + // Add all chunks as missing + for chunk_id in 0..chunks_count { + self.insert(&file_key, ChunkId::new(chunk_id)); + } + + // Commit changes + self.db_context().flush(); + } + + // Mark a chunk as successfully downloaded (remove from missing) + pub fn mark_chunk_downloaded(&self, file_key: &H256, chunk_id: ChunkId) -> bool { + let result = self.remove(file_key, &chunk_id); + self.db_context().flush(); + result + } + + // Check if a file download is complete (no missing chunks) + pub fn is_file_complete(&self, file_key: &H256) -> bool { + !self.contains_key(file_key) + } + + // Get all missing chunks for a file + pub fn get_missing_chunks(&self, file_key: &H256) -> Vec { + self.values_for_key(file_key) + } +} + +// Methods to store and retrieve file metadata +impl<'a> DownloadStateStoreRwContext<'a> { + pub fn store_file_metadata(&self, file_key: &H256, metadata: &FileMetadata) { + self.db_context + .cf(&FileMetadataCf::default()) + .put(file_key, metadata); + self.db_context.flush(); + } + + pub fn get_file_metadata(&self, file_key: &H256) -> Option { + self.db_context.cf(&FileMetadataCf::default()).get(file_key) + } + + 
pub fn delete_file_metadata(&self, file_key: &H256) { + self.db_context + .cf(&FileMetadataCf::default()) + .delete(file_key); + self.db_context.flush(); + } +} diff --git a/node/src/services/file_download_manager.rs b/node/src/services/file_download_manager.rs index a009d633d..85c57b659 100644 --- a/node/src/services/file_download_manager.rs +++ b/node/src/services/file_download_manager.rs @@ -1,24 +1,24 @@ use anyhow::{anyhow, Result}; -use codec::Decode; use futures::future::join_all; use log::*; use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng}; +use std::{collections::HashSet, path::PathBuf, sync::Arc, time::Duration}; +use tokio::sync::{RwLock, Semaphore}; + +use codec::Decode; use sc_network::PeerId; +use sp_core::H256; + use shc_common::types::{ - BucketId, Chunk, ChunkId, FileKeyProof, FileMetadata, Fingerprint, HashT, Proven, - StorageProofsMerkleTrieLayout, + BucketId, FileKeyProof, FileMetadata, Fingerprint, HashT, Proven, StorageProofsMerkleTrieLayout, }; use shc_file_manager::traits::FileStorage; use shc_file_transfer_service::{ commands::FileTransferServiceInterface, schema::v1::provider::RemoteDownloadDataResponse, }; -use sp_core::H256; -use std::collections::HashSet; -use std::sync::Arc; -use std::time::Duration; -use tokio::sync::{RwLock, Semaphore}; +use shp_file_metadata::{Chunk, ChunkId}; -use super::bsp_peer_manager::BspPeerManager; +use crate::services::{bsp_peer_manager::BspPeerManager, download_state_store::DownloadStateStore}; const LOG_TARGET: &str = "file_download_manager"; @@ -103,6 +103,8 @@ pub struct FileDownloadManager { file_semaphore: Arc, /// BSP peer manager for tracking and selecting peers peer_manager: Arc, + /// Download state store for persistence + download_state_store: Arc, } impl FileDownloadManager { @@ -110,8 +112,8 @@ impl FileDownloadManager { /// /// # Arguments /// * `peer_manager` - The peer manager to use for peer selection and tracking - pub fn new(peer_manager: Arc) -> Self { - Self::with_limits(FileDownloadLimits::default(), peer_manager) + pub fn new(peer_manager: Arc, data_dir: PathBuf) -> Result { + Self::with_limits(FileDownloadLimits::default(), peer_manager, data_dir) } /// Create a new FileDownloadManager with specified limits @@ -119,14 +121,20 @@ impl FileDownloadManager { /// # Arguments /// * `limits` - The download limits to use /// * `peer_manager` - The peer manager to use for peer selection and tracking - pub fn with_limits(limits: FileDownloadLimits, peer_manager: Arc) -> Self { - let file_semaphore = Arc::new(Semaphore::new(limits.max_concurrent_file_downloads)); - - Self { + pub fn with_limits( + limits: FileDownloadLimits, + peer_manager: Arc, + data_dir: PathBuf, + ) -> Result { + // Create a new download state store + let download_state_store = Arc::new(DownloadStateStore::new(data_dir)?); + + Ok(Self { + file_semaphore: Arc::new(Semaphore::new(limits.max_concurrent_file_downloads)), limits, - file_semaphore, peer_manager, - } + download_state_store, + }) } /// Get a reference to the file semaphore for file-level parallelism @@ -139,15 +147,6 @@ impl FileDownloadManager { Arc::new(Semaphore::new(self.limits.max_concurrent_chunks_per_file)) } - /// Create a batch of chunks to download - pub fn create_chunk_batch(&self, chunk_start: u64, chunks_count: u64) -> HashSet { - let chunk_end = std::cmp::min( - chunk_start + (self.limits.max_chunks_per_request as u64), - chunks_count, - ); - (chunk_start..chunk_end).map(ChunkId::new).collect() - } - /// Process a single proven chunk, writing it to file storage async 
fn process_proven_chunk( &self, @@ -170,6 +169,13 @@ impl FileDownloadManager { .write_chunk(&file_key, &chunk_id, &chunk_data) .map_err(|error| anyhow!("Failed to write chunk {}: {:?}", chunk_idx, error))?; + // Mark chunk as downloaded in persistent state + let context = self.download_state_store.open_rw_context(); + context + .missing_chunks_map() + .mark_chunk_downloaded(&file_key, chunk_id); + context.commit(); + Ok(()) } unexpected => { @@ -412,14 +418,60 @@ impl FileDownloadManager { "Downloading file {:?} with {} chunks", file_key, chunks_count ); + // Check if we already have state for this file + let context = self.download_state_store.open_rw_context(); + let missing_chunks = { + let existing_metadata = context.get_file_metadata(&file_key); + + if let Some(_existing_metadata) = existing_metadata { + info!( + target: LOG_TARGET, + "Resuming download of file {:?} with {} chunks", file_key, chunks_count + ); + // We already have state for this file, use it + } else { + // New file download, initialize state + info!( + target: LOG_TARGET, + "Starting new download of file {:?} with {} chunks", file_key, chunks_count + ); + context.store_file_metadata(&file_key, &file_metadata); + context.missing_chunks_map().initialize_file(&file_metadata); + } + + // Get missing chunks from the store in both cases + context.missing_chunks_map().get_missing_chunks(&file_key) + }; + context.commit(); + + if missing_chunks.is_empty() { + info!( + target: LOG_TARGET, + "File {:?} is already fully downloaded", file_key + ); + return Ok(()); + } + + info!( + target: LOG_TARGET, + "File {:?} has {} missing chunks to download", + file_key, + missing_chunks.len() + ); + // Create a new chunk semaphore for this file let chunk_semaphore = self.new_chunk_semaphore(); let manager = self.clone(); - // Create tasks for chunk batches - let chunk_tasks: Vec<_> = (0..chunks_count) - .step_by(self.limits.max_chunks_per_request) - .map(|chunk_start| { + // Group missing chunks into batches + let max_chunks_per_request = self.limits.max_chunks_per_request as u64; + let mut missing_chunks_sorted = missing_chunks; + missing_chunks_sorted.sort(); + + // Create tasks for missing chunk batches + let chunk_tasks: Vec<_> = missing_chunks_sorted + .chunks(max_chunks_per_request as usize) + .map(|chunk_ids| { let semaphore = Arc::clone(&chunk_semaphore); let file_metadata = file_metadata.clone(); let bucket = bucket.clone(); @@ -427,6 +479,7 @@ impl FileDownloadManager { let file_storage = Arc::clone(&file_storage); let file_key = file_key; let manager = manager.clone(); + let chunk_batch: HashSet = chunk_ids.iter().copied().collect(); tokio::spawn(async move { // Acquire semaphore permit for this chunk batch @@ -435,9 +488,6 @@ impl FileDownloadManager { .await .map_err(|e| anyhow!("Failed to acquire chunk semaphore: {:?}", e))?; - // Create chunk batch - let chunk_batch = manager.create_chunk_batch(chunk_start, chunks_count); - // Get peers to try for this download let mut peers = manager .peer_manager @@ -486,8 +536,7 @@ impl FileDownloadManager { // All peers failed Err(anyhow!( - "Failed to download chunk batch starting at {} after trying all peers", - chunk_start + "Failed to download chunk batch after trying all peers" )) }) }) @@ -496,6 +545,19 @@ impl FileDownloadManager { // Wait for all chunk tasks to complete let results = join_all(chunk_tasks).await; + // Check if download is complete + let is_complete = { + let context = self.download_state_store.open_rw_context(); + let is_complete = 
context.missing_chunks_map().is_file_complete(&file_key); + + if is_complete { + // Clean up metadata if download is complete + context.delete_file_metadata(&file_key); + } + + is_complete + }; + // Process results and check for errors let mut errors = Vec::new(); for (i, result) in results.into_iter().enumerate() { @@ -510,7 +572,7 @@ impl FileDownloadManager { } } - if !errors.is_empty() { + if !errors.is_empty() && !is_complete { Err(anyhow!( "Failed to download file {:?}: {}", file_key, @@ -532,6 +594,7 @@ impl Clone for FileDownloadManager { limits: self.limits.clone(), file_semaphore: Arc::clone(&self.file_semaphore), peer_manager: Arc::clone(&self.peer_manager), + download_state_store: Arc::clone(&self.download_state_store), } } } diff --git a/node/src/services/handler.rs b/node/src/services/handler.rs index e817a5ab0..2fd1d7533 100644 --- a/node/src/services/handler.rs +++ b/node/src/services/handler.rs @@ -118,8 +118,15 @@ where indexer_db_pool: Option, peer_manager: Arc, ) -> Self { + // Get the data directory path from the peer manager's directory + // This assumes the peer manager stores data in a similar location to where we want our download state + let data_dir = std::env::temp_dir().join("storagehub"); + // Create a FileDownloadManager with the peer manager already initialized - let file_download_manager = Arc::new(FileDownloadManager::new(Arc::clone(&peer_manager))); + let file_download_manager = Arc::new( + FileDownloadManager::new(Arc::clone(&peer_manager), data_dir) + .expect("Failed to initialize FileDownloadManager"), + ); Self { task_spawner, diff --git a/node/src/services/mod.rs b/node/src/services/mod.rs index 4ed8f4790..dc32bc66e 100644 --- a/node/src/services/mod.rs +++ b/node/src/services/mod.rs @@ -1,5 +1,6 @@ pub mod bsp_peer_manager; pub mod builder; +pub mod download_state_store; pub mod file_download_manager; pub mod forest_storage; pub mod handler; diff --git a/primitives/file-metadata/src/lib.rs b/primitives/file-metadata/src/lib.rs index d8367ae00..4d6522a60 100644 --- a/primitives/file-metadata/src/lib.rs +++ b/primitives/file-metadata/src/lib.rs @@ -354,6 +354,12 @@ impl PartialEq<[u8]> for Fingerprint { #[derive(Debug, Clone, Copy, PartialEq, Eq, TypeInfo, Encode, Decode, Ord, PartialOrd, Hash)] pub struct ChunkId(u64); +impl Default for ChunkId { + fn default() -> Self { + Self(0) + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub enum ChunkIdError { InvalidChunkId, From 2db6e973c55148c369808a77f0afcbdb23269e26 Mon Sep 17 00:00:00 2001 From: Alexandru Murtaza Date: Fri, 7 Mar 2025 16:08:47 +0200 Subject: [PATCH 10/11] add resume/retry move bucket task --- client/file-transfer-service/src/events.rs | 16 ++ client/file-transfer-service/src/handler.rs | 37 ++- node/src/services/download_state_store.rs | 87 +++++- node/src/services/file_download_manager.rs | 96 +++++++ node/src/services/handler.rs | 20 +- node/src/tasks/mod.rs | 1 + node/src/tasks/msp_move_bucket.rs | 291 +++++++------------- node/src/tasks/retry_bucket_move.rs | 259 +++++++++++++++++ 8 files changed, 604 insertions(+), 203 deletions(-) create mode 100644 node/src/tasks/retry_bucket_move.rs diff --git a/client/file-transfer-service/src/events.rs b/client/file-transfer-service/src/events.rs index 7e4270ba6..3546d9e84 100644 --- a/client/file-transfer-service/src/events.rs +++ b/client/file-transfer-service/src/events.rs @@ -44,10 +44,19 @@ pub struct RemoteDownloadRequest { impl EventBusMessage for RemoteDownloadRequest {} +/// Event triggered to retry pending bucket move downloads. 
+/// This is emitted on startup and will be periodically emitted later to ensure +/// any interrupted downloads can be resumed. +#[derive(Debug, Clone)] +pub struct RetryBucketMoveDownload; + +impl EventBusMessage for RetryBucketMoveDownload {} + #[derive(Clone, Default)] pub struct FileTransferServiceEventBusProvider { remote_upload_request_event_bus: EventBus, remote_download_request_event_bus: EventBus, + retry_bucket_move_download_event_bus: EventBus, } impl FileTransferServiceEventBusProvider { @@ -55,6 +64,7 @@ impl FileTransferServiceEventBusProvider { Self { remote_upload_request_event_bus: EventBus::new(), remote_download_request_event_bus: EventBus::new(), + retry_bucket_move_download_event_bus: EventBus::new(), } } } @@ -70,3 +80,9 @@ impl ProvidesEventBus for FileTransferServiceEventBusProv &self.remote_download_request_event_bus } } + +impl ProvidesEventBus for FileTransferServiceEventBusProvider { + fn event_bus(&self) -> &EventBus { + &self.retry_bucket_move_download_event_bus + } +} diff --git a/client/file-transfer-service/src/handler.rs b/client/file-transfer-service/src/handler.rs index a06f3427c..8dc84f0b4 100644 --- a/client/file-transfer-service/src/handler.rs +++ b/client/file-transfer-service/src/handler.rs @@ -46,7 +46,7 @@ use shc_common::types::{ }; use shp_file_metadata::ChunkId; -use crate::events::RemoteUploadRequest; +use crate::events::{RemoteUploadRequest, RetryBucketMoveDownload}; use super::{ commands::{FileTransferServiceCommand, RequestError}, @@ -56,6 +56,9 @@ use super::{ const LOG_TARGET: &str = "file-transfer-service"; +/// Time interval between bucket move download retry attempts (in seconds) +const BUCKET_MOVE_RETRY_INTERVAL_SECONDS: u64 = 3 * 60 * 60; // 3 hours + #[derive(Eq)] pub struct BucketIdWithExpiration { bucket_id: BucketId, @@ -122,6 +125,8 @@ pub struct FileTransferService { HashMap>, /// Counter for generating unique upload request IDs upload_pending_response_nonce: UploadRequestId, + /// Timestamp of the last bucket move retry check + last_bucket_move_retry: Option>, } impl Actor for FileTransferService { @@ -517,6 +522,9 @@ impl ActorEventLoop for FileTransferServiceEventLoop { Some(MergedEventLoopMessage::Tick) => { // Handle expired buckets self.actor.handle_expired_buckets(); + + // Check for pending bucket move downloads to retry + self.actor.handle_retry_bucket_move(); } None => { warn!(target: LOG_TARGET, "FileTransferService event loop terminated."); @@ -548,6 +556,7 @@ impl FileTransferService { download_pending_response_nonce: DownloadRequestId::new(0), upload_pending_responses: HashMap::new(), upload_pending_response_nonce: UploadRequestId::new(0), + last_bucket_move_retry: None, } } @@ -788,4 +797,30 @@ impl FileTransferService { bucket_to_check = self.bucket_allow_list_grace_period_time.first(); } } + + fn handle_retry_bucket_move(&mut self) { + let now = chrono::Utc::now(); + + // Check if it's time to retry bucket move downloads + let should_retry = match self.last_bucket_move_retry { + // If we've never retried or enough time has passed + None => true, + Some(last_retry) => { + let duration_since_last_retry = now.signed_duration_since(last_retry); + duration_since_last_retry.num_seconds() as u64 >= BUCKET_MOVE_RETRY_INTERVAL_SECONDS + } + }; + + if should_retry { + // Update the last retry timestamp + self.last_bucket_move_retry = Some(now); + + // Emit the retry event + info!( + target: LOG_TARGET, + "Emitting RetryBucketMoveDownload event to check for pending bucket downloads" + ); + 
self.emit(RetryBucketMoveDownload); + } + } } diff --git a/node/src/services/download_state_store.rs b/node/src/services/download_state_store.rs index 8238fd972..717b69f19 100644 --- a/node/src/services/download_state_store.rs +++ b/node/src/services/download_state_store.rs @@ -8,7 +8,7 @@ use shc_common::{ BufferedWriteSupport, CFRangeMapAPI, CompositeKey, ProvidesDbContext, ProvidesTypedDbAccess, ScaleDbCodec, ScaleEncodedCf, TypedCf, TypedDbContext, TypedRocksDB, }, - types::FileMetadata, + types::{BucketId, FileMetadata}, }; use shp_file_metadata::ChunkId; @@ -70,6 +70,29 @@ impl TypedCf for MissingChunksCompositeCf { /// It is removed when a download is completed or cancelled. pub struct FileMetadataCf; +/// Column family that tracks pending bucket downloads. +/// +/// This CF uses a simple key-value structure where: +/// - The key is a bucket ID (BucketId) +/// - The value is a boolean flag indicating whether the download is in progress +/// +/// This CF is used to track which buckets are being downloaded so that +/// downloads can be resumed if interrupted. +pub struct PendingBucketDownloadsCf; + +impl Default for PendingBucketDownloadsCf { + fn default() -> Self { + Self + } +} + +impl ScaleEncodedCf for PendingBucketDownloadsCf { + type Key = BucketId; // Bucket ID + type Value = bool; // Download in progress flag + + const SCALE_ENCODED_NAME: &'static str = "pending_bucket_downloads"; +} + impl Default for FileMetadataCf { fn default() -> Self { Self @@ -87,6 +110,7 @@ impl ScaleEncodedCf for FileMetadataCf { const ALL_COLUMN_FAMILIES: &[&str] = &[ MissingChunksCompositeCf::NAME, FileMetadataCf::SCALE_ENCODED_NAME, + PendingBucketDownloadsCf::SCALE_ENCODED_NAME, ]; /// Persistent store for file download state using RocksDB. @@ -166,6 +190,60 @@ impl<'a> DownloadStateStoreRwContext<'a> { pub fn commit(self) { self.db_context.flush(); } + + pub fn delete_file_metadata(&self, file_key: &H256) { + self.db_context + .cf(&FileMetadataCf::default()) + .delete(file_key); + self.db_context.flush(); + } + + // Methods to store and retrieve pending bucket downloads + pub fn mark_bucket_download_started(&self, bucket_id: &BucketId) { + self.db_context + .cf(&PendingBucketDownloadsCf::default()) + .put(bucket_id, &true); + self.db_context.flush(); + } + + pub fn mark_bucket_download_completed(&self, bucket_id: &BucketId) { + self.db_context + .cf(&PendingBucketDownloadsCf::default()) + .delete(bucket_id); + self.db_context.flush(); + } + + pub fn is_bucket_download_in_progress(&self, bucket_id: &BucketId) -> bool { + self.db_context + .cf(&PendingBucketDownloadsCf::default()) + .get(bucket_id) + .is_some() + } + + pub fn get_all_pending_bucket_downloads(&self) -> Vec { + self.db_context + .cf(&PendingBucketDownloadsCf::default()) + .iterate_with_range(..) + .map(|(bucket_id, _)| bucket_id) + .collect() + } + + /// Get all file keys that need to be downloaded for a specific bucket + pub fn get_missing_files_for_bucket(&self, bucket_id: &BucketId) -> Vec { + // If the bucket is not in progress, return empty list + if !self.is_bucket_download_in_progress(bucket_id) { + return Vec::new(); + } + + // Get all files with pending downloads for this bucket + // For now, we'll just return all files in the store since we don't track by bucket + self.missing_chunks_map() + .db_context() + .cf(&MissingChunksCf::default()) + .iterate_with_range(..) 
+ .map(|(file_key, _)| file_key) + .collect() + } } impl<'a> ProvidesDbContext for DownloadStateStoreRwContext<'a> { @@ -252,11 +330,4 @@ impl<'a> DownloadStateStoreRwContext<'a> { pub fn get_file_metadata(&self, file_key: &H256) -> Option { self.db_context.cf(&FileMetadataCf::default()).get(file_key) } - - pub fn delete_file_metadata(&self, file_key: &H256) { - self.db_context - .cf(&FileMetadataCf::default()) - .delete(file_key); - self.db_context.flush(); - } } diff --git a/node/src/services/file_download_manager.rs b/node/src/services/file_download_manager.rs index 85c57b659..3484043b3 100644 --- a/node/src/services/file_download_manager.rs +++ b/node/src/services/file_download_manager.rs @@ -30,6 +30,7 @@ const CHUNK_REQUEST_PEER_RETRY_ATTEMPTS: usize = 5; const DOWNLOAD_RETRY_ATTEMPTS: usize = 2; const BEST_PEERS_TO_SELECT: usize = 2; const RANDOM_PEERS_TO_SELECT: usize = 3; +const MAX_CONCURRENT_BUCKET_DOWNLOADS: usize = 2; /// Configuration for file download limits and parallelism settings pub struct FileDownloadLimits { @@ -47,6 +48,8 @@ pub struct FileDownloadLimits { pub best_peers_to_select: usize, /// Number of random peers to select pub random_peers_to_select: usize, + /// Maximum number of bucket downloads to process in parallel + pub max_concurrent_bucket_downloads: usize, } impl Default for FileDownloadLimits { @@ -59,6 +62,7 @@ impl Default for FileDownloadLimits { download_retry_attempts: DOWNLOAD_RETRY_ATTEMPTS, best_peers_to_select: BEST_PEERS_TO_SELECT, random_peers_to_select: RANDOM_PEERS_TO_SELECT, + max_concurrent_bucket_downloads: MAX_CONCURRENT_BUCKET_DOWNLOADS, } } } @@ -73,6 +77,7 @@ impl Clone for FileDownloadLimits { download_retry_attempts: self.download_retry_attempts, best_peers_to_select: self.best_peers_to_select, random_peers_to_select: self.random_peers_to_select, + max_concurrent_bucket_downloads: self.max_concurrent_bucket_downloads, } } } @@ -101,6 +106,8 @@ pub struct FileDownloadManager { pub limits: FileDownloadLimits, /// Semaphore for controlling file-level parallelism file_semaphore: Arc, + /// Semaphore for controlling bucket-level parallelism + bucket_semaphore: Arc, /// BSP peer manager for tracking and selecting peers peer_manager: Arc, /// Download state store for persistence @@ -131,6 +138,7 @@ impl FileDownloadManager { Ok(Self { file_semaphore: Arc::new(Semaphore::new(limits.max_concurrent_file_downloads)), + bucket_semaphore: Arc::new(Semaphore::new(limits.max_concurrent_bucket_downloads)), limits, peer_manager, download_state_store, @@ -142,6 +150,16 @@ impl FileDownloadManager { Arc::clone(&self.file_semaphore) } + /// Get a reference to the bucket semaphore for bucket-level parallelism + pub fn bucket_semaphore(&self) -> Arc { + self.bucket_semaphore.clone() + } + + /// Returns a reference to the download state store + pub fn download_state_store(&self) -> Arc { + self.download_state_store.clone() + } + /// Create a new chunk semaphore for chunk-level parallelism within a file pub fn new_chunk_semaphore(&self) -> Arc { Arc::new(Semaphore::new(self.limits.max_concurrent_chunks_per_file)) @@ -586,6 +604,83 @@ impl FileDownloadManager { Ok(()) } } + + /// Mark a bucket download as started + pub fn mark_bucket_download_started(&self, bucket_id: &BucketId) { + let context = self.download_state_store.open_rw_context(); + context.mark_bucket_download_started(bucket_id); + context.commit(); + } + + /// Mark a bucket download as completed + pub fn mark_bucket_download_completed(&self, bucket_id: &BucketId) { + let context = 
self.download_state_store.open_rw_context(); + context.mark_bucket_download_completed(bucket_id); + context.commit(); + } + + /// Check if a bucket download is in progress + pub fn is_bucket_download_in_progress(&self, bucket_id: &BucketId) -> bool { + let context = self.download_state_store.open_rw_context(); + let result = context.is_bucket_download_in_progress(bucket_id); + result + } + + /// Download an entire bucket's files + /// This method accepts a list of file metadata objects rather than using a callback + pub async fn download_bucket( + &self, + bucket_id: BucketId, + file_metadatas: Vec, + file_transfer: FT, + file_storage: Arc>, + ) -> Result<()> + where + FT: FileTransferServiceInterface + Send + Sync + Clone + 'static, + FS: FileStorage + Send + Sync + 'static, + { + // Check if bucket download is already in progress + if self.is_bucket_download_in_progress(&bucket_id) { + info!( + target: LOG_TARGET, + "Resuming download of bucket {:?}", bucket_id + ); + } else { + info!( + target: LOG_TARGET, + "Starting new download of bucket {:?}", bucket_id + ); + self.mark_bucket_download_started(&bucket_id); + } + + // Acquire a bucket download permit + let _bucket_permit = self.bucket_semaphore.clone().acquire_owned().await?; + + info!( + target: LOG_TARGET, + "Downloading {} files for bucket {:?}", file_metadatas.len(), bucket_id + ); + + // Download each file in the bucket + for file_metadata in file_metadatas { + self.download_file( + file_metadata.clone(), + bucket_id, + file_transfer.clone(), + Arc::clone(&file_storage), + ) + .await?; + } + + // Mark bucket download as completed + self.mark_bucket_download_completed(&bucket_id); + info!( + target: LOG_TARGET, + "Completed download of bucket {:?}", bucket_id + ); + + Ok(()) + } } impl Clone for FileDownloadManager { @@ -593,6 +688,7 @@ impl Clone for FileDownloadManager { Self { limits: self.limits.clone(), file_semaphore: Arc::clone(&self.file_semaphore), + bucket_semaphore: Arc::clone(&self.bucket_semaphore), peer_manager: Arc::clone(&self.peer_manager), download_state_store: Arc::clone(&self.download_state_store), } diff --git a/node/src/services/handler.rs b/node/src/services/handler.rs index 2fd1d7533..b5054ff50 100644 --- a/node/src/services/handler.rs +++ b/node/src/services/handler.rs @@ -22,7 +22,7 @@ use shc_blockchain_service::{ }; use shc_common::consts::CURRENT_FOREST_KEY; use shc_file_transfer_service::{ - events::{RemoteDownloadRequest, RemoteUploadRequest}, + events::{RemoteDownloadRequest, RemoteUploadRequest, RetryBucketMoveDownload}, FileTransferService, }; use shc_forest_manager::traits::ForestStorageHandler; @@ -44,8 +44,8 @@ use crate::{ msp_charge_fees::MspChargeFeesTask, msp_delete_bucket::MspDeleteBucketTask, msp_delete_file::MspDeleteFileTask, msp_move_bucket::MspRespondMoveBucketTask, msp_stop_storing_insolvent_user::MspStopStoringInsolventUserTask, - msp_upload_file::MspUploadFileTask, sp_slash_provider::SlashProviderTask, - user_sends_file::UserSendsFileTask, + msp_upload_file::MspUploadFileTask, retry_bucket_move::RetryBucketMoveTask, + sp_slash_provider::SlashProviderTask, user_sends_file::UserSendsFileTask, }, }; @@ -341,6 +341,20 @@ where .clone() .subscribe_to(&self.task_spawner, &self.blockchain, true); notify_period_event_bus_listener.start(); + + // Create the RetryBucketMoveTask and subscribe to events + let retry_bucket_move_download_task = RetryBucketMoveTask::new(self.clone()); + + // Subscribing to RetryBucketMoveDownload event from the FileTransferService. 
+ let retry_bucket_move_download_event_bus_listener: EventBusListener< + RetryBucketMoveDownload, + _, + > = retry_bucket_move_download_task.clone().subscribe_to( + &self.task_spawner, + &self.file_transfer, + false, + ); + retry_bucket_move_download_event_bus_listener.start(); } } diff --git a/node/src/tasks/mod.rs b/node/src/tasks/mod.rs index b25e48865..c55e9fc58 100644 --- a/node/src/tasks/mod.rs +++ b/node/src/tasks/mod.rs @@ -12,5 +12,6 @@ pub mod msp_delete_file; pub mod msp_move_bucket; pub mod msp_stop_storing_insolvent_user; pub mod msp_upload_file; +pub mod retry_bucket_move; pub mod sp_slash_provider; pub mod user_sends_file; diff --git a/node/src/tasks/msp_move_bucket.rs b/node/src/tasks/msp_move_bucket.rs index 1ccc4640f..b217ed703 100644 --- a/node/src/tasks/msp_move_bucket.rs +++ b/node/src/tasks/msp_move_bucket.rs @@ -1,10 +1,6 @@ use anyhow::anyhow; -use futures::future::join_all; use rand::{rngs::StdRng, SeedableRng}; -use std::{ - sync::{Arc, Mutex}, - time::Duration, -}; +use std::{sync::Mutex, time::Duration}; use sc_tracing::tracing::*; use sp_core::H256; @@ -21,7 +17,7 @@ use shc_common::types::{ BucketId, HashT, ProviderId, StorageProofsMerkleTrieLayout, StorageProviderId, }; use shc_file_manager::traits::FileStorage; -use shc_forest_manager::traits::{ForestStorage, ForestStorageHandler}; +use shc_forest_manager::traits::ForestStorageHandler; use crate::services::{ handler::StorageHubHandler, @@ -113,99 +109,70 @@ where event.bucket_id ); - let indexer_db_pool = if let Some(indexer_db_pool) = - self.storage_hub_handler.indexer_db_pool.clone() - { - indexer_db_pool - } else { - return Err(anyhow!("Indexer is disabled but a move bucket event was received. Please provide a database URL (and enable indexer) for it to use this feature.")); - }; + // Get all files for this bucket from the indexer + let indexer_db_pool = + if let Some(indexer_db_pool) = self.storage_hub_handler.indexer_db_pool.clone() { + indexer_db_pool + } else { + return Err(anyhow!( + "Indexer is disabled but a StartMovedBucketDownload event was received" + )); + }; - let mut indexer_connection = indexer_db_pool.get().await.map_err(|error| { - anyhow!( - "Failed to get indexer connection after timeout: {:?}", - error - ) - })?; + let mut indexer_connection = indexer_db_pool.get().await?; - let bucket = event.bucket_id.as_ref().to_vec(); let files = shc_indexer_db::models::File::get_by_onchain_bucket_id( &mut indexer_connection, - bucket.clone(), + event.bucket_id.as_ref().to_vec(), ) .await?; - let total_files = files.len(); - - // Create forest storage for the bucket if it doesn't exist - let _ = self - .storage_hub_handler - .forest_storage_handler - .get_or_create(&bucket) - .await; + if files.is_empty() { + info!( + target: LOG_TARGET, + "No files to download for bucket {:?}", event.bucket_id + ); + self.pending_bucket_id = None; + return Ok(()); + } - // Get the file semaphore from the download manager - let file_semaphore = self - .storage_hub_handler - .file_download_manager - .file_semaphore(); - let file_tasks: Vec<_> = files - .into_iter() - .map(|file| { - let semaphore = Arc::clone(&file_semaphore); - let task = self.clone(); - let bucket_id = event.bucket_id; - - tokio::spawn(async move { - let _permit = semaphore - .acquire() - .await - .map_err(|e| anyhow!("Failed to acquire file semaphore: {:?}", e))?; - - // Download file using the simplified download method - task.download_file(&file, &bucket_id).await - }) - }) - .collect(); - - // Wait for all file downloads to complete - let 
results = join_all(file_tasks).await; - - // Process results and count failures - let mut failed_downloads = 0; - for result in results { - match result { - Ok(download_result) => { - if let Err(e) = download_result { + // Convert indexer files to FileMetadata + let file_metadatas = files + .iter() + .filter_map( + |file| match file.to_file_metadata(event.bucket_id.as_ref().to_vec()) { + Ok(metadata) => Some(metadata), + Err(e) => { error!( target: LOG_TARGET, - "File download task failed: {:?}", e + "Failed to convert file to metadata: {:?}", e ); - failed_downloads += 1; + None } - } - Err(e) => { - error!( - target: LOG_TARGET, - "File download task panicked: {:?}", e - ); - failed_downloads += 1; - } - } - } + }, + ) + .collect::>(); + + // Now download all files using the FileDownloadManager + let file_download_manager = &self.storage_hub_handler.file_download_manager; + let file_transfer_service = self.storage_hub_handler.file_transfer.clone(); + + file_download_manager + .download_bucket( + event.bucket_id, + file_metadatas, + file_transfer_service, + self.storage_hub_handler.file_storage.clone(), + ) + .await?; - if failed_downloads > 0 { - return Err(anyhow!( - "Failed to download {} out of {} files", - failed_downloads, - total_files - )); - } else { - info!( - target: LOG_TARGET, - "Successfully completed bucket move with all files downloaded" - ); - } + // After download is complete, update status + self.pending_bucket_id = None; + + info!( + target: LOG_TARGET, + "Bucket move completed for bucket {:?}", event.bucket_id + ); Ok(()) } @@ -237,22 +204,25 @@ where error ) })?; - let bucket = event.bucket_id.as_ref().to_vec(); - - let forest_storage = self - .storage_hub_handler - .forest_storage_handler - .get_or_create(&bucket) - .await; - - self.pending_bucket_id = Some(event.bucket_id); + // First, retrieve all the files for this bucket from the indexer let files = shc_indexer_db::models::File::get_by_onchain_bucket_id( &mut indexer_connection, - bucket.clone(), + event.bucket_id.as_ref().to_vec(), ) .await?; + if files.is_empty() { + warn!( + target: LOG_TARGET, + "No files found for bucket {:?}", event.bucket_id + ); + // We still accept since there's nothing to download + self.accept_bucket_move(event.bucket_id).await?; + return Ok(()); + } + + // Calculate total size to check capacity let total_size: u64 = files .iter() .try_fold(0u64, |acc, file| acc.checked_add(file.size as u64)) @@ -266,63 +236,60 @@ where .query_storage_provider_id(None) .await?; + // Convert to the expected ProviderId type let own_msp_id = match own_provider_id { Some(StorageProviderId::MainStorageProvider(id)) => id, Some(StorageProviderId::BackupStorageProvider(_)) => { - return Err(anyhow!("CRITICAL ❗️❗️❗️: Current node account is a Backup Storage Provider. Expected a Main Storage Provider ID.")); + return Err(anyhow!("Current node is a BSP. 
Expected an MSP ID.")); } None => { - return Err(anyhow!("CRITICAL ❗️❗️❗️: Failed to get own MSP ID.")); + return Err(anyhow!("Failed to get own provider ID.")); } }; - // Check and increase capacity if needed + // Validate capacity - might trigger capacity increase self.check_and_increase_capacity(total_size, own_msp_id) .await?; - // Try to insert all files before accepting the request + // Register BSP peers and prepare file metadata + let mut file_metadatas = Vec::with_capacity(files.len()); + for file in &files { let file_metadata = file - .to_file_metadata(bucket.clone()) + .to_file_metadata(event.bucket_id.as_ref().to_vec()) .map_err(|e| anyhow!("Failed to convert file to file metadata: {:?}", e))?; - let file_key = file_metadata.file_key::>(); - - self.storage_hub_handler - .file_storage - .write() - .await - .insert_file(file_key, file_metadata.clone()) - .map_err(|error| { - anyhow!( - "CRITICAL ❗️❗️❗️: Failed to insert file {:?} into file storage: {:?}", - file_key, - error - ) - })?; - - self.file_storage_inserted_file_keys.push(file_key); - forest_storage - .write() - .await - .insert_files_metadata(&[file_metadata.clone()]) - .map_err(|error| { - anyhow!( - "CRITICAL ❗️❗️❗️: Failed to insert file {:?} into forest storage: {:?}", - file_key, - error - ) - })?; + let file_key = file_metadata.file_key::>(); + // Register the BSP peers with the peer manager for this file let bsp_peer_ids = file.get_bsp_peer_ids(&mut indexer_connection).await?; if bsp_peer_ids.is_empty() { return Err(anyhow!("No BSP peer IDs found for file {:?}", file_key)); } + + for peer_id in &bsp_peer_ids { + self.storage_hub_handler + .peer_manager + .add_peer(*peer_id, file_key) + .await; + } + + // Add the file metadata to our list + file_metadatas.push(file_metadata); } - // Accept the request since we've verified we can handle all files + // Store bucket ID for tracking purposes + self.pending_bucket_id = Some(event.bucket_id); + + // All validation passed, now accept the request self.accept_bucket_move(event.bucket_id).await?; + // File downloads will be initiated by the StartMovedBucketDownload event handler + info!( + target: LOG_TARGET, + "Bucket move request accepted for bucket {:?}, waiting for on-chain confirmation", event.bucket_id + ); + Ok(()) } @@ -458,9 +425,8 @@ where .query_available_storage_capacity(own_msp_id) .await .map_err(|e| { - let err_msg = format!("Failed to query available storage capacity: {:?}", e); - error!(target: LOG_TARGET, err_msg); - anyhow::anyhow!(err_msg) + error!(target: LOG_TARGET, "Failed to query available storage capacity: {:?}", e); + anyhow::anyhow!("Failed to query available storage capacity: {:?}", e) })?; // Increase storage capacity if the available capacity is less than the required size @@ -478,9 +444,8 @@ where .query_storage_provider_capacity(own_msp_id) .await .map_err(|e| { - let err_msg = format!("Failed to query storage provider capacity: {:?}", e); - error!(target: LOG_TARGET, err_msg); - anyhow::anyhow!(err_msg) + error!(target: LOG_TARGET, "Failed to query storage provider capacity: {:?}", e); + anyhow::anyhow!("Failed to query storage provider capacity: {:?}", e) })?; let max_storage_capacity = self @@ -527,60 +492,4 @@ where Ok(()) } - - /// Downloads a file from BSPs (Backup Storage Providers). 
- /// - /// Uses the FileDownloadManager which implements multi-level parallelism: - /// - File-level parallelism: Multiple files can be downloaded simultaneously - /// - Chunk-level parallelism: For each file, multiple chunk batches are downloaded in parallel - /// - Peer selection and retry strategy: Selects and tries multiple peers - async fn download_file( - &self, - file: &shc_indexer_db::models::File, - bucket: &BucketId, - ) -> anyhow::Result<()> { - let file_metadata = file - .to_file_metadata(bucket.as_ref().to_vec()) - .map_err(|e| anyhow!("Failed to convert file to file metadata: {:?}", e))?; - let file_key = file_metadata.file_key::>(); - - info!( - target: LOG_TARGET, - "Starting file download for file_key: {:?}", file_key - ); - - // Register BSP peers with the peer manager for this file - let bsp_peer_ids = file - .get_bsp_peer_ids( - &mut self - .storage_hub_handler - .indexer_db_pool - .as_ref() - .unwrap() - .get() - .await?, - ) - .await?; - - for &peer_id in &bsp_peer_ids { - self.storage_hub_handler - .peer_manager - .add_peer(peer_id, file_key) - .await; - } - - // Get the file storage reference - let file_storage = self.storage_hub_handler.file_storage.clone(); - - // Use the simplified FileDownloadManager interface - self.storage_hub_handler - .file_download_manager - .download_file( - file_metadata, - *bucket, - self.storage_hub_handler.file_transfer.clone(), - file_storage, - ) - .await - } } diff --git a/node/src/tasks/retry_bucket_move.rs b/node/src/tasks/retry_bucket_move.rs new file mode 100644 index 000000000..2c6dd34a5 --- /dev/null +++ b/node/src/tasks/retry_bucket_move.rs @@ -0,0 +1,259 @@ +use log::{error, info, warn}; +use shc_actors_framework::event_bus::EventHandler; +use shc_common::types::{HashT, StorageProofsMerkleTrieLayout}; +use shc_file_transfer_service::events::RetryBucketMoveDownload; +use std::sync::Arc; + +use crate::services::{ + download_state_store::DownloadStateStore, + handler::StorageHubHandler, + types::{MspForestStorageHandlerT, ShNodeType}, +}; + +const LOG_TARGET: &str = "retry-bucket-move-task"; + +/// Task that handles retrying and resuming bucket move downloads +/// that might have been interrupted. 
+pub struct RetryBucketMoveTask +where + NT: ShNodeType + 'static, + NT::FSH: MspForestStorageHandlerT, +{ + storage_hub_handler: StorageHubHandler, + download_state_store: Arc, +} + +impl RetryBucketMoveTask +where + NT: ShNodeType + 'static, + NT::FSH: MspForestStorageHandlerT, +{ + pub fn new(storage_hub_handler: StorageHubHandler) -> Self { + Self { + storage_hub_handler: storage_hub_handler.clone(), + download_state_store: storage_hub_handler + .file_download_manager + .download_state_store(), + } + } +} + +impl Clone for RetryBucketMoveTask +where + NT: ShNodeType + 'static, + NT::FSH: MspForestStorageHandlerT, +{ + fn clone(&self) -> Self { + Self { + storage_hub_handler: self.storage_hub_handler.clone(), + download_state_store: self.download_state_store.clone(), + } + } +} + +impl EventHandler for RetryBucketMoveTask +where + NT: ShNodeType + 'static, + NT::FSH: MspForestStorageHandlerT, +{ + async fn handle_event(&mut self, _event: RetryBucketMoveDownload) -> anyhow::Result<()> { + info!( + target: LOG_TARGET, + "Checking for pending bucket downloads to resume" + ); + + // Get all pending bucket downloads from the state store + let context = self.download_state_store.open_rw_context(); + let pending_buckets = context.get_all_pending_bucket_downloads(); + + if pending_buckets.is_empty() { + info!( + target: LOG_TARGET, + "No pending bucket downloads to resume" + ); + return Ok(()); + } + + info!( + target: LOG_TARGET, + "Found {} pending bucket downloads to resume", pending_buckets.len() + ); + + // Get indexer DB pool for fetching file metadata + let indexer_db_pool = + if let Some(indexer_db_pool) = self.storage_hub_handler.indexer_db_pool.clone() { + indexer_db_pool + } else { + warn!( + target: LOG_TARGET, + "Indexer is disabled but there are pending bucket downloads" + ); + return Ok(()); + }; + + // For each pending bucket, directly use the file download manager + for bucket_id in pending_buckets { + info!( + target: LOG_TARGET, + "Resuming download for bucket {:?}", bucket_id + ); + + // Get connection to indexer DB + let mut indexer_connection = match indexer_db_pool.get().await { + Ok(conn) => conn, + Err(e) => { + error!( + target: LOG_TARGET, + "Failed to get indexer connection: {:?}", e + ); + continue; + } + }; + + // Get files for this bucket from indexer only to update BSP peer information + let indexer_files = match shc_indexer_db::models::File::get_by_onchain_bucket_id( + &mut indexer_connection, + bucket_id.as_ref().to_vec(), + ) + .await + { + Ok(files) => files, + Err(e) => { + error!( + target: LOG_TARGET, + "Failed to get files for bucket {:?} from indexer: {:?}", bucket_id, e + ); + // Continue with download attempt even if we couldn't get indexer info + // We'll just have fewer peers to try + Vec::new() + } + }; + + // Get missing files from download state store + let context = self.download_state_store.open_rw_context(); + let missing_files = context.get_missing_files_for_bucket(&bucket_id); + + if missing_files.is_empty() { + info!( + target: LOG_TARGET, + "No missing files found for bucket {:?}, marking as completed", bucket_id + ); + + // Mark as completed if no missing files in download state + context.mark_bucket_download_completed(&bucket_id); + context.commit(); + continue; + } + context.commit(); + + // Register BSP peers from indexer files + for file in &indexer_files { + if let Ok(metadata) = file.to_file_metadata(bucket_id.as_ref().to_vec()) { + // Register BSP peers for this file + let file_key = metadata.file_key::>(); + if let Ok(peer_ids) = + 
futures::executor::block_on(file.get_bsp_peer_ids(&mut indexer_connection)) + { + for peer_id in &peer_ids { + // Register peer for file + let _ = self + .storage_hub_handler + .peer_manager + .add_peer(*peer_id, file_key); + } + } + } + } + + // Get list of file metadatas from indexer files + let file_metadatas = indexer_files + .iter() + .filter_map( + |file| match file.to_file_metadata(bucket_id.as_ref().to_vec()) { + Ok(metadata) => Some(metadata), + Err(e) => { + error!( + target: LOG_TARGET, + "Failed to convert file to metadata: {:?}", e + ); + None + } + }, + ) + .collect::>(); + + info!( + target: LOG_TARGET, + "Starting download of {} files for bucket {:?}", + file_metadatas.len(), bucket_id + ); + + // Acquire a bucket semaphore permit to respect concurrency limits + let bucket_semaphore = self + .storage_hub_handler + .file_download_manager + .bucket_semaphore(); + let permit = match tokio::time::timeout( + std::time::Duration::from_secs(10), + bucket_semaphore.acquire(), + ) + .await + { + Ok(permit_result) => match permit_result { + Ok(permit) => { + info!( + target: LOG_TARGET, + "Acquired bucket semaphore permit for bucket {:?}", bucket_id + ); + permit + } + Err(e) => { + error!( + target: LOG_TARGET, + "Failed to acquire bucket semaphore: {:?}", e + ); + continue; + } + }, + Err(_) => { + warn!( + target: LOG_TARGET, + "Timed out waiting for bucket semaphore for bucket {:?}", bucket_id + ); + continue; + } + }; + + // Create a _permit variable that drops at the end of scope, releasing the semaphore + let _permit = permit; + + let file_transfer_service = self.storage_hub_handler.file_transfer.clone(); + let file_storage = self.storage_hub_handler.file_storage.clone(); + + if let Err(e) = self + .storage_hub_handler + .file_download_manager + .download_bucket( + bucket_id, + file_metadatas, + file_transfer_service, + file_storage, + ) + .await + { + error!( + target: LOG_TARGET, + "Failed to resume bucket download for {:?}: {:?}", bucket_id, e + ); + // Note: We don't mark as completed here so it can be retried later + } else { + info!( + target: LOG_TARGET, + "Successfully resumed bucket download for {:?}", bucket_id + ); + } + } + + Ok(()) + } +} From 7cfa2b96a14910234a8dfb1dea298928132108b2 Mon Sep 17 00:00:00 2001 From: Alexandru Murtaza Date: Mon, 10 Mar 2025 14:44:02 +0200 Subject: [PATCH 11/11] Fix MSP move bucket and BSP download whitelist update --- client/blockchain-service/src/handler_bsp.rs | 46 +++++------------- client/file-transfer-service/src/handler.rs | 1 + node/src/tasks/msp_move_bucket.rs | 50 +++++++++++++++++++- 3 files changed, 63 insertions(+), 34 deletions(-) diff --git a/client/blockchain-service/src/handler_bsp.rs b/client/blockchain-service/src/handler_bsp.rs index 07b85828e..948830d29 100644 --- a/client/blockchain-service/src/handler_bsp.rs +++ b/client/blockchain-service/src/handler_bsp.rs @@ -22,7 +22,7 @@ use storage_hub_runtime::RuntimeEvent; use crate::{ events::{ - BspConfirmStoppedStoring, FinalisedBspConfirmStoppedStoring, FinalisedBucketMovedAway, + BspConfirmStoppedStoring, FinalisedBspConfirmStoppedStoring, FinalisedTrieRemoveMutationsApplied, ForestWriteLockTaskData, MoveBucketAccepted, MoveBucketExpired, MoveBucketRejected, MoveBucketRequested, MultipleNewChallengeSeeds, ProcessConfirmStoringRequest, ProcessConfirmStoringRequestData, @@ -139,6 +139,18 @@ where }); } } + RuntimeEvent::FileSystem(pallet_file_system::Event::MoveBucketRequested { + who: _, + bucket_id, + new_msp_id, + new_value_prop_id: _, + }) => { + // As a BSP, this 
node is interested in the event to allow the new MSP to request files from it. + self.emit(MoveBucketRequested { + bucket_id, + new_msp_id, + }); + } // Ignore all other events. _ => {} } @@ -185,38 +197,6 @@ where }); } } - RuntimeEvent::FileSystem(pallet_file_system::Event::MoveBucketRequested { - who: _, - bucket_id, - new_msp_id, - new_value_prop_id: _, - }) => { - // As a BSP, this node is interested in the event to allow the new MSP to request files from it. - self.emit(MoveBucketRequested { - bucket_id, - new_msp_id, - }); - } - RuntimeEvent::FileSystem(pallet_file_system::Event::MoveBucketAccepted { - bucket_id, - old_msp_id, - new_msp_id, - value_prop_id: _, - }) => { - // This event is relevant in case the Provider managed is the old MSP, - // in which case we should clean up the bucket. - // Note: we do this in finality to ensure we don't lose data in case - // of a reorg. - if let Some(old_msp_id) = old_msp_id { - if managed_bsp_id == &old_msp_id { - self.emit(FinalisedBucketMovedAway { - bucket_id, - old_msp_id, - new_msp_id, - }); - } - } - } // Ignore all other events. _ => {} } diff --git a/client/file-transfer-service/src/handler.rs b/client/file-transfer-service/src/handler.rs index 8dc84f0b4..f01ccce35 100644 --- a/client/file-transfer-service/src/handler.rs +++ b/client/file-transfer-service/src/handler.rs @@ -409,6 +409,7 @@ impl Actor for FileTransferService { bucket_id, callback, } => { + info!(target: LOG_TARGET, "Registering new bucket peer {:?} for bucket {:?}", peer_id, bucket_id); let result = match self.peer_bucket_allow_list.insert((peer_id, bucket_id)) { true => Ok(()), false => Err(RequestError::BucketAlreadyRegisteredForPeer), diff --git a/node/src/tasks/msp_move_bucket.rs b/node/src/tasks/msp_move_bucket.rs index b217ed703..ae5ba5217 100644 --- a/node/src/tasks/msp_move_bucket.rs +++ b/node/src/tasks/msp_move_bucket.rs @@ -17,7 +17,7 @@ use shc_common::types::{ BucketId, HashT, ProviderId, StorageProofsMerkleTrieLayout, StorageProviderId, }; use shc_file_manager::traits::FileStorage; -use shc_forest_manager::traits::ForestStorageHandler; +use shc_forest_manager::traits::{ForestStorage, ForestStorageHandler}; use crate::services::{ handler::StorageHubHandler, @@ -109,6 +109,14 @@ where event.bucket_id ); + // Important: Add a delay after receiving the on-chain confirmation + // This gives the BSPs time to process the chain event and prepare to serve files + info!( + target: LOG_TARGET, + "Waiting for BSPs to be ready to serve files for bucket {:?}", event.bucket_id + ); + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + // Get all files for this bucket from the indexer let indexer_db_pool = if let Some(indexer_db_pool) = self.storage_hub_handler.indexer_db_pool.clone() { @@ -157,6 +165,11 @@ where let file_download_manager = &self.storage_hub_handler.file_download_manager; let file_transfer_service = self.storage_hub_handler.file_transfer.clone(); + info!( + target: LOG_TARGET, + "Starting new download of bucket {:?}", event.bucket_id + ); + file_download_manager .download_bucket( event.bucket_id, @@ -222,6 +235,14 @@ where return Ok(()); } + let bucket = event.bucket_id.as_ref().to_vec(); + + let forest_storage = self + .storage_hub_handler + .forest_storage_handler + .get_or_create(&bucket) + .await; + // Calculate total size to check capacity let total_size: u64 = files .iter() @@ -261,6 +282,33 @@ where let file_key = file_metadata.file_key::>(); + self.storage_hub_handler + .file_storage + .write() + .await + .insert_file(file_key, 
file_metadata.clone()) + .map_err(|error| { + anyhow!( + "CRITICAL ❗️❗️❗️: Failed to insert file {:?} into file storage: {:?}", + file_key, + error + ) + })?; + + self.file_storage_inserted_file_keys.push(file_key); + + forest_storage + .write() + .await + .insert_files_metadata(&[file_metadata.clone()]) + .map_err(|error| { + anyhow!( + "CRITICAL ❗️❗️❗️: Failed to insert file {:?} into forest storage: {:?}", + file_key, + error + ) + })?; + // Register the BSP peers with the peer manager for this file let bsp_peer_ids = file.get_bsp_peer_ids(&mut indexer_connection).await?; if bsp_peer_ids.is_empty() {
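
Editor's note: for readers following the series, the retry cadence that this patch adds to the FileTransferService reduces to a small time-interval gate. The sketch below is not part of the patch; it is a standalone, hedged illustration that compiles on its own against chrono (which the patch already uses). The function name should_retry is illustrative, while the 3-hour constant mirrors BUCKET_MOVE_RETRY_INTERVAL_SECONDS from the patch.

// Standalone sketch (illustrative, not part of the patch): the interval gate
// used when deciding whether to emit RetryBucketMoveDownload on a tick.
use chrono::{DateTime, Utc};

// Mirrors BUCKET_MOVE_RETRY_INTERVAL_SECONDS from the patch (3 hours).
const BUCKET_MOVE_RETRY_INTERVAL_SECONDS: u64 = 3 * 60 * 60;

/// Returns true when a retry should be emitted: either no retry has happened
/// yet, or the configured interval has elapsed since the last one.
fn should_retry(last_retry: Option<DateTime<Utc>>, now: DateTime<Utc>) -> bool {
    match last_retry {
        // Never retried (e.g. right after startup): emit immediately.
        None => true,
        // Otherwise only emit once the interval has elapsed.
        Some(last) => {
            now.signed_duration_since(last).num_seconds() as u64
                >= BUCKET_MOVE_RETRY_INTERVAL_SECONDS
        }
    }
}

fn main() {
    let now = Utc::now();
    // First tick after startup always retries.
    assert!(should_retry(None, now));
    // A retry that just happened is not repeated until the interval passes.
    assert!(!should_retry(Some(now), now));
}

The same pattern drives resumability end to end: each tick that passes this gate emits RetryBucketMoveDownload, and the task then consults the pending_bucket_downloads column family to decide which bucket downloads still need to be resumed.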