diff --git a/Cargo.lock b/Cargo.lock index cc6e0830f6..a4fa22a014 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3272,16 +3272,18 @@ dependencies = [ [[package]] name = "igd-next" -version = "0.14.3" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "064d90fec10d541084e7b39ead8875a5a80d9114a2b18791565253bae25f49e4" +checksum = "76b0d7d4541def58a37bf8efc559683f21edce7c82f0d866c93ac21f7e098f93" dependencies = [ "async-trait", "attohttpc", "bytes 1.7.2", "futures 0.3.30", - "http 0.2.9", - "hyper 0.14.27", + "http 1.1.0", + "http-body-util", + "hyper 1.4.1", + "hyper-util", "log", "rand", "tokio", @@ -3632,7 +3634,7 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libp2p" version = "0.54.1" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "bytes 1.7.2", "either", @@ -3667,8 +3669,8 @@ dependencies = [ [[package]] name = "libp2p-allow-block-list" -version = "0.4.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +version = "0.4.1" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "libp2p-core", "libp2p-identity", @@ -3678,8 +3680,8 @@ dependencies = [ [[package]] name = "libp2p-autonat" -version = "0.13.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +version = "0.13.1" +source = 
"git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "async-trait", "asynchronous-codec", @@ -3693,7 +3695,7 @@ dependencies = [ "libp2p-request-response", "libp2p-swarm", "quick-protobuf", - "quick-protobuf-codec 0.3.1 (git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558)", + "quick-protobuf-codec 0.3.1 (git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d)", "rand", "rand_core", "thiserror", @@ -3705,7 +3707,7 @@ dependencies = [ [[package]] name = "libp2p-connection-limits" version = "0.4.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "libp2p-core", "libp2p-identity", @@ -3716,7 +3718,7 @@ dependencies = [ [[package]] name = "libp2p-core" version = "0.42.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "either", "fnv", @@ -3744,7 +3746,7 @@ dependencies = [ [[package]] name = "libp2p-dcutr" version = "0.12.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "asynchronous-codec", "either", @@ -3756,7 +3758,7 @@ dependencies = [ "libp2p-swarm", "lru", "quick-protobuf", - 
"quick-protobuf-codec 0.3.1 (git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558)", + "quick-protobuf-codec 0.3.1 (git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d)", "thiserror", "tracing", "void", @@ -3766,7 +3768,7 @@ dependencies = [ [[package]] name = "libp2p-dns" version = "0.42.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "async-trait", "futures 0.3.30", @@ -3780,8 +3782,8 @@ dependencies = [ [[package]] name = "libp2p-gossipsub" -version = "0.47.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +version = "0.48.0" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "asynchronous-codec", "base64 0.22.1", @@ -3798,7 +3800,7 @@ dependencies = [ "libp2p-swarm", "prometheus-client", "quick-protobuf", - "quick-protobuf-codec 0.3.1 (git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558)", + "quick-protobuf-codec 0.3.1 (git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d)", "rand", "regex", "serde", @@ -3811,8 +3813,8 @@ dependencies = [ [[package]] name = "libp2p-identify" -version = "0.45.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +version = "0.45.1" +source = 
"git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "asynchronous-codec", "either", @@ -3824,7 +3826,7 @@ dependencies = [ "libp2p-swarm", "lru", "quick-protobuf", - "quick-protobuf-codec 0.3.1 (git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558)", + "quick-protobuf-codec 0.3.1 (git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d)", "smallvec 1.13.2", "thiserror", "tracing", @@ -3834,7 +3836,7 @@ dependencies = [ [[package]] name = "libp2p-identity" version = "0.2.9" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "bs58 0.5.1", "ed25519-dalek", @@ -3852,8 +3854,8 @@ dependencies = [ [[package]] name = "libp2p-kad" -version = "0.46.1" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +version = "0.47.0" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "arrayvec", "asynchronous-codec", @@ -3867,7 +3869,7 @@ dependencies = [ "libp2p-identity", "libp2p-swarm", "quick-protobuf", - "quick-protobuf-codec 0.3.1 (git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558)", + "quick-protobuf-codec 0.3.1 (git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d)", "rand", "serde", "sha2 0.10.8", @@ -3882,7 +3884,7 @@ dependencies = [ [[package]] name = "libp2p-mdns" version = "0.46.0" -source = 
"git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "data-encoding", "futures 0.3.30", @@ -3914,7 +3916,7 @@ dependencies = [ [[package]] name = "libp2p-metrics" version = "0.15.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "futures 0.3.30", "libp2p-core", @@ -3934,7 +3936,7 @@ dependencies = [ [[package]] name = "libp2p-noise" version = "0.45.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "asynchronous-codec", "bytes 1.7.2", @@ -3976,7 +3978,7 @@ dependencies = [ [[package]] name = "libp2p-ping" version = "0.45.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "either", "futures 0.3.30", @@ -3993,7 +3995,7 @@ dependencies = [ [[package]] name = "libp2p-quic" version = "0.11.1" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = 
"git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "bytes 1.7.2", "futures 0.3.30", @@ -4016,7 +4018,7 @@ dependencies = [ [[package]] name = "libp2p-relay" version = "0.18.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "asynchronous-codec", "bytes 1.7.2", @@ -4028,7 +4030,7 @@ dependencies = [ "libp2p-identity", "libp2p-swarm", "quick-protobuf", - "quick-protobuf-codec 0.3.1 (git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558)", + "quick-protobuf-codec 0.3.1 (git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d)", "rand", "static_assertions", "thiserror", @@ -4040,7 +4042,7 @@ dependencies = [ [[package]] name = "libp2p-request-response" version = "0.27.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "async-trait", "futures 0.3.30", @@ -4067,8 +4069,8 @@ dependencies = [ [[package]] name = "libp2p-swarm" -version = "0.45.1" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +version = "0.45.2" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "either", "fnv", @@ -4091,7 +4093,7 @@ dependencies = [ [[package]] name = 
"libp2p-swarm-derive" version = "0.35.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -4102,7 +4104,7 @@ dependencies = [ [[package]] name = "libp2p-tcp" version = "0.42.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "futures 0.3.30", "futures-timer", @@ -4118,7 +4120,7 @@ dependencies = [ [[package]] name = "libp2p-tls" version = "0.5.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "futures 0.3.30", "futures-rustls", @@ -4135,8 +4137,8 @@ dependencies = [ [[package]] name = "libp2p-upnp" -version = "0.3.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +version = "0.3.1" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "futures 0.3.30", "futures-timer", @@ -4151,7 +4153,7 @@ dependencies = [ [[package]] name = "libp2p-yamux" version = "0.46.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = 
"git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "either", "futures 0.3.30", @@ -4937,7 +4939,7 @@ dependencies = [ [[package]] name = "multiaddr" version = "0.18.1" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "arrayref", "byteorder", @@ -5008,7 +5010,7 @@ checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" [[package]] name = "multistream-select" version = "0.13.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "bytes 1.7.2", "futures 0.3.30", @@ -6278,7 +6280,7 @@ dependencies = [ [[package]] name = "quick-protobuf-codec" version = "0.3.1" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "asynchronous-codec", "bytes 1.7.2", @@ -6885,7 +6887,7 @@ dependencies = [ [[package]] name = "rw-stream-sink" version = "0.4.0" -source = "git+https://github.com/tari-project/rust-libp2p.git?rev=3d918ccbf5ae1cbec0815a2156079b0fba4ba558#3d918ccbf5ae1cbec0815a2156079b0fba4ba558" +source = "git+https://github.com/tari-project/rust-libp2p.git?rev=ed117cac06f99aa2a85b1bad8c911b155f4b4f5d#ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" dependencies = [ "futures 
0.3.30", "pin-project 1.1.5", diff --git a/Cargo.toml b/Cargo.toml index 7b5819f882..986ce712b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -72,9 +72,9 @@ tari_shutdown = { path = "infrastructure/shutdown" } tari_metrics = { path = "infrastructure/metrics" } # Use Tari's libp2p fork that adds support for Schnorr-Ristretto -libp2p-identity = { git = "https://github.com/tari-project/rust-libp2p.git", rev = "3d918ccbf5ae1cbec0815a2156079b0fba4ba558" } -libp2p = { git = "https://github.com/tari-project/rust-libp2p.git", rev = "3d918ccbf5ae1cbec0815a2156079b0fba4ba558", default-features = false } -multiaddr = { git = "https://github.com/tari-project/rust-libp2p.git", rev = "3d918ccbf5ae1cbec0815a2156079b0fba4ba558" } +libp2p-identity = { git = "https://github.com/tari-project/rust-libp2p.git", rev = "ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" } +libp2p = { git = "https://github.com/tari-project/rust-libp2p.git", rev = "ed117cac06f99aa2a85b1bad8c911b155f4b4f5d", default-features = false } +multiaddr = { git = "https://github.com/tari-project/rust-libp2p.git", rev = "ed117cac06f99aa2a85b1bad8c911b155f4b4f5d" } anyhow = "1.0.89" async-trait = "0.1.83" diff --git a/applications/minotari_console_wallet/log4rs_sample.yml b/applications/minotari_console_wallet/log4rs_sample.yml index 7efb92e618..f8e27b7640 100644 --- a/applications/minotari_console_wallet/log4rs_sample.yml +++ b/applications/minotari_console_wallet/log4rs_sample.yml @@ -63,7 +63,7 @@ appenders: encoder: pattern: "{d(%Y-%m-%d %H:%M:%S.%f)} [{t}] [Thread:{I}] {l:5} {m}{n}" - # An appender named "base_layer" that writes to a file with a custom pattern encoder + # An appender named "other" that writes to a file with a custom pattern encoder other: kind: rolling_file path: "{{log_dir}}/log/wallet/other.log" @@ -97,6 +97,23 @@ appenders: encoder: pattern: "{d(%Y-%m-%d %H:%M:%S.%f)} [{t}] [Thread:{I}] {l:5} {m}{n}" + # An appender named "libp2p" that writes to a file with a custom pattern encoder + libp2p: + kind: 
rolling_file + path: "{{log_dir}}/log/wallet/libp2p.log" + policy: + kind: compound + trigger: + kind: size + limit: 10mb + roller: + kind: fixed_window + base: 1 + count: 5 + pattern: "{{log_dir}}/log/wallet/libp2p.{}.log" + encoder: + pattern: "{d(%Y-%m-%d %H:%M:%S.%f)} [{t}] [Thread:{I}] {l:5} {m}{n}" + # An appender named "fail2ban" that writes to a file with a custom pattern encoder fail2ban: kind: file @@ -118,6 +135,7 @@ loggers: - base_layer - stdout additive: false + # other h2: level: info @@ -134,6 +152,19 @@ loggers: appenders: - other additive: false + r2d2: + level: warn + appenders: + - other + additive: false + + # contacts + contacts: + level: info + appenders: + - contacts + additive: false + # network network: level: debug @@ -145,11 +176,6 @@ loggers: appenders: - network additive: false - contacts: - level: info - appenders: - - contacts - additive: false comms::noise: level: error appenders: @@ -159,7 +185,6 @@ loggers: level: debug appenders: - network - # Route log events sent to the "mio" logger to the "other" appender mio: level: error appenders: @@ -175,12 +200,7 @@ loggers: appenders: - network additive: false - # Route R2D2 log events - r2d2: - level: warn - appenders: - - other - additive: false + # fail2ban logs fail2ban: level: info @@ -188,3 +208,84 @@ loggers: - fail2ban additive: false + # libp2p + libp2p_identify: + level: debug + appenders: + - libp2p + additive: false + libp2p_noise: + level: debug + appenders: + - libp2p + additive: false + libp2p_peersync: + level: info + appenders: + - libp2p + additive: false + libp2p_gossipsub: + level: info + appenders: + - libp2p + additive: false + quinn: + level: info + appenders: + - libp2p + additive: false + quinn_proto: + level: info + appenders: + - libp2p + additive: false + libp2p_core: + level: info + appenders: + - libp2p + additive: false + libp2p_mdns: + level: debug + appenders: + - libp2p + additive: false + libp2p_swarm: + level: debug + appenders: + - libp2p + additive: 
false + hickory_proto: + level: info + appenders: + - libp2p + additive: false + multistream_select: + level: info + appenders: + - libp2p + additive: false + libp2p_tcp: + level: debug + appenders: + - libp2p + additive: false + libp2p_quic: + level: debug + appenders: + - libp2p + additive: false + libp2p_kad: + level: debug + appenders: + - libp2p + additive: false + libp2p_ping: + level: debug + appenders: + - libp2p + additive: false + libp2p_messaging: + level: debug + appenders: + - libp2p + additive: false diff --git a/applications/minotari_console_wallet/src/automation/commands.rs b/applications/minotari_console_wallet/src/automation/commands.rs index 24cfe263a2..965475e7dc 100644 --- a/applications/minotari_console_wallet/src/automation/commands.rs +++ b/applications/minotari_console_wallet/src/automation/commands.rs @@ -390,7 +390,7 @@ async fn wait_for_comms(network: &NetworkHandle) -> Result<(), CommandError> { loop { tokio::select! { // Wait for the first base node to identify - Ok(NetworkEvent::IdentifiedPeer { agent_version, .. }) = events.recv() => { + Ok(NetworkEvent::PeerIdentified { agent_version, .. 
}) = events.recv() => { if agent_version.contains("basenode") { println!("✅"); return Ok(()); diff --git a/applications/minotari_console_wallet/src/cli.rs b/applications/minotari_console_wallet/src/cli.rs index fb26fa512b..2b56f2a70a 100644 --- a/applications/minotari_console_wallet/src/cli.rs +++ b/applications/minotari_console_wallet/src/cli.rs @@ -34,7 +34,7 @@ use tari_common::configuration::{ConfigOverrideProvider, Network}; use tari_common_types::tari_address::TariAddress; use tari_core::transactions::{tari_amount, tari_amount::MicroMinotari}; use tari_key_manager::SeedWords; -use tari_network::multiaddr::Multiaddr; +use tari_network::{multiaddr::Multiaddr, ReachabilityMode}; use tari_utilities::{ hex::{Hex, HexError}, SafePassword, @@ -94,6 +94,9 @@ pub struct Cli { pub view_private_key: Option, #[clap(long)] pub spend_key: Option, + /// Set the node to unreachable mode, the node will not act as a relay and will attempt relayed connections. + #[clap(long)] + pub reachability: Option, } impl ConfigOverrideProvider for Cli { @@ -118,15 +121,18 @@ impl ConfigOverrideProvider for Cli { } else { // GRPC is disabled } + if let Some(reachability) = self.reachability { + replace_or_add_override(&mut overrides, "wallet.p2p.reachability_mode", reachability.to_string()); + } overrides } } -fn replace_or_add_override(overrides: &mut Vec<(String, String)>, key: &str, value: &str) { +fn replace_or_add_override>(overrides: &mut Vec<(String, String)>, key: &str, value: T) { if let Some(index) = overrides.iter().position(|(k, _)| k == key) { overrides.remove(index); } - overrides.push((key.to_string(), value.to_string())); + overrides.push((key.to_string(), value.into())); } #[allow(clippy::large_enum_variant)] diff --git a/applications/minotari_console_wallet/src/lib.rs b/applications/minotari_console_wallet/src/lib.rs index 6a06217422..438928ceaa 100644 --- a/applications/minotari_console_wallet/src/lib.rs +++ b/applications/minotari_console_wallet/src/lib.rs @@ -106,6 
+106,7 @@ pub fn run_wallet(shutdown: &mut Shutdown, runtime: Runtime, config: &mut Applic profile_with_tokio_console: false, view_private_key: None, spend_key: None, + reachability: None, }; run_wallet_with_cli(shutdown, runtime, config, cli) diff --git a/applications/minotari_console_wallet/src/ui/state/app_state.rs b/applications/minotari_console_wallet/src/ui/state/app_state.rs index ef0f0ea2dc..a32aac9aa1 100644 --- a/applications/minotari_console_wallet/src/ui/state/app_state.rs +++ b/applications/minotari_console_wallet/src/ui/state/app_state.rs @@ -232,7 +232,7 @@ impl AppState { pub async fn upsert_contact(&mut self, alias: String, tari_emoji: String) -> Result<(), UiError> { let mut inner = self.inner.write().await; - let address = TariAddress::from_str(&tari_emoji).map_err(|_| UiError::PublicKeyParseError)?; + let address = TariAddress::from_str(&tari_emoji).map_err(|_| UiError::TariAddressParseError)?; let contact = Contact::new(alias, address, None, None, false); inner.wallet.contacts_service.upsert_contact(contact).await?; @@ -262,7 +262,7 @@ impl AppState { pub async fn delete_contact(&mut self, tari_emoji: String) -> Result<(), UiError> { let mut inner = self.inner.write().await; - let address = TariAddress::from_str(&tari_emoji).map_err(|_| UiError::PublicKeyParseError)?; + let address = TariAddress::from_str(&tari_emoji).map_err(|_| UiError::TariAddressParseError)?; inner.wallet.contacts_service.remove_contact(address).await?; @@ -298,7 +298,7 @@ impl AppState { result_tx: watch::Sender, ) -> Result<(), UiError> { let inner = self.inner.write().await; - let address = TariAddress::from_str(&address).map_err(|_| UiError::PublicKeyParseError)?; + let address = TariAddress::from_str(&address).map_err(|_| UiError::TariAddressParseError)?; let output_features = OutputFeatures { ..Default::default() }; @@ -329,7 +329,7 @@ impl AppState { result_tx: watch::Sender, ) -> Result<(), UiError> { let inner = self.inner.write().await; - let address = 
TariAddress::from_str(&address).map_err(|_| UiError::PublicKeyParseError)?; + let address = TariAddress::from_str(&address).map_err(|_| UiError::TariAddressParseError)?; let payment_id = if payment_id_str.is_empty() { PaymentId::Empty } else { @@ -384,10 +384,10 @@ impl AppState { let fee_per_gram = fee_per_gram * uT; let tx_service_handle = inner.wallet.transaction_service.clone(); let claim_public_key = match claim_public_key { - None => return Err(UiError::PublicKeyParseError), + None => return Err(UiError::TariAddressParseError), Some(claim_public_key) => match PublicKey::from_hex(claim_public_key.as_str()) { Ok(claim_public_key) => Some(claim_public_key), - Err(_) => return Err(UiError::PublicKeyParseError), + Err(_) => return Err(UiError::TariAddressParseError), }, }; @@ -1015,7 +1015,7 @@ impl AppStateInner { .public_key() .clone() .try_into_sr25519() - .map_err(|_| UiError::PublicKeyParseError)? + .map_err(|_| UiError::TariAddressParseError)? .inner_key() .clone(); info!( @@ -1047,7 +1047,7 @@ impl AppStateInner { .public_key() .clone() .try_into_sr25519() - .map_err(|_| UiError::PublicKeyParseError)? + .map_err(|_| UiError::TariAddressParseError)? .inner_key() .clone(); self.wallet @@ -1097,7 +1097,7 @@ impl AppStateInner { .public_key() .clone() .try_into_sr25519() - .map_err(|_| UiError::PublicKeyParseError)? + .map_err(|_| UiError::TariAddressParseError)? 
.inner_key() .clone(); self.wallet diff --git a/applications/minotari_console_wallet/src/ui/state/wallet_event_monitor.rs b/applications/minotari_console_wallet/src/ui/state/wallet_event_monitor.rs index d24e3b8edd..a999cf0d71 100644 --- a/applications/minotari_console_wallet/src/ui/state/wallet_event_monitor.rs +++ b/applications/minotari_console_wallet/src/ui/state/wallet_event_monitor.rs @@ -90,11 +90,7 @@ impl WalletEventMonitor { result = transaction_service_events.recv() => { match result { Ok(msg) => { - trace!( - target: LOG_TARGET, - "Wallet Event Monitor received wallet transaction service event {:?}", - msg - ); + trace!(target: LOG_TARGET, "Wallet transaction service event {:?}", msg); self.app_state_inner.write().await.add_event(EventListItem{ event_type: "TransactionEvent".to_string(), desc: (*msg).to_string() @@ -184,27 +180,17 @@ impl WalletEventMonitor { } }, Ok(_) = connectivity_status.changed() => { - trace!(target: LOG_TARGET, "Wallet Event Monitor received wallet connectivity status changed"); + trace!( + target: LOG_TARGET, + "Wallet connectivity status changed to {:?}", + connectivity_status.borrow().clone() + ); self.trigger_peer_state_refresh().await; }, - // Ok(_) = software_update_notif.changed() => { - // trace!(target: LOG_TARGET, "Wallet Event Monitor received wallet auto update status changed"); - // let update = software_update_notif.borrow().as_ref().cloned(); - // if let Some(update) = update { - // self.add_notification(format!( - // "Version {} of the {} is available: {} (sha: {})", - // update.version(), - // update.app(), - // update.download_url(), - // update.to_hash_hex() - // )).await; - // } - // }, - result = network_events.recv() => { - match result { - Ok(msg) => { - trace!(target: LOG_TARGET, "Wallet Event Monitor received wallet connectivity event {:?}", msg - ); + result = network_events.recv() => { + match result { + Ok(msg) => { + trace!(target: LOG_TARGET, "Wallet connectivity event {:?}", msg); match msg { 
NetworkEvent::PeerConnected{..} | NetworkEvent::PeerDisconnected{..} => { @@ -244,14 +230,16 @@ impl WalletEventMonitor { _ = base_node_changed.changed() => { let peer = base_node_changed.borrow().as_ref().cloned(); if let Some(peer) = peer { - self.trigger_base_node_peer_refresh(peer.get_current_peer().clone()).await; + let current_peer = peer.get_current_peer().clone(); + trace!(target: LOG_TARGET, "Base node changed to '{}'", current_peer.peer_id()); + self.trigger_base_node_peer_refresh(current_peer).await; self.trigger_balance_refresh(); } } result = base_node_events.recv() => { match result { Ok(msg) => { - trace!(target: LOG_TARGET, "Wallet Event Monitor received base node event {:?}", msg); + trace!(target: LOG_TARGET, "Base node event {:?}", msg); if let BaseNodeEvent::BaseNodeStateChanged(state) = (*msg).clone() { self.trigger_base_node_state_refresh(state).await; } diff --git a/applications/minotari_console_wallet/src/ui/ui_error.rs b/applications/minotari_console_wallet/src/ui/ui_error.rs index 0c4a3eedf0..0707407130 100644 --- a/applications/minotari_console_wallet/src/ui/ui_error.rs +++ b/applications/minotari_console_wallet/src/ui/ui_error.rs @@ -44,8 +44,8 @@ pub enum UiError { WalletError(#[from] WalletError), #[error(transparent)] WalletStorageError(#[from] WalletStorageError), - #[error("Could not convert string into Public Key")] - PublicKeyParseError, + #[error("The provided Tari address is invalid")] + TariAddressParseError, #[error("Could not convert string into Net Address")] AddressParseError, #[error("Peer did not include an address")] diff --git a/applications/minotari_node/log4rs_sample.yml b/applications/minotari_node/log4rs_sample.yml index 8005e3867d..133234b173 100644 --- a/applications/minotari_node/log4rs_sample.yml +++ b/applications/minotari_node/log4rs_sample.yml @@ -36,7 +36,8 @@ appenders: pattern: "{{log_dir}}/log/base_node/network.{}.log" encoder: pattern: "{d(%Y-%m-%d %H:%M:%S.%f)} [{t}] [Thread:{I}] {l:5} {m} // 
{f}:{L}{n}" - # An appender named "network" that writes to a file with a custom pattern encoder + + # An appender named "message_logging" that writes to a file with a custom pattern encoder message_logging: kind: rolling_file path: "{{log_dir}}/log/base_node/messages.log" @@ -70,7 +71,7 @@ appenders: encoder: pattern: "{d(%Y-%m-%d %H:%M:%S.%f)} [{t}] [{X(node-public-key)},{X(node-id)}] {l:5} {m} // {f}:{L}{n}" - # An appender named "base_layer" that writes to a file with a custom pattern encoder + # An appender named "grpc" that writes to a file with a custom pattern encoder grpc: kind: rolling_file path: "{{log_dir}}/log/base_node/grpc.log" @@ -104,6 +105,23 @@ appenders: encoder: pattern: "{d(%Y-%m-%d %H:%M:%S.%f)} [{t}] [Thread:{I}] {l:5} {m}{n} // {f}:{L} " + # An appender named "libp2p" that writes to a file with a custom pattern encoder + libp2p: + kind: rolling_file + path: "{{log_dir}}/log/base_node/libp2p.log" + policy: + kind: compound + trigger: + kind: size + limit: 10mb + roller: + kind: fixed_window + base: 1 + count: 5 + pattern: "{{log_dir}}/log/base_node/libp2p.{}.log" + encoder: + pattern: "{d(%Y-%m-%d %H:%M:%S.%f)} [{t}] [Thread:{I}] {l:5} {m}{n} // {f}:{L} " + # An appender named "fail2ban" that writes to a file with a custom pattern encoder fail2ban: kind: file @@ -132,7 +150,7 @@ loggers: appenders: - grpc - # Route log events sent to the "core" logger to the "base_layer" appender + # base_layer c: level: debug appenders: @@ -141,70 +159,149 @@ loggers: level: debug appenders: - base_layer - - # Route log events sent to the "wallet" logger to the "base_layer" appender wallet: level: debug appenders: - base_layer - # Route log events sent to the "comms" logger to the "network" appender + tari_mm_proxy: + level: info + appenders: + - base_layer + + # network + network: + level: debug + appenders: + - network comms: level: debug appenders: - network - # Route log events sent to the "p2p" logger to the "network" appender p2p: level: debug 
appenders: - network - # Route log events sent to the "yamux" logger to the "network" appender yamux: level: info appenders: - network - # Route log events sent to the "mio" logger to the "network" appender mio: level: error appenders: - network - # Route log events sent to the "rustyline" logger to the "other" appender + + # other rustyline: level: error appenders: - other additive: false - - # Route log events sent to the "tokio_util" logger to the "other" appender tokio_util: level: error appenders: - other - # Route PGP log events pgp: level: warn appenders: - other - # Route log events sent to the "minotari_mm_proxy" logger to the "base_layer" appender - tari_mm_proxy: - level: info - appenders: - - base_layer - # Route R2D2 log events r2d2: level: warn appenders: - other additive: false + # message_logging comms::middleware::message_logging: - # Set to `trace` to retrieve message logging - level: warn + level: warn # Set to `trace` to retrieve message logging appenders: - message_logging + # fail2ban logs fail2ban: level: info appenders: - fail2ban - additive: false \ No newline at end of file + additive: false + + # libp2p + libp2p_identify: + level: debug + appenders: + - libp2p + additive: false + libp2p_noise: + level: debug + appenders: + - libp2p + additive: false + libp2p_peersync: + level: info + appenders: + - libp2p + additive: false + libp2p_gossipsub: + level: info + appenders: + - libp2p + additive: false + quinn: + level: info + appenders: + - libp2p + additive: false + quinn_proto: + level: info + appenders: + - libp2p + additive: false + libp2p_core: + level: info + appenders: + - libp2p + additive: false + libp2p_mdns: + level: debug + appenders: + - libp2p + additive: false + libp2p_swarm: + level: debug + appenders: + - libp2p + additive: false + hickory_proto: + level: info + appenders: + - libp2p + additive: false + multistream_select: + level: info + appenders: + - libp2p + additive: false + libp2p_tcp: + level: debug + appenders: + - 
libp2p + additive: false + libp2p_quic: + level: debug + appenders: + - libp2p + additive: false + libp2p_kad: + level: debug + appenders: + - libp2p + additive: false + libp2p_ping: + level: debug + appenders: + - libp2p + additive: false + libp2p_messaging: + level: debug + appenders: + - libp2p + additive: false diff --git a/applications/minotari_node/src/cli.rs b/applications/minotari_node/src/cli.rs index 3105dd4574..85397a0b9b 100644 --- a/applications/minotari_node/src/cli.rs +++ b/applications/minotari_node/src/cli.rs @@ -23,6 +23,7 @@ use clap::Parser; use minotari_app_utilities::common_cli_args::CommonCliArgs; use tari_common::configuration::{ConfigOverrideProvider, Network}; +use tari_network::ReachabilityMode; #[derive(Parser, Debug)] #[clap(author, version, about, long_about = None)] @@ -51,6 +52,9 @@ pub struct Cli { pub mining_enabled: bool, #[clap(long, env = "MINOTARI_NODE_SECOND_LAYER_GRPC_ENABLED", alias = "enable-second-layer")] pub second_layer_grpc_enabled: bool, + /// Set the node to unreachable mode, the node will not act as a relay and will attempt relayed connections. 
+ #[clap(long)] + pub reachability: Option, } impl ConfigOverrideProvider for Cli { @@ -80,13 +84,20 @@ impl ConfigOverrideProvider for Cli { replace_or_add_override(&mut overrides, "base_node.grpc_enabled", "true"); replace_or_add_override(&mut overrides, "base_node.second_layer_grpc_enabled", "true"); } + if let Some(reachability) = self.reachability { + replace_or_add_override( + &mut overrides, + "base_node.p2p.reachability_mode", + reachability.to_string(), + ); + } overrides } } -fn replace_or_add_override(overrides: &mut Vec<(String, String)>, key: &str, value: &str) { +fn replace_or_add_override>(overrides: &mut Vec<(String, String)>, key: &str, value: T) { if let Some(index) = overrides.iter().position(|(k, _)| k == key) { overrides.remove(index); } - overrides.push((key.to_string(), value.to_string())); + overrides.push((key.to_string(), value.into())); } diff --git a/applications/minotari_node/src/commands/command/add_peer.rs b/applications/minotari_node/src/commands/command/add_peer.rs index 01cc4bb2b7..265d1da46c 100644 --- a/applications/minotari_node/src/commands/command/add_peer.rs +++ b/applications/minotari_node/src/commands/command/add_peer.rs @@ -26,7 +26,14 @@ use anyhow::Error; use async_trait::async_trait; use clap::Parser; use minotari_app_utilities::utilities::UniPublicKey; -use tari_network::{multiaddr::Multiaddr, swarm::dial_opts::DialOpts, NetworkingService, ToPeerId}; +use tari_network::{ + identity, + multiaddr::Multiaddr, + swarm::dial_opts::{DialOpts, PeerCondition}, + NetworkingService, + Peer, + ToPeerId, +}; use super::{CommandContext, HandleCommand}; @@ -48,9 +55,15 @@ impl HandleCommand for CommandContext { return Err(Error::msg("Cannot add self as peer")); } let timer = Instant::now(); + self.network + .add_peer(Peer::new( + identity::PublicKey::from(identity::sr25519::PublicKey::from(public_key)), + vec![args.address], + )) + .await?; let dial = self .network - 
.dial_peer(DialOpts::peer_id(peer_id).addresses(vec![args.address]).build()) + .dial_peer(DialOpts::peer_id(peer_id).condition(PeerCondition::Always).build()) .await?; println!("Peer with node id '{}' was added to the base node. Dialing...", peer_id); diff --git a/applications/minotari_node/src/commands/command/status.rs b/applications/minotari_node/src/commands/command/status.rs index 1aab0e630a..67fc8c409b 100644 --- a/applications/minotari_node/src/commands/command/status.rs +++ b/applications/minotari_node/src/commands/command/status.rs @@ -131,10 +131,10 @@ impl CommandContext { match self.network.get_autonat_status() { AutonatStatus::ConfiguredPrivate => { - status_line.add(format!("️🔌(conf) {avg_latency}")); + status_line.add(avg_latency.to_string()); }, AutonatStatus::Checking => { - status_line.add(format!("{avg_latency}")); + status_line.add(avg_latency.to_string()); }, AutonatStatus::Private => { status_line.add(format!("️🔌 {avg_latency}")); diff --git a/applications/minotari_node/src/config.rs b/applications/minotari_node/src/config.rs index f30745535a..a3e8a34515 100644 --- a/applications/minotari_node/src/config.rs +++ b/applications/minotari_node/src/config.rs @@ -144,6 +144,15 @@ pub struct BaseNodeConfig { impl Default for BaseNodeConfig { fn default() -> Self { + let p2p = P2pConfig { + enable_relay: true, + enable_mdns: true, + // We should reduce this to something like 5 - however there seems (unconfirmed) to be an issue in some + // behaviours when the limit is hit (continuous connection closing) + max_inbound_connections_per_peer: Some(50), + ..Default::default() + }; + Self { override_from: None, network: Network::default(), @@ -156,7 +165,7 @@ impl Default for BaseNodeConfig { second_layer_grpc_enabled: false, identity_file: PathBuf::from("config/base_node_id.json"), use_libtor: true, - p2p: P2pConfig::default(), + p2p, db_type: DatabaseType::Lmdb, lmdb: Default::default(), data_dir: PathBuf::from("data/base_node"), diff --git 
a/applications/minotari_node/src/lib.rs b/applications/minotari_node/src/lib.rs index fc6a7201f1..65ec5d39f9 100644 --- a/applications/minotari_node/src/lib.rs +++ b/applications/minotari_node/src/lib.rs @@ -95,6 +95,7 @@ pub async fn run_base_node( grpc_enabled: false, mining_enabled: false, second_layer_grpc_enabled: false, + reachability: None, }; run_base_node_with_cli(node_identity, config, cli, shutdown).await diff --git a/base_layer/contacts/src/contacts_service/handle.rs b/base_layer/contacts/src/contacts_service/handle.rs index bd1afeba21..36b72ea3fb 100644 --- a/base_layer/contacts/src/contacts_service/handle.rs +++ b/base_layer/contacts/src/contacts_service/handle.rs @@ -108,7 +108,7 @@ impl Display for ContactsLivenessData { f, "Liveness event '{}' for contact {} ({}) {}", self.message_type, - self.address, + self.address.to_hex(), self.peer_id, if let Some(time) = self.last_seen { let local_time = DateTime::::from_naive_utc_and_offset(time, Local::now().offset().to_owned()) diff --git a/base_layer/core/src/base_node/service/service.rs b/base_layer/core/src/base_node/service/service.rs index 98ed032b20..a550cbac95 100644 --- a/base_layer/core/src/base_node/service/service.rs +++ b/base_layer/core/src/base_node/service/service.rs @@ -30,9 +30,11 @@ use log::*; use rand::rngs::OsRng; use tari_common_types::types::BlockHash; use tari_network::{ + gossipsub, identity::PeerId, GossipMessage, GossipSubscription, + InboundGossipError, NetworkHandle, NetworkingService, OutboundMessager, @@ -164,8 +166,8 @@ where B: BlockchainBackend + 'static }, // Incoming block messages from the network - Some(Ok(msg)) = block_subscription.next_message() => { - self.spawn_handle_incoming_block(msg); + Some(result) = block_subscription.next_message() => { + self.spawn_handle_incoming_block(result).await; } // Incoming local request messages from the LocalNodeCommsInterface and other local services @@ -304,32 +306,135 @@ where B: BlockchainBackend + 'static }); } - fn 
spawn_handle_incoming_block(&self, new_block: GossipMessage) { + #[allow(clippy::too_many_lines)] + async fn spawn_handle_incoming_block( + &self, + result: Result, InboundGossipError>, + ) { // Determine if we are bootstrapped let status_watch = self.state_machine_handle.get_status_info_watch(); - if !(status_watch.borrow()).bootstrapped { + if !status_watch.borrow().bootstrapped { debug!( target: LOG_TARGET, - "Propagated block from peer `{}` not processed while busy with initial sync.", - new_block.origin_or_source() + "Propagated block from peer not processed while busy with initial sync.", ); + + let (message_id, propagation_source) = match result { + Ok(msg) => (msg.message_id, msg.propagation_source), + Err(err) => (err.message_id, err.propagation_source), + }; + if let Err(err) = self + .network + .report_gossip_message_validation_result( + message_id, + propagation_source, + // We have not validated this message, so we do not propagate it however we do not want to penalise + // the peer, therefore Ignore is used + gossipsub::MessageAcceptance::Ignore, + ) + .await + { + warn!(target: LOG_TARGET, "spawn_handle_incoming_block call network: {err}"); + } return; } - let inbound_nch = self.inbound_nch.clone(); + + let mut inbound_nch = self.inbound_nch.clone(); let mut network = self.network.clone(); - let source_peer = new_block.origin_or_source(); let short_ban = self.base_node_config.blockchain_sync_config.short_ban_period; let long_ban = self.base_node_config.blockchain_sync_config.ban_period; task::spawn(async move { - let result = handle_incoming_block(inbound_nch, new_block).await; + let new_block = match result { + Ok(msg) => msg, + Err(err) => { + if let Err(err) = network + .report_gossip_message_validation_result( + err.message_id, + err.propagation_source, + gossipsub::MessageAcceptance::Reject, + ) + .await + { + warn!(target: LOG_TARGET, "spawn_handle_incoming_block call network: {err}"); + } + return; + }, + }; + let source_peer = 
new_block.origin_or_source(); + let GossipMessage::<_> { + message_id, + propagation_source, + origin, + message, + message_size, + } = new_block; + + let from = origin.unwrap_or(propagation_source); + + let new_block = match NewBlock::try_from(message).map_err(BaseNodeServiceError::InvalidBlockMessage) { + Ok(msg) => msg, + Err(e) => { + error!(target: LOG_TARGET, "Failed to convert GossipMessage to Block: {e}"); + if let Err(e) = network + .report_gossip_message_validation_result( + message_id, + propagation_source, + gossipsub::MessageAcceptance::Reject, + ) + .await + { + warn!(target: LOG_TARGET, "spawn_handle_incoming_block call network: {e}"); + } + return; + }, + }; + debug!( + target: LOG_TARGET, + "New candidate block with hash `{}` received from `{}`. ({} bytes)", + new_block.header.hash().to_hex(), + from, + message_size + ); + + let result = inbound_nch.handle_new_block_message(new_block, from).await; match result { - Ok(()) => {}, - Err(BaseNodeServiceError::CommsInterfaceError(CommsInterfaceError::ChainStorageError( - ChainStorageError::AddBlockOperationLocked, - ))) => { - // Special case, dont log this again as an error + // Special case, dont log this again as an error + Ok(()) | Err(CommsInterfaceError::ChainStorageError(ChainStorageError::AddBlockOperationLocked)) => { + if let Err(e) = network + .report_gossip_message_validation_result( + message_id, + propagation_source, + gossipsub::MessageAcceptance::Accept, + ) + .await + { + warn!(target: LOG_TARGET, "spawn_handle_incoming_block call network: {e}"); + } + }, + Err(CommsInterfaceError::ChainStorageError(e @ ChainStorageError::ValidationError { .. 
})) => { + if let Some(ban_reason) = e.get_ban_reason() { + let duration = match ban_reason.ban_duration { + BanPeriod::Short => short_ban, + BanPeriod::Long => long_ban, + }; + let _drop = network + .ban_peer(source_peer, ban_reason.reason, Some(duration)) + .await + .map_err(|e| error!(target: LOG_TARGET, "Failed to ban peer: {:?}", e)); + } + error!(target: LOG_TARGET, "Failed to handle incoming block message: {}", e); + if let Err(e) = network + .report_gossip_message_validation_result( + message_id, + propagation_source, + gossipsub::MessageAcceptance::Reject, + ) + .await + { + warn!(target: LOG_TARGET, "spawn_handle_incoming_block call network: {e}"); + } }, Err(e) => { if let Some(ban_reason) = e.get_ban_reason() { @@ -342,7 +447,19 @@ where B: BlockchainBackend + 'static .await .map_err(|e| error!(target: LOG_TARGET, "Failed to ban peer: {:?}", e)); } - error!(target: LOG_TARGET, "Failed to handle incoming block message: {}", e) + error!(target: LOG_TARGET, "Failed to handle incoming block message: {}", e); + if let Err(e) = network + .report_gossip_message_validation_result( + message_id, + propagation_source, + // Some other error occurred, we will not propagate, but we'll also not penalise the peer + // for a failure not related to the message contents. + gossipsub::MessageAcceptance::Ignore, + ) + .await + { + warn!(target: LOG_TARGET, "spawn_handle_incoming_block call network: {e}"); + } }, } }); @@ -548,29 +665,3 @@ fn spawn_request_timeout(timeout_sender: mpsc::Sender, request_key: let _ = timeout_sender.send(request_key).await; }); } - -async fn handle_incoming_block( - mut inbound_nch: InboundNodeCommsHandlers, - domain_block_msg: GossipMessage, -) -> Result<(), BaseNodeServiceError> { - let GossipMessage::<_> { - source, - origin, - message, - .. 
- } = domain_block_msg; - - let from = origin.unwrap_or(source); - - let new_block = NewBlock::try_from(message).map_err(BaseNodeServiceError::InvalidBlockMessage)?; - debug!( - target: LOG_TARGET, - "New candidate block with hash `{}` received from `{}`.", - new_block.header.hash().to_hex(), - from - ); - - inbound_nch.handle_new_block_message(new_block, from).await?; - - Ok(()) -} diff --git a/base_layer/core/src/chain_storage/blockchain_database.rs b/base_layer/core/src/chain_storage/blockchain_database.rs index 2f00871ac3..3ce6fa1870 100644 --- a/base_layer/core/src/chain_storage/blockchain_database.rs +++ b/base_layer/core/src/chain_storage/blockchain_database.rs @@ -286,8 +286,9 @@ where B: BlockchainBackend // lets load the smt into memory let mut smt = blockchain_db.smt_write_access()?; warn!(target: LOG_TARGET, "Loading SMT into memory from stored db"); + let timer = Instant::now(); *smt = blockchain_db.db_write_access()?.calculate_tip_smt()?; - warn!(target: LOG_TARGET, "Finished loading SMT into memory from stored db"); + warn!(target: LOG_TARGET, "Finished loading SMT into memory from stored db in {:.2?}", timer.elapsed()); } if config.cleanup_orphans_at_startup { match blockchain_db.cleanup_all_orphans() { diff --git a/base_layer/core/src/mempool/service/initializer.rs b/base_layer/core/src/mempool/service/initializer.rs index fdc79734f7..a37a19d06c 100644 --- a/base_layer/core/src/mempool/service/initializer.rs +++ b/base_layer/core/src/mempool/service/initializer.rs @@ -92,7 +92,7 @@ impl ServiceInitializer for MempoolServiceInitializer { request_receiver, }; debug!(target: LOG_TARGET, "Mempool service started"); - if let Err(err) = MempoolService::new(inbound_handlers).start(streams).await { + if let Err(err) = MempoolService::new(inbound_handlers, network).start(streams).await { error!(target: LOG_TARGET, "Mempool service error: {}", err); } }); diff --git a/base_layer/core/src/mempool/service/service.rs 
b/base_layer/core/src/mempool/service/service.rs index af0a470214..a567d975e9 100644 --- a/base_layer/core/src/mempool/service/service.rs +++ b/base_layer/core/src/mempool/service/service.rs @@ -20,11 +20,11 @@ // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE // USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -use std::{convert::TryFrom, io, sync::Arc}; +use std::{convert::TryFrom, sync::Arc}; use futures::{pin_mut, stream::StreamExt, Stream}; use log::*; -use tari_network::{GossipMessage, GossipSubscription}; +use tari_network::{gossipsub, GossipMessage, GossipSubscription, InboundGossipError, NetworkHandle}; use tari_p2p::proto; use tari_service_framework::{reply_channel, reply_channel::RequestContext}; use tari_utilities::hex::Hex; @@ -55,11 +55,15 @@ pub struct MempoolStreams { /// Mempools of remote Base nodes. pub struct MempoolService { inbound_handlers: MempoolInboundHandlers, + network: NetworkHandle, } impl MempoolService { - pub fn new(inbound_handlers: MempoolInboundHandlers) -> Self { - Self { inbound_handlers } + pub fn new(inbound_handlers: MempoolInboundHandlers, network: NetworkHandle) -> Self { + Self { + inbound_handlers, + network, + } } pub async fn start(mut self, streams: MempoolStreams) -> Result<(), MempoolServiceError> @@ -78,9 +82,8 @@ impl MempoolService { let _result = reply.send(self.handle_request(request).await); }, - // Incoming transaction messages from the Comms layer - Some(transaction_msg) = transaction_subscription.next_message() => self.handle_incoming_tx(transaction_msg), + Some(transaction_msg) = transaction_subscription.next_message() => self.handle_incoming_tx(transaction_msg).await, // Incoming local request messages from the LocalMempoolServiceInterface and other local services Some(local_request_context) = local_request_stream.next() => { @@ -138,16 +141,27 @@ impl MempoolService { }); } - fn handle_incoming_tx(&self, result: 
io::Result>) { + async fn handle_incoming_tx(&self, result: Result, InboundGossipError>) { let msg = match result { Ok(msg) => msg, Err(err) => { warn!(target: LOG_TARGET, "Failed to decode gossip message: {err}"); + if let Err(err) = self + .network + .report_gossip_message_validation_result( + err.message_id, + err.propagation_source, + gossipsub::MessageAcceptance::Reject, + ) + .await + { + warn!(target: LOG_TARGET, "handle_incoming_tx call network: {err}"); + } return; }, }; - let source_peer_id = msg.source; + let source_peer_id = msg.propagation_source; let transaction = match Transaction::try_from(msg.message) { Ok(tx) => tx, Err(e) => { @@ -155,6 +169,17 @@ impl MempoolService { target: LOG_TARGET, "Received transaction message from {} with invalid transaction: {:?}", source_peer_id, e ); + if let Err(err) = self + .network + .report_gossip_message_validation_result( + msg.message_id, + msg.propagation_source, + gossipsub::MessageAcceptance::Reject, + ) + .await + { + warn!(target: LOG_TARGET, "handle_incoming_tx call network: {err}"); + } return; }, }; @@ -173,13 +198,36 @@ impl MempoolService { source_peer_id, ); let mut inbound_handlers = self.inbound_handlers.clone(); - task::spawn(async move { - if let Err(e) = inbound_handlers.handle_transaction(transaction, source_peer_id).await { - error!( - target: LOG_TARGET, - "Failed to handle incoming transaction message: {:?}", e - ); + if let Err(e) = inbound_handlers.handle_transaction(transaction, source_peer_id).await { + error!( + target: LOG_TARGET, + "Failed to handle incoming transaction message: {:?}", e + ); + // NOTE: We assume that all errors are due to a "bad" transaction. 
+ if let Err(err) = self + .network + .report_gossip_message_validation_result( + msg.message_id, + msg.propagation_source, + gossipsub::MessageAcceptance::Reject, + ) + .await + { + warn!(target: LOG_TARGET, "handle_incoming_tx call network: {err}"); } - }); + } else { + // Transaction message OK + if let Err(err) = self + .network + .report_gossip_message_validation_result( + msg.message_id, + msg.propagation_source, + gossipsub::MessageAcceptance::Accept, + ) + .await + { + warn!(target: LOG_TARGET, "handle_incoming_tx call network: {err}"); + } + } } } diff --git a/base_layer/core/src/mempool/sync_protocol/initializer.rs b/base_layer/core/src/mempool/sync_protocol/initializer.rs index a9925c2336..90b601c7c4 100644 --- a/base_layer/core/src/mempool/sync_protocol/initializer.rs +++ b/base_layer/core/src/mempool/sync_protocol/initializer.rs @@ -20,12 +20,10 @@ // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE // USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-use std::time::Duration; - use log::*; use tari_network::NetworkHandle; use tari_service_framework::{async_trait, ServiceInitializationError, ServiceInitializer, ServiceInitializerContext}; -use tokio::{sync::mpsc, time::sleep}; +use tokio::sync::mpsc; use crate::{ base_node::{comms_interface::LocalNodeCommsInterface, StateMachineHandle}, @@ -68,6 +66,8 @@ impl ServiceInitializer for MempoolSyncInitializer { let state_machine = handles.expect_handle::(); let base_node = handles.expect_handle::(); + // Subscribe early to not miss connect events + let network_events = network.subscribe_events(); let mut status_watch = state_machine.get_status_info_watch(); if !status_watch.borrow().state_info.is_synced() { debug!(target: LOG_TARGET, "Waiting for node to do initial sync..."); @@ -84,14 +84,13 @@ impl ServiceInitializer for MempoolSyncInitializer { target: LOG_TARGET, "Mempool sync still on hold, waiting for node to do initial sync", ); - sleep(Duration::from_secs(30)).await; } log_mdc::extend(mdc.clone()); } let base_node_events = base_node.get_block_event_stream(); MempoolSyncProtocol::new(config, notif_rx, mempool, network, base_node_events) - .run() + .run(network_events) .await; }); diff --git a/base_layer/core/src/mempool/sync_protocol/mod.rs b/base_layer/core/src/mempool/sync_protocol/mod.rs index e75b4ab6df..8294ac146e 100644 --- a/base_layer/core/src/mempool/sync_protocol/mod.rs +++ b/base_layer/core/src/mempool/sync_protocol/mod.rs @@ -64,10 +64,11 @@ //! 
``` use std::{ + collections::HashSet, convert::TryFrom, iter, sync::{ - atomic::{AtomicUsize, Ordering}, + atomic::{AtomicBool, Ordering}, Arc, }, time::Duration, @@ -83,7 +84,7 @@ use tari_network::{identity::PeerId, NetworkEvent, NetworkHandle, StreamProtocol use tari_p2p::{framing, framing::CanonicalFraming, proto as shared_proto, proto::mempool as proto}; use tari_utilities::{hex::Hex, ByteArray}; use tokio::{ - sync::{mpsc, Semaphore}, + sync::{broadcast, mpsc, Semaphore}, task, time, }; @@ -113,7 +114,8 @@ pub struct MempoolSyncProtocol { config: MempoolServiceConfig, protocol_notifier: mpsc::UnboundedReceiver>, mempool: Mempool, - num_synched: Arc, + peers_attempted: HashSet, + is_done: Arc, permits: Arc, network: NetworkHandle, block_event_stream: BlockEventReceiver, @@ -131,17 +133,17 @@ impl MempoolSyncProtocol { config, protocol_notifier, mempool, - num_synched: Arc::new(AtomicUsize::new(0)), + peers_attempted: HashSet::new(), + is_done: Arc::new(AtomicBool::new(false)), permits: Arc::new(Semaphore::new(1)), network, block_event_stream, } } - pub async fn run(mut self) { + pub async fn run(mut self, mut network_events: broadcast::Receiver) { info!(target: LOG_TARGET, "Mempool protocol handler has started"); - let mut network_events = self.network.subscribe_events(); loop { tokio::select! { Ok(block_event) = self.block_event_stream.recv() => { @@ -159,10 +161,17 @@ impl MempoolSyncProtocol { } async fn handle_network_event(&mut self, event: NetworkEvent) { + #[allow(clippy::single_match)] match event { - // If this node is connecting to a peer - NetworkEvent::PeerConnected { peer_id, direction } if direction.is_outbound() => { - if !self.is_synched() { + NetworkEvent::PeerIdentified { + peer_id, + supported_protocols, + .. 
+ } => { + if !self.is_synched() && + !self.has_attempted_peer(peer_id) && + supported_protocols.iter().any(|p| *p == MEMPOOL_SYNC_PROTOCOL) + { self.spawn_initiator_protocol(peer_id).await; } }, @@ -191,7 +200,7 @@ impl MempoolSyncProtocol { // we want to at least sync initial_sync_num_peers, so we reset the num_synced to 0, so it can run till // initial_sync_num_peers again. This is made to run as a best effort in that it will at least run the // initial_sync_num_peers - self.num_synched.store(0, Ordering::SeqCst); + self.peers_attempted.clear(); let connections = match self .network .select_random_connections(self.config.initial_sync_num_peers, Default::default()) @@ -218,7 +227,11 @@ impl MempoolSyncProtocol { } fn is_synched(&self) -> bool { - self.num_synched.load(Ordering::SeqCst) >= self.config.initial_sync_num_peers + self.is_done.load(Ordering::SeqCst) + } + + fn has_attempted_peer(&self, peer_id: PeerId) -> bool { + self.peers_attempted.contains(&peer_id) } fn handle_protocol_notification(&mut self, notification: ProtocolNotification) { @@ -232,13 +245,15 @@ impl MempoolSyncProtocol { async fn spawn_initiator_protocol(&mut self, peer_id: PeerId) { let mempool = self.mempool.clone(); let permits = self.permits.clone(); - let num_synched = self.num_synched.clone(); + let is_done = self.is_done.clone(); let config = self.config.clone(); let network = self.network.clone(); + let num_synced = self.peers_attempted.len(); + self.peers_attempted.insert(peer_id); task::spawn(async move { // Only initiate this protocol with a single peer at a time let _permit = permits.acquire().await; - if num_synched.load(Ordering::SeqCst) >= config.initial_sync_num_peers { + if is_done.load(Ordering::SeqCst) { return; } match network @@ -246,6 +261,7 @@ impl MempoolSyncProtocol { .await { Ok(framed) => { + let initial_sync_num_peers = config.initial_sync_num_peers; let protocol = MempoolPeerProtocol::new(config, framed, peer_id, mempool); match protocol.start_initiator().await 
{ Ok(_) => { @@ -254,7 +270,9 @@ impl MempoolSyncProtocol { "Mempool initiator protocol completed successfully for peer `{}`", peer_id, ); - num_synched.fetch_add(1, Ordering::SeqCst); + if num_synced >= initial_sync_num_peers { + is_done.store(true, Ordering::SeqCst); + } }, Err(err) => { debug!( @@ -266,7 +284,7 @@ impl MempoolSyncProtocol { }, } }, - Err(err) => error!( + Err(err) => warn!( target: LOG_TARGET, "Unable to establish mempool protocol substream to peer `{}`: {}", peer_id, diff --git a/base_layer/p2p/src/config.rs b/base_layer/p2p/src/config.rs index 1075694966..30e73f099a 100644 --- a/base_layer/p2p/src/config.rs +++ b/base_layer/p2p/src/config.rs @@ -103,6 +103,8 @@ pub struct P2pConfig { deserialize_with = "deserialize_from_str", serialize_with = "serialize_string" )] + /// If set to `Private`, the node assume it has no public address and will try to establish a relay connection as + /// soon as possible. `Auto` will use auto NAT to try determine this automatically. pub reachability_mode: ReachabilityMode, /// The global maximum allowed RPC sessions. 
/// Default: 100 @@ -112,6 +114,7 @@ pub struct P2pConfig { pub rpc_max_sessions_per_peer: usize, pub enable_mdns: bool, pub enable_relay: bool, + pub max_inbound_connections_per_peer: Option, } impl Default for P2pConfig { @@ -125,6 +128,7 @@ impl Default for P2pConfig { rpc_max_sessions_per_peer: 10, enable_mdns: true, enable_relay: false, + max_inbound_connections_per_peer: None, } } } diff --git a/base_layer/p2p/src/initialization.rs b/base_layer/p2p/src/initialization.rs index 12f987ae7a..dad75fd9d2 100644 --- a/base_layer/p2p/src/initialization.rs +++ b/base_layer/p2p/src/initialization.rs @@ -326,6 +326,7 @@ impl ServiceInitializer for P2pInitializer { user_agent: self.user_agent.clone(), enable_mdns: self.config.enable_mdns, enable_relay: self.config.enable_relay, + max_inbound_connections_per_peer: self.config.max_inbound_connections_per_peer, ..Default::default() }, listener_addrs: self.config.listen_addresses.to_vec(), diff --git a/base_layer/wallet/src/base_node_service/config.rs b/base_layer/wallet/src/base_node_service/config.rs index 282d3b1a25..b8ecd8badd 100644 --- a/base_layer/wallet/src/base_node_service/config.rs +++ b/base_layer/wallet/src/base_node_service/config.rs @@ -31,8 +31,11 @@ pub struct BaseNodeServiceConfig { /// The refresh interval #[serde(with = "serializers::seconds")] pub base_node_monitor_max_refresh_interval: Duration, - /// The RPC client pool size - pub base_node_rpc_pool_size: usize, + /// The maximum client pool size for base node RPC. This should (but not required) be less than or equal to the per + /// peer RPC limit (rpc_max_sessions_per_peer currently defaults 10) on the base node. If it is greater, then this + /// will incur a communication overhead (base node has to first reject the new session, then the client pool + /// will reuse a previous session). 
+ pub max_base_node_rpc_pool_size: usize, /// This is the size of the event channel used to communicate base node events to the wallet pub event_channel_size: usize, } @@ -41,7 +44,7 @@ impl Default for BaseNodeServiceConfig { fn default() -> Self { Self { base_node_monitor_max_refresh_interval: Duration::from_secs(30), - base_node_rpc_pool_size: 10, + max_base_node_rpc_pool_size: 8, event_channel_size: 250, } } diff --git a/base_layer/wallet/src/config.rs b/base_layer/wallet/src/config.rs index 9e7f9d9688..b3f8b58b0e 100644 --- a/base_layer/wallet/src/config.rs +++ b/base_layer/wallet/src/config.rs @@ -33,7 +33,7 @@ use tari_common::{ SubConfigPath, }; use tari_common_types::grpc_authentication::GrpcAuthentication; -use tari_network::multiaddr::Multiaddr; +use tari_network::{multiaddr::Multiaddr, ReachabilityMode}; use tari_p2p::P2pConfig; use tari_utilities::SafePassword; @@ -128,6 +128,8 @@ impl Default for WalletConfig { let p2p = P2pConfig { enable_relay: false, enable_mdns: true, + reachability_mode: ReachabilityMode::Private, + max_inbound_connections_per_peer: Some(3), ..Default::default() }; Self { diff --git a/base_layer/wallet/src/connectivity_service/base_node_peer_manager.rs b/base_layer/wallet/src/connectivity_service/base_node_peer_manager.rs index 5d6c01793e..808a7248e4 100644 --- a/base_layer/wallet/src/connectivity_service/base_node_peer_manager.rs +++ b/base_layer/wallet/src/connectivity_service/base_node_peer_manager.rs @@ -22,7 +22,7 @@ use std::{ fmt::Display, - time::{Duration, Instant}, + sync::{atomic, atomic::AtomicUsize, Arc}, }; use tari_network::{identity::PeerId, Peer}; @@ -30,19 +30,12 @@ use tari_network::{identity::PeerId, Peer}; use crate::connectivity_service::WalletConnectivityError; /// The selected peer is a current base node and an optional list of backup peers. 
-#[derive(Clone)] +#[derive(Clone, Debug)] pub struct BaseNodePeerManager { // The current base node that the wallet is connected to - current_peer_index: usize, + current_peer_index: Arc, // The other base nodes that the wallet can connect to if the selected peer is not available - peer_list: Vec, - last_connection_attempt: Option, -} - -#[derive(Clone, Debug)] -pub struct LastConnectionAttempt { - pub peer_index: usize, - pub attempt_time: Instant, + peer_list: Arc>, } impl BaseNodePeerManager { @@ -60,9 +53,8 @@ impl BaseNodePeerManager { ))); } Ok(Self { - current_peer_index: preferred_peer_index, - peer_list, - last_connection_attempt: None, + current_peer_index: Arc::new(AtomicUsize::new(preferred_peer_index)), + peer_list: Arc::new(peer_list), }) } @@ -71,57 +63,44 @@ impl BaseNodePeerManager { self.get_current_peer().peer_id() } - /// Get the current peer + /// Get the current peer. pub fn get_current_peer(&self) -> &Peer { self.peer_list - .get(self.current_peer_index) + .get(self.current_peer_index()) + // Panic: cannot panic because this instance cannot be constructed with an empty peer_list .unwrap_or(&self.peer_list[0]) } /// Changes to the next peer in the list, returning that peer pub fn select_next_peer(&mut self) -> &Peer { - self.current_peer_index = (self.current_peer_index + 1) % self.peer_list.len(); - &self.peer_list[self.current_peer_index] + self.set_current_peer_index((self.current_peer_index() + 1) % self.peer_list.len()); + &self.peer_list[self.current_peer_index()] + } + + pub fn peer_list(&self) -> &[Peer] { + &self.peer_list } /// Get the base node peer manager state pub fn get_state(&self) -> (usize, &[Peer]) { - (self.current_peer_index, &self.peer_list) + (self.current_peer_index(), &self.peer_list) } - /// Set the last connection attempt stats - pub fn set_last_connection_attempt(&mut self) { - self.last_connection_attempt = Some(LastConnectionAttempt { - peer_index: self.current_peer_index, - attempt_time: Instant::now(), - }) + 
fn set_current_peer_index(&self, index: usize) { + self.current_peer_index.store(index, atomic::Ordering::SeqCst); } - /// Get the last connection attempt stats - pub fn time_since_last_connection_attempt(&self) -> Option { - if let Some(stats) = self.last_connection_attempt.clone() { - if stats.peer_index == self.current_peer_index { - Some(stats.attempt_time.elapsed()) - } else { - None - } - } else { - None - } + fn current_peer_index(&self) -> usize { + self.current_peer_index.load(atomic::Ordering::SeqCst) } } impl Display for BaseNodePeerManager { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let last_connection_attempt = match self.time_since_last_connection_attempt() { - Some(stats) => format!("{:?}", stats.as_secs()), - None => "Never".to_string(), - }; write!( f, - "BaseNodePeerManager {{ current index: {}, last attempt (s): {}, peer list: {} entries }}", - self.current_peer_index, - last_connection_attempt, + "BaseNodePeerManager {{ current index: {}, peer list: {} entries }}", + self.current_peer_index(), self.peer_list.len() ) } diff --git a/base_layer/wallet/src/connectivity_service/error.rs b/base_layer/wallet/src/connectivity_service/error.rs index 23feee6e02..77d32b301b 100644 --- a/base_layer/wallet/src/connectivity_service/error.rs +++ b/base_layer/wallet/src/connectivity_service/error.rs @@ -22,6 +22,7 @@ use futures::channel::{mpsc, oneshot}; use tari_network::{DialError, NetworkError}; +use tari_rpc_framework::pool::RpcClientPoolError; #[derive(Debug, thiserror::Error)] pub enum WalletConnectivityError { @@ -33,8 +34,12 @@ pub enum WalletConnectivityError { DialError(#[from] DialError), #[error("Service is terminated and can no longer response to requests")] ServiceTerminated, + #[error("Client is_connected returned false after connecting")] + ClientConnectionLost, #[error("Preferred peer index is out of bounds: {0}")] PeerIndexOutOfBounds(String), + #[error("Rpc client pool error: {0}")] + RpcClientPoolError(#[from] 
RpcClientPoolError), } impl From for WalletConnectivityError { diff --git a/base_layer/wallet/src/connectivity_service/handle.rs b/base_layer/wallet/src/connectivity_service/handle.rs index f1dd5517f5..b28afcee84 100644 --- a/base_layer/wallet/src/connectivity_service/handle.rs +++ b/base_layer/wallet/src/connectivity_service/handle.rs @@ -61,11 +61,6 @@ impl WalletConnectivityHandle { #[async_trait::async_trait] impl WalletConnectivityInterface for WalletConnectivityHandle { fn set_base_node(&mut self, base_node_peer_manager: BaseNodePeerManager) { - if let Some(selected_peer) = self.base_node_watch.borrow().as_ref() { - if selected_peer.get_current_peer_id() == base_node_peer_manager.get_current_peer_id() { - return; - } - } self.base_node_watch.send(Some(base_node_peer_manager)); } diff --git a/base_layer/wallet/src/connectivity_service/service.rs b/base_layer/wallet/src/connectivity_service/service.rs index fe81f74fc5..eb3f10b3dd 100644 --- a/base_layer/wallet/src/connectivity_service/service.rs +++ b/base_layer/wallet/src/connectivity_service/service.rs @@ -20,22 +20,26 @@ // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE // USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-use std::{ - collections::{HashMap, HashSet}, - mem, - time::Duration, -}; +use std::{mem, pin::pin, time::Duration}; +use futures::{future, future::Either}; use log::*; use tari_core::base_node::{rpc::BaseNodeWalletRpcClient, sync::rpc::BaseNodeSyncRpcClient}; -use tari_network::{identity::PeerId, DialError, NetworkHandle, NetworkingService, ToPeerId}; +use tari_network::{ + identity::PeerId, + swarm::dial_opts::{DialOpts, PeerCondition}, + DialError, + NetworkHandle, + NetworkingService, + Peer, +}; use tari_rpc_framework::{ pool::{RpcClientLease, RpcClientPool}, RpcClient, RpcConnector, }; use tokio::{ - sync::{mpsc, oneshot, watch}, + sync::{mpsc, oneshot}, time, time::MissedTickBehavior, }; @@ -47,7 +51,7 @@ use crate::{ }; const LOG_TARGET: &str = "wallet::connectivity"; -pub(crate) const CONNECTIVITY_WAIT: u64 = 5; +pub(crate) const CONNECTIVITY_WAIT: Duration = Duration::from_secs(5); /// Connection status of the Base Node #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -61,18 +65,26 @@ pub struct WalletConnectivityService { config: BaseNodeServiceConfig, request_receiver: mpsc::Receiver, network_handle: NetworkHandle, - base_node_watch_receiver: watch::Receiver>, base_node_watch: Watch>, - pools: HashMap, + current_pool: Option, online_status_watch: Watch, pending_requests: Vec, + last_attempted_peer: Option, } struct ClientPoolContainer { + pub peer_id: PeerId, pub base_node_wallet_rpc_client: RpcClientPool, pub base_node_sync_rpc_client: RpcClientPool, } +impl ClientPoolContainer { + pub async fn close(self) { + self.base_node_wallet_rpc_client.close().await; + self.base_node_sync_rpc_client.close().await; + } +} + impl WalletConnectivityService { pub(super) fn new( config: BaseNodeServiceConfig, @@ -85,11 +97,11 @@ impl WalletConnectivityService { config, request_receiver, network_handle, - base_node_watch_receiver: base_node_watch.get_receiver(), base_node_watch, - pools: HashMap::new(), + current_pool: None, pending_requests: Vec::new(), 
online_status_watch, + last_attempted_peer: None, } } @@ -105,11 +117,11 @@ impl WalletConnectivityService { // BIASED: select branches are in order of priority biased; - Ok(_) = self.base_node_watch_receiver.changed() => { - if self.base_node_watch_receiver.borrow().is_some() { + _ = self.base_node_watch.changed() => { + if self.base_node_watch.borrow().is_some() { // This will block the rest until the connection is established. This is what we want. trace!(target: LOG_TARGET, "start: base_node_watch_receiver.changed"); - self.check_connection().await; + self.check_connection_and_connect_if_required().await; } }, @@ -119,35 +131,40 @@ impl WalletConnectivityService { _ = check_connection.tick() => { trace!(target: LOG_TARGET, "start: check_connection.tick"); - self.check_connection().await; + self.check_connection_and_connect_if_required().await; } } } } - async fn check_connection(&mut self) { + async fn check_connection_and_connect_if_required(&mut self) { if let Some(peer_manager) = self.get_base_node_peer_manager() { let current_base_node = peer_manager.get_current_peer_id(); trace!(target: LOG_TARGET, "check_connection: has current_base_node"); - if let Ok(Some(_)) = self.network_handle.get_connection(current_base_node).await { - trace!(target: LOG_TARGET, "check_connection: has connection"); - trace!(target: LOG_TARGET, "check_connection: is connected"); - if self.pools.contains_key(¤t_base_node) { - trace!(target: LOG_TARGET, "check_connection: has rpc pool"); - trace!(target: LOG_TARGET, "check_connection: rpc pool is already connected"); - self.set_online_status(OnlineStatus::Online); - return; + if let Ok(Some(conn)) = self.network_handle.get_connection(current_base_node).await { + trace!(target: LOG_TARGET, "check_connection: has connection with ID {}", conn.connection_id); + match self.current_pool.as_ref() { + Some(pool) if pool.peer_id == current_base_node => { + trace!(target: LOG_TARGET, "check_connection: has rpc pool, already connected"); + 
pool.base_node_sync_rpc_client.clear_unused_leases().await; + pool.base_node_wallet_rpc_client.clear_unused_leases().await; + self.set_online_status(OnlineStatus::Online); + return; + }, + Some(pool) => { + warn!(target: LOG_TARGET, "check_connection: current pool connected to peer {} but the base node peer is {}", pool.peer_id, current_base_node); + }, + None => { + info!(target: LOG_TARGET, "check_connection: current base node has connection but no rpc pool for connection"); + }, } - trace!(target: LOG_TARGET, "check_connection: no rpc pool for connection"); - trace!(target: LOG_TARGET, "check_connection: current base node has connection but not connected"); } trace!( target: LOG_TARGET, "check_connection: current base node has no connection, setup connection to: '{}'", peer_manager ); - self.set_online_status(OnlineStatus::Connecting); - self.setup_base_node_connection().await; + self.setup_base_node_connection(peer_manager).await; } else { self.set_online_status(OnlineStatus::Offline); debug!(target: LOG_TARGET, "Base node peer manager has not been set, cannot connect"); @@ -189,12 +206,12 @@ impl WalletConnectivityService { val } else { self.pending_requests.push(reply.into()); - warn!(target: LOG_TARGET, "{} wallet requests waiting for connection", self.pending_requests.len()); + debug!(target: LOG_TARGET, "{} wallet requests waiting for connection", self.pending_requests.len()); return; }; - match self.pools.get(&node_id) { - Some(pools) => match pools.base_node_wallet_rpc_client.get().await { + match self.current_pool { + Some(ref pools) => match pools.base_node_wallet_rpc_client.get().await { Ok(client) => { debug!(target: LOG_TARGET, "Obtained pool RPC 'wallet' connection to base node '{}'", node_id); let _result = reply.send(client); @@ -234,8 +251,8 @@ impl WalletConnectivityService { return; }; - match self.pools.get(&node_id) { - Some(pools) => match pools.base_node_sync_rpc_client.get().await { + match self.current_pool { + Some(ref pools) => match 
pools.base_node_sync_rpc_client.get().await { Ok(client) => { debug!(target: LOG_TARGET, "Obtained pool RPC 'sync' connection to base node '{}'", node_id); let _result = reply.send(client); @@ -264,62 +281,40 @@ impl WalletConnectivityService { } fn current_base_node(&self) -> Option { - self.base_node_watch_receiver - .borrow() - .as_ref() - .map(|p| p.get_current_peer_id()) + self.base_node_watch.borrow().as_ref().map(|p| p.get_current_peer_id()) } fn get_base_node_peer_manager(&self) -> Option { - self.base_node_watch_receiver.borrow().as_ref().map(|p| p.clone()) + self.base_node_watch.borrow().as_ref().cloned() } async fn disconnect_base_node(&mut self, peer_id: PeerId) { + trace!(target: LOG_TARGET, "Disconnecting base node '{}'...", peer_id); + if let Some(pool) = self.current_pool.take() { + pool.close().await; + } if let Err(e) = self.network_handle.disconnect_peer(peer_id).await { error!(target: LOG_TARGET, "Failed to disconnect base node: {}", e); } - self.pools.remove(&peer_id); } - async fn setup_base_node_connection(&mut self) { - let Some(mut peer_manager) = self.get_base_node_peer_manager() else { - debug!(target: LOG_TARGET, "No base node peer manager set"); - return; + async fn setup_base_node_connection(&mut self, mut peer_manager: BaseNodePeerManager) { + let mut peer = if self.last_attempted_peer.is_some() { + peer_manager.select_next_peer().clone() + } else { + peer_manager.get_current_peer().clone() }; - loop { - let peer_id = if let Some(_time) = peer_manager.time_since_last_connection_attempt() { - let next_peer_id = peer_manager.select_next_peer().peer_id(); - if peer_manager.get_current_peer().peer_id() == next_peer_id { - // If we only have one peer in the list, wait a bit before retrying - debug!(target: LOG_TARGET, - "Retrying after {}s ...", - Duration::from_secs(CONNECTIVITY_WAIT).as_secs() - ); - time::sleep(Duration::from_secs(CONNECTIVITY_WAIT)).await; - } - next_peer_id - } else { - peer_manager.get_current_peer_id().to_peer_id() 
- }; - peer_manager.set_last_connection_attempt(); - debug!( - target: LOG_TARGET, - "Attempting to connect to base node peer '{}'... (last attempt {:?})", - peer_id, - peer_manager.time_since_last_connection_attempt() - ); - self.pools.remove(&peer_id); - match self.try_setup_rpc_pool(peer_id).await { + loop { + self.set_online_status(OnlineStatus::Connecting); + match self.try_setup_rpc_pool(&peer).await { Ok(true) => { self.base_node_watch.send(Some(peer_manager.clone())); - if let Err(e) = self.notify_pending_requests().await { - warn!(target: LOG_TARGET, "Error notifying pending RPC requests: {}", e); - } + self.notify_pending_requests().await; self.set_online_status(OnlineStatus::Online); debug!( target: LOG_TARGET, - "Wallet is ONLINE and connected to base node '{}'", peer_id + "Wallet is ONLINE and connected to base node '{}'", peer ); break; }, @@ -328,90 +323,113 @@ impl WalletConnectivityService { target: LOG_TARGET, "The peer has changed while connecting. Attempting to connect to new base node." ); - self.disconnect_base_node(peer_id).await; + self.disconnect_base_node(peer.peer_id()).await; + self.set_online_status(OnlineStatus::Offline); + return; }, Err(WalletConnectivityError::DialError(DialError::Aborted)) => { debug!(target: LOG_TARGET, "Dial was cancelled."); - self.disconnect_base_node(peer_id).await; + self.disconnect_base_node(peer.peer_id()).await; + self.set_online_status(OnlineStatus::Offline); }, Err(e) => { warn!(target: LOG_TARGET, "{}", e); - self.disconnect_base_node(peer_id).await; + self.disconnect_base_node(peer.peer_id()).await; + self.set_online_status(OnlineStatus::Offline); }, } - if self.peer_list_change_detected(&peer_manager) { - debug!( - target: LOG_TARGET, - "The peer list has changed while connecting, aborting connection attempt." 
- ); - self.set_online_status(OnlineStatus::Offline); - break; - } - } - } - fn peer_list_change_detected(&self, peer_manager: &BaseNodePeerManager) -> bool { - if let Some(current) = self.get_base_node_peer_manager() { - let (_, current_list) = current.get_state(); - let (_, list) = peer_manager.get_state(); - - if current_list.len() != list.len() { - return true; - } - // Check the lists are the same, disregarding ordering - let mut c = current_list.iter().map(|p| p.peer_id()).collect::>(); - for p in list { - if !c.remove(&p.peer_id()) { - return true; - } + // Select the next peer (if available) + let next_peer = peer_manager.select_next_peer().clone(); + // If we only have one peer in the list, wait a bit before retrying + if peer.peer_id() == next_peer.peer_id() { + debug!(target: LOG_TARGET, + "Only single peer in base node peer list. Waiting {}s before retrying again ...", + CONNECTIVITY_WAIT.as_secs() + ); + time::sleep(CONNECTIVITY_WAIT).await; } - !c.is_empty() - } else { - true + peer = next_peer; } } fn set_online_status(&self, status: OnlineStatus) { + if *self.online_status_watch.borrow() == status { + return; + } self.online_status_watch.send(status); } - async fn try_setup_rpc_pool(&mut self, peer_id: PeerId) -> Result { + async fn try_setup_rpc_pool(&mut self, peer: &Peer) -> Result { + self.last_attempted_peer = Some(peer.peer_id()); + let peer_id = peer.peer_id(); + let dial_wait = self + .network_handle + .dial_peer( + DialOpts::peer_id(peer.peer_id()) + .condition(PeerCondition::DisconnectedAndNotDialing) + .addresses(peer.addresses().to_vec()) + .build(), + ) + .await?; + let container = ClientPoolContainer { + peer_id, base_node_sync_rpc_client: self .network_handle .create_rpc_client_pool(1, RpcClient::builder(peer_id)), base_node_wallet_rpc_client: self .network_handle - .create_rpc_client_pool(self.config.base_node_rpc_pool_size, RpcClient::builder(peer_id)), + .create_rpc_client_pool(self.config.max_base_node_rpc_pool_size, 
RpcClient::builder(peer_id)), }; - match container.base_node_wallet_rpc_client.get().await { - Ok(a) => a, - Err(err) => { - error!(target: LOG_TARGET, "{err}"); - return Ok(false); - }, - }; - debug!( - target: LOG_TARGET, - "Established peer connection to base node '{}'", - peer_id - ); - self.pools.insert(peer_id, container); - - trace!(target: LOG_TARGET, "Created RPC pools for '{}'", peer_id); + + // Create the first RPC session to ensure that we can connect. + { + let mut bn_changed_fut = pin!(self.base_node_watch.changed()); + match future::select(dial_wait, &mut bn_changed_fut).await { + Either::Left((result, _)) => result?, + Either::Right(_) => return Ok(false), + }; + debug!(target: LOG_TARGET, "Dial succeeded for {peer_id}"); + let connect_fut = pin!(container.base_node_wallet_rpc_client.get()); + let client = match future::select(connect_fut, bn_changed_fut).await { + Either::Left((result, _)) => result?, + Either::Right(_) => return Ok(false), + }; + if client.is_connected() { + debug!( + target: LOG_TARGET, + "Established peer connection to base node '{}'", + peer_id + ); + } else { + return Err(WalletConnectivityError::ClientConnectionLost); + } + } + + if let Some(container) = self.current_pool.replace(container) { + container.close().await; + } + + debug!(target: LOG_TARGET, "Created RPC pools for '{}'", peer_id); Ok(true) } - async fn notify_pending_requests(&mut self) -> Result<(), WalletConnectivityError> { + async fn notify_pending_requests(&mut self) { let current_pending = mem::take(&mut self.pending_requests); + let mut count = 0; + let current_pending_len = current_pending.len(); for reply in current_pending { if reply.is_canceled() { continue; } - + count += 1; + trace!(target: LOG_TARGET, "Handle {} of {} pending RPC pool requests", count, current_pending_len); self.handle_pool_request(reply).await; } - Ok(()) + if !self.pending_requests.is_empty() { + warn!(target: LOG_TARGET, "{} of {} pending RPC pool requests not handled", count, 
current_pending_len); + } } } diff --git a/base_layer/wallet/src/utxo_scanner_service/utxo_scanner_task.rs b/base_layer/wallet/src/utxo_scanner_service/utxo_scanner_task.rs index a677929fcc..80176d12a7 100644 --- a/base_layer/wallet/src/utxo_scanner_service/utxo_scanner_task.rs +++ b/base_layer/wallet/src/utxo_scanner_service/utxo_scanner_task.rs @@ -178,7 +178,7 @@ where self.publish_event(UtxoScannerEvent::ConnectingToBaseNode(peer)); let selected_peer = self.resources.wallet_connectivity.get_current_base_node_peer_node_id(); - let mut client = if selected_peer.map(|p| p == peer).unwrap_or(false) { + let mut client = if selected_peer.map_or(false, |p| p == peer) { // Use the wallet connectivity service so that RPC pools are correctly managed self.resources .wallet_connectivity @@ -186,7 +186,8 @@ where .await .ok_or(UtxoScannerError::ConnectivityShutdown)? } else { - self.establish_new_rpc_connection(&peer).await? + let client = self.establish_new_rpc_connection(&peer).await?; + RpcClientLease::new(client) }; let latency = client.get_last_request_latency(); @@ -277,6 +278,7 @@ where tip_header.height, ) .await?; + if num_scanned == 0 { return Err(UtxoScannerError::UtxoScanningError( "Peer returned 0 UTXOs to scan".to_string(), @@ -297,13 +299,13 @@ where async fn establish_new_rpc_connection( &mut self, peer: &PeerId, - ) -> Result, UtxoScannerError> { + ) -> Result { let client = self .resources .network .connect_rpc_using_builder(BaseNodeWalletRpcClient::builder(*peer).with_deadline(Duration::from_secs(60))) .await?; - Ok(RpcClientLease::new(client)) + Ok(client) } async fn get_chain_tip_header( diff --git a/base_layer/wallet_ffi/src/lib.rs b/base_layer/wallet_ffi/src/lib.rs index 743414e80e..e533d5b4b8 100644 --- a/base_layer/wallet_ffi/src/lib.rs +++ b/base_layer/wallet_ffi/src/lib.rs @@ -5305,6 +5305,7 @@ pub unsafe extern "C" fn comms_config_create( rpc_max_sessions_per_peer: 0, enable_mdns: true, enable_relay: false, + max_inbound_connections_per_peer: 
Some(1), }; Box::into_raw(Box::new(config)) diff --git a/common/config/presets/c_base_node_c.toml b/common/config/presets/c_base_node_c.toml index 05e3cd2010..1458fdd251 100644 --- a/common/config/presets/c_base_node_c.toml +++ b/common/config/presets/c_base_node_c.toml @@ -126,6 +126,12 @@ blockchain_sync_config.rpc_deadline = 240 # _NOTE_: If using the `tor` transport type, public_addresses will be ignored and an onion address will be # automatically configured #public_addresses = ["/ip4/172.2.3.4/tcp/18189",] +# The multiaddrs to listen on. +# Default: ["/ip4/0.0.0.0/tcp/0", "/ip4/0.0.0.0/udp/0/quic"] +#listen_addresses = ["/ip4/0.0.0.0/tcp/0", "/ip4/0.0.0.0/udp/0/quic"] +# If set to `Private`, the node assume it has no public address and will try to establish a relay connection as +# soon as possible. `Auto` will use auto NAT to try determine this automatically. +#reachability_mode = auto # Optionally bind an additional TCP socket for inbound Tari P2P protocol commms. # Use cases include: @@ -163,4 +169,4 @@ blockchain_sync_config.rpc_deadline = 240 # If true, and the maximum per peer RPC sessions is reached, the RPC server will close an old session and replace it # with a new session. If false, the RPC server will reject the new session and preserve the older session. # (default value = true). -#pub cull_oldest_peer_rpc_connection_on_full = true +#cull_oldest_peer_rpc_connection_on_full = true diff --git a/common/config/presets/d_console_wallet.toml b/common/config/presets/d_console_wallet.toml index c0381ff013..3e573c299b 100644 --- a/common/config/presets/d_console_wallet.toml +++ b/common/config/presets/d_console_wallet.toml @@ -166,6 +166,13 @@ event_channel_size = 3500 # _NOTE_: If using the `tor` transport type, public_address will be ignored and an onion address will be # automatically configured #public_addresses = ["/ip4/172.2.3.4/tcp/18188",] +# The multiaddrs to listen on. 
+# Default: ["/ip4/0.0.0.0/tcp/0", "/ip4/0.0.0.0/udp/0/quic"] +#listen_addresses = ["/ip4/0.0.0.0/tcp/0", "/ip4/0.0.0.0/udp/0/quic"] +# If set to `Private`, the node will not be used as a public relay +# If set to `Private`, the node assume it has no public address and will try to establish a relay connection as +# soon as possible. `Auto` will use auto NAT to try determine this automatically. +#reachability_mode = private # Optionally bind an additional TCP socket for inbound Tari P2P protocol commms. # Use cases include: @@ -205,7 +212,6 @@ event_channel_size = 3500 #rpc_max_simultaneous_sessions = 100 # The maximum comms RPC sessions allowed per peer (default value = 10). #rpc_max_sessions_per_peer = 10 -#rpc_max_sessions_per_peer = 10 # If true, and the maximum per peer RPC sessions is reached, the RPC server will close an old session and replace it # with a new session. If false, the RPC server will reject the new session and preserve the older session. # (default value = true). diff --git a/integration_tests/log4rs/cucumber.yml b/integration_tests/log4rs/cucumber.yml index 6c99dbceed..186151ac24 100644 --- a/integration_tests/log4rs/cucumber.yml +++ b/integration_tests/log4rs/cucumber.yml @@ -33,6 +33,7 @@ appenders: pattern: "{{log_dir}}/log/network.{}.log" encoder: pattern: "{d(%Y-%m-%d %H:%M:%S.%f)} [{X(grpc)}] {f}.{L} {i} [{t}] {l:5} {m}{n}" + # An appender named "base_layer_base_node" that writes to a file with a custom pattern encoder base_layer_base_node: kind: rolling_file path: "{{log_dir}}/log/base_node.log" @@ -48,6 +49,7 @@ appenders: pattern: "{{log_dir}}/log/base_node.{}.log" encoder: pattern: "{d(%Y-%m-%d %H:%M:%S.%f)} [{X(grpc)}] {f}.{L} {i} [{t}] {l:5} {m}{n}" + # An appender named "base_layer_wallet" that writes to a file with a custom pattern encoder base_layer_wallet: kind: rolling_file path: "{{log_dir}}/log/wallet.log" @@ -63,6 +65,7 @@ appenders: pattern: "{{log_dir}}/log/wallet.{}.log" encoder: pattern: "{d(%Y-%m-%d %H:%M:%S.%f)} 
[{X(grpc)}] {f}.{L} {i} [{t}] {l:5} {m}{n}" + # An appender named "base_layer_contacts" that writes to a file with a custom pattern encoder base_layer_contacts: kind: rolling_file path: "{{log_dir}}/log/contacts.log" @@ -86,7 +89,7 @@ appenders: kind: compound trigger: kind: size - limit: 10mb + limit: 100mb roller: kind: fixed_window base: 1 @@ -94,7 +97,7 @@ appenders: pattern: "{{log_dir}}/log/other.{}.log" encoder: pattern: "{d(%Y-%m-%d %H:%M:%S.%f)} [{X(grpc)}] {f}.{L} {i} [{t}] {l:5} {m}{n}" - # An appender named "base_layer" that writes to a file with a custom pattern encoder + # An appender named "miner" that writes to a file with a custom pattern encoder miner: kind: rolling_file path: "{{log_dir}}/log/miner.log" @@ -102,7 +105,7 @@ appenders: kind: compound trigger: kind: size - limit: 10mb + limit: 100mb roller: kind: fixed_window base: 1 @@ -118,14 +121,30 @@ appenders: kind: compound trigger: kind: size - limit: 200mb + limit: 100mb roller: kind: fixed_window base: 1 - count: 50 + count: 5 pattern: "{{log_dir}}/log/proxy.{}.log" encoder: pattern: "{d(%Y-%m-%d %H:%M:%S.%f)} [{X(grpc)}] {f}.{L} {i} [{t}] {l:5} {m}{n}" + # An appender named "libp2p" that writes to a file with a custom pattern encoder + libp2p: + kind: rolling_file + path: "{{log_dir}}/log/libp2p.log" + policy: + kind: compound + trigger: + kind: size + limit: 100mb + roller: + kind: fixed_window + base: 1 + count: 5 + pattern: "{{log_dir}}/log/libp2p.{}.log" + encoder: + pattern: "{d(%Y-%m-%d %H:%M:%S.%f)} [{X(grpc)}] {f}.{L} {i} [{t}] {l:5} {m}{n}" # We don't want prints during cucumber test, everything useful will in logs. 
# root: @@ -142,33 +161,36 @@ loggers: - base_layer_base_node - miner - proxy + - libp2p additive: true stdout: level: info # we have only single print, and it's info appenders: - stdout c: - level: debug + level: debug #trace #debug appenders: - base_layer_base_node tari: level: debug appenders: - base_layer_base_node - # Route log events sent to the "tari::base_node::grpc" logger to the "network" appender tari::base_node::grpc: level: debug appenders: - base_layer_base_node additive: false - chat_ffi: + tari_mm_proxy: level: debug appenders: - - base_layer_contacts + - base_layer_base_node + + # base_layer_wallet wallet: - level: trace + level: debug #trace #debug appenders: - base_layer_wallet + # miner minotari::application: level: debug @@ -185,90 +207,179 @@ loggers: appenders: - miner additive: false + # merge mining proxy minotari_mm_proxy::proxy: level: debug appenders: - proxy additive: false - # Route log events sent to the "comms" logger to the "network" appender + + # network comms: - level: debug + level: debug #trace #debug + appenders: + - network + additive: false + network: + level: debug #trace #debug appenders: - network + additive: false comms::noise: level: error appenders: - network additive: false comms::rpc::client: - level: debug + level: debug #trace #debug appenders: - network additive: false - # Route log events sent to the "tracing" logger to the "network" appender - tracing: + tari_comms: + level: debug #trace #debug + appenders: + - network + additive: false + p2p: + level: debug #trace #debug + appenders: + - network + additive: false + mio: level: error appenders: - network additive: false - # Route log events sent to the "h2" logger to the "network" appender - h2: + yamux: level: error appenders: - network additive: false - # Route log events sent to the "hyper" logger to the "network" appender - hyper: - level: info + tracing: + level: error appenders: - - other + - network + additive: false + h2: + level: error + appenders: + - 
network additive: false - # Route log events sent to the "contacts" logger to the "network" appender + + # base_layer_contacts contacts: level: debug appenders: - base_layer_contacts - # Route log events sent to the "p2p" logger to the "network" appender - p2p: - level: debug - appenders: - - network - # Route log events sent to the "yamux" logger to the "network" appender - yamux: + chat_ffi: level: debug appenders: - - network - # Route log events sent to the "mio" logger to the "network" appender - mio: - level: error + - base_layer_contacts + + # other + hyper: + level: info appenders: - - network - # Route log events sent to the "rustyline" logger to the "other" appender + - other + additive: false rustyline: level: error appenders: - other additive: false - - # Route log events sent to the "tokio_util" logger to the "other" appender tokio_util: level: error appenders: - other - # Route PGP log events pgp: level: warn appenders: - other - # Route log events sent to the "tari_mm_proxy" logger to the "base_layer" appender - tari_mm_proxy: - level: debug - appenders: - - base_layer_base_node - # Route R2D2 log events r2d2: level: info appenders: - other additive: false + + # libp2p + libp2p_identify: + level: debug + appenders: + - libp2p + additive: false + libp2p_noise: + level: debug + appenders: + - libp2p + additive: false + libp2p_peersync: + level: info + appenders: + - libp2p + additive: false + libp2p_gossipsub: + level: info + appenders: + - libp2p + additive: false + quinn: + level: info + appenders: + - libp2p + additive: false + quinn_proto: + level: info + appenders: + - libp2p + additive: false + libp2p_core: + level: info + appenders: + - libp2p + additive: false + libp2p_mdns: + level: debug + appenders: + - libp2p + additive: false + libp2p_swarm: + level: debug + appenders: + - libp2p + additive: false + hickory_proto: + level: info + appenders: + - libp2p + additive: false + multistream_select: + level: info + appenders: + - libp2p + additive: 
false + libp2p_tcp: + level: debug + appenders: + - libp2p + additive: false + libp2p_quic: + level: debug + appenders: + - libp2p + additive: false + libp2p_kad: + level: debug + appenders: + - libp2p + additive: false + libp2p_ping: + level: debug + appenders: + - libp2p + additive: false + libp2p_messaging: + level: debug + appenders: + - libp2p + additive: false diff --git a/integration_tests/src/wallet_process.rs b/integration_tests/src/wallet_process.rs index 05f9933912..4a1601b877 100644 --- a/integration_tests/src/wallet_process.rs +++ b/integration_tests/src/wallet_process.rs @@ -31,7 +31,7 @@ use tari_common::{ configuration::{CommonConfig, MultiaddrList}, network_check::set_network_if_choice_valid, }; -use tari_network::multiaddr::Multiaddr; +use tari_network::{multiaddr::Multiaddr, ReachabilityMode}; use tari_p2p::{auto_update::AutoUpdateConfig, Network, PeerSeedsConfig}; use tari_shutdown::Shutdown; use tokio::runtime; @@ -226,6 +226,7 @@ pub fn get_default_cli() -> Cli { profile_with_tokio_console: false, view_private_key: None, spend_key: None, + reachability: Some(ReachabilityMode::Private), } } diff --git a/network/core/src/error.rs b/network/core/src/error.rs index 5667d25e4d..111f20aef6 100644 --- a/network/core/src/error.rs +++ b/network/core/src/error.rs @@ -45,7 +45,7 @@ pub enum NetworkError { GossipPublishError(#[from] gossipsub::PublishError), #[error("Failed to send message to peer: {0}")] SwarmError(#[from] TariSwarmError), - #[error("Failed to invoke handle: {0}")] + #[error("Failed to invoke network handle: {0}")] NetworkingHandleError(#[from] NetworkingHandleError), #[error("Failed to subscribe to topic: {0}")] SubscriptionError(#[from] SubscriptionError), @@ -96,7 +96,10 @@ pub enum DialError { DialPeerConditionFalse(dial_opts::PeerCondition), #[error("Pending connection attempt has been aborted.")] Aborted, - #[error("The peer identity obtained ({obtained}) on the connection did not match the one that was expected.")] + #[error( + 
"The peer ID obtained ({obtained}) on the connection did not match the one that was expected. This is usually \ + because a different peer is listening on the provided address." + )] WrongPeerId { obtained: PeerId, endpoint: ConnectedPoint }, #[error("One of the [`NetworkBehaviour`]s rejected the outbound connection: {cause}.")] Denied { cause: String }, diff --git a/network/core/src/event.rs b/network/core/src/event.rs index e50f781e48..545138ce42 100644 --- a/network/core/src/event.rs +++ b/network/core/src/event.rs @@ -7,7 +7,7 @@ use tari_swarm::libp2p::{identity, PeerId, StreamProtocol}; #[derive(Debug, Clone)] pub enum NetworkEvent { - IdentifiedPeer { + PeerIdentified { peer_id: PeerId, public_key: identity::PublicKey, agent_version: String, diff --git a/network/core/src/gossip.rs b/network/core/src/gossip.rs index c115d376cb..314175b903 100644 --- a/network/core/src/gossip.rs +++ b/network/core/src/gossip.rs @@ -4,13 +4,18 @@ use std::io; use tari_swarm::{ - libp2p::{gossipsub, gossipsub::IdentTopic}, + libp2p::{ + gossipsub, + gossipsub::{IdentTopic, MessageId}, + }, messaging::{prost::ProstCodec, Codec}, }; use tokio::sync::mpsc; use crate::identity::PeerId; +pub struct RawGossipMessage {} + #[derive(Debug, Clone)] pub struct GossipPublisher { topic: IdentTopic, @@ -45,48 +50,55 @@ impl GossipPublisher { #[derive(Debug)] pub struct GossipSubscription { - receiver: mpsc::UnboundedReceiver<(PeerId, gossipsub::Message)>, + receiver: mpsc::UnboundedReceiver>, codec: ProstCodec, } impl GossipSubscription { - pub(super) fn new(receiver: mpsc::UnboundedReceiver<(PeerId, gossipsub::Message)>) -> Self { + pub(super) fn new(receiver: mpsc::UnboundedReceiver>) -> Self { Self { receiver, codec: ProstCodec::default(), } } - pub async fn next_message(&mut self) -> Option, io::Error>> { - let (source, raw_msg) = self.receiver.recv().await?; + pub async fn next_message(&mut self) -> Option, InboundGossipError>> { + let raw_msg = self.receiver.recv().await?; - match 
self.codec.decode_from(&mut raw_msg.data.as_slice()).await { + match self.codec.decode_from(&mut raw_msg.message.data.as_slice()).await { Ok((len, msg)) => Some(Ok(GossipMessage { - source, - origin: raw_msg.source, - decoded_len: len, + message_id: raw_msg.message_id, + propagation_source: raw_msg.propagation_source, + origin: raw_msg.origin, + message_size: len, message: msg, })), - Err(err) => Some(Err(err)), + Err(err) => Some(Err(InboundGossipError { + message_id: raw_msg.message_id, + propagation_source: raw_msg.propagation_source, + error: GossipError::DecodeError(err), + })), } } } #[derive(Debug, Clone)] pub struct GossipMessage { + /// Message ID. Use this to report back the validation result. + pub message_id: MessageId, /// The peer ID of the node that sent this message - pub source: PeerId, + pub propagation_source: PeerId, /// The peer ID of the node that originally published this message, if available pub origin: Option, - /// The decoded size of the message excl the length bytes - pub decoded_len: usize, + /// The size of the message in bytes + pub message_size: usize, /// The decoded message payload pub message: T, } impl GossipMessage { pub fn origin_or_source(&self) -> PeerId { - self.origin.unwrap_or(self.source) + self.origin.unwrap_or(self.propagation_source) } } @@ -99,3 +111,11 @@ pub enum GossipError { #[error("Encode error: {0}")] EncodeError(io::Error), } + +#[derive(Debug, thiserror::Error)] +#[error("Inbound gossip error for id={message_id},peer_id={propagation_source}: {error}")] +pub struct InboundGossipError { + pub message_id: MessageId, + pub propagation_source: PeerId, + pub error: GossipError, +} diff --git a/network/core/src/handle.rs b/network/core/src/handle.rs index c2b711c39e..be36d47cd5 100644 --- a/network/core/src/handle.rs +++ b/network/core/src/handle.rs @@ -37,7 +37,14 @@ use tari_rpc_framework::{ Substream, }; use tari_swarm::{ - libp2p::{gossipsub, gossipsub::IdentTopic, swarm::dial_opts::DialOpts, Multiaddr, PeerId, 
StreamProtocol}, + libp2p::{ + gossipsub, + gossipsub::{IdentTopic, MessageAcceptance, MessageId}, + swarm::dial_opts::DialOpts, + Multiaddr, + PeerId, + StreamProtocol, + }, substream::{NegotiatedSubstream, ProtocolNotification}, }; use tokio::{ @@ -54,6 +61,7 @@ use crate::{ BannedPeer, DialWaiter, DiscoveryResult, + GossipMessage, GossipPublisher, GossipSubscription, NetworkError, @@ -81,7 +89,7 @@ pub enum NetworkingRequest { }, SubscribeTopic { topic: IdentTopic, - inbound: mpsc::UnboundedSender<(PeerId, gossipsub::Message)>, + inbound: mpsc::UnboundedSender>, reply: Reply)>>, }, UnsubscribeTopic { @@ -92,6 +100,11 @@ pub enum NetworkingRequest { topic: IdentTopic, reply: Reply, }, + ReportGossipMessageValidationResult { + message_id: gossipsub::MessageId, + propagation_source: PeerId, + acceptance: MessageAcceptance, + }, OpenSubstream { peer_id: PeerId, protocol_id: StreamProtocol, @@ -213,6 +226,27 @@ impl NetworkHandle { rx.await? } + pub async fn report_gossip_message_validation_result( + &self, + message_id: MessageId, + propagation_source: PeerId, + acceptance: MessageAcceptance, + ) -> Result<(), NetworkError> { + self.tx_request + .send(NetworkingRequest::ReportGossipMessageValidationResult { + message_id, + propagation_source, + acceptance, + }) + .await + .map_err(|_| NetworkingHandleError::ServiceHasShutdown)?; + // NOTE: this does not have a reply because: + // 1. any error is reported in the logs in the network. The caller would likely not be doing anything more than + // this. + // 2. the caller does not have to wait at all when sending this (a full request channel aside) + Ok(()) + } + /// Add a notifier for these protocols. An unbounded sender is used to prevent potential lockups waiting for /// consumers to read the notification. 
pub async fn add_protocol_notifier>( diff --git a/network/core/src/lib.rs b/network/core/src/lib.rs index a9b2c81f88..89b6439e3b 100644 --- a/network/core/src/lib.rs +++ b/network/core/src/lib.rs @@ -37,6 +37,6 @@ pub use spawn::*; pub use tari_swarm::{ config::{Config as SwarmConfig, LimitPerInterval, RelayCircuitLimits, RelayReservationLimits}, is_supported_multiaddr, - libp2p::{identity, multiaddr, swarm, StreamProtocol}, + libp2p::{gossipsub, identity, multiaddr, swarm, StreamProtocol}, ProtocolVersion, }; diff --git a/network/core/src/peer.rs b/network/core/src/peer.rs index b2df9bd9ed..7fa15e3ba4 100644 --- a/network/core/src/peer.rs +++ b/network/core/src/peer.rs @@ -31,6 +31,15 @@ impl Peer { &self.public_key } + pub fn merge_addresses(&mut self, new: Vec) -> &mut Self { + for addr in new { + if self.addresses.iter().all(|a| *a != addr) { + self.addresses.push(addr); + } + } + self + } + pub fn try_to_ristretto_public_key(&self) -> Result { let pk = self.public_key.clone().try_into_sr25519()?; Ok(pk.inner_key().clone()) diff --git a/network/core/src/relay_state.rs b/network/core/src/relay_state.rs index 93b257ddbb..a8e4c04fb0 100644 --- a/network/core/src/relay_state.rs +++ b/network/core/src/relay_state.rs @@ -45,6 +45,23 @@ impl RelayState { self.selected_relay.as_mut() } + pub fn set_relay_peer(&mut self, peer_id: PeerId, dialled_address: Option) -> bool { + if self.selected_relay.as_ref().map_or(false, |p| p.peer_id == peer_id) { + return true; + } + + if let Some(addrs) = self.possible_relays.get(&peer_id) { + self.selected_relay = Some(RelayPeer { + peer_id, + addresses: addrs.iter().cloned().collect(), + is_circuit_established: false, + remote_address: dialled_address, + }); + return true; + } + false + } + pub fn possible_relays(&self) -> impl Iterator)> { self.possible_relays.iter() } @@ -76,7 +93,7 @@ impl RelayState { peer_id: *peer, addresses: addrs.iter().cloned().collect(), is_circuit_established: false, - dialled_address: None, + 
remote_address: None, }); } } @@ -86,5 +103,5 @@ pub struct RelayPeer { pub peer_id: PeerId, pub addresses: Vec, pub is_circuit_established: bool, - pub dialled_address: Option, + pub remote_address: Option, } diff --git a/network/core/src/spawn.rs b/network/core/src/spawn.rs index cc965b3d81..3a627995a0 100644 --- a/network/core/src/spawn.rs +++ b/network/core/src/spawn.rs @@ -74,7 +74,10 @@ where let swarm = tari_swarm::create_swarm::>(identity.clone(), HashSet::new(), config.swarm.clone())?; let local_peer_id = *swarm.local_peer_id(); - let (tx_requests, rx_requests) = mpsc::channel(1); + // Why a large MPSC channel size greater than 1? There are some requests that do not have a reply oneshot (e.g. + // report_gossip_message_validation_result). In these cases we do not want the caller to have to wait for the + // network to get to this request before continuing. + let (tx_requests, rx_requests) = mpsc::channel(1000); let (tx_msg_requests, rx_msg_requests) = mpsc::channel(1000); let (tx_events, _) = broadcast::channel(100); let (autonat_status_sender, autonat_status_receiver) = watch::channel(AutonatStatus::Checking); diff --git a/network/core/src/worker.rs b/network/core/src/worker.rs index 3744ec8fbc..dea50addfe 100644 --- a/network/core/src/worker.rs +++ b/network/core/src/worker.rs @@ -65,6 +65,7 @@ use crate::{ ConnectionDirection, DiscoveredPeer, DiscoveryResult, + GossipMessage, MessageSpec, MessagingMode, NetworkError, @@ -92,15 +93,17 @@ where pending_kad_queries: HashMap>, substream_notifiers: Notifiers, swarm: TariSwarm>, - // TODO: we'll replace this with a proper libp2p behaviour if needed + // TODO: we'll replace this with a proper libp2p behaviour if needed. 
There is a new libp2p behaviour called + // allow_block_list which does this however does not support time-based bans ban_list: HashMap, allow_list: HashSet, - gossipsub_subscriptions: HashMap>, + gossipsub_subscriptions: HashMap>>, gossipsub_outbound_tx: mpsc::Sender<(IdentTopic, Vec)>, gossipsub_outbound_rx: Option)>>, config: crate::Config, relays: RelayState, seed_peers: Vec, + added_peers: HashMap, autonat_status_sender: watch::Sender, is_initial_bootstrap_complete: bool, shutdown_signal: ShutdownSignal, @@ -138,6 +141,7 @@ where pending_kad_queries: HashMap::new(), relays: RelayState::new(known_relay_nodes), seed_peers, + added_peers: HashMap::new(), swarm, ban_list: HashMap::new(), allow_list: HashSet::new(), @@ -240,7 +244,16 @@ where let (tx_waiter, rx_waiter) = oneshot::channel(); let maybe_peer_id = dial_opts.get_peer_id(); - info!(target: LOG_TARGET, "🤝 Dialing peer {:?}", dial_opts); + info!(target: LOG_TARGET, "☎️ Dialing peer {:?}", dial_opts); + // Kad can remove addresses if we fail to dial a peer (e.g. 
if they are temporarily offline) + // So we readd peers we've explicitly added + if let Some(peer_id) = maybe_peer_id { + if let Some(addresses) = self.added_peers.get(&peer_id).map(|p| p.addresses()) { + for address in addresses { + self.swarm.add_peer_address(peer_id, address.clone()); + } + } + } match self.swarm.dial(dial_opts) { Ok(_) => { @@ -249,6 +262,18 @@ where } let _ignore = reply.send(Ok(rx_waiter.into())); }, + Err(err @ DialError::DialPeerConditionFalse(_)) => { + debug!(target :LOG_TARGET, "{err}"); + if let Some(peer_id) = maybe_peer_id { + if self.active_connections.contains_key(&peer_id) { + let _ignore = tx_waiter.send(Ok(())); + } else { + // We can add to pending because an event will occur + self.pending_dial_requests.entry(peer_id).or_default().push(tx_waiter); + } + } + let _ignore = reply.send(Ok(rx_waiter.into())); + }, Err(err) => { info!(target: LOG_TARGET, "🚨 Failed to dial peer: {}", err); let _ignore = reply.send(Err(err.into())); @@ -297,6 +322,19 @@ where let found = self.swarm.behaviour_mut().gossipsub.topics().any(|t| *t == hash); let _ignore = reply.send(Ok(found)); }, + NetworkingRequest::ReportGossipMessageValidationResult { + message_id, + propagation_source, + acceptance, + } => { + if let Err(err) = self.swarm.behaviour_mut().gossipsub.report_message_validation_result( + &message_id, + &propagation_source, + acceptance, + ) { + warn!(target: LOG_TARGET, "⚠️ Failed to report message validation result for message {message_id}: {err}") + } + }, NetworkingRequest::OpenSubstream { peer_id, protocol_id, @@ -379,13 +417,21 @@ where let num_addresses = peer.addresses().len(); let peer_id = peer.peer_id(); let mut failed = 0usize; - for address in peer.addresses { - let update = self.swarm.behaviour_mut().kad.add_address(&peer_id, address); + for address in &peer.addresses { + let update = self.swarm.behaviour_mut().kad.add_address(&peer_id, address.clone()); if matches!(update, RoutingUpdate::Failed) { failed += 1; } } + match 
self.added_peers.entry(peer.peer_id) { + Entry::Occupied(mut p_mut) => { + p_mut.get_mut().merge_addresses(peer.addresses); + }, + Entry::Vacant(entry) => { + entry.insert(peer); + }, + } if failed == 0 { let _ignore = reply.send(Ok(())); } else { @@ -418,7 +464,6 @@ where return; } - // TODO: mark the peer as banned and prevent connections,messages from coming through self.ban_list.insert(peer_id, BannedPeer { peer_id, banned_at: Instant::now(), @@ -557,7 +602,7 @@ where fn gossipsub_subscribe_topic( &mut self, topic: IdentTopic, - inbound: mpsc::UnboundedSender<(PeerId, gossipsub::Message)>, + inbound: mpsc::UnboundedSender>, ) -> Result<(), NetworkError> { if !self.swarm.behaviour_mut().gossipsub.subscribe(&topic)? { warn!(target: LOG_TARGET, "Already subscribed to {topic}"); @@ -645,9 +690,10 @@ where peer_id, endpoint, cause, + connection_id, .. } => { - info!(target: LOG_TARGET, "🔌 Connection closed: peer_id={}, endpoint={:?}, cause={:?}", peer_id, endpoint, cause); + info!(target: LOG_TARGET, "🔌 Connection closed: id={}, peer_id={}, endpoint={:?}, cause={:?}", connection_id, peer_id, endpoint, cause); match self.active_connections.entry(peer_id) { Entry::Occupied(mut entry) => { entry.get_mut().retain(|c| c.endpoint != endpoint); @@ -867,12 +913,19 @@ where }; debug!(target: LOG_TARGET, "📣 RX Gossipsub: {message_id} from {propagation_source} (size: {})", message.data.len()); + let message = GossipMessage { + message_id, + propagation_source, + origin: message.source, + message_size: message.data.len(), + message, + }; - if let Err(mpsc::error::SendError((_, message))) = sink.send((propagation_source, message)) { - warn!(target: LOG_TARGET, "📣 Gossipsub sink dropped for topic {}. Removing subscription channel. The node is still subscribed (use NetworkHandle::unsubscribe_topic).", message.topic); + if let Err(mpsc::error::SendError(message)) = sink.send(message) { + warn!(target: LOG_TARGET, "📣 Gossipsub sink dropped for topic {}. 
Removing subscription channel. The node is still subscribed (use NetworkHandle::unsubscribe_topic).", message.message.topic); // We could unsubscribe in this case, but this probably isn't very useful and this is probably a result of a // downstream bug. - let _drop = self.gossipsub_subscriptions.remove(&message.topic); + let _drop = self.gossipsub_subscriptions.remove(&message.message.topic); } Ok(()) } @@ -1012,7 +1065,7 @@ where if let Some(relay) = self.relays.selected_relay_mut() { if endpoint.is_dialer() && relay.peer_id == peer_id { - relay.dialled_address = Some(endpoint.get_remote_address().clone()); + relay.remote_address = Some(endpoint.get_remote_address().clone()); } } @@ -1032,13 +1085,10 @@ where supported_protocols: vec![], }); - let Some(waiters) = self.pending_dial_requests.remove(&peer_id) else { - debug!(target: LOG_TARGET, "No pending dial requests initiated by this service for peer {}", peer_id); - return Ok(()); - }; - - for waiter in waiters { - let _ignore = waiter.send(Ok(())); + if let Some(waiters) = self.pending_dial_requests.remove(&peer_id) { + for waiter in waiters { + let _ignore = waiter.send(Ok(())); + } } self.publish_event(NetworkEvent::PeerConnected { @@ -1104,6 +1154,9 @@ where // Otherwise, if the peer advertises as a relay we'll add them info!(target: LOG_TARGET, "📡 Adding peer {peer_id} {address} as a relay"); self.relays.add_possible_relay(peer_id, address.clone()); + if !self.relays.has_active_relay() { + self.relays.set_relay_peer(peer_id, Some(address.clone())); + } } else { // Nothing to do } @@ -1124,7 +1177,7 @@ where self.establish_relay_circuit_on_connect(&peer_id); } - self.publish_event(NetworkEvent::IdentifiedPeer { + self.publish_event(NetworkEvent::PeerIdentified { peer_id, public_key, agent_version, @@ -1154,27 +1207,31 @@ where /// Establishes a relay circuit for the given peer if it is the selected relay peer. Returns true if the circuit /// was established from this call. 
- fn establish_relay_circuit_on_connect(&mut self, peer_id: &PeerId) -> bool { + fn establish_relay_circuit_on_connect(&mut self, connected_peer_id: &PeerId) -> bool { let Some(relay) = self.relays.selected_relay() else { + debug!(target: LOG_TARGET, "📡 No relay selected."); return false; }; - // If the peer we've connected with is the selected relay that we previously dialled, then continue - if relay.peer_id != *peer_id { + // If the peer we've connected with is the selected relay that we previously connected with, then continue + if relay.peer_id != *connected_peer_id { + debug!(target: LOG_TARGET, "📡 Dialed peer is not set as a relay."); return false; } // If we've already established a circuit with the relay, there's nothing to do here if relay.is_circuit_established { + debug!(target: LOG_TARGET, "📡 Already have a circuit established with this relay peer."); return false; } // Check if we've got a confirmed address for the relay - let Some(dialled_address) = relay.dialled_address.as_ref() else { + let Some(remote_address) = relay.remote_address.as_ref() else { + debug!(target: LOG_TARGET, "📡 No remote addresses for relay peer."); return false; }; - let circuit_addr = dialled_address.clone().with(Protocol::P2pCircuit); + let circuit_addr = remote_address.clone().with(Protocol::P2pCircuit); match self.swarm.listen_on(circuit_addr.clone()) { Ok(id) => { @@ -1182,7 +1239,7 @@ where .behaviour_mut() .peer_sync .add_known_local_public_addresses(vec![circuit_addr]); - info!(target: LOG_TARGET, "🌍️ Peer {peer_id} is a relay. Listening (id={id:?}) for circuit connections"); + info!(target: LOG_TARGET, "🌍️ Peer {connected_peer_id} is a relay. 
Listening (id={id:?}) for circuit connections"); let Some(relay_mut) = self.relays.selected_relay_mut() else { // unreachable return false; diff --git a/network/libp2p-messaging/src/behaviour.rs b/network/libp2p-messaging/src/behaviour.rs index a2266ea2cf..55f304351d 100644 --- a/network/libp2p-messaging/src/behaviour.rs +++ b/network/libp2p-messaging/src/behaviour.rs @@ -8,7 +8,7 @@ use std::{ }; use libp2p::{ - core::{transport::PortUse, Endpoint}, + core::{transport::PortUse, ConnectedPoint, Endpoint}, swarm::{ dial_opts::DialOpts, AddressChange, @@ -90,13 +90,13 @@ where TCodec: Codec + Send + Clone + 'static Some(connections) => { // Return a currently active stream if let Some(sink) = connections.next_active_sink() { - tracing::debug!("return a currently active stream {}", sink.stream_id()); + tracing::debug!("return a currently active stream {} (peer={peer_id})", sink.stream_id()); return sink.clone(); } // Otherwise, return a pending stream if let Some(sink) = connections.next_pending_sink() { - tracing::debug!("return a pending stream {}", sink.stream_id()); + tracing::debug!("return a pending stream {} (peer={peer_id})", sink.stream_id()); return sink.clone(); } @@ -170,17 +170,18 @@ where TCodec: Codec + Send + Clone + 'static .. }: ConnectionClosed, ) { - let connections = self - .connected - .get_mut(&peer_id) - .expect("Expected some established connection to peer before closing."); + let Some(connections) = self.connected.get_mut(&peer_id) else { + return; + }; - let connection = connections + let Some(connection) = connections .connections .iter() .position(|c| c.id == connection_id) .map(|p: usize| connections.connections.remove(p)) - .expect("Expected connection to be established before closing."); + else { + return; + }; debug_assert_eq!(connections.is_empty(), remaining_established == 0); if connections.is_empty() { @@ -204,10 +205,15 @@ where TCodec: Codec + Send + Clone + 'static new, .. 
} = address_change; + let remote_address = match new { + ConnectedPoint::Dialer { address, .. } => Some(address), + ConnectedPoint::Listener { .. } => None, + }; + if let Some(connections) = self.connected.get_mut(&peer_id) { for connection in &mut connections.connections { if connection.id == connection_id { - connection.remote_address = Some(new.get_remote_address().clone()); + connection.remote_address = remote_address.cloned(); return; } } @@ -337,9 +343,10 @@ where TCodec: Codec + Send + Clone + 'static fn poll(&mut self, _cx: &mut Context<'_>) -> Poll>> { if let Some(event) = self.pending_events.pop_front() { return Poll::Ready(event); - } - if self.pending_events.capacity() > EMPTY_QUEUE_SHRINK_THRESHOLD { + } else if self.pending_events.capacity() > EMPTY_QUEUE_SHRINK_THRESHOLD { self.pending_events.shrink_to_fit(); + } else { + // nothing } Poll::Pending diff --git a/network/libp2p-messaging/src/handler.rs b/network/libp2p-messaging/src/handler.rs index 39ae455c3b..5491e58986 100644 --- a/network/libp2p-messaging/src/handler.rs +++ b/network/libp2p-messaging/src/handler.rs @@ -202,7 +202,10 @@ where TCodec: Codec + Send + Clone + 'static .boxed(); if self.tasks.try_push(fut).is_err() { - tracing::warn!("Dropping inbound stream because we are at capacity") + tracing::warn!( + "Dropping inbound stream because we are at capacity for peer {}", + self.peer_id + ) } } } diff --git a/network/libp2p-substream/src/behaviour.rs b/network/libp2p-substream/src/behaviour.rs index 0f1268dfa5..f86297c73e 100644 --- a/network/libp2p-substream/src/behaviour.rs +++ b/network/libp2p-substream/src/behaviour.rs @@ -7,9 +7,9 @@ use std::{ }; use libp2p::{ - core::{transport::PortUse, Endpoint}, + core::{transport::PortUse, ConnectedPoint, Endpoint}, swarm::{ - dial_opts::DialOpts, + dial_opts::{DialOpts, PeerCondition}, AddressChange, ConnectionClosed, ConnectionDenied, @@ -89,14 +89,19 @@ impl Behaviour { pub fn open_substream(&mut self, peer_id: PeerId, protocol: 
StreamProtocol) -> StreamId { let stream_id = self.next_outbound_stream_id(); +        tracing::debug!("open substream request to {peer_id}: stream={stream_id} {protocol}"); let request = OpenStreamRequest::new(stream_id, peer_id, protocol); -        match self.get_connections(&peer_id) { Some(connections) => { let ix = (stream_id as usize) % connections.len(); let conn = &mut connections[ix]; -            conn.pending_streams.insert(stream_id); +            conn.pending_outbound_streams.insert(stream_id); let conn_id = conn.id; +            tracing::debug!( +                "open substream: using conn={conn_id} out of {} connection(s) to {peer_id}: stream={stream_id} {}", +                connections.len(), +                request.protocol() +            ); self.pending_events.push_back(ToSwarm::NotifyHandler { peer_id, handler: NotifyHandler::One(conn_id), @@ -104,8 +109,13 @@ impl Behaviour { }); }, None => { +                tracing::debug!( +                    "open substream: no connection to peer {peer_id}: protocol: {}, stream: {}", +                    request.protocol(), +                    request.stream_id() +                ); self.pending_events.push_back(ToSwarm::Dial { -                    opts: DialOpts::peer_id(peer_id).build(), +                    opts: DialOpts::peer_id(peer_id).condition(PeerCondition::Always).build(), }); self.pending_outbound_streams.entry(peer_id).or_default().push(request); }, @@ -131,22 +141,22 @@ impl Behaviour { let connections = self .connected .get_mut(&peer_id) -            .expect("Expected some established connection to peer before closing."); +            .expect("Expected connection to be established before closing."); let connection = connections .iter() .position(|c| c.id == connection_id) -            .map(|p: usize| connections.remove(p)) -            .expect("Expected connection to be established before closing."); +            .map(|p| connections.remove(p)) +            .expect("Expected some established connection to peer before closing."); debug_assert_eq!(connections.is_empty(), remaining_established == 0); if connections.is_empty() { self.connected.remove(&peer_id); } -        for stream_id in connection.pending_streams { +        for stream_id in connection.pending_outbound_streams { 
self.pending_events -                .push_back(ToSwarm::GenerateEvent(Event::InboundFailure { +                .push_back(ToSwarm::GenerateEvent(Event::OutboundFailure { peer_id, stream_id, error: Error::ConnectionClosed, @@ -161,10 +171,15 @@ impl Behaviour { new, .. } = address_change; + let remote_address = match new { + ConnectedPoint::Dialer { address, .. } => Some(address), + ConnectedPoint::Listener { .. } => None, + }; + if let Some(connections) = self.connected.get_mut(&peer_id) { for connection in connections { if connection.id == connection_id { - connection.remote_address = Some(new.get_remote_address().clone()); + connection.remote_address = remote_address.cloned(); return; } } @@ -172,24 +187,20 @@ impl Behaviour { } fn on_dial_failure(&mut self, DialFailure { peer_id, error, .. }: DialFailure) { - if matches!(error, DialError::DialPeerConditionFalse(_)) { - return; - } - if let Some(peer) = peer_id { // If there are pending outgoing stream requests when a dial failure occurs, // it is implied that we are not connected to the peer, since pending // outgoing stream requests are drained when a connection is established and // only created when a peer is not connected when a request is made. - // Therefore these requests must be considered failed, even if there is + // Therefore these requests must be considered failed, even if there is // another, concurrent dialing attempt ongoing. 
if let Some(pending) = self.pending_outbound_streams.remove(&peer) { let no_addresses = matches!(&error, DialError::NoAddresses); + self.pending_events.reserve(pending.len()); for request in pending { self.pending_events .push_back(ToSwarm::GenerateEvent(Event::OutboundFailure { peer_id: peer, - protocol: request.protocol().clone(), stream_id: request.stream_id(), error: if no_addresses { Error::NoAddressesForPeer @@ -215,7 +226,7 @@ impl Behaviour { if let Some(pending_streams) = self.pending_outbound_streams.remove(&peer_id) { for stream in pending_streams { - connection.pending_streams.insert(stream.stream_id()); + connection.pending_outbound_streams.insert(stream.stream_id()); handler.on_behaviour_event(stream.into()); } } @@ -278,7 +289,7 @@ impl NetworkBehaviour for Behaviour { Event::SubstreamOpen { peer_id, stream_id, .. } => { if let Some(connections) = self.connected.get_mut(peer_id) { for connection in connections { - connection.pending_streams.remove(stream_id); + connection.pending_outbound_streams.remove(stream_id); } } }, @@ -294,9 +305,10 @@ impl NetworkBehaviour for Behaviour { fn poll(&mut self, _cx: &mut Context<'_>) -> Poll>> { if let Some(event) = self.pending_events.pop_front() { return Poll::Ready(event); - } - if self.pending_events.capacity() > EMPTY_QUEUE_SHRINK_THRESHOLD { + } else if self.pending_events.capacity() > EMPTY_QUEUE_SHRINK_THRESHOLD { self.pending_events.shrink_to_fit(); + } else { + // nothing } Poll::Pending @@ -308,7 +320,7 @@ impl NetworkBehaviour for Behaviour { struct Connection { id: ConnectionId, remote_address: Option, - pending_streams: HashSet, + pending_outbound_streams: HashSet, } impl Connection { @@ -316,7 +328,7 @@ impl Connection { Self { id, remote_address, - pending_streams: HashSet::new(), + pending_outbound_streams: HashSet::new(), } } } diff --git a/network/libp2p-substream/src/error.rs b/network/libp2p-substream/src/error.rs index 625d458d4d..e9f743365e 100644 --- a/network/libp2p-substream/src/error.rs 
+++ b/network/libp2p-substream/src/error.rs @@ -12,6 +12,8 @@ pub enum Error { ProtocolNotSupported, ProtocolNegotiationTimeout, ChannelClosed, + StreamUpgradeError, + Io(String), } impl Display for Error { @@ -24,6 +26,8 @@ impl Display for Error { Self::ProtocolNotSupported => write!(f, "Protocol not supported"), Self::ProtocolNegotiationTimeout => write!(f, "Protocol negotiation timeout"), Self::ChannelClosed => write!(f, "Channel closed"), + Self::StreamUpgradeError => write!(f, "Stream upgrade error"), + Self::Io(err) => write!(f, "IO error: {err}"), } } } diff --git a/network/libp2p-substream/src/event.rs b/network/libp2p-substream/src/event.rs index 4e7b999372..d558c75cbd 100644 --- a/network/libp2p-substream/src/event.rs +++ b/network/libp2p-substream/src/event.rs @@ -23,7 +23,6 @@ pub enum Event { }, OutboundFailure { peer_id: PeerId, - protocol: StreamProtocol, stream_id: StreamId, error: Error, }, diff --git a/network/libp2p-substream/src/handler.rs b/network/libp2p-substream/src/handler.rs index 3f6fb33f60..1c0c4fe9dc 100644 --- a/network/libp2p-substream/src/handler.rs +++ b/network/libp2p-substream/src/handler.rs @@ -34,7 +34,6 @@ use smallvec::SmallVec; use crate::{ error::Error, event::Event, - stream::OpenStreamRequest, FromBehaviourEvent, ProtocolEvent, ProtocolNotification, @@ -45,7 +44,6 @@ use crate::{ pub struct Handler { peer_id: PeerId, protocols: Protocols, - requested_streams: VecDeque, pending_behaviour_events: VecDeque, pending_events: VecDeque, } @@ -55,7 +53,6 @@ impl Handler { Self { peer_id, protocols: Protocols::new(protocols), - requested_streams: VecDeque::new(), pending_behaviour_events: VecDeque::new(), pending_events: VecDeque::new(), } @@ -68,17 +65,11 @@ impl Handler { } fn on_dial_upgrade_error(&mut self, error: DialUpgradeError>) { - let stream = self - .requested_streams - .pop_front() - .expect("negotiated a stream without a pending request"); - match error.error { StreamUpgradeError::Timeout => { 
self.pending_events.push_back(Event::OutboundFailure { peer_id: self.peer_id, - protocol: stream.protocol().clone(), - stream_id: stream.stream_id(), + stream_id: error.info, error: Error::ProtocolNegotiationTimeout, }); }, @@ -90,18 +81,25 @@ impl Handler { // the remote peer does not support the requested protocol(s). self.pending_events.push_back(Event::OutboundFailure { peer_id: self.peer_id, - protocol: stream.protocol().clone(), - stream_id: stream.stream_id(), + stream_id: error.info, error: Error::ProtocolNotSupported, }); }, - StreamUpgradeError::Apply(_infallible) => {}, + StreamUpgradeError::Apply(_) => { + self.pending_events.push_back(Event::OutboundFailure { + peer_id: self.peer_id, + stream_id: error.info, + error: Error::StreamUpgradeError, + }); + }, StreamUpgradeError::Io(e) => { - tracing::debug!( - "outbound stream for request {} failed: {e}, retrying", - stream.stream_id() - ); - self.requested_streams.push_back(stream); + tracing::debug!("outbound stream for request {} failed: {e}, retrying", error.info,); + + self.pending_events.push_back(Event::OutboundFailure { + peer_id: self.peer_id, + stream_id: error.info, + error: Error::Io(e.to_string()), + }); }, } } @@ -112,8 +110,6 @@ impl Handler { ) { let (stream, protocol) = outbound.protocol; let stream_id = outbound.info; - // Requested stream succeeded, remove it from the pending list. - let _request = self.requested_streams.pop_front(); self.pending_events.push_back(Event::SubstreamOpen { peer_id: self.peer_id, @@ -163,10 +159,9 @@ impl ConnectionHandler for Handler { // Emit outbound streams. 
if let Some(event) = self.pending_behaviour_events.pop_front() { match event { - FromBehaviourEvent::OpenRpcSessionRequest(stream) => { + FromBehaviourEvent::OpenSubstreamRequest(stream) => { let protocol = stream.protocol().clone(); let stream_id = stream.stream_id(); - self.requested_streams.push_back(stream); return Poll::Ready(ConnectionHandlerEvent::OutboundSubstreamRequest { protocol: SubstreamProtocol::new(ChosenProtocol { protocol }, stream_id), diff --git a/network/libp2p-substream/src/stream.rs b/network/libp2p-substream/src/stream.rs index 01975dccff..1a60c54bd1 100644 --- a/network/libp2p-substream/src/stream.rs +++ b/network/libp2p-substream/src/stream.rs @@ -9,13 +9,13 @@ pub type StreamId = u32; #[derive(Debug)] pub enum FromBehaviourEvent { - OpenRpcSessionRequest(OpenStreamRequest), + OpenSubstreamRequest(OpenStreamRequest), AddSupportedProtocol(StreamProtocol), } impl From for FromBehaviourEvent { fn from(event: OpenStreamRequest) -> Self { - Self::OpenRpcSessionRequest(event) + Self::OpenSubstreamRequest(event) } } diff --git a/network/rpc_framework/proto/rpc.proto b/network/rpc_framework/proto/rpc.proto index 89296174ba..cfd03c5341 100644 --- a/network/rpc_framework/proto/rpc.proto +++ b/network/rpc_framework/proto/rpc.proto @@ -7,53 +7,54 @@ package tari.comms.rpc; // Message type for all RPC requests message RpcRequest { - // An identifier that is unique per request per session. This value is not strictly needed as - // requests and responses alternate on the protocol level without the need for storing and matching - // request IDs. However, this value is useful for logging. - uint32 request_id = 1; - // The method identifier. The matching method for a given value is defined by each service. - uint32 method = 2; - // Message flags. Currently this is not used for requests. 
- uint32 flags = 3; - // The length of time in seconds that a client is willing to wait for a response - uint64 deadline = 4; - - // The message payload - bytes payload = 10; + // An identifier that is unique per request per session. This value is not strictly needed as + // requests and responses alternate on the protocol level without the need for storing and matching + // request IDs. However, this value is useful for logging. + uint32 request_id = 1; + // The method identifier. The matching method for a given value is defined by each service. + uint32 method = 2; + // Message flags. Currently this is not used for requests. + uint32 flags = 3; + // The length of time in seconds that a client is willing to wait for a response + uint64 deadline = 4; + + // The message payload + bytes payload = 10; } // Message type for all RPC responses message RpcResponse { - // The request ID of a prior request. - uint32 request_id = 1; - // The status of the response. A non-zero status indicates an error. - uint32 status = 2; - // Message flags. Currently only used to indicate if a stream of messages has completed. - uint32 flags = 3; - - // The message payload. If the status is non-zero, this contains additional error details. - bytes payload = 10; + // The request ID of a prior request. + uint32 request_id = 1; + // The status of the response. A non-zero status indicates an error. + uint32 status = 2; + // Message flags. Currently only used to indicate if a stream of messages has completed. + uint32 flags = 3; + + // The message payload. If the status is non-zero, this contains additional error details. + bytes payload = 10; } // Message sent by the client when negotiating an RPC session. A server may close the substream if it does // not agree with the session parameters. 
message RpcSession { - // The RPC versions supported by the client - repeated uint32 supported_versions = 1; + // The RPC versions supported by the client + repeated uint32 supported_versions = 1; } message RpcSessionReply { - oneof session_result { - // The RPC version selected by the server - uint32 accepted_version = 1; - // Indicates the server rejected the session - bool rejected = 2; - } - enum HandshakeRejectReason { - HANDSHAKE_REJECT_REASON_UNKNOWN = 0; - HANDSHAKE_REJECT_REASON_UNSUPPORTED_VERSION = 1; - HANDSHAKE_REJECT_REASON_NO_SESSIONS_AVAILABLE = 2; - HANDSHAKE_REJECT_REASON_PROTOCOL_NOT_SUPPORTED= 3; - } - HandshakeRejectReason reject_reason = 3; + oneof session_result { + // The RPC version selected by the server + uint32 accepted_version = 1; + // Indicates the server rejected the session + bool rejected = 2; + } + enum HandshakeRejectReason { + HANDSHAKE_REJECT_REASON_UNKNOWN = 0; + HANDSHAKE_REJECT_REASON_UNSUPPORTED_VERSION = 1; + HANDSHAKE_REJECT_REASON_NO_SESSIONS_AVAILABLE = 2; + HANDSHAKE_REJECT_REASON_NO_PER_PEER_SESSIONS_AVAILABLE = 3; + HANDSHAKE_REJECT_REASON_PROTOCOL_NOT_SUPPORTED = 4; + } + HandshakeRejectReason reject_reason = 3; } diff --git a/network/rpc_framework/src/client/metrics.rs b/network/rpc_framework/src/client/metrics.rs index 8c9a7201d4..7196e3f797 100644 --- a/network/rpc_framework/src/client/metrics.rs +++ b/network/rpc_framework/src/client/metrics.rs @@ -27,7 +27,7 @@ use tari_metrics::{Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, pub fn num_sessions(peer: &PeerId, protocol: &StreamProtocol) -> IntGauge { static METER: Lazy = Lazy::new(|| { tari_metrics::register_int_gauge_vec( - "comms::rpc::client::num_sessions", + "network::rpc::client::num_sessions", "The number of active clients per peer per protocol", &["peer_id", "protocol"], ) @@ -40,7 +40,7 @@ pub fn num_sessions(peer: &PeerId, protocol: &StreamProtocol) -> IntGauge { pub fn handshake_counter(peer: &PeerId, protocol: &StreamProtocol) -> 
IntCounter { static METER: Lazy = Lazy::new(|| { tari_metrics::register_int_counter_vec( - "comms::rpc::client::handshake_count", + "network::rpc::client::handshake_count", "The number of handshakes per peer per protocol", &["peer_id", "protocol"], ) @@ -53,7 +53,7 @@ pub fn handshake_counter(peer: &PeerId, protocol: &StreamProtocol) -> IntCounter pub fn handshake_errors(peer: &PeerId, protocol: &StreamProtocol) -> IntCounter { static METER: Lazy = Lazy::new(|| { tari_metrics::register_int_counter_vec( - "comms::rpc::client::handshake_errors", + "network::rpc::client::handshake_errors", "The number of handshake errors per peer per protocol", &["peer_id", "protocol"], ) @@ -66,7 +66,7 @@ pub fn handshake_errors(peer: &PeerId, protocol: &StreamProtocol) -> IntCounter pub fn client_errors(peer: &PeerId, protocol: &StreamProtocol) -> IntCounter { static METER: Lazy = Lazy::new(|| { tari_metrics::register_int_counter_vec( - "comms::rpc::client::error_count", + "network::rpc::client::error_count", "The number of client errors per peer per protocol", &["peer_id", "protocol"], ) @@ -79,7 +79,7 @@ pub fn client_errors(peer: &PeerId, protocol: &StreamProtocol) -> IntCounter { pub fn client_timeouts(peer: &PeerId, protocol: &StreamProtocol) -> IntCounter { static METER: Lazy = Lazy::new(|| { tari_metrics::register_int_counter_vec( - "comms::rpc::client::error_timeouts", + "network::rpc::client::error_timeouts", "The number of client timeouts per peer per protocol", &["peer_id", "protocol"], ) @@ -92,7 +92,7 @@ pub fn client_timeouts(peer: &PeerId, protocol: &StreamProtocol) -> IntCounter { pub fn request_response_latency(peer: &PeerId, protocol: &StreamProtocol) -> Histogram { static METER: Lazy = Lazy::new(|| { tari_metrics::register_histogram_vec( - "comms::rpc::client::request_response_latency", + "network::rpc::client::request_response_latency", "A histogram of request to first response latency", &["peer_id", "protocol"], ) @@ -105,7 +105,7 @@ pub fn 
request_response_latency(peer: &PeerId, protocol: &StreamProtocol) -> His pub fn outbound_request_bytes(peer: &PeerId, protocol: &StreamProtocol) -> Histogram { static METER: Lazy = Lazy::new(|| { tari_metrics::register_histogram_vec( - "comms::rpc::client::outbound_request_bytes", + "network::rpc::client::outbound_request_bytes", "Avg. request bytes per peer per protocol", &["peer_id", "protocol"], ) @@ -118,7 +118,7 @@ pub fn outbound_request_bytes(peer: &PeerId, protocol: &StreamProtocol) -> Histo pub fn inbound_response_bytes(peer: &PeerId, protocol: &StreamProtocol) -> Histogram { static METER: Lazy = Lazy::new(|| { tari_metrics::register_histogram_vec( - "comms::rpc::client::inbound_response_bytes", + "network::rpc::client::inbound_response_bytes", "Avg. response bytes per peer per protocol", &["peer_id", "protocol"], ) diff --git a/network/rpc_framework/src/client/mod.rs b/network/rpc_framework/src/client/mod.rs index f55adfcc1b..c0af14f890 100644 --- a/network/rpc_framework/src/client/mod.rs +++ b/network/rpc_framework/src/client/mod.rs @@ -36,7 +36,6 @@ use std::{ fmt, future::Future, marker::PhantomData, - sync::Arc, time::{Duration, Instant}, }; @@ -56,7 +55,7 @@ use log::*; use prost::Message; use tari_shutdown::{Shutdown, ShutdownSignal}; use tokio::{ - sync::{mpsc, oneshot, watch, Mutex}, + sync::{mpsc, oneshot, watch}, time, }; use tower::{Service, ServiceExt}; @@ -75,6 +74,7 @@ use crate::{ RpcHandshakeError, RpcServerError, RpcStatus, + CHECK_BYTES, }; const LOG_TARGET: &str = "network::rpc::client"; @@ -163,8 +163,8 @@ impl RpcClient { } /// Close the RPC session. Any subsequent calls will error. 
- pub async fn close(&mut self) { - self.connector.close().await; + pub fn close(&mut self) { + self.connector.close(); } pub fn is_connected(&self) -> bool { @@ -309,7 +309,7 @@ impl Default for RpcClientConfig { pub struct ClientConnector { inner: mpsc::Sender, last_request_latency_rx: watch::Receiver>, - shutdown: Arc>, + shutdown: Shutdown, } impl ClientConnector { @@ -321,13 +321,12 @@ impl ClientConnector { Self { inner: sender, last_request_latency_rx, - shutdown: Arc::new(Mutex::new(shutdown)), + shutdown, } } - pub async fn close(&mut self) { - let mut lock = self.shutdown.lock().await; - lock.trigger(); + pub fn close(&mut self) { + self.shutdown.trigger(); } pub fn get_last_request_latency(&mut self) -> Option { @@ -424,6 +423,7 @@ where TSubstream: AsyncRead + AsyncWrite + Unpin + Send self.protocol_id.as_ref() } + #[allow(clippy::too_many_lines)] async fn run(mut self) { debug!( target: LOG_TARGET, @@ -470,6 +470,9 @@ where TSubstream: AsyncRead + AsyncWrite + Unpin + Send }, server_msg = self.framed.next() => { match server_msg { + Some(Ok(frame)) if frame.as_ref() == CHECK_BYTES.as_ref() => { + trace!(target: LOG_TARGET, "Received check bytes"); + }, Some(Ok(msg)) => { if let Err(err) = self.handle_interrupt_server_message(msg) { #[cfg(feature = "metrics")] @@ -509,6 +512,15 @@ where TSubstream: AsyncRead + AsyncWrite + Unpin + Send #[cfg(feature = "metrics")] metrics::num_sessions(&self.peer_id, &self.protocol_id).dec(); + if let Err(err) = self.send_polite_close().await { + debug!( + target: LOG_TARGET, + "(peer: {}) IO Error when sending close substream: {}", + self.peer_id, + err + ); + } + if let Err(err) = self.framed.close().await { debug!( target: LOG_TARGET, @@ -685,10 +697,12 @@ where TSubstream: AsyncRead + AsyncWrite + Unpin + Send // Check if the response receiver has been dropped while receiving messages let resp_result = { + let shutdown_signal = self.shutdown_signal.clone(); let resp_fut = self.read_response(request_id); 
tokio::pin!(resp_fut); - let closed_fut = response_tx.closed(); - tokio::pin!(closed_fut); + let resp_closed = response_tx.closed(); + tokio::pin!(resp_closed); + let closed_fut = shutdown_signal.select(resp_closed); match future::select(resp_fut, closed_fut).await { Either::Left((r, _)) => Some(r), Either::Right(_) => None, @@ -812,6 +826,19 @@ where TSubstream: AsyncRead + AsyncWrite + Unpin + Send Ok(()) } + async fn send_polite_close(&mut self) -> Result<(), RpcError> { + let req = proto::RpcRequest { + flags: RpcMessageFlags::FIN.bits().into(), + ..Default::default() + }; + + // If we cannot set FIN quickly, just exit + if let Ok(res) = time::timeout(Duration::from_secs(2), self.send_request(req)).await { + res?; + } + Ok(()) + } + async fn send_request(&mut self, req: proto::RpcRequest) -> Result<(), RpcError> { let payload = req.encode_to_vec(); if payload.len() > crate::max_request_size() { @@ -986,17 +1013,22 @@ where TSubstream: AsyncRead + AsyncWrite + Unpin } async fn next(&mut self) -> Result { - // Wait until the timeout, allowing an extra grace period to account for latency - let next_msg_fut = match self.config.timeout_with_grace_period() { - Some(timeout) => Either::Left(time::timeout(timeout, self.framed.next())), - None => Either::Right(self.framed.next().map(Ok)), - }; + loop { + // Wait until the timeout, allowing an extra grace period to account for latency + let next_msg_fut = match self.config.timeout_with_grace_period() { + Some(timeout) => Either::Left(time::timeout(timeout, self.framed.next())), + None => Either::Right(self.framed.next().map(Ok)), + }; - match next_msg_fut.await { - Ok(Some(Ok(resp))) => Ok(proto::RpcResponse::decode(resp)?), - Ok(Some(Err(err))) => Err(err.into()), - Ok(None) => Err(RpcError::ServerClosedRequest), - Err(_) => Err(RpcError::ReplyTimeout), + match next_msg_fut.await { + Ok(Some(Ok(resp))) if resp == CHECK_BYTES => { + trace!(target: LOG_TARGET, "Received CHECK_BYTES"); + }, + Ok(Some(Ok(resp))) => return 
Ok(proto::RpcResponse::decode(resp)?), + Ok(Some(Err(err))) => return Err(err.into()), + Ok(None) => return Err(RpcError::ServerClosedRequest), + Err(_) => return Err(RpcError::ReplyTimeout), + } } } } diff --git a/network/rpc_framework/src/client/pool.rs b/network/rpc_framework/src/client/pool.rs index 456a2ce1d9..5a66837f6a 100644 --- a/network/rpc_framework/src/client/pool.rs +++ b/network/rpc_framework/src/client/pool.rs @@ -63,6 +63,18 @@ where let mut pool = self.pool.lock().await; pool.get_least_used_or_connect().await } + + pub async fn clear_unused_leases(&self) { + let mut pool = self.pool.lock().await; + pool.clear_unused_leases(); + } + + pub async fn close(self) { + let mut pool = self.pool.lock().await; + for mut client in pool.clients.drain(..) { + client.close(); + } + } } #[derive(Clone)] @@ -96,10 +108,21 @@ where Ok(c) => c, // This is an edge case where the remote node does not have any further sessions available. This // is gracefully handled by returning one of the existing used sessions. - Err(RpcClientPoolError::NoMoreRemoteRpcSessions) => self - .get_least_used() - .ok_or(RpcClientPoolError::NoMoreRemoteRpcSessions)?, + Err(RpcClientPoolError::NoMoreRemoteRpcSessions) => { + trace!( + target: LOG_TARGET, + "get_least_used_or_connect: no more remote rpc sessions, trying least used." + ); + self.get_least_used().ok_or({ + trace!( + target: LOG_TARGET, + "get_least_used_or_connect: least used client not found." + ); + RpcClientPoolError::NoMoreRemoteRpcSessions + })? + }, Err(err) => { + trace!(target: LOG_TARGET, "get_least_used_or_connect: returning error ({})", err); return Err(err); }, } @@ -108,6 +131,7 @@ where if !client.is_connected() { self.prune(); + trace!(target: LOG_TARGET, "get_least_used_or_connect: new client is not connected, pruning"); continue; } @@ -116,10 +140,27 @@ where } } - // pub fn is_connected(&self) -> bool { - // // We assume a connection if any of the clients are connected.
- // self.clients.iter().any(|lease| lease.is_connected()) - // } + pub fn clear_unused_leases(&mut self) { + let initial_len = self.clients.len(); + let cap = self.clients.capacity(); + self.clients = self.clients.drain(..).fold(Vec::with_capacity(cap), |mut vec, c| { + // 1 lease is held by the pool + if c.is_connected() && c.lease_count() > 1 { + vec.push(c); + } + vec + }); + + let num_cleared = initial_len - self.clients.len(); + if num_cleared > 0 { + debug!( + target: LOG_TARGET, + "Cleared {} unused client(s) (total: {})", + num_cleared, + self.clients.len() + ) + } + } #[allow(dead_code)] pub(super) fn refresh_num_active_connections(&mut self) -> usize { @@ -136,23 +177,28 @@ where // If the pool is full, we choose the client with the smallest lease_count (i.e. the one that is being used // the least or not at all). if self.is_full() { + debug!(target: LOG_TARGET, "get_next_lease: full using client with {} lease(s) (is_connected = {})", client.lease_count(), client.is_connected()); return Some(client); } // Otherwise, if the least used connection is still in use and since there is capacity for more connections, // return None. This indicates that the best option is to create a new connection. 
if client.lease_count() > 0 { + debug!(target: LOG_TARGET, "get_next_lease: least used client has {} lease(s) but more capacity exists.", client.lease_count()); return None; } + debug!(target: LOG_TARGET, "get_next_lease: least used client has no lease."); Some(client) } fn get_least_used(&self) -> Option<&RpcClientLease> { let mut min_count = usize::MAX; let mut selected_client = None; + debug!(target: LOG_TARGET, "get_least_used: #clients: {}", self.clients.len()); for client in &self.clients { let lease_count = client.lease_count(); + debug!(target: LOG_TARGET, "get_least_used: lease count: {}, is_connected: {}", lease_count, client.is_connected()); if lease_count == 0 { return Some(client); } @@ -172,6 +218,7 @@ where async fn add_new_client_session(&mut self) -> Result<&RpcClientLease, RpcClientPoolError> { debug_assert!(!self.is_full(), "add_new_client called when pool is full"); + debug!(target: LOG_TARGET, "Attempting new RPC pool session for {} (#clients = {})", self.client_config.peer_id(), self.clients.len()); let client = self .connector .connect_rpc_using_builder(self.client_config.clone()) @@ -243,14 +290,20 @@ impl RpcPoolClient for RpcClientLease { fn is_connected(&self) -> bool { self.inner.is_connected() } + + fn close(&mut self) { + self.inner.close(); + } } #[derive(Debug, thiserror::Error)] pub enum RpcClientPoolError { #[error("Peer connection to peer '{peer}' dropped")] PeerConnectionDropped { peer: PeerId }, - #[error("No peer RPC sessions are available")] + #[error("No RPC sessions are available")] NoMoreRemoteRpcSessions, + #[error("No RPC sessions are available (per peer)")] + NoMoreRemotePerPeerRpcSessions, #[error("Failed to create client connection: {0}")] FailedToConnect(String), } @@ -261,6 +314,9 @@ impl From for RpcClientPoolError { match err { RpcError::HandshakeError(RpcHandshakeError::Rejected(HandshakeRejectReason::NoSessionsAvailable)) => { RpcClientPoolError::NoMoreRemoteRpcSessions }, +
RpcError::HandshakeError(RpcHandshakeError::Rejected( + HandshakeRejectReason::NoPerPeerSessionsAvailable, + )) => RpcClientPoolError::NoMoreRemotePerPeerRpcSessions, err => RpcClientPoolError::FailedToConnect(err.to_string()), } } @@ -268,10 +324,15 @@ impl From for RpcClientPoolError { pub trait RpcPoolClient { fn is_connected(&self) -> bool; + fn close(&mut self); } impl RpcPoolClient for RpcClient { fn is_connected(&self) -> bool { RpcClient::is_connected(self) } + + fn close(&mut self) { + RpcClient::close(self) + } } diff --git a/network/rpc_framework/src/error.rs b/network/rpc_framework/src/error.rs index 129f8b6587..0d319e65ba 100644 --- a/network/rpc_framework/src/error.rs +++ b/network/rpc_framework/src/error.rs @@ -103,6 +103,8 @@ pub enum HandshakeRejectReason { UnsupportedVersion, #[error("no more RPC sessions available")] NoSessionsAvailable, + #[error("no more per peer RPC sessions available")] + NoPerPeerSessionsAvailable, #[error("protocol not supported")] ProtocolNotSupported, #[error("unknown protocol error: {0}")] @@ -128,6 +130,7 @@ impl From for HandshakeReje match reason { UnsupportedVersion => HandshakeRejectReason::UnsupportedVersion, NoSessionsAvailable => HandshakeRejectReason::NoSessionsAvailable, + NoPerPeerSessionsAvailable => HandshakeRejectReason::NoPerPeerSessionsAvailable, ProtocolNotSupported => HandshakeRejectReason::ProtocolNotSupported, Unknown => HandshakeRejectReason::Unknown("reject reason is not known"), } @@ -141,6 +144,7 @@ impl From for rpc_proto::rpc_session_reply::HandshakeReje match reason { HandshakeRejectReason::UnsupportedVersion => UnsupportedVersion, HandshakeRejectReason::NoSessionsAvailable => NoSessionsAvailable, + HandshakeRejectReason::NoPerPeerSessionsAvailable => NoPerPeerSessionsAvailable, HandshakeRejectReason::ProtocolNotSupported => ProtocolNotSupported, HandshakeRejectReason::Unknown(_) => Unknown, } diff --git a/network/rpc_framework/src/handshake.rs b/network/rpc_framework/src/handshake.rs index 
2a910ecd05..fe83bac4e1 100644 --- a/network/rpc_framework/src/handshake.rs +++ b/network/rpc_framework/src/handshake.rs @@ -11,7 +11,7 @@ use tracing::{debug, error, span, warn, Instrument, Level}; use crate::{error::HandshakeRejectReason, framing::CanonicalFraming, proto}; -const LOG_TARGET: &str = "comms::rpc::handshake"; +const LOG_TARGET: &str = "network::rpc::handshake"; /// Supported RPC protocol versions. /// Currently only v0 is supported diff --git a/network/rpc_framework/src/lib.rs b/network/rpc_framework/src/lib.rs index 2c2531ce94..fec6d504c4 100644 --- a/network/rpc_framework/src/lib.rs +++ b/network/rpc_framework/src/lib.rs @@ -32,6 +32,9 @@ /// This can be thought of as the hard limit on message size. pub const RPC_MAX_FRAME_SIZE: usize = 4 * 1024 * 1024; // 4 MiB +/// Bytes used for session aliveness check +pub(crate) const CHECK_BYTES: Bytes = Bytes::from_static(&[0xde, 0xad, 0xbe, 0xef]); + /// The maximum request payload size const fn max_request_size() -> usize { RPC_MAX_FRAME_SIZE @@ -110,6 +113,7 @@ pub mod __macro_reexports { } pub use async_trait::async_trait; +use bytes::Bytes; // TODO: We could fairly easily abstract this and make this framework independent of libp2p pub type Substream = libp2p::Stream; diff --git a/network/rpc_framework/src/server/error.rs b/network/rpc_framework/src/server/error.rs index a822893ccc..931643a0e3 100644 --- a/network/rpc_framework/src/server/error.rs +++ b/network/rpc_framework/src/server/error.rs @@ -64,9 +64,10 @@ pub enum RpcServerError { } impl RpcServerError { - pub fn early_close_io(&self) -> Option<&io::Error> { + pub fn io(&self) -> Option<&io::Error> { match self { Self::EarlyClose(e) => e.io(), + Self::Io(e) => Some(e), _ => None, } } diff --git a/network/rpc_framework/src/server/metrics.rs b/network/rpc_framework/src/server/metrics.rs index 6c4aa85ba8..1ca071d96e 100644 --- a/network/rpc_framework/src/server/metrics.rs +++ b/network/rpc_framework/src/server/metrics.rs @@ -29,7 +29,7 @@ use 
crate::{RpcServerError, RpcStatusCode}; pub fn num_sessions(peer_id: &PeerId, protocol: &StreamProtocol) -> IntGauge { static METER: Lazy = Lazy::new(|| { tari_metrics::register_int_gauge_vec( - "comms::rpc::server::num_sessions", + "network::rpc::server::num_sessions", "The number of active server sessions per peer per protocol", &["peer_id", "protocol"], ) @@ -42,7 +42,7 @@ pub fn num_sessions(peer_id: &PeerId, protocol: &StreamProtocol) -> IntGauge { pub fn handshake_error_counter(peer_id: &PeerId, protocol: &StreamProtocol) -> IntCounter { static METER: Lazy = Lazy::new(|| { tari_metrics::register_int_counter_vec( - "comms::rpc::server::handshake_error_count", + "network::rpc::server::handshake_error_count", "The number of handshake errors per peer per protocol", &["peer_id", "protocol"], ) @@ -55,7 +55,7 @@ pub fn handshake_error_counter(peer_id: &PeerId, protocol: &StreamProtocol) -> I pub fn error_counter(peer_id: &PeerId, protocol: &StreamProtocol, err: &RpcServerError) -> IntCounter { static METER: Lazy = Lazy::new(|| { tari_metrics::register_int_counter_vec( - "comms::rpc::server::error_count", + "network::rpc::server::error_count", "The number of RPC errors per peer per protocol", &["peer_id", "protocol", "error"], ) @@ -72,7 +72,7 @@ pub fn error_counter(peer_id: &PeerId, protocol: &StreamProtocol, err: &RpcServe pub fn status_error_counter(peer_id: &PeerId, protocol: &StreamProtocol, status_code: RpcStatusCode) -> IntCounter { static METER: Lazy = Lazy::new(|| { tari_metrics::register_int_counter_vec( - "comms::rpc::server::status_error_count", + "network::rpc::server::status_error_count", "The number of RPC errors by status code per peer per protocol", &["peer_id", "protocol", "status"], ) @@ -89,7 +89,7 @@ pub fn status_error_counter(peer_id: &PeerId, protocol: &StreamProtocol, status_ pub fn inbound_requests_bytes(peer_id: &PeerId, protocol: &StreamProtocol) -> Histogram { static METER: Lazy = Lazy::new(|| { tari_metrics::register_histogram_vec( - 
"comms::rpc::server::inbound_request_bytes", + "network::rpc::server::inbound_request_bytes", "Avg. request bytes per peer per protocol", &["peer_id", "protocol"], ) @@ -102,7 +102,7 @@ pub fn inbound_requests_bytes(peer_id: &PeerId, protocol: &StreamProtocol) -> Hi pub fn outbound_response_bytes(peer_id: &PeerId, protocol: &StreamProtocol) -> Histogram { static METER: Lazy = Lazy::new(|| { tari_metrics::register_histogram_vec( - "comms::rpc::server::outbound_response_bytes", + "network::rpc::server::outbound_response_bytes", "Avg. response bytes per peer per protocol", &["peer_id", "protocol"], ) diff --git a/network/rpc_framework/src/server/mod.rs b/network/rpc_framework/src/server/mod.rs index ea7c0da6bf..242d22250d 100644 --- a/network/rpc_framework/src/server/mod.rs +++ b/network/rpc_framework/src/server/mod.rs @@ -49,14 +49,14 @@ use std::{ time::{Duration, Instant}, }; -use bytes::Bytes; +use bytes::{Bytes, BytesMut}; use futures::{future, stream::FuturesUnordered, SinkExt, Stream, StreamExt}; use libp2p::{PeerId, StreamProtocol}; use libp2p_substream::{ProtocolEvent, ProtocolNotification}; use log::*; use prost::Message; use router::Router; -use tokio::{sync::mpsc, task::JoinHandle, time}; +use tokio::{sync::mpsc, task::JoinHandle, time, time::MissedTickBehavior}; use tower::{make::MakeService, Service}; use tracing::{debug, error, instrument, span, trace, warn, Instrument, Level}; @@ -69,6 +69,7 @@ use super::{ status::RpcStatus, Handshake, Substream, + CHECK_BYTES, RPC_MAX_FRAME_SIZE, }; use crate::{ @@ -79,10 +80,9 @@ use crate::{ message::{RpcMethod, RpcResponse}, notify::ProtocolNotificationRx, proto, - server::early_close::EarlyClose, }; -const LOG_TARGET: &str = "comms::rpc::server"; +const LOG_TARGET: &str = "network::rpc::server"; pub trait NamedProtocolService { const PROTOCOL_NAME: &'static str; @@ -165,6 +165,7 @@ pub struct RpcServerBuilder { maximum_sessions_per_client: Option, minimum_client_deadline: Duration, handshake_timeout: Duration, + 
check_connection_interval: Duration, } impl RpcServerBuilder { @@ -197,6 +198,11 @@ impl RpcServerBuilder { self } + pub fn with_check_connection_interval(mut self, interval: Duration) -> Self { + self.check_connection_interval = interval; + self + } + pub fn finish(self) -> RpcServer { let (request_tx, request_rx) = mpsc::channel(10); RpcServer { @@ -214,6 +220,7 @@ impl Default for RpcServerBuilder { maximum_sessions_per_client: None, minimum_client_deadline: Duration::from_secs(1), handshake_timeout: Duration::from_secs(15), + check_connection_interval: Duration::from_secs(5), } } } @@ -419,10 +426,16 @@ where "NEW SESSION for {} ({} active) ", peer_id, num_sessions ); }, - + Err(err @ RpcServerError::MaxSessionsPerClientReached { .. }) => { + handshake + .reject_with_reason(HandshakeRejectReason::NoPerPeerSessionsAvailable) + .await?; + return Err(err); + }, Err(err) => { + // Not reachable since new_session_for only returns the above error handshake - .reject_with_reason(HandshakeRejectReason::NoSessionsAvailable) + .reject_with_reason(HandshakeRejectReason::Unknown("new_session_for unreachable error")) .await?; return Err(err); }, @@ -463,7 +476,7 @@ struct ActivePeerRpcService { protocol: StreamProtocol, peer_id: PeerId, service: TSvc, - framed: EarlyClose>, + framed: CanonicalFraming, logging_context_string: Arc, } @@ -484,7 +497,7 @@ where TSvc: Service, Response = Response, Error = RpcStatus protocol, peer_id, service, - framed: EarlyClose::new(framed), + framed, } } @@ -512,59 +525,88 @@ where TSvc: Service, Response = Response, Error = RpcStatus } async fn run(&mut self) -> Result<(), RpcServerError> { - while let Some(result) = self.framed.next().await { - match result { - Ok(frame) => { - #[cfg(feature = "metrics")] - metrics::inbound_requests_bytes(&self.peer_id, &self.protocol).observe(frame.len() as f64); + let mut interval = time::interval(self.config.check_connection_interval); + // If there is a missed tick, the handle_request essentially checks 
the connection, so we can skip + interval.set_missed_tick_behavior(MissedTickBehavior::Skip); + loop { + tokio::select! { + maybe_result = self.framed.next() => { + match maybe_result { + Some(result) => self.handle_request_message(result).await?, + None => break, + } + }, + + _ = interval.tick() => { + if let Err(err) = self.check_stream().await { + debug!(target: LOG_TARGET, "check_stream detected closed or malfunctioning session: {err}. Closing session."); + break; + } + } + } + } - let start = Instant::now(); + self.framed.close().await?; + Ok(()) + } - if let Err(err) = self.handle_request(frame.freeze()).await { - if let Err(err) = self.framed.close().await { - let level = err.io().map(err_to_log_level).unwrap_or(log::Level::Error); + async fn handle_request_message(&mut self, result: io::Result) -> Result<(), RpcServerError> { + match result { + Ok(frame) => { + #[cfg(feature = "metrics")] + metrics::inbound_requests_bytes(&self.peer_id, &self.protocol).observe(frame.len() as f64); + + let start = Instant::now(); + + if let Err(err) = self.handle_request(frame.freeze()).await { + if let Err(err) = self.framed.close().await { + let level = err_to_log_level(&err); - log!( - target: LOG_TARGET, - level, - "({}) Failed to close substream after socket error: {}", - self.logging_context_string, - err, - ); - } - let level = err.early_close_io().map(err_to_log_level).unwrap_or(log::Level::Error); log!( target: LOG_TARGET, level, - "(peer: {}, protocol: {}) Failed to handle request: {}", - self.peer_id, - self.protocol_name(), - err + "({}) Failed to close substream after socket error: {}", + self.logging_context_string, + err, ); - return Err(err); } - let elapsed = start.elapsed(); - debug!( + let level = err.io().map(err_to_log_level).unwrap_or(log::Level::Error); + log!( target: LOG_TARGET, - "({}) RPC request completed in {:.0?}{}", - self.logging_context_string, - elapsed, - if elapsed.as_secs() > 5 { " (LONG REQUEST)" } else { "" } + level, + "(peer: {}, 
protocol: {}) Failed to handle request: {}", + self.peer_id, + self.protocol_name(), + err ); - }, - Err(err) => { - if let Err(err) = self.framed.close().await { - error!( - target: LOG_TARGET, - "({}) Failed to close substream after socket error: {}", self.logging_context_string, err - ); - } - return Err(err.into()); - }, - } + return Err(err); + } + let elapsed = start.elapsed(); + debug!( + target: LOG_TARGET, + "({}) RPC request completed in {:.0?}{}", + self.logging_context_string, + elapsed, + if elapsed.as_secs() > 5 { " (LONG REQUEST)" } else { "" } + ); + Ok(()) + }, + Err(err) => { + if let Err(err) = self.framed.close().await { + error!( + target: LOG_TARGET, + "({}) Failed to close substream after socket error: {}", self.logging_context_string, err + ); + } + Err(err.into()) + }, } + } - self.framed.close().await?; + async fn check_stream(&mut self) -> Result<(), RpcServerError> { + // https://users.rust-lang.org/t/tokio-detect-connection-loss/116217/4 + self.framed.send(CHECK_BYTES).await?; + self.framed.flush().await?; Ok(()) } @@ -576,6 +618,19 @@ where TSvc: Service, Response = Response, Error = RpcStatus let method = RpcMethod::from(decoded_msg.method); let deadline = Duration::from_secs(decoded_msg.deadline); + let msg_flags = RpcMessageFlags::from_bits(u8::try_from(decoded_msg.flags).map_err(|_| { + RpcServerError::ProtocolError(format!("invalid message flag: must be less than {}", u8::MAX)) + })?) 
+ .ok_or(RpcServerError::ProtocolError(format!( + "invalid message flag, does not match any flags ({})", + decoded_msg.flags + )))?; + + if msg_flags.contains(RpcMessageFlags::FIN) { + debug!(target: LOG_TARGET, "({}) Client sent FIN.", self.logging_context_string); + return Ok(()); + } + // The client side deadline MUST be greater or equal to the minimum_client_deadline if deadline < self.config.minimum_client_deadline { debug!( @@ -599,18 +654,6 @@ where TSvc: Service, Response = Response, Error = RpcStatus return Ok(()); } - let msg_flags = RpcMessageFlags::from_bits(u8::try_from(decoded_msg.flags).map_err(|_| { - RpcServerError::ProtocolError(format!("invalid message flag: must be less than {}", u8::MAX)) - })?) - .ok_or(RpcServerError::ProtocolError(format!( - "invalid message flag, does not match any flags ({})", - decoded_msg.flags - )))?; - - if msg_flags.contains(RpcMessageFlags::FIN) { - debug!(target: LOG_TARGET, "({}) Client sent FIN.", self.logging_context_string); - return Ok(()); - } if msg_flags.contains(RpcMessageFlags::ACK) { debug!( target: LOG_TARGET, @@ -885,7 +928,8 @@ fn err_to_log_level(err: &io::Error) -> log::Level { ErrorKind::ConnectionAborted | ErrorKind::BrokenPipe | ErrorKind::WriteZero | - ErrorKind::UnexpectedEof => log::Level::Debug, + ErrorKind::UnexpectedEof | + ErrorKind::Interrupted => log::Level::Debug, _ => log::Level::Error, } } diff --git a/network/rpc_framework/src/status.rs b/network/rpc_framework/src/status.rs index 6a02a98b90..de2a6cb56e 100644 --- a/network/rpc_framework/src/status.rs +++ b/network/rpc_framework/src/status.rs @@ -9,7 +9,7 @@ use thiserror::Error; use super::RpcError; use crate::{optional::OrOptional, proto}; -const LOG_TARGET: &str = "comms::rpc::status"; +const LOG_TARGET: &str = "network::rpc::status"; #[derive(Debug, Error, Clone, PartialEq, Eq)] pub struct RpcStatus { diff --git a/network/rpc_macros/src/generator.rs b/network/rpc_macros/src/generator.rs index 656829625a..5a20b24b4e 100644 --- 
a/network/rpc_macros/src/generator.rs +++ b/network/rpc_macros/src/generator.rs @@ -217,8 +217,8 @@ impl RpcCodeGenerator { self.inner.ping().await } - pub async fn close(&mut self) { - self.inner.close().await; + pub fn close(&mut self) { + self.inner.close(); } }; @@ -250,6 +250,10 @@ impl RpcCodeGenerator { fn is_connected(&self) -> bool { self.inner.is_connected() } + + fn close(&mut self) { + self.inner.close(); + } } } } diff --git a/network/swarm/src/behaviour.rs b/network/swarm/src/behaviour.rs index 024a6acece..3cbf7160e0 100644 --- a/network/swarm/src/behaviour.rs +++ b/network/swarm/src/behaviour.rs @@ -90,6 +90,8 @@ where let gossipsub_config = gossipsub::ConfigBuilder::default() .max_transmit_size(config.gossipsub_max_message_size) .validation_mode(gossipsub::ValidationMode::Strict) // This sets the kind of message validation. The default is Strict (enforce message signing) + // Require the use of report_message_validation_result before messages are forwarded onwards + .validate_messages() .message_id_fn(get_message_id) // content-address messages. No two messages of the same content will be propagated. 
.build() .unwrap(); @@ -144,7 +146,7 @@ where // Connection limits let connection_limits = connection_limits::Behaviour::new( - ConnectionLimits::default().with_max_established_per_peer(config.max_connections_per_peer), + ConnectionLimits::default().with_max_established_incoming(config.max_inbound_connections_per_peer), ); // mDNS diff --git a/network/swarm/src/config.rs b/network/swarm/src/config.rs index 68a7d50b5a..2d7ab508c3 100644 --- a/network/swarm/src/config.rs +++ b/network/swarm/src/config.rs @@ -13,7 +13,7 @@ pub struct Config { pub user_agent: String, pub messaging_protocol: String, pub ping: ping::Config, - pub max_connections_per_peer: Option, + pub max_inbound_connections_per_peer: Option, pub enable_mdns: bool, pub enable_relay: bool, pub enable_messaging: bool, @@ -31,7 +31,7 @@ impl Default for Config { user_agent: "/tari/unknown/0.0.1".to_string(), messaging_protocol: "/tari/messaging/0.0.1".to_string(), ping: ping::Config::default(), - max_connections_per_peer: Some(3), + max_inbound_connections_per_peer: None, enable_mdns: false, enable_relay: false, enable_messaging: true,