synor/crates/synor-network/src/partition.rs
Gulshan Yadav 7c7137c4f6 fix: resolve clippy warnings for Rust 1.93
- Replace manual modulo checks with .is_multiple_of()
- Use enumerate() instead of manual loop counters
- Use iterator .take() instead of index-based loops
- Use slice literals instead of unnecessary vec![]
- Allow too_many_arguments in IBC and bridge crates (protocol requirements)
- Allow assertions on constants in integration tests
2026-02-02 06:18:16 +05:30

1205 lines
40 KiB
Rust

//! Network partition detection for Synor blockchain.
//!
//! This module provides mechanisms to detect when a node might be partitioned
//! from the main network. Partition detection is critical for GHOSTDAG-based
//! blockchains to prevent the node from operating on a minority fork.
//!
//! # Detection Methods
//!
//! The detector uses multiple signals to identify potential partitions:
//!
//! - **Peer count drops**: Sudden loss of peers may indicate network issues
//! - **Tip divergence**: When our tips don't match what peers report
//! - **Block production stalls**: No new blocks received for extended periods
//! - **Peer diversity degradation**: Loss of geographic/subnet diversity
//! - **Protocol version skew**: Most peers on different protocol versions
//!
//! # Partition States
//!
//! - `Connected`: Normal operation, healthy network connectivity
//! - `Degraded`: Warning state, some metrics are concerning but not critical
//! - `Partitioned`: Node appears isolated from the main network
//!
//! # Usage
//!
//! ```ignore
//! use synor_network::partition::{PartitionDetector, PartitionConfig};
//!
//! let config = PartitionConfig::default();
//! let detector = PartitionDetector::new(config);
//!
//! // Periodically update metrics
//! detector.record_peer_connected(peer_id, Some(ip), is_outbound);
//! detector.update_peer_protocol_version(&peer_id, protocol_version);
//! detector.record_block_received(block_hash);
//!
//! // Check partition status
//! match detector.status() {
//! PartitionStatus::Connected => { /* normal operation */ }
//! PartitionStatus::Degraded { reasons } => { /* log warnings */ }
//! PartitionStatus::Partitioned { reasons } => { /* halt mining, alert */ }
//! }
//! ```
use hashbrown::{HashMap, HashSet};
use libp2p::PeerId;
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::collections::VecDeque;
use std::net::IpAddr;
use std::time::{Duration, Instant};
use synor_types::Hash256;
/// Tunable thresholds that drive partition detection.
///
/// Fraction-valued fields (`max_subnet_concentration`, `min_tip_agreement`,
/// `block_rate_threshold`, `min_protocol_agreement`) are expressed in the
/// range 0.0-1.0, not as percentages.
#[derive(Clone, Debug)]
pub struct PartitionConfig {
    /// Fewest connected peers for the node to count as connected.
    pub min_peers: usize,
    /// Fewest distinct subnets the peer set should span.
    pub min_subnets: usize,
    /// Fewest outbound connections the node should hold.
    pub min_outbound_peers: usize,
    /// Time without a new block after which a stall warning is raised.
    pub block_stall_warning: Duration,
    /// Time without a new block after which the stall becomes a partition alert.
    pub block_stall_critical: Duration,
    /// Largest share of peers (0.0-1.0) that may come from one subnet.
    pub max_subnet_concentration: f32,
    /// Oldest acceptable tip age before the node may be considered partitioned.
    pub max_tip_age: Duration,
    /// Share of tip-reporting peers (0.0-1.0) that must share a tip with us.
    pub min_tip_agreement: f32,
    /// How long a computed status may be served from cache before re-checking.
    pub check_interval: Duration,
    /// Maximum number of block arrivals kept for the rolling rate calculation.
    pub block_time_window: usize,
    /// Ratio of the expected block rate below which a slowdown is reported.
    pub block_rate_threshold: f32,
    /// Target time between blocks (for GHOSTDAG, the target block time).
    pub expected_block_interval: Duration,
    /// Share of version-reporting peers (0.0-1.0) used by the version-skew check.
    pub min_protocol_agreement: f32,
}

impl Default for PartitionConfig {
    /// Returns the stock thresholds: 3 peers across 2 subnets with 2 outbound
    /// links, a 30 s / 120 s stall warning / alert, and one block per second.
    fn default() -> Self {
        Self {
            // Peer-set requirements.
            min_peers: 3,
            min_outbound_peers: 2,
            min_subnets: 2,
            // No single subnet may hold more than half of the peers.
            max_subnet_concentration: 0.5,
            // At least half of the peers should speak the same protocol version.
            min_protocol_agreement: 0.5,

            // Block production.
            block_stall_warning: Duration::from_secs(30),
            block_stall_critical: Duration::from_secs(120),
            block_time_window: 100,
            // Alert once the observed rate falls to 10% of the expected rate.
            block_rate_threshold: 0.1,
            // One block per second.
            expected_block_interval: Duration::from_secs(1),

            // Tip tracking.
            max_tip_age: Duration::from_secs(300),
            // At least 30% of peers should agree with our tips.
            min_tip_agreement: 0.3,

            // Status caching.
            check_interval: Duration::from_secs(10),
        }
    }
}
/// Reasons why the network might be degraded or partitioned.
///
/// Each variant carries the measurements that triggered it, so callers can
/// log or display them (see `description`).
///
/// Note: Percentage values are stored as u8 (0-100) for Eq/Hash compatibility.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PartitionReason {
/// Not enough connected peers: `current` is below the `required` minimum.
InsufficientPeers { current: usize, required: usize },
/// Not enough outbound connections: `current` is below the `required` minimum.
InsufficientOutbound { current: usize, required: usize },
/// Lack of subnet diversity: only `current` distinct subnets, `required` wanted.
LowSubnetDiversity { current: usize, required: usize },
/// Too many peers from a single subnet.
/// `subnet` is the packed subnet identifier; `percentage` is 0-100.
SubnetConcentration { subnet: u32, percentage: u8 },
/// No new blocks received recently.
/// `duration_secs` is the number of seconds since last block.
BlockProductionStalled { duration_secs: u64 },
/// Block production rate significantly lower than expected.
/// Rates are stored as millibps (blocks per 1000 seconds) for precision,
/// i.e. the blocks-per-second rate multiplied by 1000.
LowBlockRate {
current_rate_millibps: u32,
expected_rate_millibps: u32,
},
/// Our tips don't match peer tips.
/// `matching_peers_pct` and `threshold_pct` are both 0-100.
TipDivergence {
matching_peers_pct: u8,
threshold_pct: u8,
},
/// Tip is too old.
/// `age_secs` and `max_age_secs` are in seconds.
StaleTip { age_secs: u64, max_age_secs: u64 },
/// Protocol version mismatch with majority.
/// `majority_version` is the version reported by the largest group of peers.
ProtocolVersionSkew {
our_version: u32,
majority_version: u32,
},
/// Most peers have higher blue scores, we may be on a minority fork.
/// `network_score` is the highest blue score reported by any peer.
BehindNetwork { our_score: u64, network_score: u64 },
/// All connections are inbound (potential eclipse attack).
NoOutboundConnections,
/// Lost significant portion of peers suddenly.
/// `lost` is the drop in peer count; `remaining` is the count afterwards.
SuddenPeerLoss { lost: usize, remaining: usize },
}
impl PartitionReason {
/// Renders this reason as a single human-readable sentence.
///
/// The measurements carried by the variant are interpolated into the text.
/// Note that the subnet-concentration message always quotes a 50% limit,
/// which matches the default configuration only.
pub fn description(&self) -> String {
    match self {
        PartitionReason::InsufficientPeers { current, required } => {
            format!("Only {current} peers connected, need at least {required}")
        }
        PartitionReason::InsufficientOutbound { current, required } => {
            format!("Only {current} outbound peers, need at least {required}")
        }
        PartitionReason::LowSubnetDiversity { current, required } => {
            format!("Only {current} unique subnets, need at least {required}")
        }
        PartitionReason::SubnetConcentration { subnet, percentage } => {
            format!("Subnet {subnet:X} has {percentage}% of peers, max allowed 50%")
        }
        PartitionReason::BlockProductionStalled { duration_secs } => {
            format!("No new blocks for {duration_secs} seconds")
        }
        PartitionReason::LowBlockRate {
            current_rate_millibps,
            expected_rate_millibps,
        } => {
            // Convert milli-blocks-per-second back into blocks per second.
            let current = f64::from(*current_rate_millibps) / 1000.0;
            let expected = f64::from(*expected_rate_millibps) / 1000.0;
            format!("Block rate {current:.2}/s, expected {expected:.2}/s")
        }
        PartitionReason::TipDivergence {
            matching_peers_pct,
            threshold_pct,
        } => {
            format!("Only {matching_peers_pct}% of peers agree on tips, need {threshold_pct}%")
        }
        PartitionReason::StaleTip {
            age_secs,
            max_age_secs,
        } => {
            format!("Best tip is {age_secs} seconds old, max allowed {max_age_secs} seconds")
        }
        PartitionReason::ProtocolVersionSkew {
            our_version,
            majority_version,
        } => {
            format!("Our protocol version {our_version} differs from majority {majority_version}")
        }
        PartitionReason::BehindNetwork {
            our_score,
            network_score,
        } => {
            format!("Our blue score {our_score} is behind network {network_score}")
        }
        PartitionReason::NoOutboundConnections => {
            String::from("No outbound connections (potential eclipse attack)")
        }
        PartitionReason::SuddenPeerLoss { lost, remaining } => {
            format!("Lost {lost} peers suddenly, {remaining} remaining")
        }
    }
}
/// Returns true if this reason indicates a critical issue.
///
/// Critical reasons are: no outbound connections, a sudden loss of peers,
/// having zero peers, and a block stall of at least 60 seconds. Everything
/// else is a warning.
///
/// NOTE(review): the 60-second stall threshold is fixed here, whereas the
/// detector classifies stalls using `PartitionConfig::block_stall_critical`;
/// the two can disagree for non-default configurations.
pub fn is_critical(&self) -> bool {
    // The match is exhaustive on purpose: adding a variant forces a decision
    // about its severity instead of silently defaulting to "warning".
    match self {
        PartitionReason::NoOutboundConnections => true,
        // The detector treats a sudden peer loss as a partition signal, so it
        // must be counted as critical here as well.
        PartitionReason::SuddenPeerLoss { .. } => true,
        PartitionReason::InsufficientPeers { current, .. } => *current == 0,
        PartitionReason::BlockProductionStalled { duration_secs } => *duration_secs >= 60,
        PartitionReason::InsufficientOutbound { .. }
        | PartitionReason::LowSubnetDiversity { .. }
        | PartitionReason::SubnetConcentration { .. }
        | PartitionReason::LowBlockRate { .. }
        | PartitionReason::TipDivergence { .. }
        | PartitionReason::StaleTip { .. }
        | PartitionReason::ProtocolVersionSkew { .. }
        | PartitionReason::BehindNetwork { .. } => false,
    }
}
}
/// Current partition status.
///
/// `Degraded` carries only warning-level reasons; `Partitioned` carries the
/// critical reasons first, followed by any warnings found in the same check.
#[derive(Clone, Debug)]
pub enum PartitionStatus {
/// Node is well connected to the network (no reasons were raised).
Connected,
/// Node has some connectivity issues but is not partitioned.
Degraded {
/// Reasons for degraded status.
reasons: Vec<PartitionReason>,
},
/// Node appears to be partitioned from the main network.
Partitioned {
/// Reasons for partition detection.
reasons: Vec<PartitionReason>,
},
}
impl PartitionStatus {
    /// Reports whether the status is `Connected`.
    pub fn is_connected(&self) -> bool {
        match self {
            PartitionStatus::Connected => true,
            PartitionStatus::Degraded { .. } | PartitionStatus::Partitioned { .. } => false,
        }
    }

    /// Reports whether the status is `Partitioned`.
    pub fn is_partitioned(&self) -> bool {
        match self {
            PartitionStatus::Partitioned { .. } => true,
            PartitionStatus::Connected | PartitionStatus::Degraded { .. } => false,
        }
    }

    /// Reports whether the status is anything other than `Connected`
    /// (that is, `Degraded` or `Partitioned`).
    pub fn is_degraded(&self) -> bool {
        !self.is_connected()
    }

    /// Borrows the reasons attached to this status; empty when `Connected`.
    pub fn reasons(&self) -> &[PartitionReason] {
        match self {
            PartitionStatus::Degraded { reasons } | PartitionStatus::Partitioned { reasons } => {
                reasons.as_slice()
            }
            PartitionStatus::Connected => &[],
        }
    }
}
/// Information about a connected peer for partition detection.
///
/// One record is kept per connected peer; it is created on connect and
/// updated as the peer reports its protocol version, tips and blue score.
#[derive(Clone, Debug)]
struct PeerPartitionInfo {
/// Peer ID.
peer_id: PeerId,
/// IP address if known.
ip: Option<IpAddr>,
/// Subnet identifier derived from `ip` (/16 for IPv4, first 32 bits for IPv6);
/// `None` when the IP is unknown.
subnet: Option<u32>,
/// Whether this is an outbound connection.
is_outbound: bool,
/// Protocol version, `None` until the peer's version has been recorded.
protocol_version: Option<u32>,
/// Peer's reported tips (empty until the peer reports any).
tips: HashSet<Hash256>,
/// Peer's reported blue score, `None` until one has been recorded.
blue_score: Option<u64>,
/// When the peer connected.
connected_at: Instant,
/// When we last heard from this peer (refreshed on every update).
last_seen: Instant,
}
impl PeerPartitionInfo {
/// Creates the record for a freshly connected peer.
///
/// The subnet is derived from `ip` when one is given; the protocol version,
/// tips and blue score start out unknown, and both timestamps are "now".
fn new(peer_id: PeerId, ip: Option<IpAddr>, is_outbound: bool) -> Self {
    let subnet = match ip.as_ref() {
        Some(addr) => Self::extract_subnet(addr),
        None => None,
    };
    let connected_at = Instant::now();
    PeerPartitionInfo {
        peer_id,
        ip,
        subnet,
        is_outbound,
        connected_at,
        last_seen: connected_at,
        // Nothing has been reported by the peer yet.
        protocol_version: None,
        blue_score: None,
        tips: HashSet::new(),
    }
}
/// Derives the subnet identifier used to group peers by network locality.
///
/// * IPv4: the /16 prefix, packed into the low 16 bits.
/// * IPv4-mapped IPv6 (`::ffff:a.b.c.d`): treated as the embedded IPv4
///   address, so peers seen through a dual-stack socket are grouped by their
///   real /16 instead of all collapsing into one bucket.
/// * Other IPv6: the first 32 bits of the address.
///
/// Always returns `Some`; the `Option` is kept for interface compatibility.
fn extract_subnet(ip: &IpAddr) -> Option<u32> {
    /// Packs the first two octets of an IPv4 address (its /16 prefix).
    fn ipv4_prefix(octets: [u8; 4]) -> u32 {
        (u32::from(octets[0]) << 8) | u32::from(octets[1])
    }

    match ip {
        IpAddr::V4(addr) => Some(ipv4_prefix(addr.octets())),
        IpAddr::V6(addr) => match addr.to_ipv4_mapped() {
            Some(mapped) => Some(ipv4_prefix(mapped.octets())),
            None => {
                // Use first 32 bits for IPv6 subnet grouping
                let segments = addr.segments();
                Some((u32::from(segments[0]) << 16) | u32::from(segments[1]))
            }
        },
    }
}
}
/// Block arrival record for rate calculation.
///
/// Records are appended as blocks are received and pruned by count and age.
#[derive(Clone, Debug)]
struct BlockArrival {
/// Block hash.
hash: Hash256,
/// When the block was received (local monotonic clock).
received_at: Instant,
}
/// Statistics for partition monitoring.
///
/// A point-in-time snapshot produced by `PartitionDetector::stats`.
#[derive(Clone, Debug, Default)]
pub struct PartitionStats {
/// Current number of peers.
pub peer_count: usize,
/// Number of outbound peers.
pub outbound_count: usize,
/// Number of inbound peers.
pub inbound_count: usize,
/// Number of unique subnets among peers whose subnet is known.
pub unique_subnets: usize,
/// Time since last block, or `None` if no block has been recorded.
pub time_since_last_block: Option<Duration>,
/// Current block rate (blocks per second).
pub block_rate: f32,
/// Fraction (0.0-1.0) of tip-reporting peers that share at least one tip
/// with us; 1.0 when there is no data to compare.
pub tip_agreement: f32,
/// Our blue score.
pub local_blue_score: u64,
/// Highest blue score reported by any peer (0 if none has reported one).
pub network_blue_score: u64,
/// Current partition status: "Connected", "Degraded" or "Partitioned".
pub status: String,
/// Number of reasons that are not classified as critical.
pub warning_count: usize,
/// Number of critical reasons.
pub critical_count: usize,
/// Last status check time.
pub last_check: Option<Instant>,
}
/// Network partition detector.
///
/// All mutable state sits behind its own `RwLock`, so every method takes
/// `&self`.
pub struct PartitionDetector {
/// Configuration.
config: PartitionConfig,
/// Connected peers, keyed by peer ID.
peers: RwLock<HashMap<PeerId, PeerPartitionInfo>>,
/// Our local tips.
local_tips: RwLock<HashSet<Hash256>>,
/// Our local blue score.
local_blue_score: RwLock<u64>,
/// Our protocol version.
our_protocol_version: u32,
/// Recent block arrivals for rate calculation.
block_arrivals: RwLock<VecDeque<BlockArrival>>,
/// Last block received time (`None` until the first block is recorded).
last_block_time: RwLock<Option<Instant>>,
/// Historical peer count for sudden loss detection: one `(time, count)`
/// sample per connect/disconnect event.
peer_count_history: RwLock<VecDeque<(Instant, usize)>>,
/// Last status check time (set at construction and on every evaluation).
last_check: RwLock<Instant>,
/// Cached status; reset to `None` when peers connect/disconnect or a block arrives.
cached_status: RwLock<Option<PartitionStatus>>,
/// Whether an alert has already been handed out for the current partition
/// episode; reset when the node is evaluated as connected again.
alert_sent: RwLock<bool>,
}
impl PartitionDetector {
/// Builds a detector with empty state and protocol version 1.
///
/// No peers, tips or blocks are known initially, the status cache is empty,
/// and the last-check time is set to the moment of construction.
pub fn new(config: PartitionConfig) -> Self {
    let block_arrivals = VecDeque::with_capacity(100);
    let peer_count_history = VecDeque::with_capacity(60);
    let created_at = Instant::now();

    PartitionDetector {
        config,
        // Default to version 1
        our_protocol_version: 1,
        peers: RwLock::new(HashMap::new()),
        local_tips: RwLock::new(HashSet::new()),
        local_blue_score: RwLock::new(0),
        block_arrivals: RwLock::new(block_arrivals),
        last_block_time: RwLock::new(None),
        peer_count_history: RwLock::new(peer_count_history),
        last_check: RwLock::new(created_at),
        cached_status: RwLock::new(None),
        alert_sent: RwLock::new(false),
    }
}
/// Creates a new partition detector with custom protocol version.
pub fn with_protocol_version(config: PartitionConfig, protocol_version: u32) -> Self {
let mut detector = Self::new(config);
detector.our_protocol_version = protocol_version;
detector
}
/// Registers a newly connected peer.
///
/// Stores a fresh record for `peer_id` (replacing any existing one), appends
/// the resulting peer count to the history used for sudden-loss detection
/// (at most 60 samples are kept) and drops the cached status.
pub fn record_peer_connected(&self, peer_id: PeerId, ip: Option<IpAddr>, is_outbound: bool) {
    let record = PeerPartitionInfo::new(peer_id, ip, is_outbound);
    self.peers.write().insert(peer_id, record);

    // Sample the peer count for sudden loss detection.
    let peers_now = self.peers.read().len();
    let mut samples = self.peer_count_history.write();
    samples.push_back((Instant::now(), peers_now));
    if samples.len() > 60 {
        samples.pop_front();
    }

    // The cached status no longer reflects the peer set.
    let mut cache = self.cached_status.write();
    *cache = None;
}
/// Removes a peer that has disconnected.
///
/// Forgets the record for `peer_id` (a no-op if it is unknown), appends the
/// resulting peer count to the history used for sudden-loss detection (at
/// most 60 samples are kept) and drops the cached status.
pub fn record_peer_disconnected(&self, peer_id: &PeerId) {
    self.peers.write().remove(peer_id);

    // Sample the peer count after the removal.
    let peers_now = self.peers.read().len();
    let mut samples = self.peer_count_history.write();
    samples.push_back((Instant::now(), peers_now));
    if samples.len() > 60 {
        samples.pop_front();
    }

    // The cached status no longer reflects the peer set.
    let mut cache = self.cached_status.write();
    *cache = None;
}
/// Records the protocol version reported by a peer and refreshes its
/// last-seen time. Unknown peers are ignored.
pub fn update_peer_protocol_version(&self, peer_id: &PeerId, version: u32) {
    let mut peers = self.peers.write();
    let Some(entry) = peers.get_mut(peer_id) else {
        return;
    };
    entry.protocol_version = Some(version);
    entry.last_seen = Instant::now();
}
/// Updates peer's reported tips.
pub fn update_peer_tips(&self, peer_id: &PeerId, tips: Vec<Hash256>) {
if let Some(peer) = self.peers.write().get_mut(peer_id) {
peer.tips = tips.into_iter().collect();
peer.last_seen = Instant::now();
}
}
/// Records the blue score reported by a peer and refreshes its last-seen
/// time. Unknown peers are ignored.
pub fn update_peer_blue_score(&self, peer_id: &PeerId, score: u64) {
    let mut peers = self.peers.write();
    let Some(entry) = peers.get_mut(peer_id) else {
        return;
    };
    entry.blue_score = Some(score);
    entry.last_seen = Instant::now();
}
/// Records a new block received.
///
/// Updates the last-block time, appends the arrival to the rolling window
/// used for rate calculation, prunes the window (by count, then by age) and
/// invalidates the cached status.
pub fn record_block_received(&self, hash: Hash256) {
    /// Arrivals older than this are dropped from the rate window.
    const MAX_ARRIVAL_AGE: Duration = Duration::from_secs(600);

    let now = Instant::now();

    // Update last block time
    *self.last_block_time.write() = Some(now);

    // Add to block arrivals for rate calculation
    let mut arrivals = self.block_arrivals.write();
    arrivals.push_back(BlockArrival {
        hash,
        received_at: now,
    });

    // Keep only recent blocks
    while arrivals.len() > self.config.block_time_window {
        arrivals.pop_front();
    }

    // Remove blocks older than 10 minutes. `Instant - Duration` panics when
    // the result cannot be represented (possible on platforms whose
    // monotonic clock starts near zero), so use the checked form; if no
    // cutoff exists, nothing recorded so far can be that old.
    if let Some(cutoff) = now.checked_sub(MAX_ARRIVAL_AGE) {
        while arrivals
            .front()
            .is_some_and(|oldest| oldest.received_at < cutoff)
        {
            arrivals.pop_front();
        }
    }

    // Invalidate cached status
    *self.cached_status.write() = None;
}
/// Sets the local tips.
pub fn set_local_tips(&self, tips: Vec<Hash256>) {
*self.local_tips.write() = tips.into_iter().collect();
}
/// Stores `score` as our current local blue score.
pub fn set_local_blue_score(&self, score: u64) {
    let mut local_score = self.local_blue_score.write();
    *local_score = score;
}
/// Computes the observed block rate in blocks per second.
///
/// The rate is the number of intervals between recorded arrivals divided by
/// the time between the oldest and newest arrival in the window. Returns
/// 0.0 when fewer than two arrivals are recorded or when they span no
/// measurable time.
pub fn calculate_block_rate(&self) -> f32 {
    let arrivals = self.block_arrivals.read();
    let (Some(oldest), Some(newest)) = (arrivals.front(), arrivals.back()) else {
        return 0.0;
    };

    // A single arrival defines no interval to measure.
    let intervals = arrivals.len() - 1;
    if intervals == 0 {
        return 0.0;
    }

    let span_secs = newest
        .received_at
        .duration_since(oldest.received_at)
        .as_secs_f32();
    if span_secs > 0.0 {
        intervals as f32 / span_secs
    } else {
        0.0
    }
}
/// Checks for sudden peer loss.
///
/// Compares the highest peer count sampled *before* the last 30 seconds
/// (the baseline) with the most recent count sampled *within* the last
/// 30 seconds. Returns `SuddenPeerLoss` when the baseline was at least 4
/// peers and fewer than half of them remain.
///
/// Returns `None` when there is not enough history, when there is no
/// baseline, or when no peer event occurred inside the window. Without a
/// recent sample nothing changed "suddenly", and treating the missing
/// sample as zero peers would flag a perfectly stable node as partitioned.
fn check_sudden_peer_loss(&self) -> Option<PartitionReason> {
    /// Look-back window that defines "sudden".
    const WINDOW: Duration = Duration::from_secs(30);

    let history = self.peer_count_history.read();
    if history.len() < 2 {
        return None;
    }

    // `Instant - Duration` panics if the result is unrepresentable; when no
    // cutoff exists every sample lies inside the window, so there is no
    // baseline to compare against.
    let cutoff = Instant::now().checked_sub(WINDOW)?;

    // Baseline: the highest count seen before the window.
    let baseline = history
        .iter()
        .filter(|(sampled_at, _)| *sampled_at < cutoff)
        .map(|(_, count)| *count)
        .max()
        .unwrap_or(0);
    if baseline == 0 {
        return None;
    }

    // Current: the latest sample taken inside the window, if there is one.
    let current = history
        .iter()
        .rev()
        .find(|(sampled_at, _)| *sampled_at >= cutoff)
        .map(|(_, count)| *count)?;

    // Alert if we lost more than 50% of peers
    if baseline >= 4 && current < baseline / 2 {
        return Some(PartitionReason::SuddenPeerLoss {
            lost: baseline - current,
            remaining: current,
        });
    }
    None
}
/// Evaluates all partition signals and returns current status.
///
/// Signals checked, in order: peer count, outbound connections, subnet
/// diversity and concentration, block stall and block rate, tip agreement,
/// blue score lag, protocol version skew, and sudden peer loss.
///
/// * Any critical reason yields `Partitioned` (critical reasons first,
///   then warnings).
/// * Otherwise any warning yields `Degraded`.
/// * Otherwise the result is `Connected` and the alert flag is reset.
///
/// Also updates the last-check time. It does not write the status cache;
/// `status` and `force_check` do that.
pub fn evaluate(&self) -> PartitionStatus {
    let mut warning_reasons = Vec::new();
    let mut critical_reasons = Vec::new();
    let peers = self.peers.read();
    let peer_count = peers.len();

    // === Peer Count Checks ===
    // Check minimum peers: having none at all is critical, being short is a warning.
    if peer_count < self.config.min_peers {
        let reason = PartitionReason::InsufficientPeers {
            current: peer_count,
            required: self.config.min_peers,
        };
        if peer_count == 0 {
            critical_reasons.push(reason);
        } else {
            warning_reasons.push(reason);
        }
    }

    // Check outbound connections: inbound-only connectivity is critical.
    let outbound_count = peers.values().filter(|p| p.is_outbound).count();
    if outbound_count == 0 && peer_count > 0 {
        critical_reasons.push(PartitionReason::NoOutboundConnections);
    } else if outbound_count < self.config.min_outbound_peers {
        warning_reasons.push(PartitionReason::InsufficientOutbound {
            current: outbound_count,
            required: self.config.min_outbound_peers,
        });
    }

    // === Subnet Diversity Checks ===
    let mut subnet_counts: HashMap<u32, usize> = HashMap::new();
    for subnet in peers.values().filter_map(|p| p.subnet) {
        *subnet_counts.entry(subnet).or_insert(0) += 1;
    }
    let unique_subnets = subnet_counts.len();
    // Only meaningful once there are enough peers to be diverse at all.
    if unique_subnets < self.config.min_subnets && peer_count >= self.config.min_peers {
        warning_reasons.push(PartitionReason::LowSubnetDiversity {
            current: unique_subnets,
            required: self.config.min_subnets,
        });
    }

    // Check for subnet concentration (share of all peers held by one subnet).
    if peer_count > 0 {
        for (&subnet, &count) in &subnet_counts {
            let share = count as f32 / peer_count as f32;
            if share > self.config.max_subnet_concentration {
                warning_reasons.push(PartitionReason::SubnetConcentration {
                    subnet,
                    percentage: (share * 100.0) as u8,
                });
            }
        }
    }

    // === Block Production Checks ===
    let last_block_time = *self.last_block_time.read();
    if let Some(last_time) = last_block_time {
        let elapsed = last_time.elapsed();
        if elapsed > self.config.block_stall_critical {
            critical_reasons.push(PartitionReason::BlockProductionStalled {
                duration_secs: elapsed.as_secs(),
            });
        } else if elapsed > self.config.block_stall_warning {
            warning_reasons.push(PartitionReason::BlockProductionStalled {
                duration_secs: elapsed.as_secs(),
            });
        }
    }

    // Check block rate (only if we have enough samples to calculate)
    let arrivals_count = self.block_arrivals.read().len();
    if arrivals_count >= 2 {
        let block_rate = self.calculate_block_rate();
        let expected_rate = 1.0 / self.config.expected_block_interval.as_secs_f32();
        if block_rate < expected_rate * self.config.block_rate_threshold {
            warning_reasons.push(PartitionReason::LowBlockRate {
                current_rate_millibps: (block_rate * 1000.0) as u32,
                expected_rate_millibps: (expected_rate * 1000.0) as u32,
            });
        }
    }

    // === Tip Agreement Checks ===
    let local_tips = self.local_tips.read();
    if !local_tips.is_empty() && peer_count > 0 {
        // Only peers that have reported tips take part in the comparison.
        let mut peers_with_tips = 0usize;
        let mut matching_peers = 0usize;
        for peer in peers.values().filter(|p| !p.tips.is_empty()) {
            peers_with_tips += 1;
            // Check if any of our tips match any of peer's tips
            if local_tips.iter().any(|t| peer.tips.contains(t)) {
                matching_peers += 1;
            }
        }
        if peers_with_tips > 0 {
            let agreement = matching_peers as f32 / peers_with_tips as f32;
            if agreement < self.config.min_tip_agreement {
                warning_reasons.push(PartitionReason::TipDivergence {
                    matching_peers_pct: (agreement * 100.0) as u8,
                    threshold_pct: (self.config.min_tip_agreement * 100.0) as u8,
                });
            }
        }
    }

    // === Blue Score Checks ===
    let local_score = *self.local_blue_score.read();
    let network_score = peers
        .values()
        .filter_map(|p| p.blue_score)
        .max()
        .unwrap_or(local_score);
    // If we're significantly behind the network. Saturating, so a local
    // score near `u64::MAX` cannot overflow the comparison.
    if network_score > local_score.saturating_add(100) {
        warning_reasons.push(PartitionReason::BehindNetwork {
            our_score: local_score,
            network_score,
        });
    }

    // === Protocol Version Checks ===
    let mut version_counts: HashMap<u32, usize> = HashMap::new();
    for version in peers.values().filter_map(|p| p.protocol_version) {
        *version_counts.entry(version).or_insert(0) += 1;
    }
    if let Some((&majority_version, &count)) =
        version_counts.iter().max_by_key(|(_, count)| *count)
    {
        let peers_with_version = version_counts.values().sum::<usize>();
        let percentage = count as f32 / peers_with_version as f32;
        // Warn only when a sufficiently large group runs a version other than ours.
        if majority_version != self.our_protocol_version
            && percentage > self.config.min_protocol_agreement
        {
            warning_reasons.push(PartitionReason::ProtocolVersionSkew {
                our_version: self.our_protocol_version,
                majority_version,
            });
        }
    }

    // === Sudden Peer Loss Check ===
    drop(peers); // Release lock before checking history
    if let Some(reason) = self.check_sudden_peer_loss() {
        critical_reasons.push(reason);
    }

    // === Determine Status ===
    // Update last check time
    *self.last_check.write() = Instant::now();

    if !critical_reasons.is_empty() {
        // Any critical reason means we're partitioned
        let mut all_reasons = critical_reasons;
        all_reasons.extend(warning_reasons);
        PartitionStatus::Partitioned {
            reasons: all_reasons,
        }
    } else if !warning_reasons.is_empty() {
        PartitionStatus::Degraded {
            reasons: warning_reasons,
        }
    } else {
        // Reset alert flag when we're connected
        *self.alert_sent.write() = false;
        PartitionStatus::Connected
    }
}
/// Returns the partition status, served from cache when possible.
///
/// The cached value is used only if the last evaluation happened less than
/// `check_interval` ago and the cache has not been invalidated since;
/// otherwise a fresh evaluation is run and cached.
pub fn status(&self) -> PartitionStatus {
    let checked_recently = self.last_check.read().elapsed() < self.config.check_interval;
    if checked_recently {
        let cached = self.cached_status.read();
        if let Some(status) = cached.as_ref() {
            return status.clone();
        }
    }

    let fresh = self.evaluate();
    let mut cache = self.cached_status.write();
    *cache = Some(fresh.clone());
    drop(cache);
    fresh
}
/// Runs a fresh evaluation regardless of the cache, stores the result in
/// the cache and returns it.
pub fn force_check(&self) -> PartitionStatus {
    let fresh = self.evaluate();
    let mut cache = self.cached_status.write();
    *cache = Some(fresh.clone());
    drop(cache);
    fresh
}
/// Reports whether an alert should be sent now.
///
/// Returns `true` exactly once per partition episode: the first call made
/// while the status is `Partitioned` sets the alert flag, and later calls
/// return `false` until the flag is cleared again.
pub fn should_alert(&self) -> bool {
    if !self.status().is_partitioned() {
        return false;
    }

    let mut already_sent = self.alert_sent.write();
    let first_alert = !*already_sent;
    *already_sent = true;
    first_alert
}
/// Resets the alert flag so that the next partition triggers a new alert
/// (call when recovering from partition).
pub fn clear_alert(&self) {
    let mut already_sent = self.alert_sent.write();
    *already_sent = false;
}
/// Returns current statistics.
pub fn stats(&self) -> PartitionStats {
let peers = self.peers.read();
let status = self.status();
let (status_str, warning_count, critical_count) = match &status {
PartitionStatus::Connected => ("Connected".to_string(), 0, 0),
PartitionStatus::Degraded { reasons } => ("Degraded".to_string(), reasons.len(), 0),
PartitionStatus::Partitioned { reasons } => {
let critical = reasons.iter().filter(|r| r.is_critical()).count();
(
"Partitioned".to_string(),
reasons.len() - critical,
critical,
)
}
};
let mut subnet_set = HashSet::new();
for peer in peers.values() {
if let Some(subnet) = peer.subnet {
subnet_set.insert(subnet);
}
}
let network_score = peers
.values()
.filter_map(|p| p.blue_score)
.max()
.unwrap_or(0);
PartitionStats {
peer_count: peers.len(),
outbound_count: peers.values().filter(|p| p.is_outbound).count(),
inbound_count: peers.values().filter(|p| !p.is_outbound).count(),
unique_subnets: subnet_set.len(),
time_since_last_block: self.last_block_time.read().map(|t| t.elapsed()),
block_rate: self.calculate_block_rate(),
tip_agreement: self.calculate_tip_agreement(),
local_blue_score: *self.local_blue_score.read(),
network_blue_score: network_score,
status: status_str,
warning_count,
critical_count,
last_check: Some(*self.last_check.read()),
}
}
/// Computes the fraction (0.0-1.0) of tip-reporting peers that share at
/// least one tip with us.
///
/// Returns 1.0 when there is nothing to compare: no local tips, no peers,
/// or no peer that has reported tips.
fn calculate_tip_agreement(&self) -> f32 {
    let peers = self.peers.read();
    let local_tips = self.local_tips.read();
    // Assume agreement if we have no data
    if local_tips.is_empty() || peers.is_empty() {
        return 1.0;
    }

    let mut reporting = 0usize;
    let mut agreeing = 0usize;
    for peer in peers.values().filter(|p| !p.tips.is_empty()) {
        reporting += 1;
        if peer.tips.iter().any(|tip| local_tips.contains(tip)) {
            agreeing += 1;
        }
    }

    if reporting == 0 {
        return 1.0;
    }
    agreeing as f32 / reporting as f32
}
/// Number of peers currently tracked as connected.
pub fn peer_count(&self) -> usize {
    let peers = self.peers.read();
    peers.len()
}
/// Number of connected peers whose connection is outbound.
pub fn outbound_count(&self) -> usize {
    let peers = self.peers.read();
    let mut outbound = 0;
    for peer in peers.values() {
        if peer.is_outbound {
            outbound += 1;
        }
    }
    outbound
}
/// Returns the number of unique subnets.
pub fn unique_subnet_count(&self) -> usize {
let peers = self.peers.read();
let mut subnets = HashSet::new();
for peer in peers.values() {
if let Some(subnet) = peer.subnet {
subnets.insert(subnet);
}
}
subnets.len()
}
/// Resets the detector to an empty state (for testing or reset).
///
/// Forgets peers, local tips, the local blue score, block arrivals and the
/// peer-count history, and clears the cached status and the alert flag.
/// The configuration, our protocol version and the last-check time are
/// left untouched.
pub fn clear(&self) {
    // Peer-related state.
    self.peers.write().clear();
    self.peer_count_history.write().clear();

    // Chain-related state.
    self.local_tips.write().clear();
    *self.local_blue_score.write() = 0;
    self.block_arrivals.write().clear();
    *self.last_block_time.write() = None;

    // Derived state.
    *self.cached_status.write() = None;
    *self.alert_sent.write() = false;
}
}
impl Default for PartitionDetector {
    /// Builds a detector that uses the default `PartitionConfig`.
    fn default() -> Self {
        let config = PartitionConfig::default();
        Self::new(config)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::net::Ipv4Addr;

    /// Generates a fresh random peer identity.
    fn random_peer_id() -> PeerId {
        PeerId::random()
    }

    /// Detector with short stall thresholds and a single required outbound peer.
    fn create_detector() -> PartitionDetector {
        let config = PartitionConfig {
            min_peers: 3,
            min_subnets: 2,
            min_outbound_peers: 1,
            block_stall_warning: Duration::from_secs(5),
            block_stall_critical: Duration::from_secs(10),
            ..Default::default()
        };
        PartitionDetector::new(config)
    }

    /// `10.<index>.1.1`: every index lands in a different /16.
    fn diverse_ip(index: u8) -> Option<IpAddr> {
        Some(IpAddr::V4(Ipv4Addr::new(10, index, 1, 1)))
    }

    /// A hash whose 32 bytes all equal `fill`.
    fn block_hash(fill: u8) -> Hash256 {
        Hash256::from_bytes([fill; 32])
    }

    #[test]
    fn test_connected_with_good_peers() {
        let detector = create_detector();

        // Five outbound peers, each from its own subnet, all on our version.
        for i in 0..5u8 {
            let peer_id = random_peer_id();
            detector.record_peer_connected(peer_id, diverse_ip(i), true);
            detector.update_peer_protocol_version(&peer_id, 1);
        }

        // A block has just arrived.
        detector.record_block_received(block_hash(1));

        let status = detector.force_check();
        assert!(status.is_connected());
    }

    #[test]
    fn test_degraded_with_low_peers() {
        let detector = create_detector();

        // Two peers only: one short of the minimum of three.
        for i in 0..2u8 {
            detector.record_peer_connected(random_peer_id(), diverse_ip(i), true);
        }
        detector.record_block_received(block_hash(1));

        let status = detector.force_check();
        assert!(status.is_degraded());

        let PartitionStatus::Degraded { reasons } = status else {
            panic!("Expected degraded status");
        };
        assert!(reasons
            .iter()
            .any(|r| matches!(r, PartitionReason::InsufficientPeers { .. })));
    }

    #[test]
    fn test_partitioned_no_outbound() {
        let detector = create_detector();

        // Plenty of peers, but every one of them is inbound.
        for i in 0..5u8 {
            detector.record_peer_connected(random_peer_id(), diverse_ip(i), false);
        }
        detector.record_block_received(block_hash(1));

        let status = detector.force_check();
        assert!(status.is_partitioned());

        let PartitionStatus::Partitioned { reasons } = status else {
            panic!("Expected partitioned status");
        };
        assert!(reasons
            .iter()
            .any(|r| matches!(r, PartitionReason::NoOutboundConnections)));
    }

    #[test]
    fn test_subnet_concentration() {
        let detector = create_detector();

        // Every peer lives in 192.168.0.0/16.
        for i in 0..5u8 {
            let ip = Some(IpAddr::V4(Ipv4Addr::new(192, 168, 1, i + 1)));
            detector.record_peer_connected(random_peer_id(), ip, true);
        }
        detector.record_block_received(block_hash(1));

        let status = detector.force_check();
        assert!(status.is_degraded());

        let (PartitionStatus::Degraded { reasons } | PartitionStatus::Partitioned { reasons }) =
            status
        else {
            panic!("Expected degraded or partitioned status");
        };
        assert!(reasons.iter().any(|r| matches!(
            r,
            PartitionReason::SubnetConcentration { .. }
                | PartitionReason::LowSubnetDiversity { .. }
        )));
    }

    #[test]
    fn test_block_rate_calculation() {
        let detector = create_detector();

        // Ten blocks, spaced roughly 10 ms apart.
        for i in 0..10u8 {
            detector.record_block_received(block_hash(i));
            std::thread::sleep(Duration::from_millis(10));
        }

        // Roughly 100 blocks/second; only positivity is asserted to stay robust.
        let rate = detector.calculate_block_rate();
        assert!(rate > 0.0);
    }

    #[test]
    fn test_peer_count_tracking() {
        let detector = create_detector();
        let outbound_peer = random_peer_id();
        let inbound_peer = random_peer_id();

        detector.record_peer_connected(outbound_peer, None, true);
        assert_eq!(detector.peer_count(), 1);

        detector.record_peer_connected(inbound_peer, None, false);
        assert_eq!(detector.peer_count(), 2);
        assert_eq!(detector.outbound_count(), 1);

        detector.record_peer_disconnected(&outbound_peer);
        assert_eq!(detector.peer_count(), 1);
        assert_eq!(detector.outbound_count(), 0);
    }

    #[test]
    fn test_tip_agreement() {
        let detector = create_detector();

        let our_tip = block_hash(1);
        let other_tip = block_hash(2);
        detector.set_local_tips(vec![our_tip]);

        // Three of five peers report our tip; the other two report another one.
        for i in 0..5u8 {
            let peer_id = random_peer_id();
            detector.record_peer_connected(peer_id, diverse_ip(i), true);
            let reported = if i < 3 { our_tip } else { other_tip };
            detector.update_peer_tips(&peer_id, vec![reported]);
        }

        let agreement = detector.calculate_tip_agreement();
        assert!((agreement - 0.6).abs() < 0.01); // 3 out of 5 = 60%
    }

    #[test]
    fn test_partition_reason_descriptions() {
        let reasons = [
            PartitionReason::InsufficientPeers {
                current: 1,
                required: 3,
            },
            PartitionReason::NoOutboundConnections,
            PartitionReason::BlockProductionStalled { duration_secs: 60 },
        ];

        for reason in &reasons {
            assert!(!reason.description().is_empty());
        }
    }

    #[test]
    fn test_stats() {
        let detector = create_detector();

        // Peers 0 and 2 are outbound, peer 1 is inbound; scores are 1000..=1002.
        for i in 0..3u8 {
            let peer_id = random_peer_id();
            detector.record_peer_connected(peer_id, diverse_ip(i), i.is_multiple_of(2));
            detector.update_peer_blue_score(&peer_id, 1000 + u64::from(i));
        }
        detector.set_local_blue_score(500);
        detector.record_block_received(block_hash(1));

        let stats = detector.stats();
        assert_eq!(stats.peer_count, 3);
        assert_eq!(stats.outbound_count, 2);
        assert_eq!(stats.inbound_count, 1);
        assert_eq!(stats.local_blue_score, 500);
        assert_eq!(stats.network_blue_score, 1002);
    }

    #[test]
    fn test_should_alert() {
        let detector = PartitionDetector::new(PartitionConfig {
            min_peers: 3,
            ..Default::default()
        });

        // With no peers, we're partitioned.
        let _ = detector.force_check();

        // The first query alerts; the second one stays quiet.
        assert!(detector.should_alert());
        assert!(!detector.should_alert());

        // Recover by connecting enough diverse outbound peers.
        for i in 0..5u8 {
            detector.record_peer_connected(random_peer_id(), diverse_ip(i), true);
        }
        detector.record_block_received(block_hash(1));
        let _ = detector.force_check();

        // Lose everything again.
        detector.clear();
        let _ = detector.force_check();

        // A new partition episode alerts once more.
        assert!(detector.should_alert());
    }

    #[test]
    fn test_clear() {
        let detector = create_detector();

        // Populate some state.
        detector.record_peer_connected(random_peer_id(), None, true);
        detector.set_local_tips(vec![block_hash(1)]);
        detector.set_local_blue_score(1000);
        detector.record_block_received(block_hash(1));
        assert_eq!(detector.peer_count(), 1);

        detector.clear();

        assert_eq!(detector.peer_count(), 0);
        assert_eq!(detector.calculate_block_rate(), 0.0);
    }
}