Security (Desktop Wallet): - Implement BIP39 mnemonic generation with cryptographic RNG - Add Argon2id password-based key derivation (64MB, 3 iterations) - Add ChaCha20-Poly1305 authenticated encryption for seed storage - Add mnemonic auto-clear (60s timeout) and clipboard auto-clear (30s) - Add sanitized error logging to prevent credential leaks - Strengthen CSP with object-src, base-uri, form-action, frame-ancestors - Clear sensitive state on component unmount Explorer (Gas Estimator): - Add Gas Estimation page with from/to/amount/data inputs - Add bech32 address validation (synor1/tsynor1 prefix) - Add BigInt-based amount parsing to avoid floating point errors - Add production guard for mock mode (cannot enable in prod builds) Monitoring (30-day Testnet): - Add Prometheus config with 30-day retention - Add comprehensive alert rules for node health, consensus, network, mempool - Add Alertmanager with severity-based routing and inhibition rules - Add Grafana with auto-provisioned datasource and dashboard - Add Synor testnet dashboard with uptime SLA tracking Docker: - Update docker-compose.testnet.yml with monitoring profile - Fix node-exporter for macOS Docker Desktop compatibility - Change Grafana port to 3001 to avoid conflict
172 lines
6.1 KiB
YAML
172 lines
6.1 KiB
YAML
# Synor testnet: Prometheus alerting and recording rules.
# Scope: the 30-day stability validation run.

groups:
# --------------------------------------------------------------------------
# Node health: scrape reachability and process restarts
# --------------------------------------------------------------------------
- name: synor_node_health
  interval: 30s
  rules:
  # Fires once the `up` series of a synor-nodes target has been 0 for 2m.
  - alert: SynorNodeDown
    expr: 'up{job="synor-nodes"} == 0'
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: 'Synor node {{ $labels.instance }} is down'
      description: >-
        Node {{ $labels.instance }} has been unreachable for more than
        2 minutes.

  # Fires as soon as process_start_time_seconds has changed inside the
  # trailing 5m window; there is no `for` clause, so no hold-off applies.
  - alert: SynorNodeRestarted
    expr: 'changes(process_start_time_seconds{job="synor-nodes"}[5m]) > 0'
    labels:
      severity: warning
    annotations:
      summary: 'Synor node {{ $labels.instance }} restarted'
      description: 'Node has restarted in the last 5 minutes.'
# --------------------------------------------------------------------------
# Consensus: block production and DAA score progress
# --------------------------------------------------------------------------
- name: synor_consensus
  interval: 1m
  rules:
  # Block counter flat over a 10m window, sustained for a further 5m.
  # With a 10 blocks/s target, any such gap is treated as critical.
  - alert: SynorNoNewBlocks
    expr: 'increase(synor_block_count_total[10m]) == 0'
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: 'No new blocks produced on {{ $labels.instance }}'
      description: >-
        No blocks have been produced in the last 10 minutes.
        Consensus may be stalled.

  # Per-second block rate (5m window) under 5, i.e. below half of the
  # 10 blocks/s target, sustained for 10m.
  - alert: SynorLowBlockRate
    expr: 'rate(synor_block_count_total[5m]) < 5'
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: 'Low block rate on {{ $labels.instance }}'
      description: 'Block rate is {{ $value | humanize }}/s (target: 10/s)'

  # DAA score unchanged across a 5m window, sustained for 5m.
  # NOTE(review): increase() is intended for counters; if synor_daa_score
  # is exported as a gauge, changes() or delta() may be the better fit.
  # Confirm the metric type before changing the expression.
  - alert: SynorDaaScoreStalled
    expr: 'increase(synor_daa_score[5m]) == 0'
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: 'DAA score stalled on {{ $labels.instance }}'
      description: 'DAA score has not increased in 5 minutes.'
# --------------------------------------------------------------------------
# Network: peer connectivity
# --------------------------------------------------------------------------
- name: synor_network
  interval: 1m
  rules:
  # Fewer than 2 peers for 5m. This also matches a peer count of 0, so it
  # can fire alongside SynorNetworkPartition below.
  # NOTE(review): the expression alerts below 2 peers while the description
  # quotes a recommended minimum of 3. Confirm which number is intended.
  - alert: SynorLowPeerCount
    expr: 'synor_peer_count < 2'
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'Low peer count on {{ $labels.instance }}'
      description: >-
        Node has only {{ $value }} peers (minimum recommended: 3)

  # Zero peers for 2m: the node is treated as cut off from the network.
  - alert: SynorNetworkPartition
    expr: 'synor_peer_count == 0'
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: 'Node {{ $labels.instance }} is isolated'
      description: 'Node has 0 peers - possible network partition.'
# --------------------------------------------------------------------------
# Mempool: backlog size and drain progress
# --------------------------------------------------------------------------
- name: synor_mempool
  interval: 1m
  rules:
  # Mempool size above 10000 for 5m.
  - alert: SynorMempoolOverflow
    expr: 'synor_mempool_size > 10000'
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'Mempool overflow on {{ $labels.instance }}'
      description: >-
        Mempool has {{ $value }} transactions (threshold: 10000)

  # More than 100 entries in the mempool while the removed-transactions
  # series shows no increase over 10m, sustained for 10m. The `and`
  # operator keeps the left-hand samples, so $value is the mempool size.
  - alert: SynorMempoolStale
    expr: >-
      synor_mempool_size > 100
      and increase(synor_mempool_txs_removed[10m]) == 0
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: 'Mempool not draining on {{ $labels.instance }}'
      description: >-
        Mempool has {{ $value }} transactions but none are being processed.
# --------------------------------------------------------------------------
# Resources: process CPU and memory, host disk
# --------------------------------------------------------------------------
- name: synor_resources
  interval: 30s
  rules:
  # rate() of process CPU seconds is measured in CPU-seconds per second,
  # so 0.9 corresponds to 90% of a single core. Must hold for 10m.
  - alert: SynorHighCpuUsage
    expr: 'rate(process_cpu_seconds_total{job="synor-nodes"}[5m]) > 0.9'
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: 'High CPU usage on {{ $labels.instance }}'
      description: 'CPU usage is {{ $value | humanizePercentage }}'

  # Resident memory of the node process above 4e9 bytes for 10m.
  - alert: SynorHighMemoryUsage
    expr: 'process_resident_memory_bytes{job="synor-nodes"} > 4e9'
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: 'High memory usage on {{ $labels.instance }}'
      description: 'Memory usage is {{ $value | humanize1024 }}'

  # Available/total ratio of the filesystem mounted at "/" below 10% for 5m.
  - alert: SynorLowDiskSpace
    expr: >-
      (node_filesystem_avail_bytes{mountpoint="/"}
      / node_filesystem_size_bytes{mountpoint="/"}) < 0.1
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: 'Low disk space on host'
      description: >-
        Only {{ $value | humanizePercentage }} disk space remaining
# --------------------------------------------------------------------------
# Uptime tracking against the 99.9% SLA target
# --------------------------------------------------------------------------
- name: synor_uptime
  interval: 1m
  rules:
  # Mean of the `up` series over the trailing 30 days, one output series
  # per synor-nodes target. 1.0 means every sample in the window was up.
  - record: synor:uptime_ratio:30d
    expr: 'avg_over_time(up{job="synor-nodes"}[30d])'

  # Fires when the recorded 30-day ratio stays under 0.999 for 1h.
  # The recorded series is per target, so the summary names the affected
  # instance, matching the other per-node alerts in this file.
  - alert: SynorUptimeBelowSLA
    expr: 'synor:uptime_ratio:30d < 0.999'
    for: 1h
    labels:
      severity: warning
    annotations:
      summary: 'Uptime below SLA target on {{ $labels.instance }}'
      description: >-
        30-day uptime is {{ $value | humanizePercentage }} (target: 99.9%)