From 0f101084385639125b5bb92469f8b0d4289eea80 Mon Sep 17 00:00:00 2001 From: ddidderr Date: Wed, 20 May 2026 21:08:06 +0200 Subject: [PATCH] perf(peer): widen LAN bulk-transfer windows and buffers Centralize the bulk-transfer sizing in config.rs and bump the values used on both ends of a QUIC connection: - CHUNK_SIZE: 32 MiB -> 128 MiB - QUIC_CONNECTION_DATA_WINDOW: 64 MiB -> 256 MiB - QUIC_STREAM_DATA_WINDOW: 32 MiB -> 128 MiB - QUIC_MAX_SEND_BUFFER_SIZE: 32 MiB -> 128 MiB - QUIC_INITIAL_CONGESTION_WINDOW: 1 MiB -> 4 MiB - FILE_TRANSFER_BUFFER_SIZE: 64 KiB -> 1 MiB (new constant) The previous 32 MiB stream window was already comfortably above the bandwidth-delay product of a sub-millisecond LAN at 2.5 GbE. The further bump is deliberately generous: the goal is to push flow control and per-syscall overhead far enough out of the way that they cannot be the suspect when isolating the remaining LAN download bottleneck (disk, NIC, or s2n-quic platform offload on the sending host). Memory pressure from the larger windows is not observable on a desktop client moving GB-sized games. stream_file_bytes previously read the local file in 64 KiB chunks. At multi-Gbit/s send rates that produced many thousands of disk reads per second; bumping to 1 MiB keeps the per-file syscall load modest with no measurable latency cost on streamed bulk transfers. The buffer size lives in config.rs as FILE_TRANSFER_BUFFER_SIZE so it stays adjustable from one place. Also record a start timestamp for each streamed file and report the average send throughput in MiB/s when the file finishes streaming; the existing completion log line is promoted from debug to info level to carry it. This matches the S37 measurement methodology already used in the peer-cli harness and makes per-file send throughput visible in normal operation. The peer-cli extended-scenarios harness uses CHUNK_SIZE as the tolerance bound for chunk-boundary variance in its assertions, so its constant is bumped to match. 
The multi-chunk planning unit test is rewritten to reference CHUNK_SIZE symbolically (CHUNK_SIZE * 3 + CHUNK_SIZE / 2) instead of a hardcoded 120 MiB; the previous literal would silently degrade into a single-chunk test at the new chunk size and stop exercising the spread-across-peers code path. Test Plan: - just fmt - just clippy - just test - python3 crates/lanspread-peer-cli/scripts/run_extended_scenarios.py S37 \ --build-image - python3 crates/lanspread-peer-cli/scripts/run_extended_scenarios.py S37 Refs: local LAN download performance investigation on 2026-05-20. Depends-on: d7f7dc737e06 (QUIC UDP socket buffer sizing). --- .../scripts/run_extended_scenarios.py | 2 +- crates/lanspread-peer/src/config.rs | 27 ++++++++++--------- .../lanspread-peer/src/download/planning.rs | 2 +- crates/lanspread-peer/src/peer.rs | 16 ++++++++--- 4 files changed, 29 insertions(+), 18 deletions(-) diff --git a/crates/lanspread-peer-cli/scripts/run_extended_scenarios.py b/crates/lanspread-peer-cli/scripts/run_extended_scenarios.py index 79d400c..5ff37e5 100644 --- a/crates/lanspread-peer-cli/scripts/run_extended_scenarios.py +++ b/crates/lanspread-peer-cli/scripts/run_extended_scenarios.py @@ -25,7 +25,7 @@ NETWORK = "lanspread" CONTAINER_PREFIX = "lanspread-peer-cli-ext" CATALOG_DB = "/app/game.db" FIXTURES = REPO / "crates" / "lanspread-peer-cli" / "fixtures" -CHUNK_SIZE = 32 * 1024 * 1024 +CHUNK_SIZE = 128 * 1024 * 1024 PERF_GAME_ID = "bf1942" PERF_GAME_SIZE = 2 * 1024 * 1024 * 1024 IGNORED_DIFF_NAMES = {".lanspread", ".lanspread.json", "local"} diff --git a/crates/lanspread-peer/src/config.rs b/crates/lanspread-peer/src/config.rs index b8473f1..deb14af 100644 --- a/crates/lanspread-peer/src/config.rs +++ b/crates/lanspread-peer/src/config.rs @@ -11,29 +11,32 @@ pub const PEER_PING_IDLE_SECS: u64 = 30; /// Timeout after which a peer is considered stale (seconds). pub const PEER_STALE_TIMEOUT_SECS: u64 = 90; -/// Size of each download chunk (32 MB). 
-pub const CHUNK_SIZE: u64 = 32 * 1024 * 1024; +/// Size of each download chunk (128 MiB). +pub const CHUNK_SIZE: u64 = 128 * 1024 * 1024; /// Number of chunk streams to keep in flight on one peer connection. /// -/// Four 32 MB chunks hide request/stream setup latency on fast LAN links without -/// opening an unbounded number of file handles or competing writes. +/// Four chunk streams hide request/stream setup latency on fast LAN links +/// without opening an unbounded number of file handles or competing writes. pub const PEER_DOWNLOAD_STREAM_WINDOW: usize = 4; +/// Application-level read buffer used when sending file bytes over QUIC (1 MiB). +pub const FILE_TRANSFER_BUFFER_SIZE: usize = 1024 * 1024; + /// Maximum number of retry attempts for failed chunk downloads. pub const MAX_RETRY_COUNT: usize = 3; -/// QUIC connection-level receive window for bulk LAN transfers (64 MiB). -pub const QUIC_CONNECTION_DATA_WINDOW: u64 = 64 * 1024 * 1024; +/// QUIC connection-level receive window for bulk LAN transfers (256 MiB). +pub const QUIC_CONNECTION_DATA_WINDOW: u64 = 256 * 1024 * 1024; -/// QUIC per-stream receive window for bulk LAN transfers (32 MiB). -pub const QUIC_STREAM_DATA_WINDOW: u64 = 32 * 1024 * 1024; +/// QUIC per-stream receive window for bulk LAN transfers (128 MiB). +pub const QUIC_STREAM_DATA_WINDOW: u64 = 128 * 1024 * 1024; -/// Maximum queued send data per QUIC stream (32 MiB). -pub const QUIC_MAX_SEND_BUFFER_SIZE: u32 = 32 * 1024 * 1024; +/// Maximum queued send data per QUIC stream (128 MiB). +pub const QUIC_MAX_SEND_BUFFER_SIZE: u32 = 128 * 1024 * 1024; -/// Initial congestion window for LAN-oriented BBR transfers (1 MiB). -pub const QUIC_INITIAL_CONGESTION_WINDOW: u32 = 1024 * 1024; +/// Initial congestion window for LAN-oriented BBR transfers (4 MiB). +pub const QUIC_INITIAL_CONGESTION_WINDOW: u32 = 4 * 1024 * 1024; /// Requested OS UDP send and receive buffer size for QUIC sockets (4 MiB). 
pub const QUIC_SOCKET_BUFFER_SIZE: usize = 4 * 1024 * 1024; diff --git a/crates/lanspread-peer/src/download/planning.rs b/crates/lanspread-peer/src/download/planning.rs index 4d2d138..417f412 100644 --- a/crates/lanspread-peer/src/download/planning.rs +++ b/crates/lanspread-peer/src/download/planning.rs @@ -247,7 +247,7 @@ mod tests { fn build_peer_plans_spreads_large_file_chunks_across_shared_peers() { let peers = vec![loopback_addr(12000), loopback_addr(12001)]; let large_file = "game/large.eti"; - let file_size = 120 * 1024 * 1024; + let file_size = CHUNK_SIZE * 3 + CHUNK_SIZE / 2; let mut file_peer_map = HashMap::new(); file_peer_map.insert("game/version.ini".to_string(), peers.clone()); file_peer_map.insert(large_file.to_string(), peers.clone()); diff --git a/crates/lanspread-peer/src/peer.rs b/crates/lanspread-peer/src/peer.rs index b1d46a8..43c358b 100644 --- a/crates/lanspread-peer/src/peer.rs +++ b/crates/lanspread-peer/src/peer.rs @@ -12,7 +12,7 @@ use tokio::{ time::Instant, }; -use crate::path_validation::validate_game_file_path; +use crate::{config::FILE_TRANSFER_BUFFER_SIZE, path_validation::validate_game_file_path}; async fn stream_file_bytes( tx: &mut SendStream, @@ -40,8 +40,9 @@ async fn stream_file_bytes( let mut transfer_complete = matches!(length, Some(0)); let mut total_bytes = 0u64; let mut last_total_bytes = 0u64; + let started = Instant::now(); let mut timestamp = Instant::now(); - let mut buf = vec![0u8; 64 * 1024]; + let mut buf = vec![0u8; FILE_TRANSFER_BUFFER_SIZE]; while remaining > 0 { let read_len = std::cmp::min(remaining, buf.len() as u64); @@ -84,8 +85,15 @@ async fn stream_file_bytes( } } - log::debug!( - "{remote_addr} finished streaming file bytes: {}, total_bytes: {total_bytes}", + let elapsed = started.elapsed(); + #[allow(clippy::cast_precision_loss)] + let mib_per_s = if elapsed.as_secs_f64() > 0.0 { + total_bytes as f64 / elapsed.as_secs_f64() / (1024.0 * 1024.0) + } else { + 0.0 + }; + log::info!( + "{remote_addr} 
finished streaming file bytes: {}, total_bytes: {total_bytes}, MiB/s: {mib_per_s:.2}", validated_path.display() );