From 88bfaeb04abab223b22721893f0a2738eff4aaf8 Mon Sep 17 00:00:00 2001 From: ddidderr Date: Sun, 7 Jun 2026 22:14:41 +0200 Subject: [PATCH] test(peer-cli): cover streamed retry fallback NEXT_STEPS item 5 needs streamed installs to have an explicit retry policy. The handler already retries whole-stream attempts across the majority-validated peer set, so add S42 to prove that behavior with the Docker harness instead of leaving it implicit. S42 starts two catalog-version-matching `cnctw` sources. The first source sorts first in retry order but has `--unrar /missing-unrar`, so its stream attempt fails before sending chunks. The second source then completes a fresh whole-stream attempt. The scenario asserts local-only installed state, no root archive or sentinel, no `.local.installing` staging leftover, chunk events only from the good source, matching streamed byte count, and SHA-256 payload equality against the good source's `unrar p`. This pins the current policy: retry the entire stream from another validated peer, do not preserve partial files across attempts, and do not promise byte-offset resume. Test Plan: - python3 -m py_compile crates/lanspread-peer-cli/scripts/run_extended_scenarios.py - python3 crates/lanspread-peer-cli/scripts/run_extended_scenarios.py S42 - git diff --check - git diff --cached --check Refs: NEXT_STEPS.md item 5 --- NEXT_STEPS.md | 13 ++- PEER_CLI_SCENARIOS.md | 22 ++++ .../scripts/run_extended_scenarios.py | 101 +++++++++++++++++- 3 files changed, 128 insertions(+), 8 deletions(-) diff --git a/NEXT_STEPS.md b/NEXT_STEPS.md index 33df769..91c00b7 100644 --- a/NEXT_STEPS.md +++ b/NEXT_STEPS.md @@ -38,14 +38,13 @@ product-ready. step: add catalog-owned archive or extracted-file SHA-256 hashes, then verify those at the receiver before commit. -5. **Upgrade retry/resume semantics** +5. **Done — Upgrade retry/resume semantics** - Right now, failed stream means failed operation and rollback. 
Next useful - step: - - - retry whole stream from another trusted peer - - later, maybe keep completed files and restart only the interrupted file - - avoid byte-offset resume until there’s a strong reason + Streamed install attempts now use the same majority-validated peer set as + normal downloads, and each failed attempt rolls back its staging transaction + before trying the next peer. S42 pins the policy: retry the whole stream from + another validated peer, keep no partial files across attempts, and do not add + byte-offset resume until there is a strong reason. 6. **Expand scenario coverage** diff --git a/PEER_CLI_SCENARIOS.md b/PEER_CLI_SCENARIOS.md index 4cc077a..1e1c27a 100644 --- a/PEER_CLI_SCENARIOS.md +++ b/PEER_CLI_SCENARIOS.md @@ -49,6 +49,7 @@ for deterministic local runs; mDNS/macvlan remains an environment smoke path. | S39 | Streamed install without keeping archive payload | Empty client connects to `fixture-bravo`, then sends `stream-install cnctw`. The source has real RAR `.eti` payload entries under `bin/` and `data/`; the receiver uses the container-bundled `unrar` stream provider. | Client emits `got-game-files`, `download-begin`, streamed `download-chunk-finished`, `download-finished`, `install-begin`, and `install-finished`. Local `cnctw` is `downloaded=false`, `installed=true`, `availability=LocalOnly`; root `version.ini` and `.eti` are absent; `local/bin/cnctw-payload.bin` and `local/data/cnctw-assets.dat` match `unrar p` output by SHA-256. | | S40 | Streamed install receiver is not a peer source | After S39, a third peer connects only to the streamed-install receiver. | The third peer may see the receiver's local-only summary in peer snapshots, but `list-games` remote aggregation does not expose `cnctw` as downloadable, `peer_count` remains zero/absent, and attempting `download cnctw` fails with no local files created.
| | S41 | Solid archive streamed install | Empty client connects to a peer serving `fixture-solid/cnctw`, whose `.eti` is a real solid RAR archive. The receiver uses the container-bundled `unrar` stream provider. | The fixture is verified as solid with `unrar lt`; streamed install finishes with `downloaded=false`, `installed=true`, `availability=LocalOnly`; root archive and `version.ini` are absent; streamed byte count equals the extracted solid entries; local payload SHA-256 hashes match `unrar p` output. | +| S42 | Streamed install whole-stream retry | Empty client connects to two peers serving the same catalog-version `cnctw`: one broken source whose `--unrar` path is missing, followed by one good source. | The broken source sorts before the good source in retry order, contributes zero chunks, and the good source completes a fresh whole-stream attempt. The final state is local-only installed, no root archive/sentinel, no `.local.installing`, byte count matches the extracted entries, and payload hashes match the good source. | ## Version-Skew Contract @@ -129,9 +130,30 @@ Use S39-S41 to pin down low-disk streamed installs: - S41 verifies the fixture is actually solid inside the source container, so solid handling stays covered by the same Docker harness as the existing streamed-install scenarios. +- S42 verifies retry/resume semantics: failed streamed attempts roll back their + staging directory and retry the whole stream from another validated peer. + There is no byte-offset resume contract. ## Run Log +### 2026-06-07 - Streamed Install Whole-Stream Retry (S42) + +- Code under test added S42 in `run_extended_scenarios.py`. +- Gates before Docker: `python3 -m py_compile + crates/lanspread-peer-cli/scripts/run_extended_scenarios.py` passed. +- Runner: + `python3 crates/lanspread-peer-cli/scripts/run_extended_scenarios.py S42` + passed against the current `lanspread-peer-cli:dev` image. 
+- S42 started a broken source with `--unrar /missing-unrar` and a good source + with the same catalog-version `cnctw` metadata. The broken source sorted first + (`10.66.0.2:32897`) and the good source second (`10.66.0.3:34092`). +- The broken source contributed zero chunks; the good source completed the fresh + whole-stream attempt with `3145728` streamed file bytes. +- The final client state was `downloaded=false`, `installed=true`, + `availability=LocalOnly`, with no root `version.ini`, no root `cnctw.eti`, + and no `.local.installing` staging directory. Payload SHA-256 hashes matched + the good source's `unrar p` output. + ### 2026-06-07 - Solid Streamed Install Coverage (S41) - Code under test added `fixture-solid/cnctw`, a real solid RAR `.eti`, plus diff --git a/crates/lanspread-peer-cli/scripts/run_extended_scenarios.py b/crates/lanspread-peer-cli/scripts/run_extended_scenarios.py index 0552fe4..6e3dbcc 100644 --- a/crates/lanspread-peer-cli/scripts/run_extended_scenarios.py +++ b/crates/lanspread-peer-cli/scripts/run_extended_scenarios.py @@ -1,10 +1,11 @@ #!/usr/bin/env python3 -"""Run the peer-cli scenarios S1-S41 through Docker.""" +"""Run the peer-cli scenarios S1-S42 through Docker.""" from __future__ import annotations import argparse import hashlib +import ipaddress import json import os import queue @@ -329,6 +330,7 @@ class Runner: ("S39", self.s39_streamed_install_local_only), ("S40", self.s40_streamed_receiver_not_source), ("S41", self.s41_solid_archive_streamed_install), + ("S42", self.s42_streamed_install_retries_next_source), ] for scenario_id, scenario in scenarios: @@ -1250,6 +1252,95 @@ class Runner: f"payload hashes={actual}, bytes={streamed_bytes}" ) + def s42_streamed_install_retries_next_source(self) -> str: + bad_dir = self.fixture_root / "s42-bad-source" + good_dir = self.fixture_root / "s42-good-source" + copy_game("cnctw", bad_dir, version="20160128") + copy_game("cnctw", good_dir, version="20160128") + + bad = self.peer( + 
"s42-bad-source", + games_dir=bad_dir, + extra_args=["--unrar", "/missing-unrar"], + ) + good = self.peer("s42-good-source", games_dir=good_dir) + if socket_addr_sort_key(bad.ready_addr) > socket_addr_sort_key(good.ready_addr): + raise ScenarioError( + "S42 requires the broken source to sort before the good source; " + f"bad={bad.ready_addr}, good={good.ready_addr}" + ) + + client = self.peer("s42-client") + connect_many(client, [bad, good]) + wait_remote_game(client, "cnctw", peer_count=2, version="20160128") + + waiter = LineWaiter(len(client.output)) + client.send({"cmd": "stream-install", "game_id": "cnctw"}) + client.wait_for( + event_is("got-game-files", "cnctw"), + timeout=20, + description="got retry cnctw files", + waiter=waiter, + ) + client.wait_for( + event_is("download-finished", "cnctw"), + timeout=60, + description="retry stream finish cnctw", + waiter=waiter, + ) + client.wait_for( + event_is("install-finished", "cnctw"), + timeout=30, + description="retry stream install cnctw", + waiter=waiter, + ) + + game = wait_local_game(client, "cnctw", downloaded=False, installed=True) + assert_game_state( + game, + downloaded=False, + installed=True, + availability="LocalOnly", + ) + game_root = client.host_games_dir / "cnctw" + assert_not_exists(game_root / ".local.installing") + assert_not_exists(game_root / "version.ini") + assert_not_exists(game_root / "cnctw.eti") + assert_only_chunk_sources(client, "cnctw", {good.ready_addr}) + + expected = { + "bin/cnctw-payload.bin": unrar_entry_sha256( + good, "cnctw", "bin/cnctw-payload.bin" + ), + "data/cnctw-assets.dat": unrar_entry_sha256( + good, "cnctw", "data/cnctw-assets.dat" + ), + } + actual = { + rel: sha256_file(game_root / "local" / rel) + for rel in expected + } + if actual != expected: + raise ScenarioError(f"retry streamed payload hashes mismatched: {actual} != {expected}") + + streamed_bytes = sum( + int(item.get("data", {}).get("length", 0)) + for item in client.output + if item.get("type") == 
"event" + and item.get("event") == "download-chunk-finished" + and item.get("data", {}).get("game_id") == "cnctw" + ) + expected_bytes = 3 * 1024 * 1024 + if streamed_bytes != expected_bytes: + raise ScenarioError( + f"retry streamed byte count mismatch: {streamed_bytes} != {expected_bytes}" + ) + + return ( + "broken first source failed without chunks, next source completed whole stream; " + f"good={good.ready_addr}, bad={bad.ready_addr}, bytes={streamed_bytes}" + ) + def run(command: list[str], description: str) -> subprocess.CompletedProcess[str]: result = subprocess.run( @@ -1402,6 +1493,14 @@ def assert_peer_rar_archive_solid(peer: Peer, game_id: str) -> None: raise ScenarioError(f"RAR archive details were not reported: {game_id}") +def socket_addr_sort_key(addr: str | None) -> tuple[int, int]: + if addr is None: + raise ScenarioError("cannot sort missing peer address") + host, port = addr.rsplit(":", 1) + host = host.removeprefix("[").removesuffix("]") + return (int(ipaddress.ip_address(host)), int(port)) + + def format_bytes(size: int) -> str: return f"{size / 1024 / 1024 / 1024:.2f} GiB"