e711cf3454
The follow-up backlog had drifted into three settled peer/runtime issues: the legacy game-list fallback contradicted the one-wire-version policy, the Tauri shell still re-derived local install state from disk after peer snapshots, and `Availability::Downloading` existed even though active operations are already reported through a separate operation table. Remove the legacy `AnnounceGames` request and fallback service. Discovery now ignores peers that do not advertise the current protocol and a peer id, and library changes are sent through the current delta path only. This keeps the runtime aligned with the documented current-build-only interoperability model. Make peer `LocalGamesUpdated` snapshots authoritative for local fields in the Tauri database. The GUI-side catalog still owns static metadata such as names, sizes, and descriptions, but downloaded, installed, local version, and availability now come from the peer runtime instead of a second whole-library filesystem scan. Snapshot reconciliation also pins the missing-begin and missing-finish lifecycle cases in tests. Collapse availability back to the settled `Ready` and `LocalOnly` states. Aggregation now counts only `Ready` peers as download sources, and the frontend no longer carries a dead `Downloading` enum value. The core peer also exposes the small non-GUI hooks needed by scripted callers: startup options for state and mDNS, a local-ready event, direct connection, peer snapshots, and an explicit post-download install policy. Those hooks reuse the same current protocol path and do not add compatibility shims. Test Plan: - `git diff --check` - `just fmt` - `just clippy` - `just test` Refs: BACKLOG.md, FINDINGS.md, IMPL_DECISIONS.md
466 lines
14 KiB
Rust
466 lines
14 KiB
Rust
//! Peer runtime task startup and shutdown orchestration.
|
|
|
|
use std::{
|
|
any::Any,
|
|
future::Future,
|
|
net::SocketAddr,
|
|
panic::AssertUnwindSafe,
|
|
path::PathBuf,
|
|
sync::Arc,
|
|
time::Duration,
|
|
};
|
|
|
|
use futures::FutureExt as _;
|
|
use tokio::sync::{
|
|
RwLock,
|
|
mpsc::{UnboundedReceiver, UnboundedSender},
|
|
watch,
|
|
};
|
|
use tokio_util::{sync::CancellationToken, task::TaskTracker};
|
|
|
|
use crate::{
|
|
PeerCommand,
|
|
PeerEvent,
|
|
PeerRuntimeComponent,
|
|
Unpacker,
|
|
context::Ctx,
|
|
events,
|
|
network::send_goodbye,
|
|
peer_db::PeerGameDB,
|
|
run_peer,
|
|
services::{
|
|
run_local_game_monitor,
|
|
run_peer_discovery,
|
|
run_ping_service,
|
|
run_server_component,
|
|
},
|
|
};
|
|
|
|
/// Handle to a running peer runtime.
|
|
///
|
|
/// Holds the command sender plus the runtime's shutdown token and a `stopped`
|
|
/// signal so callers can request a clean shutdown and wait for goodbye
|
|
/// notifications to flush.
|
|
pub struct PeerRuntimeHandle {
|
|
tx: UnboundedSender<PeerCommand>,
|
|
shutdown: CancellationToken,
|
|
stopped: watch::Receiver<bool>,
|
|
}
|
|
|
|
impl PeerRuntimeHandle {
|
|
/// Returns a clone of the command channel sender.
|
|
#[must_use]
|
|
pub fn sender(&self) -> UnboundedSender<PeerCommand> {
|
|
self.tx.clone()
|
|
}
|
|
|
|
/// Signals the runtime to shut down. Idempotent.
|
|
pub fn shutdown(&self) {
|
|
self.shutdown.cancel();
|
|
}
|
|
|
|
/// Resolves once the runtime task has fully stopped (services drained,
|
|
/// goodbye notifications sent). Returns even if the runtime stopped
|
|
/// without an explicit shutdown request.
|
|
pub async fn wait_stopped(&mut self) {
|
|
let _ = self.stopped.wait_for(|stopped| *stopped).await;
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Copy, Debug)]
|
|
pub(crate) enum SupervisionPolicy {
|
|
Required,
|
|
Restart { backoff: Duration },
|
|
BestEffort,
|
|
}
|
|
|
|
#[allow(clippy::too_many_arguments, clippy::implicit_hasher)]
|
|
pub(crate) fn spawn_peer_runtime(
|
|
tx_control: UnboundedSender<PeerCommand>,
|
|
rx_control: UnboundedReceiver<PeerCommand>,
|
|
tx_notify_ui: UnboundedSender<PeerEvent>,
|
|
peer_game_db: Arc<RwLock<PeerGameDB>>,
|
|
peer_id: String,
|
|
game_dir: PathBuf,
|
|
unpacker: Arc<dyn Unpacker>,
|
|
catalog: Arc<RwLock<std::collections::HashSet<String>>>,
|
|
enable_mdns: bool,
|
|
) -> PeerRuntimeHandle {
|
|
let shutdown = CancellationToken::new();
|
|
let task_tracker = TaskTracker::new();
|
|
let (tx_stopped, stopped) = watch::channel(false);
|
|
|
|
let runtime_shutdown = shutdown.clone();
|
|
let runtime_tracker = task_tracker.clone();
|
|
tokio::spawn(async move {
|
|
if let Err(err) = run_peer(
|
|
rx_control,
|
|
tx_notify_ui,
|
|
peer_game_db,
|
|
peer_id,
|
|
game_dir,
|
|
unpacker,
|
|
runtime_shutdown.clone(),
|
|
runtime_tracker.clone(),
|
|
catalog,
|
|
enable_mdns,
|
|
)
|
|
.await
|
|
{
|
|
log::error!("Peer system failed: {err}");
|
|
}
|
|
|
|
runtime_shutdown.cancel();
|
|
runtime_tracker.close();
|
|
runtime_tracker.wait().await;
|
|
if tx_stopped.send(true).is_err() {
|
|
log::debug!("Peer runtime stopped after handle was dropped");
|
|
}
|
|
});
|
|
|
|
PeerRuntimeHandle {
|
|
tx: tx_control,
|
|
shutdown,
|
|
stopped,
|
|
}
|
|
}
|
|
|
|
pub(crate) fn spawn_startup_services(ctx: &Ctx, tx_notify_ui: &UnboundedSender<PeerEvent>) {
|
|
spawn_quic_server(ctx, tx_notify_ui);
|
|
if ctx.enable_mdns {
|
|
spawn_peer_discovery_service(ctx, tx_notify_ui);
|
|
}
|
|
spawn_peer_liveness_service(ctx, tx_notify_ui);
|
|
spawn_local_library_monitor(ctx, tx_notify_ui);
|
|
}
|
|
|
|
pub(crate) async fn send_goodbye_notifications(ctx: &Ctx) {
|
|
let peer_id = ctx.peer_id.as_ref().clone();
|
|
let peer_addresses = { ctx.peer_game_db.read().await.get_peer_addresses() };
|
|
|
|
futures::future::join_all(
|
|
peer_addresses
|
|
.into_iter()
|
|
.map(|peer_addr| send_goodbye_notification(peer_addr, peer_id.clone())),
|
|
)
|
|
.await;
|
|
}
|
|
|
|
fn spawn_quic_server(ctx: &Ctx, tx_notify_ui: &UnboundedSender<PeerEvent>) {
|
|
let server_addr = SocketAddr::from(([0, 0, 0, 0], 0));
|
|
let peer_ctx = ctx.to_peer_ctx(tx_notify_ui.clone());
|
|
let tx_notify_ui = tx_notify_ui.clone();
|
|
let supervisor_tx = tx_notify_ui.clone();
|
|
|
|
spawn_supervised_service(
|
|
&ctx.task_tracker,
|
|
&ctx.shutdown,
|
|
&supervisor_tx,
|
|
PeerRuntimeComponent::QuicServer,
|
|
SupervisionPolicy::Required,
|
|
move || {
|
|
let peer_ctx = peer_ctx.clone();
|
|
let tx_notify_ui = tx_notify_ui.clone();
|
|
async move { run_server_component(server_addr, peer_ctx, tx_notify_ui).await }
|
|
},
|
|
);
|
|
}
|
|
|
|
fn spawn_peer_discovery_service(ctx: &Ctx, tx_notify_ui: &UnboundedSender<PeerEvent>) {
|
|
let ctx = ctx.clone();
|
|
let tx_notify_ui = tx_notify_ui.clone();
|
|
let task_tracker = ctx.task_tracker.clone();
|
|
let shutdown = ctx.shutdown.clone();
|
|
let supervisor_tx = tx_notify_ui.clone();
|
|
|
|
spawn_supervised_service(
|
|
&task_tracker,
|
|
&shutdown,
|
|
&supervisor_tx,
|
|
PeerRuntimeComponent::Discovery,
|
|
SupervisionPolicy::Restart {
|
|
backoff: Duration::from_secs(5),
|
|
},
|
|
move || {
|
|
let ctx = ctx.clone();
|
|
let tx_notify_ui = tx_notify_ui.clone();
|
|
async move { run_peer_discovery(tx_notify_ui, ctx).await }
|
|
},
|
|
);
|
|
}
|
|
|
|
fn spawn_peer_liveness_service(ctx: &Ctx, tx_notify_ui: &UnboundedSender<PeerEvent>) {
|
|
let tx_notify_ui = tx_notify_ui.clone();
|
|
let peer_game_db = ctx.peer_game_db.clone();
|
|
let active_operations = ctx.active_operations.clone();
|
|
let active_downloads = ctx.active_downloads.clone();
|
|
let shutdown = ctx.shutdown.clone();
|
|
let task_tracker = ctx.task_tracker.clone();
|
|
let supervisor_tx = tx_notify_ui.clone();
|
|
|
|
spawn_supervised_service(
|
|
&ctx.task_tracker,
|
|
&ctx.shutdown,
|
|
&supervisor_tx,
|
|
PeerRuntimeComponent::Liveness,
|
|
SupervisionPolicy::Restart {
|
|
backoff: Duration::from_secs(5),
|
|
},
|
|
move || {
|
|
let tx_notify_ui = tx_notify_ui.clone();
|
|
let peer_game_db = peer_game_db.clone();
|
|
let active_operations = active_operations.clone();
|
|
let active_downloads = active_downloads.clone();
|
|
let shutdown = shutdown.clone();
|
|
let task_tracker = task_tracker.clone();
|
|
async move {
|
|
run_ping_service(
|
|
tx_notify_ui,
|
|
peer_game_db,
|
|
active_operations,
|
|
active_downloads,
|
|
shutdown,
|
|
task_tracker,
|
|
)
|
|
.await
|
|
}
|
|
},
|
|
);
|
|
}
|
|
|
|
fn spawn_local_library_monitor(ctx: &Ctx, tx_notify_ui: &UnboundedSender<PeerEvent>) {
|
|
let ctx = ctx.clone();
|
|
let tx_notify_ui = tx_notify_ui.clone();
|
|
let task_tracker = ctx.task_tracker.clone();
|
|
let shutdown = ctx.shutdown.clone();
|
|
let supervisor_tx = tx_notify_ui.clone();
|
|
|
|
spawn_supervised_service(
|
|
&task_tracker,
|
|
&shutdown,
|
|
&supervisor_tx,
|
|
PeerRuntimeComponent::LocalMonitor,
|
|
SupervisionPolicy::BestEffort,
|
|
move || {
|
|
let ctx = ctx.clone();
|
|
let tx_notify_ui = tx_notify_ui.clone();
|
|
async move { run_local_game_monitor(tx_notify_ui, ctx).await }
|
|
},
|
|
);
|
|
}
|
|
|
|
async fn send_goodbye_notification(peer_addr: SocketAddr, peer_id: String) {
|
|
match tokio::time::timeout(Duration::from_secs(1), send_goodbye(peer_addr, peer_id)).await {
|
|
Ok(Ok(())) => {}
|
|
Ok(Err(err)) => log::warn!("Failed to send Goodbye to {peer_addr}: {err}"),
|
|
Err(_) => log::warn!("Timed out sending Goodbye to {peer_addr}"),
|
|
}
|
|
}
|
|
|
|
fn spawn_supervised_service<F, Fut>(
|
|
task_tracker: &TaskTracker,
|
|
shutdown: &CancellationToken,
|
|
tx_notify_ui: &UnboundedSender<PeerEvent>,
|
|
component: PeerRuntimeComponent,
|
|
policy: SupervisionPolicy,
|
|
mut make_service: F,
|
|
) where
|
|
F: FnMut() -> Fut + Send + 'static,
|
|
Fut: Future<Output = eyre::Result<()>> + Send + 'static,
|
|
{
|
|
let task_tracker = task_tracker.clone();
|
|
let shutdown = shutdown.clone();
|
|
let tx_notify_ui = tx_notify_ui.clone();
|
|
|
|
task_tracker.spawn(async move {
|
|
loop {
|
|
if shutdown.is_cancelled() {
|
|
break;
|
|
}
|
|
|
|
let result = match AssertUnwindSafe(make_service()).catch_unwind().await {
|
|
Ok(result) => result,
|
|
Err(payload) => Err(eyre::eyre!(
|
|
"component panicked: {}",
|
|
panic_payload_to_string(&payload)
|
|
)),
|
|
};
|
|
if shutdown.is_cancelled() {
|
|
break;
|
|
}
|
|
|
|
match policy {
|
|
SupervisionPolicy::Required => {
|
|
let error = match result {
|
|
Ok(()) => "component exited unexpectedly".to_string(),
|
|
Err(err) => err.to_string(),
|
|
};
|
|
report_required_service_failure(&tx_notify_ui, component, error, &shutdown);
|
|
break;
|
|
}
|
|
SupervisionPolicy::Restart { backoff } => {
|
|
match result {
|
|
Ok(()) => log::warn!("{component:?} exited; restarting in {backoff:?}"),
|
|
Err(err) => {
|
|
log::error!("{component:?} failed: {err}; restarting in {backoff:?}");
|
|
}
|
|
}
|
|
|
|
tokio::select! {
|
|
() = shutdown.cancelled() => break,
|
|
() = tokio::time::sleep(backoff) => {}
|
|
}
|
|
}
|
|
SupervisionPolicy::BestEffort => {
|
|
match result {
|
|
Ok(()) => log::warn!("{component:?} exited"),
|
|
Err(err) => log::error!("{component:?} failed: {err}"),
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
});
|
|
}
|
|
|
|
fn report_required_service_failure(
|
|
tx_notify_ui: &UnboundedSender<PeerEvent>,
|
|
component: PeerRuntimeComponent,
|
|
error: String,
|
|
shutdown: &CancellationToken,
|
|
) {
|
|
log::error!("{component:?} failed: {error}");
|
|
events::send(tx_notify_ui, PeerEvent::RuntimeFailed { component, error });
|
|
shutdown.cancel();
|
|
}
|
|
|
|
fn panic_payload_to_string(payload: &(dyn Any + Send)) -> String {
|
|
if let Some(message) = payload.downcast_ref::<&'static str>() {
|
|
return (*message).to_string();
|
|
}
|
|
|
|
if let Some(message) = payload.downcast_ref::<String>() {
|
|
return message.clone();
|
|
}
|
|
|
|
"unknown panic payload".to_string()
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use std::{
|
|
sync::{
|
|
Arc,
|
|
atomic::{AtomicUsize, Ordering},
|
|
},
|
|
time::Duration,
|
|
};
|
|
|
|
use tokio_util::{sync::CancellationToken, task::TaskTracker};
|
|
|
|
use super::{SupervisionPolicy, spawn_supervised_service};
|
|
use crate::{PeerRuntimeComponent, startup::PeerRuntimeHandle};
|
|
|
|
#[tokio::test]
|
|
async fn required_service_failure_cancels_runtime_and_emits_event() {
|
|
let tracker = TaskTracker::new();
|
|
let shutdown = CancellationToken::new();
|
|
let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel();
|
|
|
|
spawn_supervised_service(
|
|
&tracker,
|
|
&shutdown,
|
|
&tx,
|
|
PeerRuntimeComponent::QuicServer,
|
|
SupervisionPolicy::Required,
|
|
|| async { Err(eyre::eyre!("bind failed")) },
|
|
);
|
|
|
|
let event = tokio::time::timeout(Duration::from_secs(1), rx.recv())
|
|
.await
|
|
.expect("runtime failure event should arrive")
|
|
.expect("event channel should stay open");
|
|
|
|
assert!(shutdown.is_cancelled());
|
|
assert!(matches!(
|
|
event,
|
|
crate::PeerEvent::RuntimeFailed {
|
|
component: PeerRuntimeComponent::QuicServer,
|
|
..
|
|
}
|
|
));
|
|
|
|
tracker.close();
|
|
tokio::time::timeout(Duration::from_secs(1), tracker.wait())
|
|
.await
|
|
.expect("supervisor task should stop");
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn restart_service_restarts_until_shutdown() {
|
|
let tracker = TaskTracker::new();
|
|
let shutdown = CancellationToken::new();
|
|
let (tx, _rx) = tokio::sync::mpsc::unbounded_channel();
|
|
let attempts = Arc::new(AtomicUsize::new(0));
|
|
|
|
spawn_supervised_service(
|
|
&tracker,
|
|
&shutdown,
|
|
&tx,
|
|
PeerRuntimeComponent::Discovery,
|
|
SupervisionPolicy::Restart {
|
|
backoff: Duration::from_millis(10),
|
|
},
|
|
{
|
|
let attempts = attempts.clone();
|
|
move || {
|
|
let attempts = attempts.clone();
|
|
async move {
|
|
attempts.fetch_add(1, Ordering::SeqCst);
|
|
Err(eyre::eyre!("discovery worker stopped"))
|
|
}
|
|
}
|
|
},
|
|
);
|
|
|
|
tokio::time::timeout(Duration::from_secs(1), async {
|
|
loop {
|
|
if attempts.load(Ordering::SeqCst) >= 2 {
|
|
break;
|
|
}
|
|
tokio::task::yield_now().await;
|
|
}
|
|
})
|
|
.await
|
|
.expect("restartable service should run more than once");
|
|
|
|
shutdown.cancel();
|
|
tracker.close();
|
|
tokio::time::timeout(Duration::from_secs(1), tracker.wait())
|
|
.await
|
|
.expect("restart supervisor should stop after shutdown");
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn runtime_handle_can_shutdown_and_await_stopped() {
|
|
let (tx, _rx) = tokio::sync::mpsc::unbounded_channel();
|
|
let shutdown = CancellationToken::new();
|
|
let (tx_stopped, stopped) = tokio::sync::watch::channel(false);
|
|
let mut handle = PeerRuntimeHandle {
|
|
tx,
|
|
shutdown: shutdown.clone(),
|
|
stopped,
|
|
};
|
|
|
|
tokio::spawn(async move {
|
|
shutdown.cancelled().await;
|
|
let _ = tx_stopped.send(true);
|
|
});
|
|
|
|
handle.shutdown();
|
|
tokio::time::timeout(Duration::from_secs(1), handle.wait_stopped())
|
|
.await
|
|
.expect("runtime handle should observe stopped");
|
|
}
|
|
}
|