From 9ca0164d0b0a2da8414320763dcacbd440617d1d Mon Sep 17 00:00:00 2001
From: Vassiliy Yegorov
Date: Mon, 15 Jun 2026 10:22:24 +0700
Subject: [PATCH] fix(app): bridge auto-reconnect so daemon restart no longer bricks the GUI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Tauri bridge connected to the daemon once at startup and held a
single stream with no recovery: when the daemon exited (Restart/Stop,
crash, or an update), the reader emitted spacesh:disconnected and died,
and every later request went through the dead writer forever — the GUI
was permanently stuck (settings frozen, offline). Since the bridge is
Rust-side state that survives a webview reload, even Cmd+R didn't
recover it.

- bridge.rs: requests now reconnect-and-retry on failure with a
  single-flight guard (generation counter) so concurrent failures
  collapse into one reconnect and never open duplicate connections; a
  5s reply timeout catches silently-dropped connections. ensure_daemon
  respawns the daemon if it exited. On success the bridge emits
  spacesh:reconnected.
- App.tsx: on spacesh:reconnected, bump a connection epoch that keys
  LayoutEngine, remounting terminals so they re-attach (snapshot + live
  stream) to the restarted daemon; also reload health/config/status.
- Settings: drop the Stop button — with lazy daemon spawn any GUI
  request resurrects the daemon, so an in-GUI "stop" is contradictory.
  Restart now works end to end (shutdown → reconnect respawns → panels
  re-attach).

Co-Authored-By: Claude Opus 4.8 (1M context) --- app/src-tauri/src/bridge.rs | 98 ++++++++++++++++++++++++++++++++----- app/src/App.tsx | 15 +++++- app/src/Settings.tsx | 15 +++--- 3 files changed, 105 insertions(+), 23 deletions(-) diff --git a/app/src-tauri/src/bridge.rs b/app/src-tauri/src/bridge.rs index 5548541..da13d69 100644 --- a/app/src-tauri/src/bridge.rs +++ b/app/src-tauri/src/bridge.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use std::path::PathBuf; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; +use std::time::Duration; use anyhow::{Context, Result}; use base64::Engine; @@ -18,8 +19,16 @@ use tokio::sync::{mpsc, oneshot, Mutex}; pub struct Bridge { next_id: AtomicU64, - /// Outbound frames to the daemon. - tx: mpsc::Sender, + /// For respawning/reconnecting the daemon connection after it drops. + app: AppHandle, + sock: PathBuf, + /// Bumped on every successful reconnect; lets concurrent failing requests + /// collapse into a single reconnect (single-flight). + gen: AtomicU64, + /// Outbound frames to the daemon. Swapped on reconnect. + tx: Mutex>, + /// Serializes reconnect attempts. + reconnect_lock: Mutex<()>, /// Pending request id → reply slot. pending: Arc>>>, /// surface id → output channel into the webview. @@ -85,28 +94,91 @@ async fn ensure_daemon(sock: &PathBuf) -> Result { anyhow::bail!("daemon spawned ({}) but did not bind {} in time", daemon.display(), sock.display()) } +/// Connect (spawning the daemon if needed) and start the reader/writer tasks, +/// returning the outbound sender. Shared `pending`/`out_channels` are reused so +/// replies and live output keep routing across reconnects. 
+async fn spawn_connection( + sock: &PathBuf, + app: &AppHandle, + pending: Arc>>>, + out_channels: Arc>>>>, +) -> Result> { + let stream = ensure_daemon(sock).await?; + let (read_half, write_half) = stream.into_split(); + let (tx, rx) = mpsc::channel::(256); + spawn_writer(write_half, rx); + spawn_reader(read_half, app.clone(), pending, out_channels); + Ok(tx) +} + impl Bridge { pub async fn connect(app: AppHandle) -> Result { let sock = socket_path()?; - let stream = ensure_daemon(&sock).await?; - let (read_half, write_half) = stream.into_split(); - - let (tx, rx) = mpsc::channel::(256); let pending: Arc>>> = Arc::default(); let out_channels: Arc>>>> = Arc::default(); + let tx = spawn_connection(&sock, &app, pending.clone(), out_channels.clone()).await?; + Ok(Self { + next_id: AtomicU64::new(1), + app, + sock, + gen: AtomicU64::new(0), + tx: Mutex::new(tx), + reconnect_lock: Mutex::new(()), + pending, + out_channels, + }) + } - spawn_writer(write_half, rx); - spawn_reader(read_half, app, pending.clone(), out_channels.clone()); + /// Re-establish the daemon connection. Single-flight: callers pass the `gen` + /// they observed; if another caller already reconnected (gen advanced), this + /// is a no-op so we never open duplicate connections. + async fn reconnect(&self, seen_gen: u64) -> Result<()> { + let _guard = self.reconnect_lock.lock().await; + if self.gen.load(Ordering::Acquire) != seen_gen { + return Ok(()); + } + // Drop in-flight reply slots — their connection is gone; they'll error out. + self.pending.lock().await.clear(); + let new_tx = spawn_connection(&self.sock, &self.app, self.pending.clone(), self.out_channels.clone()).await?; + *self.tx.lock().await = new_tx; + self.gen.fetch_add(1, Ordering::Release); + let _ = self.app.emit("spacesh:reconnected", ()); + Ok(()) + } - Ok(Self { next_id: AtomicU64::new(1), tx, pending, out_channels }) + /// Send one request and await its reply with a timeout. 
Errors if the writer + /// is gone, the reply slot is dropped, or no reply arrives in time. + async fn send_once(&self, id: u64, env: Envelope) -> Result { + let (reply_tx, reply_rx) = oneshot::channel(); + self.pending.lock().await.insert(id, reply_tx); + let tx = self.tx.lock().await.clone(); + if tx.send(env).await.is_err() { + self.pending.lock().await.remove(&id); + anyhow::bail!("daemon writer closed"); + } + match tokio::time::timeout(Duration::from_secs(5), reply_rx).await { + Ok(Ok(env)) => Ok(env), + Ok(Err(_)) => anyhow::bail!("reply slot dropped"), + Err(_) => { + self.pending.lock().await.remove(&id); + anyhow::bail!("request timed out") + } + } } pub async fn request(&self, cmd: Cmd) -> Result { let id = self.next_id.fetch_add(1, Ordering::Relaxed); - let (reply_tx, reply_rx) = oneshot::channel(); - self.pending.lock().await.insert(id, reply_tx); - self.tx.send(Envelope::Req { id, cmd }).await?; - Ok(reply_rx.await?) + let seen_gen = self.gen.load(Ordering::Acquire); + let env = Envelope::Req { id, cmd }; + match self.send_once(id, env.clone()).await { + Ok(reply) => Ok(reply), + Err(_) => { + // Connection likely dropped — reconnect (respawns the daemon if + // it exited) and retry once. + self.reconnect(seen_gen).await?; + self.send_once(id, env).await + } + } } pub async fn register_output(&self, surface_id: String, channel: Channel>) { diff --git a/app/src/App.tsx b/app/src/App.tsx index 528fbeb..ab54f28 100644 --- a/app/src/App.tsx +++ b/app/src/App.tsx @@ -37,6 +37,9 @@ export function App() { const [sidebarOpen, setSidebarOpen] = useState(() => loadFlag("spacesh.sidebarOpen", true)); const [health, setHealth] = useState(null); const [config, setConfigState] = useState(null); + // Bumped when the daemon connection is re-established; used to remount the + // layout so terminals re-attach (snapshot + live stream) to the restarted daemon. 
+ const [connEpoch, setConnEpoch] = useState(0); const [connected, setConnected] = useState(false); const [focusedId, setFocusedId] = useState(null); const [searchSurfaceId, setSearchSurfaceId] = useState(null); @@ -112,7 +115,15 @@ export function App() { void loadHealth(); void getConfig().then((c) => { setConfigState(c); applyTheme(c.theme, c.accent); }).catch(() => {}); }); - return () => { void unlisten.then((f) => f()); void reconnect.then((f) => f()); }; + const reconnected = onDaemonRawEvent("spacesh:reconnected", () => { + setConnected(true); + setConnEpoch((n) => n + 1); // remount layout → terminals re-attach to the new daemon + void refresh(); + void seedEvents(); + void loadHealth(); + void getConfig().then((c) => { setConfigState(c); applyTheme(c.theme, c.accent); }).catch(() => {}); + }); + return () => { void unlisten.then((f) => f()); void reconnect.then((f) => f()); void reconnected.then((f) => f()); }; }, [refresh, seedEvents, loadHealth]); useEffect(() => { @@ -158,7 +169,7 @@ export function App() { )}
{active - ? setSearchSurfaceId(null)} font={termFont} palette={termPalette} /> + ? setSearchSurfaceId(null)} font={termFont} palette={termPalette} /> :
No workspace — create one to begin.
}
diff --git a/app/src/Settings.tsx b/app/src/Settings.tsx index 5398561..37e6807 100644 --- a/app/src/Settings.tsx +++ b/app/src/Settings.tsx @@ -1,6 +1,6 @@ import { useEffect, useRef, useState } from "react"; import { COLORS, FONT, ACCENTS } from "./theme"; -import { setConfig, shutdownDaemon, restartDaemon } from "./socketBridge"; +import { setConfig, restartDaemon } from "./socketBridge"; import type { ConfigView, DaemonHealth } from "./socketBridge"; const FONTS = ["JetBrains Mono", "Menlo", "Monaco", "SF Mono", "Fira Code", "Cascadia Code"]; @@ -71,7 +71,7 @@ function fmtUptime(ms: number): string { } function DaemonSection({ health, onReload }: { health: DaemonHealth | null; onReload: () => void }) { - const [confirm, setConfirm] = useState(null); + const [confirm, setConfirm] = useState(false); // Tick so uptime counts up live while the modal is open. const [, setTick] = useState(0); useEffect(() => { @@ -88,19 +88,18 @@ function DaemonSection({ health, onReload }: { health: DaemonHealth | null; onRe ) :
offline
}
- - +
{confirm && (
- {confirm === "stop" ? "Stop the daemon? All sessions end." : "Restart the daemon? Sessions end and respawn."} + Restart the daemon? Running sessions end and respawn; panels re-attach automatically.
- - +