commit b697c457c5a67317b3ef17dd8a4bfac1aee7b814
Author: Coder Agent <coder@agents.omni>
Date: Tue Feb 17 12:07:39 2026
agentd: fix log handle close, webhook validation, graceful shutdown
t-562: Log handle close on pi crash
- readPiOutput/readPiStderr now track log handle health via IORef
- After first write failure, stop all future log writes (no more
hundreds of 'handle is closed' errors in journalctl)
- Also stop reading on non-EOF pipe errors (broken pipe = dead process)
t-563: Webhook URL validation
- New validateWebhookUrl function checks http:// or https:// prefix
- spawnHandler returns 400 for invalid webhook URLs at spawn time
- spawnPiAgent also validates and records failure if invalid
- Prevents confusing InvalidUrlException later in agent lifecycle
t-564: Graceful SIGTERM shutdown
- Signal handler uses MVar to prevent double-shutdown races
- Calls gracefulShutdown then exitSuccess (previously Warp kept running)
- gracefulShutdown wraps stopAgentProcess in try/catch per agent
- Closes SQLite connection on shutdown
- Agents get quit command → 1s grace → SIGTERM sequence
Task-Id: t-562
Task-Id: t-563
Task-Id: t-564
diff --git a/Omni/Agentd/Daemon.hs b/Omni/Agentd/Daemon.hs
index 12e28c3e..8f572a05 100644
--- a/Omni/Agentd/Daemon.hs
+++ b/Omni/Agentd/Daemon.hs
@@ -661,6 +661,13 @@ resolveWorkspace config req runId = case spawnWorkspace req of
}
)
+-- | Validate that a webhook URL is a full http/https URL (t-563).
+validateWebhookUrl :: Maybe Text -> Either Text (Maybe Text)
+validateWebhookUrl Nothing = Right Nothing
+validateWebhookUrl (Just url)
+ | Text.isPrefixOf "http://" url || Text.isPrefixOf "https://" url = Right (Just url)
+ | otherwise = Left ("Invalid webhook URL: must start with http:// or https://, got: " <> Text.take 100 url)
+
-- | Spawn a pi agent directly (no pi_rpc.py wrapper)
spawnPiAgent :: DaemonState -> SpawnRequest -> IO Text
spawnPiAgent state req = do
@@ -670,8 +677,29 @@ spawnPiAgent state req = do
logRoot = dcLogRoot config
model = fromMaybe defaultModel (spawnModel req)
piPath = dcPiPath config
- webhookUrl = spawnWebhook req
+ -- Validate webhook URL before proceeding (t-563)
+ case validateWebhookUrl (spawnWebhook req) of
+ Left err -> do
+ insertAgent conn runId (spawnPrompt req) "" Nothing
+ now <- Time.getCurrentTime
+ updateAgentCompleted conn runId now (Just err) Nothing Nothing
+ pure runId
+ Right webhookUrl -> spawnPiAgentInner state req runId conn config logRoot model piPath webhookUrl
+
+-- | Inner spawn logic after validation
+spawnPiAgentInner ::
+ DaemonState ->
+ SpawnRequest ->
+ Text ->
+ SQL.Connection ->
+ DaemonConfig ->
+ FilePath ->
+ Text ->
+ FilePath ->
+ Maybe Text ->
+ IO Text
+spawnPiAgentInner state req runId conn config logRoot model piPath webhookUrl = do
-- Create log directory for this run
let runDir = logRoot </> Text.unpack runId
Dir.createDirectoryIfMissing True runDir
@@ -862,34 +890,39 @@ updateWorkspaceSizeFromAgent conn runId = do
sizeBytes <- calculateWorkspaceSize (Text.unpack (wrPath workspace))
updateWorkspaceSize conn runId sizeBytes
--- | Read pi output, update status, log events, send webhooks
+-- | Read pi output, update status, log events, send webhooks.
+-- Uses an IORef to track whether the log handle is still usable,
+-- so we stop writing after the handle closes (t-562).
readPiOutput :: DaemonState -> Text -> IO.Handle -> IO.Handle -> TVar AgentStatus -> Maybe Text -> IO ()
-readPiOutput state runId stdoutH logH statusVar webhookUrl =
- loop Nothing 0
+readPiOutput state runId stdoutH logH statusVar webhookUrl = do
+ logOk <- IORef.newIORef True
+ loop logOk Nothing 0
where
- loop :: Maybe Text -> Int -> IO ()
- loop lastSummary totalCostCents = do
+ loop :: IORef.IORef Bool -> Maybe Text -> Int -> IO ()
+ loop logOk lastSummary totalCostCents = do
readResult <- Exception.try @Exception.IOException (BS.hGetLine stdoutH)
case readResult of
Left err
| IOError.isEOFError err -> pure ()
| otherwise -> do
TextIO.hPutStrLn IO.stderr <| "Failed to read pi output: " <> tshow err
- loop lastSummary totalCostCents
+ pure () -- Stop reading on non-EOF errors (pipe broken = process dead)
Right lineBytes -> do
let line = TextEncoding.decodeUtf8With TextEncodingError.lenientDecode lineBytes
- -- Log to file
- logResult <-
- Exception.try @SomeException <| do
- TextIO.hPutStrLn logH line
- IO.hFlush logH
- case logResult of
- Left err -> TextIO.hPutStrLn IO.stderr <| "Failed to write pi log: " <> tshow err
- Right () -> pure ()
+ -- Log to file (skip if handle already known to be closed)
+ canLog <- IORef.readIORef logOk
+ when canLog <| do
+ logResult <-
+ Exception.try @SomeException <| do
+ TextIO.hPutStrLn logH line
+ IO.hFlush logH
+ case logResult of
+ Left _err -> IORef.writeIORef logOk False -- Stop future log writes
+ Right () -> pure ()
-- Parse and process event
case Aeson.decode (BL.fromStrict (encodeUtf8 line)) :: Maybe Aeson.Value of
- Nothing -> loop lastSummary totalCostCents
+ Nothing -> loop logOk lastSummary totalCostCents
Just event -> do
processResult <-
Exception.try @SomeException
@@ -897,30 +930,36 @@ readPiOutput state runId stdoutH logH statusVar webhookUrl =
case processResult of
Left err -> do
TextIO.hPutStrLn IO.stderr <| "Failed to process pi event: " <> tshow err
- loop lastSummary totalCostCents
- Right (newSummary, newCost) -> loop newSummary newCost
+ loop logOk lastSummary totalCostCents
+ Right (newSummary, newCost) -> loop logOk newSummary newCost
+-- | Read pi stderr and write to log file.
+-- Stops writing to log if the handle is closed (t-562).
readPiStderr :: IO.Handle -> IO.Handle -> IO ()
-readPiStderr logH stderrH = loop
+readPiStderr logH stderrH = do
+ logOk <- IORef.newIORef True
+ loop logOk
where
- loop = do
+ loop logOk = do
readResult <- Exception.try @Exception.IOException (BS.hGetLine stderrH)
case readResult of
Left err
| IOError.isEOFError err -> pure ()
| otherwise -> do
TextIO.hPutStrLn IO.stderr <| "Failed to read pi stderr: " <> tshow err
- loop
+ pure () -- Stop on non-EOF errors
Right lineBytes -> do
let line = TextEncoding.decodeUtf8With TextEncodingError.lenientDecode lineBytes
- logResult <-
- Exception.try @SomeException <| do
- TextIO.hPutStrLn logH line
- IO.hFlush logH
- case logResult of
- Left err -> TextIO.hPutStrLn IO.stderr <| "Failed to write pi log: " <> tshow err
- Right () -> pure ()
- loop
+ canLog <- IORef.readIORef logOk
+ when canLog <| do
+ logResult <-
+ Exception.try @SomeException <| do
+ TextIO.hPutStrLn logH line
+ IO.hFlush logH
+ case logResult of
+ Left _err -> IORef.writeIORef logOk False
+ Right () -> pure ()
+ loop logOk
-- | Process a single pi event
processEvent ::
@@ -1183,8 +1222,12 @@ server state =
spawnHandler :: DaemonState -> SpawnRequest -> Handler SpawnResponse
spawnHandler state req = do
- runId <- liftIO <| spawnPiAgent state req
- pure <| SpawnResponse runId "spawned"
+ -- Validate webhook URL upfront (t-563)
+ case validateWebhookUrl (spawnWebhook req) of
+ Left err -> throwError err400 {errBody = BL.fromStrict (encodeUtf8 err)}
+ Right _ -> do
+ runId <- liftIO <| spawnPiAgent state req
+ pure <| SpawnResponse runId "spawned"
listHandler :: DaemonState -> Handler [AgentInfo]
listHandler state = do
@@ -1246,17 +1289,17 @@ runDaemon config = do
dsDbConn = conn
}
- -- Handle SIGTERM/SIGINT gracefully
- _ <-
- Signals.installHandler
- Signals.sigTERM
- (Signals.Catch (gracefulShutdown state))
- Nothing
- _ <-
- Signals.installHandler
- Signals.sigINT
- (Signals.Catch (gracefulShutdown state))
- Nothing
+ -- Shutdown signal (t-564): MVar to coordinate graceful exit
+ shutdownSignal <- MVar.newEmptyMVar
+
+ -- Handle SIGTERM/SIGINT gracefully (t-564)
+ let onSignal = do
+ alreadyShutting <- not </ MVar.tryPutMVar shutdownSignal ()
+ unless alreadyShutting <| do
+ gracefulShutdown state
+ Exit.exitSuccess
+ _ <- Signals.installHandler Signals.sigTERM (Signals.Catch onSignal) Nothing
+ _ <- Signals.installHandler Signals.sigINT (Signals.Catch onSignal) Nothing
-- Start server
putText <| "Starting agentd daemon on port " <> tshow (dcPort config)
@@ -1268,15 +1311,23 @@ runDaemon config = do
where
takeDirectory = Text.unpack <. Text.intercalate "/" <. List.init <. Text.splitOn "/" <. Text.pack
--- | Graceful shutdown: stop all running agents
+-- | Graceful shutdown: stop all running agents, close handles (t-564)
gracefulShutdown :: DaemonState -> IO ()
gracefulShutdown state = do
putText "Shutting down agentd daemon..."
running <- STM.atomically <| STM.readTVar (dsRunning state)
+ -- Stop all agents with a timeout
forM_ (Map.keys running) <| \runId -> do
putText <| " Stopping agent: " <> runId
- _ <- stopAgentProcess state runId
- pure ()
+ stopResult <- try @SomeException <| stopAgentProcess state runId
+ case stopResult of
+ Left err -> putText <| " Warning: failed to stop " <> runId <> ": " <> tshow err
+ Right _ -> pure ()
+ -- Close database connection
+ closeResult <- try @SomeException <| SQL.close (dsDbConn state)
+ case closeResult of
+ Left err -> putText <| " Warning: failed to close DB: " <> tshow err
+ Right _ -> pure ()
putText "Shutdown complete."
-- * Tests