← Back to task

Commit 4427e278

commit 4427e27847be780193b803cacea70f41f7acfaf0
Author: Coder Agent <coder@agents.omni>
Date:   Sat Apr 11 18:22:31 2026

    agentd: deploy hermetically via deployer
    
    Move agentd service config to a deployer-managed, repo-tracked flow.
    Use absolute Nix store paths for agentd and agent binaries.
    
    The daemon now writes AGENTD_AGENT_COMMAND and AGENTD_STATE_DIR into each
    persistent agent's env file and generates the systemd unit and exec wrapper under ~/.config.
    
    Prompt delivery over FIFO is now NUL-delimited to match Omni/Agent.
    HTTP send/stop now fall back to persistent-agent control when the agent is not tracked in memory (e.g. after a daemon restart).
    
    Add Omni/Ide/deploy-agentd.sh as canonical deploy script.
    Update Omni/Agentd/SPEC.md and Omni/Deploy/README.md.
    
    Task-Id: t-775
    Task-Id: t-776

diff --git a/Omni/Agentd/Daemon.hs b/Omni/Agentd/Daemon.hs
index b0f25ef4..b25a04b6 100644
--- a/Omni/Agentd/Daemon.hs
+++ b/Omni/Agentd/Daemon.hs
@@ -79,7 +79,6 @@ import qualified Data.Aeson as Aeson
 import qualified Data.Aeson.KeyMap as KeyMap
 import qualified Data.ByteString as BS
 import qualified Data.ByteString.Lazy as BL
-import qualified Data.ByteString.Lazy.Char8 as BLC8
 import qualified Data.IORef as IORef
 import qualified Data.List as List
 import qualified Data.Map.Strict as Map
@@ -860,14 +859,107 @@ agentEnvPath name = do
 
 agentStateDir :: IO FilePath
 agentStateDir = do
-  stateRoot <- defaultStateRoot
-  pure (stateRoot </> "agentd-agents")
+  mOverride <- Env.lookupEnv "AGENTD_STATE_DIR"
+  case mOverride of
+    Just dir -> pure dir
+    Nothing -> do
+      uid <- Posix.getEffectiveUserID
+      if uid == 0
+        then pure "/var/lib/omni/agentd-agents"
+        else do
+          stateRoot <- defaultStateRoot
+          pure (stateRoot </> "agentd-agents")
 
 agentFifoPath :: Text -> IO FilePath
 agentFifoPath name = do
   dir <- agentStateDir
   pure (dir </> Text.unpack name <> ".fifo")
 
+agentExecPath :: IO FilePath
+agentExecPath = do
+  dir <- agentEnvDir
+  pure (dir </> "agentd-agent-exec")
+
+agentSystemdUnitPath :: IO FilePath
+agentSystemdUnitPath = do
+  home <- Dir.getHomeDirectory
+  mConfig <- Env.lookupEnv "XDG_CONFIG_HOME"
+  let configRoot = fromMaybe (home </> ".config") mConfig
+  pure (configRoot </> "systemd/user/agentd-agent@.service")
+
+renderAgentExecScript :: Text
+renderAgentExecScript =
+  Text.unlines
+    [ "#!/usr/bin/env bash",
+      "set -euo pipefail",
+      "",
+      "if [[ -n \"${AGENTD_CWD:-}\" ]]; then",
+      "  cd \"$AGENTD_CWD\" || exit 1",
+      "fi",
+      "",
+      "STATE_DIR=\"${AGENTD_STATE_DIR:-${XDG_STATE_HOME:-${HOME}/.local/state}/agentd-agents}\"",
+      "FIFO=\"${STATE_DIR}/${AGENTD_AGENT_NAME}.fifo\"",
+      "mkdir -p \"$STATE_DIR\"",
+      "rm -f \"$FIFO\"",
+      "mkfifo \"$FIFO\"",
+      "",
+      "cleanup() {",
+      "  exec 3>&- 2>/dev/null || true",
+      "  rm -f \"$FIFO\"",
+      "}",
+      "trap cleanup EXIT",
+      "",
+      "exec 3<>\"$FIFO\"",
+      "",
+      "AGENTD_AGENT_COMMAND=\"${AGENTD_AGENT_COMMAND:-agent}\"",
+      "",
+      "exec \"$AGENTD_AGENT_COMMAND\" \\",
+      "  --provider \"${AGENTD_PROVIDER}\" \\",
+      "  --model \"${AGENTD_MODEL}\" \\",
+      "  --run-id \"${AGENTD_AGENT_NAME}\" \\",
+      "  --json \\",
+      "  ${AGENTD_EXTRA_ARGS:-} <&3"
+    ]
+
+renderAgentSystemdUnit :: FilePath -> FilePath -> Text
+renderAgentSystemdUnit envDir execPath =
+  Text.unlines
+    [ "[Unit]",
+      "Description=Agentd Agent (%i)",
+      "After=network-online.target",
+      "",
+      "[Service]",
+      "Type=simple",
+      "EnvironmentFile=" <> Text.pack envDir <> "/%i.env",
+      "Environment=HOME=%h",
+      "Environment=PATH=/usr/local/bin:/run/current-system/sw/bin:/usr/bin:/bin:%h/.nix-profile/bin",
+      "Environment=AGENTD_AGENT_NAME=%i",
+      "ExecStart=" <> Text.pack execPath,
+      "Restart=on-failure",
+      "RestartSec=30",
+      "TimeoutStopSec=10",
+      "StandardOutput=journal",
+      "StandardError=journal",
+      "SyslogIdentifier=agentd-agent-%i",
+      "",
+      "[Install]",
+      "WantedBy=default.target"
+    ]
+
+ensurePersistentRuntimeAssets :: IO ()
+ensurePersistentRuntimeAssets = do
+  envDir <- agentEnvDir
+  execPath <- agentExecPath
+  unitPath <- agentSystemdUnitPath
+  stateDir <- agentStateDir
+  Dir.createDirectoryIfMissing True envDir
+  Dir.createDirectoryIfMissing True stateDir
+  Dir.createDirectoryIfMissing True (takeDirectory unitPath)
+  TextIO.writeFile execPath renderAgentExecScript
+  (chmodCode, _chmodOut, chmodErr) <- Process.readProcessWithExitCode "chmod" ["0755", execPath] ""
+  when (chmodCode /= Exit.ExitSuccess) <| Exception.throwIO <| IOError.userError ("chmod failed for " <> execPath <> ": " <> chmodErr)
+  TextIO.writeFile unitPath (renderAgentSystemdUnit envDir execPath)
+
 statusToText :: AgentStatus -> Text
 statusToText = \case
   StatusPending -> "pending"
@@ -990,17 +1082,33 @@ queryMainPid name = do
     then pure (Read.readMaybe (Text.unpack (Text.strip (Text.pack out))))
     else pure Nothing
 
+resolvePersistentAgentCommand :: IO Text
+resolvePersistentAgentCommand = do
+  mOverride <- Env.lookupEnv "AGENTD_AGENT_COMMAND"
+  case mOverride of
+    Just cmd -> pure (Text.pack cmd)
+    Nothing -> do
+      mResolved <- Dir.findExecutable "agent"
+      pure (Text.pack (fromMaybe "agent" mResolved))
+
+resolvePersistentStateDir :: IO Text
+resolvePersistentStateDir = Text.pack </ agentStateDir
+
 writeAgentEnvFile :: AgentConfig -> IO FilePath
 writeAgentEnvFile cfg = do
   dir <- agentEnvDir
   Dir.createDirectoryIfMissing True dir
   path <- agentEnvPath (acName cfg)
+  agentCmd <- resolvePersistentAgentCommand
+  stateDir <- resolvePersistentStateDir
   let baseLines =
         [ "AGENTD_PROVIDER=" <> sanitizeEnvValue (acProvider cfg),
           "AGENTD_MODEL=" <> sanitizeEnvValue (acModel cfg),
           "AGENTD_CWD=" <> sanitizeEnvValue (Text.pack (acCwd cfg)),
           "AGENTD_THINKING=" <> sanitizeEnvValue (acThinking cfg),
-          "AGENTD_EXTRA_ARGS=" <> maybe "" sanitizeEnvValue (acExtraArgs cfg)
+          "AGENTD_EXTRA_ARGS=" <> maybe "" sanitizeEnvValue (acExtraArgs cfg),
+          "AGENTD_AGENT_COMMAND=" <> sanitizeEnvValue agentCmd,
+          "AGENTD_STATE_DIR=" <> sanitizeEnvValue stateDir
         ]
       extraLines = map (\(k, v) -> k <> "=" <> sanitizeEnvValue v) (Map.toAscList (acExtraEnv cfg))
   TextIO.writeFile path (Text.unlines (baseLines <> extraLines))
@@ -1069,28 +1177,32 @@ createAgent mDbPath cfg = do
     case existing of
       Just _ -> pure (Left ("Agent already exists: " <> runId))
       Nothing -> do
-        _ <- writeAgentEnvFile normalizedCfg
-        _ <- runSystemctlUser ["daemon-reload"]
-        now <- Time.getCurrentTime
-        let workspaceText = Text.pack (acCwd normalizedCfg)
-        insertResult <-
-          try @SomeException
-            <| SQL.execute
-              conn
-              "INSERT INTO agents (run_id, prompt, workspace, status, provider, model, cwd, thinking, extra_args, extra_env, updated_at) VALUES (?, '', ?, 'stopped', ?, ?, ?, ?, ?, ?, ?)"
-              ( runId,
-                workspaceText,
-                acProvider normalizedCfg,
-                acModel normalizedCfg,
-                workspaceText,
-                acThinking normalizedCfg,
-                acExtraArgs normalizedCfg,
-                encodeExtraEnv (acExtraEnv normalizedCfg),
-                tshow now
-              )
-        case insertResult of
-          Left err -> pure (Left ("Failed to create agent: " <> tshow err))
-          Right () -> Right </ hydratePersistentAgent conn normalizedCfg
+        setupResult <- try @SomeException ensurePersistentRuntimeAssets
+        case setupResult of
+          Left err -> pure (Left ("Failed to install persistent runtime assets: " <> tshow err))
+          Right () -> do
+            _ <- writeAgentEnvFile normalizedCfg
+            _ <- runSystemctlUser ["daemon-reload"]
+            now <- Time.getCurrentTime
+            let workspaceText = Text.pack (acCwd normalizedCfg)
+            insertResult <-
+              try @SomeException
+                <| SQL.execute
+                  conn
+                  "INSERT INTO agents (run_id, prompt, workspace, status, provider, model, cwd, thinking, extra_args, extra_env, updated_at) VALUES (?, '', ?, 'stopped', ?, ?, ?, ?, ?, ?, ?)"
+                  ( runId,
+                    workspaceText,
+                    acProvider normalizedCfg,
+                    acModel normalizedCfg,
+                    workspaceText,
+                    acThinking normalizedCfg,
+                    acExtraArgs normalizedCfg,
+                    encodeExtraEnv (acExtraEnv normalizedCfg),
+                    tshow now
+                  )
+            case insertResult of
+              Left err -> pure (Left ("Failed to create agent: " <> tshow err))
+              Right () -> Right </ hydratePersistentAgent conn normalizedCfg
 
 listPersistentAgents :: Maybe FilePath -> IO [PersistentAgent]
 listPersistentAgents mDbPath =
@@ -1126,16 +1238,21 @@ startPersistentAgent mDbPath runId =
     case mCfg of
       Nothing -> pure (Left ("Agent not found: " <> runId))
       Just cfg -> do
-        _ <- runSystemctlUser ["daemon-reload"]
-        startResult <- runSystemctlMutation runId ["start", systemdUnitName runId]
-        case startResult of
-          Left err -> pure (Left err)
+        setupResult <- try @SomeException ensurePersistentRuntimeAssets
+        case setupResult of
+          Left err -> pure (Left ("Failed to install persistent runtime assets: " <> tshow err))
           Right () -> do
-            now <- Time.getCurrentTime
-            updateAgentStarted conn runId now
-            mPid <- queryMainPid runId
-            forM_ mPid <| updateAgentPid conn runId
-            Right </ hydratePersistentAgent conn cfg
+            _ <- writeAgentEnvFile cfg
+            _ <- runSystemctlUser ["daemon-reload"]
+            startResult <- runSystemctlMutation runId ["start", systemdUnitName runId]
+            case startResult of
+              Left err -> pure (Left err)
+              Right () -> do
+                now <- Time.getCurrentTime
+                updateAgentStarted conn runId now
+                mPid <- queryMainPid runId
+                forM_ mPid <| updateAgentPid conn runId
+                Right </ hydratePersistentAgent conn cfg
 
 stopPersistentAgent :: Maybe FilePath -> Text -> IO (Either Text PersistentAgent)
 stopPersistentAgent mDbPath runId =
@@ -1159,15 +1276,21 @@ restartPersistentAgent mDbPath runId =
     case mCfg of
       Nothing -> pure (Left ("Agent not found: " <> runId))
       Just cfg -> do
-        restartResult <- runSystemctlMutation runId ["restart", systemdUnitName runId]
-        case restartResult of
-          Left err -> pure (Left err)
+        setupResult <- try @SomeException ensurePersistentRuntimeAssets
+        case setupResult of
+          Left err -> pure (Left ("Failed to install persistent runtime assets: " <> tshow err))
           Right () -> do
-            now <- Time.getCurrentTime
-            updateAgentStarted conn runId now
-            mPid <- queryMainPid runId
-            forM_ mPid <| updateAgentPid conn runId
-            Right </ hydratePersistentAgent conn cfg
+            _ <- writeAgentEnvFile cfg
+            _ <- runSystemctlUser ["daemon-reload"]
+            restartResult <- runSystemctlMutation runId ["restart", systemdUnitName runId]
+            case restartResult of
+              Left err -> pure (Left err)
+              Right () -> do
+                now <- Time.getCurrentTime
+                updateAgentStarted conn runId now
+                mPid <- queryMainPid runId
+                forM_ mPid <| updateAgentPid conn runId
+                Right </ hydratePersistentAgent conn cfg
 
 writePromptToFifo :: Text -> Text -> IO (Either Text ())
 writePromptToFifo runId message = do
@@ -1176,19 +1299,13 @@ writePromptToFifo runId message = do
   if not exists
     then pure (Left ("FIFO not found (is agent running?): " <> Text.pack fifo))
     else do
-      let payload =
-            Aeson.encode
-              <| Aeson.object
-                [ "type" .= ("prompt" :: Text),
-                  "message" .= message
-                ]
-          line = BLC8.unpack payload <> "\n"
+      let framedPrompt = Text.unpack message <> "\NUL"
       writeResult <-
         try @SomeException
           <| bracket
             (PosixIO.openFd fifo PosixIO.WriteOnly PosixIO.defaultFileFlags {PosixIO.nonBlock = True})
             PosixIO.closeFd
-            (\fd -> void (PosixIO.fdWrite fd line))
+            (\fd -> void (PosixIO.fdWrite fd framedPrompt))
       case writeResult of
         Left err -> pure (Left ("Failed to write FIFO: " <> tshow err))
         Right () -> pure (Right ())
@@ -1831,7 +1948,11 @@ sendToAgent :: DaemonState -> Text -> Text -> IO Bool
 sendToAgent state runId message = do
   running <- STM.atomically <| STM.readTVar (dsRunning state)
   case Map.lookup runId running of
-    Nothing -> pure False
+    Nothing -> do
+      fallback <- sendPersistentAgent (Just (dcDbPath (dsConfig state))) runId message
+      case fallback of
+        Right () -> pure True
+        Left _ -> pure False
     Just ra -> do
       result <- try @SomeException <| sendPromptToAgent (raStdinHandle ra) message
       case result of
@@ -1895,7 +2016,11 @@ stopHandler state runId = do
   stopped <- liftIO <| stopAgentProcess state runId
   if stopped
     then pure <| Aeson.object ["status" .= ("stopped" :: Text)]
-    else throwError err404 {errBody = "Agent not found"}
+    else do
+      fallback <- liftIO <| stopPersistentAgent (Just (dcDbPath (dsConfig state))) runId
+      case fallback of
+        Right _ -> pure <| Aeson.object ["status" .= ("stopped" :: Text)]
+        Left _ -> throwError err404 {errBody = "Agent not found"}
 
 deleteHandler :: DaemonState -> Text -> Handler Aeson.Value
 deleteHandler state runId = do
@@ -2065,6 +2190,16 @@ test =
           Just (Aeson.Object obj) ->
             Test.assertBool "updated_at key should be present" (isJust (KeyMap.lookup "updated_at" obj))
           Just _ -> Test.assertFailure "Expected object JSON for AgentInfo",
+      Test.unit "persistent runtime script uses agent stdin mode" <| do
+        let script = renderAgentExecScript
+        Test.assertBool "script should launch agent with json output" ("--json" `Text.isInfixOf` script)
+        Test.assertBool "script should forward run-id" ("--run-id \"${AGENTD_AGENT_NAME}\"" `Text.isInfixOf` script)
+        Test.assertBool "script should not reference legacy agentd-rpc" (not ("agentd-rpc" `Text.isInfixOf` script))
+        Test.assertBool "script should not use rpc mode flag" (not ("--mode rpc" `Text.isInfixOf` script)),
+      Test.unit "persistent unit template avoids ~/.local/bin" <| do
+        let unit = renderAgentSystemdUnit "/home/ben/.config/agentd-agents" "/home/ben/.config/agentd-agents/agentd-agent-exec"
+        Test.assertBool "unit should point at tracked exec path" ("ExecStart=/home/ben/.config/agentd-agents/agentd-agent-exec" `Text.isInfixOf` unit)
+        Test.assertBool "unit should not include ~/.local/bin in PATH" (not ("%h/.local/bin" `Text.isInfixOf` unit)),
       Test.group
         "integration"
         [ Test.unit "spawn completes and sends webhook" integrationSpawnWebhook,
diff --git a/Omni/Agentd/SPEC.md b/Omni/Agentd/SPEC.md
index 6eddc021..4fec97f9 100644
--- a/Omni/Agentd/SPEC.md
+++ b/Omni/Agentd/SPEC.md
@@ -106,6 +106,14 @@ Persistent sessions are controlled with standard Unix signals:
 
 Prompt delivery to persistent sessions uses stdin with NUL-delimited frames.
 
+Persistent runtime assets are generated by `agentd` itself on create/start/restart:
+
+- `~/.config/systemd/user/agentd-agent@.service`
+- `~/.config/agentd-agents/agentd-agent-exec`
+- `~/.config/agentd-agents/<name>.env`
+
+This keeps persistent-agent wiring declared in the omnirepo (via `Omni/Agentd/Daemon.hs`) and avoids ad-hoc `~/.local/bin` wrappers.
+
 ## Examples
 
 ### Simple task
diff --git a/Omni/Deploy/README.md b/Omni/Deploy/README.md
index 5ce80597..395d901e 100644
--- a/Omni/Deploy/README.md
+++ b/Omni/Deploy/README.md
@@ -10,6 +10,9 @@ A pull-based deployment system for deploying Nix-built services without full Nix
 # Build, cache to S3, and update manifest
 Omni/Ide/push.sh Biz/PodcastItLater/Web.py
 
+# Canonical agentd deploy (repo-tracked manifest shape + hermetic store paths)
+Omni/Ide/deploy-agentd.sh
+
 # The deployer on the target host polls every 5 minutes
 # To force immediate deployment, SSH to host and run:
 ssh biz sudo systemctl start deployer
diff --git a/Omni/Ide/deploy-agentd.sh b/Omni/Ide/deploy-agentd.sh
new file mode 100755
index 00000000..15c8a8d0
--- /dev/null
+++ b/Omni/Ide/deploy-agentd.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+# Deploy agentd via Omni deployer with hermetic store-path references.
+#
+# This script is the canonical, repo-tracked way to publish agentd service config.
+# It ensures both agentd and agent runtime paths are absolute Nix store paths.
+
+set -euo pipefail
+
+CODEROOT=${CODEROOT:?must set CODEROOT}
+cd "$CODEROOT"
+
+bild Omni/Agent.hs
+bild Omni/Agentd.hs
+bild Omni/Deploy/Manifest.hs
+
+agent_store=$(readlink "$CODEROOT/_/nix/Omni/Agent.hs")
+agentd_store=$(readlink "$CODEROOT/_/nix/Omni/Agentd.hs")
+manifest_store=$(readlink "$CODEROOT/_/nix/Omni/Deploy/Manifest.hs")
+revision=$(git rev-parse --short HEAD)
+
+service_json=$(jq -n \
+  --arg store_path "$agentd_store" \
+  --arg agent_cmd "$agent_store/bin/agent" \
+  --arg revision "$revision" \
+  '{
+    name: "agentd",
+    artifact: { type: "nix-closure", storePath: $store_path },
+    hosts: ["beryllium"],
+    exec: {
+      command: "agentd daemon --port 8400 --db /var/lib/omni/agentd.db --log-root /var/log/agentd --workspace /var/lib/omni/agentd/workspaces",
+      user: "ben",
+      group: "users",
+      workingDirectory: "/home/ben/omni/live"
+    },
+    env: {
+      HOME: "/home/ben",
+      CODEROOT: "/home/ben/omni/live",
+      LANG: "en_US.utf8",
+      LC_ALL: "en_US.utf8",
+      PATH: "/run/current-system/sw/bin:/usr/bin:/bin",
+      AGENTD_AGENT_COMMAND: $agent_cmd,
+      AGENTD_STATE_DIR: "/var/lib/omni/agentd-agents"
+    },
+    envFile: null,
+    http: null,
+    systemd: {
+      after: ["network-online.target"],
+      requires: [],
+      restart: "on-failure",
+      restartSec: 5,
+      type: "simple"
+    },
+    hardening: {
+      dynamicUser: false,
+      privateTmp: false,
+      protectSystem: "strict",
+      protectHome: false,
+      noNewPrivileges: false,
+      readWritePaths: ["/tmp", "/var/lib/omni", "/var/log/agentd", "/home/ben/omni"],
+      readOnlyPaths: ["/home/ben/work"]
+    },
+    revision: $revision,
+    forceRedeploy: true
+  }')
+
+"$manifest_store/bin/deploy-manifest" add-service "$service_json"
+
+echo "Updated manifest for agentd."
+echo "Deployer will reconcile on its next timer tick (or run: /run/wrappers/bin/sudo systemctl start deployer)."