commit a711c7bf3a25e398bebee9ded4e2c543bac3d0e9
Author: Coder Agent <coder@agents.omni>
Date: Wed Apr 15 08:53:58 2026
feat(agentd): restore persistent sessions from latest checkpoint
Write a session-latest checkpoint at the end of every run and auto-resume from it when a persistent session restarts.
Task-Id: t-793
diff --git a/Omni/Agent/Programs/Agent.hs b/Omni/Agent/Programs/Agent.hs
index 33f1b5cb..b5ba594b 100644
--- a/Omni/Agent/Programs/Agent.hs
+++ b/Omni/Agent/Programs/Agent.hs
@@ -266,8 +266,11 @@ runAgent config userPrompt = do
Nothing -> Right </ agentLoop config (acMaxIterations config)
Just budget -> Op.limit budget (agentLoop config (acMaxIterations config))
+ let checkpointSessionLatest = do
+ Op.checkpoint "session-latest"
+ Op.modify (\s -> s {asLastCheckpoint = Just "session-latest"})
+
-- Build result
- finalState <- Op.get
case result of
Left exhausted -> do
errTime <- Op.liftIO Time.getCurrentTime
@@ -276,6 +279,8 @@ runAgent config userPrompt = do
(tshow exhausted)
errTime
)
+ checkpointSessionLatest
+ finalState <- Op.get
tr <- Op.getTrace
pure
AgentResult
@@ -288,7 +293,8 @@ runAgent config userPrompt = do
arTrace = tr
}
Right response -> do
- unless (asHitMaxIterations finalState) <| do
+ stateAfterRun <- Op.get
+ unless (asHitMaxIterations stateAfterRun) <| do
doneTime <- Op.liftIO Time.getCurrentTime
Op.emit
( Trace.EventCustom
@@ -296,6 +302,8 @@ runAgent config userPrompt = do
(Aeson.object ["response" Aeson..= response])
doneTime
)
+ checkpointSessionLatest
+ finalState <- Op.get
tr <- Op.getTrace
pure
AgentResult
@@ -469,10 +477,11 @@ test =
result <- Seq.runSequential seqConfig initialAgentState (runAgent config "Hi")
case result of
Left err -> Test.assertFailure ("Failed: " <> str err)
- Right (agentResult, _, finalState) -> do
+ Right (agentResult, agentTrace, finalState) -> do
arError agentResult Test.@=? Nothing
arFinalMessage agentResult Test.@=? "Hello! I'm done."
- asIteration finalState Test.@=? 0,
+ asIteration finalState Test.@=? 0
+ Trace.checkpoints agentTrace Test.@=? ["init", "session-latest"],
Test.unit "runAgent appends to existing session state" <| do
provider <- Provider.mockProvider [Provider.MockText "first reply", Provider.MockText "second reply"]
let config =
diff --git a/Omni/Agentd/Daemon.hs b/Omni/Agentd/Daemon.hs
index 27ccf381..4192cd30 100644
--- a/Omni/Agentd/Daemon.hs
+++ b/Omni/Agentd/Daemon.hs
@@ -878,6 +878,11 @@ agentFifoPath name = do
dir <- agentStateDir
pure (dir </> Text.unpack name <> ".fifo")
+agentCheckpointDir :: Text -> IO FilePath
+agentCheckpointDir name = do
+ dir <- agentStateDir
+ pure (dir </> "checkpoints" </> Text.unpack name)
+
agentExecPath :: IO FilePath
agentExecPath = do
dir <- agentEnvDir
@@ -923,7 +928,10 @@ renderAgentExecScript =
"FIFO=\"${STATE_DIR}/${AGENTD_AGENT_NAME}.fifo\"",
"SESSIONS_DIR=\"${STATE_DIR}/sessions\"",
"SESSION_FILE=\"${SESSIONS_DIR}/${AGENTD_AGENT_NAME}.jsonl\"",
- "mkdir -p \"$STATE_DIR\" \"$SESSIONS_DIR\"",
+ "CHECKPOINTS_DIR=\"${STATE_DIR}/checkpoints\"",
+ "CHECKPOINT_DIR=\"${CHECKPOINTS_DIR}/${AGENTD_AGENT_NAME}\"",
+ "SESSION_CHECKPOINT=\"${CHECKPOINT_DIR}/session-latest.json\"",
+ "mkdir -p \"$STATE_DIR\" \"$SESSIONS_DIR\" \"$CHECKPOINT_DIR\"",
"touch \"$SESSION_FILE\"",
"rm -f \"$FIFO\"",
"mkfifo \"$FIFO\"",
@@ -938,13 +946,24 @@ renderAgentExecScript =
"exec > >(tee -a \"$SESSION_FILE\")",
"",
"AGENTD_AGENT_COMMAND=\"${AGENTD_AGENT_COMMAND:-agent}\"",
+ "RESUME_ARGS=()",
+ "if [[ -f \"$SESSION_CHECKPOINT\" ]]; then",
+ " RESUME_ARGS=(--resume \"$SESSION_CHECKPOINT\")",
+ "fi",
+ "EXTRA_ARGS=()",
+ "if [[ -n \"${AGENTD_EXTRA_ARGS:-}\" ]]; then",
+ " # shellcheck disable=SC2206",
+ " EXTRA_ARGS=(${AGENTD_EXTRA_ARGS})",
+ "fi",
"",
"exec \"$AGENTD_AGENT_COMMAND\" \\",
" --provider \"${AGENTD_PROVIDER}\" \\",
" --model \"${AGENTD_MODEL}\" \\",
" --run-id \"${AGENTD_AGENT_NAME}\" \\",
" --json \\",
- " ${AGENTD_EXTRA_ARGS:-} <&3"
+ " --checkpoint-dir \"$CHECKPOINT_DIR\" \\",
+ " \"${RESUME_ARGS[@]}\" \\",
+ " \"${EXTRA_ARGS[@]}\" <&3"
]
renderAgentSystemdUnit :: FilePath -> FilePath -> Text
@@ -1453,6 +1472,11 @@ removeAgentRuntimeArtifacts mDbPath runId = do
when fifoExists <| do
_ <- try @SomeException <| Dir.removePathForcibly fifo
pure ()
+ checkpointsDir <- agentCheckpointDir runId
+ checkpointsExist <- Dir.doesPathExist checkpointsDir
+ when checkpointsExist <| do
+ _ <- try @SomeException <| Dir.removePathForcibly checkpointsDir
+ pure ()
_ <- runSystemctlUser ["daemon-reload"]
pure ()
@@ -2644,6 +2668,10 @@ test =
Test.assertBool "script should launch agent with json output" ("--json" `Text.isInfixOf` script)
Test.assertBool "script should forward run-id" ("--run-id \"${AGENTD_AGENT_NAME}\"" `Text.isInfixOf` script)
Test.assertBool "script should append stdout to per-agent sessions jsonl" ("exec > >(tee -a \"$SESSION_FILE\")" `Text.isInfixOf` script)
+ Test.assertBool "script should keep per-agent checkpoint dir" ("CHECKPOINT_DIR=\"${CHECKPOINTS_DIR}/${AGENTD_AGENT_NAME}\"" `Text.isInfixOf` script)
+ Test.assertBool "script should persist checkpoints" ("--checkpoint-dir \"$CHECKPOINT_DIR\"" `Text.isInfixOf` script)
+ Test.assertBool "script should look for latest session checkpoint" ("SESSION_CHECKPOINT=\"${CHECKPOINT_DIR}/session-latest.json\"" `Text.isInfixOf` script)
+ Test.assertBool "script should wire resume args" ("RESUME_ARGS=(--resume \"$SESSION_CHECKPOINT\")" `Text.isInfixOf` script)
Test.assertBool "script should not reference legacy agentd-rpc" (not ("agentd-rpc" `Text.isInfixOf` script))
Test.assertBool "script should not use rpc mode flag" (not ("--mode rpc" `Text.isInfixOf` script)),
Test.unit "notify parser extracts completion events" <| do
diff --git a/Omni/Agentd/SPEC.md b/Omni/Agentd/SPEC.md
index 4fec97f9..1f0ac9cf 100644
--- a/Omni/Agentd/SPEC.md
+++ b/Omni/Agentd/SPEC.md
@@ -106,6 +106,10 @@ Persistent sessions are controlled with standard Unix signals:
Prompt delivery to persistent sessions uses stdin with NUL-delimited frames.
+Persistent sessions also write agent checkpoints to `$AGENTD_STATE_DIR/checkpoints/<agent>/`.
+The runtime wrapper always passes `--checkpoint-dir` and adds `--resume <checkpoint-dir>/session-latest.json`
+when that file exists, so `agentd restart` restores the prior conversation context.
+
Persistent runtime assets are generated by `agentd` itself on create/start:
- `~/.config/systemd/user/agentd-agent@.service`