← Back to task

Commit a6d3d85c

commit a6d3d85c58968ea9e58a976a32181a0b49bb49b2
Author: Ben Sima <ben@bensima.com>
Date:   Sat Jan 3 11:31:57 2026

    Add structured logging to agent engine (t-338)
    
    New features:
    - AgentEvent sum type for all agent events (start, iteration, tool_call,
      tool_result, cost, thinking, complete, error, guardrail)
    - LogLevel type (LogInfo, LogDebug, LogTrace) for filtering
    - eventToJson for serializing events to JSON
    - engineOnEvent callback in EngineConfig for unified event handling
    - --log=LEVEL option in agent CLI (info, debug, trace)
    
    Cost reporting now includes:
    - Per-call tokens and cost
    - Cumulative totals
    - Iteration number
    
    Events are emitted at:
    - Agent start (model, provider, timestamp)
    - Each iteration start
    - Each tool call (name, args, iteration)
    - Each tool result (name, success, output, duration, iteration)
    - Cost updates (tokens, cost, totals, iteration)
    - Assistant thinking/text
    - Completion (iterations, total cost, total tokens)
    - Errors
    
    Usage:
      agent --log=debug 'prompt' 2>debug.jsonl
    
    Task-Id: t-338

diff --git a/Omni/Agent.hs b/Omni/Agent.hs
index 70f7c091..b45f7050 100755
--- a/Omni/Agent.hs
+++ b/Omni/Agent.hs
@@ -78,6 +78,7 @@ Options:
   --max-iter=N      Iteration limit [default: 20]
   --verbose         Show tool calls to stderr
   --json            Output structured JSON
+  --log=LEVEL       Structured JSON logging: info, debug, trace
   --dry-run         Show config, don't run
   -h --help         Show this help
   -v --version      Show version
@@ -150,6 +151,7 @@ data AgentOptions = AgentOptions
     optMaxIter :: Int,
     optVerbose :: Bool,
     optJson :: Bool,
+    optLogLevel :: Maybe Engine.LogLevel,
     optDryRun :: Bool,
     optPrompt :: Text
   }
@@ -164,6 +166,7 @@ defaultOptions =
       optMaxIter = 20,
       optVerbose = False,
       optJson = False,
+      optLogLevel = Nothing,
       optDryRun = False,
       optPrompt = ""
     }
@@ -222,6 +225,12 @@ parseOptions args = do
         | Text.null argPrompt = stdinContent
         | otherwise = argPrompt <> "\n\n--- Input Data ---\n" <> stdinContent
 
+  let logLevel = case Docopt.getArg args (Docopt.longOption "log") of
+        Just "info" -> Just Engine.LogInfo
+        Just "debug" -> Just Engine.LogDebug
+        Just "trace" -> Just Engine.LogTrace
+        _ -> Nothing
+
   pure
     AgentOptions
       { optProvider = maybe "auto" Text.pack <| Docopt.getArg args (Docopt.longOption "provider"),
@@ -230,6 +239,7 @@ parseOptions args = do
         optMaxIter = fromMaybe 20 <| (Docopt.getArg args (Docopt.longOption "max-iter") +> readMaybe),
         optVerbose = args `Cli.has` Docopt.longOption "verbose",
         optJson = args `Cli.has` Docopt.longOption "json",
+        optLogLevel = logLevel,
         optDryRun = args `Cli.has` Docopt.longOption "dry-run",
         optPrompt = Text.strip prompt
       }
@@ -309,9 +319,30 @@ runAgent opts = do
                 Engine.agentTools = tools,
                 Engine.agentMaxIterations = optMaxIter opts
               }
+          -- Structured event logger
+          logEvent :: Engine.AgentEvent -> IO ()
+          logEvent event = case optLogLevel opts of
+            Nothing -> pure ()
+            Just level -> do
+              let shouldLog = case event of
+                    Engine.EventStart {} -> level >= Engine.LogInfo
+                    Engine.EventComplete {} -> level >= Engine.LogInfo
+                    Engine.EventError {} -> level >= Engine.LogInfo
+                    Engine.EventIteration {} -> level >= Engine.LogDebug
+                    Engine.EventThinking {} -> level >= Engine.LogDebug
+                    Engine.EventToolCall {} -> level >= Engine.LogDebug
+                    Engine.EventToolResult {} -> level >= Engine.LogDebug
+                    Engine.EventCost {} -> level >= Engine.LogDebug
+                    Engine.EventGuardrail {} -> level >= Engine.LogInfo
+              when shouldLog <| do
+                let json = Engine.eventToJson event
+                BL.hPutStr IO.stderr (Aeson.encode json)
+                TextIO.hPutStrLn IO.stderr ""
+
           engineConfig =
             Engine.defaultEngineConfig
-              { Engine.engineOnActivity =
+              { Engine.engineOnEvent = logEvent,
+                Engine.engineOnActivity =
                   if optVerbose opts
                     then \msg -> TextIO.hPutStrLn IO.stderr <| "[agent] " <> msg
                     else \_ -> pure (),
@@ -328,7 +359,7 @@ runAgent opts = do
                     else \_ _ -> pure (),
                 Engine.engineOnToolResult =
                   if optVerbose opts
-                    then \name ok result -> TextIO.hPutStrLn IO.stderr <| "[result] " <> name <> " " <> (if ok then "ok" else "err") <> " " <> Text.take 100 result
+                    then \name ok res -> TextIO.hPutStrLn IO.stderr <| "[result] " <> name <> " " <> (if ok then "ok" else "err") <> " " <> Text.take 100 res
                     else \_ _ _ -> pure ()
               }
 
diff --git a/Omni/Agent/Engine.hs b/Omni/Agent/Engine.hs
index 45dd095c..d83dee53 100644
--- a/Omni/Agent/Engine.hs
+++ b/Omni/Agent/Engine.hs
@@ -16,7 +16,8 @@
 -- : dep case-insensitive
 -- : dep time
 module Omni.Agent.Engine
-  ( Tool (..),
+  ( -- * Core Types
+    Tool (..),
     LLM (..),
     EngineConfig (..),
     AgentConfig (..),
@@ -33,15 +34,26 @@ module Omni.Agent.Engine
     Choice (..),
     Usage (..),
     ToolApi (..),
+
+    -- * Structured Events
+    AgentEvent (..),
+    LogLevel (..),
+    eventToJson,
+
+    -- * Defaults and Helpers
     encodeToolForApi,
     defaultLLM,
     defaultEngineConfig,
     defaultAgentConfig,
     defaultGuardrails,
+
+    -- * Execution
     chat,
     runAgent,
     runAgentWithProvider,
     runAgentWithProviderStreaming,
+
+    -- * Testing
     main,
     test,
   )
@@ -62,6 +74,151 @@ import qualified Network.HTTP.Simple as HTTP
 import qualified Omni.Agent.Provider as Provider
 import qualified Omni.Test as Test
 
+-- | Log levels for filtering events
+data LogLevel
+  = -- | Only start, complete, errors
+    LogInfo
+  | -- | + tool calls, thinking, iterations
+    LogDebug
+  | -- | + full API payloads
+    LogTrace
+  deriving (Show, Eq, Ord, Generic)
+
+-- | Structured agent events for logging/observability
+data AgentEvent
+  = -- | Agent execution started
+    EventStart
+      { eventModel :: Text,
+        eventProvider :: Text,
+        eventTimestamp :: Time.UTCTime
+      }
+  | -- | New iteration starting
+    EventIteration
+      { eventIterationNum :: Int,
+        eventTimestamp :: Time.UTCTime
+      }
+  | -- | Assistant produced thinking/text
+    EventThinking
+      { eventContent :: Text,
+        eventIterationNum :: Int,
+        eventTimestamp :: Time.UTCTime
+      }
+  | -- | Tool call initiated
+    EventToolCall
+      { eventToolName :: Text,
+        eventToolArgs :: Aeson.Value,
+        eventIterationNum :: Int,
+        eventTimestamp :: Time.UTCTime
+      }
+  | -- | Tool call completed
+    EventToolResult
+      { eventToolName :: Text,
+        eventToolSuccess :: Bool,
+        eventToolOutput :: Text,
+        eventDurationMs :: Int,
+        eventIterationNum :: Int,
+        eventTimestamp :: Time.UTCTime
+      }
+  | -- | Cost update (emitted after each LLM call)
+    EventCost
+      { eventTokens :: Int, -- tokens this call
+        eventCostCents :: Double, -- cost this call
+        eventTotalTokens :: Int, -- cumulative tokens
+        eventTotalCostCents :: Double, -- cumulative cost
+        eventIterationNum :: Int,
+        eventTimestamp :: Time.UTCTime
+      }
+  | -- | Agent completed successfully
+    EventComplete
+      { eventIterations :: Int,
+        eventTotalCostCents :: Double,
+        eventTotalTokens :: Int,
+        eventTimestamp :: Time.UTCTime
+      }
+  | -- | Agent hit an error
+    EventError
+      { eventErrorMessage :: Text,
+        eventTimestamp :: Time.UTCTime
+      }
+  | -- | Guardrail triggered
+    EventGuardrail
+      { eventGuardrailResult :: GuardrailResult,
+        eventTimestamp :: Time.UTCTime
+      }
+  deriving (Show, Eq, Generic)
+
+-- | Convert an event to JSON for structured logging
+eventToJson :: AgentEvent -> Aeson.Value
+eventToJson = \case
+  EventStart model prov ts ->
+    Aeson.object
+      [ "type" .= ("start" :: Text),
+        "model" .= model,
+        "provider" .= prov,
+        "timestamp" .= ts
+      ]
+  EventIteration n ts ->
+    Aeson.object
+      [ "type" .= ("iteration" :: Text),
+        "iteration" .= n,
+        "timestamp" .= ts
+      ]
+  EventThinking content n ts ->
+    Aeson.object
+      [ "type" .= ("thinking" :: Text),
+        "content" .= content,
+        "iteration" .= n,
+        "timestamp" .= ts
+      ]
+  EventToolCall name args n ts ->
+    Aeson.object
+      [ "type" .= ("tool_call" :: Text),
+        "tool" .= name,
+        "args" .= args,
+        "iteration" .= n,
+        "timestamp" .= ts
+      ]
+  EventToolResult name success output durationMs n ts ->
+    Aeson.object
+      [ "type" .= ("tool_result" :: Text),
+        "tool" .= name,
+        "success" .= success,
+        "output" .= Text.take 1000 output, -- Truncate for logging
+        "duration_ms" .= durationMs,
+        "iteration" .= n,
+        "timestamp" .= ts
+      ]
+  EventCost tokens costCents totalToks totalCost iter ts ->
+    Aeson.object
+      [ "type" .= ("cost" :: Text),
+        "tokens" .= tokens,
+        "cost_cents" .= costCents,
+        "total_tokens" .= totalToks,
+        "total_cost_cents" .= totalCost,
+        "iteration" .= iter,
+        "timestamp" .= ts
+      ]
+  EventComplete iters cost tokens ts ->
+    Aeson.object
+      [ "type" .= ("complete" :: Text),
+        "iterations" .= iters,
+        "total_cost_cents" .= cost,
+        "total_tokens" .= tokens,
+        "timestamp" .= ts
+      ]
+  EventError msg ts ->
+    Aeson.object
+      [ "type" .= ("error" :: Text),
+        "message" .= msg,
+        "timestamp" .= ts
+      ]
+  EventGuardrail result ts ->
+    Aeson.object
+      [ "type" .= ("guardrail" :: Text),
+        "result" .= tshow result,
+        "timestamp" .= ts
+      ]
+
 main :: IO ()
 main = Test.run test
 
@@ -389,6 +546,9 @@ defaultAgentConfig =
 
 data EngineConfig = EngineConfig
   { engineLLM :: LLM,
+    -- | Unified structured event handler (new)
+    engineOnEvent :: AgentEvent -> IO (),
+    -- | Legacy callbacks (kept for backwards compatibility)
     engineOnCost :: Int -> Double -> IO (),
     engineOnActivity :: Text -> IO (),
     engineOnToolCall :: Text -> Text -> IO (),
@@ -404,6 +564,7 @@ defaultEngineConfig :: EngineConfig
 defaultEngineConfig =
   EngineConfig
     { engineLLM = defaultLLM,
+      engineOnEvent = \_ -> pure (),
       engineOnCost = \_ _ -> pure (),
       engineOnActivity = \_ -> pure (),
       engineOnToolCall = \_ _ -> pure (),
@@ -945,6 +1106,22 @@ estimateCost model tokens
   | "claude" `Text.isInfixOf` model = fromIntegral tokens * 0.9 / 1000
   | otherwise = fromIntegral tokens * 0.5 / 1000
 
+-- | Get provider name for logging
+getProviderName :: Provider.Provider -> Text
+getProviderName (Provider.OpenRouter _) = "openrouter"
+getProviderName (Provider.Ollama _) = "ollama"
+getProviderName (Provider.Anthropic _) = "anthropic"
+getProviderName (Provider.AnthropicOAuth _) = "anthropic-oauth"
+getProviderName (Provider.AmpCLI _) = "amp"
+
+-- | Get model name from provider
+getProviderModelName :: Provider.Provider -> Text
+getProviderModelName (Provider.OpenRouter cfg) = Provider.providerModel cfg
+getProviderModelName (Provider.Ollama cfg) = Provider.providerModel cfg
+getProviderModelName (Provider.Anthropic cfg) = Provider.providerModel cfg
+getProviderModelName (Provider.AnthropicOAuth cfg) = Provider.providerModel cfg
+getProviderModelName (Provider.AmpCLI _) = "amp"
+
 -- | Run agent with a Provider instead of LLM.
 -- This is the new preferred way to run agents with multiple backend support.
 runAgentWithProvider :: EngineConfig -> Provider.Provider -> AgentConfig -> Text -> IO (Either Text AgentResult)
@@ -955,7 +1132,12 @@ runAgentWithProvider engineCfg provider agentCfg userPrompt = do
       systemMsg = providerMessage Provider.System (agentSystemPrompt agentCfg)
       userMsg = providerMessage Provider.User userPrompt
       initialMessages = [systemMsg, userMsg]
+      modelName = getProviderModelName provider
+      providerName = getProviderName provider
 
+  -- Emit start event
+  now <- Time.getCurrentTime
+  engineOnEvent engineCfg <| EventStart modelName providerName now
   engineOnActivity engineCfg "Starting agent loop (Provider)"
   loopProvider provider toolApis toolMap initialMessages 0 0 0 0.0 Map.empty 0 0
   where
@@ -1008,10 +1190,14 @@ runAgentWithProvider engineCfg provider agentCfg userPrompt = do
                     resultError = Just errMsg
                   }
             Nothing -> do
+              iterTs <- Time.getCurrentTime
+              engineOnEvent engineCfg <| EventIteration (iteration + 1) iterTs
               engineOnActivity engineCfg <| "Iteration " <> tshow (iteration + 1)
               result <- Provider.chatWithUsage prov toolApis' msgs
               case result of
                 Left err -> do
+                  errTs <- Time.getCurrentTime
+                  engineOnEvent engineCfg <| EventError err errTs
                   engineOnError engineCfg err
                   pure (Left err)
                 Right chatRes -> do
@@ -1020,12 +1206,16 @@ runAgentWithProvider engineCfg provider agentCfg userPrompt = do
                       cost = case Provider.chatUsage chatRes +> Provider.usageCost of
                         Just actualCost -> actualCost * 100
                         Nothing -> estimateCost (getProviderModel prov) tokens
-                  engineOnCost engineCfg tokens cost
-                  let newTokens = totalTokens + tokens
+                      newTokens = totalTokens + tokens
                       newCost = totalCost + cost
+                  costTs <- Time.getCurrentTime
+                  engineOnEvent engineCfg <| EventCost tokens cost newTokens newCost (iteration + 1) costTs
+                  engineOnCost engineCfg tokens cost
                   let assistantText = Provider.msgContent msg
-                  unless (Text.null assistantText)
-                    <| engineOnAssistant engineCfg assistantText
+                  unless (Text.null assistantText) <| do
+                    thinkTs <- Time.getCurrentTime
+                    engineOnEvent engineCfg <| EventThinking assistantText (iteration + 1) thinkTs
+                    engineOnAssistant engineCfg assistantText
                   case Provider.msgToolCalls msg of
                     Nothing
                       | Text.null (Text.strip (Provider.msgContent msg)) && totalCalls > 0 -> do
@@ -1034,6 +1224,8 @@ runAgentWithProvider engineCfg provider agentCfg userPrompt = do
                               newMsgs = msgs <> [msg, promptMsg]
                           loopProvider prov toolApis' toolMap newMsgs (iteration + 1) totalCalls newTokens newCost toolCallCounts testFailures editFailures
                       | otherwise -> do
+                          completeTs <- Time.getCurrentTime
+                          engineOnEvent engineCfg <| EventComplete (iteration + 1) newCost newTokens completeTs
                           engineOnActivity engineCfg "Agent completed"
                           engineOnComplete engineCfg
                           pure
@@ -1047,6 +1239,8 @@ runAgentWithProvider engineCfg provider agentCfg userPrompt = do
                                 resultError = Nothing
                               }
                     Just [] -> do
+                      completeTs2 <- Time.getCurrentTime
+                      engineOnEvent engineCfg <| EventComplete (iteration + 1) newCost newTokens completeTs2
                       engineOnActivity engineCfg "Agent completed (empty tool calls)"
                       engineOnComplete engineCfg
                       pure
@@ -1060,7 +1254,7 @@ runAgentWithProvider engineCfg provider agentCfg userPrompt = do
                             resultError = Nothing
                           }
                     Just tcs -> do
-                      (toolResults, newTestFailures, newEditFailures) <- executeProviderToolCalls engineCfg toolMap tcs testFailures editFailures
+                      (toolResults, newTestFailures, newEditFailures) <- executeProviderToolCalls engineCfg toolMap tcs (iteration + 1) testFailures editFailures
                       let newMsgs = msgs <> [msg] <> toolResults
                           newCalls = totalCalls + length tcs
                           newToolCallCounts = updateProviderToolCallCounts toolCallCounts tcs
@@ -1077,9 +1271,9 @@ runAgentWithProvider engineCfg provider agentCfg userPrompt = do
     updateProviderToolCallCounts =
       foldr (\tc m -> Map.insertWith (+) (Provider.fcName (Provider.tcFunction tc)) 1 m)
 
-    executeProviderToolCalls :: EngineConfig -> Map.Map Text Tool -> [Provider.ToolCall] -> Int -> Int -> IO ([Provider.Message], Int, Int)
-    executeProviderToolCalls eCfg tMap tcs initialTestFailures initialEditFailures = do
-      results <- traverse (executeSingleProvider eCfg tMap) tcs
+    executeProviderToolCalls :: EngineConfig -> Map.Map Text Tool -> [Provider.ToolCall] -> Int -> Int -> Int -> IO ([Provider.Message], Int, Int)
+    executeProviderToolCalls eCfg tMap tcs iterNum initialTestFailures initialEditFailures = do
+      results <- traverse (executeSingleProvider eCfg tMap iterNum) tcs
       let msgs = map (\(m, _, _) -> m) results
           testDeltas = map (\(_, t, _) -> t) results
           editDeltas = map (\(_, _, e) -> e) results
@@ -1087,24 +1281,32 @@ runAgentWithProvider engineCfg provider agentCfg userPrompt = do
           totalEditFail = initialEditFailures + sum editDeltas
       pure (msgs, totalTestFail, totalEditFail)
 
-    executeSingleProvider :: EngineConfig -> Map.Map Text Tool -> Provider.ToolCall -> IO (Provider.Message, Int, Int)
-    executeSingleProvider eCfg tMap tc = do
+    executeSingleProvider :: EngineConfig -> Map.Map Text Tool -> Int -> Provider.ToolCall -> IO (Provider.Message, Int, Int)
+    executeSingleProvider eCfg tMap iterNum tc = do
       let name = Provider.fcName (Provider.tcFunction tc)
           argsText = Provider.fcArguments (Provider.tcFunction tc)
           callId = Provider.tcId tc
           -- Handle empty args as empty object (for no-argument tools)
           normalizedArgs = if Text.null (Text.strip argsText) then "{}" else argsText
+      -- Emit tool call event
+      callTs <- Time.getCurrentTime
+      let argsJson = fromMaybe Aeson.Null (Aeson.decode (BL.fromStrict (TE.encodeUtf8 normalizedArgs)))
+      engineOnEvent eCfg <| EventToolCall name argsJson iterNum callTs
       engineOnActivity eCfg <| "Executing tool: " <> name
       engineOnToolCall eCfg name argsText
       case Map.lookup name tMap of
         Nothing -> do
           let errMsg = "Tool not found: " <> name
+          resultTs <- Time.getCurrentTime
+          engineOnEvent eCfg <| EventToolResult name False errMsg 0 iterNum resultTs
           engineOnToolResult eCfg name False errMsg
           pure (Provider.Message Provider.ToolRole errMsg Nothing (Just callId), 0, 0)
         Just tool -> do
           case Aeson.decode (BL.fromStrict (TE.encodeUtf8 normalizedArgs)) of
             Nothing -> do
               let errMsg = "Invalid JSON arguments: " <> argsText
+              resultTs <- Time.getCurrentTime
+              engineOnEvent eCfg <| EventToolResult name False errMsg 0 iterNum resultTs
               engineOnToolResult eCfg name False errMsg
               pure (Provider.Message Provider.ToolRole errMsg Nothing (Just callId), 0, 0)
             Just args -> do
@@ -1118,6 +1320,7 @@ runAgentWithProvider engineCfg provider agentCfg userPrompt = do
                   testDelta = if isTestFailure then 1 else 0
                   isEditFailure = name == "edit_file" && isOldStrNotFoundProvider resultValue
                   editDelta = if isEditFailure then 1 else 0
+              engineOnEvent eCfg <| EventToolResult name True resultText durationMs iterNum endTime
               engineOnToolResult eCfg name True resultText
               _ <- engineOnToolTrace eCfg name argsText resultText durationMs
               pure (Provider.Message Provider.ToolRole resultText Nothing (Just callId), testDelta, editDelta)