← Back to task

Commit a6626add

commit a6626addb39b5febf1cf60ec61cb7bdd397567ef
Author: Coder Agent <coder@agents.omni>
Date:   Thu Feb 19 09:06:10 2026

    Research hydration source provider abstraction
    
    Task-Id: t-500

diff --git a/Omni/Agent/Prompt/Hydrate.hs b/Omni/Agent/Prompt/Hydrate.hs
index 2c56da37..bfcc5ad3 100644
--- a/Omni/Agent/Prompt/Hydrate.hs
+++ b/Omni/Agent/Prompt/Hydrate.hs
@@ -16,6 +16,9 @@
 -- 5. Optionally retrieve knowledge (long-term memories)
 -- 6. Assemble into PromptIR with budget enforcement
 --
+-- Provider abstraction research/design (t-500):
+--   Omni/Agent/Prompt/HydrationProviderDesign.md
+--
 -- : out prompt-hydrate
 -- : dep aeson
 -- : dep time
@@ -32,6 +35,17 @@ module Omni.Agent.Prompt.Hydrate
     SemanticResult (..),
     KnowledgeResult (..),
 
+    -- * Provider Abstraction (t-500)
+    SourceKind (..),
+    SourceCapability (..),
+    ProviderAuth (..),
+    ProviderConfig (..),
+    SourceProvider (..),
+    defaultProviderConfig,
+    providerSupports,
+    withSourceProvider,
+    withSourceProviders,
+
     -- * Hydration
     hydrate,
     hydrateWithObservation,
@@ -55,6 +69,8 @@ module Omni.Agent.Prompt.Hydrate
 where
 
 import Alpha
+import Data.Aeson (Value)
+import qualified Data.Map.Strict as Map
 import qualified Data.Text as Text
 import qualified Data.Time as Time
 import Omni.Agent.Prompt.IR hiding (main, test)
@@ -125,6 +141,120 @@ data KnowledgeResult = KnowledgeResult
   }
   deriving (Show, Eq, Generic)
 
+-- * Provider Abstraction (t-500)
+
+-- Recommendation from t-500 research:
+--
+-- Introduce a thin provider abstraction now, while keeping hydration callsites
+-- and the existing 'HydrationConfig' shape simple.
+--
+-- Why this tradeoff:
+-- - Useful once we have more than one backend for a source kind
+--   (Memory.db, code search, docs RAG, shared memories)
+-- - Avoids a broad rewrite because providers adapt back into existing
+--   'ContextSource' hooks
+-- - Enables explicit capabilities, identity routing, and auth strategy
+--
+-- Proposed migration path:
+-- 1. Wrap existing Memory.db sources as a 'SourceProvider'
+-- 2. Use 'withSourceProvider'/'withSourceProviders' to populate
+--    'HydrationConfig'
+-- 3. Add additional providers incrementally
+-- 4. Add routing/fallback policy as a follow-up once multiple providers exist
+--
+-- Identity/auth model:
+-- - 'pcIdentity': non-secret routing keys (owner/chat/thread/tenant)
+-- - 'pcAuth': secret acquisition strategy
+-- - 'pcOptions': provider-specific knobs
+
+-- | Hydration source kinds that a provider may support.
+data SourceKind
+  = SourceTemporalKind
+  | SourceSemanticKind
+  | SourceKnowledgeKind
+  deriving (Show, Eq, Ord, Generic)
+
+-- | Machine-readable capability declaration for routing/planning.
+data SourceCapability = SourceCapability
+  { scKind :: SourceKind,
+    scSupportsThreshold :: Bool,
+    scSupportsMaxTokens :: Bool,
+    scSupportsMaxItems :: Bool
+  }
+  deriving (Show, Eq, Generic)
+
+-- | How a provider obtains credentials.
+data ProviderAuth
+  = ProviderAuthNone
+  | ProviderAuthEnv Text
+  | ProviderAuthIdentityKey Text
+  | ProviderAuthOpaque (Map.Map Text Text)
+  deriving (Show, Eq, Generic)
+
+-- | Runtime config passed to provider builders.
+data ProviderConfig = ProviderConfig
+  { pcIdentity :: Map.Map Text Text,
+    pcOptions :: Map.Map Text Value,
+    pcAuth :: ProviderAuth
+  }
+  deriving (Show, Eq, Generic)
+
+-- | Backend provider that can construct one or more source kinds.
+--
+-- Granularity decision (t-500): one provider per backend, with per-source
+-- optional builders. This keeps registration unified while avoiding fake
+-- implementations for unsupported source kinds.
+data SourceProvider = SourceProvider
+  { spName :: Text,
+    spCapabilities :: [SourceCapability],
+    spBuildTemporal :: Maybe (ProviderConfig -> ContextSource TemporalResult),
+    spBuildSemantic :: Maybe (ProviderConfig -> ContextSource SemanticResult),
+    spBuildKnowledge :: Maybe (ProviderConfig -> ContextSource KnowledgeResult)
+  }
+  deriving (Generic)
+
+-- | Empty/default provider config.
+defaultProviderConfig :: ProviderConfig
+defaultProviderConfig =
+  ProviderConfig
+    { pcIdentity = Map.empty,
+      pcOptions = Map.empty,
+      pcAuth = ProviderAuthNone
+    }
+
+-- | True if the provider both declares and implements a source kind.
+providerSupports :: SourceProvider -> SourceKind -> Bool
+providerSupports provider kind =
+  let hasCapability = any ((== kind) <. scKind) (spCapabilities provider)
+      hasBuilder = case kind of
+        SourceTemporalKind -> isJust (spBuildTemporal provider)
+        SourceSemanticKind -> isJust (spBuildSemantic provider)
+        SourceKnowledgeKind -> isJust (spBuildKnowledge provider)
+   in hasCapability && hasBuilder
+
+-- | Materialize a provider into a 'HydrationConfig' via its builders alone;
+-- 'spCapabilities' is not consulted (see 'providerSupports' for that check).
+-- Existing explicit sources in the config win. Provider sources are used only
+-- for missing source kinds, allowing phased migration without behavior changes.
+withSourceProvider :: ProviderConfig -> SourceProvider -> HydrationConfig -> HydrationConfig
+withSourceProvider providerCfg provider cfg =
+  let temporal = (\build -> build providerCfg) </ spBuildTemporal provider
+      semantic = (\build -> build providerCfg) </ spBuildSemantic provider
+      knowledge = (\build -> build providerCfg) </ spBuildKnowledge provider
+   in cfg
+        { hcTemporalSource = hcTemporalSource cfg <|> temporal,
+          hcSemanticSource = hcSemanticSource cfg <|> semantic,
+          hcKnowledgeSource = hcKnowledgeSource cfg <|> knowledge
+        }
+
+-- | Apply multiple providers in order, all sharing one 'ProviderConfig'.
+--
+-- Earlier providers have higher precedence because 'withSourceProvider'
+-- preserves any already-populated source.
+withSourceProviders :: ProviderConfig -> [SourceProvider] -> HydrationConfig -> HydrationConfig
+withSourceProviders providerCfg providers cfg =
+  foldl' (\acc provider -> withSourceProvider providerCfg provider acc) cfg providers
+
 -- | Default hydration config with no sources.
 defaultHydrationConfig :: HydrationConfig
 defaultHydrationConfig =
@@ -593,5 +723,102 @@ test =
         let time = Time.UTCTime (Time.fromGregorian 2026 1 24) 0
             matches = [(time, "assistant", "Previous answer", 0.75)]
             formatted = formatSemanticMatches matches
-        Text.isInfixOf "75%" formatted Test.@=? True
+        Text.isInfixOf "75%" formatted Test.@=? True,
+      Test.unit "providerSupports requires both capability and builder" <| do
+        let providerWithBuilder =
+              SourceProvider
+                { spName = "memory",
+                  spCapabilities =
+                    [ SourceCapability
+                        { scKind = SourceTemporalKind,
+                          scSupportsThreshold = False,
+                          scSupportsMaxTokens = True,
+                          scSupportsMaxItems = True
+                        }
+                    ],
+                  spBuildTemporal = Just (\_ -> ContextSource temporalStub),
+                  spBuildSemantic = Nothing,
+                  spBuildKnowledge = Nothing
+                }
+            providerWithoutBuilder =
+              providerWithBuilder
+                { spName = "declares-only",
+                  spBuildTemporal = Nothing
+                }
+            temporalStub _params = do
+              now <- Time.getCurrentTime
+              pure
+                TemporalResult
+                  { trMessages = [(now, "user", "tester", "stub")],
+                    trTotalTokens = 1,
+                    trContextWarning = Nothing
+                  }
+        providerSupports providerWithBuilder SourceTemporalKind Test.@=? True
+        providerSupports providerWithoutBuilder SourceTemporalKind Test.@=? False,
+      Test.unit "withSourceProvider preserves explicit source and fills missing ones" <| do
+        let explicitTemporal =
+              ContextSource
+                <| \_params -> do
+                  now <- Time.getCurrentTime
+                  pure
+                    TemporalResult
+                      { trMessages = [(now, "user", "explicit", "explicit temporal")],
+                        trTotalTokens = 3,
+                        trContextWarning = Just "explicit"
+                      }
+            providerTemporal =
+              ContextSource
+                <| \_params -> do
+                  now <- Time.getCurrentTime
+                  pure
+                    TemporalResult
+                      { trMessages = [(now, "user", "provider", "provider temporal")],
+                        trTotalTokens = 2,
+                        trContextWarning = Just "provider"
+                      }
+            providerSemantic =
+              ContextSource
+                <| \_params -> do
+                  now <- Time.getCurrentTime
+                  pure
+                    SemanticResult
+                      { srMatches = [(now, "assistant", "provider semantic", 0.9)],
+                        srTotalTokens = 4
+                      }
+            provider =
+              SourceProvider
+                { spName = "memory",
+                  spCapabilities =
+                    [ SourceCapability SourceTemporalKind False True True,
+                      SourceCapability SourceSemanticKind True True True
+                    ],
+                  spBuildTemporal = Just (\_ -> providerTemporal),
+                  spBuildSemantic = Just (\_ -> providerSemantic),
+                  spBuildKnowledge = Nothing
+                }
+            cfg =
+              withSourceProvider defaultProviderConfig provider
+                <| defaultHydrationConfig
+                { hcTemporalSource = Just explicitTemporal
+                }
+            params =
+              SourceParams
+                { spObservation = "obs",
+                  spMaxTokens = 100,
+                  spMaxItems = 5,
+                  spThreshold = 0.5
+                }
+        -- Existing temporal source should win over provider source
+        case hcTemporalSource cfg of
+          Nothing -> Test.assertFailure "Expected temporal source"
+          Just src -> do
+            result <- runSource src params
+            trContextWarning result Test.@=? Just "explicit"
+
+        -- Missing semantic source should be filled by provider
+        case hcSemanticSource cfg of
+          Nothing -> Test.assertFailure "Expected semantic source"
+          Just src -> do
+            result <- runSource src params
+            length (srMatches result) Test.@=? 1
     ]
diff --git a/Omni/Agent/Prompt/HydrationProviderDesign.md b/Omni/Agent/Prompt/HydrationProviderDesign.md
new file mode 100644
index 00000000..8c187564
--- /dev/null
+++ b/Omni/Agent/Prompt/HydrationProviderDesign.md
@@ -0,0 +1,94 @@
+# Hydration Source Provider Abstraction (t-500)
+
+## Recommendation
+
+**Implement a provider abstraction, but keep it thin and adapter-based.**
+
+Rationale:
+- We already have three source kinds (`temporal`, `semantic`, `knowledge`).
+- Near-term roadmap includes multiple backends per kind (Memory.db, code search, docs RAG, shared agent memory).
+- Without a provider layer, every new backend expands `HydrationConfig` wiring and call-site complexity.
+- A thin provider abstraction gives extensibility now without rewriting hydration internals.
+
+## Answers to Research Questions
+
+### 1) Is the provider abstraction worth the complexity?
+
+**Yes, with scope control.**
+
+Worth it when:
+- More than one backend can satisfy the same source kind.
+- Backends differ in auth/identity requirements.
+- We want routing/fallback policy independent of backend implementations.
+
+Not worth it if we attempted a full framework rewrite up front. The right shape is a **small adapter layer** over existing `ContextSource` hooks.
+
+### 2) Right granularity: per-source-type or unified?
+
+**Unified provider registration + per-source optional builders.**
+
+This gives one backend object (`SourceProvider`) while allowing backend-specific support:
+- Memory provider: temporal + semantic + knowledge
+- Code search provider: semantic only
+- Docs RAG provider: semantic + knowledge
+
+This avoids fake/no-op implementations while keeping provider discovery and routing uniform.
+
+### 3) How do providers declare capabilities?
+
+Use explicit, machine-readable capabilities (`SourceCapability`) attached to each provider:
+- which source kind is supported
+- whether thresholds/max-items/max-tokens are honored
+
+This prevents brittle name-based logic and allows future policy selection.
+
+### 4) How does identity/auth work across providers?
+
+Split config by responsibility:
+- `pcIdentity`: non-secret routing context (owner/chat/thread/tenant)
+- `pcAuth`: credential acquisition mode (env ref, identity key, opaque mapping)
+- `pcOptions`: provider-specific tuning knobs
+
+This keeps secrets out of prompts and supports heterogeneous backends.
+
+### 5) Patterns to borrow from LangChain/LlamaIndex?
+
+Borrow selectively:
+- **Retriever-like abstraction** per backend
+- **Capability metadata** for routing/fallback
+- **Backend-agnostic query params** + backend-specific options
+
+Avoid importing framework-level complexity (global callback stacks, deeply nested runnables, heavy object graphs).
+
+## Proposed Types and Helpers (implemented in `Omni/Agent/Prompt/Hydrate.hs`)
+
+- `SourceKind`
+- `SourceCapability`
+- `ProviderAuth`
+- `ProviderConfig`
+- `SourceProvider`
+- `defaultProviderConfig`, `providerSupports`
+- `withSourceProvider`, `withSourceProviders`
+
+These adapt providers into existing fields:
+- `hcTemporalSource`
+- `hcSemanticSource`
+- `hcKnowledgeSource`
+
+No change required to `hydrate` execution semantics.
+
+## Migration Path
+
+1. **Wrap current Memory.db sources** as a `SourceProvider`.
+2. **Populate HydrationConfig via `withSourceProvider(s)`** (existing explicit sources still win).
+3. Add second provider (e.g., code search semantic provider).
+4. Add routing/fallback policy only after multi-provider behavior is exercised in production.
+5. If needed later, deprecate direct source fields in favor of provider-first config.
+
+## Decision Summary
+
+- ✅ Provider abstraction: **yes**
+- ✅ Granularity: **unified provider + per-source optional builders**
+- ✅ Capabilities: **explicit structured metadata**
+- ✅ Identity/auth: **split identity from secret acquisition**
+- ✅ External inspiration: **borrow retriever/capability ideas, avoid framework bloat**