commit a6626addb39b5febf1cf60ec61cb7bdd397567ef
Author: Coder Agent <coder@agents.omni>
Date: Thu Feb 19 09:06:10 2026
Research hydration source provider abstraction
Task-Id: t-500
diff --git a/Omni/Agent/Prompt/Hydrate.hs b/Omni/Agent/Prompt/Hydrate.hs
index 2c56da37..bfcc5ad3 100644
--- a/Omni/Agent/Prompt/Hydrate.hs
+++ b/Omni/Agent/Prompt/Hydrate.hs
@@ -16,6 +16,9 @@
-- 5. Optionally retrieve knowledge (long-term memories)
-- 6. Assemble into PromptIR with budget enforcement
--
+-- Provider abstraction research/design (t-500):
+-- Omni/Agent/Prompt/HydrationProviderDesign.md
+--
-- : out prompt-hydrate
-- : dep aeson
-- : dep time
@@ -32,6 +35,17 @@ module Omni.Agent.Prompt.Hydrate
SemanticResult (..),
KnowledgeResult (..),
+ -- * Provider Abstraction (t-500)
+ SourceKind (..),
+ SourceCapability (..),
+ ProviderAuth (..),
+ ProviderConfig (..),
+ SourceProvider (..),
+ defaultProviderConfig,
+ providerSupports,
+ withSourceProvider,
+ withSourceProviders,
+
-- * Hydration
hydrate,
hydrateWithObservation,
@@ -55,6 +69,8 @@ module Omni.Agent.Prompt.Hydrate
where
import Alpha
+import Data.Aeson (Value)
+import qualified Data.Map.Strict as Map
import qualified Data.Text as Text
import qualified Data.Time as Time
import Omni.Agent.Prompt.IR hiding (main, test)
@@ -125,6 +141,120 @@ data KnowledgeResult = KnowledgeResult
}
deriving (Show, Eq, Generic)
+-- * Provider Abstraction (t-500)
+
+-- Recommendation from t-500 research:
+--
+-- Introduce a thin provider abstraction now, while keeping hydration callsites
+-- and the existing 'HydrationConfig' shape simple.
+--
+-- Why this tradeoff:
+-- - Useful once we have more than one backend for a source kind
+-- (Memory.db, code search, docs RAG, shared memories)
+-- - Avoids a broad rewrite because providers adapt back into existing
+-- 'ContextSource' hooks
+-- - Enables explicit capabilities, identity routing, and auth strategy
+--
+-- Proposed migration path:
+-- 1. Wrap existing Memory.db sources as a 'SourceProvider'
+-- 2. Use 'withSourceProvider'/'withSourceProviders' to populate
+-- 'HydrationConfig'
+-- 3. Add additional providers incrementally
+-- 4. Add routing/fallback policy as a follow-up once multiple providers exist
+--
+-- Identity/auth model:
+-- - 'pcIdentity': non-secret routing keys (owner/chat/thread/tenant)
+-- - 'pcAuth': secret acquisition strategy
+-- - 'pcOptions': provider-specific knobs
+
+-- | Hydration source kinds that a provider may support; each kind has one optional builder field on 'SourceProvider'.
+data SourceKind
+ = SourceTemporalKind -- ^ Backed by 'spBuildTemporal'.
+ | SourceSemanticKind -- ^ Backed by 'spBuildSemantic'.
+ | SourceKnowledgeKind -- ^ Backed by 'spBuildKnowledge'.
+ deriving (Show, Eq, Ord, Generic)
+
+-- | Machine-readable capability declaration for routing/planning; 'providerSupports' reads only 'scKind'.
+data SourceCapability = SourceCapability
+ { scKind :: SourceKind, -- ^ Source kind this capability entry describes.
+ scSupportsThreshold :: Bool, -- ^ Whether the provider honors thresholds (presumably 'spThreshold' of 'SourceParams' — confirm).
+ scSupportsMaxTokens :: Bool, -- ^ Whether the provider honors max-tokens (presumably 'spMaxTokens' — confirm).
+ scSupportsMaxItems :: Bool -- ^ Whether the provider honors max-items (presumably 'spMaxItems' — confirm).
+ }
+ deriving (Show, Eq, Generic)
+
+-- | How a provider obtains credentials (the acquisition strategy carried in 'pcAuth').
+data ProviderAuth
+ = ProviderAuthNone -- ^ No credentials; this is what 'defaultProviderConfig' uses.
+ | ProviderAuthEnv Text -- ^ Environment reference (presumably the variable name, not the secret itself — confirm).
+ | ProviderAuthIdentityKey Text -- ^ Identity key (presumably a key looked up in 'pcIdentity' — confirm).
+ | ProviderAuthOpaque (Map.Map Text Text) -- ^ Opaque provider-defined mapping. NOTE(review): the derived 'Show' prints its values — confirm it never holds raw secrets.
+ deriving (Show, Eq, Generic)
+
+-- | Runtime config passed to provider builders (the argument of each @spBuild*@ function).
+data ProviderConfig = ProviderConfig
+ { pcIdentity :: Map.Map Text Text, -- ^ Non-secret routing keys (owner/chat/thread/tenant).
+ pcOptions :: Map.Map Text Value, -- ^ Provider-specific knobs, stored as aeson 'Value's.
+ pcAuth :: ProviderAuth -- ^ Secret acquisition strategy.
+ }
+ deriving (Show, Eq, Generic)
+
+-- | Backend provider that can construct one or more source kinds.
+--
+-- Granularity decision (t-500): one provider per backend, with per-source
+-- optional builders. This keeps registration unified while avoiding fake
+-- implementations for unsupported source kinds.
+data SourceProvider = SourceProvider
+ { spName :: Text, -- ^ Provider name (the tests in this module use @"memory"@).
+ spCapabilities :: [SourceCapability], -- ^ Declared capabilities; consulted by 'providerSupports'.
+ spBuildTemporal :: Maybe (ProviderConfig -> ContextSource TemporalResult), -- ^ Temporal source builder; 'Nothing' when the backend has none.
+ spBuildSemantic :: Maybe (ProviderConfig -> ContextSource SemanticResult), -- ^ Semantic source builder; 'Nothing' when the backend has none.
+ spBuildKnowledge :: Maybe (ProviderConfig -> ContextSource KnowledgeResult) -- ^ Knowledge source builder; 'Nothing' when the backend has none.
+ }
+ deriving (Generic)
+
+-- | Baseline 'ProviderConfig': no identity keys, no options, no credentials.
+defaultProviderConfig :: ProviderConfig
+defaultProviderConfig =
+  ProviderConfig
+    { pcAuth = ProviderAuthNone,
+      pcOptions = Map.empty,
+      pcIdentity = Map.empty
+    }
+
+-- | True only when the provider both declares the kind in 'spCapabilities' and carries a builder for it.
+providerSupports :: SourceProvider -> SourceKind -> Bool
+providerSupports provider kind = declared && implemented
+  where
+    declared = any (\capability -> scKind capability == kind) (spCapabilities provider)
+    implemented = case kind of
+      SourceTemporalKind -> isJust (spBuildTemporal provider)
+      SourceSemanticKind -> isJust (spBuildSemantic provider)
+      SourceKnowledgeKind -> isJust (spBuildKnowledge provider)
+
+-- | Fill the empty source slots of a 'HydrationConfig' from a provider's builders.
+-- Sources already set in the config are kept, so migration can be phased without
+-- behavior changes. Only builder presence matters here; 'spCapabilities' is not consulted.
+withSourceProvider :: ProviderConfig -> SourceProvider -> HydrationConfig -> HydrationConfig
+withSourceProvider providerCfg provider cfg =
+  cfg
+    { hcTemporalSource = hcTemporalSource cfg <|> fromTemporal,
+      hcSemanticSource = hcSemanticSource cfg <|> fromSemantic,
+      hcKnowledgeSource = hcKnowledgeSource cfg <|> fromKnowledge
+    }
+  where
+    fromTemporal = (\mkSource -> mkSource providerCfg) </ spBuildTemporal provider
+    fromSemantic = (\mkSource -> mkSource providerCfg) </ spBuildSemantic provider
+    fromKnowledge = (\mkSource -> mkSource providerCfg) </ spBuildKnowledge provider
+
+-- | Apply providers in list order. Earlier providers take precedence because
+-- 'withSourceProvider' never overwrites an already-populated source.
+withSourceProviders :: ProviderConfig -> [SourceProvider] -> HydrationConfig -> HydrationConfig
+withSourceProviders providerCfg providers cfg =
+  foldl' applyProvider cfg providers
+  where
+    applyProvider acc provider = withSourceProvider providerCfg provider acc
+
-- | Default hydration config with no sources.
defaultHydrationConfig :: HydrationConfig
defaultHydrationConfig =
@@ -593,5 +723,102 @@ test =
let time = Time.UTCTime (Time.fromGregorian 2026 1 24) 0
matches = [(time, "assistant", "Previous answer", 0.75)]
formatted = formatSemanticMatches matches
- Text.isInfixOf "75%" formatted Test.@=? True
+ Text.isInfixOf "75%" formatted Test.@=? True,
+ Test.unit "providerSupports requires both capability and builder" <| do
+ let providerWithBuilder =
+ SourceProvider
+ { spName = "memory",
+ spCapabilities =
+ [ SourceCapability
+ { scKind = SourceTemporalKind,
+ scSupportsThreshold = False,
+ scSupportsMaxTokens = True,
+ scSupportsMaxItems = True
+ }
+ ],
+ spBuildTemporal = Just (\_ -> ContextSource temporalStub),
+ spBuildSemantic = Nothing,
+ spBuildKnowledge = Nothing
+ }
+ providerWithoutBuilder =
+ providerWithBuilder
+ { spName = "declares-only",
+ spBuildTemporal = Nothing
+ }
+ temporalStub _params = do
+ now <- Time.getCurrentTime
+ pure
+ TemporalResult
+ { trMessages = [(now, "user", "tester", "stub")],
+ trTotalTokens = 1,
+ trContextWarning = Nothing
+ }
+ providerSupports providerWithBuilder SourceTemporalKind Test.@=? True
+ providerSupports providerWithoutBuilder SourceTemporalKind Test.@=? False,
+ Test.unit "withSourceProvider preserves explicit source and fills missing ones" <| do
+ let explicitTemporal =
+ ContextSource
+ <| \_params -> do
+ now <- Time.getCurrentTime
+ pure
+ TemporalResult
+ { trMessages = [(now, "user", "explicit", "explicit temporal")],
+ trTotalTokens = 3,
+ trContextWarning = Just "explicit"
+ }
+ providerTemporal =
+ ContextSource
+ <| \_params -> do
+ now <- Time.getCurrentTime
+ pure
+ TemporalResult
+ { trMessages = [(now, "user", "provider", "provider temporal")],
+ trTotalTokens = 2,
+ trContextWarning = Just "provider"
+ }
+ providerSemantic =
+ ContextSource
+ <| \_params -> do
+ now <- Time.getCurrentTime
+ pure
+ SemanticResult
+ { srMatches = [(now, "assistant", "provider semantic", 0.9)],
+ srTotalTokens = 4
+ }
+ provider =
+ SourceProvider
+ { spName = "memory",
+ spCapabilities =
+ [ SourceCapability SourceTemporalKind False True True,
+ SourceCapability SourceSemanticKind True True True
+ ],
+ spBuildTemporal = Just (\_ -> providerTemporal),
+ spBuildSemantic = Just (\_ -> providerSemantic),
+ spBuildKnowledge = Nothing
+ }
+ cfg =
+ withSourceProvider defaultProviderConfig provider
+ <| defaultHydrationConfig
+ { hcTemporalSource = Just explicitTemporal
+ }
+ params =
+ SourceParams
+ { spObservation = "obs",
+ spMaxTokens = 100,
+ spMaxItems = 5,
+ spThreshold = 0.5
+ }
+ -- Existing temporal source should win over provider source
+ case hcTemporalSource cfg of
+ Nothing -> Test.assertFailure "Expected temporal source"
+ Just src -> do
+ result <- runSource src params
+ trContextWarning result Test.@=? Just "explicit"
+
+ -- Missing semantic source should be filled by provider
+ case hcSemanticSource cfg of
+ Nothing -> Test.assertFailure "Expected semantic source"
+ Just src -> do
+ result <- runSource src params
+ length (srMatches result) Test.@=? 1
]
diff --git a/Omni/Agent/Prompt/HydrationProviderDesign.md b/Omni/Agent/Prompt/HydrationProviderDesign.md
new file mode 100644
index 00000000..8c187564
--- /dev/null
+++ b/Omni/Agent/Prompt/HydrationProviderDesign.md
@@ -0,0 +1,94 @@
+# Hydration Source Provider Abstraction (t-500)
+
+## Recommendation
+
+**Implement a provider abstraction, but keep it thin and adapter-based.**
+
+Rationale:
+- We already have three source kinds (`temporal`, `semantic`, `knowledge`).
+- Near-term roadmap includes multiple backends per kind (Memory.db, code search, docs RAG, shared agent memory).
+- Without a provider layer, every new backend expands `HydrationConfig` wiring and call-site complexity.
+- A thin provider abstraction gives extensibility now without rewriting hydration internals.
+
+## Answers to Research Questions
+
+### 1) Is the provider abstraction worth the complexity?
+
+**Yes, with scope control.**
+
+Worth it when:
+- More than one backend can satisfy the same source kind.
+- Backends differ in auth/identity requirements.
+- We want routing/fallback policy independent of backend implementations.
+
+It would not be worth it if we attempted a full framework rewrite up front. The right shape is a **small adapter layer** over the existing `ContextSource` hooks.
+
+### 2) Right granularity: per-source-type or unified?
+
+**Unified provider registration + per-source optional builders.**
+
+This gives one backend object (`SourceProvider`) while allowing backend-specific support:
+- Memory provider: temporal + semantic + knowledge
+- Code search provider: semantic only
+- Docs RAG provider: semantic + knowledge
+
+This avoids fake/no-op implementations while keeping provider discovery and routing uniform.
+
+### 3) How do providers declare capabilities?
+
+Use explicit, machine-readable capabilities (`SourceCapability`) attached to each provider:
+- which source kind is supported
+- whether thresholds/max-items/max-tokens are honored
+
+This prevents brittle name-based logic and allows future policy selection.
+
+### 4) How does identity/auth work across providers?
+
+Split config by responsibility:
+- `pcIdentity`: non-secret routing context (owner/chat/thread/tenant)
+- `pcAuth`: credential acquisition mode (env ref, identity key, opaque mapping)
+- `pcOptions`: provider-specific tuning knobs
+
+This keeps secrets out of prompts and supports heterogeneous backends.
+
+### 5) Patterns to borrow from LangChain/LlamaIndex?
+
+Borrow selectively:
+- **Retriever-like abstraction** per backend
+- **Capability metadata** for routing/fallback
+- **Backend-agnostic query params** + backend-specific options
+
+Avoid importing framework-level complexity (global callback stacks, deeply nested runnables, heavy object graphs).
+
+## Proposed Types and Helpers (implemented in `Omni/Agent/Prompt/Hydrate.hs`)
+
+- `SourceKind`
+- `SourceCapability`
+- `ProviderAuth`
+- `ProviderConfig`
+- `SourceProvider`
+- `defaultProviderConfig`, `providerSupports`
+- `withSourceProvider`, `withSourceProviders`
+
+These adapt providers into existing fields:
+- `hcTemporalSource`
+- `hcSemanticSource`
+- `hcKnowledgeSource`
+
+No change is required to the execution semantics of `hydrate`.
+
+## Migration Path
+
+1. **Wrap current Memory.db sources** as a `SourceProvider`.
+2. **Populate HydrationConfig via `withSourceProvider(s)`** (existing explicit sources still win).
+3. Add second provider (e.g., code search semantic provider).
+4. Add routing/fallback policy only after multi-provider behavior is exercised in production.
+5. If needed later, deprecate direct source fields in favor of provider-first config.
+
+## Decision Summary
+
+- ✅ Provider abstraction: **yes**
+- ✅ Granularity: **unified provider + per-source optional builders**
+- ✅ Capabilities: **explicit structured metadata**
+- ✅ Identity/auth: **split identity from secret acquisition**
+- ✅ External inspiration: **borrow retriever/capability ideas, avoid framework bloat**