Create CRDT State Module

t-369.6·WorkTask·
·
·
·Omni/Agent.hs
Parent:t-369·Created1 month ago·Updated1 month ago

Dependencies

Description

Edit

Create Omni/Agent/State/CRDT.hs - state strategy for conflict-free merging.

Context

When running parallel agent branches, each branch may modify state independently. CRDT (Conflict-free Replicated Data Types) allow merging without coordination.

This is ideal for coding tasks where branches might discover facts or modify files independently.

Read Omni/Agent/ARCHITECTURE.md for full design rationale.

Deliverables

Create Omni/Agent/State/CRDT.hs containing:

1. CRDT Typeclass

-- | Types that can be merged without coordination
class Semigroup a => CRDT a where
  -- Semigroup's (<>) must be:
  -- - Associative: (a <> b) <> c = a <> (b <> c)
  -- - Commutative: a <> b = b <> a
  -- - Idempotent: a <> a = a
  
  -- Default implementation just uses Semigroup
  merge :: a -> a -> a
  merge = (<>)

2. Common CRDT Types

-- | G-Set: Grow-only set (only additions)
newtype GSet a = GSet (Set a)
  deriving (Show, Eq)

instance Ord a => Semigroup (GSet a) where
  GSet a <> GSet b = GSet (Set.union a b)

instance Ord a => CRDT (GSet a)

-- | LWW: Last-Writer-Wins register
data LWW a = LWW 
  { lwwValue :: a
  , lwwTimestamp :: UTCTime
  }
  deriving (Show, Eq)

instance Semigroup (LWW a) where
  a <> b = if lwwTimestamp a >= lwwTimestamp b then a else b

instance CRDT (LWW a)

-- | LWWMap: Map with LWW semantics per key
newtype LWWMap k v = LWWMap (Map k (LWW v))
  deriving (Show, Eq)

instance Ord k => Semigroup (LWWMap k v) where
  LWWMap a <> LWWMap b = LWWMap (Map.unionWith (<>) a b)

instance Ord k => CRDT (LWWMap k v)

-- | Counter: Increment-only counter (per-actor)
newtype GCounter = GCounter (Map ActorId Int)
  deriving (Show, Eq)

instance Semigroup GCounter where
  GCounter a <> GCounter b = GCounter (Map.unionWith max a b)

instance CRDT GCounter

-- | PN-Counter: Increment and decrement counter
data PNCounter = PNCounter
  { pnIncrements :: GCounter
  , pnDecrements :: GCounter
  }
  deriving (Show, Eq)

instance Semigroup PNCounter where
  a <> b = PNCounter 
    { pnIncrements = pnIncrements a <> pnIncrements b
    , pnDecrements = pnDecrements a <> pnDecrements b
    }

instance CRDT PNCounter

pnValue :: PNCounter -> Int
pnValue pn = gcValue (pnIncrements pn) - gcValue (pnDecrements pn)

3. Code-Specific CRDT State

-- | State for coding tasks
data CodeState = CodeState
  { csFilesModified :: GSet FilePath        -- files we've touched
  , csFacts :: LWWMap Text Text             -- learned facts (key -> value)
  , csErrors :: GSet Text                   -- errors encountered
  , csCostCents :: LWW Double               -- cost tracking (LWW for simplicity)
  , csTokens :: GCounter                    -- token count
  }
  deriving (Show, Eq, Generic)

instance Semigroup CodeState where
  a <> b = CodeState
    { csFilesModified = csFilesModified a <> csFilesModified b
    , csFacts = csFacts a <> csFacts b
    , csErrors = csErrors a <> csErrors b
    , csCostCents = csCostCents a <> csCostCents b
    , csTokens = csTokens a <> csTokens b
    }

instance CRDT CodeState

-- Smart constructors
emptyCodeState :: CodeState
modifyFile :: FilePath -> CodeState -> CodeState
addFact :: Text -> Text -> CodeState -> CodeState
addError :: Text -> CodeState -> CodeState

4. Research-Specific CRDT State

-- | State for research tasks
data ResearchState = ResearchState
  { rsSources :: GSet URL                   -- sources found
  , rsFacts :: LWWMap Text Text             -- facts discovered
  , rsContradictions :: GSet (Text, Text)   -- conflicting claims
  , rsConfidence :: LWWMap Text Double      -- confidence per topic
  }
  deriving (Show, Eq, Generic)

instance Semigroup ResearchState
instance CRDT ResearchState

5. Helper for Parallel Interpreter

-- | Merge multiple states from parallel branches
mergeStates :: CRDT s => [s] -> s
mergeStates = foldr1 merge

-- | Run parallel Op with CRDT state
runParallelCRDT :: CRDT s => ParConfig -> s -> Op s a -> IO (Either Text (a, Trace, s))
runParallelCRDT = runParallelWith merge

Notes

  • CRDTs guarantee eventual consistency without coordination
  • LWW requires accurate timestamps (use UTCTime from time package)
  • GSet can only grow - use for facts that can't be retracted
  • For files, might need OR-Set (observed-remove) if files can be deleted
  • Keep it simple - start with GSet, LWW, LWWMap

Testing

  • GSet union is commutative and idempotent
  • LWW picks latest timestamp
  • LWWMap merges per-key
  • CodeState merges correctly
  • mergeStates with empty list fails gracefully

Files to Read First

  • Omni/Agent/ARCHITECTURE.md (section on state strategies)
  • Omni/Agent/Interpreter/Parallel.hs (consumer of merge)
  • CRDT literature (Shapiro et al. 2011)

Timeline (2)

🔄[human]Open → InProgress1 month ago
🔄[human]InProgress → Done1 month ago