CUAD Contract Review - Swarm with STM Implementation

t-369.24·WorkTask·
·
·
·Omni/Agent.hs
Parent: t-369 · Created 1 month ago · Updated 1 month ago

Dependencies

Description

Edit

Implement swarm-based contract review with STM shared state.

Context

Building on the baseline (t-369.23), implement a swarm approach where:

  • Multiple reviewer agents work in parallel
  • Shared state accumulates findings and patterns
  • Pattern detection helps later agents

Hypothesis

Swarm will outperform single agent (especially at scale) because:

  1. Parallel processing = faster wall clock
  2. Shared patterns = "indemnification usually in Section 8" helps other agents
  3. No context exhaustion = each agent only sees its contracts
  4. Cross-document insights = "3 contracts have unusual liability caps"

Deliverables

1. Shared State Design

-- Omni/Agent/Experiments/ContractSwarm.hs

-- | Mutable state shared by every agent in the swarm. Each field is its own
-- 'TVar', so agents can read or update several of them in one STM transaction.
data SharedReviewState = SharedReviewState
  { -- All findings from all agents, keyed by contract.
    -- Written by 'reviewerAgent'; read by 'patternAgent' and 'anomalyAgent'.
    findings :: TVar (Map ContractId [Finding])
    
    -- Patterns discovered across contracts.
    -- Appended to by 'patternAgent'; read by 'reviewerAgent' when it builds
    -- its prompt.
  , patterns :: TVar [Pattern]
    
    -- Clause type hints (learned from early contracts).
    -- 'reviewerAgent' reads these before reviewing and adds new ones afterwards.
  , clauseHints :: TVar (Map ClauseType [Hint])
    
    -- Progress tracking.
    -- 'completed' gains a contract id once its findings are stored; the
    -- background agents compare its size against the total to decide when to stop.
  , completed :: TVar (Set ContractId)
    -- 'inProgress' holds the ids of contracts a reviewer has started but not
    -- yet finished.
  , inProgress :: TVar (Set ContractId)
    
    -- Anomalies worth flagging. Appended to by 'anomalyAgent'.
  , anomalies :: TVar [Anomaly]
  }

-- | A cross-contract observation about one clause type. Values of this type
-- are stored in the shared 'patterns' list and passed to
-- 'reviewPromptWithHints'.
data Pattern = Pattern
  { patternClauseType :: ClauseType
  , patternDescription :: Text  -- ^ e.g., "Usually in Section 8 or 9"
  , patternFrequency :: Int     -- ^ How many contracts showed this
  }

-- | A piece of extraction advice learned while reviewing one contract. Hints
-- are stored per 'ClauseType' in the shared 'clauseHints' map.
data Hint = Hint
  { hintText :: Text            -- ^ e.g., "Look for 'shall indemnify'"
  , hintSource :: ContractId    -- ^ Where we learned this
  }

-- | An unusual clause flagged for attention, tied to the contract it was found
-- in. Values of this type are stored in the shared 'anomalies' list.
data Anomaly = Anomaly
  { anomalyContract :: ContractId
  , anomalyDescription :: Text  -- ^ e.g., "Unlimited liability - unusual"
  }

2. Reviewer Agent

-- | Review a single contract and publish the results to the shared state.
--
-- The agent:
--
--   1. registers the contract in 'inProgress';
--   2. snapshots the current 'patterns' and 'clauseHints' so it can benefit
--      from work other agents have already published;
--   3. asks the provider for code, executes it, and parses the findings;
--   4. publishes findings, progress, and any new hints in one transaction.
--
-- The contract is removed from 'inProgress' even if the review throws, so a
-- failed review never leaves a stale "in progress" entry behind. The
-- exception itself still propagates to the caller.
reviewerAgent :: SharedReviewState -> Provider -> Contract -> IO ()
reviewerAgent shared provider contract =
  bracket_ markInProgress clearInProgress $ do
    -- Read current patterns and hints in one transaction so the pair is a
    -- consistent snapshot.
    (currentPatterns, currentHints) <- atomically $ (,)
      <$> readTVar (patterns shared)
      <*> readTVar (clauseHints shared)

    -- Build prompt with hints
    let prompt = reviewPromptWithHints contract currentPatterns currentHints

    -- Think + Execute
    -- NOTE(review): `sandbox` is not bound in this snippet; presumably a
    -- module-level value -- confirm.
    code <- think provider prompt
    result <- execute sandbox code
    -- Deliberately NOT named `findings`: that would shadow the record
    -- selector and make `findings shared` below ill-typed.
    contractFindings <- parseFindings result

    let newHints = extractHints contract contractFindings

    -- Publish everything at once: other agents never observe the findings
    -- without the hints derived from them, or a contract that is both
    -- completed and still in progress.
    atomically $ do
      modifyTVar' (findings shared) (Map.insert cid contractFindings)
      modifyTVar' (completed shared) (Set.insert cid)
      modifyTVar' (inProgress shared) (Set.delete cid)
      forM_ newHints $ \(clauseType, hint) ->
        modifyTVar' (clauseHints shared) (Map.insertWith (++) clauseType [hint])
  where
    cid = contractId contract

    -- Mark in progress
    markInProgress =
      atomically $ modifyTVar' (inProgress shared) (Set.insert cid)

    -- Idempotent: on the success path the id has already been removed by the
    -- publishing transaction above.
    clearInProgress =
      atomically $ modifyTVar' (inProgress shared) (Set.delete cid)

-- | Assemble the reviewer prompt for one contract.
--
-- The prompt has four parts, in order: the instruction, the knowledge shared
-- by other agents (formatted patterns, then formatted hints), the contract
-- text, and the requested output format. Each part except the last is
-- followed by an empty line.
reviewPromptWithHints :: Contract -> [Pattern] -> Map ClauseType [Hint] -> Text
reviewPromptWithHints contract knownPatterns knownHints =
  Text.unlines (instructions <> sharedKnowledge <> contractBody <> outputSpec)
  where
    instructions =
      [ "Review this contract and extract clauses."
      , ""
      ]

    sharedKnowledge =
      [ "=== HINTS FROM OTHER CONTRACTS ==="
      , formatPatterns knownPatterns
      , formatHints knownHints
      , ""
      ]

    contractBody =
      [ "=== CONTRACT ==="
      , contractText contract
      , ""
      ]

    outputSpec =
      [ "Output JSON with findings."
      ]

3. Pattern Detection Agent

-- | Background agent that periodically looks for cross-contract patterns.
--
-- Every 10 seconds it reads all findings. Once at least 5 contracts have
-- findings, it asks the provider to describe patterns and appends them to the
-- shared 'patterns' list. It stops once the number of completed contracts
-- reaches @totalContracts@.
--
-- An analysis is only run when the findings map has grown since the previous
-- analysis. Re-analysing an unchanged map would spend another provider call
-- and append its output to 'patterns' again.
--
-- NOTE(review): `sandbox` and `totalContracts` are not bound in this snippet;
-- presumably module-level values -- confirm.
patternAgent :: SharedReviewState -> Provider -> IO ()
patternAgent shared provider = loop 0
  where
    -- 'analyzedCount' is the size of the findings map at the last analysis
    -- (0 before the first one).
    loop analyzedCount = do
      -- Wait for some findings to accumulate
      threadDelay 10_000_000  -- 10 seconds

      -- Read all findings
      allFindings <- atomically $ readTVar (findings shared)
      let reviewedCount = Map.size allFindings

      -- Only analyze if we have enough data AND something new to look at
      analyzedCount' <-
        if reviewedCount >= 5 && reviewedCount > analyzedCount
          then do
            -- Think: what patterns do you see?
            code <- think provider (patternPrompt allFindings)
            result <- execute sandbox code
            newPatterns <- parsePatterns result

            -- Update shared patterns
            atomically $ modifyTVar' (patterns shared) (++ newPatterns)
            pure reviewedCount
          else pure analyzedCount

      -- Check if we're done
      doneContracts <- atomically $ readTVar (completed shared)
      unless (Set.size doneContracts >= totalContracts) $ loop analyzedCount'

4. Anomaly Detection Agent

-- | Background agent that periodically looks for unusual clauses.
--
-- Every 15 seconds it reads all findings. Once at least 3 contracts have
-- findings, it asks the provider to flag anomalies and appends them to the
-- shared 'anomalies' list. It stops once the number of completed contracts
-- reaches @totalContracts@.
--
-- An analysis is only run when the findings map has grown since the previous
-- analysis. Re-analysing an unchanged map would spend another provider call
-- and append its output to 'anomalies' again.
--
-- NOTE(review): `sandbox` and `totalContracts` are not bound in this snippet;
-- presumably module-level values -- confirm.
anomalyAgent :: SharedReviewState -> Provider -> IO ()
anomalyAgent shared provider = loop 0
  where
    -- 'analyzedCount' is the size of the findings map at the last analysis
    -- (0 before the first one).
    loop analyzedCount = do
      threadDelay 15_000_000  -- 15 seconds

      allFindings <- atomically $ readTVar (findings shared)
      let reviewedCount = Map.size allFindings

      -- Only analyze if we have enough data AND something new to look at
      analyzedCount' <-
        if reviewedCount >= 3 && reviewedCount > analyzedCount
          then do
            -- Think: any unusual clauses?
            code <- think provider (anomalyPrompt allFindings)
            result <- execute sandbox code
            newAnomalies <- parseAnomalies result

            atomically $ modifyTVar' (anomalies shared) (++ newAnomalies)
            pure reviewedCount
          else pure analyzedCount

      -- Continue until done
      doneContracts <- atomically $ readTVar (completed shared)
      unless (Set.size doneContracts >= totalContracts) $ loop analyzedCount'

5. Swarm Orchestrator

-- | Run the whole swarm over a batch of contracts and collect the results.
--
-- One reviewer is spawned per contract, but a semaphore lets at most 5 of
-- them run at a time. A pattern detector and an anomaly detector run in the
-- background while the reviewers work.
--
-- All spawned threads are cancelled when this function leaves, whether the
-- reviewers finished normally or one of them threw. Without that, a failing
-- reviewer would make 'wait' rethrow before the detectors were cancelled and
-- they (and the remaining reviewers) would keep running unattended. The
-- reviewer's exception is still rethrown to the caller.
contractSwarm :: Provider -> [Contract] -> IO SwarmResult
contractSwarm provider contracts = do
  -- Initialize shared state
  shared <- initSharedState

  -- Spawn reviewer agents (one per contract, with concurrency limit)
  let concurrency = 5  -- Max 5 parallel reviewers
  reviewerSem <- newQSem concurrency

  reviewers <- forM contracts $ \contract ->
    async $ bracket_ (waitQSem reviewerSem) (signalQSem reviewerSem) $
      reviewerAgent shared provider contract

  -- Spawn pattern detection agent
  patternDetector <- async $ patternAgent shared provider

  -- Spawn anomaly detection agent
  anomalyDetector <- async $ anomalyAgent shared provider

  -- Cancelling an async that has already finished does nothing, so on the
  -- success path this only stops the two background agents.
  let stopAll = do
        traverse_ cancel reviewers
        cancel patternDetector
        cancel anomalyDetector

  -- Wait for all reviewers, then stop everything that is still running
  traverse_ wait reviewers `finally` stopAll

  -- Collect results as a single consistent snapshot
  (finalFindings, finalPatterns, finalAnomalies) <- atomically $ (,,)
    <$> readTVar (findings shared)
    <*> readTVar (patterns shared)
    <*> readTVar (anomalies shared)

  pure SwarmResult
    { srFindings = finalFindings
    , srPatterns = finalPatterns
    , srAnomalies = finalAnomalies
    }

6. Benchmark Comparison

-- | Benchmark the single-agent baseline against the swarm.
--
-- For each batch size (5, 10, 20, 50) the same contracts are reviewed first
-- by the sequential baseline and then by the swarm. Both runs are timed and
-- scored against the contracts' ground truth, and a short report is printed.
runComparison :: IO ()
runComparison = do
  provider <- getProvider

  for_ [5, 10, 20, 50] $ \batchSize -> do
    batch <- loadContracts batchSize
    let expected = concatMap getGroundTruth batch

    putStrLn $ "\n=== " <> show batchSize <> " contracts ==="

    -- Baseline: one agent reviews the contracts one after another.
    (baselineFindings, baselineTime) <-
      timed (concat <$> traverse (reviewContract provider) batch)

    -- Swarm: parallel reviewers sharing state.
    (swarmOutcome, swarmTime) <-
      timed (contractSwarm provider batch)

    let baselineScore = evaluate baselineFindings expected
        swarmScore =
          evaluate (concat (Map.elems (srFindings swarmOutcome))) expected

    -- Report, one line per entry, in the order listed.
    traverse_ putStrLn
      [ "Single Agent:"
      , "  Time: " <> show baselineTime
      , "  F1: " <> show (f1 baselineScore)
      , "Swarm:"
      , "  Time: " <> show swarmTime
      , "  F1: " <> show (f1 swarmScore)
      , "  Patterns found: " <> show (length (srPatterns swarmOutcome))
      , "  Anomalies: " <> show (length (srAnomalies swarmOutcome))
      ]

What We're Testing

1. Does sharing help?

  • Do hints from early contracts improve later extractions?
  • Measure: F1 of contracts reviewed early vs late in swarm

2. Does pattern detection work?

  • Are patterns accurate and useful?
  • Measure: Quality of detected patterns

3. Scale behavior

  • Does swarm maintain quality at N=50 where single agent fails?
  • Measure: F1 at each scale

4. Cost/time tradeoff

  • Is swarm faster? More expensive?
  • Measure: Wall clock, total tokens

Success Criteria

  • [ ] Swarm F1 >= Single F1 at all scales
  • [ ] Swarm maintains F1 > 0.6 at N=50 (where single agent likely fails)
  • [ ] At least 3 useful patterns detected
  • [ ] Wall clock time < single agent time (for N >= 20)

Files

  • Omni/Agent/Experiments/ContractSwarm.hs
  • Omni/Agent/Experiments/CUAD_SWARM_RESULTS.md

Timeline (2)

🔄 [human] Open → InProgress · 1 month ago
🔄 [human] InProgress → Done · 1 month ago