Implement swarm-based contract review with STM shared state.
Building on the baseline (t-369.23), implement a swarm approach in which several agents review contracts in parallel and coordinate through shared STM state.
The swarm will outperform a single agent (especially at scale) because:
1. Parallel processing gives a faster wall-clock time.
2. Shared patterns (e.g. "indemnification usually in Section 8") help other agents.
3. There is no context exhaustion: each agent only sees its own contracts.
4. Cross-document insights become visible (e.g. "3 contracts have unusual liability caps").
-- Omni/Agent/Experiments/ContractSwarm.hs
-- | State shared by every agent in the swarm. Each field is held in its own
-- 'TVar'.
data SharedReviewState = SharedReviewState
  { findings :: TVar (Map ContractId [Finding])
    -- ^ All findings from all agents, keyed by contract.
  , patterns :: TVar [Pattern]
    -- ^ Patterns discovered across contracts.
  , clauseHints :: TVar (Map ClauseType [Hint])
    -- ^ Clause type hints (learned from early contracts).
  , completed :: TVar (Set ContractId)
    -- ^ Progress tracking: contracts whose review has finished.
  , inProgress :: TVar (Set ContractId)
    -- ^ Progress tracking: contracts currently under review.
  , anomalies :: TVar [Anomaly]
    -- ^ Anomalies worth flagging.
  }
-- | A regularity observed across several contracts.
data Pattern = Pattern
  { patternClauseType :: ClauseType
    -- ^ Clause type the pattern is about.
  , patternDescription :: Text
    -- ^ Free-text description, e.g. "Usually in Section 8 or 9".
  , patternFrequency :: Int
    -- ^ How many contracts showed this pattern.
  }
-- | A hint for locating a clause, tagged with the contract it came from.
data Hint = Hint
  { hintText :: Text
    -- ^ The hint itself, e.g. "Look for 'shall indemnify'".
  , hintSource :: ContractId
    -- ^ Contract where the hint was learned.
  }
-- | An unusual observation about one contract.
data Anomaly = Anomaly
  { anomalyContract :: ContractId
    -- ^ Contract the anomaly was found in.
  , anomalyDescription :: Text
    -- ^ What is unusual, e.g. "Unlimited liability - unusual".
  }
-- | Review one contract and publish the results to the shared state.
--
-- Steps:
--
--   1. Add the contract to 'inProgress'.
--   2. Snapshot the current 'patterns' and 'clauseHints' in one transaction
--      and build the prompt from them.
--   3. Run the think/execute/parse pipeline.
--   4. In one transaction, store the findings, add the contract to
--      'completed' and remove it from 'inProgress'.
--   5. Publish any hints extracted from the findings to 'clauseHints'.
--
-- If step 2, 3, 4 or 5 throws, the contract is still removed from
-- 'inProgress' and the exception propagates to the caller; nothing is added
-- to 'completed' unless step 4 committed.
--
-- @sandbox@ is not bound in this function; it is taken from the enclosing
-- scope.
reviewerAgent :: SharedReviewState -> Provider -> Contract -> IO ()
reviewerAgent shared provider contract =
  -- bracket_ guarantees the release action runs on every exit path, so a
  -- failed review can never leave a stale id in 'inProgress'.
  bracket_ markInProgress clearInProgress $ do
    -- Read current patterns and hints (benefit from others' work).
    (currentPatterns, currentHints) <- atomically $ (,)
      <$> readTVar (patterns shared)
      <*> readTVar (clauseHints shared)

    let prompt = reviewPromptWithHints contract currentPatterns currentHints

    -- Think + execute.
    code <- think provider prompt
    result <- execute sandbox code
    -- Deliberately NOT named `findings`: that would shadow the record
    -- selector used in the transaction below.
    contractFindings <- parseFindings result

    -- Publish findings and flip the progress sets atomically, so no observer
    -- sees the contract as both finished and still in progress.
    atomically $ do
      modifyTVar (findings shared) (Map.insert cid contractFindings)
      modifyTVar (completed shared) (Set.insert cid)
      modifyTVar (inProgress shared) (Set.delete cid)

    -- Extract any new hints for others.
    let newHints = extractHints contract contractFindings
    atomically $ forM_ newHints $ \(clauseType, hint) ->
      modifyTVar (clauseHints shared) (Map.insertWith (++) clauseType [hint])
  where
    cid = contractId contract

    markInProgress =
      atomically $ modifyTVar (inProgress shared) (Set.insert cid)

    -- Set.delete is a no-op when the id is already gone, so running this
    -- after a successful review is harmless.
    clearInProgress =
      atomically $ modifyTVar (inProgress shared) (Set.delete cid)
-- | Render the reviewer prompt: a fixed instruction line, a section with the
-- formatted patterns and hints gathered from other contracts, the contract
-- text, and a closing request for JSON output. Sections are separated by
-- blank lines.
reviewPromptWithHints :: Contract -> [Pattern] -> Map ClauseType [Hint] -> Text
reviewPromptWithHints contract knownPatterns knownHints =
  Text.unlines (instruction ++ hintSection ++ contractSection ++ outputRequest)
  where
    instruction =
      [ "Review this contract and extract clauses."
      , ""
      ]

    hintSection =
      [ "=== HINTS FROM OTHER CONTRACTS ==="
      , formatPatterns knownPatterns
      , formatHints knownHints
      , ""
      ]

    contractSection =
      [ "=== CONTRACT ==="
      , contractText contract
      , ""
      ]

    outputRequest =
      [ "Output JSON with findings."
      ]
-- | Background agent that periodically looks for cross-contract patterns.
--
-- Every round sleeps for 10 seconds and then snapshots 'findings'. When at
-- least five contracts have findings, it asks the provider for patterns and
-- appends whatever is parsed to 'patterns'. The loop stops once 'completed'
-- holds at least @totalContracts@ entries.
--
-- @sandbox@ and @totalContracts@ are not bound in this function; they are
-- taken from the enclosing scope.
patternAgent :: SharedReviewState -> Provider -> IO ()
patternAgent shared provider = poll
  where
    poll = do
      -- Give the reviewers time to accumulate findings (microseconds).
      threadDelay 10_000_000
      snapshot <- atomically (readTVar (findings shared))
      -- Too little data yields no useful patterns; skip the round.
      when (Map.size snapshot >= 5) (minePatterns snapshot)
      finished <- atomically (readTVar (completed shared))
      unless (Set.size finished >= totalContracts) poll

    -- Ask the provider what patterns it sees and append them to the shared list.
    minePatterns snapshot = do
      code <- think provider (patternPrompt snapshot)
      result <- execute sandbox code
      discovered <- parsePatterns result
      atomically (modifyTVar (patterns shared) (++ discovered))
-- | Background agent that periodically looks for unusual clauses.
--
-- Every round sleeps for 15 seconds and then snapshots 'findings'. When at
-- least three contracts have findings, it asks the provider for anomalies
-- and appends whatever is parsed to 'anomalies'. The loop stops once
-- 'completed' holds at least @totalContracts@ entries.
--
-- @sandbox@ and @totalContracts@ are not bound in this function; they are
-- taken from the enclosing scope.
anomalyAgent :: SharedReviewState -> Provider -> IO ()
anomalyAgent shared provider = poll
  where
    poll = do
      threadDelay 15_000_000 -- microseconds
      snapshot <- atomically (readTVar (findings shared))
      when (Map.size snapshot >= 3) (flagAnomalies snapshot)
      -- Continue until every contract has been reviewed.
      finished <- atomically (readTVar (completed shared))
      if Set.size finished >= totalContracts
        then pure ()
        else poll

    -- Ask the provider for unusual clauses and append them to the shared list.
    flagAnomalies snapshot = do
      code <- think provider (anomalyPrompt snapshot)
      result <- execute sandbox code
      flagged <- parseAnomalies result
      atomically (modifyTVar (anomalies shared) (++ flagged))
-- | Review all contracts with a swarm of agents sharing one
-- 'SharedReviewState'.
--
-- One reviewer thread is spawned per contract; a semaphore lets at most five
-- of them run 'reviewerAgent' at a time. A pattern-detection agent and an
-- anomaly-detection agent run in the background. Once every reviewer has
-- finished, the background agents are cancelled and the accumulated
-- findings, patterns and anomalies are returned.
--
-- If any reviewer throws, the exception is rethrown to the caller, but only
-- after the background agents and all remaining reviewers have been
-- cancelled, so no thread outlives this call.
--
-- NOTE(review): the background agents are cancelled as soon as the last
-- reviewer finishes. 'patternAgent' sleeps 10 seconds and 'anomalyAgent'
-- 15 seconds before their first analysis, so a run that completes faster
-- than that returns no patterns or anomalies. Confirm this is intended.
contractSwarm :: Provider -> [Contract] -> IO SwarmResult
contractSwarm provider contracts = do
  -- Initialize shared state.
  shared <- initSharedState

  -- Spawn reviewer agents (one per contract, with concurrency limit).
  let concurrency = 5 -- Max 5 parallel reviewers
  reviewerSem <- newQSem concurrency
  reviewers <- forM contracts $ \contract ->
    async $ bracket_ (waitQSem reviewerSem) (signalQSem reviewerSem) $
      reviewerAgent shared provider contract

  -- Spawn the background agents.
  patternDetector <- async $ patternAgent shared provider
  anomalyDetector <- async $ anomalyAgent shared provider

  -- Cancelling an already-finished reviewer is a no-op, so on the success
  -- path this only stops the two background agents.
  let stopAll = do
        cancel patternDetector
        cancel anomalyDetector
        traverse_ cancel reviewers

  -- bracket_ with a no-op acquire runs `stopAll` whether waiting succeeds or
  -- a reviewer's exception is rethrown by `wait`.
  bracket_ (pure ()) stopAll (traverse_ wait reviewers)

  -- Collect results in a single transaction for one consistent snapshot.
  atomically $ do
    finalFindings <- readTVar (findings shared)
    finalPatterns <- readTVar (patterns shared)
    finalAnomalies <- readTVar (anomalies shared)
    pure SwarmResult
      { srFindings = finalFindings
      , srPatterns = finalPatterns
      , srAnomalies = finalAnomalies
      }
-- | Compare the single-agent baseline with the swarm on batches of 5, 10, 20
-- and 50 contracts.
--
-- For each batch size the contracts are loaded, both approaches are run and
-- timed over the same contracts, and each result is scored against the
-- combined ground truth. Times and F1 scores are printed to stdout, along
-- with the number of patterns and anomalies the swarm produced.
runComparison :: IO ()
runComparison = do
  provider <- getProvider
  for_ [5, 10, 20, 50] $ \n -> do
    contracts <- loadContracts n
    putStrLn $ "\n=== " <> show n <> " contracts ==="

    -- Single agent baseline: contracts are reviewed one after another.
    (singleFindings, singleTime) <-
      timed (concat <$> traverse (reviewContract provider) contracts)

    -- Swarm over the same contracts.
    (swarmResult, swarmTime) <- timed (contractSwarm provider contracts)

    let groundTruth = concatMap getGroundTruth contracts
        score found = f1 (evaluate found groundTruth)
        swarmFindings = concat (Map.elems (srFindings swarmResult))

    -- Report.
    traverse_ putStrLn
      [ "Single Agent:"
      , " Time: " <> show singleTime
      , " F1: " <> show (score singleFindings)
      , "Swarm:"
      , " Time: " <> show swarmTime
      , " F1: " <> show (score swarmFindings)
      , " Patterns found: " <> show (length (srPatterns swarmResult))
      , " Anomalies: " <> show (length (srAnomalies swarmResult))
      ]
1. Does sharing help?
2. Does pattern detection work?
3. How does the behavior change with scale?
4. What is the cost/time tradeoff?