← Back to task

Commit 9c977414

commit 9c97741445b6ca57731b34304d5bd015ca5f150d
Author: Coder Agent <coder@agents.omni>
Date:   Mon Feb 16 23:32:24 2026

    Newsreader: decode HTML entities in titles and snippets
    
    Add general-purpose decodeEntities (both named and numeric &#NNN;
    &#xHHH;) to Omni.Newsreader.Web. Applied to article titles and
    snippets at render time. Previously, entities like &#8220; &#8217;
    &#39; were passed through Lucid's toHtml which re-escaped the &,
    causing the browser to display literal entity text.
    
    Task-Id: t-625

diff --git a/Omni/Newsreader/Web.hs b/Omni/Newsreader/Web.hs
index 932ad10b..5a6a9d38 100644
--- a/Omni/Newsreader/Web.hs
+++ b/Omni/Newsreader/Web.hs
@@ -26,6 +26,7 @@ where
 import Alpha
 import qualified Data.Aeson as Aeson
 import qualified Data.ByteString.Lazy as BL
+import qualified Data.Char as Char
 import qualified Data.List as List
 import qualified Data.Map.Strict as Map
 import qualified Data.Text as T
@@ -37,6 +38,7 @@ import qualified Lucid as L
 import qualified Network.HTTP.Types as HTTP
 import qualified Network.HTTP.Types.Header as Header
 import qualified Network.Wai as Wai
+import qualified Numeric
 import qualified Omni.Newsreader.Article as Article
 import qualified Omni.Newsreader.Cluster as Cluster
 import qualified Omni.Newsreader.Digest as Digest
@@ -143,10 +145,10 @@ articlePage p' conn aidText respond = do
           feeds <- Feed.listFeeds conn
           let feedMap = Map.fromList [(fid, f) | f <- feeds, Just fid <- [Feed.feedId f]]
               feedName = maybe "unknown" (fromMaybe "untitled" <. Feed.feedTitle) (Map.lookup (Article.articleFeedId art) feedMap)
-          respond <| htmlResponse HTTP.status200 <| shell (Article.articleTitle art) <| do
+          respond <| htmlResponse HTTP.status200 <| shell (decodeEntities (Article.articleTitle art)) <| do
             newsSubNav p'
             L.article_ [L.class_ "nr-article"] <| do
-              L.h1_ (L.toHtml (Article.articleTitle art))
+              L.h1_ (L.toHtml (decodeEntities (Article.articleTitle art)))
               L.div_ [L.class_ "meta"] <| do
                 L.span_ [L.class_ "accent"] (L.toHtml feedName)
                 L.toHtml (" · " :: Text)
@@ -319,7 +321,7 @@ articleCard p' feedMap art = do
       snippet = T.take 280 (stripTags (Article.articleContent art))
       artUrl = p' ("/article/" <> maybe "" (T.pack <. show <. Article.unArticleId) (Article.articleId art))
   L.div_ [L.class_ "nr-card"] <| do
-    L.a_ [L.href_ artUrl, L.class_ "nr-card-title"] (L.toHtml (Article.articleTitle art))
+    L.a_ [L.href_ artUrl, L.class_ "nr-card-title"] (L.toHtml (decodeEntities (Article.articleTitle art)))
     L.div_ [L.class_ "nr-card-meta"] <| do
       L.span_ [L.class_ "accent"] (L.toHtml feedName)
       L.span_ [L.class_ "faint"] " · "
@@ -495,4 +497,73 @@ stripTags txt =
       skipTag (c : rest) acc
         | c == '>' = go rest (acc <> " ")
         | otherwise = skipTag rest acc
-   in T.unwords <| T.words <| go (T.unpack txt) ""
+   in decodeEntities <| T.unwords <| T.words <| go (T.unpack txt) ""
+
+-- | Decode HTML entities in text (both named and numeric).
+decodeEntities :: Text -> Text
+decodeEntities = decodeNumeric <. decodeNamed
+
+-- | Decode named HTML entities.
+decodeNamed :: Text -> Text
+decodeNamed =
+  T.replace "&amp;" "&"
+    <. T.replace "&lt;" "<"
+    <. T.replace "&gt;" ">"
+    <. T.replace "&quot;" "\""
+    <. T.replace "&apos;" "'"
+    <. T.replace "&nbsp;" " "
+    <. T.replace "&mdash;" "\x2014"
+    <. T.replace "&ndash;" "\x2013"
+    <. T.replace "&ldquo;" "\x201C"
+    <. T.replace "&rdquo;" "\x201D"
+    <. T.replace "&lsquo;" "\x2018"
+    <. T.replace "&rsquo;" "\x2019"
+    <. T.replace "&hellip;" "\x2026"
+    <. T.replace "&rarr;" "\x2192"
+    <. T.replace "&larr;" "\x2190"
+
+-- | Decode numeric HTML entities like &#8220; &#x201C;
+decodeNumeric :: Text -> Text
+decodeNumeric txt = case T.breakOn "&#" txt of
+  (before, rest)
+    | T.null rest -> txt
+    | otherwise ->
+        let after = T.drop 2 rest -- drop "&#"
+         in case T.uncons after of
+              Just ('x', hex) -> decodeHex before hex
+              Just _ -> decodeDec before after
+              Nothing -> before <> rest
+  where
+    decodeDec before t =
+      case T.break (== ';') t of
+        (digits, semi)
+          | not (T.null semi),
+            not (T.null digits),
+            T.all Char.isDigit digits,
+            Just n <- readMay (T.unpack digits),
+            n > 0,
+            n <= 0x10FFFF ->
+              before <> T.singleton (Char.chr n) <> decodeNumeric (T.drop 1 semi)
+        _ -> before <> "&#" <> decodeNumeric t
+
+    decodeHex before t =
+      case T.break (== ';') t of
+        (digits, semi)
+          | not (T.null semi),
+            not (T.null digits),
+            T.all Char.isHexDigit digits,
+            Just n <- readHex' (T.unpack digits),
+            n > 0,
+            n <= 0x10FFFF ->
+              before <> T.singleton (Char.chr n) <> decodeNumeric (T.drop 1 semi)
+        _ -> before <> "&#x" <> decodeNumeric t
+
+    readMay :: String -> Maybe Int
+    readMay s = case reads s of
+      [(n, "")] -> Just n
+      _ -> Nothing
+
+    readHex' :: String -> Maybe Int
+    readHex' s = case Numeric.readHex s of
+      [(n, "")] -> Just n
+      _ -> Nothing