commit 9c97741445b6ca57731b34304d5bd015ca5f150d
Author: Coder Agent <coder@agents.omni>
Date: Mon Feb 16 23:32:24 2026
Newsreader: decode HTML entities in titles and snippets
Add general-purpose decodeEntities (both named and numeric &#NNN;
&#xHHH;) to Omni.Newsreader.Web. Applied to article titles and
snippets at render time. Previously, entities like “ ’
' were passed through Lucid's toHtml which re-escaped the &,
causing the browser to display literal entity text.
Task-Id: t-625
diff --git a/Omni/Newsreader/Web.hs b/Omni/Newsreader/Web.hs
index 932ad10b..5a6a9d38 100644
--- a/Omni/Newsreader/Web.hs
+++ b/Omni/Newsreader/Web.hs
@@ -26,6 +26,7 @@ where
import Alpha
import qualified Data.Aeson as Aeson
import qualified Data.ByteString.Lazy as BL
+import qualified Data.Char as Char
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Text as T
@@ -37,6 +38,7 @@ import qualified Lucid as L
import qualified Network.HTTP.Types as HTTP
import qualified Network.HTTP.Types.Header as Header
import qualified Network.Wai as Wai
+import qualified Numeric
import qualified Omni.Newsreader.Article as Article
import qualified Omni.Newsreader.Cluster as Cluster
import qualified Omni.Newsreader.Digest as Digest
@@ -143,10 +145,10 @@ articlePage p' conn aidText respond = do
feeds <- Feed.listFeeds conn
let feedMap = Map.fromList [(fid, f) | f <- feeds, Just fid <- [Feed.feedId f]]
feedName = maybe "unknown" (fromMaybe "untitled" <. Feed.feedTitle) (Map.lookup (Article.articleFeedId art) feedMap)
- respond <| htmlResponse HTTP.status200 <| shell (Article.articleTitle art) <| do
+ respond <| htmlResponse HTTP.status200 <| shell (decodeEntities (Article.articleTitle art)) <| do
newsSubNav p'
L.article_ [L.class_ "nr-article"] <| do
- L.h1_ (L.toHtml (Article.articleTitle art))
+ L.h1_ (L.toHtml (decodeEntities (Article.articleTitle art)))
L.div_ [L.class_ "meta"] <| do
L.span_ [L.class_ "accent"] (L.toHtml feedName)
L.toHtml (" · " :: Text)
@@ -319,7 +321,7 @@ articleCard p' feedMap art = do
snippet = T.take 280 (stripTags (Article.articleContent art))
artUrl = p' ("/article/" <> maybe "" (T.pack <. show <. Article.unArticleId) (Article.articleId art))
L.div_ [L.class_ "nr-card"] <| do
- L.a_ [L.href_ artUrl, L.class_ "nr-card-title"] (L.toHtml (Article.articleTitle art))
+ L.a_ [L.href_ artUrl, L.class_ "nr-card-title"] (L.toHtml (decodeEntities (Article.articleTitle art)))
L.div_ [L.class_ "nr-card-meta"] <| do
L.span_ [L.class_ "accent"] (L.toHtml feedName)
L.span_ [L.class_ "faint"] " · "
@@ -495,4 +497,73 @@ stripTags txt =
skipTag (c : rest) acc
| c == '>' = go rest (acc <> " ")
| otherwise = skipTag rest acc
- in T.unwords <| T.words <| go (T.unpack txt) ""
+ in decodeEntities <| T.unwords <| T.words <| go (T.unpack txt) ""
+
+-- | Decode HTML entities in text (both named and numeric).
+decodeEntities :: Text -> Text
+decodeEntities = decodeNumeric <. decodeNamed
+
+-- | Decode named HTML entities.
+decodeNamed :: Text -> Text
+decodeNamed =
+ T.replace "&" "&"
+ <. T.replace "<" "<"
+ <. T.replace ">" ">"
+ <. T.replace """ "\""
+ <. T.replace "'" "'"
+ <. T.replace " " " "
+ <. T.replace "—" "\x2014"
+ <. T.replace "–" "\x2013"
+ <. T.replace "“" "\x201C"
+ <. T.replace "”" "\x201D"
+ <. T.replace "‘" "\x2018"
+ <. T.replace "’" "\x2019"
+ <. T.replace "…" "\x2026"
+ <. T.replace "→" "\x2192"
+ <. T.replace "←" "\x2190"
+
+-- | Decode numeric HTML entities like “ “
+decodeNumeric :: Text -> Text
+decodeNumeric txt = case T.breakOn "&#" txt of
+ (before, rest)
+ | T.null rest -> txt
+ | otherwise ->
+ let after = T.drop 2 rest -- drop "&#"
+ in case T.uncons after of
+ Just ('x', hex) -> decodeHex before hex
+ Just _ -> decodeDec before after
+ Nothing -> before <> rest
+ where
+ decodeDec before t =
+ case T.break (== ';') t of
+ (digits, semi)
+ | not (T.null semi),
+ not (T.null digits),
+ T.all Char.isDigit digits,
+ Just n <- readMay (T.unpack digits),
+ n > 0,
+ n <= 0x10FFFF ->
+ before <> T.singleton (Char.chr n) <> decodeNumeric (T.drop 1 semi)
+ _ -> before <> "&#" <> decodeNumeric t
+
+ decodeHex before t =
+ case T.break (== ';') t of
+ (digits, semi)
+ | not (T.null semi),
+ not (T.null digits),
+ T.all Char.isHexDigit digits,
+ Just n <- readHex' (T.unpack digits),
+ n > 0,
+ n <= 0x10FFFF ->
+ before <> T.singleton (Char.chr n) <> decodeNumeric (T.drop 1 semi)
+ _ -> before <> "&#x" <> decodeNumeric t
+
+ readMay :: String -> Maybe Int
+ readMay s = case reads s of
+ [(n, "")] -> Just n
+ _ -> Nothing
+
+ readHex' :: String -> Maybe Int
+ readHex' s = case Numeric.readHex s of
+ [(n, "")] -> Just n
+ _ -> Nothing