feat(ui-tui): resolve markdown links to readable page titles (#24013)

* feat(ui-tui): resolve links to readable page titles

Mirror desktop pretty-link behavior in the TUI by resolving HTTP links to page titles with shared caching and safe fetch filters, plus slug-based fallbacks so chat links stay readable even when title fetch fails.

* refactor(ui-tui): tighten link-title fallback handling

Clean up the link-title resolver by hardening in-flight cleanup and clarifying title length limits, while adding focused coverage for HTML entity decoding and markdown-label fallback behavior.

* fix(ui-tui): block private-network targets in title fetches

Prevent automatic link-title resolution from requesting local or private hosts by rejecting RFC 1918, link-local, and ULA addresses, as well as intranet-style hostnames, before fetch, and add regression coverage for the blocked host patterns.
This commit is contained in:
brooklyn! 2026-05-11 14:16:31 -07:00 committed by GitHub
parent 9a63b5f16c
commit 75b428c852
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 644 additions and 16 deletions

View file

@ -0,0 +1,138 @@
import { afterEach, describe, expect, it, vi } from 'vitest'
import {
__resetLinkTitleCache,
fetchLinkTitle,
hostPathLabel,
isTitleFetchable,
normalizeExternalUrl,
urlSlugTitleLabel
} from '../lib/externalLink.js'
// Runs after every test: empties the link-title caches, restores mocked
// implementations, and removes stubbed globals such as `fetch`.
afterEach(function resetLinkTitleState() {
  __resetLinkTitleCache()
  vi.restoreAllMocks()
  vi.unstubAllGlobals()
})
describe('external link helpers', () => {
  // Stubs the global `fetch` with a mock that always resolves to the same
  // 200 response carrying `html`, and returns the mock for call-count checks.
  const stubHtmlFetch = (html: string, contentType = 'text/html') => {
    const fetchMock = vi.fn().mockResolvedValue(
      new Response(html, {
        headers: { 'content-type': contentType },
        status: 200
      })
    )

    vi.stubGlobal('fetch', fetchMock)

    return fetchMock
  }

  // Checks isTitleFetchable's verdict for every [url, expected] pair.
  const expectFetchable = (cases: ReadonlyArray<readonly [string, boolean]>) => {
    for (const [url, expected] of cases) {
      expect(isTitleFetchable(url)).toBe(expected)
    }
  }

  it('formats URL fallbacks as host + path', () => {
    const label = hostPathLabel(
      'https://www.getyourguide.com/culebra-island-l145468/from-fajardo-full-day-cordillera-islands-catamaran-tour-t19894/'
    )

    expect(label).toBe(
      'getyourguide.com/culebra-island-l145468/from-fajardo-full-day-cordillera-islands-catamaran-tour-t19894'
    )
  })

  it('derives readable title fallbacks from URL slugs', () => {
    const label = urlSlugTitleLabel(
      'https://www.getyourguide.com/fajardo-l882/from-fajardo-icacos-island-full-day-catamaran-trip-t19891/'
    )

    expect(label).toBe('From Fajardo Icacos Island Full Day Catamaran Trip')
  })

  it('normalizes scheme-less links', () => {
    const normalized = normalizeExternalUrl(' expedia.com/things-to-do/puerto-rico-el-yunque ')

    expect(normalized).toBe('https://expedia.com/things-to-do/puerto-rico-el-yunque')
  })

  it('filters out local/non-http targets for title fetches', () => {
    expectFetchable([
      ['https://www.expedia.com/things-to-do/foo', true],
      ['http://localhost:5174', false],
      ['file:///tmp/demo.html', false],
      ['mailto:hello@example.com', false]
    ])
  })

  it('blocks private, link-local, and intranet hosts', () => {
    expectFetchable([
      ['http://10.0.0.12/path', false],
      ['http://172.22.5.4/path', false],
      ['http://192.168.1.22/path', false],
      ['http://169.254.169.254/latest/meta-data', false],
      ['http://[fd00::1]/', false],
      ['http://[fe80::1]/', false],
      ['http://printer.local/status', false],
      ['http://intranet/status', false],
      ['https://8.8.8.8/status', true]
    ])
  })

  it('deduplicates in-flight title fetches and caches results', async () => {
    const expectedTitle = 'El Yunque Tour Water Slide, Rope Swing & Pickup'
    const fetchMock = stubHtmlFetch(
      '<html><head><title>El Yunque Tour Water Slide, Rope Swing & Pickup</title></head></html>',
      'text/html; charset=utf-8'
    )
    const url = 'https://www.expedia.com/things-to-do/puerto-rico-el-yunque-rainforest-adventure.a46272756.activity-details'

    // Two concurrent lookups must share a single network request.
    const [first, second] = await Promise.all([fetchLinkTitle(url), fetchLinkTitle(url)])

    expect(first).toBe(expectedTitle)
    expect(second).toBe(expectedTitle)
    expect(fetchMock).toHaveBeenCalledTimes(1)

    // A later lookup is answered from the cache without another request.
    const third = await fetchLinkTitle(url)

    expect(third).toBe(expectedTitle)
    expect(fetchMock).toHaveBeenCalledTimes(1)
  })

  it('shares cache across protocol/www URL variants', async () => {
    const fetchMock = stubHtmlFetch('<html><head><title>Shared Canonical Title</title></head></html>')
    const first = 'https://www.getyourguide.com/san-juan-puerto-rico-l355/sunset-tours-tc306/'
    const second = 'http://getyourguide.com/san-juan-puerto-rico-l355/sunset-tours-tc306/'

    const titles = await Promise.all([fetchLinkTitle(first), fetchLinkTitle(second)])

    expect(titles[0]).toBe('Shared Canonical Title')
    expect(titles[1]).toBe('Shared Canonical Title')
    expect(fetchMock).toHaveBeenCalledTimes(1)
  })

  it('ignores error-like fetched titles', async () => {
    stubHtmlFetch('<html><head><title>Just a moment...</title></head></html>')
    const url = 'https://www.getyourguide.com/culebra-island-l145468/from-fajardo-full-day-cordillera-islands-catamaran-tour-t19894/'

    await expect(fetchLinkTitle(url)).resolves.toBe('')
  })

  it('decodes HTML entities in fetched titles', async () => {
    stubHtmlFetch('<html><head><title>AT&amp;T &#39;Deals&#39;</title></head></html>')

    await expect(fetchLinkTitle('https://example.com/offers')).resolves.toBe("AT&T 'Deals'")
  })

  it('skips network fetch for non-fetchable targets', async () => {
    const fetchMock = vi.fn()
    vi.stubGlobal('fetch', fetchMock)

    for (const target of ['http://localhost:3000/path', 'mailto:hello@example.com', 'file:///tmp/demo.html']) {
      await expect(fetchLinkTitle(target)).resolves.toBe('')
    }

    expect(fetchMock).not.toHaveBeenCalled()
  })
})

View file

@ -218,6 +218,41 @@ describe('Md wrapping', () => {
})
})
describe('Md link labels', () => {
  // Renders `text` through <Md> inside a fixed-width <Box> and returns the
  // plain output joined into a single string.
  const renderMdText = (text: string, width: number): string => {
    const md = React.createElement(Md, { t: DEFAULT_THEME, text })
    const boxed = React.createElement(Box, { width }, md)

    return renderPlain(boxed).join('\n')
  }

  it('renders bare URLs with readable slug labels', () => {
    const rendered = renderMdText(
      'see https://www.expedia.com/things-to-do/puerto-rico-el-yunque-rainforest-adventure for details',
      120
    )

    // The slug-derived label replaces the raw URL in the visible output.
    expect(rendered).toContain('Puerto Rico El Yunque Rainforest Adventure')
    expect(rendered).not.toContain('https://www.expedia.com/things-to-do/puerto-rico-el-yunque-rainforest-adventure')
  })

  it('keeps explicit markdown labels as the immediate fallback', () => {
    const rendered = renderMdText(
      '[Trip details](https://www.expedia.com/things-to-do/puerto-rico-el-yunque-rainforest-adventure)',
      80
    )

    expect(rendered).toContain('Trip details')
  })
})
describe('renderTable CJK width alignment', () => {
it('column starts share the same display offset across CJK rows', async () => {
const { stringWidth } = await import('@hermes/ink')
@ -248,6 +283,7 @@ describe('renderTable CJK width alignment', () => {
// unique anchor for column 2's start position on each row.
const colStarts = (line: string, anchor: string): number => {
const idx = line.indexOf(anchor)
return idx < 0 ? -1 : stringWidth(line.slice(0, idx))
}

View file

@ -2,6 +2,7 @@ import { Box, Link, stringWidth, Text } from '@hermes/ink'
import { Fragment, memo, type ReactNode, useMemo } from 'react'
import { ensureEmojiPresentation } from '../lib/emoji.js'
import { normalizeExternalUrl, urlSlugTitleLabel, useLinkTitle } from '../lib/externalLink.js'
import { BOX_CLOSE, BOX_OPEN, texToUnicode } from '../lib/mathUnicode.js'
import { highlightLine, isHighlightable } from '../lib/syntax.js'
import type { Theme } from '../theme.js'
@ -143,13 +144,43 @@ const isTableDivider = (row: string) => {
/**
 * Turns the text of an autolink into the URL it should open.
 *
 * Text that already carries a `mailto:`, `http:` or `https:` scheme (matched
 * case-insensitively) is returned unchanged, as is anything without an `@`.
 * Everything else is treated as a bare e-mail address and gets a `mailto:`
 * prefix.
 *
 * The scheme is matched including its colon so that addresses whose local
 * part merely begins with "http" (for example `httpd@example.com`) are still
 * recognised as e-mail addresses.
 */
const autolinkUrl = (raw: string) => {
  const hasScheme = /^(?:mailto|https?):/i.test(raw)

  if (hasScheme || !raw.includes('@')) {
    return raw
  }

  return `mailto:${raw}`
}
// Renders an autolink as accent-colored, underlined link text. The link target
// comes from autolinkUrl(raw); the visible text is `raw` without a leading
// `mailto:`.
const renderAutolink = (k: number, t: Theme, raw: string) => {
  const href = autolinkUrl(raw)
  const visible = raw.replace(/^mailto:/, '')

  return (
    <Link key={k} url={href}>
      <Text color={t.color.accent} underline>
        {visible}
      </Text>
    </Link>
  )
}
// Label of last resort for a link: the address for `mailto:` targets, a
// slug-derived title for http(s) URLs, and the raw target for anything else.
const defaultLinkLabel = (url: string) => {
  if (url.startsWith('mailto:')) {
    return url.replace(/^mailto:/, '')
  }

  if (/^https?:\/\//i.test(url)) {
    return urlSlugTitleLabel(url)
  }

  return url
}
// Chooses the author-supplied label to use as a fallback. Returns undefined
// when the label is missing or blank, or when it is just the link target
// written out again (after the same normalization the target went through).
const pickFallbackLabel = (label: string | undefined, target: string): string | undefined => {
  const text = label?.trim() ?? ''

  if (text === '') {
    return undefined
  }

  if (normalizeExternalUrl(text) === target) {
    return undefined
  }

  return text
}
/** Props accepted by the ResolvedLink component. */
interface ResolvedLinkProps {
// Text shown when no fetched title is available; when omitted, a label is derived from the URL.
fallbackLabel?: string
// Theme whose accent color is applied to the link text.
t: Theme
// Link target, also used to look up the page title.
url: string
}
// Link whose visible text prefers, in order: the title reported by
// useLinkTitle, the caller's fallback label, then a label derived from the URL.
function ResolvedLink({ fallbackLabel, t, url }: ResolvedLinkProps) {
  let text = useLinkTitle(url)

  if (!text) {
    // Empty strings count as "missing" at every step, hence `||` rather than `??`.
    text = fallbackLabel || defaultLinkLabel(url)
  }

  return (
    <Link url={url}>
      <Text color={t.color.accent} underline>
        {text}
      </Text>
    </Link>
  )
}
// Builds a keyed <ResolvedLink> for a raw link target. The target is
// normalized first so the element and its fallback label agree on the URL.
const renderResolvedLink = (k: number, t: Theme, rawUrl: string, label?: string) => {
  const url = normalizeExternalUrl(rawUrl)
  const fallbackLabel = pickFallbackLabel(label, url)

  return <ResolvedLink fallbackLabel={fallbackLabel} key={k} t={t} url={url} />
}
export const stripInlineMarkup = (v: string) =>
v
@ -232,15 +263,9 @@ function MdInline({ t, text }: { t: Theme; text: string }) {
</Text>
)
} else if (m[3] && m[4]) {
parts.push(
<Link key={parts.length} url={m[4]}>
<Text color={t.color.accent} underline>
{m[3]}
</Text>
</Link>
)
parts.push(renderResolvedLink(parts.length, t, m[4], m[3]))
} else if (m[5]) {
parts.push(renderAutolink(parts.length, t, m[5]))
parts.push(renderResolvedLink(parts.length, t, autolinkUrl(m[5]), m[5].replace(/^mailto:/, '')))
} else if (m[6]) {
parts.push(
<Text key={parts.length} strikethrough>
@ -302,7 +327,7 @@ function MdInline({ t, text }: { t: Theme; text: string }) {
// so `see https://x.com/, which…` keeps the comma outside the link.
const url = m[16].replace(/[),.;:!?]+$/g, '')
parts.push(renderAutolink(parts.length, t, url))
parts.push(renderResolvedLink(parts.length, t, url))
if (url.length < m[16].length) {
parts.push(<Text key={parts.length}>{m[16].slice(url.length)}</Text>)

View file

@ -0,0 +1,429 @@
import { isIP } from 'node:net'
import { useEffect, useMemo, useState } from 'react'
// Resolved titles, keyed by titleCacheKey(). An empty string records a lookup that produced no usable title.
const titleCache = new Map<string, string>()
// Title lookups currently in progress, keyed like titleCache, so concurrent callers share one request.
const titleInflight = new Map<string, Promise<string>>()
// Per-key callbacks that fetchLinkTitle invokes with the resolved title; useLinkTitle registers its state setter here.
const titleSubs = new Map<string, Set<(value: string) => void>>()
// Entry count at which cacheTitle evicts the oldest cache entry before inserting.
const TITLE_CACHE_LIMIT = 500
// Upper bound applied by fetchHtmlTitle (via String.prototype.slice) to a parsed title.
const TITLE_MAX_LENGTH = 240
// How much of a response body readResponseSnippet keeps while looking for a <title>.
const TITLE_BYTE_BUDGET = 96 * 1024
// Delay after which fetchHtmlTitle aborts its request.
const TITLE_TIMEOUT_MS = 5000
// User-Agent header sent with title requests.
const TITLE_USER_AGENT =
'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_6_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36'
// Titles containing any of these phrases are rejected by usableTitle as error or challenge pages.
const TITLE_ERROR_RE =
/\b(?:access denied|attention required|captcha|error|forbidden|just a moment|request blocked|too many requests)\b/i
// Scheme-less text that normalizeExternalUrl treats as a domain and prefixes with https://.
const DOMAIN_RE = /^(?:www\.)?[a-z0-9](?:[a-z0-9-]*\.)+[a-z]{2,}(?::\d+)?(?:[/?#][^\s]*)?$/i
// URL schemes for which isTitleFetchable returns false without parsing the URL.
const SKIP_PROTO_RE = /^(?:file|data|mailto|javascript|blob|chrome|about|hermes):/i
// Exact hostnames that isPrivateOrLocalHost treats as local.
const LOCAL_HOSTNAME_RE = /^(?:localhost|localhost\.localdomain)$/i
// Hostname suffixes that isPrivateOrLocalHost treats as local or intranet names.
const LOCAL_HOST_SUFFIXES = ['.corp', '.home', '.internal', '.lan', '.local', '.localdomain']
// Entity names (without the surrounding & and ;) that decodeHtmlEntities replaces, mapped to their characters.
const HTML_ENTITIES: Record<string, string> = {
'#39': "'",
amp: '&',
apos: "'",
gt: '>',
lt: '<',
nbsp: ' ',
quot: '"'
}
/**
 * Trims a link target and, when it looks like a bare domain, prefixes it with
 * `https://`.
 *
 * @param value Raw link text.
 * @returns The trimmed text unchanged when it is empty, already starts with
 * `http://` or `https://`, or does not match the bare-domain pattern;
 * otherwise the trimmed text with `https://` prepended.
 */
export function normalizeExternalUrl(value: string): string {
  const candidate = value.trim()
  const alreadyHttp = /^https?:\/\//i.test(candidate)

  if (candidate === '' || alreadyHttp) {
    return candidate
  }

  if (DOMAIN_RE.test(candidate)) {
    return `https://${candidate}`
  }

  return candidate
}
/**
 * Parses a link target into a URL after normalizing it.
 *
 * @returns The parsed URL, or null when the URL constructor rejects the input.
 */
function parseUrl(value: string): null | URL {
  let parsed: null | URL = null

  try {
    parsed = new URL(normalizeExternalUrl(value))
  } catch {
    // Unparseable input is reported as null instead of being thrown.
  }

  return parsed
}
/**
 * Builds the cache key under which a URL's title is stored.
 *
 * The key deliberately ignores the scheme, a leading `www.`, trailing slashes
 * on the path, and the fragment, so those variants of one page share a cache
 * entry. It does include an explicit port (taken from `URL.host`, which omits
 * the scheme's default port), because the same hostname on another port is a
 * different service and must not share a title.
 *
 * @param value Link target, normalized or not.
 * @returns `host[:port] + path + query`, or the normalized input when it
 * cannot be parsed as a URL.
 */
function titleCacheKey(value: string): string {
  const url = parseUrl(value)

  if (!url) {
    return normalizeExternalUrl(value)
  }

  const host = url.host.replace(/^www\./i, '').toLowerCase()
  // An all-slash path collapses to '' and is mapped back to the root path.
  const pathname = url.pathname.replace(/\/+$/, '') || '/'

  return `${host}${pathname}${url.search}`
}
/**
 * Stores a title in the cache, keeping the cache within TITLE_CACHE_LIMIT
 * entries.
 *
 * When a new key would be added at the limit, the first key in the Map's
 * iteration order (its oldest insertion) is evicted. Overwriting a key that is
 * already cached does not grow the cache, so nothing is evicted in that case.
 */
function cacheTitle(key: string, title: string): void {
  if (!titleCache.has(key) && titleCache.size >= TITLE_CACHE_LIMIT) {
    const oldest = titleCache.keys().next()

    if (!oldest.done) {
      titleCache.delete(oldest.value)
    }
  }

  titleCache.set(key, title)
}
/**
 * Formats a URL as `host + path` for display.
 *
 * A leading `www.` is dropped from the host, a single trailing slash is
 * dropped from the path, and a root-only path is omitted entirely.
 *
 * @returns The formatted label, or `value` unchanged when it cannot be parsed.
 */
export function hostPathLabel(value: string): string {
  const parsed = parseUrl(value)

  if (parsed === null) {
    return value
  }

  const { hostname, pathname } = parsed
  const bareHost = hostname.replace(/^www\./, '')
  const hasPath = pathname !== '' && pathname !== '/'

  return hasPath ? bareHost + pathname.replace(/\/$/, '') : bareHost
}
/**
 * Turns one URL path segment into space-separated words.
 *
 * The segment is percent-decoded, trailing noise is removed (an `.a<digits>.`
 * tail, a page extension, and runs of short id-like tokens such as `-t19891`),
 * and `-`/`_` separators become single spaces.
 *
 * @returns The cleaned text, or '' when the segment is not valid percent-encoding.
 */
function cleanSlug(segment: string): string {
  let text: string

  try {
    text = decodeURIComponent(segment)
  } catch {
    return ''
  }

  // Applied in this order; each pattern only strips from the end of the text.
  const trailingNoise = [
    /\.a\d+\..*$/i,
    /\.(?:html?|php|aspx?)$/i,
    /(?:[-_.](?:[a-z]{1,3}\d{2,}|i\d{2,}))+$/i
  ]

  for (const pattern of trailingNoise) {
    text = text.replace(pattern, '')
  }

  return text.replace(/[_-]+/g, ' ').replace(/\s+/g, ' ').trim()
}
/**
 * Derives a readable, title-cased label from a URL's path.
 *
 * Path segments are tried from last to first. A segment is used when, after
 * cleaning, it contains a letter, is not just an id-like token (digits, or up
 * to three letters followed by digits), and is at least four characters long.
 *
 * @returns The title-cased segment, or hostPathLabel(value) when no segment qualifies.
 */
export function urlSlugTitleLabel(value: string): string {
  const segments = parseUrl(value)?.pathname.split('/').filter(Boolean) ?? []

  for (let index = segments.length - 1; index >= 0; index -= 1) {
    const words = cleanSlug(segments[index] ?? '')
    const hasLetter = /[a-z]/i.test(words)
    const looksLikeId = /^(?:[a-z]{1,3}\d+|\d+)$/i.test(words.replace(/\s+/g, ''))

    if (!words || !hasLetter || looksLikeId) {
      continue
    }

    const titled = words.replace(/\b[a-z]/g, letter => letter.toUpperCase())

    if (titled.length >= 4) {
      return titled
    }
  }

  return hostPathLabel(value)
}
/**
 * Parses a dotted-quad IPv4 literal into its four octets.
 *
 * @returns The octets, or null unless the text is exactly four groups of one
 * to three decimal digits, each no greater than 255.
 */
function parseIpv4Octets(value: string): null | [number, number, number, number] {
  const parts = value.split('.')

  if (parts.length !== 4) {
    return null
  }

  const octets: number[] = []

  for (const part of parts) {
    if (!/^\d{1,3}$/.test(part)) {
      return null
    }

    const next = Number(part)

    if (!Number.isInteger(next) || next < 0 || next > 255) {
      return null
    }

    octets.push(next)
  }

  return [octets[0]!, octets[1]!, octets[2]!, octets[3]!]
}

/**
 * Reports whether a dotted-quad IPv4 literal lies in a range that must not be
 * fetched automatically.
 *
 * @returns false for text that is not a dotted-quad IPv4 literal.
 */
function isPrivateIpv4(value: string): boolean {
  const octets = parseIpv4Octets(value)

  if (!octets) {
    return false
  }

  const [a, b] = octets

  return (
    a === 0 || // 0.0.0.0/8 "this network"
    a === 10 || // 10.0.0.0/8 private
    a === 127 || // 127.0.0.0/8 loopback
    a >= 224 || // 224.0.0.0/4 multicast, 240.0.0.0/4 reserved, broadcast
    (a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10 carrier-grade NAT
    (a === 169 && b === 254) || // 169.254.0.0/16 link-local
    (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12 private
    (a === 192 && b === 168) || // 192.168.0.0/16 private
    (a === 198 && (b === 18 || b === 19)) // 198.18.0.0/15 benchmarking
  )
}

/**
 * Reports whether an IPv6 literal (without brackets) is unspecified, loopback,
 * unique-local (fc00::/7), link-local (fe80::/10), or an IPv4-mapped address
 * whose embedded IPv4 address is private.
 *
 * IPv4-mapped addresses are recognised both in dotted form
 * (`::ffff:10.0.0.1`) and in the hexadecimal form (`::ffff:a00:1`). The
 * hexadecimal form matters because the URL parser rewrites the dotted form
 * into it, so that is what a parsed hostname actually contains.
 */
function isPrivateIpv6(value: string): boolean {
  const normalized = value.toLowerCase()

  if (normalized === '::' || normalized === '::1') {
    return true
  }

  if (normalized.startsWith('fc') || normalized.startsWith('fd')) {
    return true
  }

  if (/^fe[89ab]/.test(normalized)) {
    return true
  }

  const mappedPrefix = '::ffff:'

  if (normalized.startsWith(mappedPrefix)) {
    const embedded = normalized.slice(mappedPrefix.length)
    const hextets = /^([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(embedded)

    if (!hextets) {
      return isPrivateIpv4(embedded)
    }

    // Two 16-bit groups carry the four IPv4 octets, high byte first.
    const high = parseInt(hextets[1] ?? '', 16)
    const low = parseInt(hextets[2] ?? '', 16)

    return isPrivateIpv4(`${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`)
  }

  return false
}
/**
 * Canonicalizes a hostname for comparison: removes one leading `[` and one
 * trailing `]`, cuts everything from the first `%` onward, removes one
 * trailing dot, and lowercases the result.
 */
function normalizeHostname(value: string): string {
  let host = value

  if (host.startsWith('[')) {
    host = host.slice(1)
  }

  if (host.endsWith(']')) {
    host = host.slice(0, -1)
  }

  const zoneStart = host.indexOf('%')

  if (zoneStart !== -1) {
    host = host.slice(0, zoneStart)
  }

  if (host.endsWith('.')) {
    host = host.slice(0, -1)
  }

  return host.toLowerCase()
}
/**
 * Reports whether a hostname should be treated as local or private.
 *
 * True for an empty hostname, for localhost names, for names ending in one of
 * the local suffixes, for private IPv4/IPv6 literals, and for any remaining
 * name without a dot.
 */
function isPrivateOrLocalHost(hostname: string): boolean {
  const host = normalizeHostname(hostname)

  const namedLocal =
    !host || LOCAL_HOSTNAME_RE.test(host) || LOCAL_HOST_SUFFIXES.some(suffix => host.endsWith(suffix))

  if (namedLocal) {
    return true
  }

  switch (isIP(host)) {
    case 4:
      return isPrivateIpv4(host)
    case 6:
      return isPrivateIpv6(host)
    default:
      // Single-label hostnames are usually LAN names or enterprise intranet aliases.
      return !host.includes('.')
  }
}
/**
 * Decides whether a link target may be requested to look up its page title.
 *
 * @returns true only for parseable http(s) URLs whose host is not local or
 * private; false for empty input and for the skipped schemes.
 */
export function isTitleFetchable(value: string): boolean {
  if (!value) {
    return false
  }

  if (SKIP_PROTO_RE.test(value)) {
    return false
  }

  const url = parseUrl(value)

  if (url === null) {
    return false
  }

  const isHttp = url.protocol === 'http:' || url.protocol === 'https:'

  return isHttp && !isPrivateOrLocalHost(url.hostname)
}
/**
 * Decodes the HTML character references that commonly appear in page titles:
 * the named entities listed in HTML_ENTITIES plus decimal (`&#39;`) and
 * hexadecimal (`&#x27;`) numeric references.
 *
 * All references are decoded in a single pass, so text produced by one
 * replacement is never decoded again (`&amp;#39;` yields the literal text
 * `&#39;`, not an apostrophe).
 *
 * Numeric references that do not name a usable character (zero, surrogate
 * halves, or values above U+10FFFF) decode to a space. Values above U+10FFFF
 * would otherwise make String.fromCodePoint throw a RangeError.
 */
function decodeHtmlEntities(value: string): string {
  return value.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|(amp|lt|gt|quot|apos|nbsp));/gi,
    (_match: string, hex?: string, decimal?: string, name?: string) => {
      if (name !== undefined) {
        return HTML_ENTITIES[name.toLowerCase()] ?? ''
      }

      const codePoint = hex !== undefined ? parseInt(hex, 16) : parseInt(decimal ?? '', 10)
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff
      const isUsable = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate

      return isUsable ? String.fromCodePoint(codePoint) : ' '
    }
  )
}
/**
 * Extracts the text of the first `<title>` element from an HTML snippet.
 *
 * @returns The title with entities decoded and whitespace collapsed, or ''
 * when there is no title element or it is empty.
 */
function parseHtmlTitle(html: string): string {
  const match = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html)
  const inner = match?.[1]

  if (!inner) {
    return ''
  }

  return decodeHtmlEntities(inner).replace(/\s+/g, ' ').trim()
}
/**
 * Reads at most TITLE_BYTE_BUDGET bytes from the start of a response body and
 * decodes them with a default TextDecoder.
 *
 * When the response exposes no body stream, the whole text is read and then
 * truncated to TITLE_BYTE_BUDGET characters instead.
 *
 * @returns The decoded snippet, or '' when nothing was read or reading failed.
 */
async function readResponseSnippet(response: Response): Promise<string> {
  const stream = response.body

  if (!stream) {
    const text = await response.text()

    return text.slice(0, TITLE_BYTE_BUDGET)
  }

  const reader = stream.getReader()
  const pieces: Uint8Array[] = []
  let received = 0
  let exhausted = false

  try {
    for (;;) {
      if (received >= TITLE_BYTE_BUDGET) {
        break
      }

      const { done, value } = await reader.read()

      if (done) {
        exhausted = true
        break
      }

      if (!value?.length) {
        continue
      }

      const room = TITLE_BYTE_BUDGET - received

      if (value.length > room) {
        // Keep only what still fits, then stop reading.
        pieces.push(value.subarray(0, room))
        received += room
        break
      }

      pieces.push(value)
      received += value.length
    }
  } catch {
    return ''
  } finally {
    // The stream is cancelled unless it was read to its end.
    if (!exhausted) {
      try {
        await reader.cancel()
      } catch {
        // Ignore stream teardown failures.
      }
    }
  }

  if (pieces.length === 0) {
    return ''
  }

  const merged = new Uint8Array(received)

  pieces.reduce((position, piece) => {
    merged.set(piece, position)

    return position + piece.length
  }, 0)

  return new TextDecoder().decode(merged)
}
/**
 * Collapses whitespace in a fetched title and rejects titles that look like
 * error or challenge pages.
 *
 * @returns The cleaned title, or '' when it is empty or matches TITLE_ERROR_RE.
 */
function usableTitle(value: string): string {
  const collapsed = value.replace(/\s+/g, ' ').trim()

  if (collapsed === '') {
    return ''
  }

  return TITLE_ERROR_RE.test(collapsed) ? '' : collapsed
}
/**
 * Requests a page and returns the text of its `<title>`, truncated to
 * TITLE_MAX_LENGTH.
 *
 * Redirects are followed by hand rather than by `fetch`, so every hop can be
 * checked with isTitleFetchable. Otherwise a public URL could redirect the
 * request to a local or private host that the caller's own check would have
 * refused. At most `maxRedirects` redirects are followed.
 *
 * NOTE(review): this relies on the runtime exposing the status and `Location`
 * header of a redirect response under `redirect: 'manual'`. A runtime that
 * returns an opaque redirect instead (status 0) makes redirected links
 * resolve to '' - confirm against the target runtime.
 *
 * One timer, started before the first request, bounds the whole operation
 * including redirects and reading the body.
 *
 * @param normalizedUrl Absolute http(s) URL that the caller has already validated.
 * @returns The title, or '' on any failure: network error, timeout, non-OK
 * status, disallowed or missing redirect target, too many redirects, a
 * content type that is neither HTML nor XML, or no title element.
 */
async function fetchHtmlTitle(normalizedUrl: string): Promise<string> {
  const maxRedirects = 5
  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), TITLE_TIMEOUT_MS)

  try {
    let target = normalizedUrl

    for (let hop = 0; hop <= maxRedirects; hop += 1) {
      const response = await fetch(target, {
        headers: {
          Accept: 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.5',
          'Accept-Language': 'en-US,en;q=0.7',
          'User-Agent': TITLE_USER_AGENT
        },
        redirect: 'manual',
        signal: controller.signal
      })

      if (response.status >= 300 && response.status < 400) {
        const location = response.headers.get('location')

        try {
          // The redirect body is not needed; release the connection.
          await response.body?.cancel()
        } catch {
          // Ignore stream teardown failures.
        }

        // Relative Location values are resolved against the URL just requested.
        const next = location ? new URL(location, target).toString() : ''

        if (!next || !isTitleFetchable(next)) {
          return ''
        }

        target = next
        continue
      }

      if (!response.ok) {
        return ''
      }

      const contentType = response.headers.get('content-type')

      // A missing content type is given the benefit of the doubt.
      if (contentType && !/html|xml/i.test(contentType)) {
        return ''
      }

      const html = await readResponseSnippet(response)

      return parseHtmlTitle(html).slice(0, TITLE_MAX_LENGTH)
    }

    // Redirect limit exceeded.
    return ''
  } catch {
    return ''
  } finally {
    clearTimeout(timeout)
  }
}
/**
 * Resolves the page title for a link, sharing work between callers.
 *
 * Results are cached per titleCacheKey, including the empty result, and
 * concurrent calls for the same key share one in-flight request. When a
 * request settles, the title is cached and passed to every subscriber
 * registered for that key.
 *
 * A request that is no longer the registered in-flight entry for its key when
 * it settles (for example because __resetLinkTitleCache ran in the meantime)
 * is treated as superseded. It still resolves for its own callers, but it
 * neither writes to the cache nor removes another request's in-flight entry.
 *
 * @param url Link target, normalized or not.
 * @returns The title, or '' when the target is not fetchable or no usable
 * title was found. The promise never rejects because of a fetch failure.
 */
export function fetchLinkTitle(url: string): Promise<string> {
  const normalizedUrl = normalizeExternalUrl(url)

  if (!isTitleFetchable(normalizedUrl)) {
    return Promise.resolve('')
  }

  const key = titleCacheKey(normalizedUrl)
  const cached = titleCache.get(key)

  if (cached !== undefined) {
    return Promise.resolve(cached)
  }

  const pending = titleInflight.get(key)

  if (pending) {
    return pending
  }

  const promise: Promise<string> = fetchHtmlTitle(normalizedUrl)
    .then(usableTitle)
    .catch(() => '')
    .then(clean => {
      if (titleInflight.get(key) !== promise) {
        return clean
      }

      // Clear the in-flight entry first so it is released even if a subscriber throws.
      titleInflight.delete(key)
      cacheTitle(key, clean)
      titleSubs.get(key)?.forEach(sub => sub(clean))

      return clean
    })

  titleInflight.set(key, promise)

  return promise
}
// Cached title for a cache key; '' for an empty key or a key with no entry.
const readCachedTitle = (cacheKey: string): string => (cacheKey ? (titleCache.get(cacheKey) ?? '') : '')

/**
 * React hook that returns the resolved page title for a link.
 *
 * It starts from whatever is cached, then subscribes to the key's title
 * updates and triggers a lookup. The subscription is removed on cleanup, and
 * the key's subscriber set is dropped once it is empty.
 *
 * @param url Link target; null, undefined, or '' yields ''.
 * @returns The title, or '' while none is known.
 */
export function useLinkTitle(url?: null | string): string {
  const target = useMemo(() => (url ? normalizeExternalUrl(url) : ''), [url])
  const cacheKey = useMemo(() => (target ? titleCacheKey(target) : ''), [target])
  const [title, setTitle] = useState(() => readCachedTitle(cacheKey))

  useEffect(() => {
    // Re-sync with the cache whenever the link changes.
    setTitle(readCachedTitle(cacheKey))

    if (!cacheKey || !isTitleFetchable(target)) {
      return
    }

    let listeners = titleSubs.get(cacheKey)

    if (!listeners) {
      listeners = new Set<(value: string) => void>()
    }

    const registered = listeners

    registered.add(setTitle)
    titleSubs.set(cacheKey, registered)

    void fetchLinkTitle(target)

    return () => {
      registered.delete(setTitle)

      if (registered.size === 0) {
        titleSubs.delete(cacheKey)
      }
    }
  }, [cacheKey, target])

  return title
}
/**
 * Empties the title cache, the in-flight request map, and the subscriber map.
 * The test suite calls this after each test.
 */
export function __resetLinkTitleCache(): void {
  for (const store of [titleCache, titleInflight, titleSubs]) {
    store.clear()
  }
}