mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
feat(ui-tui): resolve markdown links to readable page titles (#24013)
* feat(ui-tui): resolve links to readable page titles

  Mirror desktop pretty-link behavior in the TUI by resolving HTTP links to page titles with shared caching and safe fetch filters, plus slug-based fallbacks so chat links stay readable even when the title fetch fails.

* refactor(ui-tui): tighten link-title fallback handling

  Clean up the link-title resolver by hardening in-flight cleanup and clarifying title length limits, while adding focused coverage for HTML entity decoding and markdown-label fallback behavior.

* fix(ui-tui): block private-network targets in title fetches

  Prevent automatic link-title resolution from requesting local or private hosts by rejecting RFC1918, link-local, ULA, and intranet-style hostnames before fetch, and add regression coverage for blocked host patterns.
This commit is contained in:
parent
9a63b5f16c
commit
75b428c852
4 changed files with 644 additions and 16 deletions
138
ui-tui/src/__tests__/externalLink.test.ts
Normal file
138
ui-tui/src/__tests__/externalLink.test.ts
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
import { afterEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import {
|
||||
__resetLinkTitleCache,
|
||||
fetchLinkTitle,
|
||||
hostPathLabel,
|
||||
isTitleFetchable,
|
||||
normalizeExternalUrl,
|
||||
urlSlugTitleLabel
|
||||
} from '../lib/externalLink.js'
|
||||
|
||||
afterEach(() => {
|
||||
__resetLinkTitleCache()
|
||||
vi.restoreAllMocks()
|
||||
vi.unstubAllGlobals()
|
||||
})
|
||||
|
||||
describe('external link helpers', () => {
|
||||
it('formats URL fallbacks as host + path', () => {
|
||||
expect(
|
||||
hostPathLabel(
|
||||
'https://www.getyourguide.com/culebra-island-l145468/from-fajardo-full-day-cordillera-islands-catamaran-tour-t19894/'
|
||||
)
|
||||
).toBe('getyourguide.com/culebra-island-l145468/from-fajardo-full-day-cordillera-islands-catamaran-tour-t19894')
|
||||
})
|
||||
|
||||
it('derives readable title fallbacks from URL slugs', () => {
|
||||
expect(
|
||||
urlSlugTitleLabel('https://www.getyourguide.com/fajardo-l882/from-fajardo-icacos-island-full-day-catamaran-trip-t19891/')
|
||||
).toBe('From Fajardo Icacos Island Full Day Catamaran Trip')
|
||||
})
|
||||
|
||||
it('normalizes scheme-less links', () => {
|
||||
expect(normalizeExternalUrl(' expedia.com/things-to-do/puerto-rico-el-yunque ')).toBe(
|
||||
'https://expedia.com/things-to-do/puerto-rico-el-yunque'
|
||||
)
|
||||
})
|
||||
|
||||
it('filters out local/non-http targets for title fetches', () => {
|
||||
expect(isTitleFetchable('https://www.expedia.com/things-to-do/foo')).toBe(true)
|
||||
expect(isTitleFetchable('http://localhost:5174')).toBe(false)
|
||||
expect(isTitleFetchable('file:///tmp/demo.html')).toBe(false)
|
||||
expect(isTitleFetchable('mailto:hello@example.com')).toBe(false)
|
||||
})
|
||||
|
||||
it('blocks private, link-local, and intranet hosts', () => {
|
||||
expect(isTitleFetchable('http://10.0.0.12/path')).toBe(false)
|
||||
expect(isTitleFetchable('http://172.22.5.4/path')).toBe(false)
|
||||
expect(isTitleFetchable('http://192.168.1.22/path')).toBe(false)
|
||||
expect(isTitleFetchable('http://169.254.169.254/latest/meta-data')).toBe(false)
|
||||
expect(isTitleFetchable('http://[fd00::1]/')).toBe(false)
|
||||
expect(isTitleFetchable('http://[fe80::1]/')).toBe(false)
|
||||
expect(isTitleFetchable('http://printer.local/status')).toBe(false)
|
||||
expect(isTitleFetchable('http://intranet/status')).toBe(false)
|
||||
expect(isTitleFetchable('https://8.8.8.8/status')).toBe(true)
|
||||
})
|
||||
|
||||
it('deduplicates in-flight title fetches and caches results', async () => {
|
||||
const fetchMock = vi.fn().mockResolvedValue(
|
||||
new Response('<html><head><title>El Yunque Tour Water Slide, Rope Swing & Pickup</title></head></html>', {
|
||||
headers: { 'content-type': 'text/html; charset=utf-8' },
|
||||
status: 200
|
||||
})
|
||||
)
|
||||
|
||||
vi.stubGlobal('fetch', fetchMock)
|
||||
|
||||
const url = 'https://www.expedia.com/things-to-do/puerto-rico-el-yunque-rainforest-adventure.a46272756.activity-details'
|
||||
const [first, second] = await Promise.all([fetchLinkTitle(url), fetchLinkTitle(url)])
|
||||
|
||||
expect(first).toBe('El Yunque Tour Water Slide, Rope Swing & Pickup')
|
||||
expect(second).toBe('El Yunque Tour Water Slide, Rope Swing & Pickup')
|
||||
expect(fetchMock).toHaveBeenCalledTimes(1)
|
||||
|
||||
const third = await fetchLinkTitle(url)
|
||||
|
||||
expect(third).toBe('El Yunque Tour Water Slide, Rope Swing & Pickup')
|
||||
expect(fetchMock).toHaveBeenCalledTimes(1)
|
||||
})
|
||||
|
||||
it('shares cache across protocol/www URL variants', async () => {
|
||||
const fetchMock = vi.fn().mockResolvedValue(
|
||||
new Response('<html><head><title>Shared Canonical Title</title></head></html>', {
|
||||
headers: { 'content-type': 'text/html' },
|
||||
status: 200
|
||||
})
|
||||
)
|
||||
|
||||
vi.stubGlobal('fetch', fetchMock)
|
||||
|
||||
const first = 'https://www.getyourguide.com/san-juan-puerto-rico-l355/sunset-tours-tc306/'
|
||||
const second = 'http://getyourguide.com/san-juan-puerto-rico-l355/sunset-tours-tc306/'
|
||||
|
||||
const [a, b] = await Promise.all([fetchLinkTitle(first), fetchLinkTitle(second)])
|
||||
|
||||
expect(a).toBe('Shared Canonical Title')
|
||||
expect(b).toBe('Shared Canonical Title')
|
||||
expect(fetchMock).toHaveBeenCalledTimes(1)
|
||||
})
|
||||
|
||||
it('ignores error-like fetched titles', async () => {
|
||||
const fetchMock = vi.fn().mockResolvedValue(
|
||||
new Response('<html><head><title>Just a moment...</title></head></html>', {
|
||||
headers: { 'content-type': 'text/html' },
|
||||
status: 200
|
||||
})
|
||||
)
|
||||
|
||||
vi.stubGlobal('fetch', fetchMock)
|
||||
|
||||
const url = 'https://www.getyourguide.com/culebra-island-l145468/from-fajardo-full-day-cordillera-islands-catamaran-tour-t19894/'
|
||||
|
||||
await expect(fetchLinkTitle(url)).resolves.toBe('')
|
||||
})
|
||||
|
||||
it('decodes HTML entities in fetched titles', async () => {
  // The fixture must carry *encoded* entities (&amp; and &#39;): with the raw
  // characters the string literal is not even valid (unescaped quotes inside a
  // single-quoted string) and the decoder under test would have nothing to do.
  const fetchMock = vi.fn().mockResolvedValue(
    new Response('<html><head><title>AT&amp;T &#39;Deals&#39;</title></head></html>', {
      headers: { 'content-type': 'text/html' },
      status: 200
    })
  )

  vi.stubGlobal('fetch', fetchMock)

  await expect(fetchLinkTitle('https://example.com/offers')).resolves.toBe("AT&T 'Deals'")
})
|
||||
|
||||
it('skips network fetch for non-fetchable targets', async () => {
|
||||
const fetchMock = vi.fn()
|
||||
vi.stubGlobal('fetch', fetchMock)
|
||||
|
||||
await expect(fetchLinkTitle('http://localhost:3000/path')).resolves.toBe('')
|
||||
await expect(fetchLinkTitle('mailto:hello@example.com')).resolves.toBe('')
|
||||
await expect(fetchLinkTitle('file:///tmp/demo.html')).resolves.toBe('')
|
||||
expect(fetchMock).not.toHaveBeenCalled()
|
||||
})
|
||||
})
|
||||
|
|
@ -218,6 +218,41 @@ describe('Md wrapping', () => {
|
|||
})
|
||||
})
|
||||
|
||||
describe('Md link labels', () => {
|
||||
it('renders bare URLs with readable slug labels', () => {
|
||||
const lines = renderPlain(
|
||||
React.createElement(
|
||||
Box,
|
||||
{ width: 120 },
|
||||
React.createElement(Md, {
|
||||
t: DEFAULT_THEME,
|
||||
text: 'see https://www.expedia.com/things-to-do/puerto-rico-el-yunque-rainforest-adventure for details'
|
||||
})
|
||||
)
|
||||
)
|
||||
|
||||
const rendered = lines.join('\n')
|
||||
|
||||
expect(rendered).toContain('Puerto Rico El Yunque Rainforest Adventure')
|
||||
expect(rendered).not.toContain('https://www.expedia.com/things-to-do/puerto-rico-el-yunque-rainforest-adventure')
|
||||
})
|
||||
|
||||
it('keeps explicit markdown labels as the immediate fallback', () => {
|
||||
const lines = renderPlain(
|
||||
React.createElement(
|
||||
Box,
|
||||
{ width: 80 },
|
||||
React.createElement(Md, {
|
||||
t: DEFAULT_THEME,
|
||||
text: '[Trip details](https://www.expedia.com/things-to-do/puerto-rico-el-yunque-rainforest-adventure)'
|
||||
})
|
||||
)
|
||||
)
|
||||
|
||||
expect(lines.join('\n')).toContain('Trip details')
|
||||
})
|
||||
})
|
||||
|
||||
describe('renderTable CJK width alignment', () => {
|
||||
it('column starts share the same display offset across CJK rows', async () => {
|
||||
const { stringWidth } = await import('@hermes/ink')
|
||||
|
|
@ -248,6 +283,7 @@ describe('renderTable CJK width alignment', () => {
|
|||
// unique anchor for column 2's start position on each row.
|
||||
const colStarts = (line: string, anchor: string): number => {
|
||||
const idx = line.indexOf(anchor)
|
||||
|
||||
return idx < 0 ? -1 : stringWidth(line.slice(0, idx))
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { Box, Link, stringWidth, Text } from '@hermes/ink'
|
|||
import { Fragment, memo, type ReactNode, useMemo } from 'react'
|
||||
|
||||
import { ensureEmojiPresentation } from '../lib/emoji.js'
|
||||
import { normalizeExternalUrl, urlSlugTitleLabel, useLinkTitle } from '../lib/externalLink.js'
|
||||
import { BOX_CLOSE, BOX_OPEN, texToUnicode } from '../lib/mathUnicode.js'
|
||||
import { highlightLine, isHighlightable } from '../lib/syntax.js'
|
||||
import type { Theme } from '../theme.js'
|
||||
|
|
@ -143,13 +144,43 @@ const isTableDivider = (row: string) => {
|
|||
const autolinkUrl = (raw: string) =>
|
||||
raw.startsWith('mailto:') || raw.startsWith('http') || !raw.includes('@') ? raw : `mailto:${raw}`
|
||||
|
||||
const renderAutolink = (k: number, t: Theme, raw: string) => (
|
||||
<Link key={k} url={autolinkUrl(raw)}>
|
||||
<Text color={t.color.accent} underline>
|
||||
{raw.replace(/^mailto:/, '')}
|
||||
</Text>
|
||||
</Link>
|
||||
)
|
||||
const defaultLinkLabel = (url: string) =>
|
||||
url.startsWith('mailto:') ? url.replace(/^mailto:/, '') : /^https?:\/\//i.test(url) ? urlSlugTitleLabel(url) : url
|
||||
|
||||
const pickFallbackLabel = (label: string | undefined, target: string): string | undefined => {
|
||||
const trimmed = label?.trim()
|
||||
|
||||
if (!trimmed) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
return normalizeExternalUrl(trimmed) === target ? undefined : trimmed
|
||||
}
|
||||
|
||||
interface ResolvedLinkProps {
|
||||
fallbackLabel?: string
|
||||
t: Theme
|
||||
url: string
|
||||
}
|
||||
|
||||
function ResolvedLink({ fallbackLabel, t, url }: ResolvedLinkProps) {
|
||||
const fetched = useLinkTitle(url)
|
||||
const display = fetched || fallbackLabel || defaultLinkLabel(url)
|
||||
|
||||
return (
|
||||
<Link url={url}>
|
||||
<Text color={t.color.accent} underline>
|
||||
{display}
|
||||
</Text>
|
||||
</Link>
|
||||
)
|
||||
}
|
||||
|
||||
const renderResolvedLink = (k: number, t: Theme, rawUrl: string, label?: string) => {
|
||||
const target = normalizeExternalUrl(rawUrl)
|
||||
|
||||
return <ResolvedLink fallbackLabel={pickFallbackLabel(label, target)} key={k} t={t} url={target} />
|
||||
}
|
||||
|
||||
export const stripInlineMarkup = (v: string) =>
|
||||
v
|
||||
|
|
@ -232,15 +263,9 @@ function MdInline({ t, text }: { t: Theme; text: string }) {
|
|||
</Text>
|
||||
)
|
||||
} else if (m[3] && m[4]) {
|
||||
parts.push(
|
||||
<Link key={parts.length} url={m[4]}>
|
||||
<Text color={t.color.accent} underline>
|
||||
{m[3]}
|
||||
</Text>
|
||||
</Link>
|
||||
)
|
||||
parts.push(renderResolvedLink(parts.length, t, m[4], m[3]))
|
||||
} else if (m[5]) {
|
||||
parts.push(renderAutolink(parts.length, t, m[5]))
|
||||
parts.push(renderResolvedLink(parts.length, t, autolinkUrl(m[5]), m[5].replace(/^mailto:/, '')))
|
||||
} else if (m[6]) {
|
||||
parts.push(
|
||||
<Text key={parts.length} strikethrough>
|
||||
|
|
@ -302,7 +327,7 @@ function MdInline({ t, text }: { t: Theme; text: string }) {
|
|||
// so `see https://x.com/, which…` keeps the comma outside the link.
|
||||
const url = m[16].replace(/[),.;:!?]+$/g, '')
|
||||
|
||||
parts.push(renderAutolink(parts.length, t, url))
|
||||
parts.push(renderResolvedLink(parts.length, t, url))
|
||||
|
||||
if (url.length < m[16].length) {
|
||||
parts.push(<Text key={parts.length}>{m[16].slice(url.length)}</Text>)
|
||||
|
|
|
|||
429
ui-tui/src/lib/externalLink.ts
Normal file
429
ui-tui/src/lib/externalLink.ts
Normal file
|
|
@ -0,0 +1,429 @@
|
|||
import { isIP } from 'node:net'
|
||||
|
||||
import { useEffect, useMemo, useState } from 'react'
|
||||
|
||||
// --- Shared module state ---------------------------------------------------

/** Resolved titles by cache key; an empty string records a lookup that produced no usable title. */
const titleCache = new Map<string, string>()

/** Lookups currently running, by cache key, so concurrent callers share one request. */
const titleInflight = new Map<string, Promise<string>>()

/** Per-key listeners that are called with the final title once a lookup settles. */
const titleSubs = new Map<string, Set<(value: string) => void>>()

// --- Limits ----------------------------------------------------------------

/** Maximum number of entries kept in `titleCache`. */
const TITLE_CACHE_LIMIT = 500

/** Maximum number of characters kept from a parsed title. */
const TITLE_MAX_LENGTH = 240

/** Maximum number of response bytes read while looking for a title (96 KiB). */
const TITLE_BYTE_BUDGET = 96 * 1024

/** Milliseconds before a title request is aborted. */
const TITLE_TIMEOUT_MS = 5000

/** User-Agent header sent with title requests. */
const TITLE_USER_AGENT =
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_6_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36'

// --- Patterns --------------------------------------------------------------

/** Titles containing any of these phrases are treated as error or challenge pages and discarded. */
const TITLE_ERROR_RE =
  /\b(?:access denied|attention required|captcha|error|forbidden|just a moment|request blocked|too many requests)\b/i

/** Scheme-less text that looks like a domain, with optional port and path/query/fragment. */
const DOMAIN_RE = /^(?:www\.)?[a-z0-9](?:[a-z0-9-]*\.)+[a-z]{2,}(?::\d+)?(?:[/?#][^\s]*)?$/i

/** URL schemes for which a title is never fetched. */
const SKIP_PROTO_RE = /^(?:file|data|mailto|javascript|blob|chrome|about|hermes):/i

/** Hostnames that always refer to the local machine. */
const LOCAL_HOSTNAME_RE = /^(?:localhost|localhost\.localdomain)$/i

/** Hostname suffixes treated as local or intranet-only. */
const LOCAL_HOST_SUFFIXES = ['.corp', '.home', '.internal', '.lan', '.local', '.localdomain']

/** Replacement text for the entity names recognised by the title decoder (keys are lower-case, without `&` and `;`). */
const HTML_ENTITIES: Record<string, string> = {
  '#39': "'",
  amp: '&',
  apos: "'",
  gt: '>',
  lt: '<',
  nbsp: ' ',
  quot: '"'
}
|
||||
|
||||
/**
 * Trims a link target and, when it is scheme-less but looks like a domain,
 * prefixes it with `https://`.
 *
 * @param value Raw link text.
 * @returns The trimmed value, with `https://` prepended only when the value has
 *   no http(s) scheme and matches `DOMAIN_RE`; anything else is returned trimmed
 *   but otherwise untouched.
 */
export function normalizeExternalUrl(value: string): string {
  const candidate = value.trim()
  const hasHttpScheme = /^https?:\/\//i.test(candidate)

  if (candidate && !hasHttpScheme && DOMAIN_RE.test(candidate)) {
    return `https://${candidate}`
  }

  return candidate
}
|
||||
|
||||
/**
 * Parses a link target into a `URL` after running it through
 * `normalizeExternalUrl`.
 *
 * @returns The parsed URL, or `null` when normalisation or parsing throws.
 */
function parseUrl(value: string): null | URL {
  let parsed: null | URL = null

  try {
    parsed = new URL(normalizeExternalUrl(value))
  } catch {
    parsed = null
  }

  return parsed
}
|
||||
|
||||
/**
 * Builds the key under which a link's title is cached and de-duplicated.
 *
 * The key deliberately ignores the scheme, a leading `www.`, trailing slashes
 * and the fragment so that trivially different spellings of one page share a
 * cache entry. It does keep a non-default port: `url.host` (unlike
 * `url.hostname`) includes it, so services on different ports of the same
 * machine no longer collide on one cached title.
 *
 * @param value Link target (normalised or not).
 * @returns `host[:port] + pathname + search`, or the normalised input when it
 *   cannot be parsed as a URL.
 */
function titleCacheKey(value: string): string {
  const url = parseUrl(value)

  if (!url) {
    return normalizeExternalUrl(value)
  }

  const host = url.host.replace(/^www\./i, '').toLowerCase()
  // Stripping trailing slashes turns "/" into ""; fall back to "/" for the root.
  const pathname = url.pathname.replace(/\/+$/, '') || '/'

  return `${host}${pathname}${url.search}`
}
|
||||
|
||||
/**
 * Stores a resolved title in `titleCache`.
 *
 * When the cache already holds `TITLE_CACHE_LIMIT` entries (or more), the first
 * key in the map's iteration order — i.e. the oldest insertion — is dropped
 * before the new value is written.
 *
 * @param key Cache key for the link.
 * @param title Title to remember (may be an empty string).
 */
function cacheTitle(key: string, title: string): void {
  const atCapacity = titleCache.size >= TITLE_CACHE_LIMIT

  if (atCapacity) {
    const [oldest] = titleCache.keys()

    titleCache.delete(oldest as string)
  }

  titleCache.set(key, title)
}
|
||||
|
||||
/**
 * Formats a link as `host + path` for display.
 *
 * A leading `www.` is removed from the host and one trailing slash is removed
 * from the path; a bare root path contributes nothing. Query and fragment are
 * not included.
 *
 * @param value Link target.
 * @returns The compact label, or `value` unchanged when it cannot be parsed.
 */
export function hostPathLabel(value: string): string {
  const parsed = parseUrl(value)

  if (parsed === null) {
    return value
  }

  const { hostname, pathname } = parsed
  const bareHost = hostname.replace(/^www\./, '')
  const hasPath = Boolean(pathname) && pathname !== '/'

  return hasPath ? `${bareHost}${pathname.replace(/\/$/, '')}` : bareHost
}
|
||||
|
||||
/**
 * Turns one URL path segment into space-separated words.
 *
 * Steps, in order: percent-decode; drop a `.a<digits>.<anything>` tail; drop a
 * trailing `.html`/`.htm`/`.php`/`.asp`/`.aspx` extension; drop trailing
 * id-like tokens (a separator followed by 1–3 letters and 2+ digits); replace
 * runs of `-`/`_` with a space; collapse whitespace and trim.
 *
 * @param segment A single, still percent-encoded path segment.
 * @returns The cleaned text, or `''` when the segment is not valid
 *   percent-encoding.
 */
function cleanSlug(segment: string): string {
  let decoded: string

  try {
    decoded = decodeURIComponent(segment)
  } catch {
    return ''
  }

  const withoutNoise = decoded
    .replace(/\.a\d+\..*$/i, '')
    .replace(/\.(?:html?|php|aspx?)$/i, '')
    .replace(/(?:[-_.](?:[a-z]{1,3}\d{2,}|i\d{2,}))+$/i, '')

  return withoutNoise.replace(/[_-]+/g, ' ').replace(/\s+/g, ' ').trim()
}
|
||||
|
||||
/**
 * Derives a readable, title-cased label from a URL's path.
 *
 * Path segments are tried from last to first. A segment is used when, after
 * `cleanSlug`, it contains at least one ASCII letter, is not just an id-like
 * token (digits, or 1–3 letters followed by digits), and is at least four
 * characters long. If no segment qualifies, `hostPathLabel(value)` is returned.
 *
 * Capitalisation is applied to the first lower-case letter of each word. A
 * letter only starts a word when it is not preceded by another letter, digit,
 * combining mark, underscore or apostrophe, so decoded non-ASCII slugs and
 * contractions are not broken up ("München", "Don't" rather than "MüNchen",
 * "Don'T").
 *
 * @param value Link target.
 * @returns The title-cased slug, or the host + path fallback.
 */
export function urlSlugTitleLabel(value: string): string {
  const segments = parseUrl(value)?.pathname.split('/').filter(Boolean).reverse() ?? []

  for (const segment of segments) {
    const cleaned = cleanSlug(segment)

    if (!cleaned || !/[a-z]/i.test(cleaned)) {
      continue
    }

    // Skip segments that are only an identifier such as "l882" or "19894".
    if (/^(?:[a-z]{1,3}\d+|\d+)$/i.test(cleaned.replace(/\s+/g, ''))) {
      continue
    }

    const titled = cleaned.replace(/(?<![\p{L}\p{N}\p{M}_'’])\p{Ll}/gu, letter => letter.toUpperCase())

    if (titled.length >= 4) {
      return titled
    }
  }

  return hostPathLabel(value)
}
|
||||
|
||||
/**
 * Splits a dotted-quad IPv4 literal into its four octets.
 *
 * @param value Candidate address, e.g. `"192.168.1.22"`.
 * @returns The octets, or `null` unless the input is exactly four decimal
 *   parts of 1–3 digits each, every one in the range 0–255.
 */
function parseIpv4Octets(value: string): null | [number, number, number, number] {
  const parts = value.split('.')

  if (parts.length !== 4) {
    return null
  }

  const octets: number[] = []

  for (const part of parts) {
    if (!/^\d{1,3}$/.test(part)) {
      return null
    }

    const next = Number(part)

    // The regex already guarantees a non-negative integer; only the upper bound is left.
    if (next > 255) {
      return null
    }

    octets.push(next)
  }

  return [octets[0]!, octets[1]!, octets[2]!, octets[3]!]
}

/**
 * Reports whether a dotted-quad IPv4 address lies in a range that must not be
 * fetched: 0/8, 10/8, 127/8, 255/8, 100.64/10, 169.254/16, 172.16/12,
 * 192.168/16 or 198.18/15.
 *
 * @returns `false` for anything that is not a valid dotted-quad literal.
 */
function isPrivateIpv4(value: string): boolean {
  const octets = parseIpv4Octets(value)

  if (!octets) {
    return false
  }

  const [a, b] = octets

  return (
    a === 0 ||
    a === 10 ||
    a === 127 ||
    a === 255 ||
    (a === 100 && b >= 64 && b <= 127) ||
    (a === 169 && b === 254) ||
    (a === 172 && b >= 16 && b <= 31) ||
    (a === 192 && b === 168) ||
    (a === 198 && (b === 18 || b === 19))
  )
}

/**
 * Reports whether an IPv6 address (no brackets, no zone id) must not be
 * fetched: unspecified (`::`), loopback (`::1`), unique-local (`fc…`/`fd…`),
 * link-local (`fe8…`–`feb…`), or an IPv4-mapped address whose embedded IPv4
 * address is private.
 *
 * IPv4-mapped addresses are accepted in both spellings. The WHATWG URL parser
 * serialises `[::ffff:127.0.0.1]` as `::ffff:7f00:1`, so the hexadecimal form
 * is the one that actually arrives here from a parsed URL; it is converted
 * back to a dotted quad before the IPv4 check.
 */
function isPrivateIpv6(value: string): boolean {
  const normalized = value.toLowerCase()

  if (normalized === '::' || normalized === '::1') {
    return true
  }

  if (normalized.startsWith('fc') || normalized.startsWith('fd')) {
    return true
  }

  if (['fe8', 'fe9', 'fea', 'feb'].some(prefix => normalized.startsWith(prefix))) {
    return true
  }

  const mappedPrefix = '::ffff:'

  if (!normalized.startsWith(mappedPrefix)) {
    return false
  }

  const tail = normalized.slice(mappedPrefix.length)

  if (tail.includes('.')) {
    return isPrivateIpv4(tail)
  }

  // Hex spelling: exactly two 16-bit groups carrying the 32-bit IPv4 address.
  const groups = /^([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(tail)

  if (!groups) {
    return false
  }

  const high = parseInt(groups[1]!, 16)
  const low = parseInt(groups[2]!, 16)

  return isPrivateIpv4([high >> 8, high & 0xff, low >> 8, low & 0xff].join('.'))
}
|
||||
|
||||
/**
 * Canonicalises a hostname for the local/private checks: removes a leading
 * `[` and a trailing `]`, cuts everything from the first `%` (zone id),
 * removes one trailing dot, and lower-cases the result.
 */
function normalizeHostname(value: string): string {
  const unbracketed = value.replace(/^\[|\]$/g, '')
  const zoneAt = unbracketed.indexOf('%')
  const host = zoneAt === -1 ? unbracketed : unbracketed.slice(0, zoneAt)

  return host.replace(/\.$/, '').toLowerCase()
}
|
||||
|
||||
/**
 * Decides whether a hostname points at the local machine, a private network
 * or an intranet name.
 *
 * Returns `true` for: an empty host, a name matching `LOCAL_HOSTNAME_RE`, a
 * name ending in one of `LOCAL_HOST_SUFFIXES`, an IP literal that
 * `isPrivateIpv4`/`isPrivateIpv6` classifies as private, and any non-IP name
 * without a dot.
 */
function isPrivateOrLocalHost(hostname: string): boolean {
  const host = normalizeHostname(hostname)

  const looksLocal =
    !host || LOCAL_HOSTNAME_RE.test(host) || LOCAL_HOST_SUFFIXES.some(suffix => host.endsWith(suffix))

  if (looksLocal) {
    return true
  }

  switch (isIP(host)) {
    case 4:
      return isPrivateIpv4(host)

    case 6:
      return isPrivateIpv6(host)

    default:
      // Single-label hostnames are usually LAN names or enterprise intranet aliases.
      return !host.includes('.')
  }
}
|
||||
|
||||
/**
 * Reports whether a title may be fetched for the given link.
 *
 * The link must be non-empty, must not start with a scheme listed in
 * `SKIP_PROTO_RE`, must parse as an `http:` or `https:` URL, and its host must
 * not be classified as private or local by `isPrivateOrLocalHost`.
 */
export function isTitleFetchable(value: string): boolean {
  if (!value) {
    return false
  }

  if (SKIP_PROTO_RE.test(value)) {
    return false
  }

  const url = parseUrl(value)

  if (!url) {
    return false
  }

  if (!/^https?:$/.test(url.protocol)) {
    return false
  }

  return !isPrivateOrLocalHost(url.hostname)
}
|
||||
|
||||
/**
 * Decodes the HTML character references that commonly appear in `<title>`
 * text: the names listed in `HTML_ENTITIES` plus decimal (`&#39;`) and
 * hexadecimal (`&#x27;`) references.
 *
 * All references are decoded in one pass, so text produced by decoding is
 * never decoded again (`&amp;#39;` yields the literal `&#39;`, not `'`).
 * Numeric references that are zero, outside the Unicode range, or surrogate
 * code points become a space instead of throwing, so one bad reference cannot
 * discard the whole title.
 *
 * @param value Raw text taken from the document.
 * @returns The text with recognised references replaced; everything else is
 *   left untouched.
 */
function decodeHtmlEntities(value: string): string {
  return value.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|(amp|lt|gt|quot|apos|nbsp));/gi,
    (_match, hex?: string, decimal?: string, name?: string) => {
      if (name !== undefined) {
        return HTML_ENTITIES[name.toLowerCase()] ?? ''
      }

      const code = hex !== undefined ? parseInt(hex, 16) : parseInt(decimal ?? '', 10)
      const isSurrogate = code >= 0xd800 && code <= 0xdfff
      const isValid = Number.isInteger(code) && code > 0 && code <= 0x10ffff && !isSurrogate

      return isValid ? String.fromCodePoint(code) : ' '
    }
  )
}
|
||||
|
||||
/**
 * Extracts the text of the first `<title>…</title>` element found in `html`.
 *
 * @returns The title with entities decoded via `decodeHtmlEntities`,
 *   whitespace runs collapsed to single spaces and trimmed; `''` when there is
 *   no title element or it is empty.
 */
function parseHtmlTitle(html: string): string {
  const found = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html)
  const inner = found?.[1]

  if (!inner) {
    return ''
  }

  return decodeHtmlEntities(inner).replace(/\s+/g, ' ').trim()
}
|
||||
|
||||
/**
 * Reads at most `TITLE_BYTE_BUDGET` bytes from the start of a response body
 * and decodes them with a default `TextDecoder`.
 *
 * When the response exposes no body reader, the whole text is read and then
 * cut to `TITLE_BYTE_BUDGET` characters. Otherwise the stream is read chunk by
 * chunk until it ends or the budget is reached, and is cancelled if it was not
 * read to the end.
 *
 * @returns The decoded prefix, or `''` when a read fails or nothing was read.
 */
async function readResponseSnippet(response: Response): Promise<string> {
  const stream = response.body?.getReader()

  if (!stream) {
    const whole = await response.text()

    return whole.slice(0, TITLE_BYTE_BUDGET)
  }

  const pieces: Uint8Array[] = []
  let total = 0
  let exhausted = false

  try {
    while (total < TITLE_BYTE_BUDGET) {
      const { done, value } = await stream.read()

      if (done) {
        exhausted = true

        break
      }

      if (!value?.length) {
        continue
      }

      // Keep only what still fits in the budget; a truncated chunk ends the read.
      const room = TITLE_BYTE_BUDGET - total
      const overflows = value.length > room
      const kept = overflows ? value.subarray(0, room) : value

      pieces.push(kept)
      total += kept.length

      if (overflows) {
        break
      }
    }
  } catch {
    return ''
  } finally {
    if (!exhausted) {
      try {
        await stream.cancel()
      } catch {
        // Ignore stream teardown failures.
      }
    }
  }

  if (pieces.length === 0) {
    return ''
  }

  const merged = new Uint8Array(total)
  let cursor = 0

  for (const piece of pieces) {
    merged.set(piece, cursor)
    cursor += piece.length
  }

  return new TextDecoder().decode(merged)
}
|
||||
|
||||
/**
 * Normalises a fetched title and rejects ones that look like error or
 * challenge pages.
 *
 * @returns The title with whitespace collapsed and trimmed, or `''` when it is
 *   empty or matches `TITLE_ERROR_RE`.
 */
function usableTitle(value: string): string {
  const collapsed = value.replace(/\s+/g, ' ').trim()

  if (collapsed === '' || TITLE_ERROR_RE.test(collapsed)) {
    return ''
  }

  return collapsed
}
|
||||
|
||||
/**
 * Downloads the start of a page and returns its `<title>` text.
 *
 * Redirects are followed manually rather than by `fetch`, so every hop is
 * re-checked with `isTitleFetchable`. Otherwise a public URL could redirect
 * the request to a loopback, private-network or cloud-metadata address and
 * bypass the host filter that was applied to the original link. At most five
 * redirects are followed; a redirect without a `Location` header, a target
 * that fails the filter, or too many hops all yield `''`.
 *
 * One abort timer of `TITLE_TIMEOUT_MS` covers the whole chain.
 *
 * NOTE(review): this relies on `redirect: 'manual'` exposing the 3xx status
 * and `Location` header, as Node's fetch does. A runtime that returns an
 * opaque redirect instead would simply produce `''` here.
 *
 * @param normalizedUrl Absolute http(s) URL to fetch.
 * @returns The parsed title cut to `TITLE_MAX_LENGTH` characters, or `''` on
 *   any failure (network error, timeout, non-2xx status, non-HTML/XML content
 *   type, blocked redirect).
 */
async function fetchHtmlTitle(normalizedUrl: string): Promise<string> {
  const maxRedirects = 5
  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), TITLE_TIMEOUT_MS)

  try {
    let target = normalizedUrl

    for (let hop = 0; hop <= maxRedirects; hop += 1) {
      // Validate every hop, not just the URL the user supplied.
      if (!isTitleFetchable(target)) {
        return ''
      }

      const response = await fetch(target, {
        headers: {
          Accept: 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.5',
          'Accept-Language': 'en-US,en;q=0.7',
          'User-Agent': TITLE_USER_AGENT
        },
        redirect: 'manual',
        signal: controller.signal
      })

      if (response.status >= 300 && response.status < 400) {
        const location = response.headers.get('location')

        // Release the connection; the redirect body is never needed.
        await response.body?.cancel().catch(() => undefined)

        if (!location) {
          return ''
        }

        // Location may be relative; resolve it against the URL just requested.
        target = new URL(location, target).toString()

        continue
      }

      if (!response.ok) {
        return ''
      }

      const contentType = response.headers.get('content-type')

      // A missing content type is tolerated; a declared non-HTML/XML type is not.
      if (contentType && !/(?:html|xml|text\/html)/i.test(contentType)) {
        return ''
      }

      const html = await readResponseSnippet(response)

      return parseHtmlTitle(html).slice(0, TITLE_MAX_LENGTH)
    }

    return ''
  } catch {
    return ''
  } finally {
    clearTimeout(timeout)
  }
}
|
||||
|
||||
/**
 * Resolves the page title for a link, sharing work between callers.
 *
 * Resolution order: links rejected by `isTitleFetchable` resolve to `''`
 * without touching the network; a cached title is returned as-is; a lookup
 * already in flight for the same cache key is reused; otherwise a new lookup
 * is started. When a lookup settles, its result (including `''` for failures
 * and unusable titles) is cached, pushed to the subscribers registered for the
 * key in `titleSubs`, and the in-flight entry is removed.
 *
 * @param url Link target, normalised or not.
 * @returns A promise for the title, or `''` when none is available.
 */
export function fetchLinkTitle(url: string): Promise<string> {
  const normalizedUrl = normalizeExternalUrl(url)

  if (!isTitleFetchable(normalizedUrl)) {
    return Promise.resolve('')
  }

  const key = titleCacheKey(normalizedUrl)
  const cached = titleCache.get(key)

  if (cached !== undefined) {
    return Promise.resolve(cached)
  }

  const running = titleInflight.get(key)

  if (running) {
    return running
  }

  // Record the outcome and tell every mounted subscriber for this key about it.
  const publish = (clean: string): string => {
    cacheTitle(key, clean)
    titleSubs.get(key)?.forEach(notify => notify(clean))

    return clean
  }

  const task = fetchHtmlTitle(normalizedUrl)
    .then(usableTitle)
    .catch(() => '')
    .then(publish)
    .finally(() => {
      titleInflight.delete(key)
    })

  titleInflight.set(key, task)

  return task
}
|
||||
|
||||
/**
 * React hook that yields the fetched page title for a link.
 *
 * The hook returns the cached title (or `''`) immediately. For links accepted
 * by `isTitleFetchable` it registers its state setter in `titleSubs` under the
 * link's cache key, starts `fetchLinkTitle`, and unregisters on cleanup,
 * removing the key's subscriber set once it is empty.
 *
 * @param url Link target; `null`, `undefined` and `''` produce `''`.
 * @returns The resolved title, or `''` while unknown or unavailable.
 */
export function useLinkTitle(url?: null | string): string {
  const normalizedUrl = useMemo(() => (url ? normalizeExternalUrl(url) : ''), [url])
  const key = useMemo(() => (normalizedUrl ? titleCacheKey(normalizedUrl) : ''), [normalizedUrl])
  const [title, setTitle] = useState(() => (key ? (titleCache.get(key) ?? '') : ''))

  useEffect(() => {
    // Re-sync with the cache whenever the link (and therefore the key) changes.
    const cached = key ? (titleCache.get(key) ?? '') : ''

    setTitle(cached)

    const shouldFetch = Boolean(key) && isTitleFetchable(normalizedUrl)

    if (!shouldFetch) {
      return undefined
    }

    const existing = titleSubs.get(key)
    const listeners = existing ?? new Set<(value: string) => void>()

    listeners.add(setTitle)

    if (!existing) {
      titleSubs.set(key, listeners)
    }

    void fetchLinkTitle(normalizedUrl)

    return () => {
      listeners.delete(setTitle)

      if (listeners.size === 0) {
        titleSubs.delete(key)
      }
    }
  }, [key, normalizedUrl])

  return title
}
|
||||
|
||||
/**
 * Empties the title cache, the in-flight lookup table and the subscriber
 * registry. The test suite calls this between cases.
 */
export function __resetLinkTitleCache(): void {
  for (const store of [titleCache, titleInflight, titleSubs]) {
    store.clear()
  }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue