diff --git a/ui-tui/src/__tests__/externalLink.test.ts b/ui-tui/src/__tests__/externalLink.test.ts new file mode 100644 index 00000000000..31be5e83af3 --- /dev/null +++ b/ui-tui/src/__tests__/externalLink.test.ts @@ -0,0 +1,138 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' + +import { + __resetLinkTitleCache, + fetchLinkTitle, + hostPathLabel, + isTitleFetchable, + normalizeExternalUrl, + urlSlugTitleLabel +} from '../lib/externalLink.js' + +afterEach(() => { + __resetLinkTitleCache() + vi.restoreAllMocks() + vi.unstubAllGlobals() +}) + +describe('external link helpers', () => { + it('formats URL fallbacks as host + path', () => { + expect( + hostPathLabel( + 'https://www.getyourguide.com/culebra-island-l145468/from-fajardo-full-day-cordillera-islands-catamaran-tour-t19894/' + ) + ).toBe('getyourguide.com/culebra-island-l145468/from-fajardo-full-day-cordillera-islands-catamaran-tour-t19894') + }) + + it('derives readable title fallbacks from URL slugs', () => { + expect( + urlSlugTitleLabel('https://www.getyourguide.com/fajardo-l882/from-fajardo-icacos-island-full-day-catamaran-trip-t19891/') + ).toBe('From Fajardo Icacos Island Full Day Catamaran Trip') + }) + + it('normalizes scheme-less links', () => { + expect(normalizeExternalUrl(' expedia.com/things-to-do/puerto-rico-el-yunque ')).toBe( + 'https://expedia.com/things-to-do/puerto-rico-el-yunque' + ) + }) + + it('filters out local/non-http targets for title fetches', () => { + expect(isTitleFetchable('https://www.expedia.com/things-to-do/foo')).toBe(true) + expect(isTitleFetchable('http://localhost:5174')).toBe(false) + expect(isTitleFetchable('file:///tmp/demo.html')).toBe(false) + expect(isTitleFetchable('mailto:hello@example.com')).toBe(false) + }) + + it('blocks private, link-local, and intranet hosts', () => { + expect(isTitleFetchable('http://10.0.0.12/path')).toBe(false) + expect(isTitleFetchable('http://172.22.5.4/path')).toBe(false) + expect(isTitleFetchable('http://192.168.1.22/path')).toBe(false) + expect(isTitleFetchable('http://169.254.169.254/latest/meta-data')).toBe(false) + expect(isTitleFetchable('http://[fd00::1]/')).toBe(false) + expect(isTitleFetchable('http://[fe80::1]/')).toBe(false) + expect(isTitleFetchable('http://printer.local/status')).toBe(false) + expect(isTitleFetchable('http://intranet/status')).toBe(false) + expect(isTitleFetchable('https://8.8.8.8/status')).toBe(true) + }) + + it('deduplicates in-flight title fetches and caches results', async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response('El Yunque Tour Water Slide, Rope Swing & Pickup', { + headers: { 'content-type': 'text/html; charset=utf-8' }, + status: 200 + }) + ) + + vi.stubGlobal('fetch', fetchMock) + + const url = 'https://www.expedia.com/things-to-do/puerto-rico-el-yunque-rainforest-adventure.a46272756.activity-details' + const [first, second] = await Promise.all([fetchLinkTitle(url), fetchLinkTitle(url)]) + + expect(first).toBe('El Yunque Tour Water Slide, Rope Swing & Pickup') + expect(second).toBe('El Yunque Tour Water Slide, Rope Swing & Pickup') + expect(fetchMock).toHaveBeenCalledTimes(1) + + const third = await fetchLinkTitle(url) + + expect(third).toBe('El Yunque Tour Water Slide, Rope Swing & Pickup') + expect(fetchMock).toHaveBeenCalledTimes(1) + }) + + it('shares cache across protocol/www URL variants', async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response('Shared Canonical Title', { + headers: { 'content-type': 'text/html' }, + status: 200 + }) + ) + + vi.stubGlobal('fetch', fetchMock) + + const first = 'https://www.getyourguide.com/san-juan-puerto-rico-l355/sunset-tours-tc306/' + const second = 'http://getyourguide.com/san-juan-puerto-rico-l355/sunset-tours-tc306/' + + const [a, b] = await Promise.all([fetchLinkTitle(first), fetchLinkTitle(second)]) + + expect(a).toBe('Shared Canonical Title') + expect(b).toBe('Shared Canonical Title') + expect(fetchMock).toHaveBeenCalledTimes(1) + }) + + it('ignores error-like fetched titles', async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response('Just a moment...', { + headers: { 'content-type': 'text/html' }, + status: 200 + }) + ) + + vi.stubGlobal('fetch', fetchMock) + + const url = 'https://www.getyourguide.com/culebra-island-l145468/from-fajardo-full-day-cordillera-islands-catamaran-tour-t19894/' + + await expect(fetchLinkTitle(url)).resolves.toBe('') + }) + + it('decodes HTML entities in fetched titles', async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response('AT&T 'Deals'', { + headers: { 'content-type': 'text/html' }, + status: 200 + }) + ) + + vi.stubGlobal('fetch', fetchMock) + + await expect(fetchLinkTitle('https://example.com/offers')).resolves.toBe("AT&T 'Deals'") + }) + + it('skips network fetch for non-fetchable targets', async () => { + const fetchMock = vi.fn() + vi.stubGlobal('fetch', fetchMock) + + await expect(fetchLinkTitle('http://localhost:3000/path')).resolves.toBe('') + await expect(fetchLinkTitle('mailto:hello@example.com')).resolves.toBe('') + await expect(fetchLinkTitle('file:///tmp/demo.html')).resolves.toBe('') + expect(fetchMock).not.toHaveBeenCalled() + }) +}) diff --git a/ui-tui/src/__tests__/markdown.test.ts b/ui-tui/src/__tests__/markdown.test.ts index 716a2bbc093..b2fab923271 100644 --- a/ui-tui/src/__tests__/markdown.test.ts +++ b/ui-tui/src/__tests__/markdown.test.ts @@ -218,6 +218,41 @@ describe('Md wrapping', () => { }) }) +describe('Md link labels', () => { + it('renders bare URLs with readable slug labels', () => { + const lines = renderPlain( + React.createElement( + Box, + { width: 120 }, + React.createElement(Md, { + t: DEFAULT_THEME, + text: 'see https://www.expedia.com/things-to-do/puerto-rico-el-yunque-rainforest-adventure for details' + }) + ) + ) + + const rendered = lines.join('\n') + + expect(rendered).toContain('Puerto Rico El Yunque Rainforest Adventure') + expect(rendered).not.toContain('https://www.expedia.com/things-to-do/puerto-rico-el-yunque-rainforest-adventure') + }) + + it('keeps explicit markdown labels as the immediate fallback', () => { + const lines = renderPlain( + React.createElement( + Box, + { width: 80 }, + React.createElement(Md, { + t: DEFAULT_THEME, + text: '[Trip details](https://www.expedia.com/things-to-do/puerto-rico-el-yunque-rainforest-adventure)' + }) + ) + ) + + expect(lines.join('\n')).toContain('Trip details') + }) +}) + describe('renderTable CJK width alignment', () => { it('column starts share the same display offset across CJK rows', async () => { const { stringWidth } = await import('@hermes/ink') @@ -248,6 +283,7 @@ describe('renderTable CJK width alignment', () => { // unique anchor for column 2's start position on each row. const colStarts = (line: string, anchor: string): number => { const idx = line.indexOf(anchor) + return idx < 0 ? -1 : stringWidth(line.slice(0, idx)) } diff --git a/ui-tui/src/components/markdown.tsx b/ui-tui/src/components/markdown.tsx index c12efb35dc7..ae234eb9ec7 100644 --- a/ui-tui/src/components/markdown.tsx +++ b/ui-tui/src/components/markdown.tsx @@ -2,6 +2,7 @@ import { Box, Link, stringWidth, Text } from '@hermes/ink' import { Fragment, memo, type ReactNode, useMemo } from 'react' import { ensureEmojiPresentation } from '../lib/emoji.js' +import { normalizeExternalUrl, urlSlugTitleLabel, useLinkTitle } from '../lib/externalLink.js' import { BOX_CLOSE, BOX_OPEN, texToUnicode } from '../lib/mathUnicode.js' import { highlightLine, isHighlightable } from '../lib/syntax.js' import type { Theme } from '../theme.js' @@ -143,13 +144,43 @@ const isTableDivider = (row: string) => { const autolinkUrl = (raw: string) => raw.startsWith('mailto:') || raw.startsWith('http') || !raw.includes('@') ? raw : `mailto:${raw}` -const renderAutolink = (k: number, t: Theme, raw: string) => ( - - - {raw.replace(/^mailto:/, '')} - - -) +const defaultLinkLabel = (url: string) => + url.startsWith('mailto:') ? url.replace(/^mailto:/, '') : /^https?:\/\//i.test(url) ? urlSlugTitleLabel(url) : url + +const pickFallbackLabel = (label: string | undefined, target: string): string | undefined => { + const trimmed = label?.trim() + + if (!trimmed) { + return undefined + } + + return normalizeExternalUrl(trimmed) === target ? undefined : trimmed +} + +interface ResolvedLinkProps { + fallbackLabel?: string + t: Theme + url: string +} + +function ResolvedLink({ fallbackLabel, t, url }: ResolvedLinkProps) { + const fetched = useLinkTitle(url) + const display = fetched || fallbackLabel || defaultLinkLabel(url) + + return ( + + + {display} + + + ) +} + +const renderResolvedLink = (k: number, t: Theme, rawUrl: string, label?: string) => { + const target = normalizeExternalUrl(rawUrl) + + return +} export const stripInlineMarkup = (v: string) => v @@ -232,15 +263,9 @@ function MdInline({ t, text }: { t: Theme; text: string }) { ) } else if (m[3] && m[4]) { - parts.push( - - - {m[3]} - - - ) + parts.push(renderResolvedLink(parts.length, t, m[4], m[3])) } else if (m[5]) { - parts.push(renderAutolink(parts.length, t, m[5])) + parts.push(renderResolvedLink(parts.length, t, autolinkUrl(m[5]), m[5].replace(/^mailto:/, ''))) } else if (m[6]) { parts.push( @@ -302,7 +327,7 @@ function MdInline({ t, text }: { t: Theme; text: string }) { // so `see https://x.com/, which…` keeps the comma outside the link. const url = m[16].replace(/[),.;:!?]+$/g, '') - parts.push(renderAutolink(parts.length, t, url)) + parts.push(renderResolvedLink(parts.length, t, url)) if (url.length < m[16].length) { parts.push({m[16].slice(url.length)}) diff --git a/ui-tui/src/lib/externalLink.ts b/ui-tui/src/lib/externalLink.ts new file mode 100644 index 00000000000..04721bfa3f6 --- /dev/null +++ b/ui-tui/src/lib/externalLink.ts @@ -0,0 +1,429 @@ +import { isIP } from 'node:net' + +import { useEffect, useMemo, useState } from 'react' + +const titleCache = new Map() +const titleInflight = new Map>() +const titleSubs = new Map void>>() + +const TITLE_CACHE_LIMIT = 500 +const TITLE_MAX_LENGTH = 240 +const TITLE_BYTE_BUDGET = 96 * 1024 +const TITLE_TIMEOUT_MS = 5000 + +const TITLE_USER_AGENT = + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_6_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36' + +const TITLE_ERROR_RE = + /\b(?:access denied|attention required|captcha|error|forbidden|just a moment|request blocked|too many requests)\b/i + +const DOMAIN_RE = /^(?:www\.)?[a-z0-9](?:[a-z0-9-]*\.)+[a-z]{2,}(?::\d+)?(?:[/?#][^\s]*)?$/i +const SKIP_PROTO_RE = /^(?:file|data|mailto|javascript|blob|chrome|about|hermes):/i +const LOCAL_HOSTNAME_RE = /^(?:localhost|localhost\.localdomain)$/i +const LOCAL_HOST_SUFFIXES = ['.corp', '.home', '.internal', '.lan', '.local', '.localdomain'] + +const HTML_ENTITIES: Record = { + '#39': "'", + amp: '&', + apos: "'", + gt: '>', + lt: '<', + nbsp: ' ', + quot: '"' +} + +export function normalizeExternalUrl(value: string): string { + const trimmed = value.trim() + + if (!trimmed || /^https?:\/\//i.test(trimmed)) { + return trimmed + } + + return DOMAIN_RE.test(trimmed) ? `https://${trimmed}` : trimmed +} + +function parseUrl(value: string): null | URL { + try { + return new URL(normalizeExternalUrl(value)) + } catch { + return null + } +} + +function titleCacheKey(value: string): string { + const url = parseUrl(value) + + if (!url) { + return normalizeExternalUrl(value) + } + + const host = url.hostname.replace(/^www\./i, '').toLowerCase() + const pathname = url.pathname === '/' ? '/' : url.pathname.replace(/\/+$/, '') || '/' + + return `${host}${pathname}${url.search || ''}` +} + +function cacheTitle(key: string, title: string): void { + if (titleCache.size >= TITLE_CACHE_LIMIT) { + titleCache.delete(titleCache.keys().next().value as string) + } + + titleCache.set(key, title) +} + +export function hostPathLabel(value: string): string { + const url = parseUrl(value) + + if (!url) { + return value + } + + const host = url.hostname.replace(/^www\./, '') + const path = url.pathname && url.pathname !== '/' ? url.pathname.replace(/\/$/, '') : '' + + return `${host}${path}` +} + +function cleanSlug(segment: string): string { + try { + return decodeURIComponent(segment) + .replace(/\.a\d+\..*$/i, '') + .replace(/\.(?:html?|php|aspx?)$/i, '') + .replace(/(?:[-_.](?:[a-z]{1,3}\d{2,}|i\d{2,}))+$/i, '') + .replace(/[_-]+/g, ' ') + .replace(/\s+/g, ' ') + .trim() + } catch { + return '' + } +} + +export function urlSlugTitleLabel(value: string): string { + const url = parseUrl(value) + + for (const segment of url?.pathname.split('/').filter(Boolean).reverse() ?? []) { + const cleaned = cleanSlug(segment) + + if (!cleaned || !/[a-z]/i.test(cleaned)) { + continue + } + + if (/^(?:[a-z]{1,3}\d+|\d+)$/i.test(cleaned.replace(/\s+/g, ''))) { + continue + } + + const titled = cleaned.replace(/\b[a-z]/g, c => c.toUpperCase()) + + if (titled.length >= 4) { + return titled + } + } + + return hostPathLabel(value) +} + +function parseIpv4Octets(value: string): null | [number, number, number, number] { + const parts = value.split('.') + + if (parts.length !== 4) { + return null + } + + const octets: number[] = [] + + for (const part of parts) { + if (!/^\d{1,3}$/.test(part)) { + return null + } + + const next = Number(part) + + if (!Number.isInteger(next) || next < 0 || next > 255) { + return null + } + + octets.push(next) + } + + return [octets[0]!, octets[1]!, octets[2]!, octets[3]!] +} + +function isPrivateIpv4(value: string): boolean { + const octets = parseIpv4Octets(value) + + if (!octets) { + return false + } + + const [a, b] = octets + + return ( + a === 0 || + a === 10 || + a === 127 || + a === 255 || + (a === 100 && b >= 64 && b <= 127) || + (a === 169 && b === 254) || + (a === 172 && b >= 16 && b <= 31) || + (a === 192 && b === 168) || + (a === 198 && (b === 18 || b === 19)) + ) +} + +function isPrivateIpv6(value: string): boolean { + const normalized = value.toLowerCase() + + if (normalized === '::' || normalized === '::1') { + return true + } + + if (normalized.startsWith('fc') || normalized.startsWith('fd')) { + return true + } + + if (normalized.startsWith('fe8') || normalized.startsWith('fe9') || normalized.startsWith('fea') || normalized.startsWith('feb')) { + return true + } + + if (normalized.startsWith('::ffff:')) { + return isPrivateIpv4(normalized.slice('::ffff:'.length)) + } + + return false +} + +function normalizeHostname(value: string): string { + const withoutBrackets = value.replace(/^\[/, '').replace(/\]$/, '') + const withoutZoneId = withoutBrackets.split('%', 1)[0]! + + return withoutZoneId.replace(/\.$/, '').toLowerCase() +} + +function isPrivateOrLocalHost(hostname: string): boolean { + const normalized = normalizeHostname(hostname) + + if (!normalized) { + return true + } + + if (LOCAL_HOSTNAME_RE.test(normalized)) { + return true + } + + if (LOCAL_HOST_SUFFIXES.some(suffix => normalized.endsWith(suffix))) { + return true + } + + const ipVersion = isIP(normalized) + + if (ipVersion === 4) { + return isPrivateIpv4(normalized) + } + + if (ipVersion === 6) { + return isPrivateIpv6(normalized) + } + + // Single-label hostnames are usually LAN names or enterprise intranet aliases. + return !normalized.includes('.') +} + +export function isTitleFetchable(value: string): boolean { + if (!value || SKIP_PROTO_RE.test(value)) { + return false + } + + const url = parseUrl(value) + + return Boolean(url && /^https?:$/.test(url.protocol) && !isPrivateOrLocalHost(url.hostname)) +} + +function decodeHtmlEntities(value: string): string { + return value + .replace(/&(amp|lt|gt|quot|apos|nbsp|#39);/gi, (_match, key: string) => HTML_ENTITIES[key.toLowerCase()] ?? '') + .replace(/&#x([0-9a-f]+);/gi, (_match, hex: string) => String.fromCodePoint(parseInt(hex, 16) || 32)) + .replace(/&#(\d+);/g, (_match, decimal: string) => String.fromCodePoint(parseInt(decimal, 10) || 32)) +} + +function parseHtmlTitle(html: string): string { + const raw = html.match(/]*>([\s\S]*?)<\/title>/i)?.[1] + + return raw ? decodeHtmlEntities(raw).replace(/\s+/g, ' ').trim() : '' +} + +async function readResponseSnippet(response: Response): Promise { + const reader = response.body?.getReader() + + if (!reader) { + return (await response.text()).slice(0, TITLE_BYTE_BUDGET) + } + + const chunks: Uint8Array[] = [] + let done = false + let bytes = 0 + + try { + while (bytes < TITLE_BYTE_BUDGET) { + const chunk = await reader.read() + + if (chunk.done) { + done = true + + break + } + + const value = chunk.value + + if (!value?.length) { + continue + } + + const remaining = TITLE_BYTE_BUDGET - bytes + const next = value.length > remaining ? value.subarray(0, remaining) : value + + chunks.push(next) + bytes += next.length + + if (next.length < value.length) { + break + } + } + } catch { + return '' + } finally { + if (!done) { + try { + await reader.cancel() + } catch { + // Ignore stream teardown failures. + } + } + } + + if (!chunks.length) { + return '' + } + + const joined = new Uint8Array(bytes) + let offset = 0 + + for (const chunk of chunks) { + joined.set(chunk, offset) + offset += chunk.length + } + + return new TextDecoder().decode(joined) +} + +function usableTitle(value: string): string { + const clean = value.replace(/\s+/g, ' ').trim() + + return clean && !TITLE_ERROR_RE.test(clean) ? clean : '' +} + +async function fetchHtmlTitle(normalizedUrl: string): Promise { + const controller = new AbortController() + const timeout = setTimeout(() => controller.abort(), TITLE_TIMEOUT_MS) + + try { + const response = await fetch(normalizedUrl, { + headers: { + Accept: 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.5', + 'Accept-Language': 'en-US,en;q=0.7', + 'User-Agent': TITLE_USER_AGENT + }, + redirect: 'follow', + signal: controller.signal + }) + + if (!response.ok) { + return '' + } + + const contentType = response.headers.get('content-type') + + if (contentType && !/(?:html|xml|text\/html)/i.test(contentType)) { + return '' + } + + const html = await readResponseSnippet(response) + + return parseHtmlTitle(html).slice(0, TITLE_MAX_LENGTH) + } catch { + return '' + } finally { + clearTimeout(timeout) + } +} + +export function fetchLinkTitle(url: string): Promise { + const normalizedUrl = normalizeExternalUrl(url) + const key = titleCacheKey(normalizedUrl) + + if (!isTitleFetchable(normalizedUrl)) { + return Promise.resolve('') + } + + if (titleCache.has(key)) { + return Promise.resolve(titleCache.get(key) ?? '') + } + + const pending = titleInflight.get(key) + + if (pending) { + return pending + } + + const promise = fetchHtmlTitle(normalizedUrl) + .then(usableTitle) + .catch(() => '') + .then(clean => { + cacheTitle(key, clean) + titleSubs.get(key)?.forEach(sub => sub(clean)) + + return clean + }) + .finally(() => { + titleInflight.delete(key) + }) + + titleInflight.set(key, promise) + + return promise +} + +export function useLinkTitle(url?: null | string): string { + const normalizedUrl = useMemo(() => (url ? normalizeExternalUrl(url) : ''), [url]) + const key = useMemo(() => (normalizedUrl ? titleCacheKey(normalizedUrl) : ''), [normalizedUrl]) + const [title, setTitle] = useState(() => (key ? (titleCache.get(key) ?? '') : '')) + + useEffect(() => { + setTitle(key ? (titleCache.get(key) ?? '') : '') + + if (!key || !isTitleFetchable(normalizedUrl)) { + return + } + + const subs = titleSubs.get(key) ?? new Set<(value: string) => void>() + + subs.add(setTitle) + titleSubs.set(key, subs) + void fetchLinkTitle(normalizedUrl) + + return () => { + subs.delete(setTitle) + + if (!subs.size) { + titleSubs.delete(key) + } + } + }, [key, normalizedUrl]) + + return title +} + +export function __resetLinkTitleCache(): void { + titleCache.clear() + titleInflight.clear() + titleSubs.clear() +}