v3: localization fixes, char counters & limits on all text fields, ux fixes
This commit is contained in:
@@ -83,6 +83,199 @@ export function extractOgTag(
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * Extract the `content` of a `<meta name="…" content="…">` tag whose `name`
 * equals `name` (case-insensitive). Both attribute orderings are supported;
 * the name-before-content ordering is searched first.
 *
 * The `content` value is read up to the same quote character that opened it,
 * so a double-quoted value may contain apostrophes and a single-quoted value
 * may contain double quotes.
 *
 * @param html - Raw HTML to search.
 * @param name - Meta name to look for; regex metacharacters in it are escaped.
 * @returns The entity-decoded content, or `undefined` when no tag matches or
 *   the matching tag has an empty `content`.
 */
export function extractMetaName(
  html: string,
  name: string,
): string | undefined {
  const escaped = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const nameAttr = `name=["']${escaped}["']`;
  // Two alternatives so the value ends only at the quote that started it;
  // exactly one of the two capture groups is populated on a match.
  const contentAttr = `content=(?:"([^"]+)"|'([^']+)')`;
  const patterns = [
    new RegExp(`<meta[^>]+${nameAttr}[^>]+${contentAttr}`, "i"),
    new RegExp(`<meta[^>]+${contentAttr}[^>]+${nameAttr}`, "i"),
  ];
  for (const pattern of patterns) {
    const match = html.match(pattern);
    const content = match?.[1] ?? match?.[2];
    if (content !== undefined) return decodeHtmlEntities(content);
  }
  return undefined;
}
|
||||
|
||||
/**
 * Extract the text content of the `<title>` element.
 *
 * @param html - Raw HTML to search.
 * @returns The trimmed, entity-decoded title, or `undefined` when there is no
 *   `<title>` element or its text is empty after trimming.
 */
export function extractPageTitle(html: string): string | undefined {
  // `\b` keeps longer tag names that merely start with "title" from matching.
  const match = /<title\b[^>]*>([^<]+)<\/title>/i.exec(html);
  const title = match?.[1]?.trim();
  // A whitespace-only title carries no information; report it as missing.
  return title ? decodeHtmlEntities(title) : undefined;
}
|
||||
|
||||
// ── JSON-LD helpers (file-private) ────────────────────────────────────────────
|
||||
|
||||
/** Metadata recovered from a JSON-LD node; every field is optional. */
type JsonLdResult = {
  /** Taken from the node's `name`, falling back to `headline`. */
  title?: string;
  /** Taken from the node's `description`. */
  description?: string;
  /** Taken from the node's `image`, `thumbnailUrl`, or `thumbnail`. */
  thumbnailUrl?: string;
};
|
||||
|
||||
/**
 * Coerce a JSON-LD value to a non-empty, trimmed string.
 *
 * Accepts a plain string, or an array whose first element is a string.
 * Anything else, or a blank string, yields `undefined`.
 */
function ldString(v: unknown): string | undefined {
  // For arrays only the first entry is considered.
  const candidate: unknown = Array.isArray(v) ? v[0] : v;
  if (typeof candidate !== "string") return undefined;
  const trimmed = candidate.trim();
  return trimmed === "" ? undefined : trimmed;
}
|
||||
|
||||
/**
 * Resolve a JSON-LD image value to an absolute `http://` or `https://` URL.
 *
 * Supported shapes:
 * - a string, accepted only when it starts with `http://` or `https://`;
 * - an array, where the first entry that resolves to a usable URL wins;
 * - an object, where `url` is tried first and `contentUrl` second.
 *
 * @param v - The raw value found in the JSON-LD node.
 * @returns The first usable URL, or `undefined` when none is found.
 */
function ldImage(v: unknown): string | undefined {
  if (typeof v === "string") {
    return v.startsWith("http://") || v.startsWith("https://") ? v : undefined;
  }
  if (Array.isArray(v)) {
    // Keep scanning: a relative or malformed first entry must not hide a
    // usable later one.
    for (const entry of v) {
      const url = ldImage(entry);
      if (url) return url;
    }
    return undefined;
  }
  if (v && typeof v === "object") {
    const o = v as Record<string, unknown>;
    // Fall back to `contentUrl` whenever `url` is unusable, not only when
    // it is absent.
    return ldImage(o.url) ?? ldImage(o.contentUrl);
  }
  return undefined;
}
|
||||
|
||||
/**
 * Walk a parsed JSON-LD value and pull out title, description, and image.
 *
 * - Arrays are scanned in order; the first entry that yields a title or a
 *   thumbnail is returned, otherwise an empty result.
 * - An object with a truthy `@graph` delegates entirely to that graph.
 * - Any other object is read directly: `name`/`headline` for the title,
 *   `description`, and `image`/`thumbnailUrl`/`thumbnail` for the image.
 * - Non-object values produce an empty result.
 */
function ldExtractNode(data: unknown): JsonLdResult {
  if (!Array.isArray(data)) {
    if (typeof data !== "object" || data === null) return {};

    const node = data as Record<string, unknown>;
    const graph = node["@graph"];
    if (graph) return ldExtractNode(graph);

    const { name, headline, description, image, thumbnailUrl, thumbnail } = node;
    return {
      title: ldString(name ?? headline),
      description: ldString(description),
      thumbnailUrl: ldImage(image ?? thumbnailUrl ?? thumbnail),
    };
  }

  for (const entry of data) {
    const found = ldExtractNode(entry);
    if (found.title || found.thumbnailUrl) return found;
  }
  return {};
}
|
||||
|
||||
/**
 * Scan the document for `<script type="application/ld+json">` blocks, parse
 * each one, and return the result for the first block that produces a title
 * or a thumbnail. `@graph` containers, arrays, and the usual `image` shapes
 * (string, string[], ImageObject) are handled by the node extractor.
 *
 * Blocks that fail to parse or to extract are skipped.
 *
 * @param html - Raw HTML to search.
 * @returns The first useful result, or an empty object when none is found.
 */
export function extractJsonLd(html: string): JsonLdResult {
  const scriptBlocks = html.matchAll(
    /<script[^>]+type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi,
  );
  for (const [, payload] of scriptBlocks) {
    try {
      const node = ldExtractNode(JSON.parse(payload));
      if (node.title || node.thumbnailUrl) return node;
    } catch {
      // invalid JSON — skip
    }
  }
  return {};
}
|
||||
|
||||
/**
 * Return the `src` of the first `<img>` whose declared width or height is at
 * least `minSize` pixels. Skips data URIs and resolves relative URLs.
 *
 * An image qualifies when either declared dimension reaches `minSize`, so a
 * tag that declares only `width` (or only `height`) can still be selected.
 * A missing dimension counts as 0.
 *
 * @param html - Raw HTML to search.
 * @param baseUrl - Base used to resolve a relative `src`.
 * @param minSize - Minimum declared dimension in pixels (default 200).
 * @returns The resolved absolute URL, or `undefined` when no image qualifies.
 */
export function extractLargeImage(
  html: string,
  baseUrl: string,
  minSize = 200,
): string | undefined {
  const srcRe = /\bsrc=["']([^"']+)["']/i;
  const widthRe = /\bwidth=["']?(\d+)/i;
  const heightRe = /\bheight=["']?(\d+)/i;

  for (const [tag] of html.matchAll(/<img[^>]+>/gi)) {
    const src = srcRe.exec(tag)?.[1];
    if (!src || src.startsWith("data:")) continue;

    const width = parseInt(widthRe.exec(tag)?.[1] ?? "0", 10);
    const height = parseInt(heightRe.exec(tag)?.[1] ?? "0", 10);
    // Either dimension is enough, matching the documented contract.
    if (!(width >= minSize || height >= minSize)) continue;

    try {
      return new URL(src, baseUrl).toString();
    } catch {
      // Unresolvable src; try the next image.
      continue;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Collect all `<link rel="icon">` / `<link rel="apple-touch-icon">` tags, rank
 * them by declared size (largest wins), and return the best resolved URL.
 * Falls back to the first match when no `sizes` attribute is present.
 *
 * The `rel` attribute is split into whitespace-separated tokens and compared
 * case-insensitively, so `shortcut icon` is accepted while unrelated values
 * that merely contain the word "icon" (such as `mask-icon`) are not.
 * When `sizes` lists several entries, the largest one is used for ranking.
 * Ties keep the earliest tag in document order.
 *
 * @param html - Raw HTML to search.
 * @param baseUrl - Base used to resolve a relative `href`.
 * @returns The resolved absolute URL of the best icon, or `undefined` when
 *   the document declares none.
 */
export function extractBestIcon(
  html: string,
  baseUrl: string,
): string | undefined {
  const relRe = /\brel=["']([^"']+)["']/i;
  const hrefRe = /\bhref=["']([^"']+)["']/i;
  const sizesRe = /\bsizes=["']([^"']+)["']/i;
  const iconRels = new Set([
    "icon",
    "apple-touch-icon",
    "apple-touch-icon-precomposed",
  ]);

  let best: { href: string; area: number } | undefined;

  for (const [tag] of html.matchAll(/<link[^>]+>/gi)) {
    const relTokens = (relRe.exec(tag)?.[1] ?? "").toLowerCase().split(/\s+/);
    if (!relTokens.some((token) => iconRels.has(token))) continue;

    const href = hrefRe.exec(tag)?.[1];
    if (!href) continue;

    // `sizes` may hold several "WxH" entries; rank by the largest. Values
    // without a WxH pair (or a missing attribute) rank as area 0.
    let area = 0;
    const sizes = sizesRe.exec(tag)?.[1] ?? "";
    for (const [, w, h] of sizes.matchAll(/(\d+)x(\d+)/gi)) {
      area = Math.max(area, parseInt(w, 10) * parseInt(h, 10));
    }

    let resolved: string;
    try {
      resolved = new URL(href, baseUrl).toString();
    } catch {
      // Unresolvable href; ignore this candidate.
      continue;
    }

    // Strict comparison keeps the earliest candidate on ties.
    if (best === undefined || area > best.area) {
      best = { href: resolved, area };
    }
  }

  return best?.href;
}
|
||||
|
||||
/**
 * Extract `href` from the first `<link rel="…">` whose rel contains `relFragment`,
 * resolved to an absolute URL using `baseUrl`.
 *
 * Tags are examined in document order regardless of whether `rel` or `href`
 * comes first inside the tag, so the earliest matching `<link>` always wins.
 * The fragment is matched case-insensitively as a substring of the `rel`
 * value. A tag whose `href` cannot be resolved is skipped and the search
 * continues with the next tag.
 *
 * @param html - Raw HTML to search.
 * @param relFragment - Text that must appear in the tag's `rel` attribute.
 * @param baseUrl - Base used to resolve a relative `href`.
 * @returns The resolved absolute URL, or `undefined` when no tag matches.
 */
export function extractLinkHref(
  html: string,
  relFragment: string,
  baseUrl: string,
): string | undefined {
  const relRe = /\brel=["']([^"']*)["']/i;
  const hrefRe = /\bhref=["']([^"']+)["']/i;
  const wanted = relFragment.toLowerCase();

  // Match up to (not including) the closing `>` so a truncated final tag is
  // still inspected.
  for (const [tag] of html.matchAll(/<link\b[^>]+/gi)) {
    const rel = relRe.exec(tag)?.[1];
    if (rel === undefined || !rel.toLowerCase().includes(wanted)) continue;

    const href = hrefRe.exec(tag)?.[1];
    if (!href) continue;

    try {
      return new URL(href, baseUrl).toString();
    } catch {
      // Unresolvable href; try the next matching tag.
      continue;
    }
  }
  return undefined;
}
|
||||
|
||||
function isPrivateHost(hostname: string): boolean {
|
||||
// Block loopback and RFC-1918 ranges. Note: DNS rebinding is not fully mitigated.
|
||||
if (hostname === "localhost" || hostname === "::1") return true;
|
||||
|
||||
Reference in New Issue
Block a user