// Files
// gerbeur/api/services/rich-content-service.ts
// 2026-03-23 07:47:49 +00:00
//
// 111 lines
// 3.1 KiB
// TypeScript

import type { RichContent } from "../model/interfaces.ts";
import { youtubeProvider } from "./providers/youtube.ts";
import { bandcampProvider } from "./providers/bandcamp.ts";
import { soundcloudProvider } from "./providers/soundcloud.ts";
import { genericProvider } from "./providers/generic.ts";
/**
 * Contract implemented by every rich-content provider registered in this module.
 *
 * `fetchRichContent` selects the first registered provider whose `matches(url)`
 * returns true and then awaits its `fetch(url)`.
 */
export interface RichContentProvider {
// Identifier of the provider. Not read anywhere in this module —
// presumably used for logging/debugging elsewhere; confirm against callers.
name: string;
// Returns true when this provider wants to handle the given URL.
matches(url: string): boolean;
// Resolves to the metadata for the URL. A rejection is caught and logged by
// `fetchRichContent`, which then returns `undefined`.
fetch(url: string): Promise<RichContent>;
}
/**
 * Registered providers, in priority order. `fetchRichContent` scans this list
 * with `Array.prototype.find`, so the first provider whose `matches(url)`
 * returns true wins.
 * `genericProvider` must stay last — it always matches.
 */
const providers: RichContentProvider[] = [
youtubeProvider,
bandcampProvider,
soundcloudProvider,
genericProvider,
];
// Shared utilities exported for use by providers
/**
 * Issues a GET request for `url` with browser-like headers and aborts it if
 * `fetch` has not settled within `timeoutMs` milliseconds.
 *
 * The abort timer is cleared as soon as `fetch` settles (success or failure),
 * so the timeout does not cover whatever the caller does with the response
 * afterwards (e.g. reading the body).
 *
 * @param url Address to request.
 * @param timeoutMs Delay before the request is aborted; defaults to 5000 ms.
 * @returns The `Response` produced by `fetch`.
 * @throws Whatever `fetch` rejects with, including the abort error raised
 *         when the timer fires first.
 */
export async function fetchWithTimeout(
  url: string,
  timeoutMs = 5000,
): Promise<Response> {
  // Headers imitating a desktop Chrome browser with a French-first locale.
  const browserLikeHeaders = {
    "User-Agent":
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
    "Accept":
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7",
  };

  const abortController = new AbortController();
  const abortTimer = setTimeout(() => {
    abortController.abort();
  }, timeoutMs);

  try {
    const response = await fetch(url, {
      signal: abortController.signal,
      headers: browserLikeHeaders,
    });
    return response;
  } finally {
    // Always release the timer so it can never fire after the request ended.
    clearTimeout(abortTimer);
  }
}
/**
 * Named character references understood by `decodeHtmlEntities`, keyed by the
 * lower-cased entity name. A `Map` is used instead of a plain object so that
 * names such as `constructor` cannot resolve to inherited properties.
 */
const NAMED_HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", "\u00A0"],
]);

/**
 * Decodes the common HTML character references in `str`: the named entities
 * listed in `NAMED_HTML_ENTITIES` (case-insensitive) plus decimal (`&#233;`)
 * and hexadecimal (`&#xE9;`) numeric references.
 *
 * Decoding happens in a single pass, so already-escaped text is not decoded
 * twice (`&amp;lt;` becomes `&lt;`, not `<`). Unknown names and numeric
 * references outside the Unicode range are left untouched instead of throwing.
 *
 * @param str Text possibly containing HTML character references.
 * @returns The text with recognised references replaced by their characters.
 */
function decodeHtmlEntities(str: string): string {
  const maxCodePoint = 0x10ffff;
  return str.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|([a-z]+));/gi,
    (entity: string, hex?: string, dec?: string, name?: string): string => {
      if (name !== undefined) {
        return NAMED_HTML_ENTITIES.get(name.toLowerCase()) ?? entity;
      }
      const codePoint = hex !== undefined ? parseInt(hex, 16) : Number(dec);
      // `String.fromCodePoint` throws a RangeError above U+10FFFF; the
      // comparison is also false for NaN, so malformed input is kept as-is.
      return codePoint <= maxCodePoint
        ? String.fromCodePoint(codePoint)
        : entity;
    },
  );
}
/**
 * Extracts the `content` value of the first `<meta property="og:<tag>">`
 * element found in `html`, accepting both attribute orders
 * (`property` before `content` and `content` before `property`).
 *
 * The value is matched up to the quote character that opened it, so a
 * double-quoted value may contain apostrophes and a single-quoted value may
 * contain double quotes. HTML character references in the value are decoded
 * with `decodeHtmlEntities`.
 *
 * @param html Raw HTML document (or fragment) to search.
 * @param tag Open Graph property name without the `og:` prefix, e.g. `"title"`.
 *            It is matched literally; regex metacharacters are escaped.
 * @returns The decoded content, or `undefined` when no matching meta element
 *          with a non-empty `content` exists.
 */
export function extractOgTag(
  html: string,
  tag: string,
): string | undefined {
  // Escape regex metacharacters so the tag can never alter the pattern.
  const literalTag = tag.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const propertyAttr = `property=["']og:${literalTag}["']`;
  // One alternative per quote style: group 1 = double-quoted, group 2 = single-quoted.
  const contentAttr = `content=(?:"([^"]+)"|'([^']+)')`;

  const patterns = [
    new RegExp(`<meta[^>]+${propertyAttr}[^>]+${contentAttr}`, "i"),
    new RegExp(`<meta[^>]+${contentAttr}[^>]+${propertyAttr}`, "i"),
  ];

  for (const pattern of patterns) {
    const match = pattern.exec(html);
    const value = match?.[1] ?? match?.[2];
    if (value !== undefined) return decodeHtmlEntities(value);
  }
  return undefined;
}
/**
 * Tells whether `hostname` designates a loopback, private or link-local
 * destination that outbound requests must not reach (SSRF guard).
 *
 * Accepts the hostname as produced by `URL.hostname`, i.e. IPv6 literals may
 * still be wrapped in brackets (`[::1]`). Blocked destinations:
 * - `localhost` and any `*.localhost` name (case-insensitive, optional trailing dot);
 * - IPv4 loopback (127/8), RFC 1918 (10/8, 172.16/12, 192.168/16),
 *   link-local (169.254/16) and "this network" (0/8);
 * - IPv6 unspecified (`::`), loopback (`::1`), unique-local (fc00::/7) and
 *   link-local (fe80::/10);
 * - IPv4-mapped IPv6 addresses (`::ffff:a.b.c.d` or `::ffff:hhhh:hhhh`) whose
 *   embedded IPv4 address is itself blocked.
 *
 * Note: only the literal host is inspected; a public name that resolves to a
 * private address (DNS rebinding) is not detected here.
 *
 * @param hostname Host part of a URL, with or without IPv6 brackets.
 * @returns `true` when the host must be rejected.
 */
function isPrivateHost(hostname: string): boolean {
  const host = hostname
    .toLowerCase()
    .replace(/^\[(.*)\]$/, "$1") // URL.hostname keeps brackets around IPv6 literals
    .replace(/\.$/, ""); // "localhost." is the same host as "localhost"

  if (host === "localhost" || host.endsWith(".localhost")) return true;

  // Prefix match (not a strict dotted-quad) is kept on purpose: it also
  // rejects names such as "127.0.0.1.example" that embed a private address.
  if (
    /^(127\.|10\.|172\.(1[6-9]|2\d|3[01])\.|192\.168\.|169\.254\.)/.test(host)
  ) {
    return true;
  }
  // 0.0.0.0/8 — strict form so that real names like "0.pool.ntp.org" pass.
  if (/^0(\.\d{1,3}){3}$/.test(host)) return true;

  // Everything below only concerns IPv6 literals.
  if (!host.includes(":")) return false;
  if (host === "::" || host === "::1") return true;
  if (/^f[cd][0-9a-f]{2}:/.test(host)) return true; // fc00::/7 unique-local
  if (/^fe[89ab][0-9a-f]:/.test(host)) return true; // fe80::/10 link-local

  // IPv4-mapped IPv6: judge the embedded IPv4 address.
  const mapped = /^::ffff:(.+)$/.exec(host);
  if (mapped === null) return false;
  const tail = mapped[1] ?? "";

  // The URL parser serialises the IPv4 part as two hex words ("7f00:1").
  const words = /^([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(tail);
  if (words !== null) {
    const high = parseInt(words[1] ?? "", 16);
    const low = parseInt(words[2] ?? "", 16);
    return isPrivateHost(
      `${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`,
    );
  }
  // Dotted form ("::ffff:127.0.0.1"); anything else is not an IPv4 mapping.
  return tail.includes(":") ? false : isPrivateHost(tail);
}
/**
 * Checks that `raw` is an absolute `http:` or `https:` URL whose host is not
 * rejected by `isPrivateHost`.
 *
 * @param raw Candidate URL string.
 * @returns `true` only when the string parses as a URL, uses the http or https
 *          scheme and targets a non-private host; `false` otherwise, including
 *          when parsing throws.
 */
export function isValidHttpUrl(raw: string): boolean {
  try {
    const { protocol, hostname } = new URL(raw);
    const usesHttpScheme = protocol === "http:" || protocol === "https:";
    return usesHttpScheme && !isPrivateHost(hostname);
  } catch {
    // `new URL` throws on anything that is not a parseable absolute URL.
    return false;
  }
}
/**
 * Fetches rich-content metadata for `url` using the first registered provider
 * whose `matches(url)` returns true.
 *
 * This function never rejects: any failure (no matching provider, an error
 * thrown by `matches`, or a rejected `fetch`) is logged with `console.error`
 * and reported as `undefined`.
 *
 * NOTE(review): the URL is not passed through `isValidHttpUrl` here —
 * presumably callers or providers validate it beforehand; confirm.
 *
 * @param url Address to fetch metadata for.
 * @returns The provider's result, or `undefined` on failure.
 */
export async function fetchRichContent(
  url: string,
): Promise<RichContent | undefined> {
  try {
    const provider = providers.find((candidate) => candidate.matches(url));
    if (provider === undefined) {
      // Explicit guard instead of a non-null assertion: if the catch-all
      // provider is ever removed, the log states the real cause.
      throw new Error("no registered provider matches this URL");
    }
    return await provider.fetch(url);
  } catch (err) {
    console.error(`[rich-content] Failed to fetch metadata for ${url}:`, err);
    return undefined;
  }
}