Files
gerbeur/api/services/providers/generic.ts

65 lines
1.9 KiB
TypeScript

import type { RichContent } from "../../model/interfaces.ts";
import type { RichContentProvider } from "../rich-content-service.ts";
import {
extractBestIcon,
extractJsonLd,
extractLargeImage,
extractMetaName,
extractOgTag,
extractPageTitle,
fetchWithTimeout,
} from "../rich-content-service.ts";
/**
 * Reports whether a `Content-Type` header value denotes an HTML document.
 *
 * Media types are case-insensitive, so the value is trimmed and lower-cased
 * before comparison. `application/xhtml+xml` is accepted alongside
 * `text/html` because XHTML pages carry the same `<meta>` / `<title>` markup.
 *
 * @param headerValue Raw header value; parameters such as `; charset=utf-8`
 *   may follow the media type.
 * @returns `true` when the media type is HTML or XHTML.
 */
function isHtmlContentType(headerValue: string): boolean {
  const normalized = headerValue.trim().toLowerCase();
  return normalized.startsWith("text/html") ||
    normalized.startsWith("application/xhtml+xml");
}

/**
 * Resolves a URL found in page metadata against the page's own URL.
 *
 * Metadata tags frequently contain relative (`/img/a.png`) or
 * protocol-relative (`//cdn.example.com/a.png`) references, which are useless
 * to a consumer that no longer knows the page they came from.
 *
 * @param candidate Value extracted from the page; may be absent or blank.
 * @param base Absolute URL of the page the value was extracted from.
 * @returns The absolute URL, the untouched candidate when it cannot be
 *   parsed, or `undefined` when the candidate is absent or blank (so that a
 *   `??` chain moves on to its next source).
 */
function toAbsoluteUrl(
  candidate: string | null | undefined,
  base: string,
): string | undefined {
  const trimmed = candidate?.trim();
  if (!trimmed) return undefined;
  try {
    return new URL(trimmed, base).href;
  } catch {
    // Unparseable value: hand it back unchanged rather than dropping it.
    return trimmed;
  }
}

/**
 * Fallback rich-content provider.
 *
 * `matches` accepts every URL, so this provider is meant to be consulted
 * after any more specific ones. `fetch` downloads the page and assembles a
 * `RichContent` of type `"generic"` from Open Graph tags, Twitter card tags,
 * JSON-LD and plain HTML, in that order of preference.
 */
export const genericProvider: RichContentProvider = {
  name: "generic",

  /** Always `true`: this provider is the catch-all fallback. */
  matches(_url: string): boolean {
    return true;
  },

  /**
   * Fetches `url` and extracts title, description, thumbnail and site name.
   *
   * @param url Absolute URL of the page to describe.
   * @returns A `"generic"` `RichContent`. For non-HTML responses only `type`
   *   and `url` are populated.
   * @throws Whatever `fetchWithTimeout` or reading the response body throws.
   */
  async fetch(url: string): Promise<RichContent> {
    const res = await fetchWithTimeout(url);
    const contentType = res.headers.get("content-type") ?? "";
    if (!isHtmlContentType(contentType)) {
      // The body is never read on this path; cancel it so the underlying
      // connection/resource is released instead of lingering until GC.
      // Cleanup is best-effort: a failure here must not mask the result.
      await res.body?.cancel().catch(() => {});
      return { type: "generic", url };
    }

    const html = await res.text();
    const ld = extractJsonLd(html);
    const pageUrl = new URL(url);

    // Title: og:title → twitter:title → JSON-LD → <title>
    const title = extractOgTag(html, "title") ??
      extractMetaName(html, "twitter:title") ??
      ld.title ??
      extractPageTitle(html);

    // Description: og:description → twitter:description → JSON-LD → <meta name="description">
    const description = extractOgTag(html, "description") ??
      extractMetaName(html, "twitter:description") ??
      ld.description ??
      extractMetaName(html, "description");

    // Image: og:image → twitter:image → JSON-LD → first large <img> → best icon → /favicon.ico
    // The first three sources are raw metadata values and may be relative, so
    // they are resolved against the page URL. The two HTML-scanning helpers
    // already receive the page URL themselves.
    const thumbnailUrl = toAbsoluteUrl(extractOgTag(html, "image"), url) ??
      toAbsoluteUrl(extractMetaName(html, "twitter:image"), url) ??
      toAbsoluteUrl(ld.thumbnailUrl, url) ??
      extractLargeImage(html, url) ??
      extractBestIcon(html, url) ??
      `${pageUrl.origin}/favicon.ico`;

    // Site name: og:site_name → hostname (without a leading "www.")
    const siteName = extractOgTag(html, "site_name") ??
      pageUrl.hostname.replace(/^www\./, "");

    return {
      type: "generic",
      url,
      title,
      description,
      thumbnailUrl,
      siteName,
    };
  },
};