All checks were successful
Build and Publish Docker Image / build-and-push (push) Successful in 3m15s
81 lines
2.6 KiB
TypeScript
81 lines
2.6 KiB
TypeScript
import type { RichContent } from "../../model/interfaces.ts";
|
|
import type { RichContentProvider } from "../rich-content-service.ts";
|
|
import {
|
|
extractBestIcon,
|
|
extractFirstContentImage,
|
|
extractJsonLd,
|
|
extractLargeImage,
|
|
extractMetaName,
|
|
extractOgTag,
|
|
extractPageTitle,
|
|
fetchWithTimeout,
|
|
} from "../rich-content-service.ts";
|
|
|
|
/**
 * Reports whether an `og:url` value refers to the same path as the fetched page.
 *
 * The value is resolved against the page URL, so relative values such as `/`
 * are compared like absolute ones instead of failing to parse. Only pathnames
 * are compared (trailing slashes ignored); host, query and fragment are not.
 * A value that still cannot be parsed counts as a match, so the og: block is
 * kept rather than discarded on bad input.
 */
function ogUrlMatchesPage(ogUrl: string, pageUrl: URL): boolean {
  const comparablePath = (pathname: string): string =>
    pathname.replace(/\/+$/, "") || "/";

  try {
    return comparablePath(new URL(ogUrl, pageUrl).pathname) ===
      comparablePath(pageUrl.pathname);
  } catch {
    return true;
  }
}

/**
 * Turns an image reference taken from page metadata into an absolute URL.
 *
 * Relative and protocol-relative references are resolved against the page URL.
 * Missing, empty or whitespace-only values yield `undefined` so the next image
 * source in the fallback chain is tried. A reference that cannot be resolved
 * is returned trimmed but otherwise unchanged.
 */
function resolveImageUrl(
  candidate: string | null | undefined,
  pageUrl: URL,
): string | undefined {
  const reference = candidate?.trim();
  if (!reference) return undefined;

  try {
    return new URL(reference, pageUrl).href;
  } catch {
    return reference;
  }
}

/**
 * Catch-all rich-content provider: accepts every URL and builds a preview from
 * Open Graph tags, Twitter card tags, JSON-LD and plain HTML, in that order.
 */
export const genericProvider: RichContentProvider = {
  name: "generic",

  /** Always returns `true`; this provider is the fallback for any URL. */
  matches(_url: string): boolean {
    return true; // fallback — always matches
  },

  /**
   * Fetches `url` and extracts preview metadata from the returned markup.
   *
   * @param url Absolute URL of the page to describe.
   * @returns `{ type: "generic", url }` alone when the response is not HTML or
   *   XHTML; otherwise the same object enriched with `title`, `description`,
   *   `thumbnailUrl` and `siteName`.
   * @throws Whatever `fetchWithTimeout` or `res.text()` reject with, and a
   *   `TypeError` from `new URL(url)` if `url` cannot be parsed.
   */
  async fetch(url: string): Promise<RichContent> {
    const res = await fetchWithTimeout(url);

    // Media types are case-insensitive, so normalise before comparing.
    const contentType = (res.headers.get("content-type") ?? "")
      .trim()
      .toLowerCase();
    const isHtml = contentType.startsWith("text/html") ||
      contentType.startsWith("application/xhtml+xml");

    if (!isHtml) {
      // Nothing to parse. Release the unread body so the stream (and the
      // connection behind it) is not left open.
      try {
        await res.body?.cancel();
      } catch {
        // Best effort: a body that cannot be cancelled must not fail the lookup.
      }
      return { type: "generic", url };
    }

    const html = await res.text();
    const pageUrl = new URL(url);
    const ld = extractJsonLd(html);

    // If og:url is present but points to a different page (e.g. the homepage),
    // the og: block is a site-level fallback, not page-specific metadata.
    // In that case skip og:title and og:image so page-level signals win.
    const ogUrl = extractOgTag(html, "url");
    const useOg = !ogUrl || ogUrlMatchesPage(ogUrl, pageUrl);

    // Title: og:title (page-matched) → twitter:title → JSON-LD → <title>
    const title = (useOg ? extractOgTag(html, "title") : undefined) ??
      extractMetaName(html, "twitter:title") ??
      ld.title ??
      extractPageTitle(html);

    // Site name: og:site_name → hostname
    const siteName = extractOgTag(html, "site_name") ??
      pageUrl.hostname.replace(/^www\./, "");

    // Description: og:description → twitter:description → JSON-LD → <meta name="description">
    const description = extractOgTag(html, "description") ??
      extractMetaName(html, "twitter:description") ??
      ld.description ??
      extractMetaName(html, "description");

    // Image: og:image (page-matched) → twitter:image → JSON-LD → large <img> → first content <img> → best icon → /favicon.ico
    // The three metadata sources carry raw attribute values, so they are made
    // absolute here; the remaining extractors already receive the page URL.
    const thumbnailUrl =
      resolveImageUrl(useOg ? extractOgTag(html, "image") : undefined, pageUrl) ??
      resolveImageUrl(extractMetaName(html, "twitter:image"), pageUrl) ??
      resolveImageUrl(ld.thumbnailUrl, pageUrl) ??
      extractLargeImage(html, url) ??
      extractFirstContentImage(html, url) ??
      extractBestIcon(html, url) ??
      `${pageUrl.origin}/favicon.ico`;

    return {
      type: "generic",
      url,
      title,
      description,
      thumbnailUrl,
      siteName,
    };
  },
};
|