diff --git a/api/services/providers/generic.ts b/api/services/providers/generic.ts
index 7278822..3bced69 100644
--- a/api/services/providers/generic.ts
+++ b/api/services/providers/generic.ts
@@ -1,6 +1,14 @@
import type { RichContent } from "../../model/interfaces.ts";
import type { RichContentProvider } from "../rich-content-service.ts";
-import { extractOgTag, fetchWithTimeout } from "../rich-content-service.ts";
+import {
+ extractBestIcon,
+ extractJsonLd,
+ extractLargeImage,
+ extractMetaName,
+ extractOgTag,
+ extractPageTitle,
+ fetchWithTimeout,
+} from "../rich-content-service.ts";
export const genericProvider: RichContentProvider = {
name: "generic",
@@ -18,14 +26,39 @@ export const genericProvider: RichContentProvider = {
}
const html = await res.text();
+ const ld = extractJsonLd(html);
+
+ // Title: og:title → twitter:title → JSON-LD → <title>
+ const title = extractOgTag(html, "title") ??
+ extractMetaName(html, "twitter:title") ??
+ ld.title ??
+ extractPageTitle(html);
+
+ // Description: og:description → twitter:description → JSON-LD → <meta name="description">
+ const description = extractOgTag(html, "description") ??
+ extractMetaName(html, "twitter:description") ??
+ ld.description ??
+ extractMetaName(html, "description");
+
+ // Image: og:image → twitter:image → JSON-LD → first large <img> → best icon → /favicon.ico
+ const thumbnailUrl = extractOgTag(html, "image") ??
+ extractMetaName(html, "twitter:image") ??
+ ld.thumbnailUrl ??
+ extractLargeImage(html, url) ??
+ extractBestIcon(html, url) ??
+ `${new URL(url).origin}/favicon.ico`;
+
+ // Site name: og:site_name → hostname
+ const siteName = extractOgTag(html, "site_name") ??
+ new URL(url).hostname.replace(/^www\./, "");
return {
type: "generic",
url,
- title: extractOgTag(html, "title"),
- description: extractOgTag(html, "description"),
- thumbnailUrl: extractOgTag(html, "image"),
- siteName: extractOgTag(html, "site_name"),
+ title,
+ description,
+ thumbnailUrl,
+ siteName,
};
},
};
diff --git a/api/services/rich-content-service.ts b/api/services/rich-content-service.ts
index 6682332..85f882b 100644
--- a/api/services/rich-content-service.ts
+++ b/api/services/rich-content-service.ts
@@ -83,6 +83,199 @@ export function extractOgTag(
return undefined;
}
+/** Extract `content` from `<meta name="…" content="…">` (both attribute orderings), entity-decoded; undefined when neither pattern matches. */
+export function extractMetaName(
+  html: string,
+  name: string,
+): string | undefined {
+  const escaped = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); // regex-escape so the name is matched literally
+  const patterns = [
+    new RegExp(
+      `<meta[^>]+name=["']${escaped}["'][^>]+content=["']([^"']+)["']`, // name before content
+      "i",
+    ),
+    new RegExp(
+      `<meta[^>]+content=["']([^"']+)["'][^>]+name=["']${escaped}["']`, // content before name
+      "i",
+    ),
+  ];
+  for (const pattern of patterns) {
+    const match = html.match(pattern);
+    if (match) return decodeHtmlEntities(match[1]); // first ordering that matches wins
+  }
+  return undefined;
+}
+
+/** Extract the trimmed, entity-decoded text content of the `<title>` element; undefined when the pattern does not match. */
+export function extractPageTitle(html: string): string | undefined {
+  const match = html.match(/<title[^>]*>([^<]+)<\/title>/i); // [^>]* tolerates attributes on the tag
+  return match ? decodeHtmlEntities(match[1].trim()) : undefined;
+}
+
+// ── JSON-LD helpers (file-private) ────────────────────────────────────────────
+
+type JsonLdResult = { // Optional fields gathered from a JSON-LD value; each is undefined when no usable value was found.
+ title?: string; // as populated by ldExtractNode: `name`, else `headline`
+ description?: string; // as populated by ldExtractNode: `description`
+ thumbnailUrl?: string; // as populated by ldExtractNode: `image`, else `thumbnailUrl`, else `thumbnail`
+};
+
+function ldString(v: unknown): string | undefined {
+  // Accept a bare string or the first element of an array; blank or non-string input yields undefined.
+  const candidate: unknown = Array.isArray(v) ? v[0] : v;
+  if (typeof candidate !== "string") return undefined;
+  const trimmed = candidate.trim();
+  return trimmed ? trimmed : undefined;
+}
+
+function ldImage(v: unknown): string | undefined { // resolve a JSON-LD image value to an absolute http(s) URL, else undefined
+  if (
+    typeof v === "string" &&
+    (v.startsWith("http://") || v.startsWith("https://"))
+  ) return v;
+  if (Array.isArray(v)) return ldImage(v[0]); // only the first entry of a list is considered
+  if (v && typeof v === "object") {
+    const o = v as Record<string, unknown>; // nested object: follow its `url`, else `contentUrl`
+    return ldImage(o.url ?? o.contentUrl);
+  }
+  return undefined;
+}
+
+function ldExtractNode(data: unknown): JsonLdResult { // pull title/description/image out of one parsed JSON-LD value
+  if (Array.isArray(data)) {
+    for (const item of data) {
+      const r = ldExtractNode(item);
+      if (r.title || r.thumbnailUrl) return r; // first entry yielding a title or an image wins
+    }
+    return {};
+  }
+  if (!data || typeof data !== "object") return {};
+  const o = data as Record<string, unknown>;
+  if (o["@graph"]) return ldExtractNode(o["@graph"]); // a truthy @graph is used instead of this node's own fields
+  return {
+    title: ldString(o.name ?? o.headline),
+    description: ldString(o.description),
+    thumbnailUrl: ldImage(o.image ?? o.thumbnailUrl ?? o.thumbnail),
+  };
+}
+
+/**
+ * Parse every `