diff --git a/Dockerfile b/Dockerfile
index 16490f8..76af45b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -25,7 +25,7 @@ RUN deno task build
# ── Stage 2: runtime ──────────────────────────────────────────────────────────
FROM denoland/deno:alpine-2.7.11
-RUN apk add --no-cache ffmpeg
+RUN apk add --no-cache ffmpeg curl
WORKDIR /app
diff --git a/api/routes/preview.ts b/api/routes/preview.ts
index 31619ff..9f6e6d3 100644
--- a/api/routes/preview.ts
+++ b/api/routes/preview.ts
@@ -1,6 +1,7 @@
import { Router } from "@oak/oak";
import {
fetchRichContent,
+ fetchWithTimeout,
isValidHttpUrl,
} from "../services/rich-content-service.ts";
import { APIErrorCode } from "../model/interfaces.ts";
@@ -21,4 +22,44 @@ previewRouter.get("/api/preview", async (ctx) => {
ctx.response.body = { success: true, data: data ?? null };
});
+/**
+ * GET /api/proxy-image?url=<absolute http(s) URL>
+ *
+ * Proxy an external image through the server so HTTP thumbnail URLs don't
+ * trigger mixed-content blocks when the frontend is served over HTTPS.
+ *
+ * Responses:
+ * - 200 with the image bytes and the upstream Content-Type;
+ * - 400 when `url` fails `isValidHttpUrl`, the upstream resource is not an
+ *   `image/*`, or it is larger than 5 MB;
+ * - 502 when the upstream fetch throws or answers with a non-2xx status.
+ *
+ * NOTE(review): this endpoint makes the server fetch a caller-supplied URL.
+ * Unless `isValidHttpUrl` already rejects private / loopback / link-local
+ * hosts, it can be used to reach internal services (SSRF) — confirm and
+ * restrict if needed.
+ */
+previewRouter.get("/api/proxy-image", async (ctx) => {
+  const MAX_SIZE = 5 * 1024 * 1024; // 5 MB
+
+  const url = ctx.request.url.searchParams.get("url") ?? "";
+  if (!isValidHttpUrl(url)) {
+    ctx.response.status = 400;
+    return;
+  }
+
+  try {
+    const res = await fetchWithTimeout(url, 8000);
+
+    // Answer with `status` and release the upstream connection: a fetch body
+    // that is never read nor cancelled keeps its resources open.
+    const reject = async (status: number) => {
+      await res.body?.cancel().catch(() => {});
+      ctx.response.status = status;
+    };
+
+    // Don't relay (and cache for a day) an upstream error page as a success.
+    if (!res.ok) {
+      await reject(502);
+      return;
+    }
+
+    const contentType = res.headers.get("content-type") ?? "";
+    // Media types are case-insensitive.
+    if (!contentType.toLowerCase().startsWith("image/")) {
+      await reject(400);
+      return;
+    }
+
+    // Content-Length is only a hint (absent on chunked responses and not
+    // trustworthy), so it is used for an early exit only.
+    const declaredLength = Number(res.headers.get("content-length") ?? "0");
+    if (declaredLength > MAX_SIZE) {
+      await reject(400);
+      return;
+    }
+
+    // Enforce the cap while reading so an oversized or endless body is never
+    // buffered in full.
+    const chunks: Uint8Array[] = [];
+    let received = 0;
+    if (res.body) {
+      const reader = res.body.getReader();
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        received += value.byteLength;
+        if (received > MAX_SIZE) {
+          await reader.cancel().catch(() => {});
+          ctx.response.status = 400;
+          return;
+        }
+        chunks.push(value);
+      }
+    }
+
+    const bytes = new Uint8Array(received);
+    let offset = 0;
+    for (const chunk of chunks) {
+      bytes.set(chunk, offset);
+      offset += chunk.byteLength;
+    }
+
+    ctx.response.headers.set("Content-Type", contentType);
+    ctx.response.headers.set("Cache-Control", "public, max-age=86400");
+    // The bytes come from an untrusted origin: forbid MIME sniffing and keep
+    // active content (e.g. a scripted SVG) inert if the URL is opened directly.
+    ctx.response.headers.set("X-Content-Type-Options", "nosniff");
+    ctx.response.headers.set(
+      "Content-Security-Policy",
+      "default-src 'none'; style-src 'unsafe-inline'; sandbox",
+    );
+    ctx.response.body = bytes;
+  } catch {
+    // Network failure, timeout, TLS failure, or a body stream error.
+    ctx.response.status = 502;
+  }
+});
+
export default previewRouter;
diff --git a/api/services/providers/generic.ts b/api/services/providers/generic.ts
index 3bced69..f3a00da 100644
--- a/api/services/providers/generic.ts
+++ b/api/services/providers/generic.ts
@@ -2,6 +2,7 @@ import type { RichContent } from "../../model/interfaces.ts";
import type { RichContentProvider } from "../rich-content-service.ts";
import {
extractBestIcon,
+ extractFirstContentImage,
extractJsonLd,
extractLargeImage,
extractMetaName,
@@ -28,30 +29,45 @@ export const genericProvider: RichContentProvider = {
const html = await res.text();
const ld = extractJsonLd(html);
- // Title: og:title → twitter:title → JSON-LD → <title>
- const title = extractOgTag(html, "title") ??
+ // If og:url is present but points to a different page (e.g. the homepage),
+ // the og: block is a site-level fallback, not page-specific metadata.
+ // In that case skip og:title and og:image so page-level signals win.
+ const ogUrl = extractOgTag(html, "url");
+ const useOg = !ogUrl || (() => {
+ try {
+ const ogPath = new URL(ogUrl).pathname.replace(/\/+$/, "") || "/";
+ const pagePath = new URL(url).pathname.replace(/\/+$/, "") || "/";
+ return ogPath === pagePath;
+ } catch {
+ return true;
+ }
+ })();
+
+ // Title: og:title (page-matched) → twitter:title → JSON-LD → <title>
+ const title = (useOg ? extractOgTag(html, "title") : undefined) ??
extractMetaName(html, "twitter:title") ??
ld.title ??
extractPageTitle(html);
+ // Site name: og:site_name → hostname
+ const siteName = extractOgTag(html, "site_name") ??
+ new URL(url).hostname.replace(/^www\./, "");
+
// Description: og:description → twitter:description → JSON-LD → <meta name="description">
const description = extractOgTag(html, "description") ??
extractMetaName(html, "twitter:description") ??
ld.description ??
extractMetaName(html, "description");
- // Image: og:image → twitter:image → JSON-LD → first large <img> → best icon → /favicon.ico
- const thumbnailUrl = extractOgTag(html, "image") ??
+ // Image: og:image (page-matched) → twitter:image → JSON-LD → large <img> → first content <img> → best icon → /favicon.ico
+ const thumbnailUrl = (useOg ? extractOgTag(html, "image") : undefined) ??
extractMetaName(html, "twitter:image") ??
ld.thumbnailUrl ??
extractLargeImage(html, url) ??
+ extractFirstContentImage(html, url) ??
extractBestIcon(html, url) ??
`${new URL(url).origin}/favicon.ico`;
- // Site name: og:site_name → hostname
- const siteName = extractOgTag(html, "site_name") ??
- new URL(url).hostname.replace(/^www\./, "");
-
return {
type: "generic",
url,
diff --git a/api/services/rich-content-service.ts b/api/services/rich-content-service.ts
index 85f882b..8d37c22 100644
--- a/api/services/rich-content-service.ts
+++ b/api/services/rich-content-service.ts
@@ -26,25 +26,113 @@ const providers: RichContentProvider[] = [
// Shared utilities exported for use by providers
+// Browser-like request headers shared by the fetch() attempts and the curl
+// fallback in this module, so both paths present the same User-Agent, Accept
+// and Accept-Language to the remote server.
+const FETCH_HEADERS = {
+ "User-Agent":
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
+ "Accept":
+ "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ "Accept-Language": "fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7",
+};
+
+/**
+ * Fetch `url` via a `curl --insecure` subprocess and return a minimal Response.
+ * Used as a last resort when Deno's rustls rejects the server's TLS certificate
+ * due to an unsupported algorithm (e.g. UnsupportedSignatureAlgorithm).
+ *
+ * curl writes the body to a temp file (removed afterwards) and reports the
+ * upstream Content-Type on stdout via `--write-out`.
+ * - Textual payloads (no Content-Type, `text/*`, or an html/xml/json type) are
+ *   decoded with the charset declared in the Content-Type and returned as a
+ *   string body, so `res.text()` yields correctly decoded text.
+ * - Any other payload (e.g. an image) is returned as raw bytes together with
+ *   the upstream Content-Type, so binary data is not corrupted and callers
+ *   can inspect the real type.
+ *
+ * The HTTP status is not captured: the returned Response always has the
+ * default status 200.
+ *
+ * SECURITY: `--insecure` disables certificate verification entirely, so the
+ * content may come from an impersonating server. Treat it as untrusted.
+ *
+ * @param url Absolute URL to download.
+ * @param timeoutMs Overall budget; rounded up to whole seconds for `--max-time`.
+ * @returns The Response, or null if the temp file cannot be created or read,
+ *   curl is unavailable, or curl exits non-zero.
+ */
+async function fetchViaCurl(
+  url: string,
+  timeoutMs: number,
+): Promise<Response | null> {
+  let tmpPath: string | undefined;
+  try {
+    // Created inside the try so a temp-file failure yields null like every
+    // other failure instead of rejecting.
+    tmpPath = await Deno.makeTempFile();
+    const { code, stdout } = await new Deno.Command("curl", {
+      args: [
+        "--silent",
+        "--insecure",
+        "--location",
+        "--max-time",
+        String(Math.ceil(timeoutMs / 1000)),
+        "--user-agent",
+        FETCH_HEADERS["User-Agent"],
+        "--header",
+        `Accept: ${FETCH_HEADERS["Accept"]}`,
+        "--header",
+        `Accept-Language: ${FETCH_HEADERS["Accept-Language"]}`,
+        "--output",
+        tmpPath,
+        "--write-out",
+        "%{content_type}",
+        url,
+      ],
+      stdout: "piped",
+      stderr: "null",
+    }).output();
+
+    if (code !== 0) return null;
+
+    const contentType = new TextDecoder().decode(stdout).trim();
+    const mime = contentType.split(";")[0].trim().toLowerCase();
+    const bytes = await Deno.readFile(tmpPath);
+
+    const isTextual = mime === "" || mime.startsWith("text/") ||
+      /html|xml|json/.test(mime);
+    if (!isTextual) {
+      // Binary payload: pass the bytes and the real type through untouched.
+      return new Response(bytes, { headers: { "content-type": contentType } });
+    }
+
+    // Decode using the charset declared in the Content-Type header so that
+    // pages served in ISO-8859-1, windows-1252, etc. are read correctly.
+    const charset = /charset=([\w-]+)/i.exec(contentType)?.[1] ?? "utf-8";
+    let bodyText: string;
+    try {
+      bodyText = new TextDecoder(charset, { fatal: false }).decode(bytes);
+    } catch {
+      // Unknown charset label — fall back to UTF-8.
+      bodyText = new TextDecoder("utf-8", { fatal: false }).decode(bytes);
+    }
+    // A string body is sent as UTF-8, so the original charset parameter is
+    // dropped; an unreported type keeps the historical "text/html" default.
+    return new Response(bodyText, {
+      headers: { "content-type": mime || "text/html" },
+    });
+  } catch {
+    return null;
+  } finally {
+    if (tmpPath !== undefined) {
+      await Deno.remove(tmpPath).catch(() => {});
+    }
+  }
+}
+
+/**
+ * Fetch `url` with the shared browser-like headers, aborting an attempt that
+ * has not produced a response within `timeoutMs`.
+ *
+ * The abort timer is cleared as soon as `fetch()` resolves, so reading the
+ * response body afterwards is not time-limited.
+ *
+ * When the first attempt fails with a TypeError whose message mentions
+ * "certificate", two fallbacks are tried in order, each with its own
+ * `timeoutMs` budget:
+ *  1. a Deno HTTP client created with `allowInsecureCertificates: true`;
+ *  2. a `curl --insecure` subprocess (`fetchViaCurl`).
+ * Both skip certificate verification, so their content must be treated as
+ * untrusted. If both fail, the ORIGINAL certificate error is rethrown; any
+ * other error from the first attempt is rethrown immediately.
+ *
+ * @param url Absolute URL to fetch.
+ * @param timeoutMs Per-attempt timeout in milliseconds (default 5000).
+ * @returns The first Response obtained.
+ */
export async function fetchWithTimeout(
url: string,
timeoutMs = 5000,
): Promise<Response> {
- const controller = new AbortController();
- const timer = setTimeout(() => controller.abort(), timeoutMs);
+ async function attempt(
+ extraInit?: Record<string, unknown>,
+ ): Promise<Response> {
+ const controller = new AbortController();
+ const timer = setTimeout(() => controller.abort(), timeoutMs);
+ try {
+ return await fetch(url, {
+ signal: controller.signal,
+ headers: FETCH_HEADERS,
+ ...extraInit,
+ } as RequestInit);
+ } finally {
+ clearTimeout(timer);
+ }
+ }
+
try {
- return await fetch(url, {
- signal: controller.signal,
- headers: {
- "User-Agent":
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
- "Accept":
- "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
- "Accept-Language": "fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7",
- },
- });
- } finally {
- clearTimeout(timer);
+ return await attempt();
+ } catch (err) {
+ if (!(err instanceof TypeError && err.message.includes("certificate"))) {
+ throw err;
+ }
+
+ // Retry 1: allowInsecureCertificates handles expired / self-signed certs.
+ const client = Deno.createHttpClient({ allowInsecureCertificates: true });
+ try {
+ return await attempt({ client });
+ } catch {
+ /* UnsupportedSignatureAlgorithm etc. — rustls can't help */
+ } finally {
+ // NOTE(review): this runs before the caller has read the response body;
+ // confirm that closing the client does not interrupt a streaming body.
+ client.close();
+ }
+
+ // Retry 2: curl uses its own TLS stack and supports a wider set of
+ // certificate algorithms that Deno/rustls rejects.
+ const curlRes = await fetchViaCurl(url, timeoutMs);
+ if (curlRes) return curlRes;
+
+ throw err;
}
}
@@ -243,6 +331,66 @@ export function extractBestIcon(
return candidates[0].href;
}
+/**
+ * Lower-case filename tokens that mark an image as UI chrome (logos, icons,
+ * navigation art, tracking pixels, backgrounds, …) rather than page content.
+ * `isUiImage` compares them against whole tokens of the filename, so
+ * "site-logo.png" is treated as UI while "logotype.png" is not.
+ */
+const UI_IMAGE_KEYWORDS = new Set([
+ "logo",
+ "icon",
+ "sprite",
+ "favicon",
+ "avatar",
+ "banner",
+ "header",
+ "nav",
+ "menu",
+ "cart",
+ "search",
+ "tracking",
+ "pixel",
+ "bg",
+ "background",
+]);
+
+/**
+ * Heuristic: does `src` look like UI chrome rather than page content?
+ *
+ * True for SVG files, and for files whose name (extension removed, split on
+ * `-`, `_` and `.`) contains a token listed in UI_IMAGE_KEYWORDS. The query
+ * string and the fragment are ignored when extracting the filename.
+ *
+ * @param src Raw `src` attribute value (absolute, relative or protocol-relative).
+ * @returns true when the image should be skipped as UI chrome.
+ */
+function isUiImage(src: string): boolean {
+  // A fragment must not hide the extension: "sprite.svg#cart" is still an SVG.
+  if (/\.svg(?:[?#]|$)/i.test(src)) return true;
+  // Drop "?query" and "#fragment" so neither leaks into the filename tokens.
+  const path = src.split(/[?#]/)[0];
+  const filename = path.split("/").pop() ?? "";
+  const baseName = filename.replace(/\.[^.]+$/, ""); // strip extension
+  // Split on common filename separators (-, _, .) and check each token
+  return baseName.toLowerCase().split(/[-_.]/).some((token) =>
+    UI_IMAGE_KEYWORDS.has(token)
+  );
+}
+
+/**
+ * Return the `src` of the first `<img>` that looks like content rather than UI
+ * chrome. Skips SVGs, data URIs, and images whose filename tokens match common
+ * icon/logo/nav patterns (logo, icon, sprite, etc.).
+ * Resolves relative and protocol-relative URLs.
+ *
+ * @param html Raw HTML of the page.
+ * @param baseUrl URL of the page, used to resolve non-absolute `src` values.
+ * @returns The absolute image URL, or undefined when no `<img>` qualifies.
+ */
+export function extractFirstContentImage(
+  html: string,
+  baseUrl: string,
+): string | undefined {
+  const imgPattern = /<img[^>]+>/gi;
+  let match: RegExpExecArray | null;
+  while ((match = imgPattern.exec(html)) !== null) {
+    const tag = match[0];
+    // NOTE(review): `\b` also matches inside `data-src=`, so a lazy-load
+    // attribute that precedes `src` is picked first — confirm this is intended.
+    const src = /\bsrc=["']([^"']+)["']/i.exec(tag)?.[1];
+    if (!src || src.startsWith("data:")) continue;
+    if (isUiImage(src)) continue;
+    try {
+      return new URL(src, baseUrl).toString();
+    } catch {
+      // Unresolvable src — try the next <img>.
+      continue;
+    }
+  }
+  return undefined;
+}
+
/**
 * Extract `href` from the first `<link>` whose rel contains `relFragment`,
* resolved to an absolute URL using `baseUrl`.
@@ -307,6 +455,16 @@ export async function fetchRichContent(
return await provider.fetch(url);
} catch (err) {
console.error(`[rich-content] Failed to fetch metadata for ${url}:`, err);
- return undefined;
+ // Return a minimal stub so the caller always gets something displayable
+ // (e.g. when the site has a bad TLS cert or the fetch times out).
+ try {
+ return {
+ type: "generic",
+ url,
+ siteName: new URL(url).hostname.replace(/^www\./, ""),
+ };
+ } catch {
+ return undefined;
+ }
}
}
diff --git a/src/components/JournalCard.tsx b/src/components/JournalCard.tsx
index 37a0dcb..05e44ca 100644
--- a/src/components/JournalCard.tsx
+++ b/src/components/JournalCard.tsx
@@ -38,10 +38,28 @@ export function JournalCard(
navigate(dumpUrl(dump));
}
- const thumbnailUrl = dump.kind === "file" &&
- dump.fileMime?.startsWith("image/")
- ? `${API_URL}/api/files/${dump.id}?v=${dump.fileSize ?? 0}`
- : (dump.richContent?.thumbnailUrl ?? null);
+ const rawThumbnail =
+ dump.kind === "file" && dump.fileMime?.startsWith("image/")
+ ? `${API_URL}/api/files/${dump.id}?v=${dump.fileSize ?? 0}`
+ : (dump.richContent?.thumbnailUrl ?? null);
+
+ // Route external HTTP thumbnails through the server proxy to avoid
+ // mixed-content blocks when the frontend is served over HTTPS.
+ const thumbnailUrl = (() => {
+ if (!rawThumbnail) return null;
+ try {
+ const u = new URL(rawThumbnail);
+ if (
+ u.protocol === "http:" && u.hostname !== "localhost" &&
+ u.hostname !== "127.0.0.1"
+ ) {
+ return `${API_URL}/api/proxy-image?url=${
+ encodeURIComponent(rawThumbnail)
+ }`;
+ }
+ } catch { /* relative URL */ }
+ return rawThumbnail;
+ })();
const fallbackIcon = dump.kind === "file"
? (() => {
diff --git a/src/components/RichContentCard.tsx b/src/components/RichContentCard.tsx
index 99d792e..d11a1ca 100644
--- a/src/components/RichContentCard.tsx
+++ b/src/components/RichContentCard.tsx
@@ -1,6 +1,21 @@
import { useContext } from "react";
import type { RichContent } from "../model.ts";
import { PlayerContext } from "../contexts/PlayerContext.ts";
+import { API_URL } from "../config/api.ts";
+
+/**
+ * Rewrite a plain-HTTP thumbnail URL so it is loaded through the API's
+ * `/api/proxy-image` endpoint, which avoids mixed-content blocking on an
+ * HTTPS page. Any other input — HTTPS URLs, `localhost` / `127.0.0.1` URLs,
+ * and strings that do not parse as an absolute URL — is returned unchanged.
+ */
+function proxyIfHttp(url: string): string {
+  try {
+    const { protocol, hostname } = new URL(url);
+    const isLocalHost = hostname === "localhost" || hostname === "127.0.0.1";
+    if (protocol === "http:" && !isLocalHost) {
+      return `${API_URL}/api/proxy-image?url=${encodeURIComponent(url)}`;
+    }
+  } catch {
+    // relative URL — leave as-is
+  }
+  return url;
+}
interface RichContentCardProps {
richContent: RichContent;
@@ -38,7 +53,7 @@ export default function RichContentCard(
{richContent.thumbnailUrl
? (
{
@@ -65,7 +80,7 @@ export default function RichContentCard(
{richContent.thumbnailUrl
? (
{
@@ -96,7 +111,7 @@ export default function RichContentCard(
aria-label="Play"
>
{
@@ -108,7 +123,7 @@ export default function RichContentCard(
)
: (
{