diff --git a/components/ModalContent/PreservedFormatsModal.tsx b/components/ModalContent/PreservedFormatsModal.tsx
index 0030250e..0f2ef369 100644
--- a/components/ModalContent/PreservedFormatsModal.tsx
+++ b/components/ModalContent/PreservedFormatsModal.tsx
@@ -124,7 +124,7 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
clearInterval(interval);
}
};
- }, [link, getLink, link?.singlefile]);
+ }, [link?.singlefile]);
const updateArchive = async () => {
const load = toast.loading(t("sending_request"));
@@ -210,10 +210,7 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
className="mx-auto mb-3"
size={20}
/>
-
-
- There are more preserved formats in the queue
-
+ {t("there_are_more_formats")}
{t("check_back_later")}
) : undefined}
diff --git a/lib/api/archiveHandler.ts b/lib/api/archiveHandler.ts
index 862c6a26..cd95db4d 100644
--- a/lib/api/archiveHandler.ts
+++ b/lib/api/archiveHandler.ts
@@ -1,4 +1,4 @@
-import { LaunchOptions, chromium, devices } from "playwright";
+import { LaunchOptions, Page, chromium, devices } from "playwright";
import { prisma } from "./db";
import createFile from "./storage/createFile";
import sendToWayback from "./preservationScheme/sendToWayback";
@@ -9,6 +9,7 @@ import generatePreview from "./generatePreview";
import { removeFiles } from "./manageLinkFiles";
import archiveAsSinglefile from "./preservationScheme/archiveAsSinglefile";
import archiveAsReadability from "./preservationScheme/archiveAsReadablility";
+import shell from "shelljs";
type LinksAndCollectionAndOwner = Link & {
collection: Collection & {
@@ -50,6 +51,26 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
const page = await context.newPage();
+ // await page.goto("https://github.com", {
+ // waitUntil: "domcontentloaded",
+ // });
+
+ // console.log("Opening page:", link.url);
+
+ // await page.evaluate(autoScroll, Number(process.env.AUTOSCROLL_TIMEOUT) || 30);
+
+ // const dom = await page.content();
+
+ // console.log("The content", dom);
+
+ // shell
+ // .echo(dom)
+ // .exec(
+ // "monolith - -I -b https://marketplace.visualstudio.com/items?itemName=42Crunch.vscode-openapi -j -F -o monolith.html"
+ // );
+
+ // console.log("Monolith created!");
+
createFolder({
filePath: `archives/preview/${link.collectionId}`,
});
@@ -111,13 +132,13 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
});
// SingleFile
- if (
- !link.singlefile?.startsWith("archive") &&
- !link.singlefile?.startsWith("unavailable") &&
- user.archiveAsSinglefile &&
- link.url
- )
- await archiveAsSinglefile(link);
+ // if (
+ // !link.singlefile?.startsWith("archive") &&
+ // !link.singlefile?.startsWith("unavailable") &&
+ // user.archiveAsSinglefile &&
+ // link.url
+ // )
+ // await archiveAsSinglefile(link);
// send to archive.org
if (user.archiveAsWaybackMachine && link.url) sendToWayback(link.url);
@@ -131,13 +152,6 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
} else if (link.url) {
// archive url
- const context = await browser.newContext({
- ...devices["Desktop Chrome"],
- ignoreHTTPSErrors: process.env.IGNORE_HTTPS_ERRORS === "true",
- });
-
- const page = await context.newPage();
-
await page.goto(link.url, { waitUntil: "domcontentloaded" });
const content = await page.content();
@@ -150,115 +164,20 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
await archiveAsReadability(content, link);
// Preview
+ if (
+ !link.preview?.startsWith("archives") &&
+ !link.preview?.startsWith("unavailable")
+ )
+ await getArchivePreview(link, page);
- const ogImageUrl = await page.evaluate(() => {
- const metaTag = document.querySelector('meta[property="og:image"]');
- return metaTag ? (metaTag as any).content : null;
- });
-
- if (ogImageUrl) {
- console.log("Found og:image URL:", ogImageUrl);
-
- // Download the image
- const imageResponse = await page.goto(ogImageUrl);
-
- // Check if imageResponse is not null
- if (imageResponse && !link.preview?.startsWith("archive")) {
- const buffer = await imageResponse.body();
- await generatePreview(buffer, link.collectionId, link.id);
- }
-
- await page.goBack();
- } else if (!link.preview?.startsWith("archive")) {
- console.log("No og:image found");
- await page
- .screenshot({ type: "jpeg", quality: 20 })
- .then((screenshot) => {
- return createFile({
- data: screenshot,
- filePath: `archives/preview/${link.collectionId}/${link.id}.jpeg`,
- });
- })
- .then(() => {
- return prisma.link.update({
- where: { id: link.id },
- data: {
- preview: `archives/preview/${link.collectionId}/${link.id}.jpeg`,
- },
- });
- });
- }
- }
-
- if (
- (!link.image?.startsWith("archives") &&
- !link.image?.startsWith("unavailable")) ||
- (!link.pdf?.startsWith("archives") &&
- !link.pdf?.startsWith("unavailable"))
- ) {
// Screenshot/PDF
- await page.evaluate(
- autoScroll,
- Number(process.env.AUTOSCROLL_TIMEOUT) || 30
- );
-
- // Check if the user hasn't deleted the link by the time we're done scrolling
- const linkExists = await prisma.link.findUnique({
- where: { id: link.id },
- });
- if (linkExists) {
- const processingPromises = [];
-
- if (
- user.archiveAsScreenshot &&
- !link.image?.startsWith("archive")
- ) {
- processingPromises.push(
- page.screenshot({ fullPage: true }).then((screenshot) => {
- return createFile({
- data: screenshot,
- filePath: `archives/${linkExists.collectionId}/${link.id}.png`,
- });
- })
- );
- }
-
- // apply administrator's defined pdf margins or default to 15px
- const margins = {
- top: process.env.PDF_MARGIN_TOP || "15px",
- bottom: process.env.PDF_MARGIN_BOTTOM || "15px",
- };
-
- if (user.archiveAsPDF && !link.pdf?.startsWith("archive")) {
- processingPromises.push(
- page
- .pdf({
- width: "1366px",
- height: "1931px",
- printBackground: true,
- margin: margins,
- })
- .then((pdf) => {
- return createFile({
- data: pdf,
- filePath: `archives/${linkExists.collectionId}/${link.id}.pdf`,
- });
- })
- );
- }
- await Promise.allSettled(processingPromises);
- await prisma.link.update({
- where: { id: link.id },
- data: {
- image: user.archiveAsScreenshot
- ? `archives/${linkExists.collectionId}/${link.id}.png`
- : undefined,
- pdf: user.archiveAsPDF
- ? `archives/${linkExists.collectionId}/${link.id}.pdf`
- : undefined,
- },
- });
- }
+ if (
+ (!link.image?.startsWith("archives") &&
+ !link.image?.startsWith("unavailable")) ||
+ (!link.pdf?.startsWith("archives") &&
+ !link.pdf?.startsWith("unavailable"))
+ )
+ await captureScreenshotAndPdf(link, page, user);
}
})(),
timeoutPromise,
@@ -302,31 +221,6 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
}
}
-const autoScroll = async (AUTOSCROLL_TIMEOUT: number) => {
- const timeoutPromise = new Promise((_, reject) => {
- setTimeout(() => {
- reject(new Error(`Webpage was too long to be archived.`));
- }, AUTOSCROLL_TIMEOUT * 1000);
- });
-
- const scrollingPromise = new Promise((resolve) => {
- let totalHeight = 0;
- let distance = 100;
- let scrollDown = setInterval(() => {
- let scrollHeight = document.body.scrollHeight;
- window.scrollBy(0, distance);
- totalHeight += distance;
- if (totalHeight >= scrollHeight) {
- clearInterval(scrollDown);
- window.scroll(0, 0);
- resolve();
- }
- }, 100);
- });
-
- await Promise.race([scrollingPromise, timeoutPromise]);
-};
-
const imageHandler = async ({ url, id }: Link, extension: string) => {
const image = await fetch(url as string).then((res) => res.blob());
@@ -374,3 +268,133 @@ const pdfHandler = async ({ url, id }: Link) => {
});
}
};
+
+/**
+ * Stores a preview image for `link` from the already-loaded `page`: the page's
+ * `og:image` when that meta tag is present (opened in the same tab, which is then
+ * navigated back), otherwise a low-quality JPEG screenshot whose path is saved.
+ *
+ * @param link - Link being archived; nothing is done if `preview` starts with "archive".
+ * @param page - Playwright page, expected to be showing the link's URL already.
+ */
+const getArchivePreview = async (
+  link: LinksAndCollectionAndOwner,
+  page: Page
+) => {
+  // A preview is already stored, so do not navigate away from the page for nothing.
+  if (link.preview?.startsWith("archive")) return;
+
+  const ogImageUrl = await page.evaluate(() => {
+    const metaTag = document.querySelector('meta[property="og:image"]');
+    return metaTag ? (metaTag as any).content : null;
+  });
+
+  if (ogImageUrl) {
+    console.log("Found og:image URL:", ogImageUrl);
+    // Download the image; `page.goto` may resolve to null instead of a response.
+    const imageResponse = await page.goto(ogImageUrl);
+    if (imageResponse) {
+      const buffer = await imageResponse.body();
+      // Awaited so the preview is finished (and any failure surfaces) before moving on.
+      await generatePreview(buffer, link.collectionId, link.id);
+    }
+    await page.goBack();
+    return;
+  }
+
+  console.log("No og:image found");
+  const previewPath = `archives/preview/${link.collectionId}/${link.id}.jpeg`;
+  const screenshot = await page.screenshot({ type: "jpeg", quality: 20 });
+  await createFile({ data: screenshot, filePath: previewPath });
+  await prisma.link.update({
+    where: { id: link.id },
+    data: { preview: previewPath },
+  });
+};
+
+/**
+ * Captures the full-page PNG screenshot and/or PDF enabled on `user` and records the
+ * stored file paths on the link. The page is auto-scrolled first; nothing is captured
+ * if the link was deleted meanwhile. A path is written only for a capture that
+ * succeeded, so a failed capture is never recorded as an existing archive.
+ *
+ * @param link - Link being archived; a format whose field starts with "archive" is skipped.
+ * @param page - Playwright page to capture (expected to already show the link).
+ * @param user - Supplies the `archiveAsScreenshot` / `archiveAsPDF` flags.
+ */
+const captureScreenshotAndPdf = async (
+  link: LinksAndCollectionAndOwner,
+  page: Page,
+  user: User
+) => {
+  await page.evaluate(autoScroll, Number(process.env.AUTOSCROLL_TIMEOUT) || 30);
+
+  // Check if the user hasn't deleted the link by the time we're done scrolling
+  const linkExists = await prisma.link.findUnique({ where: { id: link.id } });
+  if (!linkExists) return;
+
+  const basePath = `archives/${linkExists.collectionId}/${link.id}`;
+  const wantsImage = user.archiveAsScreenshot && !link.image?.startsWith("archive");
+  const wantsPdf = user.archiveAsPDF && !link.pdf?.startsWith("archive");
+  const margins = {
+    top: process.env.PDF_MARGIN_TOP || "15px",
+    bottom: process.env.PDF_MARGIN_BOTTOM || "15px",
+  };
+
+  // Both captures start together; a skipped format simply settles as fulfilled.
+  const [imageResult, pdfResult] = await Promise.allSettled([
+    wantsImage
+      ? page
+          .screenshot({ fullPage: true, type: "png" })
+          .then((data) => createFile({ data, filePath: `${basePath}.png` }))
+      : undefined,
+    wantsPdf
+      ? page
+          .pdf({
+            width: "1366px",
+            height: "1931px",
+            printBackground: true,
+            margin: margins,
+          })
+          .then((data) => createFile({ data, filePath: `${basePath}.pdf` }))
+      : undefined,
+  ]);
+  for (const result of [imageResult, pdfResult])
+    if (result.status === "rejected") console.error(result.reason);
+
+  const imageSaved = wantsImage && imageResult.status === "fulfilled";
+  const pdfSaved = wantsPdf && pdfResult.status === "fulfilled";
+  // `undefined` leaves the corresponding field unchanged in the update.
+  await prisma.link.update({
+    where: { id: link.id },
+    data: {
+      image: imageSaved ? `${basePath}.png` : undefined,
+      pdf: pdfSaved ? `${basePath}.pdf` : undefined,
+    },
+  });
+};
+
+/**
+ * Passed to `page.evaluate`: steps down the page 100px every 100ms until the bottom is
+ * reached or AUTOSCROLL_TIMEOUT seconds elapse, then stops and returns to the top.
+ */
+const autoScroll = async (AUTOSCROLL_TIMEOUT: number) => {
+  await new Promise<void>((resolve) => {
+    const distance = 100;
+    let totalHeight = 0;
+    // Shared exit for both paths so neither timer keeps running afterwards.
+    const finish = () => {
+      clearInterval(scrollDown);
+      clearTimeout(deadline);
+      window.scroll(0, 0);
+      resolve();
+    };
+    const scrollDown = setInterval(() => {
+      const scrollHeight = document.body.scrollHeight;
+      window.scrollBy(0, distance);
+      totalHeight += distance;
+      if (totalHeight >= scrollHeight) finish();
+    }, 100);
+    const deadline = setTimeout(finish, AUTOSCROLL_TIMEOUT * 1000);
+  });
+};
diff --git a/lib/api/generatePreview.ts b/lib/api/generatePreview.ts
index 6e816303..3c2da0f2 100644
--- a/lib/api/generatePreview.ts
+++ b/lib/api/generatePreview.ts
@@ -1,7 +1,6 @@
import Jimp from "jimp";
import { prisma } from "./db";
import createFile from "./storage/createFile";
-import createFolder from "./storage/createFolder";
const generatePreview = async (
buffer: Buffer,
diff --git a/lib/api/preservationScheme/archiveAsReadablility.ts b/lib/api/preservationScheme/archiveAsReadablility.ts
index dec27dac..0f9a5c7a 100644
--- a/lib/api/preservationScheme/archiveAsReadablility.ts
+++ b/lib/api/preservationScheme/archiveAsReadablility.ts
@@ -14,11 +14,9 @@ const archiveAsReadablility = async (content: string, link: Link) => {
const articleText = article?.textContent
.replace(/ +(?= )/g, "") // strip out multiple spaces
.replace(/(\r\n|\n|\r)/gm, " "); // strip out line breaks
- if (
- articleText &&
- articleText !== "" &&
- !link.readable?.startsWith("archive")
- ) {
+
+ console.log(articleText);
+ if (articleText && articleText !== "") {
const collectionId = (
await prisma.link.findUnique({
where: { id: link.id },
diff --git a/package.json b/package.json
index 0045802c..70271221 100644
--- a/package.json
+++ b/package.json
@@ -70,6 +70,7 @@
"react-masonry-css": "^1.0.16",
"react-select": "^5.7.4",
"react-spinners": "^0.13.8",
+ "shelljs": "^0.8.5",
"socks-proxy-agent": "^8.0.2",
"stripe": "^12.13.0",
"tailwind-merge": "^2.3.0",
diff --git a/public/locales/en/common.json b/public/locales/en/common.json
index 7b6dd576..88f5c331 100644
--- a/public/locales/en/common.json
+++ b/public/locales/en/common.json
@@ -220,8 +220,9 @@
"github": "GitHub",
"twitter": "Twitter",
"mastodon": "Mastodon",
- "link_preservation_in_queue": "LThe Link preservation is currently in the queue",
+ "link_preservation_in_queue": "The Link preservation is currently in the queue",
"check_back_later": "Please check back later to see the result",
+ "there_are_more_formats": "There are more preserved formats in the queue",
"settings": "Settings",
"switch_to": "Switch to {{theme}}",
"logout": "Logout",
diff --git a/yarn.lock b/yarn.lock
index 55074b61..05b76f35 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -3514,6 +3514,11 @@ function-bind@^1.1.1:
resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d"
integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==
+function-bind@^1.1.2:
+ version "1.1.2"
+ resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.2.tgz#2c02d864d97f3ea6c8830c464cbd11ab6eab7a1c"
+ integrity sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==
+
function.prototype.name@^1.1.5:
version "1.1.5"
resolved "https://registry.yarnpkg.com/function.prototype.name/-/function.prototype.name-1.1.5.tgz#cce0505fe1ffb80503e6f9e46cc64e46a12a9621"
@@ -3651,7 +3656,7 @@ glob@7.1.7:
once "^1.3.0"
path-is-absolute "^1.0.0"
-glob@^7.1.3:
+glob@^7.0.0, glob@^7.1.3:
version "7.2.3"
resolved "https://registry.yarnpkg.com/glob/-/glob-7.2.3.tgz#b8df0fb802bbfa8e89bd1d938b4e16578ed44f2b"
integrity sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==
@@ -3821,6 +3826,13 @@ has@^1.0.3:
dependencies:
function-bind "^1.1.1"
+hasown@^2.0.0:
+ version "2.0.2"
+ resolved "https://registry.yarnpkg.com/hasown/-/hasown-2.0.2.tgz#003eaf91be7adc372e84ec59dc37252cedb80003"
+ integrity sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==
+ dependencies:
+ function-bind "^1.1.2"
+
hexoid@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/hexoid/-/hexoid-1.0.0.tgz#ad10c6573fb907de23d9ec63a711267d9dc9bc18"
@@ -3972,6 +3984,11 @@ internal-slot@^1.0.3, internal-slot@^1.0.4:
has "^1.0.3"
side-channel "^1.0.4"
+interpret@^1.0.0:
+ version "1.4.0"
+ resolved "https://registry.yarnpkg.com/interpret/-/interpret-1.4.0.tgz#665ab8bc4da27a774a40584e812e3e0fa45b1a1e"
+ integrity sha512-agE4QfB2Lkp9uICn7BAqoscw4SZP9kTE2hxiFI3jBPmXJfdqiahTbUuKGsMoN2GtqL9AxhYioAcVvgsb1HvRbA==
+
invariant@^2.2.4:
version "2.2.4"
resolved "https://registry.yarnpkg.com/invariant/-/invariant-2.2.4.tgz#610f3c92c9359ce1db616e538008d23ff35158e6"
@@ -4053,6 +4070,13 @@ is-core-module@^2.10.0, is-core-module@^2.11.0, is-core-module@^2.9.0:
dependencies:
has "^1.0.3"
+is-core-module@^2.13.0:
+ version "2.13.1"
+ resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.13.1.tgz#ad0d7532c6fea9da1ebdc82742d74525c6273384"
+ integrity sha512-hHrIjvZsftOsvKSn2TRYl63zvxsgE0K+0mYMoH6gD4omR5IWB2KynivBQczo3+wF1cCkjzvptnI9Q0sPU66ilw==
+ dependencies:
+ hasown "^2.0.0"
+
is-date-object@^1.0.1, is-date-object@^1.0.5:
version "1.0.5"
resolved "https://registry.yarnpkg.com/is-date-object/-/is-date-object-1.0.5.tgz#0841d5536e724c25597bf6ea62e1bd38298df31f"
@@ -5372,6 +5396,13 @@ readdirp@~3.6.0:
dependencies:
picomatch "^2.2.1"
+rechoir@^0.6.2:
+ version "0.6.2"
+ resolved "https://registry.yarnpkg.com/rechoir/-/rechoir-0.6.2.tgz#85204b54dba82d5742e28c96756ef43af50e3384"
+ integrity sha512-HFM8rkZ+i3zrV+4LQjwQ0W+ez98pApMGM3HUrN04j3CqzPOzl9nmP15Y8YXNm8QHGv/eacOVEjqhmWpkRV0NAw==
+ dependencies:
+ resolve "^1.1.6"
+
redux@^4.0.0, redux@^4.0.1:
version "4.2.1"
resolved "https://registry.yarnpkg.com/redux/-/redux-4.2.1.tgz#c08f4306826c49b5e9dc901dee0452ea8fce6197"
@@ -5439,6 +5470,15 @@ resolve-from@^4.0.0:
resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-4.0.0.tgz#4abcd852ad32dd7baabfe9b40e00a36db5f392e6"
integrity sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==
+resolve@^1.1.6:
+ version "1.22.8"
+ resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.22.8.tgz#b6c87a9f2aa06dfab52e3d70ac8cde321fa5a48d"
+ integrity sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw==
+ dependencies:
+ is-core-module "^2.13.0"
+ path-parse "^1.0.7"
+ supports-preserve-symlinks-flag "^1.0.0"
+
resolve@^1.1.7, resolve@^1.19.0, resolve@^1.22.1, resolve@^1.22.2:
version "1.22.2"
resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.22.2.tgz#0ed0943d4e301867955766c9f3e1ae6d01c6845f"
@@ -5565,6 +5605,15 @@ shell-quote@^1.8.1:
resolved "https://registry.yarnpkg.com/shell-quote/-/shell-quote-1.8.1.tgz#6dbf4db75515ad5bac63b4f1894c3a154c766680"
integrity sha512-6j1W9l1iAs/4xYBI1SYOVZyFcCis9b4KCLQ8fgAGG07QvzaRLVVRQvAy85yNmmZSjYjg4MWh4gNvlPujU/5LpA==
+shelljs@^0.8.5:
+ version "0.8.5"
+ resolved "https://registry.yarnpkg.com/shelljs/-/shelljs-0.8.5.tgz#de055408d8361bed66c669d2f000538ced8ee20c"
+ integrity sha512-TiwcRcrkhHvbrZbnRcFYMLl30Dfov3HKqzp5tO5b4pt6G/SezKcYhmDg15zXVBswHmctSAQKznqNW2LO5tTDow==
+ dependencies:
+ glob "^7.0.0"
+ interpret "^1.0.0"
+ rechoir "^0.6.2"
+
side-channel@^1.0.4:
version "1.0.4"
resolved "https://registry.yarnpkg.com/side-channel/-/side-channel-1.0.4.tgz#efce5c8fdc104ee751b25c58d4290011fa5ea2cf"