code refactoring

This commit is contained in:
daniel31x13
2024-06-26 13:54:03 -04:00
parent c68f9d68ad
commit 3618ba907d
7 changed files with 229 additions and 160 deletions
+171 -147
View File
@@ -1,4 +1,4 @@
import { LaunchOptions, chromium, devices } from "playwright";
import { LaunchOptions, Page, chromium, devices } from "playwright";
import { prisma } from "./db";
import createFile from "./storage/createFile";
import sendToWayback from "./preservationScheme/sendToWayback";
@@ -9,6 +9,7 @@ import generatePreview from "./generatePreview";
import { removeFiles } from "./manageLinkFiles";
import archiveAsSinglefile from "./preservationScheme/archiveAsSinglefile";
import archiveAsReadability from "./preservationScheme/archiveAsReadablility";
import shell from "shelljs";
type LinksAndCollectionAndOwner = Link & {
collection: Collection & {
@@ -50,6 +51,26 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
const page = await context.newPage();
// await page.goto("https://github.com", {
// waitUntil: "domcontentloaded",
// });
// console.log("Opening page:", link.url);
// await page.evaluate(autoScroll, Number(process.env.AUTOSCROLL_TIMEOUT) || 30);
// const dom = await page.content();
// console.log("The content", dom);
// shell
// .echo(dom)
// .exec(
// "monolith - -I -b https://marketplace.visualstudio.com/items?itemName=42Crunch.vscode-openapi -j -F -o monolith.html"
// );
// console.log("Monolith created!");
createFolder({
filePath: `archives/preview/${link.collectionId}`,
});
@@ -111,13 +132,13 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
});
// SingleFile
if (
!link.singlefile?.startsWith("archive") &&
!link.singlefile?.startsWith("unavailable") &&
user.archiveAsSinglefile &&
link.url
)
await archiveAsSinglefile(link);
// if (
// !link.singlefile?.startsWith("archive") &&
// !link.singlefile?.startsWith("unavailable") &&
// user.archiveAsSinglefile &&
// link.url
// )
// await archiveAsSinglefile(link);
// send to archive.org
if (user.archiveAsWaybackMachine && link.url) sendToWayback(link.url);
@@ -131,13 +152,6 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
} else if (link.url) {
// archive url
const context = await browser.newContext({
...devices["Desktop Chrome"],
ignoreHTTPSErrors: process.env.IGNORE_HTTPS_ERRORS === "true",
});
const page = await context.newPage();
await page.goto(link.url, { waitUntil: "domcontentloaded" });
const content = await page.content();
@@ -150,115 +164,20 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
await archiveAsReadability(content, link);
// Preview
if (
!link.preview?.startsWith("archives") &&
!link.preview?.startsWith("unavailable")
)
await getArchivePreview(link, page);
const ogImageUrl = await page.evaluate(() => {
const metaTag = document.querySelector('meta[property="og:image"]');
return metaTag ? (metaTag as any).content : null;
});
if (ogImageUrl) {
console.log("Found og:image URL:", ogImageUrl);
// Download the image
const imageResponse = await page.goto(ogImageUrl);
// Check if imageResponse is not null
if (imageResponse && !link.preview?.startsWith("archive")) {
const buffer = await imageResponse.body();
await generatePreview(buffer, link.collectionId, link.id);
}
await page.goBack();
} else if (!link.preview?.startsWith("archive")) {
console.log("No og:image found");
await page
.screenshot({ type: "jpeg", quality: 20 })
.then((screenshot) => {
return createFile({
data: screenshot,
filePath: `archives/preview/${link.collectionId}/${link.id}.jpeg`,
});
})
.then(() => {
return prisma.link.update({
where: { id: link.id },
data: {
preview: `archives/preview/${link.collectionId}/${link.id}.jpeg`,
},
});
});
}
}
if (
(!link.image?.startsWith("archives") &&
!link.image?.startsWith("unavailable")) ||
(!link.pdf?.startsWith("archives") &&
!link.pdf?.startsWith("unavailable"))
) {
// Screenshot/PDF
await page.evaluate(
autoScroll,
Number(process.env.AUTOSCROLL_TIMEOUT) || 30
);
// Check if the user hasn't deleted the link by the time we're done scrolling
const linkExists = await prisma.link.findUnique({
where: { id: link.id },
});
if (linkExists) {
const processingPromises = [];
if (
user.archiveAsScreenshot &&
!link.image?.startsWith("archive")
) {
processingPromises.push(
page.screenshot({ fullPage: true }).then((screenshot) => {
return createFile({
data: screenshot,
filePath: `archives/${linkExists.collectionId}/${link.id}.png`,
});
})
);
}
// apply administrator's defined pdf margins or default to 15px
const margins = {
top: process.env.PDF_MARGIN_TOP || "15px",
bottom: process.env.PDF_MARGIN_BOTTOM || "15px",
};
if (user.archiveAsPDF && !link.pdf?.startsWith("archive")) {
processingPromises.push(
page
.pdf({
width: "1366px",
height: "1931px",
printBackground: true,
margin: margins,
})
.then((pdf) => {
return createFile({
data: pdf,
filePath: `archives/${linkExists.collectionId}/${link.id}.pdf`,
});
})
);
}
await Promise.allSettled(processingPromises);
await prisma.link.update({
where: { id: link.id },
data: {
image: user.archiveAsScreenshot
? `archives/${linkExists.collectionId}/${link.id}.png`
: undefined,
pdf: user.archiveAsPDF
? `archives/${linkExists.collectionId}/${link.id}.pdf`
: undefined,
},
});
}
if (
(!link.image?.startsWith("archives") &&
!link.image?.startsWith("unavailable")) ||
(!link.pdf?.startsWith("archives") &&
!link.pdf?.startsWith("unavailable"))
)
await captureScreenshotAndPdf(link, page, user);
}
})(),
timeoutPromise,
@@ -302,31 +221,6 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
}
}
const autoScroll = async (AUTOSCROLL_TIMEOUT: number) => {
const timeoutPromise = new Promise<void>((_, reject) => {
setTimeout(() => {
reject(new Error(`Webpage was too long to be archived.`));
}, AUTOSCROLL_TIMEOUT * 1000);
});
const scrollingPromise = new Promise<void>((resolve) => {
let totalHeight = 0;
let distance = 100;
let scrollDown = setInterval(() => {
let scrollHeight = document.body.scrollHeight;
window.scrollBy(0, distance);
totalHeight += distance;
if (totalHeight >= scrollHeight) {
clearInterval(scrollDown);
window.scroll(0, 0);
resolve();
}
}, 100);
});
await Promise.race([scrollingPromise, timeoutPromise]);
};
const imageHandler = async ({ url, id }: Link, extension: string) => {
const image = await fetch(url as string).then((res) => res.blob());
@@ -374,3 +268,133 @@ const pdfHandler = async ({ url, id }: Link) => {
});
}
};
const getArchivePreview = async (
link: LinksAndCollectionAndOwner,
page: Page
) => {
const ogImageUrl = await page.evaluate(() => {
const metaTag = document.querySelector('meta[property="og:image"]');
return metaTag ? (metaTag as any).content : null;
});
if (ogImageUrl) {
console.log("Found og:image URL:", ogImageUrl);
// Download the image
const imageResponse = await page.goto(ogImageUrl);
// Check if imageResponse is not null
if (imageResponse && !link.preview?.startsWith("archive")) {
const buffer = await imageResponse.body();
generatePreview(buffer, link.collectionId, link.id);
}
await page.goBack();
} else if (!link.preview?.startsWith("archive")) {
console.log("No og:image found");
await page
.screenshot({ type: "jpeg", quality: 20 })
.then((screenshot) => {
return createFile({
data: screenshot,
filePath: `archives/preview/${link.collectionId}/${link.id}.jpeg`,
});
})
.then(() => {
return prisma.link.update({
where: { id: link.id },
data: {
preview: `archives/preview/${link.collectionId}/${link.id}.jpeg`,
},
});
});
}
};
const captureScreenshotAndPdf = async (
link: LinksAndCollectionAndOwner,
page: Page,
user: User
) => {
await page.evaluate(autoScroll, Number(process.env.AUTOSCROLL_TIMEOUT) || 30);
// Check if the user hasn't deleted the link by the time we're done scrolling
const linkExists = await prisma.link.findUnique({
where: { id: link.id },
});
if (linkExists) {
const processingPromises = [];
if (user.archiveAsScreenshot && !link.image?.startsWith("archive")) {
processingPromises.push(
page.screenshot({ fullPage: true, type: "png" }).then((screenshot) => {
return createFile({
data: screenshot,
filePath: `archives/${linkExists.collectionId}/${link.id}.png`,
});
})
);
}
const margins = {
top: process.env.PDF_MARGIN_TOP || "15px",
bottom: process.env.PDF_MARGIN_BOTTOM || "15px",
};
if (user.archiveAsPDF && !link.pdf?.startsWith("archive")) {
processingPromises.push(
page
.pdf({
width: "1366px",
height: "1931px",
printBackground: true,
margin: margins,
})
.then((pdf) => {
return createFile({
data: pdf,
filePath: `archives/${linkExists.collectionId}/${link.id}.pdf`,
});
})
);
}
await Promise.allSettled(processingPromises);
await prisma.link.update({
where: { id: link.id },
data: {
image: user.archiveAsScreenshot
? `archives/${linkExists.collectionId}/${link.id}.png`
: undefined,
pdf: user.archiveAsPDF
? `archives/${linkExists.collectionId}/${link.id}.pdf`
: undefined,
},
});
}
};
const autoScroll = async (AUTOSCROLL_TIMEOUT: number) => {
const timeoutPromise = new Promise<void>((resolve) => {
setTimeout(() => {
resolve();
}, AUTOSCROLL_TIMEOUT * 1000);
});
const scrollingPromise = new Promise<void>((resolve) => {
let totalHeight = 0;
let distance = 100;
let scrollDown = setInterval(() => {
let scrollHeight = document.body.scrollHeight;
window.scrollBy(0, distance);
totalHeight += distance;
if (totalHeight >= scrollHeight) {
clearInterval(scrollDown);
window.scroll(0, 0);
resolve();
}
}, 100);
});
await Promise.race([scrollingPromise, timeoutPromise]);
};