Merge pull request #525 from rutkai/archive-singlefile

Add Single file archive method.
This commit is contained in:
Daniel
2024-03-24 08:11:20 +03:30
committed by GitHub
33 changed files with 228 additions and 31 deletions
+70 -13
View File
@@ -9,6 +9,9 @@ import { Collection, Link, User } from "@prisma/client";
import validateUrlSize from "./validateUrlSize";
import removeFile from "./storage/removeFile";
import Jimp from "jimp";
import { execSync } from "child_process";
import axios from "axios";
import { Agent } from "http";
import createFolder from "./storage/createFolder";
type LinksAndCollectionAndOwner = Link & {
@@ -93,6 +96,9 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
readable: !link.readable?.startsWith("archive")
? "pending"
: undefined,
singlefile: !link.singlefile?.startsWith("archive")
? "pending"
: undefined,
preview: !link.readable?.startsWith("archive")
? "pending"
: undefined,
@@ -113,19 +119,63 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
const content = await page.content();
// TODO single file
// const session = await page.context().newCDPSession(page);
// const doc = await session.send("Page.captureSnapshot", {
// format: "mhtml",
// });
// const saveDocLocally = (doc: any) => {
// console.log(doc);
// return createFile({
// data: doc,
// filePath: `archives/${targetLink.collectionId}/${link.id}.mhtml`,
// });
// };
// saveDocLocally(doc.data);
// Singlefile
if (
user.archiveAsSinglefile &&
!link.singlefile?.startsWith("archive")
) {
let command = process.env.SINGLEFILE_ARCHIVE_COMMAND;
let httpApi = process.env.SINGLEFILE_ARCHIVE_HTTP_API;
if (command) {
if (command.includes("{{URL}}")) {
try {
let html = execSync(command.replace("{{URL}}", link.url), {
timeout: 60000,
maxBuffer: 1024 * 1024 * 100,
});
await createFile({
data: html,
filePath: `archives/${targetLink.collectionId}/${link.id}.html`,
});
} catch (err) {
console.error(
"Error running SINGLEFILE_ARCHIVE_COMMAND:",
err
);
}
} else {
console.error(
"Invalid SINGLEFILE_ARCHIVE_COMMAND. Missing {{URL}}"
);
}
} else if (httpApi) {
try {
let html = await axios.post(
httpApi,
{ url: link.url },
{
headers: {
"Content-Type": "application/x-www-form-urlencoded",
},
httpAgent: new Agent({ keepAlive: false }),
}
);
await createFile({
data: html.data,
filePath: `archives/${targetLink.collectionId}/${link.id}.html`,
});
} catch (err) {
console.error(
"Error fetching Singlefile using SINGLEFILE_ARCHIVE_HTTP_API:",
err
);
}
} else {
console.error(
"No SINGLEFILE_ARCHIVE_COMMAND or SINGLEFILE_ARCHIVE_HTTP_API defined."
);
}
}
// Readability
const window = new JSDOM("").window;
@@ -284,6 +334,9 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
image: user.archiveAsScreenshot
? `archives/${linkExists.collectionId}/${link.id}.png`
: undefined,
singlefile: user.archiveAsSinglefile
? `archives/${linkExists.collectionId}/${link.id}.html`
: undefined,
pdf: user.archiveAsPDF
? `archives/${linkExists.collectionId}/${link.id}.pdf`
: undefined,
@@ -314,6 +367,9 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
image: !finalLink.image?.startsWith("archives")
? "unavailable"
: undefined,
singlefile: !finalLink.singlefile?.startsWith("archives")
? "unavailable"
: undefined,
pdf: !finalLink.pdf?.startsWith("archives")
? "unavailable"
: undefined,
@@ -324,6 +380,7 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
});
else {
removeFile({ filePath: `archives/${link.collectionId}/${link.id}.png` });
removeFile({ filePath: `archives/${link.collectionId}/${link.id}.html` });
removeFile({ filePath: `archives/${link.collectionId}/${link.id}.pdf` });
removeFile({
filePath: `archives/${link.collectionId}/${link.id}_readability.json`,
@@ -52,6 +52,9 @@ export default async function deleteLinksById(
removeFile({
filePath: `archives/${collectionIsAccessible?.id}/${linkId}_readability.json`,
});
removeFile({
filePath: `archives/${collectionIsAccessible?.id}/${linkId}.html`,
});
}
return { response: deletedLinks, status: 200 };
@@ -30,6 +30,9 @@ export default async function deleteLink(userId: number, linkId: number) {
removeFile({
filePath: `archives/${collectionIsAccessible?.id}/${linkId}_readability.json`,
});
removeFile({
filePath: `archives/${collectionIsAccessible?.id}/${linkId}.html`,
});
return { response: deleteLink, status: 200 };
}
@@ -160,6 +160,11 @@ export default async function updateLinkById(
`archives/${collectionIsAccessible?.id}/${linkId}_readability.json`,
`archives/${data.collection.id}/${linkId}_readability.json`
);
await moveFile(
`archives/${collectionIsAccessible?.id}/${linkId}.html`,
`archives/${data.collection.id}/${linkId}.html`
);
}
return { response: updatedLink, status: 200 };
@@ -75,6 +75,7 @@ export default async function getPublicUser(
username: lessSensitiveInfo.username,
image: lessSensitiveInfo.image,
archiveAsScreenshot: lessSensitiveInfo.archiveAsScreenshot,
archiveAsSinglefile: lessSensitiveInfo.archiveAsSinglefile,
archiveAsPDF: lessSensitiveInfo.archiveAsPDF,
};
@@ -187,6 +187,7 @@ export default async function updateUserById(
(value, index, self) => self.indexOf(value) === index
),
archiveAsScreenshot: data.archiveAsScreenshot,
archiveAsSinglefile: data.archiveAsSinglefile,
archiveAsPDF: data.archiveAsPDF,
archiveAsWaybackMachine: data.archiveAsWaybackMachine,
linksRouteTo: data.linksRouteTo,
+5
View File
@@ -10,6 +10,7 @@ import util from "util";
type ReturnContentTypes =
| "text/plain"
| "text/html"
| "image/jpeg"
| "image/png"
| "application/pdf"
@@ -61,6 +62,8 @@ export default async function readFile(filePath: string) {
contentType = "image/png";
} else if (filePath.endsWith("_readability.json")) {
contentType = "application/json";
} else if (filePath.endsWith(".html")) {
contentType = "text/html";
} else {
// if (filePath.endsWith(".jpg"))
contentType = "image/jpeg";
@@ -88,6 +91,8 @@ export default async function readFile(filePath: string) {
contentType = "image/png";
} else if (filePath.endsWith("_readability.json")) {
contentType = "application/json";
} else if (filePath.endsWith(".html")) {
contentType = "text/html";
} else {
// if (filePath.endsWith(".jpg"))
contentType = "image/jpeg";