Merge pull request #525 from rutkai/archive-singlefile
Add Single file archive method.
This commit is contained in:
+70
-13
@@ -9,6 +9,9 @@ import { Collection, Link, User } from "@prisma/client";
 import validateUrlSize from "./validateUrlSize";
 import removeFile from "./storage/removeFile";
 import Jimp from "jimp";
+import { execSync } from "child_process";
+import axios from "axios";
+import { Agent } from "http";
 import createFolder from "./storage/createFolder";
 
 type LinksAndCollectionAndOwner = Link & {
@@ -93,6 +96,9 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
 readable: !link.readable?.startsWith("archive")
   ? "pending"
   : undefined,
+singlefile: !link.singlefile?.startsWith("archive")
+  ? "pending"
+  : undefined,
 preview: !link.readable?.startsWith("archive")
   ? "pending"
   : undefined,
@@ -113,19 +119,63 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
 
 const content = await page.content();
 
-// TODO single file
-// const session = await page.context().newCDPSession(page);
-// const doc = await session.send("Page.captureSnapshot", {
-// format: "mhtml",
-// });
-// const saveDocLocally = (doc: any) => {
-// console.log(doc);
-// return createFile({
-// data: doc,
-// filePath: `archives/${targetLink.collectionId}/${link.id}.mhtml`,
-// });
-// };
-// saveDocLocally(doc.data);
+// Singlefile
+if (
+  user.archiveAsSinglefile &&
+  !link.singlefile?.startsWith("archive")
+) {
+  let command = process.env.SINGLEFILE_ARCHIVE_COMMAND;
+  let httpApi = process.env.SINGLEFILE_ARCHIVE_HTTP_API;
+  if (command) {
+    if (command.includes("{{URL}}")) {
+      try {
+        let html = execSync(command.replace("{{URL}}", link.url), {
+          timeout: 60000,
+          maxBuffer: 1024 * 1024 * 100,
+        });
+        await createFile({
+          data: html,
+          filePath: `archives/${targetLink.collectionId}/${link.id}.html`,
+        });
+      } catch (err) {
+        console.error(
+          "Error running SINGLEFILE_ARCHIVE_COMMAND:",
+          err
+        );
+      }
+    } else {
+      console.error(
+        "Invalid SINGLEFILE_ARCHIVE_COMMAND. Missing {{URL}}"
+      );
+    }
+  } else if (httpApi) {
+    try {
+      let html = await axios.post(
+        httpApi,
+        { url: link.url },
+        {
+          headers: {
+            "Content-Type": "application/x-www-form-urlencoded",
+          },
+          httpAgent: new Agent({ keepAlive: false }),
+        }
+      );
+      await createFile({
+        data: html.data,
+        filePath: `archives/${targetLink.collectionId}/${link.id}.html`,
+      });
+    } catch (err) {
+      console.error(
+        "Error fetching Singlefile using SINGLEFILE_ARCHIVE_HTTP_API:",
+        err
+      );
+    }
+  } else {
+    console.error(
+      "No SINGLEFILE_ARCHIVE_COMMAND or SINGLEFILE_ARCHIVE_HTTP_API defined."
+    );
+  }
+}
 
 // Readability
 const window = new JSDOM("").window;
@@ -284,6 +334,9 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
 image: user.archiveAsScreenshot
   ? `archives/${linkExists.collectionId}/${link.id}.png`
   : undefined,
+singlefile: user.archiveAsSinglefile
+  ? `archives/${linkExists.collectionId}/${link.id}.html`
+  : undefined,
 pdf: user.archiveAsPDF
   ? `archives/${linkExists.collectionId}/${link.id}.pdf`
   : undefined,
@@ -314,6 +367,9 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
 image: !finalLink.image?.startsWith("archives")
   ? "unavailable"
   : undefined,
+singlefile: !finalLink.singlefile?.startsWith("archives")
+  ? "unavailable"
+  : undefined,
 pdf: !finalLink.pdf?.startsWith("archives")
   ? "unavailable"
   : undefined,
@@ -324,6 +380,7 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
   });
 else {
   removeFile({ filePath: `archives/${link.collectionId}/${link.id}.png` });
+  removeFile({ filePath: `archives/${link.collectionId}/${link.id}.html` });
   removeFile({ filePath: `archives/${link.collectionId}/${link.id}.pdf` });
   removeFile({
     filePath: `archives/${link.collectionId}/${link.id}_readability.json`,
@@ -52,6 +52,9 @@ export default async function deleteLinksById(
   removeFile({
     filePath: `archives/${collectionIsAccessible?.id}/${linkId}_readability.json`,
   });
+  removeFile({
+    filePath: `archives/${collectionIsAccessible?.id}/${linkId}.html`,
+  });
 }
 
 return { response: deletedLinks, status: 200 };
@@ -30,6 +30,9 @@ export default async function deleteLink(userId: number, linkId: number) {
   removeFile({
     filePath: `archives/${collectionIsAccessible?.id}/${linkId}_readability.json`,
   });
+  removeFile({
+    filePath: `archives/${collectionIsAccessible?.id}/${linkId}.html`,
+  });
 
   return { response: deleteLink, status: 200 };
 }
@@ -160,6 +160,11 @@ export default async function updateLinkById(
     `archives/${collectionIsAccessible?.id}/${linkId}_readability.json`,
     `archives/${data.collection.id}/${linkId}_readability.json`
   );
+
+  await moveFile(
+    `archives/${collectionIsAccessible?.id}/${linkId}.html`,
+    `archives/${data.collection.id}/${linkId}.html`
+  );
 }
 
 return { response: updatedLink, status: 200 };
@@ -75,6 +75,7 @@ export default async function getPublicUser(
   username: lessSensitiveInfo.username,
   image: lessSensitiveInfo.image,
   archiveAsScreenshot: lessSensitiveInfo.archiveAsScreenshot,
+  archiveAsSinglefile: lessSensitiveInfo.archiveAsSinglefile,
   archiveAsPDF: lessSensitiveInfo.archiveAsPDF,
 };
 
@@ -187,6 +187,7 @@ export default async function updateUserById(
   (value, index, self) => self.indexOf(value) === index
 ),
 archiveAsScreenshot: data.archiveAsScreenshot,
+archiveAsSinglefile: data.archiveAsSinglefile,
 archiveAsPDF: data.archiveAsPDF,
 archiveAsWaybackMachine: data.archiveAsWaybackMachine,
 linksRouteTo: data.linksRouteTo,
@@ -10,6 +10,7 @@ import util from "util";
 
 type ReturnContentTypes =
   | "text/plain"
+  | "text/html"
   | "image/jpeg"
   | "image/png"
   | "application/pdf"
@@ -61,6 +62,8 @@ export default async function readFile(filePath: string) {
   contentType = "image/png";
 } else if (filePath.endsWith("_readability.json")) {
   contentType = "application/json";
+} else if (filePath.endsWith(".html")) {
+  contentType = "text/html";
 } else {
   // if (filePath.endsWith(".jpg"))
   contentType = "image/jpeg";
@@ -88,6 +91,8 @@ export default async function readFile(filePath: string) {
   contentType = "image/png";
 } else if (filePath.endsWith("_readability.json")) {
   contentType = "application/json";
+} else if (filePath.endsWith(".html")) {
+  contentType = "text/html";
 } else {
   // if (filePath.endsWith(".jpg"))
   contentType = "image/jpeg";
Reference in New Issue
Block a user