code improvements
This commit is contained in:
@@ -0,0 +1,44 @@
|
||||
import { Readability } from "@mozilla/readability";
|
||||
import { JSDOM } from "jsdom";
|
||||
import DOMPurify from "dompurify";
|
||||
import { prisma } from "../db";
|
||||
import createFile from "../storage/createFile";
|
||||
import { Link } from "@prisma/client";
|
||||
|
||||
/**
 * Builds a reader-mode version of a page and stores it for the given link.
 *
 * The HTML is sanitized with DOMPurify, parsed with Readability, and the
 * resulting article is written as JSON through `createFile`. The link row is
 * then updated with the archive path and the flattened article text.
 *
 * Nothing is written when Readability yields no text, when `link.readable`
 * already starts with "archive", or when the link's collection cannot be found.
 *
 * @param content Raw HTML of the page to archive.
 * @param link    Link record the archive belongs to.
 */
const archiveAsReadablility = async (content: string, link: Link) => {
  // DOMPurify needs a DOM implementation; an empty JSDOM window supplies one.
  const window = new JSDOM("").window;
  const purify = DOMPurify(window);
  const cleanedUpContent = purify.sanitize(content);

  // Only pass `url` when the link actually has one: JSDOM rejects an empty
  // string as an invalid URL instead of falling back to its default.
  const dom = link.url
    ? new JSDOM(cleanedUpContent, { url: link.url })
    : new JSDOM(cleanedUpContent);
  const article = new Readability(dom.window.document).parse();

  const articleText = article?.textContent
    .replace(/ +(?= )/g, "") // strip out multiple spaces
    .replace(/(\r\n|\n|\r)/gm, " "); // strip out line breaks

  // Skip when there is no text to store or the link is already archived.
  if (!articleText || link.readable?.startsWith("archive")) return;

  const collectionId = (
    await prisma.link.findUnique({
      where: { id: link.id },
      select: { collectionId: true },
    })
  )?.collectionId;

  // Without a collection the path would become "archives/undefined/...".
  if (!collectionId) {
    console.error("Error archiving as readability: Collection ID not found");
    return;
  }

  const filePath = `archives/${collectionId}/${link.id}_readability.json`;

  await createFile({
    data: JSON.stringify(article),
    filePath,
  });

  await prisma.link.update({
    where: { id: link.id },
    data: {
      readable: filePath,
      textContent: articleText,
    },
  });
};
|
||||
|
||||
export default archiveAsReadablility;
|
||||
@@ -0,0 +1,111 @@
|
||||
import { execSync } from "child_process";
|
||||
import createFile from "../storage/createFile";
|
||||
import axios from "axios";
|
||||
import { Agent } from "http";
|
||||
import { prisma } from "../db";
|
||||
import { Link } from "@prisma/client";
|
||||
|
||||
/** Payload type accepted by `createFile` for its `data` field. */
type SinglefileData = Parameters<typeof createFile>[0]["data"];

/**
 * Writes the captured HTML to `archives/<collectionId>/<linkId>.html` and
 * records that path on the link's `singlefile` field.
 *
 * Logs an error (prefixed with `errorPrefix`) and does nothing when the
 * link's collection cannot be found.
 */
const saveSinglefile = async (
  link: Link,
  data: SinglefileData,
  errorPrefix: string
): Promise<void> => {
  const collectionId = (
    await prisma.link.findUnique({
      where: { id: link.id },
      select: { collectionId: true },
    })
  )?.collectionId;

  if (!collectionId) {
    console.error(`${errorPrefix} Collection ID not found`);
    return;
  }

  const filePath = `archives/${collectionId}/${link.id}.html`;

  await createFile({ data, filePath });
  await prisma.link.update({
    where: { id: link.id },
    data: { singlefile: filePath },
  });
};

/**
 * Archives a link as a single HTML file.
 *
 * The capture source is chosen from the environment:
 * - `SINGLEFILE_ARCHIVE_COMMAND`: a shell command template containing
 *   `{{URL}}`, whose stdout is the captured HTML;
 * - otherwise `SINGLEFILE_ARCHIVE_HTTP_API`: an endpoint that receives the
 *   URL in a POST request and responds with the captured HTML.
 *
 * Failures are logged with `console.error` and never thrown to the caller.
 * Links without a URL are ignored.
 *
 * @param link Link record to archive.
 */
const archiveAsSinglefile = async (link: Link) => {
  if (!link.url) return;
  const url = link.url;

  const command = process.env.SINGLEFILE_ARCHIVE_COMMAND;
  const httpApi = process.env.SINGLEFILE_ARCHIVE_HTTP_API;

  if (command) {
    const errorPrefix = "Error running SINGLEFILE_ARCHIVE_COMMAND:";

    if (!command.includes("{{URL}}")) {
      console.error("Invalid SINGLEFILE_ARCHIVE_COMMAND. Missing {{URL}}");
      return;
    }

    try {
      // A function replacer inserts the URL verbatim; a plain string would
      // have "$&", "$1", "$`" etc. interpreted as replacement patterns.
      // NOTE(security): the URL is still interpolated into a shell command
      // line. It cannot be quoted here without knowing how the template
      // quotes {{URL}}, so callers must only pass trusted/validated URLs.
      const html = execSync(
        command.replace("{{URL}}", () => url),
        {
          timeout: 120000,
          maxBuffer: 1024 * 1024 * 30,
        }
      );

      if (!html.length) {
        console.error(`${errorPrefix} Empty buffer`);
        return;
      }

      await saveSinglefile(link, html, errorPrefix);
    } catch (err) {
      console.error(errorPrefix, err);
    }
  } else if (httpApi) {
    const errorPrefix =
      "Error fetching Singlefile using SINGLEFILE_ARCHIVE_HTTP_API:";

    try {
      const response = await axios.post(
        httpApi,
        { url },
        {
          headers: {
            "Content-Type": "application/x-www-form-urlencoded",
          },
          httpAgent: new Agent({ keepAlive: false }),
        }
      );

      // Optional chaining so a missing body is reported as empty rather
      // than surfacing as a TypeError.
      if (!response.data?.length) {
        console.error(`${errorPrefix} Empty response`);
        return;
      }

      await saveSinglefile(link, response.data, errorPrefix);
    } catch (err) {
      console.error(errorPrefix, err);
    }
  }
};
|
||||
|
||||
export default archiveAsSinglefile;
|
||||
@@ -0,0 +1,23 @@
|
||||
import axios from "axios";
|
||||
|
||||
/**
 * Asks the Wayback Machine to capture the given URL by requesting
 * `https://web.archive.org/save/<url>` with browser-like headers.
 *
 * The response body is logged on success; any request error is logged and
 * not rethrown.
 *
 * @param url Address of the page to submit.
 */
export default async function sendToWayback(url: string) {
  const saveEndpoint = `https://web.archive.org/save/${url}`;

  const requestConfig = {
    headers: {
      Accept: "text/html,application/xhtml+xml,application/xml",
      "Accept-Encoding": "gzip, deflate",
      Dnt: "1",
      "Upgrade-Insecure-Requests": "1",
      "User-Agent":
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
    },
  };

  try {
    const waybackResponse = await axios.get(saveEndpoint, requestConfig);
    console.log(waybackResponse.data);
  } catch (failure) {
    console.error(failure);
  }
}
|
||||
Reference in New Issue
Block a user