code improvements

This commit is contained in:
daniel31x13
2024-03-26 01:38:08 -04:00
parent 797ddc4b73
commit 2b8f7d4be2
5 changed files with 401 additions and 280 deletions
@@ -0,0 +1,44 @@
import { Readability } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import DOMPurify from "dompurify";
import { prisma } from "../db";
import createFile from "../storage/createFile";
import { Link } from "@prisma/client";
/**
 * Builds a reader-mode version of a page with Mozilla Readability and stores
 * it for the given link.
 *
 * The raw HTML is sanitized with DOMPurify before it is parsed. When the
 * parsed article has text and the link's `readable` field does not already
 * start with "archive", the article is written as JSON to
 * `archives/<collectionId>/<linkId>_readability.json` and the link row is
 * updated with that path and the flattened article text.
 *
 * @param content - Raw HTML of the page.
 * @param link - Link record the readable archive belongs to.
 */
const archiveAsReadablility = async (content: string, link: Link) => {
  const window = new JSDOM("").window;
  const purify = DOMPurify(window);
  const cleanedUpContent = purify.sanitize(content);

  // An empty string is not a valid URL and makes jsdom throw, so the `url`
  // option is only passed when the link actually has one.
  const dom = new JSDOM(
    cleanedUpContent,
    link.url ? { url: link.url } : undefined
  );
  const article = new Readability(dom.window.document).parse();

  // Line breaks are converted to spaces first; collapsing afterwards ensures
  // the replacement cannot reintroduce runs of spaces.
  const articleText = article?.textContent
    ?.replace(/(\r\n|\n|\r)/gm, " ") // turn line breaks into spaces
    .replace(/ +(?= )/g, ""); // collapse multiple spaces into one

  // Nothing to store, or a readable archive already exists for this link.
  if (!articleText || link.readable?.startsWith("archive")) return;

  // The collection id is re-read from the database rather than taken from
  // the `link` argument.
  const collectionId = (
    await prisma.link.findUnique({
      where: { id: link.id },
      select: { collectionId: true },
    })
  )?.collectionId;

  // Without a collection id the file would be written under
  // "archives/undefined/...", so bail out instead.
  if (!collectionId) {
    console.error("Error archiving as readability: Collection ID not found");
    return;
  }

  const filePath = `archives/${collectionId}/${link.id}_readability.json`;

  await createFile({
    data: JSON.stringify(article),
    filePath,
  });

  await prisma.link.update({
    where: { id: link.id },
    data: {
      readable: filePath,
      textContent: articleText,
    },
  });
};
export default archiveAsReadablility;
@@ -0,0 +1,111 @@
import { execSync } from "child_process";
import createFile from "../storage/createFile";
import axios from "axios";
import { Agent } from "http";
import { prisma } from "../db";
import { Link } from "@prisma/client";
/**
 * Reads the link's collection id from the database and builds the storage
 * path of its Singlefile archive.
 *
 * @param link - Link record whose archive path is needed.
 * @returns `archives/<collectionId>/<linkId>.html`, or `undefined` when no
 * collection id was found.
 */
const resolveSinglefilePath = async (
  link: Link
): Promise<string | undefined> => {
  const record = await prisma.link.findUnique({
    where: { id: link.id },
    select: { collectionId: true },
  });
  const collectionId = record?.collectionId;

  return collectionId ? `archives/${collectionId}/${link.id}.html` : undefined;
};

/**
 * Archives a link as a single self-contained HTML file.
 *
 * Two mutually exclusive sources are supported, chosen by environment
 * variables (the command takes precedence when both are set):
 * - `SINGLEFILE_ARCHIVE_COMMAND`: a shell command template containing the
 *   `{{URL}}` placeholder; its stdout is used as the HTML.
 * - `SINGLEFILE_ARCHIVE_HTTP_API`: an endpoint that is POSTed `{ url }` and
 *   whose response body is used as the HTML.
 *
 * On success the HTML is stored at `archives/<collectionId>/<linkId>.html`
 * and the link's `singlefile` field is set to that path. Failures are logged
 * with `console.error` and never thrown to the caller.
 *
 * @param link - Link record to archive; nothing happens when it has no URL.
 */
const archiveAsSinglefile = async (link: Link) => {
  if (!link.url) return;

  const url = link.url;
  const command = process.env.SINGLEFILE_ARCHIVE_COMMAND;
  const httpApi = process.env.SINGLEFILE_ARCHIVE_HTTP_API;

  if (command) {
    if (!command.includes("{{URL}}")) {
      console.error("Invalid SINGLEFILE_ARCHIVE_COMMAND. Missing {{URL}}");
      return;
    }

    try {
      // split/join substitutes every placeholder literally; String.replace
      // with a string replacement would expand "$&", "$'" etc. found in the
      // URL and would only touch the first occurrence.
      //
      // NOTE(security): the URL is interpolated into a shell command line.
      // It cannot be escaped here without knowing how the configured
      // template quotes {{URL}}, so a URL with shell metacharacters can
      // still alter the command. The template should quote the placeholder
      // and URLs should be validated upstream.
      const html = execSync(command.split("{{URL}}").join(url), {
        timeout: 120000,
        maxBuffer: 1024 * 1024 * 30,
      });

      if (!html.length) {
        console.error(
          "Error running SINGLEFILE_ARCHIVE_COMMAND: Empty buffer"
        );
        return;
      }

      const filePath = await resolveSinglefilePath(link);

      if (!filePath) {
        console.error(
          "Error running SINGLEFILE_ARCHIVE_COMMAND: Collection ID not found"
        );
        return;
      }

      await createFile({ data: html, filePath });
      await prisma.link.update({
        where: { id: link.id },
        data: { singlefile: filePath },
      });
    } catch (err) {
      console.error("Error running SINGLEFILE_ARCHIVE_COMMAND:", err);
    }
  } else if (httpApi) {
    try {
      const response = await axios.post(
        httpApi,
        { url },
        {
          headers: {
            "Content-Type": "application/x-www-form-urlencoded",
          },
          httpAgent: new Agent({ keepAlive: false }),
        }
      );

      // A missing body is treated the same as an empty one.
      if (!response.data?.length) {
        console.error(
          "Error fetching Singlefile using SINGLEFILE_ARCHIVE_HTTP_API: Empty response"
        );
        return;
      }

      const filePath = await resolveSinglefilePath(link);

      if (!filePath) {
        console.error(
          "Error fetching Singlefile using SINGLEFILE_ARCHIVE_HTTP_API: Collection ID not found"
        );
        return;
      }

      await createFile({ data: response.data, filePath });
      await prisma.link.update({
        where: { id: link.id },
        data: { singlefile: filePath },
      });
    } catch (err) {
      console.error(
        "Error fetching Singlefile using SINGLEFILE_ARCHIVE_HTTP_API:",
        err
      );
    }
  }
};
export default archiveAsSinglefile;
@@ -0,0 +1,23 @@
import axios from "axios";
/**
 * Asks the Internet Archive's Wayback Machine to save a snapshot of a page by
 * issuing a GET request to its `/save/<url>` endpoint with browser-like
 * headers.
 *
 * The response body is logged on success; any error is logged and swallowed,
 * so the returned promise always resolves.
 *
 * @param url - Address of the page to snapshot; appended verbatim to the
 * save endpoint.
 */
export default async function sendToWayback(url: string) {
  const saveEndpoint = `https://web.archive.org/save/${url}`;

  const requestConfig = {
    headers: {
      Accept: "text/html,application/xhtml+xml,application/xml",
      "Accept-Encoding": "gzip, deflate",
      Dnt: "1",
      "Upgrade-Insecure-Requests": "1",
      "User-Agent":
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
    },
  };

  try {
    const waybackResponse = await axios.get(saveEndpoint, requestConfig);
    console.log(waybackResponse.data);
  } catch (failure) {
    console.error(failure);
  }
}