jobs. folder-job, file-change-trigger and some cleanup

This commit is contained in:
Felix Kaspar 2024-05-19 22:01:26 +02:00
parent fae524f8da
commit ecb12e66b6
11 changed files with 264 additions and 154 deletions

2
.gitignore vendored
View File

@ -6,3 +6,5 @@ android/
ios/
releases/
.vscode/
.env
/server-node/jobs

18
package-lock.json generated
View File

@ -4473,6 +4473,17 @@
"url": "https://bevry.me/fund"
}
},
"node_modules/dotenv": {
"version": "16.4.5",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
"integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://dotenvx.com"
}
},
"node_modules/dynamic-dedupe": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/dynamic-dedupe/-/dynamic-dedupe-0.3.0.tgz",
@ -8770,6 +8781,11 @@
"node": ">=0.6"
}
},
"node_modules/toml": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/toml/-/toml-3.0.0.tgz",
"integrity": "sha512-y/mWCZinnvxjTKYhJ+pYxwD0mRLVvOtdS2Awbgxln6iEnt4rk0yBxeSBHkGJcPucRiG0e55mwWp+g/05rsrd6w=="
},
"node_modules/tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
@ -10075,6 +10091,7 @@
"@types/multer": "^1.4.10",
"@wasmer/wasmfs": "^0.12.0",
"archiver": "^6.0.1",
"dotenv": "^16.4.5",
"express": "^4.18.2",
"express-fileupload": "^1.4.2",
"joi": "^17.11.0",
@ -10083,6 +10100,7 @@
"pdf-lib": "^1.17.1",
"rollup-plugin-copy": "^3.5.0",
"rollup-plugin-dynamic-import-variables": "^1.1.0",
"toml": "^3.0.0",
"tsconfig-paths": "^4.2.0",
"vite-plugin-compile-time": "^0.2.1",
"vite-plugin-dynamic-import": "^1.5.0",

View File

@ -28,6 +28,7 @@
"@types/multer": "^1.4.10",
"@wasmer/wasmfs": "^0.12.0",
"archiver": "^6.0.1",
"dotenv": "^16.4.5",
"express": "^4.18.2",
"express-fileupload": "^1.4.2",
"joi": "^17.11.0",
@ -36,6 +37,7 @@
"pdf-lib": "^1.17.1",
"rollup-plugin-copy": "^3.5.0",
"rollup-plugin-dynamic-import-variables": "^1.1.0",
"toml": "^3.0.0",
"tsconfig-paths": "^4.2.0",
"vite-plugin-compile-time": "^0.2.1",
"vite-plugin-dynamic-import": "^1.5.0",

View File

@ -1,3 +1,7 @@
/*
* Translation
*/
import i18next from "i18next";
import resourcesToBackend from "i18next-resources-to-backend";
@ -13,19 +17,28 @@ i18next.use(resourcesToBackend((language: string, namespace: string) => import(`
initImmediate: false // Makes loading blocking but sync
});
// list available modules
import { listOperatorNames } from "@stirling-pdf/shared-operations/src/workflow/operatorAccessor";
console.log("Available Modules: ", listOperatorNames());
/*
* jobs
*/
import "./jobs";
/*
* API
*/
import express from "express";
const app = express();
const PORT = 8000;
import { listOperatorNames } from "@stirling-pdf/shared-operations/src/workflow/operatorAccessor";
console.log("Available Modules: ", listOperatorNames())
// server-node: backend api
import api from "./routes/api/api-controller";
app.use("/api", api);
// serve
// viteNode
if (import.meta.env.PROD) {
app.listen(PORT, () => {
console.log(`http://localhost:${PORT}`);

198
server-node/src/jobs.ts Normal file
View File

@ -0,0 +1,198 @@
import { traverseOperations } from '@stirling-pdf/shared-operations/src/workflow/traverseOperations';
import { PdfFile, RepresentationType } from '@stirling-pdf/shared-operations/src/wrappers/PdfFile';
import { JoiPDFFileSchema } from '@stirling-pdf/shared-operations/src/wrappers/PdfFileJoi';
import 'dotenv/config';
import fs from 'fs';
import path from "path";
import toml from 'toml';
// Root directory of the job system, taken from the JOBS_DIR environment variable.
// Jobs stay disabled when it is unset or empty.
const jobsDir = process.env.JOBS_DIR;

// Folder jobs that are already set up, keyed by the path of their "in/" folder.
// TODO: Also remove watched folders
const watchedFolders: Record<string, Job> = {};

if (jobsDir) {
    setupJobs(jobsDir);
}
/**
 * Prepares the jobs directory and keeps the job configuration in sync with `jobs.toml`.
 *
 * Creates `jobsDir` when it is missing, watches it for changes to `jobs.toml`
 * and loads an already existing `jobs.toml` once at startup.
 *
 * @param jobsDir Directory that contains `jobs.toml` and the job folders.
 */
function setupJobs(jobsDir: string) {
    const jobsFileName = "jobs.toml";

    if (!fs.existsSync(jobsDir)) {
        console.log("jobs dir does not exist. creating one...");
        // recursive: the configured path may be nested below folders that do not exist yet.
        fs.mkdirSync(jobsDir, { recursive: true });
    }

    const watcher = fs.watch(jobsDir, {}, (e, f) => {
        // f is null when the platform cannot report which entry changed.
        if (f !== jobsFileName) return;
        handleJobsToml(jobsFileName, jobsDir);
    });
    // An unhandled "error" event on the watcher would take the whole server down.
    watcher.on("error", (err) => {
        console.error(`Watching ${jobsDir} failed:`, err);
    });

    fs.readdir(jobsDir, (err, files) => {
        if (err) {
            // files is undefined in this case; bail out instead of crashing on files.includes.
            console.error(`Could not read jobs dir ${jobsDir}:`, err.message);
            return;
        }

        if (files.includes(jobsFileName)) {
            handleJobsToml(jobsFileName, jobsDir);
        }
        else {
            console.log("jobs.toml is not present, if you want to use jobs please configure it");
        }
    });
}
/** Common shape of every entry in the `jobs` table read from jobs.toml. */
interface Job {
// Selects the job implementation; handleJobsToml only knows "folder".
type: string
}
// Alias for trigger values that are not one of the literal trigger names.
// NOTE(review): presumably a cron expression - no cron handling exists in this file, confirm before relying on it.
type cronString = string;
/** Configuration of a job of type "folder", as passed to setupFolderJob. */
interface FolderJob extends Job {
// What starts processing. setupFolderJob only sets up "FILE_CHANGE";
// every other value is reported as a trigger that could not be set up.
trigger: "FILE_CHANGE" | "START_FILE_DELETION" | cronString,
// Seconds to wait after the last change event of a file before it is processed;
// setupFolderJob falls back to 5 seconds when this is not set.
delay: number | undefined,
// NOTE(review): the remaining options are declared but not read anywhere in this file yet -
// their intended semantics have to be confirmed before documenting them further.
respectFolderStructure: boolean | undefined,
enableLogsDir: boolean | undefined,
keepOriginals: boolean | undefined,
indicateStatus: boolean | undefined,
}
/**
 * Reads the jobs configuration file and sets up every job declared in its `jobs` table.
 *
 * Read and parse failures are logged instead of thrown: this runs inside an
 * fs callback, where an exception would terminate the process. The file
 * watcher can also fire while the file is still being written, so a
 * temporarily malformed file has to be tolerated.
 *
 * @param jobsFile Name of the configuration file inside `jobsDir`.
 * @param jobsDir  Directory that contains the configuration file and the job folders.
 */
function handleJobsToml(jobsFile: string, jobsDir: string) {
    console.log("jobs.toml was updated.");

    fs.readFile(path.join(jobsDir, jobsFile), (err, data) => {
        if (err) {
            console.error(`Could not read ${jobsFile}:`, err.message);
            return;
        }

        let jobs: { [key: string]: Job } | undefined;
        try {
            jobs = toml.parse(data.toString()).jobs;
        } catch (parseErr) {
            console.error(`${jobsFile} is malformed:`, parseErr instanceof Error ? parseErr.message : parseErr);
            return;
        }

        if (!jobs) {
            console.log(`${jobsFile} does not define any jobs.`);
            return;
        }

        for (const [jobName, job] of Object.entries(jobs)) {
            switch (job.type) {
                case "folder":
                    setupFolderJob(jobName, job as FolderJob, jobsDir);
                    break;
                default:
                    console.error(`job-type ${job.type} of ${jobName} is not implemented`);
                    break;
            }
        }
    });
}
// Pending debounce timers, keyed by the path of the incoming file they belong to.
const watchedWritingFiles: { [path: string]: NodeJS.Timeout } = {};

/**
 * Sets up a folder job: makes sure its folder layout exists and installs its trigger.
 *
 * Layout below `<jobsDir>/<jobName>/`: `workflow.json`, `in/` and `out/`.
 * Missing parts are created on every call, not only when the job folder itself is new,
 * because watching a non-existent `in/` folder throws.
 *
 * A job is recorded in `watchedFolders` only once a watcher is actually running, so a job
 * whose trigger could not be set up is retried the next time the configuration is loaded.
 *
 * @param jobName Name of the job; also the name of its folder.
 * @param job     Job configuration taken from jobs.toml.
 * @param jobsDir Directory that contains all job folders.
 */
function setupFolderJob(jobName: string, job: FolderJob, jobsDir: string) {
    const jobFolder = path.join(jobsDir, jobName, "/");
    const inFolder = path.join(jobFolder, "in/");
    const outFolder = path.join(jobFolder, "out/");
    const workflowFile = path.join(jobFolder, "workflow.json");

    if (watchedFolders[inFolder]) {
        return;
    }

    // recursive mkdir also creates jobFolder and is a no-op for folders that already exist.
    fs.mkdirSync(inFolder, { recursive: true });
    fs.mkdirSync(outFolder, { recursive: true });
    if (!fs.existsSync(workflowFile)) {
        fs.writeFileSync(workflowFile, "{}");
    }

    // trigger
    switch (job.trigger) {
        case "FILE_CHANGE": {
            // ?? instead of ||: an explicitly configured delay of 0 is respected.
            const delayMs = (job.delay ?? 5) * 1000;

            // TODO: Process files that are already in there
            const watcher = fs.watch(inFolder, (e, f) => {
                if (!f || f == "") return;

                const file = path.parse(f);
                const filePath = path.join(jobFolder, "in/", f);

                if (file.ext != ".pdf") {
                    if (file.ext == ".processing-pdf") {
                        // Our own marker for files that are currently being processed.
                        return;
                    }
                    console.log("Non-pdf files aren't supported at the moment.");
                    return;
                }

                // Debounce: every further change event restarts the timer, so a file that is
                // still being written is only picked up once it has been quiet for delayMs.
                if (watchedWritingFiles[filePath]) {
                    clearTimeout(watchedWritingFiles[filePath]);
                }

                console.log("in/", e, f)
                watchedWritingFiles[filePath] = setTimeout(() => {
                    delete watchedWritingFiles[filePath];
                    processSingleFile(file, filePath, jobFolder).catch((err) => {
                        console.error(`Processing ${filePath} failed:`, err);
                    });
                }, delayMs);
            });
            // An unhandled "error" event on the watcher would take the whole server down.
            watcher.on("error", (err) => {
                console.error(`Watching ${inFolder} failed:`, err);
            });

            watchedFolders[inFolder] = job;
            break;
        }
        default:
            console.error(`The trigger ${job.trigger} for ${jobName} could not be setup.`)
            break;
    }
}
/**
 * Runs the job's workflow on a single incoming PDF and writes the results to `out/`.
 *
 * Steps: load `workflow.json`, read the input, rename it to `<name>.processing-pdf`
 * (the watcher ignores that extension), run the workflow, write all outputs and only then
 * delete the renamed input. Every failure is logged; the returned promise never rejects
 * and resolves once the file has been handled completely.
 *
 * When the workflow or writing an output fails, the input is intentionally left behind
 * as `.processing-pdf` so that no data is lost and the file is not picked up again.
 *
 * @param file      Parsed name of the incoming file.
 * @param filePath  Full path of the incoming file inside the job's `in/` folder.
 * @param jobFolder Folder of the job (contains `workflow.json`, `in/` and `out/`).
 */
async function processSingleFile(file: path.ParsedPath, filePath: string, jobFolder: string) {
    console.log("Processing file ", file.base);

    let workflow;
    try {
        workflow = JSON.parse(fs.readFileSync(path.join(jobFolder, "workflow.json")).toString());
    } catch (err) {
        console.error("malformed workflow-json was provided", err instanceof Error ? err.message : err);
        return;
    }

    // Optional chaining: a workflow.json containing just `null` is valid JSON.
    if (!workflow?.actions) {
        console.error("The provided workflow does not contain any actions.");
        return
    }

    console.log("Reading File");
    const processingPath = filePath + ".processing-pdf";
    let input: PdfFile;
    try {
        const data = await fs.promises.readFile(filePath);
        input = new PdfFile(file.name, new Uint8Array(data), RepresentationType.Uint8Array, file.name);
        // Renaming without a prior existence check avoids a race with concurrent deletion.
        await fs.promises.rename(filePath, processingPath);
    } catch (err) {
        if ((err as NodeJS.ErrnoException | undefined)?.code === "ENOENT") {
            console.log(`${filePath} does not exist anymore. Either it was already processed or it was deleted by the user.`);
        } else {
            console.error(`Could not read ${filePath}:`, err);
        }
        return
    }

    try {
        // TODO: Check if file type == inputType for operator
        const pdfResults = await traverseOperations(workflow.actions, [input], (state) => {
            console.log("State: ", state);
        });

        console.log("Download");
        const outFolder = path.join(jobFolder, "out/");
        await fs.promises.mkdir(outFolder, { recursive: true });
        await Promise.all(pdfResults.map(async (pdfResult) => {
            await fs.promises.writeFile(path.join(outFolder, pdfResult.filename + ".pdf"), await pdfResult.uint8Array);
        }));

        // Only drop the input once every output has been written successfully.
        await fs.promises.rm(processingPath);
    } catch (err) {
        if (err != null && (err as { validationError?: unknown }).validationError) {
            // Bad Request
            console.log(err);
        }
        else {
            console.error("Internal Server Error", err);
        }
    }
}

View File

@ -81,7 +81,7 @@ router.post("/:workflowUuid?", [
});
}
else {
console.log("Start Aync Workflow");
console.log("Start Async Workflow");
// TODO: UUID collision checks
let workflowID = req.params.workflowUuid;
if(!workflowID)

View File

@ -3,7 +3,7 @@ import { Response } from "express";
import { PdfFile } from "@stirling-pdf/shared-operations/src/wrappers/PdfFile";
import Archiver from "archiver";
export async function respondWithFile(res: Response, uint8Array: Uint8Array, filename: string, mimeType: string): Promise<void> {
async function respondWithFile(res: Response, uint8Array: Uint8Array, filename: string, mimeType: string): Promise<void> {
res.writeHead(200, {
"Content-Type": mimeType,
"Content-disposition": `attachment; filename="${filename}"`,
@ -12,12 +12,12 @@ export async function respondWithFile(res: Response, uint8Array: Uint8Array, fil
res.end(uint8Array);
}
export async function respondWithPdfFile(res: Response, file: PdfFile): Promise<void> {
async function respondWithPdfFile(res: Response, file: PdfFile): Promise<void> {
const byteArray = await file.uint8Array;
respondWithFile(res, byteArray, file.filename+".pdf", "application/pdf");
}
export async function respondWithZip(res: Response, filename: string, files: {uint8Array: Uint8Array, filename: string}[]): Promise<void> {
async function respondWithZip(res: Response, filename: string, files: {uint8Array: Uint8Array, filename: string}[]): Promise<void> {
if (files.length == 0) {
res.status(500).json({"warning": "The workflow had no outputs."});
return;
@ -58,28 +58,3 @@ export async function respondWithPdfFiles(res: Response, pdfFiles: PdfFile[] | u
respondWithZip(res, filename, files);
}
}
/**
 * Answers the request with HTTP 400 and a single validation-style error entry
 * stating that the `pdfFile` upload is missing.
 *
 * @param res Express response to answer on.
 */
export function response_mustHaveExactlyOneFile(res: Response): void {
    const fieldName = "pdfFile";
    const missingFileError = {
        "message": "file is required",
        "path": [fieldName],
        "type": "file",
        "context": {
            "label": fieldName,
            "key": fieldName
        }
    };

    res.status(400).send([missingFileError]);
}
/**
 * Answers the request with HTTP 400 and a single `dependency_error` entry
 * naming the dependency that is not configured correctly.
 *
 * @param res            Express response to answer on.
 * @param dependencyName Name of the dependency, inserted into the message.
 */
export function response_dependencyNotConfigured(res: Response, dependencyName: string): void {
    const message = `${dependencyName} is not configured correctly on the server.`;
    const dependencyError = { "message": message, "type": "dependency_error" };

    res.status(400).send([dependencyError]);
}

View File

@ -1,106 +0,0 @@
import fs from "fs";
import os from "os";
import path from "path";
import { exec, spawn } from "child_process";
import { PdfFile, RepresentationType } from "@stirling-pdf/shared-operations/src/wrappers/PdfFile";
/**
 * Converts an arbitrary document to PDF by running LibreOffice in headless mode.
 *
 * The bytes are written to a fresh temporary directory below `<tmpdir>/StirlingPDF`,
 * LibreOffice converts the file in place and the first resulting `.pdf` is read back.
 * The temporary directory is removed in every case, also when the conversion fails.
 *
 * @param byteArray Content of the file to convert.
 * @param filename  Name of the file (its extension tells LibreOffice the input format).
 *                  Only the base name is used, so the file cannot be placed outside the
 *                  temporary directory.
 * @returns The converted document.
 * @throws Error when LibreOffice fails or produces no PDF.
 */
export async function fileToPdf(byteArray: Uint8Array, filename: string): Promise<PdfFile> {
    const parentDir = path.join(os.tmpdir(), "StirlingPDF");
    fs.mkdirSync(parentDir, {recursive: true});
    const tempDir = fs.mkdtempSync(parentDir+"/");

    try {
        // basename: never let a caller-supplied name such as "../../x" escape tempDir.
        const srcFile = path.join(tempDir, path.basename(filename));
        const randFolderName = path.parse(tempDir).base;

        await writeBytesToFile(srcFile, byteArray);

        const messages = await runLibreOfficeCommand(randFolderName, ["--headless","--convert-to","pdf",srcFile,"--outdir",tempDir]);

        const files = fs.readdirSync(tempDir).filter(file => file.endsWith(".pdf"));
        if (files.length == 0) {
            throw new Error("File to pdf failed: no output files found. Messages: "+messages);
        }
        if (files.length > 1) {
            console.warn("Ambiguous file to pdf outputs: Returning first result", files);
        }

        const outputFileName = files[0];
        const outputBytes = await readBytesFromFile(path.join(tempDir, outputFileName));

        return new PdfFile(outputFileName, outputBytes, RepresentationType.Uint8Array);
    } finally {
        // fs.rmSync replaces the deprecated fs.rmdirSync(..., {recursive: true});
        // force keeps the cleanup from masking the original error.
        fs.rmSync(tempDir, {recursive: true, force: true});
    }
}
/**
 * Checks whether a usable `libreoffice` executable is available.
 *
 * Runs `libreoffice --version` and looks for a four-part version number
 * (e.g. "LibreOffice 7.3.7.2") in its output.
 *
 * @returns Resolves to `true` when the version banner was found, `false` when the command
 *          failed, wrote to stderr or printed something unexpected. Never rejects.
 */
export function isLibreOfficeInstalled(): Promise<boolean> {
    return new Promise((resolve) => {
        exec("libreoffice --version", (error, stdout, stderr) => {
            if (error || stderr) {
                resolve(false);
                return;
            }
            // Regex literal on purpose: inside a string literal "\." is just ".",
            // which matches any character instead of a literal dot.
            resolve(/LibreOffice \d+(\.\d+){3}/.test(stdout));
        });
    });
}
/**
 * Writes the given bytes to `filePath`, replacing an existing file.
 *
 * @returns Resolves once the file is written; rejects with the underlying fs error.
 */
function writeBytesToFile(filePath: string, bytes: Uint8Array): Promise<void> {
    return fs.promises.writeFile(filePath, bytes);
}
/**
 * Reads the whole file at `filePath` into a plain `Uint8Array` (a copy, not a Buffer).
 *
 * @returns The file content.
 * @throws Error with the message `Error reading file: <reason>` when reading fails.
 */
async function readBytesFromFile(filePath: string): Promise<Uint8Array> {
    const contents = await fs.promises.readFile(filePath).catch((err: Error) => {
        throw new Error(`Error reading file: ${err.message}`);
    });
    return new Uint8Array(contents);
}
/**
 * Runs `libreoffice` with the given arguments and collects what it prints to stdout.
 *
 * @param idKey Label used to tell the log lines of concurrent runs apart.
 * @param args  Command line arguments passed to `libreoffice`.
 * @returns Resolves with the stdout chunks (in order) when the command exits with code 0.
 *          Rejects when the process cannot be started or ends with a non-zero code.
 */
function runLibreOfficeCommand(idKey: string, args: string[]): Promise<string[]> {
    return new Promise((resolve, reject) => {
        const messageList: string[] = [];

        // Not named "process": that would shadow the Node.js global inside this function.
        const child = spawn("libreoffice", args);

        child.stdout.on("data", (data) => {
            const dataStr = data.toString();
            console.log(`Progress ${idKey}:`, dataStr);
            messageList.push(dataStr);
        });

        child.stderr.on("data", (data) => {
            console.error(`stderr ${idKey}:`, data.toString());
        });

        // Emitted e.g. when the executable does not exist. A later "close" is harmless,
        // a promise only settles once.
        child.on("error", (err) => {
            reject(err);
        });

        // "close" instead of "exit": it fires after the stdio streams have ended,
        // so no stdout chunk can arrive after the promise has been resolved.
        child.on("close", (code) => {
            if (code === 0) {
                resolve(messageList);
            } else {
                reject(new Error(`Command failed with exit code ${code}`));
            }
        });
    });
}

View File

@ -12,13 +12,17 @@ export default defineConfig({
},
plugins: [
...VitePluginNode({
// Nodejs native Request adapter
// currently this plugin supports 'express', 'nest', 'koa' and 'fastify' out of the box,
// you can also pass a function if you are using other frameworks, see Custom Adapter section
adapter: 'express',
// Nodejs native Request adapter
// currently this plugin supports 'express', 'nest', 'koa' and 'fastify' out of the box,
// you can also pass a function if you are using other frameworks, see Custom Adapter section
adapter: 'express',
// tell the plugin where is your project entry
appPath: './src/index.ts',
// tell the plugin where is your project entry
appPath: './src/index.ts',
// Optional, default: false
// if you want to init your app on boot, set this to true
initAppOnBoot: true,
}),
topLevelAwait({
// The export name of top-level await promise for each chunk module

View File

@ -13,16 +13,19 @@ export async function detectQRCodePages(file: PdfFile) {
const page = await pdfDoc.getPage(i + 1);
const images = await getImagesOnPage(page);
// console.log("images:", images);
console.log("images:", images);
for (const image of images) {
const data = await checkForQROnImage(image);
if(["https://github.com/Stirling-Tools/Stirling-PDF", "https://github.com/Frooodle/Stirling-PDF"].includes(data)) {
pagesWithQR.push(i);
}
else {
console.log("Found QR code with unrelated data: " + data);
}
}
}
if(pagesWithQR.length == 0) {
console.warn("Could not find any QR Codes in the provided PDF.");
console.warn("Could not find any QR Codes in the provided PDF. This may happen if the provided QR-Code is not an image but a path (e.g. SVG).");
}
return pagesWithQR;
}

View File

@ -16,10 +16,11 @@ export async function getImagesOnPage(page: PDFPageProxy): Promise<PDFJSImage[]>
const images: PDFJSImage[] = [];
for (let j=0; j < ops.fnArray.length; j++) {
if (ops.fnArray[j] == PDFJS.OPS.paintImageXObject) {
const image: PDFJSImage = page.objs.get(ops.argsArray[j][0])
console.log("Image: ", image);
images.push(image);
console.log("Found", ops.argsArray[j])
page.objs.get(ops.argsArray[j][0], (image: PDFJSImage) => {
console.log("Image: ", image);
images.push(image);
})
}
}
return images;