mirror of
https://github.com/Stirling-Tools/Stirling-PDF.git
synced 2025-06-23 16:05:09 +00:00
jobs. folder-job, file-change-trigger and some cleanup
This commit is contained in:
parent
fae524f8da
commit
ecb12e66b6
2
.gitignore
vendored
2
.gitignore
vendored
@ -6,3 +6,5 @@ android/
|
||||
ios/
|
||||
releases/
|
||||
.vscode/
|
||||
.env
|
||||
/server-node/jobs
|
18
package-lock.json
generated
18
package-lock.json
generated
@ -4473,6 +4473,17 @@
|
||||
"url": "https://bevry.me/fund"
|
||||
}
|
||||
},
|
||||
"node_modules/dotenv": {
|
||||
"version": "16.4.5",
|
||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
|
||||
"integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://dotenvx.com"
|
||||
}
|
||||
},
|
||||
"node_modules/dynamic-dedupe": {
|
||||
"version": "0.3.0",
|
||||
"resolved": "https://registry.npmjs.org/dynamic-dedupe/-/dynamic-dedupe-0.3.0.tgz",
|
||||
@ -8770,6 +8781,11 @@
|
||||
"node": ">=0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/toml": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/toml/-/toml-3.0.0.tgz",
|
||||
"integrity": "sha512-y/mWCZinnvxjTKYhJ+pYxwD0mRLVvOtdS2Awbgxln6iEnt4rk0yBxeSBHkGJcPucRiG0e55mwWp+g/05rsrd6w=="
|
||||
},
|
||||
"node_modules/tr46": {
|
||||
"version": "0.0.3",
|
||||
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
|
||||
@ -10075,6 +10091,7 @@
|
||||
"@types/multer": "^1.4.10",
|
||||
"@wasmer/wasmfs": "^0.12.0",
|
||||
"archiver": "^6.0.1",
|
||||
"dotenv": "^16.4.5",
|
||||
"express": "^4.18.2",
|
||||
"express-fileupload": "^1.4.2",
|
||||
"joi": "^17.11.0",
|
||||
@ -10083,6 +10100,7 @@
|
||||
"pdf-lib": "^1.17.1",
|
||||
"rollup-plugin-copy": "^3.5.0",
|
||||
"rollup-plugin-dynamic-import-variables": "^1.1.0",
|
||||
"toml": "^3.0.0",
|
||||
"tsconfig-paths": "^4.2.0",
|
||||
"vite-plugin-compile-time": "^0.2.1",
|
||||
"vite-plugin-dynamic-import": "^1.5.0",
|
||||
|
@ -28,6 +28,7 @@
|
||||
"@types/multer": "^1.4.10",
|
||||
"@wasmer/wasmfs": "^0.12.0",
|
||||
"archiver": "^6.0.1",
|
||||
"dotenv": "^16.4.5",
|
||||
"express": "^4.18.2",
|
||||
"express-fileupload": "^1.4.2",
|
||||
"joi": "^17.11.0",
|
||||
@ -36,6 +37,7 @@
|
||||
"pdf-lib": "^1.17.1",
|
||||
"rollup-plugin-copy": "^3.5.0",
|
||||
"rollup-plugin-dynamic-import-variables": "^1.1.0",
|
||||
"toml": "^3.0.0",
|
||||
"tsconfig-paths": "^4.2.0",
|
||||
"vite-plugin-compile-time": "^0.2.1",
|
||||
"vite-plugin-dynamic-import": "^1.5.0",
|
||||
|
@ -1,3 +1,7 @@
|
||||
/*
|
||||
* Translation
|
||||
*/
|
||||
|
||||
import i18next from "i18next";
|
||||
import resourcesToBackend from "i18next-resources-to-backend";
|
||||
|
||||
@ -13,19 +17,28 @@ i18next.use(resourcesToBackend((language: string, namespace: string) => import(`
|
||||
initImmediate: false // Makes loading blocking but sync
|
||||
});
|
||||
|
||||
// list available modules
|
||||
import { listOperatorNames } from "@stirling-pdf/shared-operations/src/workflow/operatorAccessor";
|
||||
console.log("Available Modules: ", listOperatorNames());
|
||||
|
||||
/*
|
||||
* jobs
|
||||
*/
|
||||
|
||||
import "./jobs";
|
||||
|
||||
/*
|
||||
* API
|
||||
*/
|
||||
|
||||
import express from "express";
|
||||
const app = express();
|
||||
const PORT = 8000;
|
||||
|
||||
|
||||
import { listOperatorNames } from "@stirling-pdf/shared-operations/src/workflow/operatorAccessor";
|
||||
console.log("Available Modules: ", listOperatorNames())
|
||||
|
||||
// server-node: backend api
|
||||
import api from "./routes/api/api-controller";
|
||||
app.use("/api", api);
|
||||
|
||||
// serve
|
||||
// viteNode
|
||||
if (import.meta.env.PROD) {
|
||||
app.listen(PORT, () => {
|
||||
console.log(`http://localhost:${PORT}`);
|
||||
|
198
server-node/src/jobs.ts
Normal file
198
server-node/src/jobs.ts
Normal file
@ -0,0 +1,198 @@
|
||||
import { traverseOperations } from '@stirling-pdf/shared-operations/src/workflow/traverseOperations';
|
||||
import { PdfFile, RepresentationType } from '@stirling-pdf/shared-operations/src/wrappers/PdfFile';
|
||||
import { JoiPDFFileSchema } from '@stirling-pdf/shared-operations/src/wrappers/PdfFileJoi';
|
||||
import 'dotenv/config';
|
||||
import fs from 'fs';
|
||||
import path from "path";
|
||||
import toml from 'toml';
|
||||
|
||||
/** Root directory of all jobs, read from the JOBS_DIR environment variable. */
const jobsDir = process.env.JOBS_DIR;

// TODO: Also remove watched folders
/** Jobs that already have a watcher, keyed by the path of their input folder. */
const watchedFolders: Record<string, Job> = {};

// The job system stays disabled unless JOBS_DIR is set to a non-empty value.
if (jobsDir) {
    setupJobs(jobsDir);
}
|
||||
|
||||
/**
 * Prepares the jobs directory and keeps the job configuration in sync with it.
 *
 * Creates `jobsDir` when it is missing, watches it for changes to `jobs.toml`
 * and loads `jobs.toml` once right away if it is already present.
 *
 * @param jobsDir Directory that contains `jobs.toml` and the job folders.
 */
function setupJobs(jobsDir: string) {
    if (!fs.existsSync(jobsDir)) {
        console.log("jobs dir does not exist. creating one...");
        // recursive: the configured path may be nested below folders that do not exist yet.
        fs.mkdirSync(jobsDir, { recursive: true });
    }

    fs.watch(jobsDir, {}, (_event, changedFile) => {
        // The watcher may report no filename (null); only jobs.toml is of interest.
        if (changedFile === "jobs.toml") {
            handleJobsToml("jobs.toml", jobsDir);
        }
    });

    fs.readdir(jobsDir, (err, files) => {
        if (err) {
            // `files` is undefined on failure; report instead of crashing on `files.includes`.
            console.error(`Could not read the jobs dir ${jobsDir}:`, err.message);
            return;
        }

        if (files.includes("jobs.toml")) {
            handleJobsToml("jobs.toml", jobsDir);
        }
        else {
            console.log("jobs.toml is not present, if you want to use jobs please configure it");
        }
    });
}
|
||||
|
||||
/** Shape shared by every entry of the `jobs` table in jobs.toml. */
interface Job {
    /** Selects the job implementation; `handleJobsToml` only dispatches "folder". */
    type: string;
}

/** Alias marking a trigger value that is meant to be a cron expression. */
type cronString = string;

/** Settings of a job whose `type` is "folder". */
interface FolderJob extends Job {
    /**
     * What starts processing. `setupFolderJob` only wires up "FILE_CHANGE";
     * every other value is reported there as an error.
     */
    trigger: "FILE_CHANGE" | "START_FILE_DELETION" | cronString;
    /** Seconds to wait after the last file event before processing; 5 is used when unset. */
    delay: number | undefined;
    // NOTE(review): the options below are not read anywhere in the visible code;
    // their exact semantics still need to be confirmed.
    respectFolderStructure: boolean | undefined;
    enableLogsDir: boolean | undefined;
    keepOriginals: boolean | undefined;
    indicateStatus: boolean | undefined;
}
|
||||
|
||||
/**
 * Reads the job configuration and sets up every job it declares.
 *
 * Called once at startup and again on every change event for the file, so it
 * has to survive a file that is momentarily unreadable or half-written.
 *
 * @param jobsFile Name of the configuration file inside `jobsDir`.
 * @param jobsDir  Directory that contains the configuration file and the job folders.
 */
function handleJobsToml(jobsFile: string, jobsDir: string) {
    console.log("jobs.toml was updated.");

    fs.readFile(path.join(jobsDir, jobsFile), (err, data) => {
        if (err) {
            // The change event also fires on delete/rename, in which case there is nothing to read.
            console.error(`Could not read ${jobsFile}:`, err.message);
            return;
        }

        let jobs: { [key: string]: Job } | undefined;
        try {
            jobs = toml.parse(data.toString()).jobs;
        } catch (parseErr) {
            // An exception escaping this callback would take the whole server down,
            // e.g. while the user is still typing an incomplete entry.
            console.error(`${jobsFile} could not be parsed:`, parseErr instanceof Error ? parseErr.message : parseErr);
            return;
        }

        if (!jobs) {
            // No `jobs` table configured: nothing to set up.
            return;
        }

        for (const [jobName, job] of Object.entries(jobs)) {
            switch (job.type) {
            case "folder":
                setupFolderJob(jobName, job as FolderJob, jobsDir);
                break;
            default:
                console.error(`job-type ${job.type} of ${jobName} is not implemented`);
                break;
            }
        }
    });
}
|
||||
|
||||
/** Pending debounce timers, keyed by the path of the input file they will process. */
const watchedWritingFiles: Record<string, NodeJS.Timeout> = {};
|
||||
|
||||
/**
 * Registers a folder job: scaffolds `<jobsDir>/<jobName>/` (with `workflow.json`,
 * `in/` and `out/`) and installs the configured trigger.
 *
 * A job whose `in/` folder is already registered is left untouched, so calling
 * this again after a jobs.toml change does not create a second watcher.
 *
 * @param jobName Name of the job; also the name of its folder inside `jobsDir`.
 * @param job     Configuration of the job as read from jobs.toml.
 * @param jobsDir Directory that contains all job folders.
 */
function setupFolderJob(jobName: string, job: FolderJob, jobsDir: string) {
    const jobFolder = path.join(jobsDir, jobName, "/");
    const inFolder = path.join(jobFolder, "in/");

    if (watchedFolders[inFolder]) {
        return;
    }

    watchedFolders[inFolder] = job;

    // Create whatever is missing, also inside a job folder that already exists.
    // Otherwise watching a non-existent in/ folder below would throw.
    fs.mkdirSync(jobFolder, { recursive: true });

    const workflowFile = path.join(jobFolder, "workflow.json");
    if (!fs.existsSync(workflowFile)) {
        fs.writeFileSync(workflowFile, "{}");
    }

    fs.mkdirSync(inFolder, { recursive: true });
    fs.mkdirSync(path.join(jobFolder, "out"), { recursive: true });

    // trigger

    switch (job.trigger) {
    case "FILE_CHANGE":
        // TODO: Process files that are already in there
        fs.watch(inFolder, (event, changedFile) => {
            if (!changedFile) return;

            const file = path.parse(changedFile);
            const filePath = path.join(inFolder, changedFile);

            if (file.ext !== ".pdf") {
                // Files renamed by processSingleFile while they are being worked on are skipped silently.
                if (file.ext !== ".processing-pdf") {
                    console.log("Non-pdf files aren't supported at the moment.");
                }
                return;
            }

            // Debounce: every further event for the same file restarts the countdown,
            // so processing only starts once the file stopped changing.
            if (watchedWritingFiles[filePath]) {
                clearTimeout(watchedWritingFiles[filePath]);
            }

            console.log("in/", event, changedFile);
            watchedWritingFiles[filePath] = setTimeout(() => {
                // The timer has fired; drop the handle so the map does not grow forever.
                delete watchedWritingFiles[filePath];

                processSingleFile(file, filePath, jobFolder).catch((err) => {
                    // Nobody awaits this promise; log instead of leaving an unhandled rejection.
                    console.error(`Processing ${filePath} failed:`, err);
                });
            }, (job.delay || 5) * 1000); // an unset (or 0) delay falls back to 5 seconds
        });
        break;

    default:
        console.error(`The trigger ${job.trigger} for ${jobName} could not be setup.`);
        break;
    }
}
|
||||
|
||||
/**
 * Runs the job's workflow on one input file and writes the results to `out/`.
 *
 * The input is renamed to `<name>.processing-pdf` while it is being worked on
 * and is deleted only after every output file has been written. If the
 * workflow or a write fails, the `.processing-pdf` file is left in place.
 *
 * @param file      Parsed name of the input file.
 * @param filePath  Full path of the input file.
 * @param jobFolder Folder of the job; contains `workflow.json` and `out/`.
 * @returns A promise that settles once processing has finished or was skipped.
 */
async function processSingleFile(file: path.ParsedPath, filePath: string, jobFolder: string): Promise<void> {
    console.log("Processing file ", file.base);

    let workflow;
    try {
        workflow = JSON.parse(fs.readFileSync(path.join(jobFolder, "workflow.json")).toString());
    } catch (err) {
        if (err instanceof Error) {
            console.error("malformed workflow-json was provided", err.message);
            return;
        }
        throw err;
    }

    // `?.` because a workflow.json containing just `null` is valid JSON as well.
    if (!workflow?.actions) {
        console.error("The provided workflow does not contain any actions.");
        return;
    }

    console.log("Reading File");

    const processingPath = filePath + ".processing-pdf";
    let data: Buffer;
    try {
        data = await fs.promises.readFile(filePath);
        // The rename takes the file out of the watcher's ".pdf" filter while it is processed.
        await fs.promises.rename(filePath, processingPath);
    } catch (err) {
        if ((err as NodeJS.ErrnoException | null)?.code === "ENOENT") {
            console.log(`${filePath} does not exist anymore. Either it was already processed or it was deleted by the user.`);
        } else {
            console.error(`Could not read ${filePath}:`, err);
        }
        return;
    }

    const input: PdfFile = new PdfFile(file.name, new Uint8Array(data), RepresentationType.Uint8Array, file.name);

    // TODO: Check if file type == inputType for operator

    try {
        const pdfResults = await traverseOperations(workflow.actions, [input], (state) => {
            console.log("State: ", state);
        });

        console.log("Download");

        const outFolder = path.join(jobFolder, "out/");
        await fs.promises.mkdir(outFolder, { recursive: true });
        await Promise.all(pdfResults.map(async (pdfResult) => {
            await fs.promises.writeFile(path.join(outFolder, pdfResult.filename + ".pdf"), await pdfResult.uint8Array);
        }));

        // Reached only when every output is on disk, so a failed write never costs the input.
        await fs.promises.rm(processingPath);
    } catch (err) {
        if ((err as { validationError?: unknown } | null)?.validationError) {
            // Bad Request
            console.log(err);
        }
        else if (err instanceof Error) {
            console.error("Internal Server Error", err);
        } else {
            throw err;
        }
    }
}
|
@ -81,7 +81,7 @@ router.post("/:workflowUuid?", [
|
||||
});
|
||||
}
|
||||
else {
|
||||
console.log("Start Aync Workflow");
|
||||
console.log("Start Async Workflow");
|
||||
// TODO: UUID collision checks
|
||||
let workflowID = req.params.workflowUuid;
|
||||
if(!workflowID)
|
||||
|
@ -3,7 +3,7 @@ import { Response } from "express";
|
||||
import { PdfFile } from "@stirling-pdf/shared-operations/src/wrappers/PdfFile";
|
||||
import Archiver from "archiver";
|
||||
|
||||
export async function respondWithFile(res: Response, uint8Array: Uint8Array, filename: string, mimeType: string): Promise<void> {
|
||||
async function respondWithFile(res: Response, uint8Array: Uint8Array, filename: string, mimeType: string): Promise<void> {
|
||||
res.writeHead(200, {
|
||||
"Content-Type": mimeType,
|
||||
"Content-disposition": `attachment; filename="${filename}"`,
|
||||
@ -12,12 +12,12 @@ export async function respondWithFile(res: Response, uint8Array: Uint8Array, fil
|
||||
res.end(uint8Array);
|
||||
}
|
||||
|
||||
export async function respondWithPdfFile(res: Response, file: PdfFile): Promise<void> {
|
||||
async function respondWithPdfFile(res: Response, file: PdfFile): Promise<void> {
|
||||
const byteArray = await file.uint8Array;
|
||||
respondWithFile(res, byteArray, file.filename+".pdf", "application/pdf");
|
||||
}
|
||||
|
||||
export async function respondWithZip(res: Response, filename: string, files: {uint8Array: Uint8Array, filename: string}[]): Promise<void> {
|
||||
async function respondWithZip(res: Response, filename: string, files: {uint8Array: Uint8Array, filename: string}[]): Promise<void> {
|
||||
if (files.length == 0) {
|
||||
res.status(500).json({"warning": "The workflow had no outputs."});
|
||||
return;
|
||||
@ -58,28 +58,3 @@ export async function respondWithPdfFiles(res: Response, pdfFiles: PdfFile[] | u
|
||||
respondWithZip(res, filename, files);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Answers with HTTP 400 and a single error entry stating that the `pdfFile`
 * upload is required.
 *
 * @param res Response to write the error to.
 */
export function response_mustHaveExactlyOneFile(res: Response): void {
    const missingFileError = {
        "message": "file is required",
        "path": ["pdfFile"],
        "type": "file",
        "context": { "label": "pdfFile", "key": "pdfFile" },
    };

    res.status(400).send([missingFileError]);
}
|
||||
|
||||
/**
 * Answers with HTTP 400 and a single `dependency_error` entry naming the
 * dependency that is not set up correctly.
 *
 * @param res            Response to write the error to.
 * @param dependencyName Name of the dependency, used in the message.
 */
export function response_dependencyNotConfigured(res: Response, dependencyName: string): void {
    const message = `${dependencyName} is not configured correctly on the server.`;

    res.status(400).send([{ "message": message, "type": "dependency_error" }]);
}
|
||||
|
@ -1,106 +0,0 @@
|
||||
|
||||
import fs from "fs";
|
||||
import os from "os";
|
||||
import path from "path";
|
||||
import { exec, spawn } from "child_process";
|
||||
import { PdfFile, RepresentationType } from "@stirling-pdf/shared-operations/src/wrappers/PdfFile";
|
||||
|
||||
/**
 * Converts a document to PDF by running LibreOffice on a temporary copy.
 *
 * The bytes are written into a fresh directory below `<tmpdir>/StirlingPDF`,
 * converted there, and the directory is removed again whether or not the
 * conversion succeeds.
 *
 * @param byteArray Content of the source document.
 * @param filename  Name of the source document; only its last path segment is used.
 * @returns The converted document.
 * @throws Error if LibreOffice fails or produces no PDF.
 */
export async function fileToPdf(byteArray: Uint8Array, filename: string): Promise<PdfFile> {
    const parentDir = path.join(os.tmpdir(), "StirlingPDF");
    fs.mkdirSync(parentDir, {recursive: true});
    const tempDir = fs.mkdtempSync(parentDir+"/");

    try {
        // basename: a name such as "../../x" must not be able to escape the temp dir.
        const srcFile = path.join(tempDir, path.basename(filename));
        const randFolderName = path.parse(tempDir).base;

        await writeBytesToFile(srcFile, byteArray);

        const messages = await runLibreOfficeCommand(randFolderName, ["--headless","--convert-to","pdf",srcFile,"--outdir",tempDir]);

        const files = fs.readdirSync(tempDir).filter(file => file.endsWith(".pdf"));
        if (files.length === 0) {
            throw new Error("File to pdf failed: no output files found. Messages: "+messages);
        }
        if (files.length > 1) {
            console.warn("Ambiguous file to pdf outputs: Returning first result", files);
        }

        const outputFileName = files[0];
        const outputBytes = await readBytesFromFile(path.join(tempDir, outputFileName));

        return new PdfFile(outputFileName, outputBytes, RepresentationType.Uint8Array);
    } finally {
        // Runs on failure as well, so aborted conversions do not pile up in the temp folder.
        fs.rmSync(tempDir, {recursive: true, force: true});
    }
}
|
||||
|
||||
/**
 * Checks whether a `libreoffice` executable is available on this machine.
 *
 * @returns A promise that resolves to `true` if `libreoffice --version` runs
 *          without error, prints nothing to stderr and reports a four-part
 *          version number; `false` otherwise. The promise never rejects.
 */
export function isLibreOfficeInstalled(): Promise<boolean> {
    return new Promise((resolve) => {
        exec("libreoffice --version", (error, stdout, stderr) => {
            if (error || stderr) {
                resolve(false);
                return;
            }
            // Regex literal on purpose: inside a string "\." collapses to ".", which matches any character.
            resolve(/LibreOffice (\d+\.){3}\d+/.test(stdout));
        });
    });
}
|
||||
|
||||
/**
 * Writes `bytes` to `filePath`, replacing the file if it already exists.
 *
 * @param filePath Destination path.
 * @param bytes    Content to write.
 * @returns A promise that resolves once the data is written and rejects with
 *          the file system error otherwise.
 */
function writeBytesToFile(filePath: string, bytes: Uint8Array): Promise<void> {
    return fs.promises.writeFile(filePath, bytes);
}
|
||||
|
||||
/**
 * Reads a whole file into memory.
 *
 * @param filePath Path of the file to read.
 * @returns The content of the file as a fresh `Uint8Array`.
 * @throws Error with the message `Error reading file: <reason>` if the file
 *         cannot be read.
 */
async function readBytesFromFile(filePath: string): Promise<Uint8Array> {
    try {
        return new Uint8Array(await fs.promises.readFile(filePath));
    } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Error reading file: ${reason}`);
    }
}
|
||||
|
||||
/**
 * Runs `libreoffice` with the given arguments and collects what it prints.
 *
 * @param idKey Label that is put in front of every log line of this run.
 * @param args  Command line arguments passed to `libreoffice`.
 * @returns The chunks written to stdout, in order of arrival.
 * @throws Error if the process cannot be started, exits with a non-zero code
 *         or is terminated by a signal.
 */
function runLibreOfficeCommand(idKey: string, args: string[]): Promise<string[]> {
    return new Promise((resolve, reject) => {
        const messageList: string[] = [];

        // Not called `process`, which would shadow the Node.js global of the same name.
        const child = spawn("libreoffice", args);

        child.stdout.on("data", (data) => {
            const dataStr = data.toString();
            console.log(`Progress ${idKey}:`, dataStr);
            messageList.push(dataStr);
        });

        child.stderr.on("data", (data) => {
            console.error(`stderr ${idKey}:`, data.toString());
        });

        // "close" instead of "exit": it fires only after the stdio streams have ended,
        // so no late output is missing from messageList.
        child.on("close", (code, signal) => {
            if (code === 0) {
                resolve(messageList);
            } else if (code === null) {
                reject(new Error(`Command was terminated by signal ${signal}`));
            } else {
                reject(new Error(`Command failed with exit code ${code}`));
            }
        });

        // Emitted e.g. when the executable cannot be found.
        child.on("error", (err) => {
            reject(err);
        });
    });
}
|
@ -19,6 +19,10 @@ export default defineConfig({
|
||||
|
||||
// tell the plugin where is your project entry
|
||||
appPath: './src/index.ts',
|
||||
|
||||
// Optional, default: false
|
||||
// if you want to init your app on boot, set this to true
|
||||
initAppOnBoot: true,
|
||||
}),
|
||||
topLevelAwait({
|
||||
// The export name of top-level await promise for each chunk module
|
||||
|
@ -13,16 +13,19 @@ export async function detectQRCodePages(file: PdfFile) {
|
||||
const page = await pdfDoc.getPage(i + 1);
|
||||
|
||||
const images = await getImagesOnPage(page);
|
||||
// console.log("images:", images);
|
||||
console.log("images:", images);
|
||||
for (const image of images) {
|
||||
const data = await checkForQROnImage(image);
|
||||
if(["https://github.com/Stirling-Tools/Stirling-PDF", "https://github.com/Frooodle/Stirling-PDF"].includes(data)) {
|
||||
pagesWithQR.push(i);
|
||||
}
|
||||
else {
|
||||
console.log("Found QR code with unrelated data: " + data);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(pagesWithQR.length == 0) {
|
||||
console.warn("Could not find any QR Codes in the provided PDF.");
|
||||
console.warn("Could not find any QR Codes in the provided PDF. This may happen if the provided QR-Code is not an image but a path (e.g. SVG).");
|
||||
}
|
||||
return pagesWithQR;
|
||||
}
|
||||
|
@ -16,10 +16,11 @@ export async function getImagesOnPage(page: PDFPageProxy): Promise<PDFJSImage[]>
|
||||
const images: PDFJSImage[] = [];
|
||||
for (let j=0; j < ops.fnArray.length; j++) {
|
||||
if (ops.fnArray[j] == PDFJS.OPS.paintImageXObject) {
|
||||
const image: PDFJSImage = page.objs.get(ops.argsArray[j][0])
|
||||
|
||||
console.log("Found", ops.argsArray[j])
|
||||
page.objs.get(ops.argsArray[j][0], (image: PDFJSImage) => {
|
||||
console.log("Image: ", image);
|
||||
images.push(image);
|
||||
})
|
||||
}
|
||||
}
|
||||
return images;
|
||||
|
Loading…
x
Reference in New Issue
Block a user