splitPagesByPreset

This commit is contained in:
Felix Kaspar 2024-05-18 21:56:47 +02:00
parent d0dbb7e708
commit 771e66100f
5 changed files with 119 additions and 82 deletions

View File

@ -5,9 +5,7 @@ import { PdfFile } from "../../wrappers/PdfFile";
import { getImagesOnPage, PDFJSImage } from "./getImagesOnPage";
export async function detectQRCodePages(file: PdfFile) {
console.log("FileInQRPrev: ", file);
const pdfDoc = await file.pdfJsDocument;
console.log("FileInQRAfter: ", file);
const pagesWithQR: number[] = [];
for (let i = 0; i < pdfDoc.numPages; i++) {

View File

@ -20,9 +20,9 @@ export class RotatePages extends Operator {
/** Schema for the PDF file(s) this operator takes as input, with localised label and description. */
protected static inputSchema = JoiPDFFileSchema.label(i18next.t("inputs.pdffile.name")).description(i18next.t("inputs.pdffile.description"));

/**
 * Schema for the operator's values.
 *
 * `rotation` is either a single integer in the range [-360, 360] or a
 * comma array whose items are integers in that range. Each alternative is
 * marked required individually.
 */
protected static valueSchema = Joi.object({
    rotation: Joi.alternatives().try(
        Joi.number().integer().min(-360).max(360).required(),
        CommaArrayJoiExt.comma_array().items(Joi.number().integer().min(-360).max(360)).required()
    )
        .label(i18next.t("values.rotation.friendlyName", { ns: "rotatePages" })).description(i18next.t("values.rotation.description", { ns: "rotatePages" }))
        .example("90").example("-180").example("[90, 0, 270]"),
});

View File

@ -20,9 +20,9 @@ export class ScaleContent extends Operator {
/** Schema for the PDF file(s) this operator takes as input, with localised label and description. */
protected static inputSchema = JoiPDFFileSchema.label(i18next.t("inputs.pdffile.name")).description(i18next.t("inputs.pdffile.description"));

/**
 * Schema for the operator's values.
 *
 * `scaleFactor` is either a single number or a comma array of numbers.
 * Each alternative is marked required individually.
 */
protected static valueSchema = Joi.object({
    scaleFactor: Joi.alternatives().try(
        Joi.number().required(),
        CommaArrayJoiExt.comma_array().items(Joi.number()).required()
    )
        .label(i18next.t("values.scaleFactor.friendlyName", { ns: "scaleContent" })).description(i18next.t("values.scaleFactor.description", { ns: "scaleContent" }))
        .example("2").example("1.5").example("[1, 1.5, 0.9]"),
});

View File

@ -1,44 +1,82 @@
import { Operator, Progress, oneToN } from ".";
import Joi from "@stirling-tools/joi";
import { JoiPDFFileSchema } from "../wrappers/PdfFileJoi";
import i18next from "i18next";
import { PdfFile } from "../wrappers/PdfFile";
import { splitPagesByIndex } from "./common/splitPagesByIndex";
import { detectEmptyPages } from "./common/detectEmptyPages";
import { detectQRCodePages } from "./common/detectQRCodePages";
/** Parameters for the function-style `splitPagesByPreset` entry point. */
export interface SplitPageByPresetParamsType {
    /** The PDF to split. */
    file: PdfFile;
    /** Which kind of page marks a split point. */
    type: "BAR_CODE"|"QR_CODE"|"BLANK_PAGE";
    /** Threshold forwarded to `detectEmptyPages`; only read when `type` is "BLANK_PAGE". */
    whiteThreashold?: number;
}

/**
 * Splits a PDF at the pages detected by the chosen preset.
 *
 * Kept next to the operator class so existing callers of the function API
 * continue to work.
 *
 * @param params file, preset type and (for "BLANK_PAGE") the white threshold
 * @returns the files produced by `splitPagesByIndex`, each with `_split-<i>` appended to its filename
 * @throws Error for "BAR_CODE" (not implemented), for "BLANK_PAGE" without a
 *         threshold, and for any unknown type
 */
export async function splitPagesByPreset(params: SplitPageByPresetParamsType): Promise<PdfFile[]> {
    const { file, type, whiteThreashold } = params;

    console.log("File: ", file);

    let splitAtPages: number[];
    switch (type) {
    case "BAR_CODE":
        // TODO: Implement
        throw new Error("This split-type has not been implemented yet");

    case "QR_CODE":
        splitAtPages = await detectQRCodePages(file);
        break;

    case "BLANK_PAGE":
        // Only a missing value is an error: 0 is a valid threshold (the
        // operator schema accepts 0..255), so a truthiness check would
        // wrongly reject it.
        if (whiteThreashold == null)
            throw new Error("White threshold not provided");
        splitAtPages = await detectEmptyPages(file, whiteThreashold);
        break;

    default:
        throw new Error("An invalid split-type was provided.");
    }

    console.debug("Split At Pages: ", splitAtPages);

    const newFiles = await splitPagesByIndex(file, splitAtPages);
    for (let i = 0; i < newFiles.length; i++) {
        newFiles[i].filename += "_split-"+i;
    }
    return newFiles;
}

/**
 * Operator that splits each input PDF at pages detected by a preset
 * (QR-code pages or blank pages; bar-code detection is not implemented).
 */
export class SplitPagesByPreset extends Operator {
    static type = "splitPagesByPreset";

    /**
     * Validation & Localisation
     */

    protected static inputSchema = JoiPDFFileSchema.label(i18next.t("inputs.pdffile.name")).description(i18next.t("inputs.pdffile.description"));

    /**
     * One object shape per preset, distinguished by `type`. Only the
     * "BLANK_PAGE" shape carries `whiteThreashold` (a number from 0 to 255).
     */
    protected static valueSchema = Joi.alternatives().try(
        Joi.object({
            type: Joi.string().valid("BAR_CODE").required()
        }),
        Joi.object({
            type: Joi.string().valid("QR_CODE").required()
        }),
        Joi.object({
            type: Joi.string().valid("BLANK_PAGE").required(),
            whiteThreashold: Joi.number().min(0).max(255).required()
        }),
    )
        .label(i18next.t("values.splitSettings.friendlyName", { ns: "splitPagesByPreset" })).description(i18next.t("values.splitSettings.description", { ns: "splitPagesByPreset" })
        );

    protected static outputSchema = JoiPDFFileSchema.label(i18next.t("outputs.pdffile.name")).description(i18next.t("outputs.pdffile.description"));

    static schema = Joi.object({
        input: SplitPagesByPreset.inputSchema,
        values: SplitPagesByPreset.valueSchema.required(),
        output: SplitPagesByPreset.outputSchema
    }).label(i18next.t("friendlyName", { ns: "splitPagesByPreset" })).description(i18next.t("description", { ns: "splitPagesByPreset" }));

    /**
     * Logic
     */

    /**
     * Splits every input file at the pages detected for the configured preset.
     *
     * @param input files to split
     * @param progressCallback called once per processed file with `index/max` as the operation progress
     * @returns whatever `oneToN` yields for the per-file split results
     * @throws Error for "BAR_CODE" (not implemented) and for any unknown type
     */
    async run(input: PdfFile[], progressCallback: (state: Progress) => void): Promise<PdfFile[]> {
        return oneToN<PdfFile, PdfFile>(input, async (input, index, max) => {
            let splitAtPages: number[];
            console.log("Running Detection...");

            switch (this.actionValues.type) {
            case "BAR_CODE":
                // TODO: Implement
                throw new Error("This split-type has not been implemented yet");

            case "QR_CODE":
                splitAtPages = await detectQRCodePages(input);
                break;

            case "BLANK_PAGE":
                splitAtPages = await detectEmptyPages(input, this.actionValues.whiteThreashold);
                break;

            default:
                throw new Error("An invalid split-type was provided.");
            }
            console.log("Split at Pages: ", splitAtPages);

            const newFiles = await splitPagesByIndex(input, splitAtPages);
            for (let i = 0; i < newFiles.length; i++) {
                newFiles[i].filename += "_split-"+i;
            }

            progressCallback({ curFileProgress: 1, operationProgress: index/max });
            return newFiles;
        });
    }
}

View File

@ -22,25 +22,25 @@ export class PdfFile {
/**
 * The document as raw PDF bytes.
 *
 * If the file is currently held as a pdf-lib or PDF.js document, it is
 * serialised (`save()` / `getData()`) and the bytes are assigned back
 * through the `uint8Array` setter before being returned.
 *
 * @returns a promise for the bytes; it rejects if serialisation fails
 * @throws Error synchronously when the representation type is not handled
 */
get uint8Array() : Promise<Uint8Array> {
    switch (this.representationType) {
    case RepresentationType.Uint8Array:
        return Promise.resolve(this.representation as Uint8Array);
    case RepresentationType.PDFLibDocument:
        // An async function (rather than an async Promise executor) so a
        // failing save() rejects the returned promise instead of leaving
        // it pending forever.
        return (async () => {
            const uint8Array = await (this.representation as PDFLibDocument).save();
            this.uint8Array = uint8Array;
            return uint8Array;
        })();
    case RepresentationType.PDFJSDocument:
        return (async () => {
            const uint8Array = await (this.representation as PDFJSDocument).getData();
            this.uint8Array = uint8Array;
            return uint8Array;
        })();
    default:
        console.error("unhandeled PDF type: " + typeof this.representation );
        throw Error("unhandeled PDF type");
    }
}
set uint8Array(value: Uint8Array) {
@ -50,19 +50,19 @@ export class PdfFile {
/**
 * The document as a pdf-lib document.
 *
 * If the file is not already held as one, its bytes are obtained via
 * `uint8Array`, loaded with `updateMetadata: false`, and the result is
 * assigned back through the `pdfLibDocument` setter before being returned.
 *
 * @returns a promise for the pdf-lib document; it rejects if obtaining the
 *          bytes or loading them fails
 */
get pdfLibDocument() : Promise<PDFLibDocument> {
    switch (this.representationType) {
    case RepresentationType.PDFLibDocument:
        return Promise.resolve(this.representation as PDFLibDocument);
    default:
        // An async function (rather than an async Promise executor) so
        // conversion errors reject the returned promise instead of
        // leaving it pending forever.
        return (async () => {
            const uint8Array = await this.uint8Array;
            const pdfLibDoc = await PDFLibDocument.load(uint8Array, {
                updateMetadata: false,
            });
            this.pdfLibDocument = pdfLibDoc;
            return pdfLibDoc;
        })();
    }
}
set pdfLibDocument(value: PDFLibDocument) {
@ -72,16 +72,17 @@ export class PdfFile {
/**
 * The document as a PDF.js document.
 *
 * If the file is not already held as one, the conversion is logged, the
 * bytes are obtained via `uint8Array` and opened with `PDFJS.getDocument`
 * (offscreen canvas support disabled), and the result is assigned back
 * through the `pdfJsDocument` setter before being returned.
 *
 * @returns a promise for the PDF.js document; it rejects if obtaining the
 *          bytes or opening them fails
 */
get pdfJsDocument() : Promise<PDFJSDocument> {
    switch (this.representationType) {
    case RepresentationType.PDFJSDocument:
        return Promise.resolve(this.representation as PDFJSDocument);
    default:
        // An async function (rather than an async Promise executor) so
        // conversion errors reject the returned promise instead of
        // leaving it pending forever.
        return (async () => {
            console.log(`Converting representationType-${this.representationType} to pdfJsDocument`);
            const pdfjsDoc = await PDFJS.getDocument({ data: await this.uint8Array, isOffscreenCanvasSupported: false }).promise;
            this.pdfJsDocument = pdfjsDoc;
            return pdfjsDoc;
        })();
    }
}
set pdfJsDocument(value: PDFJSDocument) {
@ -103,7 +104,7 @@ export class PdfFile {
}
/**
 * Creates a PdfFile from a file uploaded through Multer.
 *
 * The upload's buffer is copied into a new `Uint8Array` rather than being
 * type-cast, so the stored representation is an actual `Uint8Array`.
 *
 * @param value the uploaded file; its `originalname` becomes the filename
 * @returns a PdfFile with the Uint8Array representation type
 */
static fromMulterFile(value: Express.Multer.File): PdfFile {
    return new PdfFile(value.originalname, new Uint8Array(value.buffer), RepresentationType.Uint8Array);
}
static fromMulterFiles(values: Express.Multer.File[]): PdfFile[] {