splitPagesByPreset

This commit is contained in:
Felix Kaspar 2024-05-18 21:56:47 +02:00
parent d0dbb7e708
commit 771e66100f
5 changed files with 119 additions and 82 deletions

View File

@ -5,9 +5,7 @@ import { PdfFile } from "../../wrappers/PdfFile";
import { getImagesOnPage, PDFJSImage } from "./getImagesOnPage"; import { getImagesOnPage, PDFJSImage } from "./getImagesOnPage";
export async function detectQRCodePages(file: PdfFile) { export async function detectQRCodePages(file: PdfFile) {
console.log("FileInQRPrev: ", file);
const pdfDoc = await file.pdfJsDocument; const pdfDoc = await file.pdfJsDocument;
console.log("FileInQRAfter: ", file);
const pagesWithQR: number[] = []; const pagesWithQR: number[] = [];
for (let i = 0; i < pdfDoc.numPages; i++) { for (let i = 0; i < pdfDoc.numPages; i++) {

View File

@ -20,9 +20,9 @@ export class RotatePages extends Operator {
protected static inputSchema = JoiPDFFileSchema.label(i18next.t("inputs.pdffile.name")).description(i18next.t("inputs.pdffile.description")); protected static inputSchema = JoiPDFFileSchema.label(i18next.t("inputs.pdffile.name")).description(i18next.t("inputs.pdffile.description"));
protected static valueSchema = Joi.object({ protected static valueSchema = Joi.object({
rotation: Joi.alternatives().try( rotation: Joi.alternatives().try(
Joi.number().integer().min(-360).max(360), Joi.number().integer().min(-360).max(360).required(),
CommaArrayJoiExt.comma_array().items(Joi.number().integer().min(-360).max(360)) CommaArrayJoiExt.comma_array().items(Joi.number().integer().min(-360).max(360)).required()
).required() )
.label(i18next.t("values.rotation.friendlyName", { ns: "rotatePages" })).description(i18next.t("values.rotation.description", { ns: "rotatePages" })) .label(i18next.t("values.rotation.friendlyName", { ns: "rotatePages" })).description(i18next.t("values.rotation.description", { ns: "rotatePages" }))
.example("90").example("-180").example("[90, 0, 270]"), .example("90").example("-180").example("[90, 0, 270]"),
}); });

View File

@ -20,9 +20,9 @@ export class ScaleContent extends Operator {
protected static inputSchema = JoiPDFFileSchema.label(i18next.t("inputs.pdffile.name")).description(i18next.t("inputs.pdffile.description")); protected static inputSchema = JoiPDFFileSchema.label(i18next.t("inputs.pdffile.name")).description(i18next.t("inputs.pdffile.description"));
protected static valueSchema = Joi.object({ protected static valueSchema = Joi.object({
scaleFactor: Joi.alternatives().try( scaleFactor: Joi.alternatives().try(
Joi.number(), Joi.number().required(),
CommaArrayJoiExt.comma_array().items(Joi.number()) CommaArrayJoiExt.comma_array().items(Joi.number()).required()
).required() )
.label(i18next.t("values.scaleFactor.friendlyName", { ns: "scaleContent" })).description(i18next.t("values.scaleFactor.description", { ns: "scaleContent" })) .label(i18next.t("values.scaleFactor.friendlyName", { ns: "scaleContent" })).description(i18next.t("values.scaleFactor.description", { ns: "scaleContent" }))
.example("2").example("1.5").example("[1, 1.5, 0.9]"), .example("2").example("1.5").example("[1, 1.5, 0.9]"),
}); });

View File

@ -1,44 +1,82 @@
import { Operator, Progress, oneToN } from ".";
import Joi from "@stirling-tools/joi";
import { JoiPDFFileSchema } from "../wrappers/PdfFileJoi";
import i18next from "i18next";
import { PdfFile } from "../wrappers/PdfFile"; import { PdfFile } from "../wrappers/PdfFile";
import { splitPagesByIndex } from "./common/splitPagesByIndex"; import { splitPagesByIndex } from "./common/splitPagesByIndex";
import { detectEmptyPages } from "./common/detectEmptyPages"; import { detectEmptyPages } from "./common/detectEmptyPages";
import { detectQRCodePages } from "./common/detectQRCodePages"; import { detectQRCodePages } from "./common/detectQRCodePages";
export interface SplitPageByPresetParamsType {
file: PdfFile;
type: "BAR_CODE"|"QR_CODE"|"BLANK_PAGE";
whiteThreashold?: number;
}
export async function splitPagesByPreset(params: SplitPageByPresetParamsType): Promise<PdfFile[]> {
const { file, type, whiteThreashold } = params;
console.log("File: ", file); export class SplitPagesByPreset extends Operator {
static type = "splitPagesByPreset";
let splitAtPages: number[]; /**
switch (type) { * Validation & Localisation
case "BAR_CODE": */
// TODO: Implement
throw new Error("This split-type has not been implemented yet");
case "QR_CODE": protected static inputSchema = JoiPDFFileSchema.label(i18next.t("inputs.pdffile.name")).description(i18next.t("inputs.pdffile.description"));
splitAtPages = await detectQRCodePages(file); protected static valueSchema = Joi.alternatives().try(
break; Joi.object({
type: Joi.string().valid("BAR_CODE").required()
}),
Joi.object({
type: Joi.string().valid("QR_CODE").required()
}),
Joi.object({
type: Joi.string().valid("BLANK_PAGE").required(),
whiteThreashold: Joi.number().min(0).max(255).required()
}),
)
.label(i18next.t("values.splitSettings.friendlyName", { ns: "splitPagesByPreset" })).description(i18next.t("values.splitSettings.description", { ns: "splitPagesByPreset" })
);
protected static outputSchema = JoiPDFFileSchema.label(i18next.t("outputs.pdffile.name")).description(i18next.t("outputs.pdffile.description"));
case "BLANK_PAGE": static schema = Joi.object({
if (!whiteThreashold) input: SplitPagesByPreset.inputSchema,
throw new Error("White threshold not provided"); values: SplitPagesByPreset.valueSchema.required(),
splitAtPages = await detectEmptyPages(file, whiteThreashold); output: SplitPagesByPreset.outputSchema
break; }).label(i18next.t("friendlyName", { ns: "splitPagesByPreset" })).description(i18next.t("description", { ns: "splitPagesByPreset" }));
default:
throw new Error("An invalid split-type was provided."); /**
* Logic
*/
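/** Split each input PDF at the pages detected by the selected preset (QR code or blank page) and suffix each resulting filename with "_split-<index>" */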
async run(input: PdfFile[], progressCallback: (state: Progress) => void): Promise<PdfFile[]> {
return oneToN<PdfFile, PdfFile>(input, async (input, index, max) => {
let splitAtPages: number[];
console.log("Running Detection...");
switch (this.actionValues.type) {
case "BAR_CODE":
// TODO: Implement
throw new Error("This split-type has not been implemented yet");
case "QR_CODE":
splitAtPages = await detectQRCodePages(input);
break;
case "BLANK_PAGE":
splitAtPages = await detectEmptyPages(input, this.actionValues.whiteThreashold);
break;
default:
throw new Error("An invalid split-type was provided.");
}
console.log("Split at Pages: ", splitAtPages);
const newFiles = await splitPagesByIndex(input, splitAtPages);
for (let i = 0; i < newFiles.length; i++) {
newFiles[i].filename += "_split-"+i;
}
progressCallback({ curFileProgress: 1, operationProgress: index/max });
return newFiles;
});
} }
console.debug("Split At Pages: ", splitAtPages);
const newFiles = await splitPagesByIndex(file, splitAtPages);
for (let i = 0; i < newFiles.length; i++) {
newFiles[i].filename += "_split-"+i;
}
return newFiles;
} }

View File

@ -22,25 +22,25 @@ export class PdfFile {
get uint8Array() : Promise<Uint8Array> { get uint8Array() : Promise<Uint8Array> {
switch (this.representationType) { switch (this.representationType) {
case RepresentationType.Uint8Array: case RepresentationType.Uint8Array:
return new Promise((resolve) => { return new Promise((resolve) => {
resolve(this.representation as Uint8Array); resolve(this.representation as Uint8Array);
}); });
case RepresentationType.PDFLibDocument: case RepresentationType.PDFLibDocument:
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const uint8Array = await (this.representation as PDFLibDocument).save(); const uint8Array = await (this.representation as PDFLibDocument).save();
this.uint8Array = uint8Array; this.uint8Array = uint8Array;
resolve(uint8Array); resolve(uint8Array);
}); });
case RepresentationType.PDFJSDocument: case RepresentationType.PDFJSDocument:
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const uint8Array = await (this.representation as PDFJSDocument).getData(); const uint8Array = await (this.representation as PDFJSDocument).getData();
this.uint8Array = uint8Array; this.uint8Array = uint8Array;
resolve(uint8Array); resolve(uint8Array);
}); });
default: default:
console.error("unhandeled PDF type: " + typeof this.representation ); console.error("unhandeled PDF type: " + typeof this.representation );
throw Error("unhandeled PDF type"); throw Error("unhandeled PDF type");
} }
} }
set uint8Array(value: Uint8Array) { set uint8Array(value: Uint8Array) {
@ -50,19 +50,19 @@ export class PdfFile {
get pdfLibDocument() : Promise<PDFLibDocument> { get pdfLibDocument() : Promise<PDFLibDocument> {
switch (this.representationType) { switch (this.representationType) {
case RepresentationType.PDFLibDocument: case RepresentationType.PDFLibDocument:
return new Promise((resolve) => { return new Promise((resolve) => {
resolve(this.representation as PDFLibDocument); resolve(this.representation as PDFLibDocument);
}); });
default: default:
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const uint8Array = await this.uint8Array; const uint8Array = await this.uint8Array;
const pdfLibDoc = await PDFLibDocument.load(uint8Array, { const pdfLibDoc = await PDFLibDocument.load(uint8Array, {
updateMetadata: false, updateMetadata: false,
});
this.pdfLibDocument = pdfLibDoc;
resolve(pdfLibDoc);
}); });
this.pdfLibDocument = pdfLibDoc;
resolve(pdfLibDoc);
});
} }
} }
set pdfLibDocument(value: PDFLibDocument) { set pdfLibDocument(value: PDFLibDocument) {
@ -72,16 +72,17 @@ export class PdfFile {
get pdfJsDocument() : Promise<PDFJSDocument> { get pdfJsDocument() : Promise<PDFJSDocument> {
switch (this.representationType) { switch (this.representationType) {
case RepresentationType.PDFJSDocument: case RepresentationType.PDFJSDocument:
return new Promise((resolve) => { return new Promise((resolve) => {
resolve(this.representation as PDFJSDocument); resolve(this.representation as PDFJSDocument);
}); });
default: default:
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const pdfjsDoc = await PDFJS.getDocument({ data: await this.uint8Array, isOffscreenCanvasSupported: false }).promise; console.log(`Converting representationType-${this.representationType} to pdfJsDocument`);
this.pdfJsDocument = pdfjsDoc; const pdfjsDoc = await PDFJS.getDocument({ data: await this.uint8Array, isOffscreenCanvasSupported: false }).promise;
resolve(pdfjsDoc); this.pdfJsDocument = pdfjsDoc;
}); resolve(pdfjsDoc);
});
} }
} }
set pdfJsDocument(value: PDFJSDocument) { set pdfJsDocument(value: PDFJSDocument) {
@ -103,7 +104,7 @@ export class PdfFile {
} }
static fromMulterFile(value: Express.Multer.File): PdfFile { static fromMulterFile(value: Express.Multer.File): PdfFile {
return new PdfFile(value.originalname, value.buffer as Uint8Array, RepresentationType.Uint8Array); return new PdfFile(value.originalname, new Uint8Array(value.buffer), RepresentationType.Uint8Array);
} }
static fromMulterFiles(values: Express.Multer.File[]): PdfFile[] { static fromMulterFiles(values: Express.Multer.File[]): PdfFile[] {