mirror of
https://github.com/Stirling-Tools/Stirling-PDF.git
synced 2025-04-19 19:21:18 +00:00
Detect-Empty-Pages node (PDFJS, OpenCV)
This commit is contained in:
parent
0320d76f05
commit
1588ae4948
@ -1,4 +1,6 @@
|
||||
import PDFLib from 'pdf-lib';
|
||||
import OpenCV from 'opencv-wasm';
|
||||
import PDFJS from "pdfjs-dist";
|
||||
import * as pdfcpuWraopper from "./public/wasm/pdfcpu-wrapper-node.js";
|
||||
|
||||
import { extractPages as dependantExtractPages } from "./public/functions/extractPages.js";
|
||||
@ -10,6 +12,7 @@ import { scalePage as dependantScalePage } from './public/functions/scalePage.js
|
||||
import { splitPDF as dependantSplitPDF } from './public/functions/splitPDF.js';
|
||||
import { editMetadata as dependantEditMetadata } from './public/functions/editMetadata.js';
|
||||
import { organizePages as dependantOrganizePages } from './public/functions/organizePages.js';
|
||||
import { removeBlankPages as dependantRemoveBlankPages} from './public/functions/removeBlankPages.js';
|
||||
|
||||
export async function extractPages(snapshot, pagesToExtractArray) {
|
||||
return dependantExtractPages(snapshot, pagesToExtractArray, PDFLib);
|
||||
@ -45,4 +48,8 @@ export async function editMetadata(snapshot, metadata) {
|
||||
|
||||
export async function organizePages(snapshot, operation, customOrderString) {
|
||||
return dependantOrganizePages(snapshot, operation, customOrderString, PDFLib);
|
||||
}
|
||||
|
||||
export async function removeBlankPages(snapshot, whiteThreashold) {
|
||||
return dependantRemoveBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV);
|
||||
}
|
114
package-lock.json
generated
114
package-lock.json
generated
@ -39,6 +39,22 @@
|
||||
"negotiator": "0.6.3"
|
||||
}
|
||||
},
|
||||
"ajv": {
|
||||
"version": "6.12.6",
|
||||
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
|
||||
"integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
|
||||
"requires": {
|
||||
"fast-deep-equal": "^3.1.1",
|
||||
"fast-json-stable-stringify": "^2.0.0",
|
||||
"json-schema-traverse": "^0.4.1",
|
||||
"uri-js": "^4.2.2"
|
||||
}
|
||||
},
|
||||
"ajv-keywords": {
|
||||
"version": "3.5.2",
|
||||
"resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz",
|
||||
"integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ=="
|
||||
},
|
||||
"archiver": {
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/archiver/-/archiver-6.0.1.tgz",
|
||||
@ -103,6 +119,11 @@
|
||||
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
||||
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA=="
|
||||
},
|
||||
"big.js": {
|
||||
"version": "5.2.2",
|
||||
"resolved": "https://registry.npmjs.org/big.js/-/big.js-5.2.2.tgz",
|
||||
"integrity": "sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ=="
|
||||
},
|
||||
"bl": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
|
||||
@ -252,6 +273,11 @@
|
||||
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
|
||||
"integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow=="
|
||||
},
|
||||
"emojis-list": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/emojis-list/-/emojis-list-3.0.0.tgz",
|
||||
"integrity": "sha512-/kyM18EfinwXZbno9FyUGeFh87KC8HRQBQGildHZbEuRyWFOmv1U10o9BBp8XVZDVNNuQKyIGIu5ZYAAXJ0V2Q=="
|
||||
},
|
||||
"encodeurl": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
|
||||
@ -321,6 +347,11 @@
|
||||
"busboy": "^1.6.0"
|
||||
}
|
||||
},
|
||||
"fast-deep-equal": {
|
||||
"version": "3.1.3",
|
||||
"resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
|
||||
"integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="
|
||||
},
|
||||
"fast-extend": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/fast-extend/-/fast-extend-1.0.2.tgz",
|
||||
@ -331,6 +362,11 @@
|
||||
"resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz",
|
||||
"integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ=="
|
||||
},
|
||||
"fast-json-stable-stringify": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz",
|
||||
"integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw=="
|
||||
},
|
||||
"finalhandler": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.2.0.tgz",
|
||||
@ -467,6 +503,19 @@
|
||||
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
|
||||
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ=="
|
||||
},
|
||||
"json-schema-traverse": {
|
||||
"version": "0.4.1",
|
||||
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
|
||||
"integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg=="
|
||||
},
|
||||
"json5": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/json5/-/json5-1.0.2.tgz",
|
||||
"integrity": "sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA==",
|
||||
"requires": {
|
||||
"minimist": "^1.2.0"
|
||||
}
|
||||
},
|
||||
"lazystream": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/lazystream/-/lazystream-1.0.1.tgz",
|
||||
@ -504,6 +553,16 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"loader-utils": {
|
||||
"version": "1.4.2",
|
||||
"resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-1.4.2.tgz",
|
||||
"integrity": "sha512-I5d00Pd/jwMD2QCduo657+YM/6L3KZu++pmX9VFncxaxvHcru9jx1lBaFft+r4Mt2jK0Yhp41XlRAihzPxHNCg==",
|
||||
"requires": {
|
||||
"big.js": "^5.2.2",
|
||||
"emojis-list": "^3.0.0",
|
||||
"json5": "^1.0.1"
|
||||
}
|
||||
},
|
||||
"lodash": {
|
||||
"version": "4.17.21",
|
||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
||||
@ -559,6 +618,11 @@
|
||||
"brace-expansion": "^2.0.1"
|
||||
}
|
||||
},
|
||||
"minimist": {
|
||||
"version": "1.2.8",
|
||||
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
|
||||
"integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA=="
|
||||
},
|
||||
"ms": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
|
||||
@ -569,6 +633,11 @@
|
||||
"resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz",
|
||||
"integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg=="
|
||||
},
|
||||
"node-ensure": {
|
||||
"version": "0.0.0",
|
||||
"resolved": "https://registry.npmjs.org/node-ensure/-/node-ensure-0.0.0.tgz",
|
||||
"integrity": "sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw=="
|
||||
},
|
||||
"normalize-path": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz",
|
||||
@ -595,6 +664,11 @@
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"opencv-wasm": {
|
||||
"version": "4.3.0-10",
|
||||
"resolved": "https://registry.npmjs.org/opencv-wasm/-/opencv-wasm-4.3.0-10.tgz",
|
||||
"integrity": "sha512-EWmWLUzp2suoc6N44Y4ouWT85QwvShx23Q430R+lp6NyS828bjQn6mCgA3NJ6Z/S59aaTeeu+RhqPQIJIYld1w=="
|
||||
},
|
||||
"pako": {
|
||||
"version": "1.0.11",
|
||||
"resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz",
|
||||
@ -621,6 +695,15 @@
|
||||
"tslib": "^1.11.1"
|
||||
}
|
||||
},
|
||||
"pdfjs-dist": {
|
||||
"version": "2.0.943",
|
||||
"resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-2.0.943.tgz",
|
||||
"integrity": "sha512-iLhNcm4XceTHRaSU5o22ZGCm4YpuW5+rf4+BJFH/feBhMQLbCGBry+Jet8Q419QDI4qgARaIQzXuiNrsNWS8Yw==",
|
||||
"requires": {
|
||||
"node-ensure": "^0.0.0",
|
||||
"worker-loader": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"process-nextick-args": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
|
||||
@ -635,6 +718,11 @@
|
||||
"ipaddr.js": "1.9.1"
|
||||
}
|
||||
},
|
||||
"punycode": {
|
||||
"version": "2.3.0",
|
||||
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
|
||||
"integrity": "sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA=="
|
||||
},
|
||||
"qs": {
|
||||
"version": "6.11.0",
|
||||
"resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz",
|
||||
@ -692,6 +780,15 @@
|
||||
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
|
||||
"integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
|
||||
},
|
||||
"schema-utils": {
|
||||
"version": "0.4.7",
|
||||
"resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-0.4.7.tgz",
|
||||
"integrity": "sha512-v/iwU6wvwGK8HbU9yi3/nhGzP0yGSuhQMzL6ySiec1FSrZZDkhm4noOSWzrNFo/jEc+SJY6jRTwuwbSXJPDUnQ==",
|
||||
"requires": {
|
||||
"ajv": "^6.1.0",
|
||||
"ajv-keywords": "^3.1.0"
|
||||
}
|
||||
},
|
||||
"send": {
|
||||
"version": "0.18.0",
|
||||
"resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz",
|
||||
@ -808,6 +905,14 @@
|
||||
"resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
|
||||
"integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ=="
|
||||
},
|
||||
"uri-js": {
|
||||
"version": "4.4.1",
|
||||
"resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz",
|
||||
"integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==",
|
||||
"requires": {
|
||||
"punycode": "^2.1.0"
|
||||
}
|
||||
},
|
||||
"util-deprecate": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
|
||||
@ -823,6 +928,15 @@
|
||||
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
|
||||
"integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg=="
|
||||
},
|
||||
"worker-loader": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/worker-loader/-/worker-loader-2.0.0.tgz",
|
||||
"integrity": "sha512-tnvNp4K3KQOpfRnD20m8xltE3eWh89Ye+5oj7wXEEHKac1P4oZ6p9oTj8/8ExqoSBnk9nu5Pr4nKfQ1hn2APJw==",
|
||||
"requires": {
|
||||
"loader-utils": "^1.0.0",
|
||||
"schema-utils": "^0.4.0"
|
||||
}
|
||||
},
|
||||
"wrappy": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
|
||||
|
@ -13,7 +13,9 @@
|
||||
"archiver": "^6.0.1",
|
||||
"express": "^4.18.2",
|
||||
"express-fileupload": "^1.4.1",
|
||||
"pdf-lib": "^1.17.1"
|
||||
"opencv-wasm": "^4.3.0-10",
|
||||
"pdf-lib": "^1.17.1",
|
||||
"pdfjs-dist": "^2.0.943"
|
||||
},
|
||||
"type": "module"
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
// PDFLib gets importet via index.html script-tag
|
||||
// TODO: OpenCV
|
||||
import * as pdfcpuWraopper from "./wasm/pdfcpu-wrapper-browser.js";
|
||||
|
||||
import { extractPages as dependantExtractPages } from "./functions/extractPages.js";
|
||||
@ -10,6 +11,7 @@ import { scalePage as dependantScalePage } from './functions/scalePage.js';
|
||||
import { splitPDF as dependantSplitPDF } from './functions/splitPDF.js';
|
||||
import { editMetadata as dependantEditMetadata} from "./functions/editMetadata.js";
|
||||
import { organizePages as dependantOrganizePages} from "./functions/organizePages.js";
|
||||
import { removeBlankPages as dependantRemoveBlankPages} from "./functions/removeBlankPages.js";
|
||||
|
||||
export async function extractPages(snapshot, pagesToExtractArray) {
|
||||
return dependantExtractPages(snapshot, pagesToExtractArray, PDFLib);
|
||||
@ -45,4 +47,8 @@ export async function editMetadata(snapshot, metadata) {
|
||||
|
||||
export async function organizePages(snapshot, operation, customOrderString) {
|
||||
return dependantOrganizePages(snapshot, operation, customOrderString, PDFLib);
|
||||
}
|
||||
|
||||
export async function removeBlankPages(snapshot, whiteThreashold) {
|
||||
return dependantRemoveBlankPages(snapshot, whiteThreashold, PDFLib, OpenCV);
|
||||
}
|
67
public/functions/removeBlankPages.js
Normal file
67
public/functions/removeBlankPages.js
Normal file
@ -0,0 +1,67 @@
|
||||
export async function removeBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV) {
|
||||
|
||||
const pdfDoc = await PDFJS.getDocument(snapshot).promise;
|
||||
|
||||
const emptyPages = [];
|
||||
for (let i = 1; i <= pdfDoc.numPages; i++) {
|
||||
const page = await pdfDoc.getPage(i);
|
||||
|
||||
if(!await hasText(page)) {
|
||||
console.log("Found text on Page, page is not empty");
|
||||
continue;
|
||||
}
|
||||
|
||||
if(!await areImagesBlank(page, whiteThreashold)) {
|
||||
console.log("Found image on Page, page is not empty");
|
||||
continue;
|
||||
}
|
||||
|
||||
emptyPages.push[i];
|
||||
}
|
||||
|
||||
console.log(emptyPages);
|
||||
|
||||
// TODO: Remove emptyPages using pdflib
|
||||
// return pdf;
|
||||
|
||||
async function areImagesBlank(page, whiteThreashold) {
|
||||
const ops = await page.getOperatorList();
|
||||
|
||||
for (var j=0; j < ops.fnArray.length; j++) {
|
||||
if (ops.fnArray[j] == PDFJS.OPS.paintJpegXObject || ops.fnArray[j] == PDFJS.OPS.paintImageXObject) {
|
||||
const image = page.objs.get(ops.argsArray[j][0]);
|
||||
if(image.data) {
|
||||
return isImageBlank(image, whiteThreashold);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
async function hasText(page) {
|
||||
const textContent = await page.getTextContent();
|
||||
return textContent.items.length === 0;
|
||||
}
|
||||
|
||||
async function isImageBlank(image, threshold) {
|
||||
const src = new OpenCV.cv.Mat(image.width, image.height, OpenCV.cv.CV_8UC4);
|
||||
src.data.set(image.data);
|
||||
// Convert the image to grayscale
|
||||
const gray = new OpenCV.cv.Mat();
|
||||
OpenCV.cv.cvtColor(src, gray, OpenCV.cv.COLOR_RGBA2GRAY);
|
||||
|
||||
// Calculate the mean value of the grayscale image
|
||||
const meanValue = OpenCV.cv.mean(gray);
|
||||
|
||||
// Free memory
|
||||
src.delete();
|
||||
gray.delete();
|
||||
|
||||
// Check if the mean value is below the threshold
|
||||
if (meanValue[0] <= threshold) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
@ -109,6 +109,12 @@ export async function * traverseOperations(operations, input, Functions) {
|
||||
input.buffer = await Functions.organizePages(input.buffer, operation.values["operation"], operation.values["customOrderString"]);
|
||||
});
|
||||
break;
|
||||
case "removeBlankPages":
|
||||
yield* nToN(input, operation, async (input) => {
|
||||
input.fileName += "_removedBlanks";
|
||||
input.buffer = await Functions.removeBlankPages(input.buffer, operation.values["whiteThreashold"]);
|
||||
});
|
||||
break;
|
||||
default:
|
||||
throw new Error(`${operation.type} not implemented yet.`);
|
||||
break;
|
||||
|
Loading…
x
Reference in New Issue
Block a user