mirror of
https://github.com/Stirling-Tools/Stirling-PDF.git
synced 2025-06-06 18:30:57 +00:00
QRCode Detection working.
This commit is contained in:
parent
4e8d8e3d53
commit
dd14b3a773
@ -1,7 +1,9 @@
|
|||||||
import PDFLib from 'pdf-lib';
|
import PDFLib from 'pdf-lib';
|
||||||
import OpenCV from 'opencv-wasm';
|
|
||||||
import PDFJS from "pdfjs-dist";
|
import PDFJS from "pdfjs-dist";
|
||||||
|
|
||||||
|
delete global.crypto; // TODO: I hate to do this, but the new node version forces me to, if anyone finds a better solution, please tell me!
|
||||||
import * as pdfcpuWraopper from "./public/wasm/pdfcpu-wrapper-node.js";
|
import * as pdfcpuWraopper from "./public/wasm/pdfcpu-wrapper-node.js";
|
||||||
|
import OpenCV from 'opencv-wasm';
|
||||||
|
|
||||||
import { extractPages as dependantExtractPages } from "./public/functions/extractPages.js";
|
import { extractPages as dependantExtractPages } from "./public/functions/extractPages.js";
|
||||||
import { impose as dependantImpose } from './public/functions/impose.js';
|
import { impose as dependantImpose } from './public/functions/impose.js';
|
||||||
@ -13,6 +15,7 @@ import { splitPDF as dependantSplitPDF } from './public/functions/splitPDF.js';
|
|||||||
import { editMetadata as dependantEditMetadata } from './public/functions/editMetadata.js';
|
import { editMetadata as dependantEditMetadata } from './public/functions/editMetadata.js';
|
||||||
import { organizePages as dependantOrganizePages } from './public/functions/organizePages.js';
|
import { organizePages as dependantOrganizePages } from './public/functions/organizePages.js';
|
||||||
import { removeBlankPages as dependantRemoveBlankPages} from './public/functions/removeBlankPages.js';
|
import { removeBlankPages as dependantRemoveBlankPages} from './public/functions/removeBlankPages.js';
|
||||||
|
import { splitOn as dependantSplitOn } from "./public/functions/splitOn.js";
|
||||||
|
|
||||||
export async function extractPages(snapshot, pagesToExtractArray) {
|
export async function extractPages(snapshot, pagesToExtractArray) {
|
||||||
return dependantExtractPages(snapshot, pagesToExtractArray, PDFLib);
|
return dependantExtractPages(snapshot, pagesToExtractArray, PDFLib);
|
||||||
@ -52,4 +55,8 @@ export async function organizePages(snapshot, operation, customOrderString) {
|
|||||||
|
|
||||||
export async function removeBlankPages(snapshot, whiteThreashold) {
|
export async function removeBlankPages(snapshot, whiteThreashold) {
|
||||||
return dependantRemoveBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV, PDFLib);
|
return dependantRemoveBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV, PDFLib);
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function splitOn(snapshot, type, whiteThreashold) {
|
||||||
|
return dependantSplitOn(snapshot, type, whiteThreashold, PDFJS, OpenCV, PDFLib);
|
||||||
}
|
}
|
5010
package-lock.json
generated
5010
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -13,6 +13,7 @@
|
|||||||
"archiver": "^6.0.1",
|
"archiver": "^6.0.1",
|
||||||
"express": "^4.18.2",
|
"express": "^4.18.2",
|
||||||
"express-fileupload": "^1.4.1",
|
"express-fileupload": "^1.4.1",
|
||||||
|
"jsqr": "^1.4.0",
|
||||||
"opencv-wasm": "^4.3.0-10",
|
"opencv-wasm": "^4.3.0-10",
|
||||||
"pdf-lib": "^1.17.1",
|
"pdf-lib": "^1.17.1",
|
||||||
"pdfjs-dist": "^2.0.943"
|
"pdfjs-dist": "^2.0.943"
|
||||||
|
@ -13,6 +13,7 @@ import { splitPDF as dependantSplitPDF } from './functions/splitPDF.js';
|
|||||||
import { editMetadata as dependantEditMetadata} from "./functions/editMetadata.js";
|
import { editMetadata as dependantEditMetadata} from "./functions/editMetadata.js";
|
||||||
import { organizePages as dependantOrganizePages} from "./functions/organizePages.js";
|
import { organizePages as dependantOrganizePages} from "./functions/organizePages.js";
|
||||||
import { removeBlankPages as dependantRemoveBlankPages} from "./functions/removeBlankPages.js";
|
import { removeBlankPages as dependantRemoveBlankPages} from "./functions/removeBlankPages.js";
|
||||||
|
import { splitOn as dependantSplitOn } from "./functions/splitOn.js";
|
||||||
|
|
||||||
export async function extractPages(snapshot, pagesToExtractArray) {
|
export async function extractPages(snapshot, pagesToExtractArray) {
|
||||||
return dependantExtractPages(snapshot, pagesToExtractArray, PDFLib);
|
return dependantExtractPages(snapshot, pagesToExtractArray, PDFLib);
|
||||||
@ -52,4 +53,8 @@ export async function organizePages(snapshot, operation, customOrderString) {
|
|||||||
|
|
||||||
export async function removeBlankPages(snapshot, whiteThreashold) {
|
export async function removeBlankPages(snapshot, whiteThreashold) {
|
||||||
return dependantRemoveBlankPages(snapshot, whiteThreashold, pdfjsLib, OpenCV, PDFLib);
|
return dependantRemoveBlankPages(snapshot, whiteThreashold, pdfjsLib, OpenCV, PDFLib);
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function splitOn(snapshot, type, whiteThreashold) {
|
||||||
|
return dependantSplitOn(snapshot, type, whiteThreashold, pdfjsLib, OpenCV, PDFLib);
|
||||||
}
|
}
|
@ -1,3 +1,5 @@
|
|||||||
|
import { getImagesOnPage } from "./getImagesOnPage.js";
|
||||||
|
|
||||||
export async function detectEmptyPages(snapshot, whiteThreashold, PDFJS, OpenCV) {
|
export async function detectEmptyPages(snapshot, whiteThreashold, PDFJS, OpenCV) {
|
||||||
const pdfDoc = await PDFJS.getDocument(snapshot).promise;
|
const pdfDoc = await PDFJS.getDocument(snapshot).promise;
|
||||||
|
|
||||||
@ -27,15 +29,10 @@ export async function detectEmptyPages(snapshot, whiteThreashold, PDFJS, OpenCV)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Reports whether every image painted on a page is blank.
 *
 * Relies on `PDFJS`, `getImagesOnPage` and `isImageBlank` being in scope from
 * the surrounding module (their definitions are outside this block).
 *
 * @param {object} page - PDF.js page proxy, forwarded to `getImagesOnPage`.
 * @param {number} threshold - Forwarded unchanged to `isImageBlank`.
 * @returns {Promise<boolean>} `false` as soon as one image is reported
 *     non-blank, `true` otherwise (including when the page has no images).
 */
async function areImagesBlank(page, threshold) {
    // getImagesOnPage is async: without the await, `images` would be a Promise
    // and the for...of below would throw "images is not iterable".
    const images = await getImagesOnPage(page, PDFJS);

    for (const image of images) {
        if (!isImageBlank(image, threshold)) {
            return false;
        }
    }
    return true;
}
|
||||||
|
11
public/functions/shared/getImagesOnPage.js
Normal file
11
public/functions/shared/getImagesOnPage.js
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
/**
 * Collects the image objects painted on a PDF.js page.
 *
 * Walks the page's operator list and, for every `paintJpegXObject` or
 * `paintImageXObject` operator, looks up the object named by the operator's
 * first argument in `page.objs`.
 *
 * @param {object} page - PDF.js page proxy; must provide `getOperatorList()`
 *     and `objs.get(id)`.
 * @param {object} PDFJS - PDF.js module; only `PDFJS.OPS` is read.
 * @returns {Promise<Array>} The looked-up objects in paint order. An image
 *     that is painted several times appears several times.
 */
export async function getImagesOnPage(page, PDFJS) {
    const ops = await page.getOperatorList();
    const { paintJpegXObject, paintImageXObject } = PDFJS.OPS;

    const images = [];
    for (let j = 0; j < ops.fnArray.length; j++) {
        const fn = ops.fnArray[j];
        if (fn === paintJpegXObject || fn === paintImageXObject) {
            // The first operator argument is the id under which the image is stored.
            images.push(page.objs.get(ops.argsArray[j][0]));
        }
    }
    return images;
}
|
@ -1,4 +1,7 @@
|
|||||||
import { detectEmptyPages } from "./shared/detectEmptyPages";
|
import { detectEmptyPages } from "./shared/detectEmptyPages.js";
|
||||||
|
import { getImagesOnPage } from "./shared/getImagesOnPage.js";
|
||||||
|
|
||||||
|
import jsQR from "jsQR";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @typedef {"BAR_CODE"|"QR_CODE"|"BLANK_PAGE"} SplitType
|
* @typedef {"BAR_CODE"|"QR_CODE"|"BLANK_PAGE"} SplitType
|
||||||
@ -9,24 +12,22 @@ import { detectEmptyPages } from "./shared/detectEmptyPages";
|
|||||||
* @param {Uint16Array} snapshot
|
* @param {Uint16Array} snapshot
|
||||||
* @param {SplitType} type
|
* @param {SplitType} type
|
||||||
* @param {} PDFJS
|
* @param {} PDFJS
|
||||||
* @param {} OpenCV
|
* @param {import('opencv-wasm')} OpenCV
|
||||||
* @param {} PDFLib
|
* @param {} PDFLib
|
||||||
* @param {} QRCode
|
|
||||||
* @returns
|
* @returns
|
||||||
*/
|
*/
|
||||||
export async function splitOn(snapshot, type, whiteThreashold, PDFJS, OpenCV, PDFLib, QRCode) {
|
export async function splitOn(snapshot, type, whiteThreashold, PDFJS, OpenCV, PDFLib) {
|
||||||
|
|
||||||
let splitAtPages = [];
|
let splitAtPages = [];
|
||||||
|
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case "BAR_CODE":
|
case "BAR_CODE":
|
||||||
// TODO: Implement
|
// TODO: Implement
|
||||||
throw new Error("This split-type has not been implemented yet")
|
throw new Error("This split-type has not been implemented yet");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "QR_CODE":
|
case "QR_CODE":
|
||||||
// TODO: Implement
|
splitAtPages = await getPagesWithQRCode(snapshot);
|
||||||
throw new Error("This split-type has not been implemented yet")
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "BLANK_PAGE":
|
case "BLANK_PAGE":
|
||||||
@ -45,4 +46,49 @@ export async function splitOn(snapshot, type, whiteThreashold, PDFJS, OpenCV, PD
|
|||||||
// TODO: Remove detected Pages & Split
|
// TODO: Remove detected Pages & Split
|
||||||
|
|
||||||
return pdfDoc.save();
|
return pdfDoc.save();
|
||||||
|
|
||||||
|
/**
 * Finds the pages that carry the separator QR code.
 *
 * Every image on every page is scanned with `checkForQROnImage`; a page
 * counts as a match when one of its images decodes to the separator URL.
 * Uses `PDFJS` from the enclosing scope.
 *
 * @param {Uint16Array} snapshot - PDF data handed to `PDFJS.getDocument`.
 * @returns {Promise<number[]>} Matching page numbers as passed to
 *     `pdfDoc.getPage` (starting at 1), in ascending order, each listed once.
 */
async function getPagesWithQRCode(snapshot) {
    // QR payload that marks a page as a split point.
    const SEPARATOR_QR_CONTENT = "https://github.com/Frooodle/Stirling-PDF";

    const pdfDoc = await PDFJS.getDocument(snapshot).promise;

    const pagesWithQR = [];
    for (let pageNumber = 1; pageNumber <= pdfDoc.numPages; pageNumber++) {
        const page = await pdfDoc.getPage(pageNumber);
        console.log("Checking page " + pageNumber);

        const images = await getImagesOnPage(page, PDFJS);

        for (const image of images) {
            const data = await checkForQROnImage(image);
            if (data === SEPARATOR_QR_CONTENT) {
                pagesWithQR.push(pageNumber);
                // One hit is enough; continuing would record the same page
                // again for every further image that contains the code.
                break;
            }
        }
    }
    return pagesWithQR;
}
|
||||||
|
|
||||||
|
/**
 * Tries to decode a QR code from a single image.
 *
 * @param {{data: ?Uint8ClampedArray, width: number, height: number}} image -
 *     Image object as returned by `getImagesOnPage`. It is not modified.
 * @returns {Promise<?string>} The decoded QR payload, or `null` when the image
 *     has no pixel buffer or jsQR finds no code.
 */
async function checkForQROnImage(image) {
    // Some image objects carry no raw pixel buffer; there is nothing to scan.
    if (!image || !image.data) {
        return null;
    }

    const { width, height } = image;
    let pixels = image.data;

    // TODO: There is an issue with the jsQR package, and the package seems to be stale; we could create a fork and fix it
    // (the package expects rgba but sometimes we have rgb). In the meantime we force rgba on a copy,
    // so the caller's image object keeps its original buffer.
    if (pixels.length === width * height * 3) {
        const rgba = new Uint8ClampedArray(width * height * 4);

        // Copy each rgb triple and append an alpha channel.
        for (let i = 0, j = 0; i < pixels.length; i += 3, j += 4) {
            rgba[j] = pixels[i];         // Red channel
            rgba[j + 1] = pixels[i + 1]; // Green channel
            rgba[j + 2] = pixels[i + 2]; // Blue channel
            rgba[j + 3] = 255;           // Alpha channel (fully opaque)
        }

        pixels = rgba;
    }

    const code = jsQR(pixels, width, height);
    return code ? code.data : null;
}
|
||||||
};
|
};
|
@ -115,6 +115,12 @@ export async function * traverseOperations(operations, input, Functions) {
|
|||||||
input.buffer = await Functions.removeBlankPages(input.buffer, operation.values["whiteThreashold"]);
|
input.buffer = await Functions.removeBlankPages(input.buffer, operation.values["whiteThreashold"]);
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
case "splitOn":
|
||||||
|
yield* oneToN(input, operation, async (input) => {
|
||||||
|
input.fileName += "_split";
|
||||||
|
input.buffer = await Functions.splitOn(input.buffer, operation.values["type"], operation.values["whiteThreashold"]);
|
||||||
|
});
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new Error(`${operation.type} not implemented yet.`);
|
throw new Error(`${operation.type} not implemented yet.`);
|
||||||
break;
|
break;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user