mirror of
https://github.com/Stirling-Tools/Stirling-PDF.git
synced 2025-05-23 18:32:01 +00:00
Remove blank pages done, Updated README.md
This commit is contained in:
parent
50a1bd8082
commit
f78a64d545
@ -105,7 +105,7 @@ Current functions of spdf and their progress in this repo.
|
|||||||
|
|
||||||
| Status | Feature | Description |
|
| Status | Feature | Description |
|
||||||
| ------ | ------------------ | ----------- |
|
| ------ | ------------------ | ----------- |
|
||||||
| 🚧 | Remove Blank Pages | |
|
| ✔️ | Remove Blank Pages | |
|
||||||
| 🚧 | Auto Split Pages | |
|
| 🚧 | Auto Split Pages | |
|
||||||
|
|
||||||
| Status | Feature | Description |
|
| Status | Feature | Description |
|
||||||
|
@ -1,30 +1,42 @@
|
|||||||
export async function removeBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV, PDFLib) {
|
export async function removeBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV, PDFLib) {
|
||||||
|
|
||||||
const pdfDoc = await PDFJS.getDocument(snapshot).promise;
|
const emptyPages = await findEmptyPages(snapshot);
|
||||||
|
|
||||||
const emptyPages = [];
|
console.log("Empty Pages: ", emptyPages);
|
||||||
for (let i = 1; i <= pdfDoc.numPages; i++) {
|
|
||||||
const page = await pdfDoc.getPage(i);
|
|
||||||
console.log("Checking images");
|
|
||||||
|
|
||||||
if(!await hasText(page)) {
|
const pdfDoc = await PDFLib.PDFDocument.load(snapshot);
|
||||||
console.log("Found text on Page, page is not empty");
|
|
||||||
continue;
|
// Reverse the array before looping so that the indices keep pointing at the right pages. E.g. if you delete page 5 first, page 7 becomes page 6; if you delete page 7 first, page 5 remains page 5.
|
||||||
|
emptyPages.reverse().forEach(pageIndex => {
|
||||||
|
pdfDoc.removePage(pageIndex);
|
||||||
|
})
|
||||||
|
|
||||||
|
return pdfDoc.save();
|
||||||
|
|
||||||
|
async function findEmptyPages(snapshot) {
|
||||||
|
const pdfDoc = await PDFJS.getDocument(snapshot).promise;
|
||||||
|
|
||||||
|
const emptyPages = [];
|
||||||
|
for (let i = 1; i <= pdfDoc.numPages; i++) {
|
||||||
|
const page = await pdfDoc.getPage(i);
|
||||||
|
console.log("Checking page " + i);
|
||||||
|
|
||||||
|
if(!await hasText(page)) {
|
||||||
|
console.log(`Found text on Page ${i}, page is not empty`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!await areImagesBlank(page, whiteThreashold)) {
|
||||||
|
console.log(`Found non white image on Page ${i}, page is not empty`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`Page ${i} is empty.`);
|
||||||
|
emptyPages.push(i - 1);
|
||||||
}
|
}
|
||||||
|
return emptyPages;
|
||||||
if(!await areImagesBlank(page, whiteThreashold)) {
|
|
||||||
console.log("Found image on Page, page is not empty");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
emptyPages.push[i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(emptyPages);
|
|
||||||
|
|
||||||
// TODO: Remove emptyPages using pdflib
|
|
||||||
// return pdf;
|
|
||||||
|
|
||||||
async function areImagesBlank(page, whiteThreashold) {
|
async function areImagesBlank(page, whiteThreashold) {
|
||||||
const ops = await page.getOperatorList();
|
const ops = await page.getOperatorList();
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user