mirror of
https://github.com/Stirling-Tools/Stirling-PDF.git
synced 2025-09-18 09:29:24 +00:00
Properly type PDFs
This commit is contained in:
parent
e31e6461e4
commit
6a1caf0904
@ -1,6 +1,7 @@
|
|||||||
import { pdfWorkerManager } from './pdfWorkerManager';
|
import { pdfWorkerManager } from './pdfWorkerManager';
|
||||||
import { FileAnalyzer } from './fileAnalyzer';
|
import { FileAnalyzer } from './fileAnalyzer';
|
||||||
import { TrappedStatus, CustomMetadataEntry, ExtractedPDFMetadata } from '../types/metadata';
|
import { TrappedStatus, CustomMetadataEntry, ExtractedPDFMetadata } from '../types/metadata';
|
||||||
|
import { PDFDocumentProxy } from 'pdfjs-dist/types/src/display/api';
|
||||||
|
|
||||||
export interface MetadataExtractionResult {
|
export interface MetadataExtractionResult {
|
||||||
success: true;
|
success: true;
|
||||||
@ -18,8 +19,8 @@ export type MetadataExtractionResponse = MetadataExtractionResult | MetadataExtr
|
|||||||
* Utility to format PDF date strings to required format (yyyy/MM/dd HH:mm:ss)
|
* Utility to format PDF date strings to required format (yyyy/MM/dd HH:mm:ss)
|
||||||
* Handles PDF date format: "D:YYYYMMDDHHmmSSOHH'mm'" or standard date strings
|
* Handles PDF date format: "D:YYYYMMDDHHmmSSOHH'mm'" or standard date strings
|
||||||
*/
|
*/
|
||||||
function formatPDFDate(dateString: unknown): string {
|
function formatPDFDate(dateString: string): string {
|
||||||
if (!dateString || typeof dateString !== 'string') {
|
if (!dateString) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,14 +81,14 @@ function convertTrappedStatus(trapped: unknown): TrappedStatus {
|
|||||||
* Extract custom metadata fields from PDF.js info object
|
* Extract custom metadata fields from PDF.js info object
|
||||||
* Custom metadata is nested under the "Custom" key
|
* Custom metadata is nested under the "Custom" key
|
||||||
*/
|
*/
|
||||||
function extractCustomMetadata(info: Record<string, unknown>): CustomMetadataEntry[] {
|
function extractCustomMetadata(custom: unknown): CustomMetadataEntry[] {
|
||||||
const customMetadata: CustomMetadataEntry[] = [];
|
const customMetadata: CustomMetadataEntry[] = [];
|
||||||
let customIdCounter = 1;
|
let customIdCounter = 1;
|
||||||
|
|
||||||
|
|
||||||
// Check if there's a Custom object containing the custom metadata
|
// Check if there's a Custom object containing the custom metadata
|
||||||
if (info.Custom && typeof info.Custom === 'object' && info.Custom !== null) {
|
if (typeof custom === 'object' && custom !== null) {
|
||||||
const customObj = info.Custom as Record<string, unknown>;
|
const customObj = custom as Record<string, unknown>;
|
||||||
|
|
||||||
Object.entries(customObj).forEach(([key, value]) => {
|
Object.entries(customObj).forEach(([key, value]) => {
|
||||||
if (value != null && value !== '') {
|
if (value != null && value !== '') {
|
||||||
@ -107,7 +108,7 @@ function extractCustomMetadata(info: Record<string, unknown>): CustomMetadataEnt
|
|||||||
/**
|
/**
|
||||||
* Safely cleanup PDF document with error handling
|
* Safely cleanup PDF document with error handling
|
||||||
*/
|
*/
|
||||||
function cleanupPdfDocument(pdfDoc: any): void {
|
function cleanupPdfDocument(pdfDoc: PDFDocumentProxy | null): void {
|
||||||
if (pdfDoc) {
|
if (pdfDoc) {
|
||||||
try {
|
try {
|
||||||
pdfWorkerManager.destroyDocument(pdfDoc);
|
pdfWorkerManager.destroyDocument(pdfDoc);
|
||||||
@ -117,6 +118,14 @@ function cleanupPdfDocument(pdfDoc: any): void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Read a metadata field from an info dictionary as a string.
 *
 * @param info - Dictionary of raw metadata values keyed by field name.
 * @param key - Name of the field to read.
 * @returns The field's value when it is a string (including the empty
 *   string); otherwise `''` — e.g. when the key is absent or holds a
 *   non-string value.
 */
function getStringMetadata(info: Record<string, unknown>, key: string): string {
  // Read the property exactly once: narrowing a local works on every
  // TypeScript version (narrowing `info[key]` with a non-literal key needs
  // TS 5.5+), and an accessor-backed property cannot change between the
  // type check and the return.
  const value = info[key];
  return typeof value === 'string' ? value : '';
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract all metadata from a PDF file
|
* Extract all metadata from a PDF file
|
||||||
* Returns a result object with success/error state
|
* Returns a result object with success/error state
|
||||||
@ -131,9 +140,9 @@ export async function extractPDFMetadata(file: File): Promise<MetadataExtraction
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
let pdfDoc: any = null;
|
let pdfDoc: PDFDocumentProxy | null = null;
|
||||||
let arrayBuffer: ArrayBuffer;
|
let arrayBuffer: ArrayBuffer;
|
||||||
let metadata: any;
|
let metadata;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
arrayBuffer = await file.arrayBuffer();
|
arrayBuffer = await file.arrayBuffer();
|
||||||
@ -151,20 +160,20 @@ export async function extractPDFMetadata(file: File): Promise<MetadataExtraction
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const info = metadata.info || {};
|
const info = metadata.info as Record<string, unknown>;
|
||||||
|
|
||||||
// Safely extract metadata with proper type checking
|
// Safely extract metadata with proper type checking
|
||||||
const extractedMetadata: ExtractedPDFMetadata = {
|
const extractedMetadata: ExtractedPDFMetadata = {
|
||||||
title: typeof info.Title === 'string' ? info.Title : '',
|
title: getStringMetadata(info, 'Title'),
|
||||||
author: typeof info.Author === 'string' ? info.Author : '',
|
author: getStringMetadata(info, 'Author'),
|
||||||
subject: typeof info.Subject === 'string' ? info.Subject : '',
|
subject: getStringMetadata(info, 'Subject'),
|
||||||
keywords: typeof info.Keywords === 'string' ? info.Keywords : '',
|
keywords: getStringMetadata(info, 'Keywords'),
|
||||||
creator: typeof info.Creator === 'string' ? info.Creator : '',
|
creator: getStringMetadata(info, 'Creator'),
|
||||||
producer: typeof info.Producer === 'string' ? info.Producer : '',
|
producer: getStringMetadata(info, 'Producer'),
|
||||||
creationDate: formatPDFDate(info.CreationDate),
|
creationDate: formatPDFDate(getStringMetadata(info, 'CreationDate')),
|
||||||
modificationDate: formatPDFDate(info.ModDate),
|
modificationDate: formatPDFDate(getStringMetadata(info, 'ModDate')),
|
||||||
trapped: convertTrappedStatus(info.Trapped),
|
trapped: convertTrappedStatus(info.Trapped),
|
||||||
customMetadata: extractCustomMetadata(info)
|
customMetadata: extractCustomMetadata(info.Custom),
|
||||||
};
|
};
|
||||||
|
|
||||||
cleanupPdfDocument(pdfDoc);
|
cleanupPdfDocument(pdfDoc);
|
||||||
|
@ -6,11 +6,12 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import * as pdfjsLib from 'pdfjs-dist';
|
import * as pdfjsLib from 'pdfjs-dist';
|
||||||
|
import { PDFDocumentProxy } from 'pdfjs-dist/types/src/display/api';
|
||||||
const { getDocument, GlobalWorkerOptions } = pdfjsLib;
|
const { getDocument, GlobalWorkerOptions } = pdfjsLib;
|
||||||
|
|
||||||
class PDFWorkerManager {
|
class PDFWorkerManager {
|
||||||
private static instance: PDFWorkerManager;
|
private static instance: PDFWorkerManager;
|
||||||
private activeDocuments = new Set<any>();
|
private activeDocuments = new Set<PDFDocumentProxy>();
|
||||||
private workerCount = 0;
|
private workerCount = 0;
|
||||||
private maxWorkers = 10; // Limit concurrent workers
|
private maxWorkers = 10; // Limit concurrent workers
|
||||||
private isInitialized = false;
|
private isInitialized = false;
|
||||||
@ -48,7 +49,7 @@ class PDFWorkerManager {
|
|||||||
stopAtErrors?: boolean;
|
stopAtErrors?: boolean;
|
||||||
verbosity?: number;
|
verbosity?: number;
|
||||||
} = {}
|
} = {}
|
||||||
): Promise<any> {
|
): Promise<PDFDocumentProxy> {
|
||||||
// Wait if we've hit the worker limit
|
// Wait if we've hit the worker limit
|
||||||
if (this.activeDocuments.size >= this.maxWorkers) {
|
if (this.activeDocuments.size >= this.maxWorkers) {
|
||||||
await this.waitForAvailableWorker();
|
await this.waitForAvailableWorker();
|
||||||
@ -104,7 +105,7 @@ class PDFWorkerManager {
|
|||||||
/**
|
/**
|
||||||
* Properly destroy a PDF document and clean up resources
|
* Properly destroy a PDF document and clean up resources
|
||||||
*/
|
*/
|
||||||
destroyDocument(pdf: any): void {
|
destroyDocument(pdf: PDFDocumentProxy): void {
|
||||||
if (this.activeDocuments.has(pdf)) {
|
if (this.activeDocuments.has(pdf)) {
|
||||||
try {
|
try {
|
||||||
pdf.destroy();
|
pdf.destroy();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user