diff --git a/frontend/src/services/pdfMetadataService.ts b/frontend/src/services/pdfMetadataService.ts index 4aaf2638b..74be20f08 100644 --- a/frontend/src/services/pdfMetadataService.ts +++ b/frontend/src/services/pdfMetadataService.ts @@ -1,6 +1,7 @@ import { pdfWorkerManager } from './pdfWorkerManager'; import { FileAnalyzer } from './fileAnalyzer'; import { TrappedStatus, CustomMetadataEntry, ExtractedPDFMetadata } from '../types/metadata'; +import { PDFDocumentProxy } from 'pdfjs-dist/types/src/display/api'; export interface MetadataExtractionResult { success: true; @@ -18,8 +19,8 @@ export type MetadataExtractionResponse = MetadataExtractionResult | MetadataExtr * Utility to format PDF date strings to required format (yyyy/MM/dd HH:mm:ss) * Handles PDF date format: "D:YYYYMMDDHHmmSSOHH'mm'" or standard date strings */ -function formatPDFDate(dateString: unknown): string { - if (!dateString || typeof dateString !== 'string') { +function formatPDFDate(dateString: string): string { + if (!dateString) { return ''; } @@ -80,14 +81,14 @@ function convertTrappedStatus(trapped: unknown): TrappedStatus { * Extract custom metadata fields from PDF.js info object * Custom metadata is nested under the "Custom" key */ -function extractCustomMetadata(info: Record): CustomMetadataEntry[] { +function extractCustomMetadata(custom: unknown): CustomMetadataEntry[] { const customMetadata: CustomMetadataEntry[] = []; let customIdCounter = 1; // Check if there's a Custom object containing the custom metadata - if (info.Custom && typeof info.Custom === 'object' && info.Custom !== null) { - const customObj = info.Custom as Record; + if (typeof custom === 'object' && custom !== null) { + const customObj = custom as Record; Object.entries(customObj).forEach(([key, value]) => { if (value != null && value !== '') { @@ -107,7 +108,7 @@ function extractCustomMetadata(info: Record): CustomMetadataEnt /** * Safely cleanup PDF document with error handling */ -function cleanupPdfDocument(pdfDoc: any): void { +function cleanupPdfDocument(pdfDoc: PDFDocumentProxy | null): void { if (pdfDoc) { try { pdfWorkerManager.destroyDocument(pdfDoc); @@ -117,6 +118,14 @@ function cleanupPdfDocument(pdfDoc: any): void { } } +function getStringMetadata(info: Record, key: string): string { + if (typeof info[key] === 'string') { + return info[key]; + } else { + return ''; + } +} + /** * Extract all metadata from a PDF file * Returns a result object with success/error state @@ -131,9 +140,9 @@ export async function extractPDFMetadata(file: File): Promise; // Safely extract metadata with proper type checking const extractedMetadata: ExtractedPDFMetadata = { - title: typeof info.Title === 'string' ? info.Title : '', - author: typeof info.Author === 'string' ? info.Author : '', - subject: typeof info.Subject === 'string' ? info.Subject : '', - keywords: typeof info.Keywords === 'string' ? info.Keywords : '', - creator: typeof info.Creator === 'string' ? info.Creator : '', - producer: typeof info.Producer === 'string' ? info.Producer : '', - creationDate: formatPDFDate(info.CreationDate), - modificationDate: formatPDFDate(info.ModDate), + title: getStringMetadata(info, 'Title'), + author: getStringMetadata(info, 'Author'), + subject: getStringMetadata(info, 'Subject'), + keywords: getStringMetadata(info, 'Keywords'), + creator: getStringMetadata(info, 'Creator'), + producer: getStringMetadata(info, 'Producer'), + creationDate: formatPDFDate(getStringMetadata(info, 'CreationDate')), + modificationDate: formatPDFDate(getStringMetadata(info, 'ModDate')), trapped: convertTrappedStatus(info.Trapped), - customMetadata: extractCustomMetadata(info) + customMetadata: extractCustomMetadata(info.Custom), }; cleanupPdfDocument(pdfDoc); diff --git a/frontend/src/services/pdfWorkerManager.ts b/frontend/src/services/pdfWorkerManager.ts index dda434049..9db0044a0 100644 --- a/frontend/src/services/pdfWorkerManager.ts +++ b/frontend/src/services/pdfWorkerManager.ts @@ -6,11 +6,12 @@ */ import * as pdfjsLib from 'pdfjs-dist'; +import { PDFDocumentProxy } from 'pdfjs-dist/types/src/display/api'; const { getDocument, GlobalWorkerOptions } = pdfjsLib; class PDFWorkerManager { private static instance: PDFWorkerManager; - private activeDocuments = new Set(); + private activeDocuments = new Set(); private workerCount = 0; private maxWorkers = 10; // Limit concurrent workers private isInitialized = false; @@ -48,7 +49,7 @@ class PDFWorkerManager { stopAtErrors?: boolean; verbosity?: number; } = {} - ): Promise { + ): Promise { // Wait if we've hit the worker limit if (this.activeDocuments.size >= this.maxWorkers) { await this.waitForAvailableWorker(); @@ -104,7 +105,7 @@ class PDFWorkerManager { /** * Properly destroy a PDF document and clean up resources */ - destroyDocument(pdf: any): void { + destroyDocument(pdf: PDFDocumentProxy): void { if (this.activeDocuments.has(pdf)) { try { pdf.destroy();