This commit is contained in:
James Brunton 2025-09-12 11:34:45 +01:00
parent cef61d3733
commit e31e6461e4
3 changed files with 86 additions and 86 deletions

View File

@ -26,4 +26,4 @@ const DeleteAllStep = ({
); );
}; };
export default DeleteAllStep; export default DeleteAllStep;

View File

@ -1,5 +1,5 @@
import { useState, useEffect, useRef } from "react"; import { useState, useEffect, useRef } from "react";
import { PDFMetadataService } from "../../../services/pdfMetadataService"; import { extractPDFMetadata } from "../../../services/pdfMetadataService";
import { useSelectedFiles } from "../../../contexts/file/fileHooks"; import { useSelectedFiles } from "../../../contexts/file/fileHooks";
import { ChangeMetadataParameters } from "./useChangeMetadataParameters"; import { ChangeMetadataParameters } from "./useChangeMetadataParameters";
@ -34,31 +34,30 @@ export const useMetadataExtraction = (params: MetadataExtractionParams) => {
} }
setIsExtractingMetadata(true); setIsExtractingMetadata(true);
try {
const result = await PDFMetadataService.extractMetadata(firstFile);
if (result.success) { const result = await extractPDFMetadata(firstFile);
const metadata = result.metadata;
// Pre-populate all fields with extracted metadata if (result.success) {
params.updateParameter('title', metadata.title); const metadata = result.metadata;
params.updateParameter('author', metadata.author);
params.updateParameter('subject', metadata.subject);
params.updateParameter('keywords', metadata.keywords);
params.updateParameter('creator', metadata.creator);
params.updateParameter('producer', metadata.producer);
params.updateParameter('creationDate', metadata.creationDate);
params.updateParameter('modificationDate', metadata.modificationDate);
params.updateParameter('trapped', metadata.trapped);
params.updateParameter('customMetadata', metadata.customMetadata);
setHasExtractedMetadata(true); // Pre-populate all fields with extracted metadata
} params.updateParameter('title', metadata.title);
} catch (error) { params.updateParameter('author', metadata.author);
console.warn('Failed to extract metadata:', error); params.updateParameter('subject', metadata.subject);
} finally { params.updateParameter('keywords', metadata.keywords);
setIsExtractingMetadata(false); params.updateParameter('creator', metadata.creator);
params.updateParameter('producer', metadata.producer);
params.updateParameter('creationDate', metadata.creationDate);
params.updateParameter('modificationDate', metadata.modificationDate);
params.updateParameter('trapped', metadata.trapped);
params.updateParameter('customMetadata', metadata.customMetadata);
setHasExtractedMetadata(true);
} else {
console.warn('Failed to extract metadata:', result.error);
} }
setIsExtractingMetadata(false);
}; };
extractMetadata(); extractMetadata();

View File

@ -105,71 +105,72 @@ function extractCustomMetadata(info: Record<string, unknown>): CustomMetadataEnt
} }
/** /**
* Service to extract metadata from PDF files using PDF.js * Safely cleanup PDF document with error handling
*/ */
export class PDFMetadataService { function cleanupPdfDocument(pdfDoc: any): void {
/** if (pdfDoc) {
* Extract all metadata from a PDF file
* Returns a result object with success/error state
*/
static async extractMetadata(file: File): Promise<MetadataExtractionResponse> {
// Use existing PDF validation
const isValidPDF = await FileAnalyzer.isValidPDF(file);
if (!isValidPDF) {
return {
success: false,
error: 'File is not a valid PDF'
};
}
let pdfDoc: any = null;
try { try {
const arrayBuffer = await file.arrayBuffer(); pdfWorkerManager.destroyDocument(pdfDoc);
pdfDoc = await pdfWorkerManager.createDocument(arrayBuffer, { } catch (cleanupError) {
disableAutoFetch: true, console.warn('Failed to cleanup PDF document:', cleanupError);
disableStream: true
});
const metadata = await pdfDoc.getMetadata();
const info = metadata.info || {};
// Safely extract metadata with proper type checking
const extractedMetadata: ExtractedPDFMetadata = {
title: typeof info.Title === 'string' ? info.Title : '',
author: typeof info.Author === 'string' ? info.Author : '',
subject: typeof info.Subject === 'string' ? info.Subject : '',
keywords: typeof info.Keywords === 'string' ? info.Keywords : '',
creator: typeof info.Creator === 'string' ? info.Creator : '',
producer: typeof info.Producer === 'string' ? info.Producer : '',
creationDate: formatPDFDate(info.CreationDate),
modificationDate: formatPDFDate(info.ModDate),
trapped: convertTrappedStatus(info.Trapped),
customMetadata: extractCustomMetadata(info)
};
return {
success: true,
metadata: extractedMetadata
};
} catch (error) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
return {
success: false,
error: `Failed to extract PDF metadata: ${errorMessage}`
};
} finally {
// Ensure cleanup even if extraction fails
if (pdfDoc) {
try {
pdfWorkerManager.destroyDocument(pdfDoc);
} catch (cleanupError) {
console.warn('Failed to cleanup PDF document:', cleanupError);
}
}
} }
} }
} }
/**
 * Extract all metadata from a PDF file.
 *
 * The file is first checked with `FileAnalyzer.isValidPDF`; it is then loaded
 * through `pdfWorkerManager` (auto-fetch and streaming disabled) and its
 * document info dictionary is mapped onto an `ExtractedPDFMetadata` object.
 *
 * Failures while reading or converting the PDF are reported through the
 * returned result object (`success: false` plus an `error` message) rather
 * than by rejecting, and the loaded document is always handed to
 * `cleanupPdfDocument` before this function settles.
 *
 * @param file - The file to read metadata from.
 * @returns A result object with success/error state.
 */
export async function extractPDFMetadata(file: File): Promise<MetadataExtractionResponse> {
  // Use existing PDF validation
  const isValidPDF = await FileAnalyzer.isValidPDF(file);
  if (!isValidPDF) {
    return {
      success: false,
      error: 'File is not a valid PDF'
    };
  }

  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : 'Unknown error';

  let pdfDoc: any = null;

  try {
    let metadata: any;

    try {
      const arrayBuffer = await file.arrayBuffer();
      pdfDoc = await pdfWorkerManager.createDocument(arrayBuffer, {
        disableAutoFetch: true,
        disableStream: true
      });
      metadata = await pdfDoc.getMetadata();
    } catch (error) {
      return {
        success: false,
        error: `Failed to read PDF: ${describeError(error)}`
      };
    }

    // Guard against a missing metadata object as well as a missing info dictionary
    const info = (metadata && metadata.info) || {};

    // Safely extract metadata with proper type checking
    const extractedMetadata: ExtractedPDFMetadata = {
      title: typeof info.Title === 'string' ? info.Title : '',
      author: typeof info.Author === 'string' ? info.Author : '',
      subject: typeof info.Subject === 'string' ? info.Subject : '',
      keywords: typeof info.Keywords === 'string' ? info.Keywords : '',
      creator: typeof info.Creator === 'string' ? info.Creator : '',
      producer: typeof info.Producer === 'string' ? info.Producer : '',
      creationDate: formatPDFDate(info.CreationDate),
      modificationDate: formatPDFDate(info.ModDate),
      trapped: convertTrappedStatus(info.Trapped),
      customMetadata: extractCustomMetadata(info)
    };

    return {
      success: true,
      metadata: extractedMetadata
    };
  } catch (error) {
    // A failure while converting fields must still surface as a result object,
    // since callers rely on the success/error state instead of catching.
    return {
      success: false,
      error: `Failed to extract PDF metadata: ${describeError(error)}`
    };
  } finally {
    // Single cleanup point for every exit path; cleanupPdfDocument ignores a null document
    cleanupPdfDocument(pdfDoc);
  }
}