// Stirling-PDF/frontend/src/utils/fileHistoryUtils.ts
/**
 * File History Utilities
 *
 * Helper functions for integrating PDF metadata service with FileContext operations.
 * Handles extraction of history from files and preparation for metadata injection.
 */
import { pdfMetadataService, type ToolOperation } from '../services/pdfMetadataService';
import { FileRecord } from '../types/fileContext';
import { FileId, FileMetadata } from '../types/file';
import { createFileId } from '../types/fileContext';
// Gates the console.warn diagnostics in this module: true only when NODE_ENV is 'development'.
const DEBUG = process.env.NODE_ENV === 'development';
/**
 * Read the history embedded in a PDF and merge it into a file record.
 *
 * The record is returned unchanged when the file's MIME type does not contain
 * "pdf", when the metadata service reports no history, or when extraction
 * throws (the error is only logged, and only when DEBUG is set).
 *
 * @param file - File whose bytes are handed to the metadata service.
 * @param record - Record to enrich. It is never mutated; a shallow copy is
 *   returned when history is found.
 * @returns The original record, or a copy carrying `originalFileId`,
 *   `versionNumber`, `parentFileId` and `toolHistory` from the embedded history.
 */
export async function extractFileHistory(
  file: File,
  record: FileRecord
): Promise<FileRecord> {
  // Only process PDF files
  if (!file.type.includes('pdf')) {
    return record;
  }

  try {
    const arrayBuffer = await file.arrayBuffer();
    const historyMetadata = await pdfMetadataService.extractHistoryMetadata(arrayBuffer);

    if (historyMetadata) {
      const history = historyMetadata.stirlingHistory;

      // Update record with history information
      return {
        ...record,
        originalFileId: history.originalFileId,
        versionNumber: history.versionNumber,
        parentFileId: history.parentFileId as FileId | undefined,
        toolHistory: history.toolChain
      };
    }
  } catch (error) {
    // Best effort: a failed extraction must not block the caller.
    if (DEBUG) console.warn('📄 Failed to extract file history:', file.name, error);
  }

  return record;
}
/**
 * Inject history metadata into a PDF file for a tool operation.
 *
 * The version number, original file ID and previous tool chain are resolved
 * in this order:
 *   1. history already embedded in the PDF bytes,
 *   2. history fields on `sourceFileRecord` (fallback when the PDF has none),
 *   3. no history at all - the file becomes version 1 of itself.
 * In every case the parent of the produced file is `sourceFileRecord.id`.
 *
 * @param file - File to stamp. Files whose MIME type does not contain "pdf"
 *   are returned untouched.
 * @param sourceFileRecord - Record of the file the tool was applied to.
 * @param toolName - Name recorded in the new tool-chain entry.
 * @param parameters - Optional tool parameters recorded alongside the name.
 * @returns A new File with the same name and type whose bytes come from the
 *   metadata service, or the original file if anything throws.
 */
export async function injectHistoryForTool(
  file: File,
  sourceFileRecord: FileRecord,
  toolName: string,
  parameters?: Record<string, any>
): Promise<File> {
  // Only process PDF files
  if (!file.type.includes('pdf')) {
    return file;
  }

  try {
    const arrayBuffer = await file.arrayBuffer();

    // Create tool operation record
    const toolOperation: ToolOperation = {
      toolName,
      timestamp: Date.now(),
      parameters
    };

    // Extract version info directly from the PDF metadata to ensure accuracy
    const existingHistoryMetadata = await pdfMetadataService.extractHistoryMetadata(arrayBuffer);

    // The source file is the parent regardless of where the history comes from.
    const parentFileId: string = sourceFileRecord.id;
    let newVersionNumber: number;
    let originalFileId: string;
    let parentToolChain: ToolOperation[];

    if (existingHistoryMetadata) {
      // File already has embedded history - increment version
      const history = existingHistoryMetadata.stirlingHistory;
      newVersionNumber = history.versionNumber + 1;
      originalFileId = history.originalFileId;
      parentToolChain = history.toolChain || [];
    } else if (sourceFileRecord.originalFileId && sourceFileRecord.versionNumber) {
      // File record has history but PDF doesn't (shouldn't happen, but fallback)
      newVersionNumber = sourceFileRecord.versionNumber + 1;
      originalFileId = sourceFileRecord.originalFileId;
      parentToolChain = sourceFileRecord.toolHistory || [];
    } else {
      // File has no history - this becomes version 1 with the source as original
      newVersionNumber = 1;
      originalFileId = sourceFileRecord.id;
      parentToolChain = [];
    }

    // Create new tool chain with the new operation appended
    const newToolChain = [...parentToolChain, toolOperation];

    const modifiedBytes = await pdfMetadataService.injectHistoryMetadata(
      arrayBuffer,
      originalFileId,
      parentFileId,
      newToolChain,
      newVersionNumber
    );

    // Create new file with updated metadata
    return new File([modifiedBytes], file.name, { type: file.type });
  } catch (error) {
    if (DEBUG) console.warn('📄 Failed to inject history for tool operation:', error);
    return file; // Return original file if injection fails
  }
}
/**
 * Produce the list of files to send to a tool, stamping history into each
 * file that has a known record.
 *
 * Files are handled one at a time, in input order. A file for which
 * `getFileRecord` returns nothing is passed through as-is.
 *
 * @param files - Files about to be processed by the tool.
 * @param getFileRecord - Lookup from a file to its record, if any.
 * @param toolName - Tool name forwarded to `injectHistoryForTool`.
 * @param parameters - Optional tool parameters forwarded to `injectHistoryForTool`.
 * @returns One output file per input file, in the same order.
 */
export async function prepareFilesWithHistory(
  files: File[],
  getFileRecord: (file: File) => FileRecord | undefined,
  toolName: string,
  parameters?: Record<string, any>
): Promise<File[]> {
  const prepared: File[] = [];

  for (const candidate of files) {
    const sourceRecord = getFileRecord(candidate);

    if (sourceRecord) {
      const stamped = await injectHistoryForTool(candidate, sourceRecord, toolName, parameters);
      prepared.push(stamped);
    } else {
      // No record means no history to carry forward.
      prepared.push(candidate);
    }
  }

  return prepared;
}
2025-09-03 14:48:14 +01:00
/**
 * Ask the metadata service to compare a tool's output with its input.
 *
 * Only the one-input / one-output case is checked; any other combination
 * (including empty lists) is a no-op. Failures are logged when DEBUG is set
 * and never propagate to the caller.
 *
 * @param originalFiles - Files given to the tool.
 * @param processedFiles - Files the tool produced.
 * @param toolName - Tool name passed to the service and used in the log message.
 */
export async function verifyToolMetadataPreservation(
  originalFiles: File[],
  processedFiles: File[],
  toolName: string
): Promise<void> {
  // Multi-file tools would need more complex verification; skip them for now.
  const isSingleFileTool = originalFiles.length === 1 && processedFiles.length === 1;
  if (!isSingleFileTool) return;

  const [before] = originalFiles;
  const [after] = processedFiles;

  try {
    const originalBytes = await before.arrayBuffer();
    const processedBytes = await after.arrayBuffer();

    await pdfMetadataService.verifyMetadataPreservation(originalBytes, processedBytes, toolName);
  } catch (error) {
    if (DEBUG) console.warn(`📄 Failed to verify metadata preservation for ${toolName}:`, error);
  }
}
2025-09-02 17:24:26 +01:00
/**
 * Group file records by their original file ID for version management.
 *
 * A record with an `originalFileId` is grouped under that ID. A record
 * without one is grouped under its own `id`, so an original and the versions
 * derived from it end up in the same group, and standalone files form
 * single-element groups.
 *
 * @param fileRecords - Records to group; the input array is not modified.
 * @returns Map from group key to that group's records, each group sorted by
 *   `versionNumber` descending (a missing version number counts as 0).
 */
export function groupFilesByOriginal(fileRecords: FileRecord[]): Map<string, FileRecord[]> {
  const groups = new Map<string, FileRecord[]>();

  for (const record of fileRecords) {
    // `||` rather than `??`: an empty originalFileId also falls back to the record's own id.
    const groupKey: string = record.originalFileId || record.id;

    const bucket = groups.get(groupKey);
    if (bucket) {
      bucket.push(record);
    } else {
      groups.set(groupKey, [record]);
    }
  }

  // Sort each group by version number, newest first
  for (const records of groups.values()) {
    records.sort((a, b) => (b.versionNumber || 0) - (a.versionNumber || 0));
  }

  return groups;
}
/**
 * Pick the newest record from every version group.
 *
 * Groups come from `groupFilesByOriginal`, which orders each group by version
 * number descending, so the head of each group is its latest version.
 *
 * @param fileRecords - Records to reduce.
 * @returns One record per group, in the groups' iteration order.
 */
export function getLatestVersions(fileRecords: FileRecord[]): FileRecord[] {
  const versionGroups = Array.from(groupFilesByOriginal(fileRecords).values());

  return versionGroups
    .filter(group => group.length > 0)
    .map(group => group[0]);
}
/**
 * Get the version history for a file.
 *
 * Two records belong to the same history when they resolve to the same
 * original ID, where a record's original ID is its `originalFileId` or,
 * failing that, its own `id`.
 *
 * @param targetRecord - Record whose history is requested.
 * @param allRecords - Pool of records to search; not modified.
 * @returns A new array with every matching record (the target included if it
 *   is in the pool), sorted by `versionNumber` descending with a missing
 *   version number counting as 0.
 */
export function getVersionHistory(
  targetRecord: FileRecord,
  allRecords: FileRecord[]
): FileRecord[] {
  const originalId = targetRecord.originalFileId || targetRecord.id;

  return allRecords
    .filter(record => {
      const recordOriginalId = record.originalFileId || record.id;
      return recordOriginalId === originalId;
    })
    // filter() already produced a fresh array, so the in-place sort is safe.
    .sort((a, b) => (b.versionNumber || 0) - (a.versionNumber || 0));
}
/**
 * Tell whether a record carries version history.
 *
 * @param record - Record to inspect.
 * @returns `true` only when the record has a non-empty `originalFileId` and a
 *   `versionNumber` greater than zero.
 */
export function hasVersionHistory(record: FileRecord): boolean {
  const { originalFileId, versionNumber } = record;

  if (!originalFileId || !versionNumber) {
    return false;
  }

  return versionNumber > 0;
}
/**
 * Generate a descriptive name for a file version.
 *
 * Records without version history keep their name unchanged. Otherwise the
 * result is `<base> (v<N>) - <last tool>.pdf`, where `<base>` is the name with
 * a trailing ".pdf" (any case) removed and the tool part is omitted when the
 * record has no tool history.
 *
 * @param record - Record to name.
 * @returns The display name for this version.
 */
export function generateVersionName(record: FileRecord): string {
  const baseName = record.name.replace(/\.pdf$/i, '');

  if (!hasVersionHistory(record)) {
    return record.name;
  }

  const versionInfo = record.versionNumber ? ` (v${record.versionNumber})` : '';

  // Only the most recent tool in the chain is shown.
  const lastTool = record.toolHistory && record.toolHistory.length > 0
    ? record.toolHistory[record.toolHistory.length - 1]
    : undefined;
  const toolInfo = lastTool ? ` - ${lastTool.toolName}` : '';

  return `${baseName}${versionInfo}${toolInfo}.pdf`;
}
/**
 * Create the metadata object used to store a file, including any PDF
 * metadata and history found in its bytes.
 *
 * Base fields (id, name, type, size, lastModified, thumbnail) are always
 * present. For files whose MIME type contains "pdf", standard PDF metadata
 * and embedded history are extracted from the same buffer and added when
 * available. If extraction throws, only the base fields are returned.
 *
 * @param file - File to describe.
 * @param fileId - ID stored as the metadata's `id`.
 * @param thumbnail - Optional thumbnail stored alongside the base fields.
 * @returns The metadata for the file.
 */
export async function createFileMetadataWithHistory(
  file: File,
  fileId: FileId,
  thumbnail?: string
): Promise<FileMetadata> {
  const baseMetadata: FileMetadata = {
    id: fileId,
    name: file.name,
    type: file.type,
    size: file.size,
    lastModified: file.lastModified,
    thumbnail
  };

  // Extract metadata for PDF files
  if (file.type.includes('pdf')) {
    try {
      const arrayBuffer = await file.arrayBuffer();

      const [historyMetadata, standardMetadata] = await Promise.all([
        pdfMetadataService.extractHistoryMetadata(arrayBuffer),
        pdfMetadataService.extractStandardMetadata(arrayBuffer)
      ]);

      // Work on a copy so a failure part-way through cannot leak partial data
      // into the fallback return value.
      const result: FileMetadata = { ...baseMetadata };

      // Add standard PDF metadata if available
      if (standardMetadata) {
        result.pdfMetadata = standardMetadata;
      }

      // Add history metadata if available
      if (historyMetadata) {
        const history = historyMetadata.stirlingHistory;

        result.originalFileId = history.originalFileId;
        result.versionNumber = history.versionNumber;
        result.parentFileId = history.parentFileId as FileId | undefined;
        result.historyInfo = {
          originalFileId: history.originalFileId,
          parentFileId: history.parentFileId,
          versionNumber: history.versionNumber,
          toolChain: history.toolChain
        };
      }

      return result;
    } catch (error) {
      if (DEBUG) console.warn('📄 Failed to extract metadata:', file.name, error);
    }
  }

  return baseMetadata;
}