Stirling-PDF/frontend/src/utils/fileHistoryUtils.ts

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

509 lines
16 KiB
TypeScript
Raw Normal View History

2025-09-02 17:24:26 +01:00
/**
 * File History Utilities
 *
 * Helper functions for integrating the PDF metadata service with FileContext operations.
 * Handles extraction of history from files and preparation for metadata injection.
 */
import { pdfMetadataService, type ToolOperation } from '../services/pdfMetadataService';
2025-09-05 17:41:53 +01:00
import { StirlingFileStub } from '../types/fileContext';
2025-09-02 17:24:26 +01:00
import { FileId, FileMetadata } from '../types/file';
import { createFileId } from '../types/fileContext';
// Gates the diagnostic console.warn calls in this module; true only when NODE_ENV is 'development'.
const DEBUG = process.env.NODE_ENV === 'development';
/**
 * Extract history information from a PDF file and update StirlingFileStub.
 *
 * Files whose MIME type does not contain "pdf" are never read. When no history
 * metadata is found, or reading/extraction throws, the very same `record`
 * object is returned, so callers can detect "nothing changed" by identity.
 *
 * @param file - File whose bytes are inspected for embedded history metadata.
 * @param record - Stub to copy and enrich with the extracted history fields.
 * @returns A new stub carrying `originalFileId`, `versionNumber`, `parentFileId`
 *          and `toolHistory` when history exists; otherwise `record` unchanged.
 */
export async function extractFileHistory(
  file: File,
  record: StirlingFileStub
): Promise<StirlingFileStub> {
  // Only process PDF files
  if (!file.type.includes('pdf')) {
    return record;
  }

  try {
    const arrayBuffer = await file.arrayBuffer();
    const historyMetadata = await pdfMetadataService.extractHistoryMetadata(arrayBuffer);
    if (!historyMetadata) {
      return record;
    }

    const history = historyMetadata.stirlingHistory;

    // Update record with history information
    return {
      ...record,
      originalFileId: history.originalFileId,
      versionNumber: history.versionNumber,
      parentFileId: history.parentFileId as FileId | undefined,
      toolHistory: history.toolChain
    };
  } catch (error) {
    // Best effort: a failed extraction must not block file loading.
    if (DEBUG) console.warn('📄 Failed to extract file history:', file.name, error);
    return record;
  }
}
/**
 * Inject history metadata into a PDF file for tool operations.
 *
 * Appends a new tool operation (tool name, current timestamp, parameters) to
 * the file's tool chain and writes the resulting lineage into the PDF through
 * `pdfMetadataService.injectHistoryMetadata`.
 *
 * Lineage is resolved in this order:
 *  1. history already embedded in the PDF bytes,
 *  2. history recorded on the source stub,
 *  3. no history at all - the result becomes version 1 of the source file.
 *
 * @param file - File to stamp; returned untouched when it is not a PDF.
 * @param sourceStirlingFileStub - Stub of the file the tool ran on; its id
 *        becomes the parent id of the produced version.
 * @param toolName - Name of the tool being applied.
 * @param parameters - Optional tool parameters stored with the operation.
 * @returns A new File (same name and type) holding the injected bytes, or the
 *          original `file` when it is not a PDF or injection fails.
 */
export async function injectHistoryForTool(
  file: File,
  sourceStirlingFileStub: StirlingFileStub,
  toolName: string,
  parameters?: Record<string, any>
): Promise<File> {
  // Only process PDF files
  if (!file.type.includes('pdf')) {
    return file;
  }

  try {
    const arrayBuffer = await file.arrayBuffer();

    // Create tool operation record
    const toolOperation: ToolOperation = {
      toolName,
      timestamp: Date.now(),
      parameters
    };

    // Extract version info directly from the PDF metadata to ensure accuracy
    const existingHistoryMetadata = await pdfMetadataService.extractHistoryMetadata(arrayBuffer);

    // Whatever the lineage source, the file the tool ran on is the parent.
    const parentFileId: string = sourceStirlingFileStub.id;
    let newVersionNumber: number;
    let originalFileId: string;
    let parentToolChain: ToolOperation[];

    if (existingHistoryMetadata) {
      // File already has embedded history - increment version
      const history = existingHistoryMetadata.stirlingHistory;
      newVersionNumber = history.versionNumber + 1;
      originalFileId = history.originalFileId;
      parentToolChain = history.toolChain || [];
    } else if (sourceStirlingFileStub.originalFileId && sourceStirlingFileStub.versionNumber) {
      // File record has history but PDF doesn't (shouldn't happen, but fallback).
      // NOTE(review): a stub with versionNumber 0 is falsy here and is treated as "no history".
      newVersionNumber = sourceStirlingFileStub.versionNumber + 1;
      originalFileId = sourceStirlingFileStub.originalFileId;
      parentToolChain = sourceStirlingFileStub.toolHistory || [];
    } else {
      // File has no history - this becomes version 1 and the source is the original
      newVersionNumber = 1;
      originalFileId = sourceStirlingFileStub.id;
      parentToolChain = [];
    }

    // Create new tool chain with the new operation (the parent chain is not mutated)
    const newToolChain = [...parentToolChain, toolOperation];

    const modifiedBytes: ArrayBuffer = await pdfMetadataService.injectHistoryMetadata(
      arrayBuffer,
      originalFileId,
      parentFileId,
      newToolChain,
      newVersionNumber
    );

    // Create new file with updated metadata
    return new File([modifiedBytes], file.name, { type: file.type });
  } catch (error) {
    if (DEBUG) console.warn('📄 Failed to inject history for tool operation:', error);
    return file; // Return original file if injection fails
  }
}
/**
 * Prepare StirlingFiles with history-injected PDFs for tool operations.
 * Preserves fileId and quickKey of every input while injecting history.
 *
 * Files are processed one after another and the output keeps the input order.
 * A file whose stub cannot be resolved is passed through unchanged.
 *
 * @param stirlingFiles - Files about to be handed to a tool.
 * @param getStirlingFileStub - Lookup returning the stub for a file id, or
 *        `undefined` when the file is unknown.
 * @param toolName - Name of the tool being applied.
 * @param parameters - Optional tool parameters stored with the operation.
 * @returns One output file per input file, in the same order.
 */
export async function prepareStirlingFilesWithHistory(
  stirlingFiles: import('../types/fileContext').StirlingFile[],
  getStirlingFileStub: (fileId: import('../types/file').FileId) => StirlingFileStub | undefined,
  toolName: string,
  parameters?: Record<string, any>
): Promise<import('../types/fileContext').StirlingFile[]> {
  const processedFiles: typeof stirlingFiles = [];

  for (const stirlingFile of stirlingFiles) {
    const fileStub = getStirlingFileStub(stirlingFile.fileId);

    if (!fileStub) {
      // If no stub found, keep original file
      processedFiles.push(stirlingFile);
      continue;
    }

    // Inject history into the file data
    const fileWithHistory = await injectHistoryForTool(stirlingFile, fileStub, toolName, parameters);

    // Create new StirlingFile with the updated file data; a plain File carries
    // neither fileId nor quickKey, so both are re-attached below.
    const updatedStirlingFile = new File([fileWithHistory], fileWithHistory.name, {
      type: fileWithHistory.type,
      lastModified: fileWithHistory.lastModified
    }) as typeof stirlingFile;

    // Preserve the original fileId and quickKey as read-only, enumerable properties
    for (const key of ['fileId', 'quickKey'] as const) {
      Object.defineProperty(updatedStirlingFile, key, {
        value: stirlingFile[key],
        writable: false,
        enumerable: true,
        configurable: false
      });
    }

    processedFiles.push(updatedStirlingFile);
  }

  return processedFiles;
}
2025-09-03 14:48:14 +01:00
/**
 * Check that a tool's output kept the metadata of its input.
 *
 * Only the one-input / one-output case is verified: both files are read and
 * passed to `pdfMetadataService.verifyMetadataPreservation`. Any other
 * combination of list lengths is a no-op. Failures never propagate; they are
 * logged when DEBUG is on.
 *
 * @param originalFiles - Files as they were before the tool ran.
 * @param processedFiles - Files produced by the tool.
 * @param toolName - Tool name, forwarded to the verifier and used in the log message.
 */
export async function verifyToolMetadataPreservation(
  originalFiles: File[],
  processedFiles: File[],
  toolName: string
): Promise<void> {
  // Empty lists and multi-file tools have nothing to compare (yet).
  const isSingleFileTool = originalFiles.length === 1 && processedFiles.length === 1;
  if (!isSingleFileTool) {
    return;
  }

  const [before] = originalFiles;
  const [after] = processedFiles;

  try {
    const beforeBytes = await before.arrayBuffer();
    const afterBytes = await after.arrayBuffer();
    await pdfMetadataService.verifyMetadataPreservation(beforeBytes, afterBytes, toolName);
  } catch (error) {
    if (DEBUG) console.warn(`📄 Failed to verify metadata preservation for ${toolName}:`, error);
  }
}
2025-09-02 17:24:26 +01:00
/**
 * Group files by processing branches - each branch ends in a leaf file.
 *
 * A stub is a leaf when no other stub names it as parent AND it either has
 * processing history (versionNumber > 0) or is not referenced as the original
 * of another stub. A stub that references itself as parent or original does
 * not count as being referenced by others.
 *
 * @param StirlingFileStubs - All known stubs; the array is not modified.
 * @returns Map keyed by leaf file id. Each value is that leaf's lineage
 *          (leaf, parents, original), sorted by version number descending.
 */
export function groupFilesByOriginal(StirlingFileStubs: StirlingFileStub[]): Map<string, StirlingFileStub[]> {
  const groups = new Map<string, StirlingFileStub[]>();

  // One pass builds the id lookup plus the sets of ids referenced by OTHER
  // stubs, so leaf detection below is O(n) instead of O(n^2).
  const fileMap = new Map<string, StirlingFileStub>();
  const referencedAsParent = new Set<string>();
  const referencedAsOriginal = new Set<string>();
  for (const record of StirlingFileStubs) {
    fileMap.set(record.id, record);
    if (record.parentFileId !== undefined && record.parentFileId !== record.id) {
      referencedAsParent.add(record.parentFileId);
    }
    if (record.originalFileId !== undefined && record.originalFileId !== record.id) {
      referencedAsOriginal.add(record.originalFileId);
    }
  }

  // Original files (v0) are leaves only if they have no processed versions at all
  const leafFiles = StirlingFileStubs.filter(stub => {
    if (referencedAsParent.has(stub.id)) {
      return false;
    }
    const hasProcessingHistory = (stub.versionNumber ?? 0) > 0;
    return hasProcessingHistory || !referencedAsOriginal.has(stub.id);
  });

  // For each leaf file, build its complete lineage path back to original
  for (const leafFile of leafFiles) {
    const lineagePath: StirlingFileStub[] = [];
    const visitedIds = new Set<string>();
    let currentFile: StirlingFileStub | undefined = leafFile;

    // Trace back through the parentFileId chain; stopping at an already
    // visited id protects against cyclic parent references.
    while (currentFile !== undefined && !visitedIds.has(currentFile.id)) {
      lineagePath.push(currentFile);
      visitedIds.add(currentFile.id);

      if (currentFile.parentFileId) {
        currentFile = fileMap.get(currentFile.parentFileId);
      } else if (currentFile.originalFileId && currentFile.originalFileId !== currentFile.id) {
        // For v1 files, the original file might be referenced by originalFileId
        currentFile = fileMap.get(currentFile.originalFileId);
      } else {
        currentFile = undefined;
      }
    }

    // Sort lineage with latest version first (leaf at top)
    lineagePath.sort((a, b) => (b.versionNumber ?? 0) - (a.versionNumber ?? 0));

    // Use leaf file ID as the group key - each branch gets its own group
    groups.set(leafFile.id, lineagePath);
  }

  return groups;
}
/**
 * Get the latest version of each file group (optimized version using leaf flags).
 *
 * Fast path: when at least one stub carries an `isLeaf` flag, every stub whose
 * flag is not explicitly `false` is returned (an undefined flag counts as leaf).
 * Fallback: when no stub carries the flag, lineages are computed with
 * `groupFilesByOriginal` and the first (highest-version) entry of each is returned.
 *
 * @param fileStubs - All known stubs.
 * @returns The stubs considered to be the latest version of their branch.
 */
export function getLatestVersions(fileStubs: StirlingFileStub[]): StirlingFileStub[] {
  // If we have leaf flags, use them for much faster filtering
  const hasLeafFlags = fileStubs.some(fileStub => fileStub.isLeaf !== undefined);
  if (hasLeafFlags) {
    return fileStubs.filter(fileStub => fileStub.isLeaf !== false);
  }

  // Fallback to expensive calculation for backward compatibility
  const latestVersions: StirlingFileStub[] = [];
  for (const lineage of groupFilesByOriginal(fileStubs).values()) {
    // First item is the latest version (sorted desc by version number)
    const [latest] = lineage;
    if (latest !== undefined) {
      latestVersions.push(latest);
    }
  }
  return latestVersions;
}
/**
 * Get version history for a file.
 *
 * Two stubs belong to the same history when they resolve to the same original
 * id, where a stub without `originalFileId` is its own original.
 *
 * @param targetFileStub - Stub whose history is requested.
 * @param allFileStubs - Pool of stubs to search; the array is not modified.
 * @returns A new array with every matching stub, highest version first
 *          (a missing version number counts as 0).
 */
export function getVersionHistory(
  targetFileStub: StirlingFileStub,
  allFileStubs: StirlingFileStub[]
): StirlingFileStub[] {
  const originalId = targetFileStub.originalFileId || targetFileStub.id;

  return allFileStubs
    .filter(fileStub => (fileStub.originalFileId || fileStub.id) === originalId)
    .sort((a, b) => (b.versionNumber ?? 0) - (a.versionNumber ?? 0));
}
/**
 * Check if a file has version history.
 *
 * @param fileStub - Stub to inspect.
 * @returns `true` only when the stub has a non-empty `originalFileId` and a
 *          `versionNumber` greater than 0.
 */
export function hasVersionHistory(fileStub: StirlingFileStub): boolean {
  const { originalFileId, versionNumber } = fileStub;
  return Boolean(originalFileId) && (versionNumber ?? 0) > 0;
}
/**
 * Generate a descriptive name for a file version.
 *
 * Files without version history keep their name as-is. Versioned files are
 * named `<base> (v<N>)[ - <last tool>].pdf`, where `<base>` is the stub name
 * with a trailing ".pdf" (any letter case) removed.
 *
 * @param fileStub - Stub to name.
 * @returns The display name for this version.
 */
export function generateVersionName(fileStub: StirlingFileStub): string {
  if (!hasVersionHistory(fileStub)) {
    return fileStub.name;
  }

  const baseName = fileStub.name.replace(/\.pdf$/i, '');

  // hasVersionHistory guarantees a version number greater than 0 here.
  const versionInfo = ` (v${fileStub.versionNumber})`;

  // Only the most recent tool is shown.
  const toolHistory = fileStub.toolHistory ?? [];
  const lastTool = toolHistory.length > 0 ? toolHistory[toolHistory.length - 1] : undefined;
  const toolInfo = lastTool ? ` - ${lastTool.toolName}` : '';

  return `${baseName}${versionInfo}${toolInfo}.pdf`;
}
2025-09-04 11:26:55 +01:00
/**
 * Fetch the stored files flagged as leaves.
 *
 * The storage service is imported lazily and asked for its leaf files via
 * `fileStorage.getLeafFiles()`. Any failure (including a failed import) is
 * logged and reported as an empty list.
 *
 * @returns The leaf files from storage, or `[]` on error.
 */
export async function getRecentLeafFiles(): Promise<import('../services/fileStorage').StoredFile[]> {
  try {
    const storageModule = await import('../services/fileStorage');
    const leafFiles = await storageModule.fileStorage.getLeafFiles();
    return leafFiles;
  } catch (error) {
    console.warn('Failed to get recent leaf files from IndexedDB:', error);
    return [];
  }
}
/**
 * Fetch metadata (stored file records without their `data`) for the files
 * flagged as leaves.
 *
 * The storage service is imported lazily and queried via
 * `fileStorage.getLeafFileMetadata()`. Any failure (including a failed import)
 * is logged and reported as an empty list.
 *
 * @returns Leaf file metadata from storage, or `[]` on error.
 */
export async function getRecentLeafFileMetadata(): Promise<Omit<import('../services/fileStorage').StoredFile, 'data'>[]> {
  try {
    const storageModule = await import('../services/fileStorage');
    const leafMetadata = await storageModule.fileStorage.getLeafFileMetadata();
    return leafMetadata;
  } catch (error) {
    console.warn('Failed to get recent leaf file metadata from IndexedDB:', error);
    return [];
  }
}
2025-09-04 12:11:09 +01:00
/**
 * Extract basic file metadata (version number and tool chain) without full history calculation.
 * This is lightweight and used for displaying essential info on file thumbnails.
 *
 * Unlike `extractFileHistory`, parent/original relationships are not copied.
 * When the file is not a PDF, has no history metadata, or extraction throws,
 * the same `fileStub` object is returned.
 *
 * @param file - File whose bytes are inspected for embedded history metadata.
 * @param fileStub - Stub to copy and enrich.
 * @returns A new stub with `versionNumber` and `toolHistory` set, or `fileStub` unchanged.
 */
export async function extractBasicFileMetadata(
  file: File,
  fileStub: StirlingFileStub
): Promise<StirlingFileStub> {
  // Only process PDF files
  if (!file.type.includes('pdf')) {
    return fileStub;
  }

  try {
    const arrayBuffer = await file.arrayBuffer();
    const historyMetadata = await pdfMetadataService.extractHistoryMetadata(arrayBuffer);
    if (!historyMetadata) {
      return fileStub;
    }

    const history = historyMetadata.stirlingHistory;

    // Update fileStub with essential metadata only (no parent/original relationships)
    return {
      ...fileStub,
      versionNumber: history.versionNumber,
      toolHistory: history.toolChain
    };
  } catch (error) {
    if (DEBUG) console.warn('📄 Failed to extract basic metadata:', file.name, error);
    return fileStub;
  }
}
/**
 * Load file history on-demand for a specific file.
 * This replaces the automatic history extraction during file loading.
 *
 * A minimal stub is built from the file's own properties and passed through
 * `extractFileHistory`. History counts as found only when a different stub
 * comes back carrying an `originalFileId` or a `versionNumber`.
 *
 * @param file - File to inspect; non-PDF files yield `null` immediately.
 * @param fileId - Id of the file, used for the temporary stub and the update callback.
 * @param updateFileStub - Optional callback that receives the extracted history
 *        so the caller can merge it into its own stub for `fileId`.
 * @returns The extracted history fields, or `null` when the file is not a PDF,
 *          has no history, or an error occurred.
 */
export async function loadFileHistoryOnDemand(
  file: File,
  fileId: FileId,
  updateFileStub?: (id: FileId, updates: Partial<StirlingFileStub>) => void
): Promise<{
  originalFileId?: string;
  versionNumber?: number;
  parentFileId?: FileId;
  toolHistory?: Array<{
    toolName: string;
    timestamp: number;
    parameters?: Record<string, any>;
  }>;
} | null> {
  // Only process PDF files
  if (!file.type.includes('pdf')) {
    return null;
  }

  try {
    const baseFileStub: StirlingFileStub = {
      id: fileId,
      name: file.name,
      size: file.size,
      type: file.type,
      lastModified: file.lastModified
    };

    const updatedFileStub = await extractFileHistory(file, baseFileStub);

    // extractFileHistory hands back the same object when it found nothing.
    const foundHistory =
      updatedFileStub !== baseFileStub &&
      Boolean(updatedFileStub.originalFileId || updatedFileStub.versionNumber);
    if (!foundHistory) {
      return null;
    }

    const historyData = {
      originalFileId: updatedFileStub.originalFileId,
      versionNumber: updatedFileStub.versionNumber,
      parentFileId: updatedFileStub.parentFileId,
      toolHistory: updatedFileStub.toolHistory
    };

    // Update the file stub if update function is provided
    if (updateFileStub) {
      updateFileStub(fileId, historyData);
    }

    return historyData;
  } catch (error) {
    console.warn(`Failed to load history for ${file.name}:`, error);
    return null;
  }
}
2025-09-02 17:24:26 +01:00
/**
 * Create metadata for storing files with history information.
 *
 * Every file gets base metadata built from its own properties and is marked
 * as a leaf. For PDFs, standard PDF metadata and embedded history are read
 * from the same buffer and attached when present. If reading or extraction
 * throws, the base metadata is returned on its own.
 *
 * @param file - File being stored.
 * @param fileId - Id assigned to the file.
 * @param thumbnail - Optional thumbnail stored with the metadata.
 * @returns Metadata for the file, enriched with `pdfMetadata`, `versionNumber`
 *          and `historyInfo` where available.
 */
export async function createFileMetadataWithHistory(
  file: File,
  fileId: FileId,
  thumbnail?: string
): Promise<FileMetadata> {
  const baseMetadata: FileMetadata = {
    id: fileId,
    name: file.name,
    type: file.type,
    size: file.size,
    lastModified: file.lastModified,
    thumbnail,
    isLeaf: true // New files are leaf nodes by default
  };

  // Extract metadata for PDF files only
  if (!file.type.includes('pdf')) {
    return baseMetadata;
  }

  try {
    const arrayBuffer = await file.arrayBuffer();
    const [historyMetadata, standardMetadata] = await Promise.all([
      pdfMetadataService.extractHistoryMetadata(arrayBuffer),
      pdfMetadataService.extractStandardMetadata(arrayBuffer)
    ]);

    const result: FileMetadata = { ...baseMetadata };

    // Add standard PDF metadata if available
    if (standardMetadata) {
      result.pdfMetadata = standardMetadata;
    }

    // Add history metadata if available (basic version for display)
    if (historyMetadata) {
      const history = historyMetadata.stirlingHistory;
      result.versionNumber = history.versionNumber;
      result.historyInfo = {
        originalFileId: history.originalFileId,
        parentFileId: history.parentFileId,
        versionNumber: history.versionNumber,
        toolChain: history.toolChain
      };
    }

    return result;
  } catch (error) {
    if (DEBUG) console.warn('📄 Failed to extract metadata:', file.name, error);
    return baseMetadata;
  }
}