Precomputed-leaf files

This commit is contained in:
Connor Yoh 2025-09-04 11:26:55 +01:00
parent e40d600759
commit d0c6ae2c31
6 changed files with 184 additions and 18 deletions

View File

@ -27,6 +27,7 @@ interface IndexedDBContextValue {
// Utilities // Utilities
getStorageStats: () => Promise<{ used: number; available: number; fileCount: number }>; getStorageStats: () => Promise<{ used: number; available: number; fileCount: number }>;
updateThumbnail: (fileId: FileId, thumbnail: string) => Promise<boolean>; updateThumbnail: (fileId: FileId, thumbnail: string) => Promise<boolean>;
markFileAsProcessed: (fileId: FileId) => Promise<boolean>;
} }
const IndexedDBContext = createContext<IndexedDBContextValue | null>(null); const IndexedDBContext = createContext<IndexedDBContextValue | null>(null);
@ -219,6 +220,10 @@ export function IndexedDBProvider({ children }: IndexedDBProviderProps) {
return await fileStorage.updateThumbnail(fileId, thumbnail); return await fileStorage.updateThumbnail(fileId, thumbnail);
}, []); }, []);
// Context-level wrapper: forwards to fileStorage.markFileAsProcessed and
// resolves with the boolean that call produces. Memoized with an empty
// dependency list, so the callback identity is stable across renders.
const markFileAsProcessed = useCallback(async (fileId: FileId): Promise<boolean> => {
  const wasMarked = await fileStorage.markFileAsProcessed(fileId);
  return wasMarked;
}, []);
const value: IndexedDBContextValue = { const value: IndexedDBContextValue = {
saveFile, saveFile,
loadFile, loadFile,
@ -228,7 +233,8 @@ export function IndexedDBProvider({ children }: IndexedDBProviderProps) {
deleteMultiple, deleteMultiple,
clearAll, clearAll,
getStorageStats, getStorageStats,
updateThumbnail updateThumbnail,
markFileAsProcessed
}; };
return ( return (

View File

@ -480,16 +480,42 @@ export async function consumeFiles(
stateRef: React.MutableRefObject<FileContextState>, stateRef: React.MutableRefObject<FileContextState>,
filesRef: React.MutableRefObject<Map<FileId, File>>, filesRef: React.MutableRefObject<Map<FileId, File>>,
dispatch: React.Dispatch<FileContextAction>, dispatch: React.Dispatch<FileContextAction>,
indexedDB?: { saveFile: (file: File, fileId: FileId, existingThumbnail?: string) => Promise<any> } | null indexedDB?: { saveFile: (file: File, fileId: FileId, existingThumbnail?: string) => Promise<any>; markFileAsProcessed: (fileId: FileId) => Promise<boolean> } | null
): Promise<FileId[]> { ): Promise<FileId[]> {
if (DEBUG) console.log(`📄 consumeFiles: Processing ${inputFileIds.length} input files, ${outputFiles.length} output files`); if (DEBUG) console.log(`📄 consumeFiles: Processing ${inputFileIds.length} input files, ${outputFiles.length} output files`);
// Process output files with thumbnails and metadata // Process output files with thumbnails and metadata
const outputFileRecords = await processFilesIntoRecords(outputFiles, filesRef); const outputFileRecords = await processFilesIntoRecords(outputFiles, filesRef);
// Persist output files to IndexedDB if available // Mark input files as processed in IndexedDB (no longer leaf nodes)
if (indexedDB) { if (indexedDB) {
await persistFilesToIndexedDB(outputFileRecords, indexedDB); await Promise.all([
// Mark input files as processed
...inputFileIds.map(async (fileId) => {
try {
await indexedDB.markFileAsProcessed(fileId);
// Update file record to reflect that it's no longer a leaf
dispatch({
type: 'UPDATE_FILE_RECORD',
payload: {
id: fileId,
updates: { isLeaf: false }
}
});
if (DEBUG) console.log(`📄 consumeFiles: Marked file ${fileId} as processed`);
} catch (error) {
if (DEBUG) console.warn(`📄 consumeFiles: Failed to mark file ${fileId} as processed:`, error);
}
}),
// Persist output files to IndexedDB
...outputFileRecords.map(async ({ file, fileId, thumbnail }) => {
try {
await indexedDB.saveFile(file, fileId, thumbnail);
} catch (error) {
console.error('Failed to persist file to IndexedDB:', file.name, error);
}
})
]);
} }
// Dispatch the consume action // Dispatch the consume action

View File

@ -16,6 +16,7 @@ export interface StoredFile {
data: ArrayBuffer; data: ArrayBuffer;
thumbnail?: string; thumbnail?: string;
url?: string; // For compatibility with existing components url?: string; // For compatibility with existing components
isLeaf?: boolean; // True if this file is a leaf node (hasn't been processed yet)
} }
export interface StorageStats { export interface StorageStats {
@ -39,7 +40,7 @@ class FileStorageService {
/** /**
* Store a file in IndexedDB with external UUID * Store a file in IndexedDB with external UUID
*/ */
async storeFile(file: File, fileId: FileId, thumbnail?: string): Promise<StoredFile> { async storeFile(file: File, fileId: FileId, thumbnail?: string, isLeaf: boolean = true): Promise<StoredFile> {
const db = await this.getDatabase(); const db = await this.getDatabase();
const arrayBuffer = await file.arrayBuffer(); const arrayBuffer = await file.arrayBuffer();
@ -51,7 +52,8 @@ class FileStorageService {
size: file.size, size: file.size,
lastModified: file.lastModified, lastModified: file.lastModified,
data: arrayBuffer, data: arrayBuffer,
thumbnail thumbnail,
isLeaf
}; };
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
@ -65,7 +67,8 @@ class FileStorageService {
id: storedFile.id, // Now a UUID from FileContext id: storedFile.id, // Now a UUID from FileContext
name: storedFile.name, name: storedFile.name,
hasData: !!storedFile.data, hasData: !!storedFile.data,
dataSize: storedFile.data.byteLength dataSize: storedFile.data.byteLength,
isLeaf: storedFile.isLeaf
}); });
const request = store.add(storedFile); const request = store.add(storedFile);
@ -206,6 +209,96 @@ class FileStorageService {
}); });
} }
/**
 * Mark a stored file as no longer being a leaf (it has been processed).
 *
 * Reads the record with the given id, sets `isLeaf` to `false` and writes it
 * back inside a single `readwrite` transaction.
 *
 * @param id - Key of the record in the object store.
 * @returns `true` once the record is flagged as processed and the transaction
 *   has completed; `false` when no record with that id exists.
 * @throws Rejects with the request or transaction error when the read, the
 *   write, or the commit fails.
 */
async markFileAsProcessed(id: FileId): Promise<boolean> {
  const db = await this.getDatabase();

  return new Promise((resolve, reject) => {
    const transaction = db.transaction([this.storeName], 'readwrite');
    const store = transaction.objectStore(this.storeName);
    let marked = false;

    // A successful put() request does not guarantee the change was committed:
    // the transaction can still abort afterwards. Settle the promise from the
    // transaction's own lifecycle events instead of the request's.
    transaction.oncomplete = () => resolve(marked);
    transaction.onabort = () => reject(transaction.error);

    const getRequest = store.get(id);
    // Request-level errors reject first with the more specific request error;
    // the abort that follows is then a no-op on the already-settled promise.
    getRequest.onerror = () => reject(getRequest.error);
    getRequest.onsuccess = () => {
      const file = getRequest.result;
      if (!file) {
        return; // File not found: the transaction completes with marked === false
      }

      if (file.isLeaf === false) {
        // Already flagged: skip rewriting the whole record (including its data).
        marked = true;
        return;
      }

      file.isLeaf = false;
      const updateRequest = store.put(file);
      updateRequest.onsuccess = () => {
        marked = true;
      };
      updateRequest.onerror = () => reject(updateRequest.error);
    };
  });
}
/**
 * Collect every stored file that is still a leaf (has not been processed).
 *
 * Walks the whole object store with a cursor inside a `readonly` transaction.
 * A record is kept unless its `isLeaf` flag is explicitly `false`, so records
 * with no flag at all are treated as leaves.
 *
 * @returns The matching records, in cursor order.
 * @throws Rejects with the cursor request's error if iteration fails.
 */
async getLeafFiles(): Promise<StoredFile[]> {
  const db = await this.getDatabase();

  return new Promise<StoredFile[]>((resolve, reject) => {
    const collected: StoredFile[] = [];
    const cursorRequest = db
      .transaction([this.storeName], 'readonly')
      .objectStore(this.storeName)
      .openCursor();

    cursorRequest.onerror = () => reject(cursorRequest.error);
    cursorRequest.onsuccess = () => {
      const cursor = cursorRequest.result;
      if (!cursor) {
        // Cursor exhausted: hand back everything gathered so far.
        resolve(collected);
        return;
      }

      const record = cursor.value;
      if (record && record.isLeaf !== false) { // Missing flag counts as leaf
        collected.push(record);
      }
      cursor.continue();
    };
  });
}
/**
 * Get metadata for leaf files only; the returned objects carry no `data` field.
 *
 * Walks the whole object store with a cursor inside a `readonly` transaction.
 * A record is included when it has a truthy `name`, a numeric `size`, and an
 * `isLeaf` flag that is not explicitly `false` (a missing flag counts as leaf,
 * for backward compatibility).
 *
 * NOTE(review): each record is still read through `cursor.value`, so its
 * `data` is presumably materialized during the scan; only the returned
 * objects omit it. Confirm before relying on this for memory savings.
 *
 * @returns One metadata object per matching record, in cursor order.
 * @throws Rejects with the cursor request's error if iteration fails.
 */
async getLeafFileMetadata(): Promise<Omit<StoredFile, 'data'>[]> {
  const db = await this.getDatabase();

  return new Promise<Omit<StoredFile, 'data'>[]>((resolve, reject) => {
    const metadataList: Omit<StoredFile, 'data'>[] = [];
    const cursorRequest = db
      .transaction([this.storeName], 'readonly')
      .objectStore(this.storeName)
      .openCursor();

    cursorRequest.onerror = () => reject(cursorRequest.error);
    cursorRequest.onsuccess = () => {
      const cursor = cursorRequest.result;
      if (!cursor) {
        // Cursor exhausted: hand back everything gathered so far.
        resolve(metadataList);
        return;
      }

      const record = cursor.value;
      const looksValid = record && record.name && typeof record.size === 'number';
      if (looksValid && record.isLeaf !== false) {
        const { id, name, type, size, lastModified, thumbnail, isLeaf } = record;
        metadataList.push({ id, name, type, size, lastModified, thumbnail, isLeaf });
      }
      cursor.continue();
    };
  });
}
/** /**
* Clear all stored files * Clear all stored files
*/ */

View File

@ -37,6 +37,7 @@ export interface FileMetadata {
size: number; size: number;
lastModified: number; lastModified: number;
thumbnail?: string; thumbnail?: string;
isLeaf?: boolean; // True if this file is a leaf node (hasn't been processed yet)
// File history tracking (extracted from PDF metadata) // File history tracking (extracted from PDF metadata)
historyInfo?: FileHistoryInfo; historyInfo?: FileHistoryInfo;

View File

@ -54,6 +54,7 @@ export interface FileRecord {
processedFile?: ProcessedFileMetadata; processedFile?: ProcessedFileMetadata;
insertAfterPageId?: string; // Page ID after which this file should be inserted insertAfterPageId?: string; // Page ID after which this file should be inserted
isPinned?: boolean; isPinned?: boolean;
isLeaf?: boolean; // True if this file is a leaf node (hasn't been processed yet)
// File history tracking (from PDF metadata) // File history tracking (from PDF metadata)
originalFileId?: string; // Root file ID for grouping versions originalFileId?: string; // Root file ID for grouping versions
@ -104,7 +105,8 @@ export function toFileRecord(file: File, id?: FileId): FileRecord {
type: file.type, type: file.type,
lastModified: file.lastModified, lastModified: file.lastModified,
quickKey: createQuickKey(file), quickKey: createQuickKey(file),
createdAt: Date.now() createdAt: Date.now(),
isLeaf: true // New files are leaf nodes by default
}; };
} }

View File

@ -239,9 +239,17 @@ export function groupFilesByOriginal(fileRecords: FileRecord[]): Map<string, Fil
} }
/** /**
* Get the latest version of each file group * Get the latest version of each file group (optimized version using leaf flags)
*/ */
export function getLatestVersions(fileRecords: FileRecord[]): FileRecord[] { export function getLatestVersions(fileRecords: FileRecord[]): FileRecord[] {
// If we have leaf flags, use them for much faster filtering
const hasLeafFlags = fileRecords.some(record => record.isLeaf !== undefined);
if (hasLeafFlags) {
// Fast path: just return files marked as leaf nodes
return fileRecords.filter(record => record.isLeaf !== false); // Default to true if undefined
} else {
// Fallback to expensive calculation for backward compatibility
const groups = groupFilesByOriginal(fileRecords); const groups = groupFilesByOriginal(fileRecords);
const latestVersions: FileRecord[] = []; const latestVersions: FileRecord[] = [];
@ -254,6 +262,7 @@ export function getLatestVersions(fileRecords: FileRecord[]): FileRecord[] {
return latestVersions; return latestVersions;
} }
}
/** /**
* Get version history for a file * Get version history for a file
@ -297,6 +306,34 @@ export function generateVersionName(record: FileRecord): string {
return `${baseName}${versionInfo}${toolInfo}.pdf`; return `${baseName}${versionInfo}${toolInfo}.pdf`;
} }
/**
 * Fetch the stored files that are still flagged as leaves.
 *
 * Loads the file storage service lazily through a dynamic import and
 * delegates to its `getLeafFiles()` method.
 *
 * @returns The records returned by `fileStorage.getLeafFiles()`, or an empty
 *   array if the import or the lookup fails (the failure is logged as a
 *   warning and not rethrown).
 */
export async function getRecentLeafFiles(): Promise<import('../services/fileStorage').StoredFile[]> {
  try {
    const storageModule = await import('../services/fileStorage');
    const leafFiles = await storageModule.fileStorage.getLeafFiles();
    return leafFiles;
  } catch (lookupError) {
    console.warn('Failed to get recent leaf files from IndexedDB:', lookupError);
    return [];
  }
}
/**
 * Fetch metadata (records without their `data` field) for the stored files
 * that are still flagged as leaves.
 *
 * Loads the file storage service lazily through a dynamic import and
 * delegates to its `getLeafFileMetadata()` method.
 *
 * @returns The entries returned by `fileStorage.getLeafFileMetadata()`, or an
 *   empty array if the import or the lookup fails (the failure is logged as a
 *   warning and not rethrown).
 */
export async function getRecentLeafFileMetadata(): Promise<Omit<import('../services/fileStorage').StoredFile, 'data'>[]> {
  try {
    const storageModule = await import('../services/fileStorage');
    const leafMetadata = await storageModule.fileStorage.getLeafFileMetadata();
    return leafMetadata;
  } catch (lookupError) {
    console.warn('Failed to get recent leaf file metadata from IndexedDB:', lookupError);
    return [];
  }
}
/** /**
* Create metadata for storing files with history information * Create metadata for storing files with history information
*/ */
@ -311,7 +348,8 @@ export async function createFileMetadataWithHistory(
type: file.type, type: file.type,
size: file.size, size: file.size,
lastModified: file.lastModified, lastModified: file.lastModified,
thumbnail thumbnail,
isLeaf: true // New files are leaf nodes by default
}; };
// Extract metadata for PDF files // Extract metadata for PDF files