Precomputed-leaf files

This commit is contained in:
Connor Yoh 2025-09-04 11:26:55 +01:00
parent e40d600759
commit d0c6ae2c31
6 changed files with 184 additions and 18 deletions

View File

@ -27,6 +27,7 @@ interface IndexedDBContextValue {
// Utilities
getStorageStats: () => Promise<{ used: number; available: number; fileCount: number }>;
updateThumbnail: (fileId: FileId, thumbnail: string) => Promise<boolean>;
markFileAsProcessed: (fileId: FileId) => Promise<boolean>;
}
const IndexedDBContext = createContext<IndexedDBContextValue | null>(null);
@ -219,6 +220,10 @@ export function IndexedDBProvider({ children }: IndexedDBProviderProps) {
return await fileStorage.updateThumbnail(fileId, thumbnail);
}, []);
// Context-level wrapper: forwards to fileStorage.markFileAsProcessed and
// hands back its boolean result. Memoized with an empty dependency list.
const markFileAsProcessed = useCallback(async (fileId: FileId): Promise<boolean> => {
  const wasMarked = await fileStorage.markFileAsProcessed(fileId);
  return wasMarked;
}, []);
const value: IndexedDBContextValue = {
saveFile,
loadFile,
@ -228,7 +233,8 @@ export function IndexedDBProvider({ children }: IndexedDBProviderProps) {
deleteMultiple,
clearAll,
getStorageStats,
updateThumbnail
updateThumbnail,
markFileAsProcessed
};
return (

View File

@ -480,16 +480,42 @@ export async function consumeFiles(
stateRef: React.MutableRefObject<FileContextState>,
filesRef: React.MutableRefObject<Map<FileId, File>>,
dispatch: React.Dispatch<FileContextAction>,
indexedDB?: { saveFile: (file: File, fileId: FileId, existingThumbnail?: string) => Promise<any> } | null
indexedDB?: { saveFile: (file: File, fileId: FileId, existingThumbnail?: string) => Promise<any>; markFileAsProcessed: (fileId: FileId) => Promise<boolean> } | null
): Promise<FileId[]> {
if (DEBUG) console.log(`📄 consumeFiles: Processing ${inputFileIds.length} input files, ${outputFiles.length} output files`);
// Process output files with thumbnails and metadata
const outputFileRecords = await processFilesIntoRecords(outputFiles, filesRef);
// Persist output files to IndexedDB if available
// Mark input files as processed in IndexedDB (no longer leaf nodes)
if (indexedDB) {
await persistFilesToIndexedDB(outputFileRecords, indexedDB);
await Promise.all([
// Mark input files as processed
...inputFileIds.map(async (fileId) => {
try {
await indexedDB.markFileAsProcessed(fileId);
// Update file record to reflect that it's no longer a leaf
dispatch({
type: 'UPDATE_FILE_RECORD',
payload: {
id: fileId,
updates: { isLeaf: false }
}
});
if (DEBUG) console.log(`📄 consumeFiles: Marked file ${fileId} as processed`);
} catch (error) {
if (DEBUG) console.warn(`📄 consumeFiles: Failed to mark file ${fileId} as processed:`, error);
}
}),
// Persist output files to IndexedDB
...outputFileRecords.map(async ({ file, fileId, thumbnail }) => {
try {
await indexedDB.saveFile(file, fileId, thumbnail);
} catch (error) {
console.error('Failed to persist file to IndexedDB:', file.name, error);
}
})
]);
}
// Dispatch the consume action

View File

@ -16,6 +16,7 @@ export interface StoredFile {
data: ArrayBuffer;
thumbnail?: string;
url?: string; // For compatibility with existing components
isLeaf?: boolean; // True if this file is a leaf node (hasn't been processed yet)
}
export interface StorageStats {
@ -39,7 +40,7 @@ class FileStorageService {
/**
* Store a file in IndexedDB with external UUID
*/
async storeFile(file: File, fileId: FileId, thumbnail?: string): Promise<StoredFile> {
async storeFile(file: File, fileId: FileId, thumbnail?: string, isLeaf: boolean = true): Promise<StoredFile> {
const db = await this.getDatabase();
const arrayBuffer = await file.arrayBuffer();
@ -51,7 +52,8 @@ class FileStorageService {
size: file.size,
lastModified: file.lastModified,
data: arrayBuffer,
thumbnail
thumbnail,
isLeaf
};
return new Promise((resolve, reject) => {
@ -65,7 +67,8 @@ class FileStorageService {
id: storedFile.id, // Now a UUID from FileContext
name: storedFile.name,
hasData: !!storedFile.data,
dataSize: storedFile.data.byteLength
dataSize: storedFile.data.byteLength,
isLeaf: storedFile.isLeaf
});
const request = store.add(storedFile);
@ -206,6 +209,96 @@ class FileStorageService {
});
}
/**
 * Mark a file as no longer being a leaf (it has been processed).
 *
 * Reads the record stored under `id`, sets `isLeaf` to `false` and writes it
 * back inside a single read-write transaction.
 *
 * The returned promise settles only once the transaction has finished, so a
 * resolved `true` means the update was actually committed rather than merely
 * queued.
 *
 * @param id - Key of the stored file to update.
 * @returns `true` if the record existed and the update was committed,
 *          `false` if no record was found under `id`.
 * @throws Rejects with the underlying request/transaction error if the read,
 *         the write, or the commit fails.
 */
async markFileAsProcessed(id: FileId): Promise<boolean> {
  const db = await this.getDatabase();
  return new Promise((resolve, reject) => {
    const transaction = db.transaction([this.storeName], 'readwrite');
    const store = transaction.objectStore(this.storeName);
    let updated = false;

    // A request's success event fires before the transaction commits; wait for
    // `complete` so callers are never told "true" for a write that later aborts.
    transaction.oncomplete = () => resolve(updated);
    transaction.onabort = () => reject(transaction.error);

    const getRequest = store.get(id);
    getRequest.onerror = () => reject(getRequest.error);
    getRequest.onsuccess = () => {
      const file = getRequest.result;
      if (!file) {
        // File not found: nothing to write, the transaction completes with updated === false.
        return;
      }
      file.isLeaf = false;
      const updateRequest = store.put(file);
      updateRequest.onerror = () => reject(updateRequest.error);
      updateRequest.onsuccess = () => {
        updated = true;
      };
    };
  });
}
/**
 * Collect every stored record that is still flagged as a leaf.
 *
 * Walks the whole object store with a cursor and keeps each record whose
 * `isLeaf` is not explicitly `false`, so records without the flag are kept.
 *
 * @returns The full stored records that passed the leaf check.
 * @throws Rejects with the cursor request's error if iteration fails.
 */
async getLeafFiles(): Promise<StoredFile[]> {
  const db = await this.getDatabase();
  return new Promise((resolve, reject) => {
    const collected: StoredFile[] = [];
    const cursorRequest = db
      .transaction([this.storeName], 'readonly')
      .objectStore(this.storeName)
      .openCursor();

    cursorRequest.onsuccess = () => {
      const cursor = cursorRequest.result;
      if (!cursor) {
        // Cursor exhausted: every record has been visited.
        resolve(collected);
        return;
      }
      const record = cursor.value;
      // Only an explicit `false` excludes a record; a missing flag counts as leaf.
      const isLeafRecord = record && record.isLeaf !== false;
      if (isLeafRecord) {
        collected.push(record);
      }
      cursor.continue();
    };
    cursorRequest.onerror = () => reject(cursorRequest.error);
  });
}
/**
 * Collect metadata for every stored record that is still flagged as a leaf.
 *
 * Walks the object store with a cursor. A record is included when it has a
 * truthy `name`, a numeric `size`, and an `isLeaf` that is not explicitly
 * `false`. Each result entry is a fresh object holding only the metadata
 * fields, so the `data` buffer is not part of what is returned.
 *
 * @returns Metadata entries (no `data`) for the records that passed the checks.
 * @throws Rejects with the cursor request's error if iteration fails.
 */
async getLeafFileMetadata(): Promise<Omit<StoredFile, 'data'>[]> {
  const db = await this.getDatabase();
  return new Promise((resolve, reject) => {
    const metadata: Omit<StoredFile, 'data'>[] = [];
    const cursorRequest = db
      .transaction([this.storeName], 'readonly')
      .objectStore(this.storeName)
      .openCursor();

    cursorRequest.onsuccess = () => {
      const cursor = cursorRequest.result;
      if (!cursor) {
        // Cursor exhausted: every record has been visited.
        resolve(metadata);
        return;
      }
      const record = cursor.value;
      const hasBasicFields = record && record.name && typeof record.size === 'number';
      // A missing `isLeaf` is treated as leaf; only an explicit `false` excludes.
      if (hasBasicFields && record.isLeaf !== false) {
        const { id, name, type, size, lastModified, thumbnail, isLeaf } = record;
        metadata.push({ id, name, type, size, lastModified, thumbnail, isLeaf });
      }
      cursor.continue();
    };
    cursorRequest.onerror = () => reject(cursorRequest.error);
  });
}
/**
* Clear all stored files
*/

View File

@ -37,6 +37,7 @@ export interface FileMetadata {
size: number;
lastModified: number;
thumbnail?: string;
isLeaf?: boolean; // True if this file is a leaf node (hasn't been processed yet)
// File history tracking (extracted from PDF metadata)
historyInfo?: FileHistoryInfo;

View File

@ -54,6 +54,7 @@ export interface FileRecord {
processedFile?: ProcessedFileMetadata;
insertAfterPageId?: string; // Page ID after which this file should be inserted
isPinned?: boolean;
isLeaf?: boolean; // True if this file is a leaf node (hasn't been processed yet)
// File history tracking (from PDF metadata)
originalFileId?: string; // Root file ID for grouping versions
@ -104,7 +105,8 @@ export function toFileRecord(file: File, id?: FileId): FileRecord {
type: file.type,
lastModified: file.lastModified,
quickKey: createQuickKey(file),
createdAt: Date.now()
createdAt: Date.now(),
isLeaf: true // New files are leaf nodes by default
};
}

View File

@ -239,20 +239,29 @@ export function groupFilesByOriginal(fileRecords: FileRecord[]): Map<string, Fil
}
/**
* Get the latest version of each file group
* Get the latest version of each file group (optimized version using leaf flags)
*/
export function getLatestVersions(fileRecords: FileRecord[]): FileRecord[] {
const groups = groupFilesByOriginal(fileRecords);
const latestVersions: FileRecord[] = [];
// If we have leaf flags, use them for much faster filtering
// (a single record with a defined isLeaf is enough to select the fast path)
const hasLeafFlags = fileRecords.some(record => record.isLeaf !== undefined);
if (hasLeafFlags) {
// Fast path: just return files marked as leaf nodes
// Only records with isLeaf === false are dropped; records without the flag are kept,
// and no grouping by original file is performed on this path.
return fileRecords.filter(record => record.isLeaf !== false); // Default to true if undefined
} else {
// Fallback to expensive calculation for backward compatibility
// (reached only when no record carries an isLeaf value at all)
const groups = groupFilesByOriginal(fileRecords);
const latestVersions: FileRecord[] = [];
for (const [_, records] of groups) {
if (records.length > 0) {
// First item is the latest version (sorted desc by version number)
latestVersions.push(records[0]);
for (const [_, records] of groups) {
if (records.length > 0) {
// First item is the latest version (sorted desc by version number)
latestVersions.push(records[0]);
}
}
}
return latestVersions;
return latestVersions;
}
}
/**
@ -297,6 +306,34 @@ export function generateVersionName(record: FileRecord): string {
return `${baseName}${versionInfo}${toolInfo}.pdf`;
}
/**
 * Fetch the stored files that are still flagged as leaves.
 *
 * Lazily imports the file storage module and delegates to
 * `fileStorage.getLeafFiles()`. This is a best-effort helper: if the import
 * or the lookup fails, the error is logged as a warning and an empty list is
 * returned instead of throwing.
 *
 * @returns The leaf files, or `[]` when they could not be loaded.
 */
export async function getRecentLeafFiles(): Promise<import('../services/fileStorage').StoredFile[]> {
  let leafFiles: import('../services/fileStorage').StoredFile[] = [];
  try {
    const storageModule = await import('../services/fileStorage');
    leafFiles = await storageModule.fileStorage.getLeafFiles();
  } catch (loadError) {
    console.warn('Failed to get recent leaf files from IndexedDB:', loadError);
  }
  return leafFiles;
}
/**
 * Fetch metadata for the stored files that are still flagged as leaves.
 *
 * Lazily imports the file storage module and delegates to
 * `fileStorage.getLeafFileMetadata()`. This is a best-effort helper: if the
 * import or the lookup fails, the error is logged as a warning and an empty
 * list is returned instead of throwing.
 *
 * @returns Metadata entries (without `data`), or `[]` when they could not be loaded.
 */
export async function getRecentLeafFileMetadata(): Promise<Omit<import('../services/fileStorage').StoredFile, 'data'>[]> {
  let leafMetadata: Omit<import('../services/fileStorage').StoredFile, 'data'>[] = [];
  try {
    const storageModule = await import('../services/fileStorage');
    leafMetadata = await storageModule.fileStorage.getLeafFileMetadata();
  } catch (loadError) {
    console.warn('Failed to get recent leaf file metadata from IndexedDB:', loadError);
  }
  return leafMetadata;
}
/**
* Create metadata for storing files with history information
*/
@ -311,7 +348,8 @@ export async function createFileMetadataWithHistory(
type: file.type,
size: file.size,
lastModified: file.lastModified,
thumbnail
thumbnail,
isLeaf: true // New files are leaf nodes by default
};
// Extract metadata for PDF files