From 2c9537f786e365611936f44f4489dee11dce1337 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com.> Date: Thu, 10 Jul 2025 14:18:35 +0100 Subject: [PATCH] multi file async --- .../software/common/model/job/JobResult.java | 81 +++++++- .../software/common/model/job/ResultFile.java | 26 +++ .../software/common/service/FileStorage.java | 17 ++ .../software/common/service/TaskManager.java | 180 ++++++++++++++++-- .../common/controller/JobController.java | 131 +++++++++++++ 5 files changed, 421 insertions(+), 14 deletions(-) create mode 100644 common/src/main/java/stirling/software/common/model/job/ResultFile.java diff --git a/common/src/main/java/stirling/software/common/model/job/JobResult.java b/common/src/main/java/stirling/software/common/model/job/JobResult.java index a621f2db2..5c6946122 100644 --- a/common/src/main/java/stirling/software/common/model/job/JobResult.java +++ b/common/src/main/java/stirling/software/common/model/job/JobResult.java @@ -1,6 +1,7 @@ package stirling.software.common.model.job; import java.time.LocalDateTime; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.concurrent.CopyOnWriteArrayList; @@ -26,15 +27,18 @@ public class JobResult { /** Error message if the job failed */ private String error; - /** The file ID of the result file, if applicable */ + /** The file ID of the result file, if applicable (legacy single file support) */ private String fileId; - /** Original file name, if applicable */ + /** Original file name, if applicable (legacy single file support) */ private String originalFileName; - /** MIME type of the result, if applicable */ + /** MIME type of the result, if applicable (legacy single file support) */ private String contentType; + /** List of result files for jobs that produce multiple files */ + private List resultFiles; + /** Time when the job was created */ private LocalDateTime createdAt; @@ -101,6 +105,77 @@ public class JobResult { this.completedAt = LocalDateTime.now(); } + /** + * Mark this job as complete with multiple file results + * + * @param resultFiles The list of result files + */ + public void completeWithFiles(List resultFiles) { + this.complete = true; + this.resultFiles = new ArrayList<>(resultFiles); + this.completedAt = LocalDateTime.now(); + } + + /** + * Mark this job as complete with a single file result (convenience method) + * + * @param fileId The file ID of the result + * @param fileName The file name + * @param contentType The content type of the file + * @param fileSize The size of the file in bytes + */ + public void completeWithSingleFile(String fileId, String fileName, String contentType, long fileSize) { + ResultFile resultFile = ResultFile.builder() + .fileId(fileId) + .fileName(fileName) + .contentType(contentType) + .fileSize(fileSize) + .build(); + completeWithFiles(List.of(resultFile)); + } + + /** + * Check if this job has file results + * + * @return true if this job has file results, false otherwise + */ + public boolean hasFiles() { + return (resultFiles != null && !resultFiles.isEmpty()) || fileId != null; + } + + /** + * Check if this job has multiple file results + * + * @return true if this job has multiple file results, false otherwise + */ + public boolean hasMultipleFiles() { + return resultFiles != null && resultFiles.size() > 1; + } + + /** + * Get all result files (includes legacy single file converted to ResultFile) + * + * @return List of result files + */ + public List getAllResultFiles() { + if (resultFiles != null && !resultFiles.isEmpty()) { + return Collections.unmodifiableList(resultFiles); + } + + // Legacy single file support + if (fileId != null) { + ResultFile legacyFile = ResultFile.builder() + .fileId(fileId) + .fileName(originalFileName) + .contentType(contentType) + .fileSize(0) // Size not tracked in legacy format + .build(); + return List.of(legacyFile); + } + + return Collections.emptyList(); + } + /** * Add a note to this job * diff --git a/common/src/main/java/stirling/software/common/model/job/ResultFile.java b/common/src/main/java/stirling/software/common/model/job/ResultFile.java new file mode 100644 index 000000000..0354e2904 --- /dev/null +++ b/common/src/main/java/stirling/software/common/model/job/ResultFile.java @@ -0,0 +1,26 @@ +package stirling.software.common.model.job; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Represents a single file result from a job execution */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class ResultFile { + + /** The file ID for accessing the file */ + private String fileId; + + /** The original file name */ + private String fileName; + + /** MIME type of the file */ + private String contentType; + + /** Size of the file in bytes */ + private long fileSize; +} \ No newline at end of file diff --git a/common/src/main/java/stirling/software/common/service/FileStorage.java b/common/src/main/java/stirling/software/common/service/FileStorage.java index e200ded8a..e02c39434 100644 --- a/common/src/main/java/stirling/software/common/service/FileStorage.java +++ b/common/src/main/java/stirling/software/common/service/FileStorage.java @@ -131,6 +131,23 @@ public class FileStorage { return Files.exists(filePath); } + /** + * Get the size of a file by its ID without loading the content into memory + * + * @param fileId The ID of the file + * @return The size of the file in bytes + * @throws IOException If the file doesn't exist or can't be read + */ + public long getFileSize(String fileId) throws IOException { + Path filePath = getFilePath(fileId); + + if (!Files.exists(filePath)) { + throw new IOException("File not found with ID: " + fileId); + } + + return Files.size(filePath); + } + /** * Get the path for a file ID * diff --git a/common/src/main/java/stirling/software/common/service/TaskManager.java b/common/src/main/java/stirling/software/common/service/TaskManager.java index c2b3ba8a8..d884b7ace 100644 --- a/common/src/main/java/stirling/software/common/service/TaskManager.java +++ b/common/src/main/java/stirling/software/common/service/TaskManager.java @@ -1,12 +1,21 @@ package stirling.software.common.service; +import java.io.ByteArrayInputStream; +import java.io.IOException; import java.time.LocalDateTime; import java.time.temporal.ChronoUnit; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +import org.springframework.http.MediaType; +import org.springframework.web.multipart.MultipartFile; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; @@ -17,6 +26,7 @@ import lombok.extern.slf4j.Slf4j; import stirling.software.common.model.job.JobResult; import stirling.software.common.model.job.JobStats; +import stirling.software.common.model.job.ResultFile; /** Manages async tasks and their results */ @Service @@ -80,10 +90,40 @@ public class TaskManager { public void setFileResult( String jobId, String fileId, String originalFileName, String contentType) { JobResult jobResult = getOrCreateJobResult(jobId); + + // Check if this is a ZIP file that should be extracted + if (isZipFile(contentType, originalFileName)) { + try { + List extractedFiles = extractZipToIndividualFiles(fileId, originalFileName); + if (!extractedFiles.isEmpty()) { + jobResult.completeWithFiles(extractedFiles); + log.debug("Set multiple file results for job ID: {} with {} files extracted from ZIP", + jobId, extractedFiles.size()); + return; + } + } catch (Exception e) { + log.warn("Failed to extract ZIP file for job {}: {}. Falling back to single file result.", + jobId, e.getMessage()); + } + } + + // Fallback to legacy single file or non-ZIP file jobResult.completeWithFile(fileId, originalFileName, contentType); log.debug("Set file result for job ID: {} with file ID: {}", jobId, fileId); } + /** + * Set the result of a task as multiple files + * + * @param jobId The job ID + * @param resultFiles The list of result files + */ + public void setMultipleFileResults(String jobId, List resultFiles) { + JobResult jobResult = getOrCreateJobResult(jobId); + jobResult.completeWithFiles(resultFiles); + log.debug("Set multiple file results for job ID: {} with {} files", jobId, resultFiles.size()); + } + /** * Set an error for a task * @@ -250,17 +290,8 @@ public class TaskManager { && result.getCompletedAt() != null && result.getCompletedAt().isBefore(expiryThreshold)) { - // If the job has a file result, delete the file - if (result.getFileId() != null) { - try { - fileStorage.deleteFile(result.getFileId()); - } catch (Exception e) { - log.warn( - "Failed to delete file for job {}: {}", - entry.getKey(), - e.getMessage()); - } - } + // Clean up file results + cleanupJobFiles(result, entry.getKey()); // Remove the job result jobResults.remove(entry.getKey()); @@ -290,4 +321,131 @@ public class TaskManager { cleanupExecutor.shutdownNow(); } } + + /** + * Check if a file is a ZIP file based on content type and filename + */ + private boolean isZipFile(String contentType, String fileName) { + if (contentType != null && (contentType.equals("application/zip") || + contentType.equals("application/x-zip-compressed") || + contentType.equals(MediaType.APPLICATION_OCTET_STREAM_VALUE))) { + return true; + } + + if (fileName != null && fileName.toLowerCase().endsWith(".zip")) { + return true; + } + + return false; + } + + /** + * Extract a ZIP file into individual files and store them + */ + private List extractZipToIndividualFiles(String zipFileId, String originalZipFileName) + throws IOException { + List extractedFiles = new ArrayList<>(); + + MultipartFile zipFile = fileStorage.retrieveFile(zipFileId); + + try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) { + ZipEntry entry; + while ((entry = zipIn.getNextEntry()) != null) { + if (!entry.isDirectory()) { + byte[] fileContent = zipIn.readAllBytes(); + String contentType = determineContentType(entry.getName()); + String individualFileId = fileStorage.storeBytes(fileContent, entry.getName()); + + ResultFile resultFile = ResultFile.builder() + .fileId(individualFileId) + .fileName(entry.getName()) + .contentType(contentType) + .fileSize(fileContent.length) + .build(); + + extractedFiles.add(resultFile); + log.debug("Extracted file: {} (size: {} bytes)", entry.getName(), fileContent.length); + } + zipIn.closeEntry(); + } + } + + // Clean up the original ZIP file after extraction + try { + fileStorage.deleteFile(zipFileId); + log.debug("Cleaned up original ZIP file: {}", zipFileId); + } catch (Exception e) { + log.warn("Failed to clean up original ZIP file {}: {}", zipFileId, e.getMessage()); + } + + return extractedFiles; + } + + /** + * Determine content type based on file extension + */ + private String determineContentType(String fileName) { + if (fileName == null) { + return MediaType.APPLICATION_OCTET_STREAM_VALUE; + } + + String lowerName = fileName.toLowerCase(); + if (lowerName.endsWith(".pdf")) { + return MediaType.APPLICATION_PDF_VALUE; + } else if (lowerName.endsWith(".txt")) { + return MediaType.TEXT_PLAIN_VALUE; + } else if (lowerName.endsWith(".json")) { + return MediaType.APPLICATION_JSON_VALUE; + } else if (lowerName.endsWith(".xml")) { + return MediaType.APPLICATION_XML_VALUE; + } else if (lowerName.endsWith(".jpg") || lowerName.endsWith(".jpeg")) { + return MediaType.IMAGE_JPEG_VALUE; + } else if (lowerName.endsWith(".png")) { + return MediaType.IMAGE_PNG_VALUE; + } else { + return MediaType.APPLICATION_OCTET_STREAM_VALUE; + } + } + + /** + * Clean up files associated with a job result + */ + private void cleanupJobFiles(JobResult result, String jobId) { + // Clean up legacy single file + if (result.getFileId() != null) { + try { + fileStorage.deleteFile(result.getFileId()); + } catch (Exception e) { + log.warn("Failed to delete legacy file for job {}: {}", jobId, e.getMessage()); + } + } + + // Clean up multiple files + if (result.getResultFiles() != null) { + for (ResultFile resultFile : result.getResultFiles()) { + try { + fileStorage.deleteFile(resultFile.getFileId()); + } catch (Exception e) { + log.warn("Failed to delete file {} for job {}: {}", + resultFile.getFileId(), jobId, e.getMessage()); + } + } + } + } + + /** + * Find the ResultFile metadata for a given file ID by searching through all job results + */ + public ResultFile findResultFileByFileId(String fileId) { + for (JobResult jobResult : jobResults.values()) { + if (jobResult.hasFiles()) { + for (ResultFile resultFile : jobResult.getAllResultFiles()) { + if (fileId.equals(resultFile.getFileId())) { + return resultFile; + } + } + } + } + return null; + } } diff --git a/stirling-pdf/src/main/java/stirling/software/common/controller/JobController.java b/stirling-pdf/src/main/java/stirling/software/common/controller/JobController.java index 510488a64..8bbf01a76 100644 --- a/stirling-pdf/src/main/java/stirling/software/common/controller/JobController.java +++ b/stirling-pdf/src/main/java/stirling/software/common/controller/JobController.java @@ -1,5 +1,6 @@ package stirling.software.common.controller; +import java.util.List; import java.util.Map; import org.springframework.http.ResponseEntity; @@ -14,6 +15,7 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import stirling.software.common.model.job.JobResult; +import stirling.software.common.model.job.ResultFile; import stirling.software.common.service.FileStorage; import stirling.software.common.service.JobQueue; import stirling.software.common.service.TaskManager; @@ -78,6 +80,18 @@ public class JobController { return ResponseEntity.badRequest().body("Job failed: " + result.getError()); } + // Handle multiple files - return metadata for client to download individually + if (result.hasMultipleFiles()) { + return ResponseEntity.ok() + .header("Content-Type", "application/json") + .body(Map.of( + "jobId", jobId, + "hasMultipleFiles", true, + "files", result.getAllResultFiles() + )); + } + + // Handle single file (legacy support) if (result.getFileId() != null) { try { byte[] fileContent = fileStorage.retrieveBytes(result.getFileId()); @@ -170,4 +184,121 @@ public class JobController { } } } + + /** + * Get the list of files for a job + * + * @param jobId The job ID + * @return List of files for the job + */ + @GetMapping("/api/v1/general/job/{jobId}/result/files") + public ResponseEntity getJobFiles(@PathVariable("jobId") String jobId) { + JobResult result = taskManager.getJobResult(jobId); + if (result == null) { + return ResponseEntity.notFound().build(); + } + + if (!result.isComplete()) { + return ResponseEntity.badRequest().body("Job is not complete yet"); + } + + if (result.getError() != null) { + return ResponseEntity.badRequest().body("Job failed: " + result.getError()); + } + + List files = result.getAllResultFiles(); + return ResponseEntity.ok(Map.of( + "jobId", jobId, + "fileCount", files.size(), + "files", files + )); + } + + /** + * Get metadata for an individual file by its file ID + * + * @param fileId The file ID + * @return The file metadata + */ + @GetMapping("/api/v1/general/files/{fileId}/metadata") + public ResponseEntity getFileMetadata(@PathVariable("fileId") String fileId) { + try { + // Verify file exists + if (!fileStorage.fileExists(fileId)) { + return ResponseEntity.notFound().build(); + } + + // Find the file metadata from any job that contains this file + ResultFile resultFile = findResultFileByFileId(fileId); + + if (resultFile != null) { + return ResponseEntity.ok(resultFile); + } else { + // File exists but no metadata found, get basic info efficiently + long fileSize = fileStorage.getFileSize(fileId); + return ResponseEntity.ok(Map.of( + "fileId", fileId, + "fileName", "unknown", + "contentType", "application/octet-stream", + "fileSize", fileSize + )); + } + } catch (Exception e) { + log.error("Error retrieving file metadata {}: {}", fileId, e.getMessage(), e); + return ResponseEntity.internalServerError() + .body("Error retrieving file metadata: " + e.getMessage()); + } + } + + /** + * Download an individual file by its file ID + * + * @param fileId The file ID + * @return The file content + */ + @GetMapping("/api/v1/general/files/{fileId}") + public ResponseEntity downloadFile(@PathVariable("fileId") String fileId) { + try { + // Verify file exists + if (!fileStorage.fileExists(fileId)) { + return ResponseEntity.notFound().build(); + } + + // Retrieve file content + byte[] fileContent = fileStorage.retrieveBytes(fileId); + + // Find the file metadata from any job that contains this file + // This is for getting the original filename and content type + ResultFile resultFile = findResultFileByFileId(fileId); + + String fileName = resultFile != null ? resultFile.getFileName() : "download"; + String contentType = resultFile != null ? resultFile.getContentType() : "application/octet-stream"; + + return ResponseEntity.ok() + .header("Content-Type", contentType) + .header( + "Content-Disposition", + "attachment; filename=\"" + fileName + "\"") + .body(fileContent); + } catch (Exception e) { + log.error("Error retrieving file {}: {}", fileId, e.getMessage(), e); + return ResponseEntity.internalServerError() + .body("Error retrieving file: " + e.getMessage()); + } + } + + /** + * Find ResultFile metadata by fileId from any job + * This is a helper method to get original filename and content type + * + * @param fileId The file ID to search for + * @return ResultFile if found, null otherwise + */ + private ResultFile findResultFileByFileId(String fileId) { + // Since we don't have a direct way to map fileId to ResultFile, + // this would need to be implemented by searching through job results + // For now, we'll return null and use defaults + // TODO: Consider adding a fileId -> ResultFile mapping in TaskManager + return taskManager.findResultFileByFileId(fileId); + } }