From 9923411ade7ff37ffe50603b700b397b8674f17e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Sz=C3=BCcs?= <127139797+Balazs-Szucs@users.noreply.github.com> Date: Thu, 19 Jun 2025 17:18:57 +0200 Subject: [PATCH] Eml-to-pdf bug fixes: removal of incompatible fonts, removal of emoji in favor of @, jakarta-mail dependency handling improvements (#3770) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description of Changes This pull request introduces enhancements and code cleanup to the `EmlToPdf` utility class, focusing on improving email-to-PDF conversion, handling embedded images, and simplifying the codebase. Key changes include better handling of inline images, enhanced Jakarta Mail dependency checks, and refactoring for improved readability and maintainability. ### Enhancements to Email-to-PDF Conversion: * Added support for processing inline images (`cid:` references) by converting them into data URIs for proper inline display. * Improved attachment handling to always include embedded images regardless of size, ensuring inline display functionality. * Enhanced email HTML generation to process inline images and include them in the email body. ### Attachment Handling Enhancements: * Replaced the attachment icon placeholder (`icon` or 📎 emoji) with a new marker (`@`) for consistency across the application (non-fat images did not support the emoji; however, `@` is supported across the board). * Updated the annotation logic to use `AttachmentMarkerPositionFinder` instead of `EmojiPositionFinder`, aligning with the new attachment marker system. ### Jakarta Mail Dependency Handling: * Added detailed checks for core Jakarta Mail classes to determine availability in different environments (e.g., Docker). * Introduced validation for Jakarta Mail multipart and part types to prevent processing invalid objects. 
* Explicitly check for the following classes: - jakarta.mail.internet.MimeMessage – Core email message parsing - jakarta.mail.Session – Email session management - jakarta.mail.internet.MimeUtility – MIME encoding/decoding utilities - jakarta.mail.internet.MimePart – Individual MIME parts (attachments, body parts) - jakarta.mail.internet.MimeMultipart – Multi-part MIME messages - jakarta.mail.Multipart – Base multipart interface - jakarta.mail.Part – Base part interface ### Code Cleanup and Refactoring: * Simplified utility classes (`StyleConstants`, `MimeConstants`, `FileSizeConstants`) by reducing their private constructors to empty bodies and removing unused constants. * Updated log messages for clarity, such as distinguishing between general content processing errors and multipart-specific issues. --- ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [x] I have tested my changes locally. 
Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. --- .../software/common/util/EmlToPdf.java | 295 +++++++++++++----- .../SPDF/config/EndpointConfiguration.java | 3 +- 2 files changed, 221 insertions(+), 77 deletions(-) diff --git a/common/src/main/java/stirling/software/common/util/EmlToPdf.java b/common/src/main/java/stirling/software/common/util/EmlToPdf.java index a97673745..b08bc16a5 100644 --- a/common/src/main/java/stirling/software/common/util/EmlToPdf.java +++ b/common/src/main/java/stirling/software/common/util/EmlToPdf.java @@ -46,12 +46,10 @@ import stirling.software.common.model.api.converters.EmlToPdfRequest; @Slf4j @UtilityClass public class EmlToPdf { - private static final class StyleConstants { // Font and layout constants static final int DEFAULT_FONT_SIZE = 12; - static final String DEFAULT_FONT_FAMILY = - "-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif"; + static final String DEFAULT_FONT_FAMILY = "Helvetica, sans-serif"; static final float DEFAULT_LINE_HEIGHT = 1.4f; static final String DEFAULT_ZOOM = "1.0"; @@ -72,20 +70,15 @@ public class EmlToPdf { static final int EML_CHECK_LENGTH = 8192; static final int MIN_HEADER_COUNT_FOR_VALID_EML = 2; - private StyleConstants() { - // Utility class - prevent instantiation - } + private StyleConstants() {} } private static final class MimeConstants { static final Pattern MIME_ENCODED_PATTERN = Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?="); - static final String PAPERCLIP_EMOJI = "\uD83D\uDCCE"; // 📎 - static final String ATTACHMENT_ICON_PLACEHOLDER = "icon"; + static final String ATTACHMENT_MARKER = "@"; - private MimeConstants() { - // Utility class - prevent instantiation - } + private MimeConstants() {} } private static final class FileSizeConstants { @@ -93,9 +86,7 @@ public class EmlToPdf { static final long BYTES_IN_MB = BYTES_IN_KB * 1024L; static final long 
BYTES_IN_GB = BYTES_IN_MB * 1024L; - private FileSizeConstants() { - // Utility class - prevent instantiation - } + private FileSizeConstants() {} } // Cached Jakarta Mail availability check @@ -104,8 +95,15 @@ public class EmlToPdf { private static boolean isJakartaMailAvailable() { if (jakartaMailAvailable == null) { try { + // Check for core Jakarta Mail classes Class.forName("jakarta.mail.internet.MimeMessage"); Class.forName("jakarta.mail.Session"); + Class.forName("jakarta.mail.internet.MimeUtility"); + Class.forName("jakarta.mail.internet.MimePart"); + Class.forName("jakarta.mail.internet.MimeMultipart"); + Class.forName("jakarta.mail.Multipart"); + Class.forName("jakarta.mail.Part"); + jakartaMailAvailable = true; log.debug("Jakarta Mail libraries are available"); } catch (ClassNotFoundException e) { @@ -172,7 +170,7 @@ public class EmlToPdf { } } - private static void validateEmlInput(byte[] emlBytes) throws IOException { + private static void validateEmlInput(byte[] emlBytes) { if (emlBytes == null || emlBytes.length == 0) { throw new IllegalArgumentException("EML file is empty or null"); } @@ -208,7 +206,6 @@ public class EmlToPdf { disableSanitize); } catch (IOException | InterruptedException e) { log.warn("Initial HTML to PDF conversion failed, trying with simplified HTML"); - // Try with simplified HTML String simplifiedHtml = simplifyHtmlContent(htmlContent); return FileToPdf.convertHtmlToPdf( weasyprintPath, @@ -259,7 +256,7 @@ public class EmlToPdf { html.append("
\n"); html.append("