From 9923411ade7ff37ffe50603b700b397b8674f17e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Sz=C3=BCcs?= <127139797+Balazs-Szucs@users.noreply.github.com> Date: Thu, 19 Jun 2025 17:18:57 +0200 Subject: [PATCH] Eml-to-pdf bug fixes: removal of incompatible fonts, removal of emoji in favor of @, jakarta-mail dependency handling improvements (#3770) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description of Changes This pull request introduces enhancements and code cleanup to the `EmlToPdf` utility class, focusing on improving email-to-PDF conversion, handling embedded images, and simplifying the codebase. Key changes include better handling of inline images, enhanced Jakarta Mail dependency checks, and refactoring for improved readability and maintainability. ### Enhancements to Email-to-PDF Conversion: * Added support for processing inline images (`cid:` references) by converting them into data URIs for proper inline display. * Improved attachment handling to always include embedded images regardless of size, ensuring inline display functionality. * Enhanced email HTML generation to process inline images and include them in the email body. ### Attachment Handling Enhancements: * Replaced the attachment icon placeholder (`icon` or 📎 emoji) with a new marker (`@`) for consistency across the application (non-fat images did not support the emoji, whereas `@` is supported across the board). * Updated the annotation logic to use `AttachmentMarkerPositionFinder` instead of `EmojiPositionFinder`, aligning with the new attachment marker system. ### Jakarta Mail Dependency Handling: * Added detailed checks for core Jakarta Mail classes to determine availability in different environments (e.g., Docker). * Introduced validation for Jakarta Mail multipart and part types to prevent processing invalid objects. 
* Explicitly checks for the following classes: - jakarta.mail.internet.MimeMessage – Core email message parsing - jakarta.mail.Session – Email session management - jakarta.mail.internet.MimeUtility – MIME encoding/decoding utilities - jakarta.mail.internet.MimePart – Individual MIME parts (attachments, body parts) - jakarta.mail.internet.MimeMultipart – Multi-part MIME messages - jakarta.mail.Multipart – Base multipart class - jakarta.mail.Part – Base part interface ### Code Cleanup and Refactoring: * Simplified utility classes (`StyleConstants`, `MimeConstants`, `FileSizeConstants`) by removing unnecessary constructors and unused constants. * Updated log messages for clarity, such as distinguishing between general content processing errors and multipart-specific issues. --- ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [x] I have tested my changes locally. 
Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. --- .../software/common/util/EmlToPdf.java | 295 +++++++++++++----- .../SPDF/config/EndpointConfiguration.java | 3 +- 2 files changed, 221 insertions(+), 77 deletions(-) diff --git a/common/src/main/java/stirling/software/common/util/EmlToPdf.java b/common/src/main/java/stirling/software/common/util/EmlToPdf.java index a97673745..b08bc16a5 100644 --- a/common/src/main/java/stirling/software/common/util/EmlToPdf.java +++ b/common/src/main/java/stirling/software/common/util/EmlToPdf.java @@ -46,12 +46,10 @@ import stirling.software.common.model.api.converters.EmlToPdfRequest; @Slf4j @UtilityClass public class EmlToPdf { - private static final class StyleConstants { // Font and layout constants static final int DEFAULT_FONT_SIZE = 12; - static final String DEFAULT_FONT_FAMILY = - "-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif"; + static final String DEFAULT_FONT_FAMILY = "Helvetica, sans-serif"; static final float DEFAULT_LINE_HEIGHT = 1.4f; static final String DEFAULT_ZOOM = "1.0"; @@ -72,20 +70,15 @@ public class EmlToPdf { static final int EML_CHECK_LENGTH = 8192; static final int MIN_HEADER_COUNT_FOR_VALID_EML = 2; - private StyleConstants() { - // Utility class - prevent instantiation - } + private StyleConstants() {} } private static final class MimeConstants { static final Pattern MIME_ENCODED_PATTERN = Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?="); - static final String PAPERCLIP_EMOJI = "\uD83D\uDCCE"; // 📎 - static final String ATTACHMENT_ICON_PLACEHOLDER = "icon"; + static final String ATTACHMENT_MARKER = "@"; - private MimeConstants() { - // Utility class - prevent instantiation - } + private MimeConstants() {} } private static final class FileSizeConstants { @@ -93,9 +86,7 @@ public class EmlToPdf { static final long BYTES_IN_MB = BYTES_IN_KB * 1024L; static final long 
BYTES_IN_GB = BYTES_IN_MB * 1024L; - private FileSizeConstants() { - // Utility class - prevent instantiation - } + private FileSizeConstants() {} } // Cached Jakarta Mail availability check @@ -104,8 +95,15 @@ public class EmlToPdf { private static boolean isJakartaMailAvailable() { if (jakartaMailAvailable == null) { try { + // Check for core Jakarta Mail classes Class.forName("jakarta.mail.internet.MimeMessage"); Class.forName("jakarta.mail.Session"); + Class.forName("jakarta.mail.internet.MimeUtility"); + Class.forName("jakarta.mail.internet.MimePart"); + Class.forName("jakarta.mail.internet.MimeMultipart"); + Class.forName("jakarta.mail.Multipart"); + Class.forName("jakarta.mail.Part"); + jakartaMailAvailable = true; log.debug("Jakarta Mail libraries are available"); } catch (ClassNotFoundException e) { @@ -172,7 +170,7 @@ public class EmlToPdf { } } - private static void validateEmlInput(byte[] emlBytes) throws IOException { + private static void validateEmlInput(byte[] emlBytes) { if (emlBytes == null || emlBytes.length == 0) { throw new IllegalArgumentException("EML file is empty or null"); } @@ -208,7 +206,6 @@ public class EmlToPdf { disableSanitize); } catch (IOException | InterruptedException e) { log.warn("Initial HTML to PDF conversion failed, trying with simplified HTML"); - // Try with simplified HTML String simplifiedHtml = simplifyHtmlContent(htmlContent); return FileToPdf.convertHtmlToPdf( weasyprintPath, @@ -259,7 +256,7 @@ public class EmlToPdf { html.append("\n"); html.append("").append(escapeHtml(subject)).append("\n"); html.append("\n"); html.append("\n"); @@ -298,7 +295,7 @@ public class EmlToPdf { html.append("

Attachments

\n"); html.append(attachmentInfo); - // Add status message about attachment inclusion + // Add a status message about attachment inclusion if (request != null && request.isIncludeAttachments()) { html.append("
\n"); html.append( @@ -316,7 +313,7 @@ public class EmlToPdf { // Show advanced features status if requested assert request != null; - if (request != null && request.getFileInput().isEmpty()) { + if (request.getFileInput().isEmpty()) { html.append("
\n"); html.append( "

Note: Some advanced features require Jakarta Mail dependencies.

\n"); @@ -340,8 +337,10 @@ public class EmlToPdf { sessionClass.getMethod("getDefaultInstance", Properties.class); Object session = getDefaultInstance.invoke(null, new Properties()); + // Cast the session object to the proper type for the constructor + Class[] constructorArgs = new Class[] {sessionClass, InputStream.class}; Constructor mimeMessageConstructor = - mimeMessageClass.getConstructor(sessionClass, InputStream.class); + mimeMessageClass.getConstructor(constructorArgs); Object message = mimeMessageConstructor.newInstance(session, new ByteArrayInputStream(emlBytes)); @@ -488,7 +487,7 @@ public class EmlToPdf { attachmentInfo .append("
") .append("") - .append(MimeConstants.ATTACHMENT_ICON_PLACEHOLDER) + .append(MimeConstants.ATTACHMENT_MARKER) .append(" ") .append("") .append(escapeHtml(filename)) @@ -651,6 +650,10 @@ public class EmlToPdf { } private static String processEmailHtmlBody(String htmlBody) { + return processEmailHtmlBody(htmlBody, null); + } + + private static String processEmailHtmlBody(String htmlBody, EmailContent emailContent) { if (htmlBody == null) return ""; String processed = htmlBody; @@ -659,10 +662,83 @@ public class EmlToPdf { processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*fixed[^;]*;?", ""); processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*absolute[^;]*;?", ""); + // Process inline images (cid: references) if we have email content with attachments + if (emailContent != null && !emailContent.getAttachments().isEmpty()) { + processed = processInlineImages(processed, emailContent); + } + return processed; } - private static void appendEnhancedStyles(StringBuilder html, EmlToPdfRequest request) { + private static String processInlineImages(String htmlContent, EmailContent emailContent) { + if (htmlContent == null || emailContent == null) return htmlContent; + + // Create a map of Content-ID to attachment data + Map contentIdMap = new HashMap<>(); + for (EmailAttachment attachment : emailContent.getAttachments()) { + if (attachment.isEmbedded() + && attachment.getContentId() != null + && attachment.getData() != null) { + contentIdMap.put(attachment.getContentId(), attachment); + } + } + + if (contentIdMap.isEmpty()) return htmlContent; + + // Pattern to match cid: references in img src attributes + Pattern cidPattern = + Pattern.compile( + "(?i)]*\\ssrc\\s*=\\s*['\"]cid:([^'\"]+)['\"][^>]*>", + Pattern.CASE_INSENSITIVE); + Matcher matcher = cidPattern.matcher(htmlContent); + + StringBuffer result = new StringBuffer(); + while (matcher.find()) { + String contentId = matcher.group(1); + EmailAttachment attachment = contentIdMap.get(contentId); + + if 
(attachment != null && attachment.getData() != null) { + // Convert to data URI + String mimeType = attachment.getContentType(); + if (mimeType == null || mimeType.isEmpty()) { + // Try to determine MIME type from filename + String filename = attachment.getFilename(); + if (filename != null) { + if (filename.toLowerCase().endsWith(".png")) { + mimeType = "image/png"; + } else if (filename.toLowerCase().endsWith(".jpg") + || filename.toLowerCase().endsWith(".jpeg")) { + mimeType = "image/jpeg"; + } else if (filename.toLowerCase().endsWith(".gif")) { + mimeType = "image/gif"; + } else if (filename.toLowerCase().endsWith(".bmp")) { + mimeType = "image/bmp"; + } else { + mimeType = "image/png"; // fallback + } + } else { + mimeType = "image/png"; // fallback + } + } + + String base64Data = Base64.getEncoder().encodeToString(attachment.getData()); + String dataUri = "data:" + mimeType + ";base64," + base64Data; + + // Replace the cid: reference with the data URI + String replacement = + matcher.group(0).replaceFirst("cid:" + Pattern.quote(contentId), dataUri); + matcher.appendReplacement(result, Matcher.quoteReplacement(replacement)); + } else { + // Keep original if attachment not found + matcher.appendReplacement(result, Matcher.quoteReplacement(matcher.group(0))); + } + } + matcher.appendTail(result); + + return result.toString(); + } + + private static void appendEnhancedStyles(StringBuilder html) { int fontSize = StyleConstants.DEFAULT_FONT_SIZE; String textColor = StyleConstants.DEFAULT_TEXT_COLOR; String backgroundColor = StyleConstants.DEFAULT_BACKGROUND_COLOR; @@ -844,7 +920,7 @@ public class EmlToPdf { processMultipartAdvanced(messageContent, content, request); } } catch (Exception e) { - log.warn("Error processing multipart content: {}", e.getMessage()); + log.warn("Error processing content: {}", e.getMessage()); } } @@ -861,6 +937,12 @@ public class EmlToPdf { private static void processMultipartAdvanced( Object multipart, EmailContent content, 
EmlToPdfRequest request) { try { + // Enhanced multipart type checking + if (!isValidJakartaMailMultipart(multipart)) { + log.warn("Invalid Jakarta Mail multipart type: {}", multipart.getClass().getName()); + return; + } + Class multipartClass = multipart.getClass(); java.lang.reflect.Method getCount = multipartClass.getMethod("getCount"); int count = (Integer) getCount.invoke(multipart); @@ -881,6 +963,11 @@ public class EmlToPdf { private static void processPartAdvanced( Object part, EmailContent content, EmlToPdfRequest request) { try { + if (!isValidJakartaMailPart(part)) { + log.warn("Invalid Jakarta Mail part type: {}", part.getClass().getName()); + return; + } + Class partClass = part.getClass(); java.lang.reflect.Method isMimeType = partClass.getMethod("isMimeType", String.class); java.lang.reflect.Method getContent = partClass.getMethod("getContent"); @@ -914,10 +1001,18 @@ public class EmlToPdf { String[] contentIdHeaders = (String[]) getHeader.invoke(part, "Content-ID"); if (contentIdHeaders != null && contentIdHeaders.length > 0) { attachment.setEmbedded(true); + // Store the Content-ID, removing angle brackets if present + String contentId = contentIdHeaders[0]; + if (contentId.startsWith("<") && contentId.endsWith(">")) { + contentId = contentId.substring(1, contentId.length() - 1); + } + attachment.setContentId(contentId); } - // Extract attachment data only if attachments should be included - if (request != null && request.isIncludeAttachments()) { + // Extract attachment data if attachments should be included OR if it's an + // embedded image (needed for inline display) + if ((request != null && request.isIncludeAttachments()) + || attachment.isEmbedded()) { try { Object attachmentContent = getContent.invoke(part); byte[] attachmentData = null; @@ -938,15 +1033,23 @@ public class EmlToPdf { if (attachmentData != null) { // Check size limit (use default 10MB if request is null) - long maxSizeMB = request.getMaxAttachmentSizeMB(); + long maxSizeMB = 
+ request != null ? request.getMaxAttachmentSizeMB() : 10L; long maxSizeBytes = maxSizeMB * 1024 * 1024; if (attachmentData.length <= maxSizeBytes) { attachment.setData(attachmentData); attachment.setSizeBytes(attachmentData.length); } else { - // Still show attachment info even if too large - attachment.setSizeBytes(attachmentData.length); + // For embedded images, always include data regardless of size + // to ensure inline display works + if (attachment.isEmbedded()) { + attachment.setData(attachmentData); + attachment.setSizeBytes(attachmentData.length); + } else { + // Still show attachment info even if too large + attachment.setSizeBytes(attachmentData.length); + } } } } catch (Exception e) { @@ -982,7 +1085,7 @@ public class EmlToPdf { html.append("\n"); html.append("").append(escapeHtml(content.getSubject())).append("\n"); html.append("\n"); html.append("\n"); @@ -1006,7 +1109,7 @@ public class EmlToPdf { html.append("
\n"); if (content.getHtmlBody() != null && !content.getHtmlBody().trim().isEmpty()) { - html.append(processEmailHtmlBody(content.getHtmlBody())); + html.append(processEmailHtmlBody(content.getHtmlBody(), content)); } else if (content.getTextBody() != null && !content.getTextBody().trim().isEmpty()) { html.append("
"); html.append(convertTextToHtml(content.getTextBody())); @@ -1039,7 +1142,7 @@ public class EmlToPdf { .append(uniqueId) .append("\">") .append("") - .append(MimeConstants.PAPERCLIP_EMOJI) + .append(MimeConstants.ATTACHMENT_MARKER) .append(" ") .append("") .append(escapeHtml(safeMimeDecode(attachment.getFilename()))) @@ -1205,24 +1308,24 @@ public class EmlToPdf { return; } - // 1. Find the screen position of all emoji anchors - EmojiPositionFinder finder = new EmojiPositionFinder(); + // 1. Find the screen position of all attachment markers + AttachmentMarkerPositionFinder finder = new AttachmentMarkerPositionFinder(); finder.setSortByPosition(true); // Process pages in order finder.getText(document); - List emojiPositions = finder.getPositions(); + List markerPositions = finder.getPositions(); - // 2. Warn if the number of anchors and attachments don't match - if (emojiPositions.size() != attachments.size()) { + // 2. Warn if the number of markers and attachments don't match + if (markerPositions.size() != attachments.size()) { log.warn( - "Found {} emoji anchors, but there are {} attachments. Annotation count may be incorrect.", - emojiPositions.size(), + "Found {} attachment markers, but there are {} attachments. Annotation count may be incorrect.", + markerPositions.size(), attachments.size()); } - // 3. Create an invisible annotation over each found emoji - int annotationsToAdd = Math.min(emojiPositions.size(), attachments.size()); + // 3. 
Create an invisible annotation over each found marker + int annotationsToAdd = Math.min(markerPositions.size(), attachments.size()); for (int i = 0; i < annotationsToAdd; i++) { - EmojiPosition position = emojiPositions.get(i); + MarkerPosition position = markerPositions.get(i); EmailAttachment attachment = attachments.get(i); if (attachment.getEmbeddedFilename() != null) { @@ -1356,8 +1459,6 @@ public class EmlToPdf { } } - // MIME header decoding functionality for RFC 2047 encoded headers - moved to constants - private static String decodeMimeHeader(String encodedText) { if (encodedText == null || encodedText.trim().isEmpty()) { return encodedText; @@ -1446,13 +1547,73 @@ public class EmlToPdf { } try { - return decodeMimeHeader(headerValue.trim()); + if (isJakartaMailAvailable()) { + // Use Jakarta Mail's MimeUtility for proper MIME decoding + Class mimeUtilityClass = Class.forName("jakarta.mail.internet.MimeUtility"); + Method decodeText = mimeUtilityClass.getMethod("decodeText", String.class); + return (String) decodeText.invoke(null, headerValue.trim()); + } else { + // Fallback to basic MIME decoding + return decodeMimeHeader(headerValue.trim()); + } } catch (Exception e) { log.warn("Failed to decode MIME header, using original: {}", headerValue, e); return headerValue; } } + private static boolean isValidJakartaMailPart(Object part) { + if (part == null) return false; + + try { + // Check if the object implements jakarta.mail.Part interface + Class partInterface = Class.forName("jakarta.mail.Part"); + if (!partInterface.isInstance(part)) { + return false; + } + + // Additional check for MimePart + try { + Class mimePartInterface = Class.forName("jakarta.mail.internet.MimePart"); + return mimePartInterface.isInstance(part); + } catch (ClassNotFoundException e) { + // MimePart not available, but Part is sufficient + return true; + } + } catch (ClassNotFoundException e) { + log.debug("Jakarta Mail Part interface not available for validation"); + return false; 
+ } + } + + private static boolean isValidJakartaMailMultipart(Object multipart) { + if (multipart == null) return false; + + try { + // Check if the object implements jakarta.mail.Multipart interface + Class multipartInterface = Class.forName("jakarta.mail.Multipart"); + if (!multipartInterface.isInstance(multipart)) { + return false; + } + + // Additional check for MimeMultipart + try { + Class mimeMultipartClass = Class.forName("jakarta.mail.internet.MimeMultipart"); + if (mimeMultipartClass.isInstance(multipart)) { + log.debug("Found MimeMultipart instance for enhanced processing"); + return true; + } + } catch (ClassNotFoundException e) { + log.debug("MimeMultipart not available, using base Multipart interface"); + } + + return true; + } catch (ClassNotFoundException e) { + log.debug("Jakarta Mail Multipart interface not available for validation"); + return false; + } + } + @Data public static class EmailContent { private String subject; @@ -1497,15 +1658,13 @@ public class EmlToPdf { } @Data - public static class EmojiPosition { + public static class MarkerPosition { private int pageIndex; private float x; private float y; private String character; - public EmojiPosition() {} - - public EmojiPosition(int pageIndex, float x, float y, String character) { + public MarkerPosition(int pageIndex, float x, float y, String character) { this.pageIndex = pageIndex; this.x = x; this.y = y; @@ -1513,14 +1672,15 @@ public class EmlToPdf { } } - public static class EmojiPositionFinder extends org.apache.pdfbox.text.PDFTextStripper { - @Getter private final List positions = new ArrayList<>(); + public static class AttachmentMarkerPositionFinder + extends org.apache.pdfbox.text.PDFTextStripper { + @Getter private final List positions = new ArrayList<>(); private int currentPageIndex; - private boolean sortByPosition; + protected boolean sortByPosition; private boolean isInAttachmentSection; private boolean attachmentSectionFound; - public EmojiPositionFinder() throws 
IOException { + public AttachmentMarkerPositionFinder() { super(); this.currentPageIndex = 0; this.sortByPosition = false; @@ -1563,24 +1723,18 @@ public class EmlToPdf { isInAttachmentSection = false; } - // Only look for emojis if we are in the attachment section + // Only look for markers if we are in the attachment section if (isInAttachmentSection) { - // Look for paperclip emoji characters (U+1F4CE) - String paperclipEmoji = "\uD83D\uDCCE"; // 📎 Unicode representation - - for (int i = 0; i < string.length(); i++) { - // Check if we have a complete paperclip emoji at this position - if (i < string.length() - 1 - && string.substring(i, i + 2).equals(paperclipEmoji) - && i < textPositions.size()) { - + String attachmentMarker = MimeConstants.ATTACHMENT_MARKER; + for (int i = 0; (i = string.indexOf(attachmentMarker, i)) != -1; i++) { + if (i < textPositions.size()) { org.apache.pdfbox.text.TextPosition textPosition = textPositions.get(i); - EmojiPosition position = - new EmojiPosition( + MarkerPosition position = + new MarkerPosition( currentPageIndex, textPosition.getXDirAdj(), textPosition.getYDirAdj(), - paperclipEmoji); + attachmentMarker); positions.add(position); } } @@ -1592,16 +1746,5 @@ public class EmlToPdf { public void setSortByPosition(boolean sortByPosition) { this.sortByPosition = sortByPosition; } - - public boolean isSortByPosition() { - return sortByPosition; - } - - public void reset() { - positions.clear(); - currentPageIndex = 0; - isInAttachmentSection = false; - attachmentSectionFound = false; - } } } diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java b/stirling-pdf/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java index c9872992a..2e7a197de 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java @@ -10,7 +10,6 @@ import 
org.springframework.beans.factory.annotation.Qualifier; import org.springframework.stereotype.Service; import lombok.extern.slf4j.Slf4j; - import stirling.software.common.model.ApplicationProperties; @Service @@ -142,6 +141,7 @@ public class EndpointConfiguration { addEndpointToGroup("Convert", "markdown-to-pdf"); addEndpointToGroup("Convert", "pdf-to-csv"); addEndpointToGroup("Convert", "pdf-to-markdown"); + addEndpointToGroup("Convert", "eml-to-pdf"); // Adding endpoints to "Security" group addEndpointToGroup("Security", "add-password"); @@ -265,6 +265,7 @@ public class EndpointConfiguration { addEndpointToGroup("Weasyprint", "html-to-pdf"); addEndpointToGroup("Weasyprint", "url-to-pdf"); addEndpointToGroup("Weasyprint", "markdown-to-pdf"); + addEndpointToGroup("Weasyprint", "eml-to-pdf"); // Pdftohtml dependent endpoints addEndpointToGroup("Pdftohtml", "pdf-to-html");