diff --git a/app/common/src/main/java/stirling/software/common/util/EmlParser.java b/app/common/src/main/java/stirling/software/common/util/EmlParser.java new file mode 100644 index 000000000..0815b1c56 --- /dev/null +++ b/app/common/src/main/java/stirling/software/common/util/EmlParser.java @@ -0,0 +1,652 @@ +package stirling.software.common.util; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Properties; +import java.util.regex.Pattern; + +import lombok.Data; +import lombok.experimental.UtilityClass; + +import stirling.software.common.model.api.converters.EmlToPdfRequest; + +@UtilityClass +public class EmlParser { + + private static volatile Boolean jakartaMailAvailable = null; + private static volatile Method mimeUtilityDecodeTextMethod = null; + private static volatile boolean mimeUtilityChecked = false; + + private static final Pattern MIME_ENCODED_PATTERN = + Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?="); + + private static final String DISPOSITION_ATTACHMENT = "attachment"; + private static final String TEXT_PLAIN = "text/plain"; + private static final String TEXT_HTML = "text/html"; + private static final String MULTIPART_PREFIX = "multipart/"; + + private static final String HEADER_CONTENT_TYPE = "content-type:"; + private static final String HEADER_CONTENT_DISPOSITION = "content-disposition:"; + private static final String HEADER_CONTENT_TRANSFER_ENCODING = "content-transfer-encoding:"; + private static final String HEADER_CONTENT_ID = "Content-ID"; + private static final String HEADER_SUBJECT = "Subject:"; + private static final String HEADER_FROM = "From:"; + private static final String HEADER_TO = "To:"; + private static final String HEADER_CC = "Cc:"; + private static final String 
HEADER_BCC = "Bcc:"; + private static final String HEADER_DATE = "Date:"; + + private static synchronized boolean isJakartaMailAvailable() { + if (jakartaMailAvailable == null) { + try { + Class.forName("jakarta.mail.internet.MimeMessage"); + Class.forName("jakarta.mail.Session"); + Class.forName("jakarta.mail.internet.MimeUtility"); + Class.forName("jakarta.mail.internet.MimePart"); + Class.forName("jakarta.mail.internet.MimeMultipart"); + Class.forName("jakarta.mail.Multipart"); + Class.forName("jakarta.mail.Part"); + jakartaMailAvailable = true; + } catch (ClassNotFoundException e) { + jakartaMailAvailable = false; + } + } + return jakartaMailAvailable; + } + + public static EmailContent extractEmailContent( + byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) + throws IOException { + EmlProcessingUtils.validateEmlInput(emlBytes); + + if (isJakartaMailAvailable()) { + return extractEmailContentAdvanced(emlBytes, request, customHtmlSanitizer); + } else { + return extractEmailContentBasic(emlBytes, request, customHtmlSanitizer); + } + } + + private static EmailContent extractEmailContentBasic( + byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) { + String emlContent = new String(emlBytes, StandardCharsets.UTF_8); + EmailContent content = new EmailContent(); + + content.setSubject(extractBasicHeader(emlContent, HEADER_SUBJECT)); + content.setFrom(extractBasicHeader(emlContent, HEADER_FROM)); + content.setTo(extractBasicHeader(emlContent, HEADER_TO)); + content.setCc(extractBasicHeader(emlContent, HEADER_CC)); + content.setBcc(extractBasicHeader(emlContent, HEADER_BCC)); + + String dateStr = extractBasicHeader(emlContent, HEADER_DATE); + if (!dateStr.isEmpty()) { + content.setDateString(dateStr); + } + + String htmlBody = extractHtmlBody(emlContent); + if (htmlBody != null) { + content.setHtmlBody(htmlBody); + } else { + String textBody = extractTextBody(emlContent); + content.setTextBody(textBody != 
null ? textBody : "Email content could not be parsed"); + } + + content.getAttachments().addAll(extractAttachmentsBasic(emlContent)); + + return content; + } + + private static EmailContent extractEmailContentAdvanced( + byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) { + try { + Class sessionClass = Class.forName("jakarta.mail.Session"); + Class mimeMessageClass = Class.forName("jakarta.mail.internet.MimeMessage"); + + Method getDefaultInstance = + sessionClass.getMethod("getDefaultInstance", Properties.class); + Object session = getDefaultInstance.invoke(null, new Properties()); + + Class[] constructorArgs = new Class[] {sessionClass, InputStream.class}; + Constructor mimeMessageConstructor = + mimeMessageClass.getConstructor(constructorArgs); + Object message = + mimeMessageConstructor.newInstance(session, new ByteArrayInputStream(emlBytes)); + + return extractFromMimeMessage(message, request, customHtmlSanitizer); + + } catch (ReflectiveOperationException e) { + return extractEmailContentBasic(emlBytes, request, customHtmlSanitizer); + } + } + + private static EmailContent extractFromMimeMessage( + Object message, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) { + EmailContent content = new EmailContent(); + + try { + Class messageClass = message.getClass(); + + Method getSubject = messageClass.getMethod("getSubject"); + String subject = (String) getSubject.invoke(message); + content.setSubject(subject != null ? 
safeMimeDecode(subject) : "No Subject"); + + Method getFrom = messageClass.getMethod("getFrom"); + Object[] fromAddresses = (Object[]) getFrom.invoke(message); + content.setFrom(buildAddressString(fromAddresses)); + + extractRecipients(message, messageClass, content); + + Method getSentDate = messageClass.getMethod("getSentDate"); + content.setDate((Date) getSentDate.invoke(message)); + + Method getContent = messageClass.getMethod("getContent"); + Object messageContent = getContent.invoke(message); + + processMessageContent(message, messageContent, content, request, customHtmlSanitizer); + + } catch (ReflectiveOperationException | RuntimeException e) { + content.setSubject("Email Conversion"); + content.setFrom("Unknown"); + content.setTo("Unknown"); + content.setCc(""); + content.setBcc(""); + content.setTextBody("Email content could not be parsed with advanced processing"); + } + + return content; + } + + private static void extractRecipients( + Object message, Class messageClass, EmailContent content) { + try { + Method getRecipients = + messageClass.getMethod( + "getRecipients", Class.forName("jakarta.mail.Message$RecipientType")); + Class recipientTypeClass = Class.forName("jakarta.mail.Message$RecipientType"); + + Object toType = recipientTypeClass.getField("TO").get(null); + Object[] toRecipients = (Object[]) getRecipients.invoke(message, toType); + content.setTo(buildAddressString(toRecipients)); + + Object ccType = recipientTypeClass.getField("CC").get(null); + Object[] ccRecipients = (Object[]) getRecipients.invoke(message, ccType); + content.setCc(buildAddressString(ccRecipients)); + + Object bccType = recipientTypeClass.getField("BCC").get(null); + Object[] bccRecipients = (Object[]) getRecipients.invoke(message, bccType); + content.setBcc(buildAddressString(bccRecipients)); + + } catch (ReflectiveOperationException e) { + try { + Method getAllRecipients = messageClass.getMethod("getAllRecipients"); + Object[] recipients = (Object[]) 
getAllRecipients.invoke(message); + content.setTo(buildAddressString(recipients)); + content.setCc(""); + content.setBcc(""); + } catch (ReflectiveOperationException ex) { + content.setTo(""); + content.setCc(""); + content.setBcc(""); + } + } + } + + private static String buildAddressString(Object[] addresses) { + if (addresses == null || addresses.length == 0) { + return ""; + } + + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < addresses.length; i++) { + if (i > 0) builder.append(", "); + builder.append(safeMimeDecode(addresses[i].toString())); + } + return builder.toString(); + } + + private static void processMessageContent( + Object message, + Object messageContent, + EmailContent content, + EmlToPdfRequest request, + CustomHtmlSanitizer customHtmlSanitizer) { + try { + if (messageContent instanceof String stringContent) { + Method getContentType = message.getClass().getMethod("getContentType"); + String contentType = (String) getContentType.invoke(message); + + if (contentType != null && contentType.toLowerCase().contains(TEXT_HTML)) { + content.setHtmlBody(stringContent); + } else { + content.setTextBody(stringContent); + } + } else { + Class multipartClass = Class.forName("jakarta.mail.Multipart"); + if (multipartClass.isInstance(messageContent)) { + processMultipart(messageContent, content, request, customHtmlSanitizer, 0); + } + } + } catch (ReflectiveOperationException | ClassCastException e) { + content.setTextBody("Email content could not be parsed with advanced processing"); + } + } + + private static void processMultipart( + Object multipart, + EmailContent content, + EmlToPdfRequest request, + CustomHtmlSanitizer customHtmlSanitizer, + int depth) { + + final int MAX_MULTIPART_DEPTH = 10; + if (depth > MAX_MULTIPART_DEPTH) { + content.setHtmlBody("
Maximum multipart depth exceeded
"); + return; + } + + try { + Class multipartClass = multipart.getClass(); + Method getCount = multipartClass.getMethod("getCount"); + int count = (Integer) getCount.invoke(multipart); + + Method getBodyPart = multipartClass.getMethod("getBodyPart", int.class); + + for (int i = 0; i < count; i++) { + Object part = getBodyPart.invoke(multipart, i); + processPart(part, content, request, customHtmlSanitizer, depth + 1); + } + + } catch (ReflectiveOperationException | ClassCastException e) { + content.setHtmlBody("
Error processing multipart content
"); + } + } + + private static void processPart( + Object part, + EmailContent content, + EmlToPdfRequest request, + CustomHtmlSanitizer customHtmlSanitizer, + int depth) { + try { + Class partClass = part.getClass(); + + Method isMimeType = partClass.getMethod("isMimeType", String.class); + Method getContent = partClass.getMethod("getContent"); + Method getDisposition = partClass.getMethod("getDisposition"); + Method getFileName = partClass.getMethod("getFileName"); + Method getContentType = partClass.getMethod("getContentType"); + Method getHeader = partClass.getMethod("getHeader", String.class); + + Object disposition = getDisposition.invoke(part); + String filename = (String) getFileName.invoke(part); + String contentType = (String) getContentType.invoke(part); + + String normalizedDisposition = + disposition != null ? ((String) disposition).toLowerCase() : null; + + if ((Boolean) isMimeType.invoke(part, TEXT_PLAIN) && normalizedDisposition == null) { + Object partContent = getContent.invoke(part); + if (partContent instanceof String stringContent) { + content.setTextBody(stringContent); + } + } else if ((Boolean) isMimeType.invoke(part, TEXT_HTML) + && normalizedDisposition == null) { + Object partContent = getContent.invoke(part); + if (partContent instanceof String stringContent) { + String htmlBody = + customHtmlSanitizer != null + ? 
customHtmlSanitizer.sanitize(stringContent) + : stringContent; + content.setHtmlBody(htmlBody); + } + } else if ((normalizedDisposition != null + && normalizedDisposition.contains(DISPOSITION_ATTACHMENT)) + || (filename != null && !filename.trim().isEmpty())) { + + processAttachment( + part, content, request, getHeader, getContent, filename, contentType); + } else if ((Boolean) isMimeType.invoke(part, "multipart/*")) { + Object multipartContent = getContent.invoke(part); + if (multipartContent != null) { + Class multipartClass = Class.forName("jakarta.mail.Multipart"); + if (multipartClass.isInstance(multipartContent)) { + processMultipart( + multipartContent, content, request, customHtmlSanitizer, depth + 1); + } + } + } + + } catch (ReflectiveOperationException | RuntimeException e) { + // Continue processing other parts if one fails + } + } + + private static void processAttachment( + Object part, + EmailContent content, + EmlToPdfRequest request, + Method getHeader, + Method getContent, + String filename, + String contentType) { + + content.setAttachmentCount(content.getAttachmentCount() + 1); + + if (filename != null && !filename.trim().isEmpty()) { + EmailAttachment attachment = new EmailAttachment(); + attachment.setFilename(safeMimeDecode(filename)); + attachment.setContentType(contentType); + + try { + String[] contentIdHeaders = (String[]) getHeader.invoke(part, HEADER_CONTENT_ID); + if (contentIdHeaders != null) { + for (String contentIdHeader : contentIdHeaders) { + if (contentIdHeader != null && !contentIdHeader.trim().isEmpty()) { + attachment.setEmbedded(true); + String contentId = contentIdHeader.trim().replaceAll("[<>]", ""); + attachment.setContentId(contentId); + break; + } + } + } + } catch (ReflectiveOperationException e) { + } + + if ((request != null && request.isIncludeAttachments()) || attachment.isEmbedded()) { + extractAttachmentData(part, attachment, getContent, request); + } + + content.getAttachments().add(attachment); + } + } + + 
private static void extractAttachmentData( + Object part, EmailAttachment attachment, Method getContent, EmlToPdfRequest request) { + try { + Object attachmentContent = getContent.invoke(part); + byte[] attachmentData = null; + + if (attachmentContent instanceof InputStream inputStream) { + try (InputStream stream = inputStream) { + attachmentData = stream.readAllBytes(); + } catch (IOException e) { + if (attachment.isEmbedded()) { + attachmentData = new byte[0]; + } else { + throw new RuntimeException(e); + } + } + } else if (attachmentContent instanceof byte[] byteArray) { + attachmentData = byteArray; + } else if (attachmentContent instanceof String stringContent) { + attachmentData = stringContent.getBytes(StandardCharsets.UTF_8); + } + + if (attachmentData != null) { + long maxSizeMB = request != null ? request.getMaxAttachmentSizeMB() : 10L; + long maxSizeBytes = maxSizeMB * 1024 * 1024; + + if (attachmentData.length <= maxSizeBytes || attachment.isEmbedded()) { + attachment.setData(attachmentData); + attachment.setSizeBytes(attachmentData.length); + } else { + attachment.setSizeBytes(attachmentData.length); + } + } + } catch (ReflectiveOperationException | RuntimeException e) { + // Continue without attachment data + } + } + + private static String extractBasicHeader(String emlContent, String headerName) { + try { + String[] lines = emlContent.split("\r?\n"); + for (int i = 0; i < lines.length; i++) { + String line = lines[i]; + if (line.toLowerCase().startsWith(headerName.toLowerCase())) { + StringBuilder value = + new StringBuilder(line.substring(headerName.length()).trim()); + for (int j = i + 1; j < lines.length; j++) { + if (lines[j].startsWith(" ") || lines[j].startsWith("\t")) { + value.append(" ").append(lines[j].trim()); + } else { + break; + } + } + return safeMimeDecode(value.toString()); + } + if (line.trim().isEmpty()) break; + } + } catch (RuntimeException e) { + // Ignore errors in header extraction + } + return ""; + } + + private static 
String extractHtmlBody(String emlContent) { + try { + String lowerContent = emlContent.toLowerCase(); + int htmlStart = lowerContent.indexOf(HEADER_CONTENT_TYPE + " " + TEXT_HTML); + if (htmlStart == -1) return null; + + int bodyStart = emlContent.indexOf("\r\n\r\n", htmlStart); + if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n", htmlStart); + if (bodyStart == -1) return null; + + bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 4 : 2; + int bodyEnd = findPartEnd(emlContent, bodyStart); + + return emlContent.substring(bodyStart, bodyEnd).trim(); + } catch (Exception e) { + return null; + } + } + + private static String extractTextBody(String emlContent) { + try { + String lowerContent = emlContent.toLowerCase(); + int textStart = lowerContent.indexOf(HEADER_CONTENT_TYPE + " " + TEXT_PLAIN); + if (textStart == -1) { + int bodyStart = emlContent.indexOf("\r\n\r\n"); + if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n"); + if (bodyStart != -1) { + bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 4 : 2; + int bodyEnd = findPartEnd(emlContent, bodyStart); + return emlContent.substring(bodyStart, bodyEnd).trim(); + } + return null; + } + + int bodyStart = emlContent.indexOf("\r\n\r\n", textStart); + if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n", textStart); + if (bodyStart == -1) return null; + + bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 
4 : 2; + int bodyEnd = findPartEnd(emlContent, bodyStart); + + return emlContent.substring(bodyStart, bodyEnd).trim(); + } catch (RuntimeException e) { + return null; + } + } + + private static int findPartEnd(String content, int start) { + String[] lines = content.substring(start).split("\r?\n"); + StringBuilder result = new StringBuilder(); + + for (String line : lines) { + if (line.startsWith("--") && line.length() > 10) break; + result.append(line).append("\n"); + } + + return start + result.length(); + } + + private static List extractAttachmentsBasic(String emlContent) { + List attachments = new ArrayList<>(); + try { + String[] lines = emlContent.split("\r?\n"); + boolean inHeaders = true; + String currentContentType = ""; + String currentDisposition = ""; + String currentFilename = ""; + String currentEncoding = ""; + + for (String line : lines) { + String lowerLine = line.toLowerCase().trim(); + + if (line.trim().isEmpty()) { + inHeaders = false; + if (isAttachment(currentDisposition, currentFilename, currentContentType)) { + EmailAttachment attachment = new EmailAttachment(); + attachment.setFilename(currentFilename); + attachment.setContentType(currentContentType); + attachment.setTransferEncoding(currentEncoding); + attachments.add(attachment); + } + currentContentType = ""; + currentDisposition = ""; + currentFilename = ""; + currentEncoding = ""; + inHeaders = true; + continue; + } + + if (!inHeaders) continue; + + if (lowerLine.startsWith(HEADER_CONTENT_TYPE)) { + currentContentType = line.substring(HEADER_CONTENT_TYPE.length()).trim(); + } else if (lowerLine.startsWith(HEADER_CONTENT_DISPOSITION)) { + currentDisposition = line.substring(HEADER_CONTENT_DISPOSITION.length()).trim(); + currentFilename = extractFilenameFromDisposition(currentDisposition); + } else if (lowerLine.startsWith(HEADER_CONTENT_TRANSFER_ENCODING)) { + currentEncoding = + line.substring(HEADER_CONTENT_TRANSFER_ENCODING.length()).trim(); + } + } + } catch (RuntimeException e) { + 
// Continue with empty list + } + return attachments; + } + + private static boolean isAttachment(String disposition, String filename, String contentType) { + return (disposition.toLowerCase().contains(DISPOSITION_ATTACHMENT) && !filename.isEmpty()) + || (!filename.isEmpty() && !contentType.toLowerCase().startsWith("text/")) + || (contentType.toLowerCase().contains("application/") && !filename.isEmpty()); + } + + private static String extractFilenameFromDisposition(String disposition) { + if (disposition == null || !disposition.contains("filename=")) { + return ""; + } + + // Handle filename*= (RFC 2231 encoded filename) + if (disposition.toLowerCase().contains("filename*=")) { + int filenameStarStart = disposition.toLowerCase().indexOf("filename*=") + 10; + int filenameStarEnd = disposition.indexOf(";", filenameStarStart); + if (filenameStarEnd == -1) filenameStarEnd = disposition.length(); + String extendedFilename = + disposition.substring(filenameStarStart, filenameStarEnd).trim(); + extendedFilename = extendedFilename.replaceAll("^\"|\"$", ""); + + if (extendedFilename.contains("'")) { + String[] parts = extendedFilename.split("'", 3); + if (parts.length == 3) { + return EmlProcessingUtils.decodeUrlEncoded(parts[2]); + } + } + } + + // Handle regular filename= + int filenameStart = disposition.toLowerCase().indexOf("filename=") + 9; + int filenameEnd = disposition.indexOf(";", filenameStart); + if (filenameEnd == -1) filenameEnd = disposition.length(); + String filename = disposition.substring(filenameStart, filenameEnd).trim(); + filename = filename.replaceAll("^\"|\"$", ""); + return safeMimeDecode(filename); + } + + public static String safeMimeDecode(String headerValue) { + if (headerValue == null || headerValue.trim().isEmpty()) { + return ""; + } + + if (!mimeUtilityChecked) { + synchronized (EmlParser.class) { + if (!mimeUtilityChecked) { + initializeMimeUtilityDecoding(); + } + } + } + + if (mimeUtilityDecodeTextMethod != null) { + try { + return 
(String) mimeUtilityDecodeTextMethod.invoke(null, headerValue.trim()); + } catch (ReflectiveOperationException | RuntimeException e) { + // Fall through to custom implementation + } + } + + return EmlProcessingUtils.decodeMimeHeader(headerValue.trim()); + } + + private static void initializeMimeUtilityDecoding() { + try { + Class mimeUtilityClass = Class.forName("jakarta.mail.internet.MimeUtility"); + mimeUtilityDecodeTextMethod = mimeUtilityClass.getMethod("decodeText", String.class); + } catch (ClassNotFoundException | NoSuchMethodException e) { + mimeUtilityDecodeTextMethod = null; + } + mimeUtilityChecked = true; + } + + @Data + public static class EmailContent { + private String subject; + private String from; + private String to; + private String cc; + private String bcc; + private Date date; + private String dateString; // For basic parsing fallback + private String htmlBody; + private String textBody; + private int attachmentCount; + private List attachments = new ArrayList<>(); + + public void setHtmlBody(String htmlBody) { + this.htmlBody = htmlBody != null ? htmlBody.replaceAll("\r", "") : null; + } + + public void setTextBody(String textBody) { + this.textBody = textBody != null ? 
textBody.replaceAll("\r", "") : null; + } + } + + @Data + public static class EmailAttachment { + private String filename; + private String contentType; + private byte[] data; + private boolean embedded; + private String embeddedFilename; + private long sizeBytes; + private String contentId; + private String disposition; + private String transferEncoding; + + public void setData(byte[] data) { + this.data = data; + if (data != null) { + this.sizeBytes = data.length; + } + } + } +} diff --git a/app/common/src/main/java/stirling/software/common/util/EmlProcessingUtils.java b/app/common/src/main/java/stirling/software/common/util/EmlProcessingUtils.java new file mode 100644 index 000000000..9acc30c16 --- /dev/null +++ b/app/common/src/main/java/stirling/software/common/util/EmlProcessingUtils.java @@ -0,0 +1,601 @@ +package stirling.software.common.util; + +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Base64; +import java.util.Locale; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import lombok.experimental.UtilityClass; + +import stirling.software.common.model.api.converters.EmlToPdfRequest; +import stirling.software.common.model.api.converters.HTMLToPdfRequest; + +@UtilityClass +public class EmlProcessingUtils { + + // Style constants + private static final int DEFAULT_FONT_SIZE = 12; + private static final String DEFAULT_FONT_FAMILY = "Helvetica, sans-serif"; + private static final float DEFAULT_LINE_HEIGHT = 1.4f; + private static final String DEFAULT_ZOOM = "1.0"; + private static final String DEFAULT_TEXT_COLOR = "#202124"; + private static final String DEFAULT_BACKGROUND_COLOR = "#ffffff"; + private static final String DEFAULT_BORDER_COLOR = "#e8eaed"; + private static final String ATTACHMENT_BACKGROUND_COLOR = "#f9f9f9"; + private static final String ATTACHMENT_BORDER_COLOR = "#eeeeee"; + + private static final int EML_CHECK_LENGTH = 8192; + private static final int 
MIN_HEADER_COUNT_FOR_VALID_EML = 2; + + // MIME type detection + private static final Map EXTENSION_TO_MIME_TYPE = + Map.of( + ".png", "image/png", + ".jpg", "image/jpeg", + ".jpeg", "image/jpeg", + ".gif", "image/gif", + ".bmp", "image/bmp", + ".webp", "image/webp", + ".svg", "image/svg+xml", + ".ico", "image/x-icon", + ".tiff", "image/tiff", + ".tif", "image/tiff"); + + public static void validateEmlInput(byte[] emlBytes) { + if (emlBytes == null || emlBytes.length == 0) { + throw new IllegalArgumentException("EML file is empty or null"); + } + + if (isInvalidEmlFormat(emlBytes)) { + throw new IllegalArgumentException("Invalid EML file format"); + } + } + + private static boolean isInvalidEmlFormat(byte[] emlBytes) { + try { + int checkLength = Math.min(emlBytes.length, EML_CHECK_LENGTH); + String content; + + try { + content = new String(emlBytes, 0, checkLength, StandardCharsets.UTF_8); + if (content.contains("\uFFFD")) { + content = new String(emlBytes, 0, checkLength, StandardCharsets.ISO_8859_1); + } + } catch (Exception e) { + content = new String(emlBytes, 0, checkLength, StandardCharsets.ISO_8859_1); + } + + String lowerContent = content.toLowerCase(Locale.ROOT); + + boolean hasFrom = + lowerContent.contains("from:") || lowerContent.contains("return-path:"); + boolean hasSubject = lowerContent.contains("subject:"); + boolean hasMessageId = lowerContent.contains("message-id:"); + boolean hasDate = lowerContent.contains("date:"); + boolean hasTo = + lowerContent.contains("to:") + || lowerContent.contains("cc:") + || lowerContent.contains("bcc:"); + boolean hasMimeStructure = + lowerContent.contains("multipart/") + || lowerContent.contains("text/plain") + || lowerContent.contains("text/html") + || lowerContent.contains("boundary="); + + int headerCount = 0; + if (hasFrom) headerCount++; + if (hasSubject) headerCount++; + if (hasMessageId) headerCount++; + if (hasDate) headerCount++; + if (hasTo) headerCount++; + + return headerCount < 
MIN_HEADER_COUNT_FOR_VALID_EML && !hasMimeStructure; + + } catch (RuntimeException e) { + return false; + } + } + + public static String generateEnhancedEmailHtml( + EmlParser.EmailContent content, + EmlToPdfRequest request, + CustomHtmlSanitizer customHtmlSanitizer) { + StringBuilder html = new StringBuilder(); + + html.append( + String.format( + """ + + + %s + + + """); + + html.append( + String.format( + """ + \n"); + return html.toString(); + } + + public static String processEmailHtmlBody( + String htmlBody, + EmlParser.EmailContent emailContent, + CustomHtmlSanitizer customHtmlSanitizer) { + if (htmlBody == null) return ""; + + String processed = + customHtmlSanitizer != null ? customHtmlSanitizer.sanitize(htmlBody) : htmlBody; + + processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*fixed[^;]*;?", ""); + processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*absolute[^;]*;?", ""); + + if (emailContent != null && !emailContent.getAttachments().isEmpty()) { + processed = PdfAttachmentHandler.processInlineImages(processed, emailContent); + } + + return processed; + } + + public static String convertTextToHtml( + String textBody, CustomHtmlSanitizer customHtmlSanitizer) { + if (textBody == null) return ""; + + String html = + customHtmlSanitizer != null + ? customHtmlSanitizer.sanitize(textBody) + : escapeHtml(textBody); + + html = html.replace("\r\n", "\n").replace("\r", "\n"); + html = html.replace("\n", "
\n"); + + html = + html.replaceAll( + "(https?://[\\w\\-._~:/?#\\[\\]@!$&'()*+,;=%]+)", + "$1"); + + html = + html.replaceAll( + "([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,63})", + "$1"); + + return html; + } + + private static void appendEnhancedStyles(StringBuilder html) { + String css = + String.format( + """ + body { + font-family: %s; + font-size: %dpx; + line-height: %s; + color: %s; + margin: 0; + padding: 16px; + background-color: %s; + } + + .email-container { + width: 100%%; + max-width: 100%%; + margin: 0 auto; + } + + .email-header { + padding-bottom: 10px; + border-bottom: 1px solid %s; + margin-bottom: 10px; + } + + .email-header h1 { + margin: 0 0 10px 0; + font-size: %dpx; + font-weight: bold; + } + + .email-meta div { + margin-bottom: 2px; + font-size: %dpx; + } + + .email-body { + word-wrap: break-word; + } + + .attachment-section { + margin-top: 15px; + padding: 10px; + background-color: %s; + border: 1px solid %s; + border-radius: 3px; + } + + .attachment-section h3 { + margin: 0 0 8px 0; + font-size: %dpx; + } + + .attachment-item { + padding: 5px 0; + } + + .attachment-icon { + margin-right: 5px; + } + + .attachment-details, .attachment-type { + font-size: %dpx; + color: #555555; + } + + .attachment-inclusion-note, .attachment-info-note { + margin-top: 8px; + padding: 6px; + font-size: %dpx; + border-radius: 3px; + } + + .attachment-inclusion-note { + background-color: #e6ffed; + border: 1px solid #d4f7dc; + color: #006420; + } + + .attachment-info-note { + background-color: #fff9e6; + border: 1px solid #fff0c2; + color: #664d00; + } + + .attachment-link-container { + display: flex; + align-items: center; + padding: 8px; + background-color: #f8f9fa; + border: 1px solid #dee2e6; + border-radius: 4px; + margin: 4px 0; + } + + .attachment-link-container:hover { + background-color: #e9ecef; + } + + .attachment-note { + font-size: %dpx; + color: #6c757d; + font-style: italic; + margin-left: 8px; + } + + .no-content { + padding: 20px; + 
text-align: center; + color: #666; + font-style: italic; + } + + .text-body { + white-space: pre-wrap; + } + + img { + max-width: 100%%; + height: auto; + display: block; + } + """, + DEFAULT_FONT_FAMILY, + DEFAULT_FONT_SIZE, + DEFAULT_LINE_HEIGHT, + DEFAULT_TEXT_COLOR, + DEFAULT_BACKGROUND_COLOR, + DEFAULT_BORDER_COLOR, + DEFAULT_FONT_SIZE + 4, + DEFAULT_FONT_SIZE - 1, + ATTACHMENT_BACKGROUND_COLOR, + ATTACHMENT_BORDER_COLOR, + DEFAULT_FONT_SIZE + 1, + DEFAULT_FONT_SIZE - 2, + DEFAULT_FONT_SIZE - 2, + DEFAULT_FONT_SIZE - 3); + + html.append(css); + } + + private static void appendAttachmentsSection( + StringBuilder html, + EmlParser.EmailContent content, + EmlToPdfRequest request, + CustomHtmlSanitizer customHtmlSanitizer) { + html.append("
\n"); + int displayedAttachmentCount = + content.getAttachmentCount() > 0 + ? content.getAttachmentCount() + : content.getAttachments().size(); + html.append("

Attachments (").append(displayedAttachmentCount).append(")

\n"); + + if (!content.getAttachments().isEmpty()) { + for (int i = 0; i < content.getAttachments().size(); i++) { + EmlParser.EmailAttachment attachment = content.getAttachments().get(i); + + String embeddedFilename = + attachment.getFilename() != null + ? attachment.getFilename() + : ("attachment_" + i); + attachment.setEmbeddedFilename(embeddedFilename); + + String sizeStr = GeneralUtils.formatBytes(attachment.getSizeBytes()); + String contentType = + attachment.getContentType() != null + && !attachment.getContentType().isEmpty() + ? ", " + escapeHtml(attachment.getContentType()) + : ""; + + String attachmentId = "attachment_" + i; + html.append( + String.format( + """ +
+ @ + %s + (%s%s) +
+ """, + attachmentId, + escapeHtml(embeddedFilename), + escapeHtml(EmlParser.safeMimeDecode(attachment.getFilename())), + sizeStr, + contentType)); + } + } + + if (request != null && request.isIncludeAttachments()) { + html.append( + """ +
+

Attachments are embedded in the file.

+
+ """); + } else { + html.append( + """ +
+

Attachment information displayed - files not included in PDF.

+
+ """); + } + html.append("
\n");
+    }
+
+    /**
+     * Creates the HTML-to-PDF request used to render the generated email HTML.
+     *
+     * @param request the EML conversion request; when non-null its file input is copied over
+     * @return a new request carrying the file input (if any) and the default zoom
+     */
+    public static HTMLToPdfRequest createHtmlRequest(EmlToPdfRequest request) {
+        HTMLToPdfRequest htmlRequest = new HTMLToPdfRequest();
+
+        if (request != null) {
+            htmlRequest.setFileInput(request.getFileInput());
+        }
+
+        htmlRequest.setZoom(Float.parseFloat(DEFAULT_ZOOM));
+        return htmlRequest;
+    }
+
+    /**
+     * Picks a MIME type for an attachment.
+     *
+     * @param filename attachment file name, may be {@code null}
+     * @param existingMimeType MIME type already known for the attachment, may be {@code null}
+     * @return {@code existingMimeType} when it is non-empty; otherwise the value mapped to the
+     *     first key of {@code EXTENSION_TO_MIME_TYPE} the lower-cased file name ends with;
+     *     otherwise {@code "image/png"}
+     */
+    public static String detectMimeType(String filename, String existingMimeType) {
+        if (existingMimeType != null && !existingMimeType.isEmpty()) {
+            return existingMimeType;
+        }
+
+        if (filename != null) {
+            // Locale.ROOT keeps the match stable under locales with special casing rules
+            // (e.g. Turkish dotless i in ".GIF").
+            String lowerFilename = filename.toLowerCase(java.util.Locale.ROOT);
+            for (Map.Entry<String, String> entry : EXTENSION_TO_MIME_TYPE.entrySet()) {
+                if (lowerFilename.endsWith(entry.getKey())) {
+                    return entry.getValue();
+                }
+            }
+        }
+
+        return "image/png";
+    }
+
+    /**
+     * URL-decodes a string as UTF-8.
+     *
+     * @param encoded the percent-encoded text, may be {@code null}
+     * @return the decoded text, or {@code encoded} unchanged when it is {@code null} or malformed
+     */
+    public static String decodeUrlEncoded(String encoded) {
+        if (encoded == null) {
+            return null;
+        }
+        try {
+            return java.net.URLDecoder.decode(encoded, StandardCharsets.UTF_8);
+        } catch (IllegalArgumentException e) {
+            return encoded; // Return original if decoding fails
+        }
+    }
+
+    /** A run of two or more encoded-words separated only by optional whitespace. */
+    private static final Pattern RFC2047_ADJACENT_WORDS =
+            Pattern.compile(
+                    "(=\\?[^?]+\\?[BbQq]\\?[^?]*\\?=)(\\s*=\\?[^?]+\\?[BbQq]\\?[^?]*\\?=)+");
+
+    /** A single encoded-word: group 1 = charset, group 2 = encoding (B or Q), group 3 = payload. */
+    private static final Pattern RFC2047_ENCODED_WORD =
+            Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
+
+    /** Whitespace that is immediately followed by the start of an encoded-word. */
+    private static final Pattern GAP_BEFORE_ENCODED_WORD = Pattern.compile("\\s+(?==\\?)");
+
+    /**
+     * Decodes MIME encoded-words ({@code =?charset?B|Q?payload?=}) inside a header value.
+     *
+     * <p>Whitespace between directly adjacent encoded-words is dropped before decoding. Text
+     * outside encoded-words is copied through unchanged, and an encoded-word that cannot be
+     * decoded is kept verbatim.
+     *
+     * @param encodedText raw header value, may be {@code null}
+     * @return the decoded value; {@code encodedText} itself when it is {@code null}, blank, or
+     *     when processing fails unexpectedly
+     */
+    public static String decodeMimeHeader(String encodedText) {
+        if (encodedText == null || encodedText.trim().isEmpty()) {
+            return encodedText;
+        }
+
+        try {
+            // Matcher.replaceAll(Function) interprets the returned string as a replacement
+            // template, so '$' or '\' inside a Q-encoded payload must be quoted; otherwise the
+            // call throws and the whole header would be returned undecoded.
+            String processedText =
+                    RFC2047_ADJACENT_WORDS
+                            .matcher(encodedText)
+                            .replaceAll(
+                                    match ->
+                                            Matcher.quoteReplacement(
+                                                    GAP_BEFORE_ENCODED_WORD
+                                                            .matcher(match.group())
+                                                            .replaceAll("")));
+
+            StringBuilder result = new StringBuilder(processedText.length());
+            Matcher matcher = RFC2047_ENCODED_WORD.matcher(processedText);
+            int lastEnd = 0;
+
+            while (matcher.find()) {
+                result.append(processedText, lastEnd, matcher.start());
+
+                String charset = matcher.group(1);
+                String encoding = matcher.group(2).toUpperCase(java.util.Locale.ROOT);
+                String encodedValue = matcher.group(3);
+
+                try {
+                    String decodedValue =
+                            switch (encoding) {
+                                case "B" -> {
+                                    String cleanBase64 = encodedValue.replaceAll("\\s", "");
+                                    byte[] decodedBytes = Base64.getDecoder().decode(cleanBase64);
+                                    yield new String(decodedBytes, charsetOrUtf8(charset));
+                                }
+                                case "Q" -> decodeQuotedPrintable(encodedValue, charset);
+                                default -> matcher.group(0); // Return original if unknown encoding
+                            };
+                    result.append(decodedValue);
+                } catch (RuntimeException e) {
+                    result.append(matcher.group(0)); // Keep original on decode error
+                }
+
+                lastEnd = matcher.end();
+            }
+
+            result.append(processedText, lastEnd, processedText.length());
+            return result.toString();
+        } catch (RuntimeException e) {
+            return encodedText; // Return original on any parsing error
+        }
+    }
+
+    /** Resolves a charset by name, falling back to UTF-8 when the name is invalid or unknown. */
+    private static Charset charsetOrUtf8(String charsetName) {
+        try {
+            return Charset.forName(charsetName);
+        } catch (IllegalArgumentException e) {
+            return StandardCharsets.UTF_8;
+        }
+    }
+
+    /**
+     * Decodes the payload of a Q-encoded word.
+     *
+     * <p>{@code =XX} (two hex digits) becomes the byte {@code 0xXX}, {@code _} becomes a space,
+     * a trailing {@code =} (optionally followed by a final newline) is dropped, and any other
+     * {@code =} is kept literally. The resulting bytes are interpreted in {@code charset},
+     * or UTF-8 when that charset cannot be resolved.
+     */
+    private static String decodeQuotedPrintable(String encodedText, String charset) {
+        int length = encodedText.length();
+        StringBuilder raw = new StringBuilder(length);
+        for (int i = 0; i < length; i++) {
+            char c = encodedText.charAt(i);
+            switch (c) {
+                case '=' -> {
+                    if (i + 2 < length) {
+                        // Character.digit rejects signs, unlike Integer.parseInt("-1", 16).
+                        int high = Character.digit(encodedText.charAt(i + 1), 16);
+                        int low = Character.digit(encodedText.charAt(i + 2), 16);
+                        if (high >= 0 && low >= 0) {
+                            raw.append((char) ((high << 4) | low));
+                            i += 2;
+                        } else {
+                            raw.append(c);
+                        }
+                    } else if (i + 1 == length) {
+                        // Trailing '=': soft line break, emit nothing.
+                    } else if (encodedText.charAt(i + 1) == '\n') {
+                        i++; // Skip the newline too
+                    } else {
+                        raw.append(c);
+                    }
+                }
+                case '_' -> raw.append(' '); // Space encoding in Q encoding
+                default -> raw.append(c);
+            }
+        }
+
+        // Every decoded value is in 0..255, so ISO-8859-1 maps each char back to one byte.
+        byte[] bytes = raw.toString().getBytes(StandardCharsets.ISO_8859_1);
+        return new String(bytes, charsetOrUtf8(charset));
+    }
+
+    /**
+     * Escapes the characters that are significant in HTML text and attribute values.
+     *
+     * @param text the text to escape, may be {@code null}
+     * @return the escaped text, or an empty string for {@code null}
+     */
+    public static String escapeHtml(String text) {
+        if (text == null) return "";
+        // '&' must be replaced first so the entities produced below are not escaped again.
+        return text.replace("&", "&amp;")
+                .replace("<", "&lt;")
+                .replace(">", "&gt;")
+                .replace("\"", "&quot;")
+                .replace("'", "&#39;");
+    }
+
+    /**
+     * Makes text safe for inclusion in the generated HTML.
+     *
+     * @param text the text to sanitize
+     * @param customHtmlSanitizer sanitizer to delegate to; when {@code null} the text is
+     *     HTML-escaped via {@link #escapeHtml(String)} instead
+     * @return the sanitized or escaped text
+     */
+    public static String sanitizeText(String text, CustomHtmlSanitizer customHtmlSanitizer) {
+        if (customHtmlSanitizer != null) {
+            return customHtmlSanitizer.sanitize(text);
+        } else {
+            return escapeHtml(text);
+        }
+    }
+
+    /**
+     * Strips two kinds of element blocks from the HTML; used by the caller as a retry input
+     * after a failed HTML-to-PDF conversion.
+     *
+     * @param htmlContent the HTML to simplify; must not be {@code null}
+     * @return the HTML with the matched blocks removed
+     */
+    public static String simplifyHtmlContent(String htmlContent) {
+        // (?s) lets '.' cross line breaks so multi-line blocks are removed as well.
+        // NOTE(review): the tag names were unreadable in the reviewed copy of this patch;
+        // script/style is assumed here - confirm against the upstream file.
+        String simplified = htmlContent.replaceAll("(?is)<script[^>]*>.*?</script>", "");
+        simplified = simplified.replaceAll("(?is)<style[^>]*>.*?</style>", "");
+        return simplified;
+    }
+}
diff --git a/app/common/src/main/java/stirling/software/common/util/EmlToPdf.java b/app/common/src/main/java/stirling/software/common/util/EmlToPdf.java index 6b28dc683..85005af40 100644 --- a/app/common/src/main/java/stirling/software/common/util/EmlToPdf.java +++ b/app/common/src/main/java/stirling/software/common/util/EmlToPdf.java @@ -1,131 +1,23 @@ package stirling.software.common.util; -import static stirling.software.common.util.AttachmentUtils.setCatalogViewerPreferences; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.lang.reflect.Constructor; -import java.lang.reflect.Method; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Base64; -import java.util.Date; -import java.util.GregorianCalendar; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Properties; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; -import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode; -import org.apache.pdfbox.pdmodel.PDPage; -import
org.apache.pdfbox.pdmodel.PageMode; -import org.apache.pdfbox.pdmodel.common.PDRectangle; -import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification; -import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile; -import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment; -import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; -import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -import lombok.Data; -import lombok.Getter; import lombok.experimental.UtilityClass; -import lombok.extern.slf4j.Slf4j; import stirling.software.common.model.api.converters.EmlToPdfRequest; -import stirling.software.common.model.api.converters.HTMLToPdfRequest; import stirling.software.common.service.CustomPDFDocumentFactory; -@Slf4j @UtilityClass public class EmlToPdf { - private static final class StyleConstants { - // Font and layout constants - static final int DEFAULT_FONT_SIZE = 12; - static final String DEFAULT_FONT_FAMILY = "Helvetica, sans-serif"; - static final float DEFAULT_LINE_HEIGHT = 1.4f; - static final String DEFAULT_ZOOM = "1.0"; - - // Color constants - aligned with application theme - static final String DEFAULT_TEXT_COLOR = "#202124"; - static final String DEFAULT_BACKGROUND_COLOR = "#ffffff"; - static final String DEFAULT_BORDER_COLOR = "#e8eaed"; - static final String ATTACHMENT_BACKGROUND_COLOR = "#f9f9f9"; - static final String ATTACHMENT_BORDER_COLOR = "#eeeeee"; - - // Size constants for PDF annotations - static final float ATTACHMENT_ICON_WIDTH = 12f; - static final float ATTACHMENT_ICON_HEIGHT = 14f; - static final float ANNOTATION_X_OFFSET = 2f; - static final float ANNOTATION_Y_OFFSET = 10f; - - // Content validation constants - static final int EML_CHECK_LENGTH = 8192; - static final int MIN_HEADER_COUNT_FOR_VALID_EML = 2; - - private StyleConstants() {} - } - 
- private static final class MimeConstants { - static final Pattern MIME_ENCODED_PATTERN = - Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?="); - static final String ATTACHMENT_MARKER = "@"; - - private MimeConstants() {} - } - - private static final class FileSizeConstants { - static final long BYTES_IN_KB = 1024L; - static final long BYTES_IN_MB = BYTES_IN_KB * 1024L; - static final long BYTES_IN_GB = BYTES_IN_MB * 1024L; - - private FileSizeConstants() {} - } - - // Cached Jakarta Mail availability check - private static Boolean jakartaMailAvailable = null; - - private static boolean isJakartaMailAvailable() { - if (jakartaMailAvailable == null) { - try { - // Check for core Jakarta Mail classes - Class.forName("jakarta.mail.internet.MimeMessage"); - Class.forName("jakarta.mail.Session"); - Class.forName("jakarta.mail.internet.MimeUtility"); - Class.forName("jakarta.mail.internet.MimePart"); - Class.forName("jakarta.mail.internet.MimeMultipart"); - Class.forName("jakarta.mail.Multipart"); - Class.forName("jakarta.mail.Part"); - - jakartaMailAvailable = true; - log.debug("Jakarta Mail libraries are available"); - } catch (ClassNotFoundException e) { - jakartaMailAvailable = false; - log.debug("Jakarta Mail libraries are not available, using basic parsing"); - } - } - return jakartaMailAvailable; - } - public static String convertEmlToHtml(byte[] emlBytes, EmlToPdfRequest request) throws IOException { - validateEmlInput(emlBytes); + EmlProcessingUtils.validateEmlInput(emlBytes); - if (isJakartaMailAvailable()) { - return convertEmlToHtmlAdvanced(emlBytes, request); - } else { - return convertEmlToHtmlBasic(emlBytes, request); - } + EmlParser.EmailContent emailContent = + EmlParser.extractEmailContent(emlBytes, request, null); + return EmlProcessingUtils.generateEnhancedEmailHtml(emailContent, request, null); } public static byte[] convertEmlToPdf( @@ -133,26 +25,21 @@ public class EmlToPdf { EmlToPdfRequest request, byte[] emlBytes, String fileName, - 
stirling.software.common.service.CustomPDFDocumentFactory pdfDocumentFactory, + CustomPDFDocumentFactory pdfDocumentFactory, TempFileManager tempFileManager, CustomHtmlSanitizer customHtmlSanitizer) throws IOException, InterruptedException { - validateEmlInput(emlBytes); + EmlProcessingUtils.validateEmlInput(emlBytes); try { - // Generate HTML representation - EmailContent emailContent = null; - String htmlContent; + EmlParser.EmailContent emailContent = + EmlParser.extractEmailContent(emlBytes, request, customHtmlSanitizer); - if (isJakartaMailAvailable()) { - emailContent = extractEmailContentAdvanced(emlBytes, request); - htmlContent = generateEnhancedEmailHtml(emailContent, request); - } else { - htmlContent = convertEmlToHtmlBasic(emlBytes, request); - } + String htmlContent = + EmlProcessingUtils.generateEnhancedEmailHtml( + emailContent, request, customHtmlSanitizer); - // Convert HTML to PDF byte[] pdfBytes = convertHtmlToPdf( weasyprintPath, @@ -161,35 +48,23 @@ public class EmlToPdf { tempFileManager, customHtmlSanitizer); - // Attach files if available and requested if (shouldAttachFiles(emailContent, request)) { pdfBytes = - attachFilesToPdf( + PdfAttachmentHandler.attachFilesToPdf( pdfBytes, emailContent.getAttachments(), pdfDocumentFactory); } return pdfBytes; } catch (IOException | InterruptedException e) { - log.error("Failed to convert EML to PDF for file: {}", fileName, e); throw e; } catch (Exception e) { - log.error("Unexpected error during EML to PDF conversion for file: {}", fileName, e); - throw new IOException("Conversion failed: " + e.getMessage(), e); + throw new IOException("Error converting EML to PDF", e); } } - private static void validateEmlInput(byte[] emlBytes) { - if (emlBytes == null || emlBytes.length == 0) { - throw new IllegalArgumentException("EML file is empty or null"); - } - - if (isInvalidEmlFormat(emlBytes)) { - throw new IllegalArgumentException("Invalid EML file format"); - } - } - - private static boolean 
shouldAttachFiles(EmailContent emailContent, EmlToPdfRequest request) { + private static boolean shouldAttachFiles( + EmlParser.EmailContent emailContent, EmlToPdfRequest request) { return emailContent != null && request != null && request.isIncludeAttachments() @@ -204,7 +79,7 @@ public class EmlToPdf { CustomHtmlSanitizer customHtmlSanitizer) throws IOException, InterruptedException { - HTMLToPdfRequest htmlRequest = createHtmlRequest(request); + var htmlRequest = EmlProcessingUtils.createHtmlRequest(request); try { return FileToPdf.convertHtmlToPdf( @@ -215,8 +90,7 @@ public class EmlToPdf { tempFileManager, customHtmlSanitizer); } catch (IOException | InterruptedException e) { - log.warn("Initial HTML to PDF conversion failed, trying with simplified HTML"); - String simplifiedHtml = simplifyHtmlContent(htmlContent); + String simplifiedHtml = EmlProcessingUtils.simplifyHtmlContent(htmlContent); return FileToPdf.convertHtmlToPdf( weasyprintPath, htmlRequest, @@ -226,1499 +100,4 @@ public class EmlToPdf { customHtmlSanitizer); } } - - private static String simplifyHtmlContent(String htmlContent) { - String simplified = htmlContent.replaceAll("(?i)]*>.*?", ""); - simplified = simplified.replaceAll("(?i)]*>.*?", ""); - return simplified; - } - - private static String generateUniqueAttachmentId(String filename) { - return "attachment_" + filename.hashCode() + "_" + System.nanoTime(); - } - - private static String convertEmlToHtmlBasic(byte[] emlBytes, EmlToPdfRequest request) { - if (emlBytes == null || emlBytes.length == 0) { - throw new IllegalArgumentException("EML file is empty or null"); - } - - String emlContent = new String(emlBytes, StandardCharsets.UTF_8); - - // Basic email parsing - String subject = extractBasicHeader(emlContent, "Subject:"); - String from = extractBasicHeader(emlContent, "From:"); - String to = extractBasicHeader(emlContent, "To:"); - String cc = extractBasicHeader(emlContent, "Cc:"); - String bcc = extractBasicHeader(emlContent, "Bcc:"); 
- String date = extractBasicHeader(emlContent, "Date:"); - - // Try to extract HTML content - String htmlBody = extractHtmlBody(emlContent); - if (htmlBody == null) { - String textBody = extractTextBody(emlContent); - htmlBody = - convertTextToHtml( - textBody != null ? textBody : "Email content could not be parsed"); - } - - // Generate HTML with custom styling based on request - StringBuilder html = new StringBuilder(); - html.append("\n"); - html.append("\n"); - html.append("").append(escapeHtml(subject)).append("\n"); - html.append("\n"); - html.append("\n"); - - html.append("
\n"); - html.append("
\n"); - html.append("

").append(escapeHtml(subject)).append("

\n"); - html.append("
\n"); - html.append("
From: ").append(escapeHtml(from)).append("
\n"); - html.append("
To: ").append(escapeHtml(to)).append("
\n"); - - // Include CC and BCC if present and requested - if (request != null && request.isIncludeAllRecipients()) { - if (!cc.trim().isEmpty()) { - html.append("
CC: ").append(escapeHtml(cc)).append("
\n"); - } - if (!bcc.trim().isEmpty()) { - html.append("
BCC: ") - .append(escapeHtml(bcc)) - .append("
\n"); - } - } - - if (!date.trim().isEmpty()) { - html.append("
Date: ").append(escapeHtml(date)).append("
\n"); - } - html.append("
\n"); - - html.append("
\n"); - html.append(processEmailHtmlBody(htmlBody)); - html.append("
\n"); - - // Add attachment information - always check for and display attachments - String attachmentInfo = extractAttachmentInfo(emlContent); - if (!attachmentInfo.isEmpty()) { - html.append("
\n"); - html.append("

Attachments

\n"); - html.append(attachmentInfo); - - // Add a status message about attachment inclusion - if (request != null && request.isIncludeAttachments()) { - html.append("
\n"); - html.append( - "

Note: Attachments are saved as external files and linked in this PDF. Click the links to open files externally.

\n"); - html.append("
\n"); - } else { - html.append("
\n"); - html.append( - "

Attachment information displayed - files not included in PDF. Enable 'Include attachments' to embed files.

\n"); - html.append("
\n"); - } - - html.append("
\n"); - } - - // Show advanced features status if requested - assert request != null; - if (request.getFileInput().isEmpty()) { - html.append("
\n"); - html.append( - "

Note: Some advanced features require Jakarta Mail dependencies.

\n"); - html.append("
\n"); - } - - html.append("
\n"); - html.append(""); - - return html.toString(); - } - - private static EmailContent extractEmailContentAdvanced( - byte[] emlBytes, EmlToPdfRequest request) { - try { - // Use Jakarta Mail for processing - Class sessionClass = Class.forName("jakarta.mail.Session"); - Class mimeMessageClass = Class.forName("jakarta.mail.internet.MimeMessage"); - - Method getDefaultInstance = - sessionClass.getMethod("getDefaultInstance", Properties.class); - Object session = getDefaultInstance.invoke(null, new Properties()); - - // Cast the session object to the proper type for the constructor - Class[] constructorArgs = new Class[] {sessionClass, InputStream.class}; - Constructor mimeMessageConstructor = - mimeMessageClass.getConstructor(constructorArgs); - Object message = - mimeMessageConstructor.newInstance(session, new ByteArrayInputStream(emlBytes)); - - return extractEmailContentAdvanced(message, request); - - } catch (ReflectiveOperationException e) { - // Create basic EmailContent from basic processing - EmailContent content = new EmailContent(); - content.setHtmlBody(convertEmlToHtmlBasic(emlBytes, request)); - return content; - } - } - - private static String convertEmlToHtmlAdvanced(byte[] emlBytes, EmlToPdfRequest request) { - EmailContent content = extractEmailContentAdvanced(emlBytes, request); - return generateEnhancedEmailHtml(content, request); - } - - private static String extractAttachmentInfo(String emlContent) { - StringBuilder attachmentInfo = new StringBuilder(); - try { - String[] lines = emlContent.split("\r?\n"); - boolean inHeaders = true; - String currentContentType = ""; - String currentDisposition = ""; - String currentFilename = ""; - String currentEncoding = ""; - boolean inMultipart = false; - String boundary = ""; - - // First pass: find boundary for multipart messages - for (String line : lines) { - String lowerLine = line.toLowerCase().trim(); - if (lowerLine.startsWith("content-type:") && lowerLine.contains("multipart")) { - if 
(lowerLine.contains("boundary=")) { - int boundaryStart = lowerLine.indexOf("boundary=") + 9; - String boundaryPart = line.substring(boundaryStart).trim(); - if (boundaryPart.startsWith("\"")) { - boundary = boundaryPart.substring(1, boundaryPart.indexOf("\"", 1)); - } else { - int spaceIndex = boundaryPart.indexOf(" "); - boundary = - spaceIndex > 0 - ? boundaryPart.substring(0, spaceIndex) - : boundaryPart; - } - inMultipart = true; - break; - } - } - if (line.trim().isEmpty()) break; - } - - // Second pass: extract attachment information - for (String line : lines) { - String lowerLine = line.toLowerCase().trim(); - - // Check for boundary markers in multipart messages - if (inMultipart && line.trim().startsWith("--" + boundary)) { - // Reset for new part - currentContentType = ""; - currentDisposition = ""; - currentFilename = ""; - currentEncoding = ""; - inHeaders = true; - continue; - } - - if (inHeaders && line.trim().isEmpty()) { - inHeaders = false; - - // Process accumulated attachment info - if (isAttachment(currentDisposition, currentFilename, currentContentType)) { - addAttachmentToInfo( - attachmentInfo, - currentFilename, - currentContentType, - currentEncoding); - - // Reset for next attachment - currentContentType = ""; - currentDisposition = ""; - currentFilename = ""; - currentEncoding = ""; - } - continue; - } - - if (!inHeaders) continue; // Skip body content - - // Parse headers - if (lowerLine.startsWith("content-type:")) { - currentContentType = line.substring(13).trim(); - } else if (lowerLine.startsWith("content-disposition:")) { - currentDisposition = line.substring(20).trim(); - // Extract filename if present - currentFilename = extractFilenameFromDisposition(currentDisposition); - } else if (lowerLine.startsWith("content-transfer-encoding:")) { - currentEncoding = line.substring(26).trim(); - } else if (line.startsWith(" ") || line.startsWith("\t")) { - // Continuation of previous header - if (currentDisposition.contains("filename=")) 
{ - currentDisposition += " " + line.trim(); - currentFilename = extractFilenameFromDisposition(currentDisposition); - } else if (!currentContentType.isEmpty()) { - currentContentType += " " + line.trim(); - } - } - } - - if (isAttachment(currentDisposition, currentFilename, currentContentType)) { - addAttachmentToInfo( - attachmentInfo, currentFilename, currentContentType, currentEncoding); - } - - } catch (RuntimeException e) { - log.warn("Error extracting attachment info: {}", e.getMessage()); - } - return attachmentInfo.toString(); - } - - private static boolean isAttachment(String disposition, String filename, String contentType) { - return (disposition.toLowerCase().contains("attachment") && !filename.isEmpty()) - || (!filename.isEmpty() && !contentType.toLowerCase().startsWith("text/")) - || (contentType.toLowerCase().contains("application/") && !filename.isEmpty()); - } - - private static String extractFilenameFromDisposition(String disposition) { - if (disposition.contains("filename=")) { - int filenameStart = disposition.toLowerCase().indexOf("filename=") + 9; - int filenameEnd = disposition.indexOf(";", filenameStart); - if (filenameEnd == -1) filenameEnd = disposition.length(); - String filename = disposition.substring(filenameStart, filenameEnd).trim(); - filename = filename.replaceAll("^\"|\"$", ""); - // Apply MIME decoding to handle encoded filenames - return safeMimeDecode(filename); - } - return ""; - } - - private static void addAttachmentToInfo( - StringBuilder attachmentInfo, String filename, String contentType, String encoding) { - // Create attachment info with paperclip emoji before filename - attachmentInfo - .append("
") - .append("") - .append(MimeConstants.ATTACHMENT_MARKER) - .append(" ") - .append("") - .append(escapeHtml(filename)) - .append(""); - - // Add content type and encoding info - if (!contentType.isEmpty() || !encoding.isEmpty()) { - attachmentInfo.append(" ("); - if (!contentType.isEmpty()) { - attachmentInfo.append(escapeHtml(contentType)); - } - if (!encoding.isEmpty()) { - if (!contentType.isEmpty()) attachmentInfo.append(", "); - attachmentInfo.append("encoding: ").append(escapeHtml(encoding)); - } - attachmentInfo.append(")"); - } - attachmentInfo.append("
\n"); - } - - private static boolean isInvalidEmlFormat(byte[] emlBytes) { - try { - int checkLength = Math.min(emlBytes.length, StyleConstants.EML_CHECK_LENGTH); - String content = new String(emlBytes, 0, checkLength, StandardCharsets.UTF_8); - String lowerContent = content.toLowerCase(); - - boolean hasFrom = - lowerContent.contains("from:") || lowerContent.contains("return-path:"); - boolean hasSubject = lowerContent.contains("subject:"); - boolean hasMessageId = lowerContent.contains("message-id:"); - boolean hasDate = lowerContent.contains("date:"); - boolean hasTo = - lowerContent.contains("to:") - || lowerContent.contains("cc:") - || lowerContent.contains("bcc:"); - boolean hasMimeStructure = - lowerContent.contains("multipart/") - || lowerContent.contains("text/plain") - || lowerContent.contains("text/html") - || lowerContent.contains("boundary="); - - int headerCount = 0; - if (hasFrom) headerCount++; - if (hasSubject) headerCount++; - if (hasMessageId) headerCount++; - if (hasDate) headerCount++; - if (hasTo) headerCount++; - - return headerCount < StyleConstants.MIN_HEADER_COUNT_FOR_VALID_EML && !hasMimeStructure; - - } catch (RuntimeException e) { - return false; - } - } - - private static String extractBasicHeader(String emlContent, String headerName) { - try { - String[] lines = emlContent.split("\r?\n"); - for (int i = 0; i < lines.length; i++) { - String line = lines[i]; - if (line.toLowerCase().startsWith(headerName.toLowerCase())) { - StringBuilder value = - new StringBuilder(line.substring(headerName.length()).trim()); - // Handle multi-line headers - for (int j = i + 1; j < lines.length; j++) { - if (lines[j].startsWith(" ") || lines[j].startsWith("\t")) { - value.append(" ").append(lines[j].trim()); - } else { - break; - } - } - // Apply MIME header decoding - return safeMimeDecode(value.toString()); - } - if (line.trim().isEmpty()) break; - } - } catch (RuntimeException e) { - log.warn("Error extracting header '{}': {}", headerName, 
e.getMessage()); - } - return ""; - } - - private static String extractHtmlBody(String emlContent) { - try { - String lowerContent = emlContent.toLowerCase(); - int htmlStart = lowerContent.indexOf("content-type: text/html"); - if (htmlStart == -1) return null; - - return getString(emlContent, htmlStart); - - } catch (Exception e) { - return null; - } - } - - @Nullable - private static String getString(String emlContent, int htmlStart) { - int bodyStart = emlContent.indexOf("\r\n\r\n", htmlStart); - if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n", htmlStart); - if (bodyStart == -1) return null; - - bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 4 : 2; - int bodyEnd = findPartEnd(emlContent, bodyStart); - - return emlContent.substring(bodyStart, bodyEnd).trim(); - } - - private static String extractTextBody(String emlContent) { - try { - String lowerContent = emlContent.toLowerCase(); - int textStart = lowerContent.indexOf("content-type: text/plain"); - if (textStart == -1) { - int bodyStart = emlContent.indexOf("\r\n\r\n"); - if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n"); - if (bodyStart != -1) { - bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 
4 : 2; - int bodyEnd = findPartEnd(emlContent, bodyStart); - return emlContent.substring(bodyStart, bodyEnd).trim(); - } - return null; - } - - return getString(emlContent, textStart); - - } catch (RuntimeException e) { - return null; - } - } - - private static int findPartEnd(String content, int start) { - String[] lines = content.substring(start).split("\r?\n"); - StringBuilder result = new StringBuilder(); - - for (String line : lines) { - if (line.startsWith("--") && line.length() > 10) break; - result.append(line).append("\n"); - } - - return start + result.length(); - } - - private static String convertTextToHtml(String textBody) { - if (textBody == null) return ""; - - String html = escapeHtml(textBody); - html = html.replace("\r\n", "\n").replace("\r", "\n"); - html = html.replace("\n", "
\n"); - - html = - html.replaceAll( - "(https?://[\\w\\-._~:/?#\\[\\]@!$&'()*+,;=%]+)", - "$1"); - - html = - html.replaceAll( - "([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,63})", - "$1"); - - return html; - } - - private static String processEmailHtmlBody(String htmlBody) { - return processEmailHtmlBody(htmlBody, null); - } - - private static String processEmailHtmlBody(String htmlBody, EmailContent emailContent) { - if (htmlBody == null) return ""; - - String processed = htmlBody; - - // Remove problematic CSS - processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*fixed[^;]*;?", ""); - processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*absolute[^;]*;?", ""); - - // Process inline images (cid: references) if we have email content with attachments - if (emailContent != null && !emailContent.getAttachments().isEmpty()) { - processed = processInlineImages(processed, emailContent); - } - - return processed; - } - - private static String processInlineImages(String htmlContent, EmailContent emailContent) { - if (htmlContent == null || emailContent == null) return htmlContent; - - // Create a map of Content-ID to attachment data - Map contentIdMap = new HashMap<>(); - for (EmailAttachment attachment : emailContent.getAttachments()) { - if (attachment.isEmbedded() - && attachment.getContentId() != null - && attachment.getData() != null) { - contentIdMap.put(attachment.getContentId(), attachment); - } - } - - if (contentIdMap.isEmpty()) return htmlContent; - - // Pattern to match cid: references in img src attributes - Pattern cidPattern = - Pattern.compile( - "(?i)]*\\ssrc\\s*=\\s*['\"]cid:([^'\"]+)['\"][^>]*>", - Pattern.CASE_INSENSITIVE); - Matcher matcher = cidPattern.matcher(htmlContent); - - StringBuffer result = new StringBuffer(); - while (matcher.find()) { - String contentId = matcher.group(1); - EmailAttachment attachment = contentIdMap.get(contentId); - - if (attachment != null && attachment.getData() != null) { - // Convert to data URI - 
String mimeType = attachment.getContentType(); - if (mimeType == null || mimeType.isEmpty()) { - // Try to determine MIME type from filename - String filename = attachment.getFilename(); - if (filename != null) { - if (filename.toLowerCase().endsWith(".png")) { - mimeType = "image/png"; - } else if (filename.toLowerCase().endsWith(".jpg") - || filename.toLowerCase().endsWith(".jpeg")) { - mimeType = "image/jpeg"; - } else if (filename.toLowerCase().endsWith(".gif")) { - mimeType = "image/gif"; - } else if (filename.toLowerCase().endsWith(".bmp")) { - mimeType = "image/bmp"; - } else { - mimeType = "image/png"; // fallback - } - } else { - mimeType = "image/png"; // fallback - } - } - - String base64Data = Base64.getEncoder().encodeToString(attachment.getData()); - String dataUri = "data:" + mimeType + ";base64," + base64Data; - - // Replace the cid: reference with the data URI - String replacement = - matcher.group(0).replaceFirst("cid:" + Pattern.quote(contentId), dataUri); - matcher.appendReplacement(result, Matcher.quoteReplacement(replacement)); - } else { - // Keep original if attachment not found - matcher.appendReplacement(result, Matcher.quoteReplacement(matcher.group(0))); - } - } - matcher.appendTail(result); - - return result.toString(); - } - - private static void appendEnhancedStyles(StringBuilder html) { - int fontSize = StyleConstants.DEFAULT_FONT_SIZE; - String textColor = StyleConstants.DEFAULT_TEXT_COLOR; - String backgroundColor = StyleConstants.DEFAULT_BACKGROUND_COLOR; - String borderColor = StyleConstants.DEFAULT_BORDER_COLOR; - - html.append("body {\n"); - html.append(" font-family: ").append(StyleConstants.DEFAULT_FONT_FAMILY).append(";\n"); - html.append(" font-size: ").append(fontSize).append("px;\n"); - html.append(" line-height: ").append(StyleConstants.DEFAULT_LINE_HEIGHT).append(";\n"); - html.append(" color: ").append(textColor).append(";\n"); - html.append(" margin: 0;\n"); - html.append(" padding: 16px;\n"); - html.append(" 
background-color: ").append(backgroundColor).append(";\n"); - html.append("}\n\n"); - - html.append(".email-container {\n"); - html.append(" width: 100%;\n"); - html.append(" max-width: 100%;\n"); - html.append(" margin: 0 auto;\n"); - html.append("}\n\n"); - - html.append(".email-header {\n"); - html.append(" padding-bottom: 10px;\n"); - html.append(" border-bottom: 1px solid ").append(borderColor).append(";\n"); - html.append(" margin-bottom: 10px;\n"); - html.append("}\n\n"); - html.append(".email-header h1 {\n"); - html.append(" margin: 0 0 10px 0;\n"); - html.append(" font-size: ").append(fontSize + 4).append("px;\n"); - html.append(" font-weight: bold;\n"); - html.append("}\n\n"); - html.append(".email-meta div {\n"); - html.append(" margin-bottom: 2px;\n"); - html.append(" font-size: ").append(fontSize - 1).append("px;\n"); - html.append("}\n\n"); - - html.append(".email-body {\n"); - html.append(" word-wrap: break-word;\n"); - html.append("}\n\n"); - - html.append(".attachment-section {\n"); - html.append(" margin-top: 15px;\n"); - html.append(" padding: 10px;\n"); - html.append(" background-color: ") - .append(StyleConstants.ATTACHMENT_BACKGROUND_COLOR) - .append(";\n"); - html.append(" border: 1px solid ") - .append(StyleConstants.ATTACHMENT_BORDER_COLOR) - .append(";\n"); - html.append(" border-radius: 3px;\n"); - html.append("}\n\n"); - html.append(".attachment-section h3 {\n"); - html.append(" margin: 0 0 8px 0;\n"); - html.append(" font-size: ").append(fontSize + 1).append("px;\n"); - html.append("}\n\n"); - html.append(".attachment-item {\n"); - html.append(" padding: 5px 0;\n"); - html.append("}\n\n"); - html.append(".attachment-icon {\n"); - html.append(" margin-right: 5px;\n"); - html.append("}\n\n"); - html.append(".attachment-details, .attachment-type {\n"); - html.append(" font-size: ").append(fontSize - 2).append("px;\n"); - html.append(" color: #555555;\n"); - html.append("}\n\n"); - html.append(".attachment-inclusion-note, 
.attachment-info-note {\n"); - html.append(" margin-top: 8px;\n"); - html.append(" padding: 6px;\n"); - html.append(" font-size: ").append(fontSize - 2).append("px;\n"); - html.append(" border-radius: 3px;\n"); - html.append("}\n\n"); - html.append(".attachment-inclusion-note {\n"); - html.append(" background-color: #e6ffed;\n"); - html.append(" border: 1px solid #d4f7dc;\n"); - html.append(" color: #006420;\n"); - html.append("}\n\n"); - html.append(".attachment-info-note {\n"); - html.append(" background-color: #fff9e6;\n"); - html.append(" border: 1px solid #fff0c2;\n"); - html.append(" color: #664d00;\n"); - html.append("}\n\n"); - html.append(".attachment-link-container {\n"); - html.append(" display: flex;\n"); - html.append(" align-items: center;\n"); - html.append(" padding: 8px;\n"); - html.append(" background-color: #f8f9fa;\n"); - html.append(" border: 1px solid #dee2e6;\n"); - html.append(" border-radius: 4px;\n"); - html.append(" margin: 4px 0;\n"); - html.append("}\n\n"); - html.append(".attachment-link-container:hover {\n"); - html.append(" background-color: #e9ecef;\n"); - html.append("}\n\n"); - html.append(".attachment-note {\n"); - html.append(" font-size: ").append(fontSize - 3).append("px;\n"); - html.append(" color: #6c757d;\n"); - html.append(" font-style: italic;\n"); - html.append(" margin-left: 8px;\n"); - html.append("}\n\n"); - - // Basic image styling: ensure images are responsive but not overly constrained. 
- html.append("img {\n"); - html.append(" max-width: 100%;\n"); // Make images responsive to container width - html.append(" height: auto;\n"); // Maintain aspect ratio - html.append(" display: block;\n"); // Avoid extra space below images - html.append("}\n\n"); - } - - private static String escapeHtml(String text) { - if (text == null) return ""; - return text.replace("&", "&") - .replace("<", "<") - .replace(">", ">") - .replace("\"", """) - .replace("'", "'"); - } - - private static stirling.software.common.model.api.converters.HTMLToPdfRequest createHtmlRequest( - EmlToPdfRequest request) { - stirling.software.common.model.api.converters.HTMLToPdfRequest htmlRequest = - new stirling.software.common.model.api.converters.HTMLToPdfRequest(); - - if (request != null) { - htmlRequest.setFileInput(request.getFileInput()); - } - - // Set default zoom level - htmlRequest.setZoom(Float.parseFloat(StyleConstants.DEFAULT_ZOOM)); - - return htmlRequest; - } - - private static EmailContent extractEmailContentAdvanced( - Object message, EmlToPdfRequest request) { - EmailContent content = new EmailContent(); - - try { - Class messageClass = message.getClass(); - - // Extract headers via reflection - Method getSubject = messageClass.getMethod("getSubject"); - String subject = (String) getSubject.invoke(message); - content.setSubject(subject != null ? safeMimeDecode(subject) : "No Subject"); - - Method getFrom = messageClass.getMethod("getFrom"); - Object[] fromAddresses = (Object[]) getFrom.invoke(message); - content.setFrom( - fromAddresses != null && fromAddresses.length > 0 - ? safeMimeDecode(fromAddresses[0].toString()) - : ""); - - Method getAllRecipients = messageClass.getMethod("getAllRecipients"); - Object[] recipients = (Object[]) getAllRecipients.invoke(message); - content.setTo( - recipients != null && recipients.length > 0 - ? 
safeMimeDecode(recipients[0].toString()) - : ""); - - Method getSentDate = messageClass.getMethod("getSentDate"); - content.setDate((Date) getSentDate.invoke(message)); - - // Extract content - Method getContent = messageClass.getMethod("getContent"); - Object messageContent = getContent.invoke(message); - - if (messageContent instanceof String stringContent) { - Method getContentType = messageClass.getMethod("getContentType"); - String contentType = (String) getContentType.invoke(message); - if (contentType != null && contentType.toLowerCase().contains("text/html")) { - content.setHtmlBody(stringContent); - } else { - content.setTextBody(stringContent); - } - } else { - // Handle multipart content - try { - Class multipartClass = Class.forName("jakarta.mail.Multipart"); - if (multipartClass.isInstance(messageContent)) { - processMultipartAdvanced(messageContent, content, request); - } - } catch (Exception e) { - log.warn("Error processing content: {}", e.getMessage()); - } - } - - } catch (Exception e) { - content.setSubject("Email Conversion"); - content.setFrom("Unknown"); - content.setTo("Unknown"); - content.setTextBody("Email content could not be parsed with advanced processing"); - } - - return content; - } - - private static void processMultipartAdvanced( - Object multipart, EmailContent content, EmlToPdfRequest request) { - try { - // Enhanced multipart type checking - if (!isValidJakartaMailMultipart(multipart)) { - log.warn("Invalid Jakarta Mail multipart type: {}", multipart.getClass().getName()); - return; - } - - Class multipartClass = multipart.getClass(); - Method getCount = multipartClass.getMethod("getCount"); - int count = (Integer) getCount.invoke(multipart); - - Method getBodyPart = multipartClass.getMethod("getBodyPart", int.class); - - for (int i = 0; i < count; i++) { - Object part = getBodyPart.invoke(multipart, i); - processPartAdvanced(part, content, request); - } - - } catch (Exception e) { - content.setTextBody("Email content could not 
be parsed with advanced processing"); - } - } - - private static void processPartAdvanced( - Object part, EmailContent content, EmlToPdfRequest request) { - try { - if (!isValidJakartaMailPart(part)) { - log.warn("Invalid Jakarta Mail part type: {}", part.getClass().getName()); - return; - } - - Class partClass = part.getClass(); - Method isMimeType = partClass.getMethod("isMimeType", String.class); - Method getContent = partClass.getMethod("getContent"); - Method getDisposition = partClass.getMethod("getDisposition"); - Method getFileName = partClass.getMethod("getFileName"); - Method getContentType = partClass.getMethod("getContentType"); - Method getHeader = partClass.getMethod("getHeader", String.class); - - Object disposition = getDisposition.invoke(part); - String filename = (String) getFileName.invoke(part); - String contentType = (String) getContentType.invoke(part); - - if ((Boolean) isMimeType.invoke(part, "text/plain") && disposition == null) { - content.setTextBody((String) getContent.invoke(part)); - } else if ((Boolean) isMimeType.invoke(part, "text/html") && disposition == null) { - content.setHtmlBody((String) getContent.invoke(part)); - } else if ("attachment".equalsIgnoreCase((String) disposition) - || (filename != null && !filename.trim().isEmpty())) { - - content.setAttachmentCount(content.getAttachmentCount() + 1); - - // Always extract basic attachment metadata for display - if (filename != null && !filename.trim().isEmpty()) { - // Create attachment with metadata only - EmailAttachment attachment = new EmailAttachment(); - // Apply MIME decoding to filename to handle encoded attachment names - attachment.setFilename(safeMimeDecode(filename)); - attachment.setContentType(contentType); - - // Check if it's an embedded image - String[] contentIdHeaders = (String[]) getHeader.invoke(part, "Content-ID"); - if (contentIdHeaders != null && contentIdHeaders.length > 0) { - attachment.setEmbedded(true); - // Store the Content-ID, removing angle 
brackets if present - String contentId = contentIdHeaders[0]; - if (contentId.startsWith("<") && contentId.endsWith(">")) { - contentId = contentId.substring(1, contentId.length() - 1); - } - attachment.setContentId(contentId); - } - - // Extract attachment data if attachments should be included OR if it's an - // embedded image (needed for inline display) - if ((request != null && request.isIncludeAttachments()) - || attachment.isEmbedded()) { - try { - Object attachmentContent = getContent.invoke(part); - byte[] attachmentData = null; - - if (attachmentContent instanceof java.io.InputStream inputStream) { - try { - attachmentData = inputStream.readAllBytes(); - } catch (IOException e) { - log.warn( - "Failed to read InputStream attachment: {}", - e.getMessage()); - } - } else if (attachmentContent instanceof byte[] byteArray) { - attachmentData = byteArray; - } else if (attachmentContent instanceof String stringContent) { - attachmentData = stringContent.getBytes(StandardCharsets.UTF_8); - } - - if (attachmentData != null) { - // Check size limit (use default 10MB if request is null) - long maxSizeMB = - request != null ? 
request.getMaxAttachmentSizeMB() : 10L; - long maxSizeBytes = maxSizeMB * 1024 * 1024; - - if (attachmentData.length <= maxSizeBytes) { - attachment.setData(attachmentData); - attachment.setSizeBytes(attachmentData.length); - } else { - // For embedded images, always include data regardless of size - // to ensure inline display works - if (attachment.isEmbedded()) { - attachment.setData(attachmentData); - attachment.setSizeBytes(attachmentData.length); - } else { - // Still show attachment info even if too large - attachment.setSizeBytes(attachmentData.length); - } - } - } - } catch (Exception e) { - log.warn("Error extracting attachment data: {}", e.getMessage()); - } - } - - // Add attachment to the list for display (with or without data) - content.getAttachments().add(attachment); - } - } else if ((Boolean) isMimeType.invoke(part, "multipart/*")) { - // Handle nested multipart content - try { - Object multipartContent = getContent.invoke(part); - Class multipartClass = Class.forName("jakarta.mail.Multipart"); - if (multipartClass.isInstance(multipartContent)) { - processMultipartAdvanced(multipartContent, content, request); - } - } catch (Exception e) { - log.warn("Error processing multipart content: {}", e.getMessage()); - } - } - - } catch (Exception e) { - log.warn("Error processing multipart part: {}", e.getMessage()); - } - } - - private static String generateEnhancedEmailHtml(EmailContent content, EmlToPdfRequest request) { - StringBuilder html = new StringBuilder(); - - html.append("\n"); - html.append("\n"); - html.append("").append(escapeHtml(content.getSubject())).append("\n"); - html.append("\n"); - html.append("\n"); - - html.append("
\n"); - html.append("
\n"); - html.append("

").append(escapeHtml(content.getSubject())).append("

\n"); - html.append("
\n"); - html.append("
From: ") - .append(escapeHtml(content.getFrom())) - .append("
\n"); - html.append("
To: ") - .append(escapeHtml(content.getTo())) - .append("
\n"); - - if (content.getDate() != null) { - html.append("
Date: ") - .append(formatEmailDate(content.getDate())) - .append("
\n"); - } - html.append("
\n"); - - html.append("
\n"); - if (content.getHtmlBody() != null && !content.getHtmlBody().trim().isEmpty()) { - html.append(processEmailHtmlBody(content.getHtmlBody(), content)); - } else if (content.getTextBody() != null && !content.getTextBody().trim().isEmpty()) { - html.append("
"); - html.append(convertTextToHtml(content.getTextBody())); - html.append("
"); - } else { - html.append("
"); - html.append("

No content available

"); - html.append("
"); - } - html.append("
\n"); - - if (content.getAttachmentCount() > 0 || !content.getAttachments().isEmpty()) { - html.append("
\n"); - int displayedAttachmentCount = - content.getAttachmentCount() > 0 - ? content.getAttachmentCount() - : content.getAttachments().size(); - html.append("

Attachments (").append(displayedAttachmentCount).append(")

\n"); - - if (!content.getAttachments().isEmpty()) { - for (EmailAttachment attachment : content.getAttachments()) { - // Create attachment info with paperclip emoji before filename - String uniqueId = generateUniqueAttachmentId(attachment.getFilename()); - attachment.setEmbeddedFilename( - attachment.getEmbeddedFilename() != null - ? attachment.getEmbeddedFilename() - : attachment.getFilename()); - - html.append("
") - .append("") - .append(MimeConstants.ATTACHMENT_MARKER) - .append(" ") - .append("") - .append(escapeHtml(safeMimeDecode(attachment.getFilename()))) - .append(""); - - String sizeStr = formatFileSize(attachment.getSizeBytes()); - html.append(" (").append(sizeStr); - if (attachment.getContentType() != null - && !attachment.getContentType().isEmpty()) { - html.append(", ").append(escapeHtml(attachment.getContentType())); - } - html.append(")
\n"); - } - } - - if (request.isIncludeAttachments()) { - html.append("
\n"); - html.append("

Attachments are embedded in the file.

\n"); - html.append("
\n"); - } else { - html.append("
\n"); - html.append( - "

Attachment information displayed - files not included in PDF.

\n"); - html.append("
\n"); - } - - html.append("
\n"); - } - - html.append("
\n"); - html.append(""); - - return html.toString(); - } - - private static byte[] attachFilesToPdf( - byte[] pdfBytes, - List attachments, - CustomPDFDocumentFactory pdfDocumentFactory) - throws IOException { - try (PDDocument document = pdfDocumentFactory.load(pdfBytes); - ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { - - if (attachments == null || attachments.isEmpty()) { - document.save(outputStream); - return outputStream.toByteArray(); - } - - List embeddedFiles = new ArrayList<>(); - - // Set up the embedded files name tree once - if (document.getDocumentCatalog().getNames() == null) { - document.getDocumentCatalog() - .setNames(new PDDocumentNameDictionary(document.getDocumentCatalog())); - } - - PDDocumentNameDictionary names = document.getDocumentCatalog().getNames(); - if (names.getEmbeddedFiles() == null) { - names.setEmbeddedFiles(new PDEmbeddedFilesNameTreeNode()); - } - - PDEmbeddedFilesNameTreeNode efTree = names.getEmbeddedFiles(); - Map efMap = efTree.getNames(); - if (efMap == null) { - efMap = new HashMap<>(); - } - - // Embed each attachment directly into the PDF - for (EmailAttachment attachment : attachments) { - if (attachment.getData() == null || attachment.getData().length == 0) { - continue; - } - - try { - // Generate unique filename - String filename = attachment.getFilename(); - if (filename == null || filename.trim().isEmpty()) { - filename = "attachment_" + System.currentTimeMillis(); - if (attachment.getContentType() != null - && attachment.getContentType().contains("/")) { - String[] parts = attachment.getContentType().split("/"); - if (parts.length > 1) { - filename += "." 
+ parts[1]; - } - } - } - - // Ensure unique filename - String uniqueFilename = getUniqueFilename(filename, embeddedFiles, efMap); - - // Create embedded file - PDEmbeddedFile embeddedFile = - new PDEmbeddedFile( - document, new ByteArrayInputStream(attachment.getData())); - embeddedFile.setSize(attachment.getData().length); - embeddedFile.setCreationDate(new GregorianCalendar()); - - // Create file specification - PDComplexFileSpecification fileSpec = new PDComplexFileSpecification(); - fileSpec.setFile(uniqueFilename); - fileSpec.setEmbeddedFile(embeddedFile); - if (attachment.getContentType() != null) { - embeddedFile.setSubtype(attachment.getContentType()); - fileSpec.setFileDescription("Email attachment: " + uniqueFilename); - } - - // Add to the map (but don't set it yet) - efMap.put(uniqueFilename, fileSpec); - embeddedFiles.add(uniqueFilename); - - // Store the filename for annotation creation - attachment.setEmbeddedFilename(uniqueFilename); - - } catch (Exception e) { - // Log error but continue with other attachments - log.warn("Failed to embed attachment: {}", attachment.getFilename(), e); - } - } - - // Set the complete map once at the end - if (!efMap.isEmpty()) { - efTree.setNames(efMap); - - // Set catalog viewer preferences to automatically show attachments pane - setCatalogViewerPreferences(document, PageMode.USE_ATTACHMENTS); - } - - // Add attachment annotations to the first page for each embedded file - if (!embeddedFiles.isEmpty()) { - addAttachmentAnnotationsToDocument(document, attachments); - } - - document.save(outputStream); - return outputStream.toByteArray(); - } - } - - private static String getUniqueFilename( - String filename, - List embeddedFiles, - Map efMap) { - String uniqueFilename = filename; - int counter = 1; - while (embeddedFiles.contains(uniqueFilename) || efMap.containsKey(uniqueFilename)) { - String extension = ""; - String baseName = filename; - int lastDot = filename.lastIndexOf('.'); - if (lastDot > 0) { - extension = 
filename.substring(lastDot); - baseName = filename.substring(0, lastDot); - } - uniqueFilename = baseName + "_" + counter + extension; - counter++; - } - return uniqueFilename; - } - - private static void addAttachmentAnnotationsToDocument( - PDDocument document, List attachments) throws IOException { - if (document.getNumberOfPages() == 0 || attachments == null || attachments.isEmpty()) { - return; - } - - // 1. Find the screen position of all attachment markers - AttachmentMarkerPositionFinder finder = new AttachmentMarkerPositionFinder(); - finder.setSortByPosition(true); // Process pages in order - finder.getText(document); - List markerPositions = finder.getPositions(); - - // 2. Warn if the number of markers and attachments don't match - if (markerPositions.size() != attachments.size()) { - log.warn( - "Found {} attachment markers, but there are {} attachments. Annotation count may be incorrect.", - markerPositions.size(), - attachments.size()); - } - - // 3. Create an invisible annotation over each found marker - int annotationsToAdd = Math.min(markerPositions.size(), attachments.size()); - for (int i = 0; i < annotationsToAdd; i++) { - MarkerPosition position = markerPositions.get(i); - EmailAttachment attachment = attachments.get(i); - - if (attachment.getEmbeddedFilename() != null) { - PDPage page = document.getPage(position.getPageIndex()); - addAttachmentAnnotationToPage( - document, page, attachment, position.getX(), position.getY()); - } - } - } - - private static void addAttachmentAnnotationToPage( - PDDocument document, PDPage page, EmailAttachment attachment, float x, float y) - throws IOException { - - PDAnnotationFileAttachment fileAnnotation = new PDAnnotationFileAttachment(); - - PDRectangle rect = getPdRectangle(page, x, y); - fileAnnotation.setRectangle(rect); - - // Remove visual appearance while keeping clickable functionality - try { - PDAppearanceDictionary appearance = new PDAppearanceDictionary(); - PDAppearanceStream normalAppearance = 
new PDAppearanceStream(document); - normalAppearance.setBBox(new PDRectangle(0, 0, 0, 0)); // Zero-size bounding box - - appearance.setNormalAppearance(normalAppearance); - fileAnnotation.setAppearance(appearance); - } catch (Exception e) { - // If appearance manipulation fails, just set it to null - fileAnnotation.setAppearance(null); - } - - // Set invisibility flags but keep it functional - fileAnnotation.setInvisible(true); - fileAnnotation.setHidden(false); // Must be false to remain clickable - fileAnnotation.setNoView(false); // Must be false to remain clickable - fileAnnotation.setPrinted(false); - - PDEmbeddedFilesNameTreeNode efTree = - document.getDocumentCatalog().getNames().getEmbeddedFiles(); - if (efTree != null) { - Map efMap = efTree.getNames(); - if (efMap != null) { - PDComplexFileSpecification fileSpec = efMap.get(attachment.getEmbeddedFilename()); - if (fileSpec != null) { - fileAnnotation.setFile(fileSpec); - } - } - } - - fileAnnotation.setContents("Click to open: " + attachment.getFilename()); - fileAnnotation.setAnnotationName("EmbeddedFile_" + attachment.getEmbeddedFilename()); - - page.getAnnotations().add(fileAnnotation); - - log.info( - "Added attachment annotation for '{}' on page {}", - attachment.getFilename(), - document.getPages().indexOf(page) + 1); - } - - private static @NotNull PDRectangle getPdRectangle(PDPage page, float x, float y) { - PDRectangle mediaBox = page.getMediaBox(); - float pdfY = mediaBox.getHeight() - y; - - float iconWidth = - StyleConstants.ATTACHMENT_ICON_WIDTH; // Keep original size for clickability - float iconHeight = - StyleConstants.ATTACHMENT_ICON_HEIGHT; // Keep original size for clickability - - // Keep the full-size rectangle so it remains clickable - return new PDRectangle( - x + StyleConstants.ANNOTATION_X_OFFSET, - pdfY - iconHeight + StyleConstants.ANNOTATION_Y_OFFSET, - iconWidth, - iconHeight); - } - - private static String formatEmailDate(Date date) { - if (date == null) return ""; - 
java.text.SimpleDateFormat formatter = - new java.text.SimpleDateFormat("EEE, MMM d, yyyy 'at' h:mm a", Locale.ENGLISH); - return formatter.format(date); - } - - private static String formatFileSize(long bytes) { - if (bytes < FileSizeConstants.BYTES_IN_KB) { - return bytes + " B"; - } else if (bytes < FileSizeConstants.BYTES_IN_MB) { - return String.format("%.1f KB", bytes / (double) FileSizeConstants.BYTES_IN_KB); - } else if (bytes < FileSizeConstants.BYTES_IN_GB) { - return String.format("%.1f MB", bytes / (double) FileSizeConstants.BYTES_IN_MB); - } else { - return String.format("%.1f GB", bytes / (double) FileSizeConstants.BYTES_IN_GB); - } - } - - // MIME header decoding functionality for RFC 2047 encoded headers - moved to constants - - private static String decodeMimeHeader(String encodedText) { - if (encodedText == null || encodedText.trim().isEmpty()) { - return encodedText; - } - - try { - StringBuilder result = new StringBuilder(); - Matcher matcher = MimeConstants.MIME_ENCODED_PATTERN.matcher(encodedText); - int lastEnd = 0; - - while (matcher.find()) { - // Add any text before the encoded part - result.append(encodedText, lastEnd, matcher.start()); - - String charset = matcher.group(1); - String encoding = matcher.group(2).toUpperCase(); - String encodedValue = matcher.group(3); - - try { - String decodedValue; - if ("B".equals(encoding)) { - // Base64 decoding - byte[] decodedBytes = Base64.getDecoder().decode(encodedValue); - decodedValue = new String(decodedBytes, Charset.forName(charset)); - } else if ("Q".equals(encoding)) { - // Quoted-printable decoding - decodedValue = decodeQuotedPrintable(encodedValue, charset); - } else { - // Unknown encoding, keep original - decodedValue = matcher.group(0); - } - result.append(decodedValue); - } catch (Exception e) { - log.warn("Failed to decode MIME header part: {}", matcher.group(0), e); - // If decoding fails, keep the original encoded text - result.append(matcher.group(0)); - } - - lastEnd = 
matcher.end(); - } - - // Add any remaining text after the last encoded part - result.append(encodedText.substring(lastEnd)); - - return result.toString(); - } catch (Exception e) { - log.warn("Error decoding MIME header: {}", encodedText, e); - return encodedText; // Return original if decoding fails - } - } - - private static String decodeQuotedPrintable(String encodedText, String charset) { - StringBuilder result = new StringBuilder(); - for (int i = 0; i < encodedText.length(); i++) { - char c = encodedText.charAt(i); - switch (c) { - case '=' -> { - if (i + 2 < encodedText.length()) { - String hex = encodedText.substring(i + 1, i + 3); - try { - int value = Integer.parseInt(hex, 16); - result.append((char) value); - i += 2; // Skip the hex digits - } catch (NumberFormatException e) { - // If hex parsing fails, keep the original character - result.append(c); - } - } else { - result.append(c); - } - } - case '_' -> // In RFC 2047, underscore represents space - result.append(' '); - default -> result.append(c); - } - } - - // Convert bytes to proper charset - byte[] bytes = result.toString().getBytes(StandardCharsets.ISO_8859_1); - return new String(bytes, Charset.forName(charset)); - } - - private static String safeMimeDecode(String headerValue) { - if (headerValue == null) { - return ""; - } - - try { - if (isJakartaMailAvailable()) { - // Use Jakarta Mail's MimeUtility for proper MIME decoding - Class mimeUtilityClass = Class.forName("jakarta.mail.internet.MimeUtility"); - Method decodeText = mimeUtilityClass.getMethod("decodeText", String.class); - return (String) decodeText.invoke(null, headerValue.trim()); - } else { - // Fallback to basic MIME decoding - return decodeMimeHeader(headerValue.trim()); - } - } catch (Exception e) { - log.warn("Failed to decode MIME header, using original: {}", headerValue, e); - return headerValue; - } - } - - private static boolean isValidJakartaMailPart(Object part) { - if (part == null) return false; - - try { - // Check if 
the object implements jakarta.mail.Part interface - Class partInterface = Class.forName("jakarta.mail.Part"); - if (!partInterface.isInstance(part)) { - return false; - } - - // Additional check for MimePart - try { - Class mimePartInterface = Class.forName("jakarta.mail.internet.MimePart"); - return mimePartInterface.isInstance(part); - } catch (ClassNotFoundException e) { - // MimePart not available, but Part is sufficient - return true; - } - } catch (ClassNotFoundException e) { - log.debug("Jakarta Mail Part interface not available for validation"); - return false; - } - } - - private static boolean isValidJakartaMailMultipart(Object multipart) { - if (multipart == null) return false; - - try { - // Check if the object implements jakarta.mail.Multipart interface - Class multipartInterface = Class.forName("jakarta.mail.Multipart"); - if (!multipartInterface.isInstance(multipart)) { - return false; - } - - // Additional check for MimeMultipart - try { - Class mimeMultipartClass = Class.forName("jakarta.mail.internet.MimeMultipart"); - if (mimeMultipartClass.isInstance(multipart)) { - log.debug("Found MimeMultipart instance for enhanced processing"); - return true; - } - } catch (ClassNotFoundException e) { - log.debug("MimeMultipart not available, using base Multipart interface"); - } - - return true; - } catch (ClassNotFoundException e) { - log.debug("Jakarta Mail Multipart interface not available for validation"); - return false; - } - } - - @Data - public static class EmailContent { - private String subject; - private String from; - private String to; - private Date date; - private String htmlBody; - private String textBody; - private int attachmentCount; - private List attachments = new ArrayList<>(); - - public void setHtmlBody(String htmlBody) { - this.htmlBody = htmlBody != null ? htmlBody.replaceAll("\r", "") : null; - } - - public void setTextBody(String textBody) { - this.textBody = textBody != null ? 
textBody.replaceAll("\r", "") : null; - } - } - - @Data - public static class EmailAttachment { - private String filename; - private String contentType; - private byte[] data; - private boolean embedded; - private String embeddedFilename; - private long sizeBytes; - - // New fields for advanced processing - private String contentId; - private String disposition; - private String transferEncoding; - - // Custom setter to maintain size calculation logic - public void setData(byte[] data) { - this.data = data; - if (data != null) { - this.sizeBytes = data.length; - } - } - } - - @Data - public static class MarkerPosition { - private int pageIndex; - private float x; - private float y; - private String character; - - public MarkerPosition(int pageIndex, float x, float y, String character) { - this.pageIndex = pageIndex; - this.x = x; - this.y = y; - this.character = character; - } - } - - public static class AttachmentMarkerPositionFinder - extends org.apache.pdfbox.text.PDFTextStripper { - @Getter private final List positions = new ArrayList<>(); - private int currentPageIndex; - protected boolean sortByPosition; - private boolean isInAttachmentSection; - private boolean attachmentSectionFound; - - public AttachmentMarkerPositionFinder() { - super(); - this.currentPageIndex = 0; - this.sortByPosition = false; - this.isInAttachmentSection = false; - this.attachmentSectionFound = false; - } - - @Override - protected void startPage(org.apache.pdfbox.pdmodel.PDPage page) throws IOException { - super.startPage(page); - } - - @Override - protected void endPage(org.apache.pdfbox.pdmodel.PDPage page) throws IOException { - currentPageIndex++; - super.endPage(page); - } - - @Override - protected void writeString( - String string, List textPositions) - throws IOException { - // Check if we are entering or exiting the attachment section - String lowerString = string.toLowerCase(); - - // Look for attachment section start marker - if (lowerString.contains("attachments (")) { - 
isInAttachmentSection = true; - attachmentSectionFound = true; - } - - // Look for attachment section end markers (common patterns that indicate end of - // attachments) - if (isInAttachmentSection - && (lowerString.contains("") - || lowerString.contains("") - || (attachmentSectionFound - && lowerString.trim().isEmpty() - && string.length() > 50))) { - isInAttachmentSection = false; - } - - // Only look for markers if we are in the attachment section - if (isInAttachmentSection) { - String attachmentMarker = MimeConstants.ATTACHMENT_MARKER; - for (int i = 0; (i = string.indexOf(attachmentMarker, i)) != -1; i++) { - if (i < textPositions.size()) { - org.apache.pdfbox.text.TextPosition textPosition = textPositions.get(i); - MarkerPosition position = - new MarkerPosition( - currentPageIndex, - textPosition.getXDirAdj(), - textPosition.getYDirAdj(), - attachmentMarker); - positions.add(position); - } - } - } - super.writeString(string, textPositions); - } - - @Override - public void setSortByPosition(boolean sortByPosition) { - this.sortByPosition = sortByPosition; - } - } } diff --git a/app/common/src/main/java/stirling/software/common/util/PdfAttachmentHandler.java b/app/common/src/main/java/stirling/software/common/util/PdfAttachmentHandler.java new file mode 100644 index 000000000..2478aad94 --- /dev/null +++ b/app/common/src/main/java/stirling/software/common/util/PdfAttachmentHandler.java @@ -0,0 +1,680 @@ +package stirling.software.common.util; + +import static stirling.software.common.util.AttachmentUtils.setCatalogViewerPreferences; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import 
java.util.Map; +import java.util.Set; +import java.util.TimeZone; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; +import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; +import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PageMode; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification; +import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.text.PDFTextStripper; +import org.apache.pdfbox.text.TextPosition; +import org.jetbrains.annotations.NotNull; +import org.springframework.web.multipart.MultipartFile; + +import lombok.Data; +import lombok.Getter; +import lombok.experimental.UtilityClass; + +import stirling.software.common.service.CustomPDFDocumentFactory; + +@UtilityClass +public class PdfAttachmentHandler { + // Note: This class is designed for EML attachments, not general PDF attachments. 
// ---- Attachment embedding, marker discovery and annotation helpers ----

    /** Character searched for in the rendered text to locate an attachment entry. */
    private static final String ATTACHMENT_MARKER = "@";

    /** Width of the rectangle given to each file-attachment annotation. */
    private static final float ATTACHMENT_ICON_WIDTH = 12f;

    /** Height of the rectangle given to each file-attachment annotation. */
    private static final float ATTACHMENT_ICON_HEIGHT = 14f;

    /** Horizontal shift applied to the annotation rectangle relative to the marker. */
    private static final float ANNOTATION_X_OFFSET = 2f;

    /** Vertical shift applied to the annotation rectangle relative to the marker. */
    private static final float ANNOTATION_Y_OFFSET = 10f;

    /** URI scheme prefix that precedes the content id inside an inline image {@code src}. */
    private static final String CID_SCHEME = "cid:";

    /**
     * Matches an {@code <img>} tag whose {@code src} attribute is a quoted {@code cid:} reference.
     * Group 1 captures the content id, which starts immediately after the {@code cid:} prefix.
     */
    private static final Pattern INLINE_CID_IMAGE_PATTERN =
            Pattern.compile(
                    "<img[^>]*\\ssrc\\s*=\\s*['\"]cid:([^'\"]+)['\"][^>]*>",
                    Pattern.CASE_INSENSITIVE);

    /**
     * Embeds the given e-mail attachments into a PDF and links them from the page content.
     *
     * <p>Attachments without data are skipped. When at least one attachment is embedded, the
     * catalog is switched to {@link PageMode#USE_ATTACHMENTS} and file-attachment annotations are
     * placed next to the attachment markers found in the document text.
     *
     * @param pdfBytes the PDF to extend
     * @param attachments the attachments to embed; {@code null} or empty returns {@code pdfBytes}
     *     unchanged
     * @param pdfDocumentFactory factory used to load {@code pdfBytes}
     * @return the bytes of the saved document
     * @throws IOException if loading, processing or saving fails; runtime failures are wrapped
     */
    public static byte[] attachFilesToPdf(
            byte[] pdfBytes,
            List<EmlParser.EmailAttachment> attachments,
            CustomPDFDocumentFactory pdfDocumentFactory)
            throws IOException {

        if (attachments == null || attachments.isEmpty()) {
            return pdfBytes;
        }

        try (PDDocument document = pdfDocumentFactory.load(pdfBytes);
                ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {

            List<MultipartFile> multipartAttachments = new ArrayList<>(attachments.size());
            for (int i = 0; i < attachments.size(); i++) {
                EmlParser.EmailAttachment attachment = attachments.get(i);
                byte[] data = attachment.getData();
                if (data == null || data.length == 0) {
                    continue;
                }

                // A blank name is as unusable as a missing one, so both get the indexed
                // placeholder. The same name is recorded on the attachment and used for the
                // embedded file, keeping the two in agreement.
                String declaredName = attachment.getFilename();
                String embeddedFilename =
                        (declaredName != null && !declaredName.trim().isEmpty())
                                ? declaredName
                                : ("attachment_" + i);
                attachment.setEmbeddedFilename(embeddedFilename);
                multipartAttachments.add(createMultipartFile(attachment, embeddedFilename));
            }

            if (!multipartAttachments.isEmpty()) {
                Map<Integer, String> indexToFilenameMap =
                        addAttachmentsToDocumentWithMapping(
                                document, multipartAttachments, attachments);
                setCatalogViewerPreferences(document, PageMode.USE_ATTACHMENTS);
                addAttachmentAnnotationsToDocumentWithMapping(
                        document, attachments, indexToFilenameMap);
            }

            document.save(outputStream);
            return outputStream.toByteArray();
        } catch (RuntimeException e) {
            throw new IOException(
                    "Invalid PDF structure or processing error: " + e.getMessage(), e);
        } catch (Exception e) {
            throw new IOException("Error attaching files to PDF: " + e.getMessage(), e);
        }
    }

    /**
     * Wraps an attachment as a {@link MultipartFile}, using the attachment's own filename or a
     * timestamp-based placeholder when it has none.
     *
     * @param attachment the attachment to expose
     * @return a read-only view over the attachment
     */
    private static MultipartFile createMultipartFile(EmlParser.EmailAttachment attachment) {
        String declaredName = attachment.getFilename();
        return createMultipartFile(
                attachment,
                declaredName != null
                        ? declaredName
                        : "attachment_" + System.currentTimeMillis());
    }

    /**
     * Wraps an attachment as a {@link MultipartFile} that reports a fixed original filename.
     *
     * <p>The name is resolved once by the caller so that repeated calls to {@link
     * MultipartFile#getOriginalFilename()} always agree with each other.
     *
     * @param attachment the attachment whose data and content type are exposed
     * @param originalFilename the name reported by {@code getOriginalFilename()}
     * @return a read-only view over the attachment
     */
    private static MultipartFile createMultipartFile(
            EmlParser.EmailAttachment attachment, String originalFilename) {
        return new MultipartFile() {
            /** Returns the attachment bytes, or an empty array when there are none. */
            private byte[] payload() {
                byte[] data = attachment.getData();
                return data != null ? data : new byte[0];
            }

            @Override
            public @NotNull String getName() {
                return "attachment";
            }

            @Override
            public String getOriginalFilename() {
                return originalFilename;
            }

            @Override
            public String getContentType() {
                String contentType = attachment.getContentType();
                return contentType != null ? contentType : "application/octet-stream";
            }

            @Override
            public boolean isEmpty() {
                return payload().length == 0;
            }

            @Override
            public long getSize() {
                return payload().length;
            }

            @Override
            public byte @NotNull [] getBytes() {
                return payload();
            }

            @Override
            public @NotNull InputStream getInputStream() {
                return new ByteArrayInputStream(payload());
            }

            @Override
            public void transferTo(@NotNull File dest) throws IOException, IllegalStateException {
                // The destination is created even when there is nothing to write.
                try (FileOutputStream fos = new FileOutputStream(dest)) {
                    fos.write(payload());
                }
            }
        };
    }

    /**
     * Returns {@code filename} if it is not taken, otherwise the first free variant of the form
     * {@code base_N.ext} with {@code N} counting up from 1.
     *
     * @param filename the preferred name
     * @param existingNames names that are already in use
     * @return a name not contained in {@code existingNames}
     */
    private static String ensureUniqueFilename(String filename, Set<String> existingNames) {
        if (!existingNames.contains(filename)) {
            return filename;
        }

        // A dot at index 0 is treated as part of the base name, not as an extension separator.
        int lastDot = filename.lastIndexOf('.');
        String baseName = lastDot > 0 ? filename.substring(0, lastDot) : filename;
        String extension = lastDot > 0 ? filename.substring(lastDot) : "";

        int counter = 1;
        String candidate = baseName + "_" + counter + extension;
        while (existingNames.contains(candidate)) {
            counter++;
            candidate = baseName + "_" + counter + extension;
        }
        return candidate;
    }

    /**
     * Computes the annotation rectangle for a marker found at the given text position.
     *
     * <p>The y value is flipped against the crop box height (presumably because the text
     * positions are measured from the top edge; confirm against the caller), adjusted for the
     * page rotation, shifted by the annotation offsets and finally clamped into the media box.
     *
     * @param page the page the marker is on
     * @param x the marker's x position as reported by the text scan
     * @param y the marker's y position as reported by the text scan
     * @return a rectangle of the attachment icon size that lies inside the media box
     */
    private static @NotNull PDRectangle calculateAnnotationRectangle(
            PDPage page, float x, float y) {
        PDRectangle cropBox = page.getCropBox();

        // ISO 32000-1:2008 Section 8.3: PDF coordinate system transforms
        int rotation = page.getRotation();
        float pdfX = x;
        float pdfY = cropBox.getHeight() - y;

        switch (rotation) {
            case 90 -> {
                float temp = pdfX;
                pdfX = pdfY;
                pdfY = cropBox.getWidth() - temp;
            }
            case 180 -> {
                pdfX = cropBox.getWidth() - pdfX;
                pdfY = y;
            }
            case 270 -> {
                float temp = pdfX;
                pdfX = cropBox.getHeight() - pdfY;
                pdfY = temp;
            }
            default -> {}
        }

        float iconHeight = ATTACHMENT_ICON_HEIGHT;
        float paddingX = 2.0f;
        float paddingY = 2.0f;

        PDRectangle rect =
                new PDRectangle(
                        pdfX + ANNOTATION_X_OFFSET + paddingX,
                        pdfY - iconHeight + ANNOTATION_Y_OFFSET + paddingY,
                        ATTACHMENT_ICON_WIDTH,
                        iconHeight);

        PDRectangle mediaBox = page.getMediaBox();
        boolean outsideMediaBox =
                rect.getLowerLeftX() < mediaBox.getLowerLeftX()
                        || rect.getLowerLeftY() < mediaBox.getLowerLeftY()
                        || rect.getUpperRightX() > mediaBox.getUpperRightX()
                        || rect.getUpperRightY() > mediaBox.getUpperRightY();
        if (outsideMediaBox) {
            // Slide the rectangle back inside the media box without changing its size.
            float adjustedX =
                    Math.max(
                            mediaBox.getLowerLeftX(),
                            Math.min(
                                    rect.getLowerLeftX(),
                                    mediaBox.getUpperRightX() - rect.getWidth()));
            float adjustedY =
                    Math.max(
                            mediaBox.getLowerLeftY(),
                            Math.min(
                                    rect.getLowerLeftY(),
                                    mediaBox.getUpperRightY() - rect.getHeight()));
            rect = new PDRectangle(adjustedX, adjustedY, rect.getWidth(), rect.getHeight());
        }

        return rect;
    }

    /**
     * Replaces {@code cid:} image references in the HTML with {@code data:} URIs built from the
     * matching embedded attachments.
     *
     * <p>Only attachments that are flagged as embedded and have both a content id and data are
     * considered. Image tags whose content id has no such attachment are left untouched.
     *
     * @param htmlContent the HTML to rewrite; returned as is when {@code null}
     * @param emailContent the e-mail whose attachments supply the image data; when {@code null}
     *     the HTML is returned as is
     * @return the HTML with resolvable inline images inlined as base64 data URIs
     */
    public static String processInlineImages(
            String htmlContent, EmlParser.EmailContent emailContent) {
        if (htmlContent == null || emailContent == null) return htmlContent;

        List<EmlParser.EmailAttachment> candidates = emailContent.getAttachments();
        if (candidates == null) return htmlContent;

        Map<String, EmlParser.EmailAttachment> contentIdMap = new HashMap<>();
        for (EmlParser.EmailAttachment attachment : candidates) {
            if (attachment.isEmbedded()
                    && attachment.getContentId() != null
                    && attachment.getData() != null) {
                contentIdMap.put(attachment.getContentId(), attachment);
            }
        }

        if (contentIdMap.isEmpty()) return htmlContent;

        Matcher matcher = INLINE_CID_IMAGE_PATTERN.matcher(htmlContent);
        StringBuilder result = new StringBuilder();
        while (matcher.find()) {
            String tag = matcher.group();
            String replacement = tag;

            EmlParser.EmailAttachment attachment = contentIdMap.get(matcher.group(1));
            if (attachment != null && attachment.getData() != null) {
                String mimeType =
                        EmlProcessingUtils.detectMimeType(
                                attachment.getFilename(), attachment.getContentType());
                String base64Data = Base64.getEncoder().encodeToString(attachment.getData());
                String dataUri = "data:" + mimeType + ";base64," + base64Data;

                // Splice by offset rather than by text search: this swaps exactly the src
                // value, works for any casing of the "cid:" prefix, and never interprets the
                // data URI as a regex replacement template.
                int referenceStart = matcher.start(1) - matcher.start() - CID_SCHEME.length();
                int referenceEnd = matcher.end(1) - matcher.start();
                replacement =
                        tag.substring(0, referenceStart) + dataUri + tag.substring(referenceEnd);
            }

            matcher.appendReplacement(result, Matcher.quoteReplacement(replacement));
        }
        matcher.appendTail(result);

        return result.toString();
    }

    /**
     * Formats a date for display in UTC with English month and day names.
     *
     * @param date the date to format
     * @return the formatted date, or an empty string when {@code date} is {@code null}
     */
    public static String formatEmailDate(Date date) {
        if (date == null) return "";

        // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
        SimpleDateFormat formatter =
                new SimpleDateFormat("EEE, MMM d, yyyy 'at' h:mm a z", Locale.ENGLISH);
        formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
        return formatter.format(date);
    }

    /** Location of one attachment marker in the document text, with the name found next to it. */
    @Data
    public static class MarkerPosition {
        private int pageIndex;
        private float x;
        private float y;
        private String character;
        private String filename;

        /**
         * @param pageIndex zero-based index of the page the marker was found on
         * @param x x position reported for the marker glyph
         * @param y y position reported for the marker glyph
         * @param character the marker text
         * @param filename the filename parsed after the marker, or {@code null} if none was found
         */
        public MarkerPosition(int pageIndex, float x, float y, String character, String filename) {
            this.pageIndex = pageIndex;
            this.x = x;
            this.y = y;
            this.character = character;
            this.filename = filename;
        }
    }

    /**
     * Text stripper that records where attachment markers appear inside the attachment listing
     * of a document.
     *
     * <p>Recording starts at a text chunk matching {@code attachments (N)} and stops at an
     * end-of-content sentinel or at a long whitespace-only chunk. Use {@link #getPositions()}
     * after {@link #getText(PDDocument)}; the text result itself is always empty.
     */
    public static class AttachmentMarkerPositionFinder extends PDFTextStripper {
        @Getter private final List<MarkerPosition> positions = new ArrayList<>();
        private int currentPageIndex;
        protected boolean sortByPosition;
        private boolean isInAttachmentSection;

        private static final Pattern ATTACHMENT_SECTION_PATTERN =
                Pattern.compile("attachments\\s*\\(\\d+\\)", Pattern.CASE_INSENSITIVE);

        private static final Pattern FILENAME_PATTERN =
                Pattern.compile("@\\s*([^\\s\\(]+(?:\\.[a-zA-Z0-9]+)?)");

        // NOTE(review): closing-tag sentinels taken as the end-of-content markers; confirm
        // against the HTML template that produces the attachment listing. They must never be
        // empty, because String.contains("") is true for every chunk and would close the
        // section on the very chunk that opened it.
        private static final String[] CONTENT_END_MARKERS = {"</body>", "</html>"};

        /** Minimum length of a whitespace-only chunk that is treated as the end of the section. */
        private static final int BLANK_RUN_END_LENGTH = 50;

        public AttachmentMarkerPositionFinder() {
            super();
            this.currentPageIndex = 0;
            this.sortByPosition = false; // Disable sorting to preserve document order
            this.isInAttachmentSection = false;
        }

        /**
         * Scans the document and fills {@link #getPositions()}.
         *
         * @param document the document to scan
         * @return always an empty string; only the recorded positions are of interest
         * @throws IOException if text extraction fails
         */
        @Override
        public String getText(PDDocument document) throws IOException {
            // Start every scan from a clean slate so an instance can be reused.
            positions.clear();
            currentPageIndex = 0;
            isInAttachmentSection = false;

            super.getText(document);

            if (sortByPosition) {
                positions.sort(
                        (a, b) -> {
                            int pageCompare = Integer.compare(a.getPageIndex(), b.getPageIndex());
                            if (pageCompare != 0) return pageCompare;
                            return Float.compare(
                                    b.getY(), a.getY()); // Descending Y per PDF coordinate system
                        });
            }

            return ""; // Return empty string as we only need positions
        }

        @Override
        protected void endPage(PDPage page) throws IOException {
            currentPageIndex++;
            super.endPage(page);
        }

        @Override
        protected void writeString(String string, List<TextPosition> textPositions)
                throws IOException {
            String lowerString = string.toLowerCase(Locale.ROOT);

            if (ATTACHMENT_SECTION_PATTERN.matcher(lowerString).find()) {
                isInAttachmentSection = true;
            }

            if (isInAttachmentSection && endsAttachmentSection(string, lowerString)) {
                isInAttachmentSection = false;
            }

            if (isInAttachmentSection) {
                recordMarkers(string, textPositions);
            }
            super.writeString(string, textPositions);
        }

        /**
         * Stores the flag that decides whether {@link #getText(PDDocument)} sorts the recorded
         * positions. The value is not forwarded to the superclass.
         */
        @Override
        public void setSortByPosition(boolean sortByPosition) {
            this.sortByPosition = sortByPosition;
        }

        /** Tells whether the given chunk marks the end of the attachment listing. */
        private boolean endsAttachmentSection(String string, String lowerString) {
            for (String endMarker : CONTENT_END_MARKERS) {
                if (lowerString.contains(endMarker)) {
                    return true;
                }
            }
            return lowerString.trim().isEmpty() && string.length() > BLANK_RUN_END_LENGTH;
        }

        /** Records one position per marker occurrence in the chunk. */
        private void recordMarkers(String string, List<TextPosition> textPositions) {
            int markerIndex = string.indexOf(ATTACHMENT_MARKER);
            while (markerIndex != -1) {
                // The character index is used as the glyph index; skip markers for which no
                // text position is available at that index.
                if (markerIndex < textPositions.size()) {
                    TextPosition textPosition = textPositions.get(markerIndex);
                    positions.add(
                            new MarkerPosition(
                                    currentPageIndex,
                                    textPosition.getXDirAdj(),
                                    textPosition.getYDirAdj(),
                                    ATTACHMENT_MARKER,
                                    extractFilenameAfterMarker(string, markerIndex)));
                }
                markerIndex = string.indexOf(ATTACHMENT_MARKER, markerIndex + 1);
            }
        }

        /**
         * Parses the filename that follows the marker at {@code markerIndex}.
         *
         * @return the first whitespace-delimited token after the marker, otherwise the first
         *     dotted token longer than three characters, otherwise {@code null}
         */
        private String extractFilenameAfterMarker(String text, int markerIndex) {
            String afterMarker = text.substring(markerIndex + 1);

            Matcher matcher = FILENAME_PATTERN.matcher("@" + afterMarker);
            if (matcher.find()) {
                return matcher.group(1);
            }

            for (String part : afterMarker.split("[\\s\\(\\)]+")) {
                String candidate = part.trim();
                if (candidate.length() > 3 && candidate.contains(".")) {
                    return candidate;
                }
            }

            return null;
        }
    }

    /**
     * Adds the given files to the document's embedded-files name tree.
     *
     * <p>Names that collide with entries already in the tree, or with each other, are made unique.
     * Non-ASCII names are normalized to NFC first.
     *
     * @param document the document to extend
     * @param attachments the files to embed
     * @param originalAttachments the e-mail attachments the files were built from; not read here
     * @return for each index of {@code attachments}, the name the file was embedded under
     * @throws IOException if the catalog is not accessible or a file cannot be embedded
     */
    private static Map<Integer, String> addAttachmentsToDocumentWithMapping(
            PDDocument document,
            List<MultipartFile> attachments,
            List<EmlParser.EmailAttachment> originalAttachments)
            throws IOException {

        PDDocumentCatalog catalog = document.getDocumentCatalog();

        if (catalog == null) {
            throw new IOException("PDF document catalog is not accessible");
        }

        PDDocumentNameDictionary documentNames = catalog.getNames();
        if (documentNames == null) {
            documentNames = new PDDocumentNameDictionary(catalog);
            catalog.setNames(documentNames);
        }

        PDEmbeddedFilesNameTreeNode embeddedFilesTree = documentNames.getEmbeddedFiles();
        if (embeddedFilesTree == null) {
            embeddedFilesTree = new PDEmbeddedFilesNameTreeNode();
            documentNames.setEmbeddedFiles(embeddedFilesTree);
        }

        // Work on a private copy: the map handed out by the name tree may be read-only, and
        // writing to it would fail for documents that already contain embedded files.
        Map<String, PDComplexFileSpecification> currentNames = embeddedFilesTree.getNames();
        Map<String, PDComplexFileSpecification> existingNames =
                currentNames != null ? new HashMap<>(currentNames) : new HashMap<>();

        Map<Integer, String> indexToFilenameMap = new HashMap<>();

        for (int i = 0; i < attachments.size(); i++) {
            MultipartFile attachment = attachments.get(i);
            String filename = attachment.getOriginalFilename();
            if (filename == null || filename.trim().isEmpty()) {
                filename = "attachment_" + i;
            }

            String normalizedFilename =
                    isAscii(filename)
                            ? filename
                            : java.text.Normalizer.normalize(
                                    filename, java.text.Normalizer.Form.NFC);
            String uniqueFilename =
                    ensureUniqueFilename(normalizedFilename, existingNames.keySet());

            indexToFilenameMap.put(i, uniqueFilename);

            PDEmbeddedFile embeddedFile = new PDEmbeddedFile(document, attachment.getInputStream());
            embeddedFile.setSize((int) attachment.getSize());

            GregorianCalendar currentTime = new GregorianCalendar();
            embeddedFile.setCreationDate(currentTime);
            embeddedFile.setModDate(currentTime);

            String contentType = attachment.getContentType();
            if (contentType != null && !contentType.trim().isEmpty()) {
                embeddedFile.setSubtype(contentType);
            }

            PDComplexFileSpecification fileSpecification = new PDComplexFileSpecification();
            fileSpecification.setFile(uniqueFilename);
            fileSpecification.setFileUnicode(uniqueFilename);
            fileSpecification.setEmbeddedFile(embeddedFile);
            fileSpecification.setEmbeddedFileUnicode(embeddedFile);

            existingNames.put(uniqueFilename, fileSpecification);
        }

        embeddedFilesTree.setNames(existingNames);
        documentNames.setEmbeddedFiles(embeddedFilesTree);
        catalog.setNames(documentNames);

        return indexToFilenameMap;
    }

    /**
     * Places a file-attachment annotation at each attachment marker found in the document text.
     *
     * <p>Markers are paired with attachments through the filename parsed next to the marker. A
     * marker is skipped when no attachment, or no embedded file for that attachment, can be
     * found.
     *
     * @param document the document that already contains the embedded files
     * @param attachments the e-mail attachments to link
     * @param indexToFilenameMap the embedded names returned when the files were added
     * @throws IOException if text extraction or annotation creation fails
     */
    private static void addAttachmentAnnotationsToDocumentWithMapping(
            PDDocument document,
            List<EmlParser.EmailAttachment> attachments,
            Map<Integer, String> indexToFilenameMap)
            throws IOException {

        if (document.getNumberOfPages() == 0 || attachments == null || attachments.isEmpty()) {
            return;
        }

        AttachmentMarkerPositionFinder finder = new AttachmentMarkerPositionFinder();
        finder.setSortByPosition(false); // Keep document order to maintain pairing
        finder.getText(document);
        List<MarkerPosition> markerPositions = finder.getPositions();

        int annotationsToAdd = Math.min(markerPositions.size(), attachments.size());

        for (int i = 0; i < annotationsToAdd; i++) {
            MarkerPosition position = markerPositions.get(i);

            EmlParser.EmailAttachment matchingAttachment =
                    findAttachmentByFilename(attachments, position.getFilename());
            if (matchingAttachment == null) {
                continue; // No attachment matches the filename near this marker
            }

            String embeddedFilename =
                    findEmbeddedFilenameForAttachment(matchingAttachment, indexToFilenameMap);
            if (embeddedFilename == null) {
                continue; // The attachment was not embedded
            }

            PDPage page = document.getPage(position.getPageIndex());
            addAttachmentAnnotationToPageWithMapping(
                    document,
                    page,
                    matchingAttachment,
                    embeddedFilename,
                    position.getX(),
                    position.getY(),
                    i);
        }
    }

    /**
     * Finds the attachment whose filename best matches {@code targetFilename}.
     *
     * <p>Names are compared in normalized form. An exact match wins; otherwise the first
     * attachment whose name contains the target, or is contained in it, is returned.
     *
     * @param attachments the attachments to search
     * @param targetFilename the name to look for
     * @return the matching attachment, or {@code null} if there is none or the target is blank
     */
    private static EmlParser.EmailAttachment findAttachmentByFilename(
            List<EmlParser.EmailAttachment> attachments, String targetFilename) {
        if (targetFilename == null || targetFilename.trim().isEmpty()) {
            return null;
        }

        String normalizedTarget = normalizeFilename(targetFilename);

        // First try exact match
        for (EmlParser.EmailAttachment attachment : attachments) {
            if (attachment.getFilename() != null
                    && normalizeFilename(attachment.getFilename()).equals(normalizedTarget)) {
                return attachment;
            }
        }

        // Then try contains match. Every string contains the empty string, so a name that
        // normalizes to nothing must not take part, or it would match arbitrarily.
        if (normalizedTarget.isEmpty()) {
            return null;
        }
        for (EmlParser.EmailAttachment attachment : attachments) {
            if (attachment.getFilename() == null) {
                continue;
            }
            String normalizedAttachment = normalizeFilename(attachment.getFilename());
            if (normalizedAttachment.isEmpty()) {
                continue;
            }
            if (normalizedAttachment.contains(normalizedTarget)
                    || normalizedTarget.contains(normalizedAttachment)) {
                return attachment;
            }
        }

        return null;
    }

    /**
     * Looks up the name under which an attachment was embedded.
     *
     * <p>An embedded name equal to the attachment's filename is preferred. Only when there is
     * none does the lookup fall back to a containment match in either direction, so that for
     * example {@code a.pdf} is not resolved to {@code data.pdf} while {@code a.pdf} itself is
     * embedded.
     *
     * @param attachment the attachment to resolve
     * @param indexToFilenameMap the embedded names returned when the files were added
     * @return the embedded name, or {@code null} if the attachment has no filename or no
     *     embedded name matches
     */
    private static String findEmbeddedFilenameForAttachment(
            EmlParser.EmailAttachment attachment, Map<Integer, String> indexToFilenameMap) {

        String attachmentFilename = attachment.getFilename();
        if (attachmentFilename == null) {
            return null;
        }

        if (indexToFilenameMap.containsValue(attachmentFilename)) {
            return attachmentFilename;
        }

        for (String embeddedFilename : indexToFilenameMap.values()) {
            if (embeddedFilename != null
                    && (embeddedFilename.contains(attachmentFilename)
                            || attachmentFilename.contains(embeddedFilename))) {
                return embeddedFilename;
            }
        }

        return null;
    }

    /**
     * Reduces a filename to a comparison key: lower-cased and stripped of every character other
     * than ASCII letters, digits, dot, underscore and hyphen.
     *
     * @param filename the name to normalize
     * @return the comparison key, or an empty string when {@code filename} is {@code null}
     */
    private static String normalizeFilename(String filename) {
        if (filename == null) return "";
        // Locale.ROOT keeps the key independent of the JVM's default locale.
        return filename.toLowerCase(Locale.ROOT).replaceAll("[^a-zA-Z0-9._-]", "");
    }

    /**
     * Adds one file-attachment annotation that points at an embedded file.
     *
     * <p>Nothing is added when the embedded file cannot be resolved, so that no annotation
     * without a file specification ends up on the page.
     *
     * @param document the document that holds the embedded files
     * @param page the page that receives the annotation
     * @param attachment the attachment the annotation describes
     * @param embeddedFilename the key of the embedded file in the name tree
     * @param x the marker's x position as reported by the text scan
     * @param y the marker's y position as reported by the text scan
     * @param attachmentIndex zero-based position used in the annotation's contents and name
     * @throws IOException if the name tree or the page annotations cannot be read
     */
    private static void addAttachmentAnnotationToPageWithMapping(
            PDDocument document,
            PDPage page,
            EmlParser.EmailAttachment attachment,
            String embeddedFilename,
            float x,
            float y,
            int attachmentIndex)
            throws IOException {

        PDDocumentNameDictionary documentNames = document.getDocumentCatalog().getNames();
        PDEmbeddedFilesNameTreeNode efTree =
                documentNames != null ? documentNames.getEmbeddedFiles() : null;
        Map<String, PDComplexFileSpecification> efMap =
                efTree != null ? efTree.getNames() : null;
        PDComplexFileSpecification fileSpec = efMap != null ? efMap.get(embeddedFilename) : null;
        if (fileSpec == null) {
            return; // Could not find embedded file
        }

        PDAnnotationFileAttachment fileAnnotation = new PDAnnotationFileAttachment();

        PDRectangle rect = calculateAnnotationRectangle(page, x, y);
        fileAnnotation.setRectangle(rect);

        fileAnnotation.setPrinted(false);
        fileAnnotation.setHidden(false);
        fileAnnotation.setNoView(false);
        fileAnnotation.setNoZoom(true);
        fileAnnotation.setNoRotate(true);

        try {
            // An appearance stream without content of the same size as the rectangle.
            PDAppearanceDictionary appearance = new PDAppearanceDictionary();
            PDAppearanceStream normalAppearance = new PDAppearanceStream(document);
            normalAppearance.setBBox(new PDRectangle(0, 0, rect.getWidth(), rect.getHeight()));
            appearance.setNormalAppearance(normalAppearance);
            fileAnnotation.setAppearance(appearance);
        } catch (RuntimeException e) {
            // Best effort: fall back to no explicit appearance.
            fileAnnotation.setAppearance(null);
        }

        fileAnnotation.setFile(fileSpec);
        fileAnnotation.setContents(
                "Attachment " + (attachmentIndex + 1) + ": " + attachment.getFilename());
        fileAnnotation.setAnnotationName(
                "EmbeddedFile_" + attachmentIndex + "_" + embeddedFilename);

        page.getAnnotations().add(fileAnnotation);
    }

    /**
     * Tells whether a string consists only of characters in the 7-bit ASCII range.
     *
     * @param str the string to test
     * @return {@code true} for {@code null}, an empty string, or a string without characters
     *     above 127
     */
    private static boolean isAscii(String str) {
        if (str == null) return true;
        for (int i = 0; i < str.length(); i++) {
            if (str.charAt(i) > 127) {
                return false;
            }
        }
        return true;
    }
}