2023-05-22 20:32:50 +01:00
import argparse
2023-05-01 21:57:48 +01:00
import sys
import cv2
import numpy as np
import os
def find_photo_boundaries(image, background_color, tolerance=30, min_area=10000, min_contour_area=500):
    """Return bounding boxes of the non-background regions found in ``image``.

    Args:
        image: Image array handed to ``cv2.inRange``.
        background_color: Colour treated as background; must support
            arithmetic with ``tolerance`` (e.g. a NumPy array).
        tolerance: Offset subtracted from / added to ``background_color`` to
            form the lower and upper bounds of the background range.
        min_area: Minimum bounding-box area (``w * h``) for a region to be kept.
        min_contour_area: Minimum ``cv2.contourArea`` for a region to be kept.

    Returns:
        A list of ``(x, y, w, h)`` tuples, one per external contour that
        passes both area thresholds.
    """
    lower_bound = background_color - tolerance
    upper_bound = background_color + tolerance

    # inRange marks background pixels; inverting leaves the foreground set.
    foreground = cv2.bitwise_not(cv2.inRange(image, lower_bound, upper_bound))

    # Two dilation passes with a 5x5 kernel grow the foreground mask before
    # contours are extracted.
    foreground = cv2.dilate(foreground, np.ones((5, 5), np.uint8), iterations=2)

    outlines, _hierarchy = cv2.findContours(foreground, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    boxes = []
    for outline in outlines:
        left, top, box_w, box_h = cv2.boundingRect(outline)
        if box_w * box_h < min_area:
            continue
        if cv2.contourArea(outline) < min_contour_area:
            continue
        boxes.append((left, top, box_w, box_h))
    return boxes
def estimate_background_color(image, sample_points=5):
    """Estimate the background colour from the corners and centre of ``image``.

    The four corner pixels and the centre pixel are sampled and their
    per-channel median is returned, so a single outlier (for example a photo
    covering the centre) does not change the result.

    Args:
        image: Array indexed as ``image[y, x]``. Both multi-channel
            ``(h, w, c)`` and single-channel ``(h, w)`` arrays are accepted.
        sample_points: Accepted for backward compatibility; the function
            always samples the same five fixed locations and does not read
            this value.

    Returns:
        The median of the sampled pixels along axis 0: one value per channel
        for a multi-channel image, a scalar for a single-channel image.
    """
    # Only the first two dimensions are needed; slicing (rather than unpacking
    # three values) keeps 2-D grayscale input from raising ValueError.
    h, w = image.shape[:2]
    points = (
        (0, 0),
        (w - 1, 0),
        (w - 1, h - 1),
        (0, h - 1),
        (w // 2, h // 2),
    )
    colors = [image[y, x] for x, y in points]
    return np.median(colors, axis=0)
2023-05-22 20:32:50 +01:00
def auto_rotate(image, angle_threshold=1):
    """Deskew ``image`` based on the median direction of its Hough lines.

    Edges are detected with Canny and straight lines with ``cv2.HoughLines``.
    Each line's angle is folded into the range [-45, 45) degrees so that the
    horizontal and vertical edges of the same rectangle vote for the same
    skew; without the fold, vertical edges contribute roughly +/-90 degrees
    and can drag the median to a quarter turn.

    Args:
        image: Image array passed to ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``.
        angle_threshold: Minimum absolute skew, in degrees, required before
            the image is rotated.

    Returns:
        ``image`` itself when no lines are found or the skew is below
        ``angle_threshold``; otherwise a rotated copy with the same width and
        height, using cubic interpolation and replicated borders.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)

    if lines is None:
        return image

    # theta is the angle of each line's normal; subtracting 90 degrees gives
    # the deviation of the line itself from horizontal.
    angles = np.degrees(lines[:, 0, 1].astype(np.float64)) - 90.0

    # Fold into [-45, 45): a line tilted by d and its perpendicular both map
    # to d, so mixed horizontal/vertical edges agree on one skew value.
    angles = (angles + 45.0) % 90.0 - 45.0

    # Plain float so the value is passed to OpenCV as an ordinary double.
    angle = float(np.median(angles))

    if abs(angle) < angle_threshold:
        return image

    h, w = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
2023-05-22 20:32:50 +01:00
2023-05-01 21:57:48 +01:00
def crop_borders(image, border_color, tolerance=30):
    """Crop ``image`` to the bounding box of its largest border-coloured region.

    Args:
        image: Image array handed to ``cv2.inRange``.
        border_color: Colour to match; must support arithmetic with
            ``tolerance`` (e.g. a NumPy array).
        tolerance: Offset subtracted from / added to ``border_color`` to form
            the matching range.

    Returns:
        ``image`` unchanged when no matching region is found; otherwise the
        slice of ``image`` covered by the bounding rectangle of the matching
        contour with the largest ``cv2.contourArea``.
    """
    # NOTE(review): the mask selects pixels that MATCH border_color and is not
    # inverted, so the crop is the extent of the border-coloured region
    # itself. Confirm this is intended given the function's name.
    border_mask = cv2.inRange(image, border_color - tolerance, border_color + tolerance)

    regions, _hierarchy = cv2.findContours(border_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not regions:
        return image

    biggest = max(regions, key=cv2.contourArea)
    left, top, width, height = cv2.boundingRect(biggest)
    bottom = top + height
    right = left + width
    return image[top:bottom, left:right]
2024-02-11 11:47:00 -05:00
2023-05-01 21:57:48 +01:00
def split_photos(input_file, output_directory, tolerance=30, min_area=10000, min_contour_area=500, angle_threshold=10, border_size=0):
    """Split a scanned image into individual photos and save each as a PNG.

    The background colour is estimated from the input, a constant border of
    that colour is added, photo regions are located, and every region is
    cropped, deskewed and written to ``output_directory`` as
    ``<input basename>_<index>.png`` (index starting at 1).

    Args:
        input_file: Path of the scanned image to read.
        output_directory: Directory for the output files; created if missing.
        tolerance: Colour tolerance around the estimated background colour.
        min_area: Minimum bounding-box area for a region to count as a photo.
        min_contour_area: Minimum contour area for a region to count as a photo.
        angle_threshold: Minimum absolute skew (degrees) before a crop is rotated.
        border_size: Width in pixels of the border added around the input and
            trimmed again from each crop.

    Raises:
        ValueError: If ``input_file`` cannot be read as an image.
    """
    image = cv2.imread(input_file)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than an AttributeError later.
        raise ValueError(f"Could not read image: {input_file}")

    background_color = estimate_background_color(image)

    # Add a constant border around the image
    image = cv2.copyMakeBorder(image, border_size, border_size, border_size, border_size, cv2.BORDER_CONSTANT, value=background_color)

    # Forward the area thresholds so the caller's (and CLI's) values take
    # effect instead of the callee's defaults.
    photo_boundaries = find_photo_boundaries(image, background_color, tolerance, min_area, min_contour_area)

    os.makedirs(output_directory, exist_ok=True)

    # Get the input file's base name without the extension
    input_file_basename = os.path.splitext(os.path.basename(input_file))[0]

    for idx, (x, y, w, h) in enumerate(photo_boundaries):
        cropped_image = image[y:y + h, x:x + w]
        cropped_image = auto_rotate(cropped_image, angle_threshold)

        # Remove the added border, but ensure we don't create an empty image
        if border_size > 0 and cropped_image.shape[0] > 2 * border_size and cropped_image.shape[1] > 2 * border_size:
            cropped_image = cropped_image[border_size:-border_size, border_size:-border_size]

        # Check if the cropped image is valid before saving
        if cropped_image.size == 0 or cropped_image.shape[0] == 0 or cropped_image.shape[1] == 0:
            print(f"Warning: Skipping empty image for region {idx+1}")
            continue

        output_path = os.path.join(output_directory, f"{input_file_basename}_{idx+1}.png")
        # imwrite reports failure through its return value; do not claim the
        # file was saved when it was not.
        if not cv2.imwrite(output_path, cropped_image):
            print(f"Warning: Failed to write {output_path}")
            continue
        print(f"Saved {output_path}")
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and forward them to
    # split_photos as keyword arguments.
    cli = argparse.ArgumentParser(description="Split photos in an image")
    cli.add_argument("input_file", help="The input scanned image containing multiple photos.")
    cli.add_argument("output_directory", help="The directory where the result images should be placed.")

    # (flag, default, help text) for each optional integer setting, listed in
    # the order they should appear in the generated help output.
    integer_options = (
        ("--tolerance", 30, "Determines the range of color variation around the estimated background color (default: 30)."),
        ("--min_area", 10000, "Sets the minimum area threshold for a photo (default: 10000)."),
        ("--min_contour_area", 500, "Sets the minimum contour area threshold for a photo (default: 500)."),
        ("--angle_threshold", 10, "Sets the minimum absolute angle required for the image to be rotated (default: 10)."),
        ("--border_size", 0, "Sets the size of the border added and removed to prevent white borders in the output (default: 0)."),
    )
    for flag, default_value, help_text in integer_options:
        cli.add_argument(flag, type=int, default=default_value, help=help_text)

    options = cli.parse_args()

    split_photos(
        options.input_file,
        options.output_directory,
        tolerance=options.tolerance,
        min_area=options.min_area,
        min_contour_area=options.min_contour_area,
        angle_threshold=options.angle_threshold,
        border_size=options.border_size,
    )