arunavabasucom's picture
feat: adding all dependecies
fba6da1 unverified
import cv2
import re
import os
import glob
import sys
from fpdf import FPDF
from pdf2image import convert_from_path
from PIL import Image as PILImage
class Converter:
def __init__(self):
self.a4_w_mm = 210
self.a4_h_mm = 297
self.dpi = 200.0
def invert_image(self, i_input, i_output):
"""Inverts a given image."""
image = cv2.imread(i_input)
print("Inverting image: {}".format(i_input))
if image is None:
print("Error reading image: {}".format(i_input))
return
image = ~image
cv2.imwrite(i_output, image)
def pdf_to_img_all(self, file_path, o_dir):
"""Converts all PDF pages to JPEG images."""
if not os.path.exists(o_dir):
os.makedirs(o_dir)
pages = convert_from_path(file_path, dpi=self.dpi)
for i, image in enumerate(pages):
output_path = os.path.join(o_dir, f"{i+1}.jpeg")
image.save(output_path, 'JPEG', quality=95)
print("Saved image: {}".format(output_path))
def get_scaled_dimensions(self, width_pixels, height_pixels):
"""Calculate scaled dimensions maintaining aspect ratio."""
width_ratio = width_pixels / height_pixels
if width_ratio > 1: # Landscape
w = self.a4_h_mm
h = self.a4_h_mm / width_ratio
return 'L', w, h
else: # Portrait
h = self.a4_h_mm
w = self.a4_h_mm * width_ratio
return 'P', w, h
def img_to_pdf(self, i_dir, o_dir, filename, invert_pages):
"""Combines images into PDF, inverting specified pages."""
pdf = FPDF(unit="mm", format='A4')
pdf.set_auto_page_break(auto=False, margin=0)
pdf.set_margins(0, 0, 0)
# Collect and sort image files
filepaths = []
for filepath in glob.iglob(os.path.join(i_dir, '*.jpeg')):
filepaths.append(filepath)
pages = []
for path in filepaths:
base = os.path.basename(path)
match = re.search(r'(\d+)\.jpeg', base)
if match:
pages.append((int(match.group(1)), path))
pages.sort(key=lambda x: x[0])
# Process each page
for page_num, img_path in pages:
if page_num in invert_pages:
self.invert_image(img_path, img_path)
else:
print("Keeping original image: {}".format(img_path))
with PILImage.open(img_path) as img:
width_pixels, height_pixels = img.size
# Get scaled dimensions and orientation
orientation, w, h = self.get_scaled_dimensions(width_pixels, height_pixels)
# Add page with proper orientation
pdf.add_page(orientation=orientation)
# Center image on page
x = (self.a4_w_mm if orientation == 'P' else self.a4_h_mm - w) / 2
y = (self.a4_h_mm if orientation == 'P' else self.a4_w_mm - h) / 2
# Place image
pdf.image(img_path, x=x, y=y, w=w, h=h)
print(f"Added {img_path} to PDF ({orientation})")
# Save output
if not os.path.exists(o_dir):
os.makedirs(o_dir)
output_pdf = os.path.join(o_dir, filename)
pdf.output(output_pdf, "F")
print("Generated PDF: {}".format(output_pdf))
def parse_page_ranges(range_str):
"""Converts range string to list of page numbers."""
pages = set()
parts = range_str.split(',')
for part in parts:
part = part.strip()
if '-' in part:
start, end = part.split('-')
pages.update(range(int(start), int(end) + 1))
else:
pages.add(int(part))
return sorted(pages)
if __name__ == "__main__":
converter = Converter()
pdf_file = 'input.pdf'
img_dir = 'images'
output_dir = 'output'
output_pdf_name = 'result.pdf'
page_range_str = "1-12,14-20,22-32,56,66-78,82-97"
pages_to_invert = parse_page_ranges(page_range_str)
converter.pdf_to_img_all(pdf_file, img_dir)
converter.img_to_pdf(img_dir, output_dir, output_pdf_name, pages_to_invert)