PDF-To-JPG-Gradio5 / pdf_to_jpg_gradio5_en.py
innoai's picture
Upload 2 files
e3c0b60 verified
raw
history blame
5.35 kB
# pdf_to_jpg_gradio.py
import io
import os
import tempfile

import gradio as gr
from pdf2image import convert_from_bytes
from PIL import Image, ImageDraw
def convert_pdf_to_combined_image(pdf_bytes, output_size="A4", dpi=96, line_color=(200, 200, 200)):
    """
    Render every page of a PDF and stack the pages vertically into one long image.

    Parameters:
        pdf_bytes: raw bytes of the PDF document
        output_size: per-page target size; one of "Original", "A4" or "A3"
        dpi: pixel density used to turn the A4/A3 millimetre dimensions into
            pixels (default 96); it is not forwarded to the PDF rasteriser
        line_color: RGB colour of the divider drawn between pages,
            light grey (200, 200, 200) by default
    Returns:
        The stitched PIL Image (RGB mode, white background).
    """

    def mm_to_px(millimetres):
        # 25.4 millimetres per inch.
        return int(millimetres * dpi / 25.4)

    # Rasterise the PDF into one PIL image per page.
    pages = convert_from_bytes(pdf_bytes)
    page_count = len(pages)

    # Selectable per-page sizes in pixels. "Original" reuses the size of the
    # first rendered page for every page.
    page_sizes = {
        "Original": pages[0].size,
        "A4": (mm_to_px(210), mm_to_px(297)),
        "A3": (mm_to_px(297), mm_to_px(420)),
    }
    page_width, page_height = page_sizes[output_size]

    # The canvas is tall enough to hold all pages stacked on top of each other.
    canvas = Image.new("RGB", (page_width, page_height * page_count), "white")

    # Paste all pages first; the dividers are drawn afterwards so that no
    # later paste can cover them.
    for index, page in enumerate(pages):
        scaled = page.resize((page_width, page_height))
        canvas.paste(scaled, (0, index * page_height))

    # One horizontal divider on every boundary between two consecutive pages.
    pen = ImageDraw.Draw(canvas)
    for boundary in range(1, page_count):
        y = boundary * page_height
        pen.line([(0, y), (page_width, y)], fill=line_color, width=2)

    return canvas
def pdf_to_jpg(pdf_file, output_size):
    """
    Convert an uploaded PDF into one long JPG and return preview + download values.

    Parameters:
        pdf_file: bytes of the uploaded PDF (the upload component is
            configured with type="binary"), or None when nothing was uploaded
        output_size: output size chosen by the user (Original / A4 / A3)
    Returns:
        (display_image, download_file)
        display_image: the stitched PIL Image, or None
        download_file: path of the generated "converted.jpg" on disk, or None
    """
    if pdf_file is None:
        return None, None

    # Stitch all pages into a single image.
    combined_image = convert_pdf_to_combined_image(pdf_file, output_size=output_size)

    # A gr.File output must be given a file path; a (filename, bytes) tuple is
    # not accepted. Each call writes into its own temporary directory so that
    # concurrent requests cannot overwrite each other's result while the
    # download keeps the friendly name "converted.jpg".
    # NOTE: the temporary directory is not removed here because Gradio reads
    # the file after this function has returned.
    output_dir = tempfile.mkdtemp(prefix="pdf_to_jpg_")
    output_path = os.path.join(output_dir, "converted.jpg")
    combined_image.save(output_path, format="JPEG")

    # First item feeds the preview image, second item feeds the download.
    return combined_image, output_path
# ========== Gradio application UI ==========
# The interface text is English; functionality and logic match the original
# Chinese-language version.
# NOTE(review): block nesting below was reconstructed because the source
# indentation was lost -- confirm the Convert button sits between the two rows.
with gr.Blocks(
    css=".gradio-container {max-width: 800px; margin: 0 auto;}",
    title="PDF to JPG Converter - High Quality PDF Merging",
) as demo:
    # Page heading.
    gr.Markdown("<h1 style='text-align: center;'>PDF to JPG Online Converter</h1>")

    # Short introduction.
    gr.Markdown("""
<p style='text-align: center; font-size: 16px;'>
Welcome to our PDF to JPG Online Converter! This tool supports merging all PDF pages into one single long image for easy viewing and sharing.<br/>
<b>SEO Summary:</b> Supports Original, A4, and A3 page sizes for stitching, fast conversion, high-quality output, and free to use.
</p>
""")

    # Step-by-step usage instructions.
    gr.Markdown("""
<hr/>
<h3>How to Use:</h3>
<ol>
<li>Upload your PDF file in the "Choose PDF File" section below.</li>
<li>Select your desired output size (Original, A4, or A3) from the dropdown menu.</li>
<li>Click the "Convert" button and wait a few seconds to preview the result.</li>
<li>Click the "Download File" button to save the JPG image to your local device.</li>
</ol>
<hr/>
""")

    # Inputs: the PDF upload (delivered to the handler as bytes) and the size choice.
    with gr.Row():
        pdf_input = gr.File(
            type="binary",
            file_types=[".pdf"],
            label="Choose PDF File",
        )
        size_dropdown = gr.Dropdown(
            label="Choose Output Size",
            choices=["Original", "A4", "A3"],
            value="A4",
        )

    convert_button = gr.Button("Convert")

    # Outputs: image preview plus the downloadable file.
    with gr.Row():
        image_output = gr.Image(label="Preview")
        download_output = gr.File(label="Download File")

    # Clicking the button runs pdf_to_jpg and fills both outputs.
    convert_button.click(
        pdf_to_jpg,
        inputs=[pdf_input, size_dropdown],
        outputs=[image_output, download_output],
    )

# Start the Gradio server when executed as a script.
if __name__ == "__main__":
    demo.launch(show_error=True, show_api=False, ssr_mode=True)