基于Python打造一个PDF手写模拟器

2026-04-10 16:46:01发布 0次浏览

详情描述

PDF手写模拟器

下面我将创建一个PDF手写模拟器，它能将PDF文档转换为具有手写效果的图像或PDF。

import os
import random
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageOps, ImageFilter
import fitz  # PyMuPDF
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from io import BytesIO
import cv2
from scipy import interpolate
import matplotlib.pyplot as plt
from matplotlib import font_manager
import warnings
warnings.filterwarnings('ignore')

class HandwritingSimulator:
    def __init__(self):
        """初始化手写模拟器"""
        # 设置中文字体路径（根据实际情况修改）
        self.chinese_font_paths = self._find_chinese_fonts()
        self.english_font_paths = self._find_english_fonts()

        # 手写风格参数
        self.style_params = {
            'base_font_size': 24,
            'size_variation': 0.2,  # 字体大小变化范围
            'rotation_variation': 2,  # 旋转角度变化范围
            'color_variation': 10,  # 颜色变化范围
            'line_spacing_variation': 0.1,  # 行间距变化
            'word_spacing_variation': 0.1,  # 字间距变化
            'jitter_intensity': 0.5,  # 抖动强度
            'ink_bleed': 0.3,  # 墨水扩散效果
            'background_texture': True,  # 背景纹理
        }

    def _find_chinese_fonts(self):
        """查找系统中的中文字体"""
        font_paths = []
        # 常见的中文字体路径
        common_paths = [
            '/System/Library/Fonts/PingFang.ttc',  # macOS
            '/System/Library/Fonts/STHeiti Light.ttc',  # macOS
            'C:/Windows/Fonts/simhei.ttf',  # Windows
            'C:/Windows/Fonts/simsun.ttc',  # Windows
            '/usr/share/fonts/truetype/wqy/wqy-microhei.ttc',  # Linux
        ]

        # 尝试查找字体
        for path in common_paths:
            if os.path.exists(path):
                font_paths.append(path)

        # 如果没有找到，使用默认字体
        if not font_paths:
            font_paths.append(None)

        return font_paths

    def _find_english_fonts(self):
        """查找手写风格的英文字体"""
        font_paths = []
        # 常见的手写风格字体
        common_paths = [
            '/System/Library/Fonts/Apple Chancery.ttf',  # macOS
            '/System/Library/Fonts/Comic Sans MS.ttf',  # macOS
            'C:/Windows/Fonts/comic.ttf',  # Windows
            'C:/Windows/Fonts/BRUSHSCI.ttf',  # Windows
            '/usr/share/fonts/truetype/msttcorefonts/Comic_Sans_MS.ttf',  # Linux
        ]

        for path in common_paths:
            if os.path.exists(path):
                font_paths.append(path)

        # 如果没有找到，使用默认字体
        if not font_paths:
            font_paths.append(None)

        return font_paths

    def pdf_to_images(self, pdf_path, dpi=150):
        """将PDF转换为图像列表"""
        images = []
        doc = fitz.open(pdf_path)

        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            pix = page.get_pixmap(matrix=fitz.Matrix(dpi/72, dpi/72))
            img_data = pix.tobytes("ppm")
            img = Image.open(BytesIO(img_data)).convert("RGB")
            images.append(img)

        doc.close()
        return images

    def apply_handwriting_effect(self, text, is_chinese=False):
        """将文本转换为手写效果图像"""
        # 选择字体
        if is_chinese and self.chinese_font_paths[0]:
            font_path = random.choice(self.chinese_font_paths)
        else:
            font_path = random.choice(self.english_font_paths)

        # 基础字体大小
        base_size = self.style_params['base_font_size']
        size_variation = self.style_params['size_variation']

        try:
            if font_path:
                font = ImageFont.truetype(font_path, base_size)
            else:
                font = ImageFont.load_default()
        except:
            font = ImageFont.load_default()

        # 分割文本为字符
        chars = list(text)
        char_images = []

        # 为每个字符创建独立图像
        for char in chars:
            # 随机变化字体大小
            size = int(base_size * (1 + random.uniform(-size_variation, size_variation)))

            try:
                if font_path:
                    char_font = ImageFont.truetype(font_path, size)
                else:
                    char_font = ImageFont.load_default().font_variant(size=size)
            except:
                char_font = ImageFont.load_default().font_variant(size=size)

            # 估计字符尺寸
            bbox = char_font.getbbox(char)
            char_width = bbox[2] - bbox[0] + 10
            char_height = bbox[3] - bbox[1] + 10

            # 创建字符图像
            char_img = Image.new('RGBA', (char_width, char_height), (255, 255, 255, 0))
            draw = ImageDraw.Draw(char_img)

            # 随机颜色变化（稍微偏蓝或偏灰的黑色）
            color_variation = self.style_params['color_variation']
            r = max(0, min(255, 0 + random.randint(-color_variation, color_variation)))
            g = max(0, min(255, 0 + random.randint(-color_variation, color_variation)))
            b = max(0, min(255, 0 + random.randint(-color_variation, color_variation//2)))
            color = (r, g, b)

            # 随机旋转
            rotation = random.uniform(-self.style_params['rotation_variation'], 
                                     self.style_params['rotation_variation'])

            # 绘制字符
            draw.text((5, 5), char, fill=color, font=char_font)

            # 应用旋转
            if abs(rotation) > 0.5:
                char_img = char_img.rotate(rotation, expand=True, fillcolor=(255, 255, 255, 0))

            char_images.append(char_img)

        return char_images

    def create_handwritten_line(self, text, is_chinese=False, line_width=800):
        """Compose the per-character images of ``text`` into one line image.

        Characters are laid out left to right. The gap before each character
        (except the first) is a quarter of the previous character's width,
        jittered by ``style_params['word_spacing_variation']``, and every
        character is shifted vertically by a random -2..2 pixels.

        Args:
            text: The text to render on a single line.
            is_chinese: Forwarded to ``apply_handwriting_effect``.
            line_width: Currently unused; kept so existing callers that pass
                it keep working.

        Returns:
            An RGBA PIL image cropped to the visible (non-transparent)
            pixels. If nothing visible was drawn, the uncropped transparent
            canvas is returned.
        """
        char_images = self.apply_handwriting_effect(text, is_chinese)
        spacing_variation = self.style_params['word_spacing_variation']
        margin = 10

        # Gap inserted before each character; the first one has none.
        char_spacings = [0] if char_images else []
        for previous in char_images[:-1]:
            base_spacing = previous.width // 4
            variation = int(base_spacing * random.uniform(-spacing_variation, spacing_variation))
            char_spacings.append(base_spacing + variation)

        total_width = sum(img.width for img in char_images) + sum(char_spacings)
        max_height = max((img.height for img in char_images), default=0)

        line_img = Image.new(
            'RGBA',
            (total_width + 2 * margin, max_height + 2 * margin),
            (255, 255, 255, 0),
        )

        x_offset = margin
        for char_img, spacing in zip(char_images, char_spacings):
            x_offset += spacing
            y_offset = margin + random.randint(-2, 2)  # slight vertical wobble
            line_img.paste(char_img, (x_offset, y_offset), char_img)
            x_offset += char_img.width

        # Crop using the alpha channel only. The canvas is transparent
        # *white*, so a bounding box computed over all channels covers the
        # whole image and the crop would do nothing (NOTE(review): this is
        # what Pillow versions without alpha-only getbbox do).
        ink_box = line_img.getchannel('A').getbbox()
        if ink_box:
            line_img = line_img.crop(ink_box)

        return line_img

    def add_ink_effects(self, image):
        """Soften an image slightly to imitate ink spreading into paper.

        The image is reduced to grayscale. When ``style_params['ink_bleed']``
        is positive, a Gaussian-blurred copy (radius ``ink_bleed * 2``) is
        mixed in at 30%.

        Args:
            image: A PIL image in any mode.

        Returns:
            A new RGB PIL image (grayscale content, so colour is discarded).
        """
        # Work on a grayscale copy so the input image is never modified.
        gray = image.copy() if image.mode == 'L' else image.convert('L')

        if self.style_params['ink_bleed'] > 0:
            radius = self.style_params['ink_bleed'] * 2
            softened = gray.filter(ImageFilter.GaussianBlur(radius))
            # 70% sharp original + 30% blurred copy.
            gray = Image.blend(gray, softened, alpha=0.3)

        return gray.convert('RGB')

    def add_paper_texture(self, image):
        """添加纸张纹理"""
        if not self.style_params['background_texture']:
            return image

        # 创建纸张纹理
        width, height = image.size
        texture = Image.new('RGB', (width, height), color=(248, 246, 240))

        # 添加轻微噪声
        noise = np.random.normal(0, 3, (height, width, 3)).astype(np.uint8)
        noise_img = Image.fromarray(noise, 'RGB')

        # 混合纹理和噪声
        texture = Image.blend(texture, noise_img, alpha=0.05)

        # 将纹理与图像混合
        if image.mode == 'RGBA':
            # 分离Alpha通道
            rgb = image.convert('RGB')
            alpha = image.split()[-1]

            # 混合RGB通道与纹理
            blended_rgb = Image.blend(texture, rgb, alpha=0.9)

            # 重新组合
            result = Image.merge('RGBA', (*blended_rgb.split(), alpha))
        else:
            result = Image.blend(texture, image, alpha=0.9)

        return result

    def add_handwriting_artifacts(self, image):
        """Apply per-pixel brightness noise to imitate varying pen pressure.

        Each pixel's colour channels are multiplied by a factor drawn from a
        normal distribution (mean 1.0, sigma 0.05) shared across the channels
        of that pixel, then clipped to 0..255. Images whose array form is not
        three-dimensional (single-channel data) are returned as an unchanged
        copy.

        Args:
            image: A PIL image.

        Returns:
            A new PIL image built from the processed pixel array.
        """
        img_array = np.array(image)

        if img_array.ndim == 3:
            height, width, channels = img_array.shape
            pressure_mask = np.random.normal(1.0, 0.05, (height, width, 1))

            # Leave the alpha channel alone: scaling it would make opaque
            # pixels randomly semi-transparent instead of changing brightness.
            color_channels = channels - 1 if image.mode in ('RGBA', 'LA') else channels

            shaded = img_array.astype(np.float64)
            shaded[..., :color_channels] *= pressure_mask
            img_array = np.clip(shaded, 0, 255).astype(np.uint8)

        return Image.fromarray(img_array)

    def convert_page_to_handwriting(self, image):
        """Run one page image through the handwriting filter pipeline.

        This is a simplification: the page is not OCR-ed and re-rendered,
        the whole image is just filtered. The stages run in this order: ink
        effects, paper texture, handwriting artifacts.

        Args:
            image: The rendered page as a PIL image.

        Returns:
            The image returned by the last stage.
        """
        pipeline = (
            self.add_ink_effects,            # ink diffusion
            self.add_paper_texture,          # paper background
            self.add_handwriting_artifacts,  # pen-pressure noise
        )

        page = image
        for stage in pipeline:
            page = stage(page)

        return page

    def create_sample_handwriting(self, output_path="handwriting_sample.png"):
        """Render a two-line demo image (Chinese + English) and save it.

        Args:
            output_path: Destination file; the image is always written in
                PNG format.

        Returns:
            The final PIL image that was saved (title, both handwritten
            lines, paper texture applied).
        """
        chinese_text = "基于Python的PDF手写模拟器，可以生成自然的手写效果。"
        english_text = "This is a Python-based PDF handwriting simulator that creates natural-looking handwritten documents."

        chinese_line = self.create_handwritten_line(chinese_text, is_chinese=True)
        english_line = self.create_handwritten_line(english_text, is_chinese=False)

        # Layout: the canvas height is derived from where the last line
        # actually ends. The earlier formula (heights + 3 * spacing) came out
        # 8 px short of the English line's bottom edge and clipped it.
        margin = 20
        line_spacing = int(self.style_params['base_font_size'] * 1.5)
        chinese_y = 80
        english_y = chinese_y + chinese_line.height + line_spacing
        total_height = english_y + english_line.height + margin
        max_width = max(chinese_line.width, english_line.width)

        result = Image.new('RGB', (max_width + 2 * margin, total_height), color=(248, 246, 240))

        # Title: use the first Chinese font when one was found.
        draw = ImageDraw.Draw(result)
        title_font_path = self.chinese_font_paths[0]
        title_font = None
        if title_font_path:
            try:
                title_font = ImageFont.truetype(title_font_path, 28)
            except OSError:
                # Font file unreadable: fall back to the default font.
                title_font = None
        if title_font is None:
            title_font = ImageFont.load_default()

        draw.text((margin, 20), "手写效果示例", fill=(50, 50, 50), font=title_font)

        # The line images double as their own paste masks (alpha channel).
        result.paste(chinese_line, (margin, chinese_y), chinese_line)
        result.paste(english_line, (margin, english_y), english_line)

        result = self.add_paper_texture(result)

        # No "quality" option here: it has no meaning for PNG output.
        result.save(output_path, "PNG")
        print(f"手写样本已保存到: {output_path}")

        return result

    def process_pdf(self, pdf_path, output_path="handwritten_output.pdf"):
        """Convert a whole PDF to handwriting style and write a new PDF.

        Pages are rendered with ``pdf_to_images``, filtered one by one with
        ``convert_page_to_handwriting`` and written with ``images_to_pdf``.
        Progress messages are printed along the way.

        Args:
            pdf_path: Path of the input PDF.
            output_path: Path of the PDF to create.

        Returns:
            The list of processed page images, in page order.
        """
        print(f"正在处理PDF文件: {pdf_path}")

        # Step 1: rasterise the pages.
        print("将PDF转换为图像...")
        pages = self.pdf_to_images(pdf_path)
        page_count = len(pages)

        # Step 2: filter each page, reporting 1-based progress.
        print(f"处理 {page_count} 页...")
        processed_images = []
        for page_number, page in enumerate(pages, start=1):
            print(f"处理第 {page_number}/{page_count} 页...")
            processed_images.append(self.convert_page_to_handwriting(page))

        # Step 3: write the result.
        print("生成PDF文件...")
        self.images_to_pdf(processed_images, output_path)

        print(f"处理完成! 输出文件: {output_path}")

        return processed_images

    def images_to_pdf(self, images, output_path):
        """将图像列表保存为PDF"""
        if not images:
            return

        # 使用ReportLab创建PDF
        c = canvas.Canvas(output_path, pagesize=letter)
        width, height = letter

        for i, img in enumerate(images):
            # 将PIL图像转换为ReportLab兼容格式
            img_buffer = BytesIO()
            img.save(img_buffer, format="PNG")
            img_buffer.seek(0)

            # 添加图像到PDF页面
            c.drawImage(img_buffer, 0, 0, width, height, preserveAspectRatio=True)

            # 如果不是最后一页，添加新页面
            if i < len(images) - 1:
                c.showPage()

        c.save()

    def interactive_demo(self):
        """Run a console menu loop until the user chooses to exit.

        Options: 1 creates a sample image, 2 converts a PDF, 3 adjusts the
        style parameters, 4 exits. Any other input prints an error message
        and shows the menu again.
        """
        banner = "=" * 50
        print(banner)
        print("PDF手写模拟器 - 交互式演示")
        print(banner)

        menu_lines = (
            "\n请选择操作:",
            "1. 创建手写样本",
            "2. 处理PDF文件",
            "3. 调整手写参数",
            "4. 退出",
        )

        while True:
            for menu_line in menu_lines:
                print(menu_line)

            choice = input("\n请输入选项 (1-4): ").strip()

            if choice == "4":
                print("感谢使用PDF手写模拟器!")
                break

            if choice == "1":
                # Empty answer selects the default file name.
                output_file = input("输入输出文件名 (默认: handwriting_sample.png): ").strip()
                self.create_sample_handwriting(output_file or "handwriting_sample.png")

            elif choice == "2":
                pdf_file = input("输入PDF文件路径: ").strip()
                if not os.path.exists(pdf_file):
                    print("文件不存在!")
                    continue

                output_file = input("输入输出PDF文件名 (默认: handwritten_output.pdf): ").strip()
                self.process_pdf(pdf_file, output_file or "handwritten_output.pdf")

            elif choice == "3":
                self.adjust_parameters()

            else:
                print("无效选项，请重新选择")

    def adjust_parameters(self):
        """Interactively change one entry of ``self.style_params``.

        Prints the current parameters and a numbered menu, then reads the
        user's choice. ``background_texture`` is toggled; every other
        parameter is read as a number. Invalid input (non-numeric, negative,
        NaN or infinite) leaves the parameter unchanged.
        """
        print("\n当前参数:")
        for key, value in self.style_params.items():
            print(f"  {key}: {value}")

        print("\n可以调整的参数:")
        print("  1. 字体大小变化 (size_variation)")
        print("  2. 旋转角度变化 (rotation_variation)")
        print("  3. 颜色变化 (color_variation)")
        print("  4. 抖动强度 (jitter_intensity)")
        print("  5. 墨水扩散 (ink_bleed)")
        print("  6. 背景纹理 (background_texture)")

        param_choice = input("\n选择要调整的参数 (1-6): ").strip()

        param_map = {
            "1": "size_variation",
            "2": "rotation_variation",
            "3": "color_variation",
            "4": "jitter_intensity",
            "5": "ink_bleed",
            "6": "background_texture"
        }
        # color_variation is passed to random.randint, which rejects floats,
        # so it has to stay an integer.
        integer_params = {"color_variation"}

        if param_choice not in param_map:
            print("无效选项")
            return

        param_name = param_map[param_choice]
        current_value = self.style_params[param_name]

        if param_name == "background_texture":
            new_value = not current_value
            self.style_params[param_name] = new_value
            print(f"{param_name} 已设置为: {new_value}")
            return

        try:
            new_value = float(input(f"输入新值 (当前: {current_value}): "))
            # "not >= 0" also rejects NaN. All of these parameters are
            # magnitudes, and a negative color_variation would give
            # random.randint an empty range.
            if not new_value >= 0:
                raise ValueError("value must be a non-negative number")
            if param_name in integer_params:
                new_value = int(round(new_value))  # OverflowError on infinity
        except (ValueError, OverflowError):
            print("无效输入，保持原值")
        else:
            self.style_params[param_name] = new_value
            print(f"{param_name} 已设置为: {new_value}")

# Example usage: write a sample image, then open the interactive menu.
if __name__ == "__main__":
    demo = HandwritingSimulator()

    # Produce handwriting_sample.png with the default settings.
    print("正在创建手写样本...")
    demo.create_sample_handwriting()

    # Hand control to the console menu.
    demo.interactive_demo()

使用说明

安装依赖

首先需要安装必要的Python库：

pip install PyMuPDF pillow reportlab opencv-python matplotlib numpy scipy

主要功能

手写效果模拟：

字体大小随机变化
字符旋转角度变化
墨水颜色微妙变化
字符间距和行间距变化
垂直方向轻微抖动

纸张效果：

纸张纹理模拟
轻微噪声添加
自然纸张颜色

墨水效果：

墨水扩散效果
笔压变化模拟
墨水不足效果

PDF处理：

PDF转图像
批量处理多页PDF
输出为手写风格的PDF

使用方法

直接运行演示：

python pdf_handwriting_simulator.py

处理PDF文件：

simulator = HandwritingSimulator()
simulator.process_pdf("input.pdf", "handwritten_output.pdf")

创建手写样本：

simulator.create_sample_handwriting("my_handwriting.png")

注意事项

字体路径需要根据系统进行调整。
对于中文字体，可能需要安装额外的中文字体文件。
处理大文件可能需要较长时间和较多内存。
实际应用中可能需要集成OCR功能来识别文本并重新渲染。

这个模拟器提供了基础的手写效果，可以根据需要进一步调整参数以获得更自然的效果。