欢迎光临石四片叶子网
详情描述
PDF手写模拟器

下面我将创建一个PDF手写模拟器,它能将PDF文档转换为具有手写效果的图像或PDF。

import os
import random
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageOps, ImageFilter
import fitz  # PyMuPDF
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from io import BytesIO
import cv2
from scipy import interpolate
import matplotlib.pyplot as plt
from matplotlib import font_manager
import warnings
warnings.filterwarnings('ignore')

class HandwritingSimulator:
    def __init__(self):
        """初始化手写模拟器"""
        # 设置中文字体路径(根据实际情况修改)
        self.chinese_font_paths = self._find_chinese_fonts()
        self.english_font_paths = self._find_english_fonts()

        # 手写风格参数
        self.style_params = {
            'base_font_size': 24,
            'size_variation': 0.2,  # 字体大小变化范围
            'rotation_variation': 2,  # 旋转角度变化范围
            'color_variation': 10,  # 颜色变化范围
            'line_spacing_variation': 0.1,  # 行间距变化
            'word_spacing_variation': 0.1,  # 字间距变化
            'jitter_intensity': 0.5,  # 抖动强度
            'ink_bleed': 0.3,  # 墨水扩散效果
            'background_texture': True,  # 背景纹理
        }

    def _find_chinese_fonts(self):
        """查找系统中的中文字体"""
        font_paths = []
        # 常见的中文字体路径
        common_paths = [
            '/System/Library/Fonts/PingFang.ttc',  # macOS
            '/System/Library/Fonts/STHeiti Light.ttc',  # macOS
            'C:/Windows/Fonts/simhei.ttf',  # Windows
            'C:/Windows/Fonts/simsun.ttc',  # Windows
            '/usr/share/fonts/truetype/wqy/wqy-microhei.ttc',  # Linux
        ]

        # 尝试查找字体
        for path in common_paths:
            if os.path.exists(path):
                font_paths.append(path)

        # 如果没有找到,使用默认字体
        if not font_paths:
            font_paths.append(None)

        return font_paths

    def _find_english_fonts(self):
        """查找手写风格的英文字体"""
        font_paths = []
        # 常见的手写风格字体
        common_paths = [
            '/System/Library/Fonts/Apple Chancery.ttf',  # macOS
            '/System/Library/Fonts/Comic Sans MS.ttf',  # macOS
            'C:/Windows/Fonts/comic.ttf',  # Windows
            'C:/Windows/Fonts/BRUSHSCI.ttf',  # Windows
            '/usr/share/fonts/truetype/msttcorefonts/Comic_Sans_MS.ttf',  # Linux
        ]

        for path in common_paths:
            if os.path.exists(path):
                font_paths.append(path)

        # 如果没有找到,使用默认字体
        if not font_paths:
            font_paths.append(None)

        return font_paths

    def pdf_to_images(self, pdf_path, dpi=150):
        """将PDF转换为图像列表"""
        images = []
        doc = fitz.open(pdf_path)

        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            pix = page.get_pixmap(matrix=fitz.Matrix(dpi/72, dpi/72))
            img_data = pix.tobytes("ppm")
            img = Image.open(BytesIO(img_data)).convert("RGB")
            images.append(img)

        doc.close()
        return images

    def apply_handwriting_effect(self, text, is_chinese=False):
        """将文本转换为手写效果图像"""
        # 选择字体
        if is_chinese and self.chinese_font_paths[0]:
            font_path = random.choice(self.chinese_font_paths)
        else:
            font_path = random.choice(self.english_font_paths)

        # 基础字体大小
        base_size = self.style_params['base_font_size']
        size_variation = self.style_params['size_variation']

        try:
            if font_path:
                font = ImageFont.truetype(font_path, base_size)
            else:
                font = ImageFont.load_default()
        except:
            font = ImageFont.load_default()

        # 分割文本为字符
        chars = list(text)
        char_images = []

        # 为每个字符创建独立图像
        for char in chars:
            # 随机变化字体大小
            size = int(base_size * (1 + random.uniform(-size_variation, size_variation)))

            try:
                if font_path:
                    char_font = ImageFont.truetype(font_path, size)
                else:
                    char_font = ImageFont.load_default().font_variant(size=size)
            except:
                char_font = ImageFont.load_default().font_variant(size=size)

            # 估计字符尺寸
            bbox = char_font.getbbox(char)
            char_width = bbox[2] - bbox[0] + 10
            char_height = bbox[3] - bbox[1] + 10

            # 创建字符图像
            char_img = Image.new('RGBA', (char_width, char_height), (255, 255, 255, 0))
            draw = ImageDraw.Draw(char_img)

            # 随机颜色变化(稍微偏蓝或偏灰的黑色)
            color_variation = self.style_params['color_variation']
            r = max(0, min(255, 0 + random.randint(-color_variation, color_variation)))
            g = max(0, min(255, 0 + random.randint(-color_variation, color_variation)))
            b = max(0, min(255, 0 + random.randint(-color_variation, color_variation//2)))
            color = (r, g, b)

            # 随机旋转
            rotation = random.uniform(-self.style_params['rotation_variation'], 
                                     self.style_params['rotation_variation'])

            # 绘制字符
            draw.text((5, 5), char, fill=color, font=char_font)

            # 应用旋转
            if abs(rotation) > 0.5:
                char_img = char_img.rotate(rotation, expand=True, fillcolor=(255, 255, 255, 0))

            char_images.append(char_img)

        return char_images

    def create_handwritten_line(self, text, is_chinese=False, line_width=800):
        """Compose one line of handwriting from per-character images.

        Args:
            text: The text of the line.
            is_chinese: Forwarded to ``apply_handwriting_effect`` for font
                selection.
            line_width: Accepted for interface compatibility; the current
                implementation does not read it.

        Returns:
            An RGBA image of the line, cropped to its non-empty bounding box
            when there is one.
        """
        glyphs = self.apply_handwriting_effect(text, is_chinese)

        glyph_width_sum = sum(glyph.width for glyph in glyphs)
        tallest = max((glyph.height for glyph in glyphs), default=0)

        # Gap before each glyph: a quarter of the previous glyph's width,
        # randomly varied; the first glyph has no gap.
        variation_ratio = self.style_params['word_spacing_variation']
        gaps = []
        for index in range(len(glyphs)):
            if index == 0:
                gaps.append(0)
                continue
            nominal = glyphs[index - 1].width // 4
            delta = int(nominal * random.uniform(-variation_ratio, variation_ratio))
            gaps.append(nominal + delta)

        # Transparent line image with a 10px margin on every side.
        line_size = (glyph_width_sum + sum(gaps) + 20, tallest + 20)
        line_img = Image.new('RGBA', line_size, (255, 255, 255, 0))

        cursor = 10
        for glyph, gap in zip(glyphs, gaps):
            cursor += gap
            top = 10 + random.randint(-2, 2)  # slight vertical jitter
            line_img.paste(glyph, (cursor, top), glyph)
            cursor += glyph.width

        # Trim the surrounding empty area, if any content was drawn.
        content_box = line_img.getbbox()
        if content_box:
            line_img = line_img.crop(content_box)

        return line_img

    def add_ink_effects(self, image):
        """Soften an image slightly to imitate ink bleeding into paper.

        The image is reduced to grayscale, optionally mixed with a
        Gaussian-blurred copy of itself, and returned in RGB mode.

        Args:
            image: The PIL image to process.

        Returns:
            A new RGB image.
        """
        # Work on a grayscale copy so the input image is left untouched.
        gray = image.copy() if image.mode == 'L' else image.convert('L')

        bleed = self.style_params['ink_bleed']
        if bleed > 0:
            # Blur radius is twice the ink_bleed setting; blend 30% of the
            # blurred copy into the sharp one.
            softened = gray.filter(ImageFilter.GaussianBlur(bleed * 2))
            gray = Image.blend(gray, softened, alpha=0.3)

        return gray.convert('RGB')

    def add_paper_texture(self, image):
        """添加纸张纹理"""
        if not self.style_params['background_texture']:
            return image

        # 创建纸张纹理
        width, height = image.size
        texture = Image.new('RGB', (width, height), color=(248, 246, 240))

        # 添加轻微噪声
        noise = np.random.normal(0, 3, (height, width, 3)).astype(np.uint8)
        noise_img = Image.fromarray(noise, 'RGB')

        # 混合纹理和噪声
        texture = Image.blend(texture, noise_img, alpha=0.05)

        # 将纹理与图像混合
        if image.mode == 'RGBA':
            # 分离Alpha通道
            rgb = image.convert('RGB')
            alpha = image.split()[-1]

            # 混合RGB通道与纹理
            blended_rgb = Image.blend(texture, rgb, alpha=0.9)

            # 重新组合
            result = Image.merge('RGBA', (*blended_rgb.split(), alpha))
        else:
            result = Image.blend(texture, image, alpha=0.9)

        return result

    def add_handwriting_artifacts(self, image):
        """Apply per-pixel brightness noise to imitate varying pen pressure.

        Args:
            image: The PIL image to process.

        Returns:
            A new image built from the processed pixel array. Images whose
            array is not 3-dimensional are rebuilt unchanged.
        """
        width, height = image.size
        pixels = np.array(image)

        if pixels.ndim == 3:
            # One multiplicative factor per pixel (mean 1.0, std 0.05),
            # broadcast across all channels of that pixel.
            pressure = np.random.normal(1.0, 0.05, (height, width, 1))
            pixels = np.clip(pixels * pressure, 0, 255).astype(np.uint8)

        return Image.fromarray(pixels)

    def convert_page_to_handwriting(self, image):
        """将单页图像转换为手写效果"""
        # 这里简化处理:在实际应用中,应该使用OCR识别文本并重新渲染
        # 但为了演示,我们将对整个图像应用手写效果滤镜

        # 应用墨水效果
        result = self.add_ink_effects(image)

        # 添加纸张纹理
        result = self.add_paper_texture(result)

        # 添加手写伪影
        result = self.add_handwriting_artifacts(result)

        return result

    def create_sample_handwriting(self, output_path="handwriting_sample.png"):
        """Render a two-line handwriting sample and save it as a PNG.

        The sample has a title, one Chinese line and one English line on a
        paper-coloured background, followed by the paper-texture pass.

        Args:
            output_path: Destination file for the PNG image.

        Returns:
            The final ``PIL.Image.Image`` that was saved.
        """
        # Sample text
        chinese_text = "基于Python的PDF手写模拟器,可以生成自然的手写效果。"
        english_text = "This is a Python-based PDF handwriting simulator that creates natural-looking handwritten documents."

        chinese_line = self.create_handwritten_line(chinese_text, is_chinese=True)
        english_line = self.create_handwritten_line(english_text, is_chinese=False)

        # Lay the rows out first and derive the canvas height from the last
        # row's bottom edge, so the English line always fits on the canvas.
        margin = 20
        line_spacing = int(self.style_params['base_font_size'] * 1.5)
        chinese_y = 80
        english_y = chinese_y + chinese_line.height + line_spacing
        total_height = english_y + english_line.height + line_spacing
        max_width = max(chinese_line.width, english_line.width)

        result = Image.new('RGB', (max_width + 2 * margin, total_height),
                           color=(248, 246, 240))

        # Title: prefer the first Chinese font, else the default font.
        draw = ImageDraw.Draw(result)
        title_font = None
        title_font_path = self.chinese_font_paths[0]
        if title_font_path:
            try:
                title_font = ImageFont.truetype(title_font_path, 28)
            except OSError:
                title_font = None  # unreadable font file
        if title_font is None:
            title_font = ImageFont.load_default()

        draw.text((margin, 20), "手写效果示例", fill=(50, 50, 50), font=title_font)

        # The line images are RGBA; use them as their own paste mask.
        result.paste(chinese_line, (margin, chinese_y), chinese_line)
        result.paste(english_line, (margin, english_y), english_line)

        result = self.add_paper_texture(result)

        # PNG is lossless, so no JPEG-style quality setting is passed.
        result.save(output_path, "PNG")
        print(f"手写样本已保存到: {output_path}")

        return result

    def process_pdf(self, pdf_path, output_path="handwritten_output.pdf"):
        """处理整个PDF文件"""
        print(f"正在处理PDF文件: {pdf_path}")

        # 将PDF转换为图像
        print("将PDF转换为图像...")
        images = self.pdf_to_images(pdf_path)

        # 处理每一页
        print(f"处理 {len(images)} 页...")
        processed_images = []

        for i, img in enumerate(images):
            print(f"处理第 {i+1}/{len(images)} 页...")
            processed_img = self.convert_page_to_handwriting(img)
            processed_images.append(processed_img)

        # 保存为PDF
        print("生成PDF文件...")
        self.images_to_pdf(processed_images, output_path)

        print(f"处理完成! 输出文件: {output_path}")

        return processed_images

    def images_to_pdf(self, images, output_path):
        """将图像列表保存为PDF"""
        if not images:
            return

        # 使用ReportLab创建PDF
        c = canvas.Canvas(output_path, pagesize=letter)
        width, height = letter

        for i, img in enumerate(images):
            # 将PIL图像转换为ReportLab兼容格式
            img_buffer = BytesIO()
            img.save(img_buffer, format="PNG")
            img_buffer.seek(0)

            # 添加图像到PDF页面
            c.drawImage(img_buffer, 0, 0, width, height, preserveAspectRatio=True)

            # 如果不是最后一页,添加新页面
            if i < len(images) - 1:
                c.showPage()

        c.save()

    def interactive_demo(self):
        """交互式演示"""
        print("=" * 50)
        print("PDF手写模拟器 - 交互式演示")
        print("=" * 50)

        while True:
            print("\n请选择操作:")
            print("1. 创建手写样本")
            print("2. 处理PDF文件")
            print("3. 调整手写参数")
            print("4. 退出")

            choice = input("\n请输入选项 (1-4): ").strip()

            if choice == "1":
                output_file = input("输入输出文件名 (默认: handwriting_sample.png): ").strip()
                if not output_file:
                    output_file = "handwriting_sample.png"
                self.create_sample_handwriting(output_file)

            elif choice == "2":
                pdf_file = input("输入PDF文件路径: ").strip()
                if not os.path.exists(pdf_file):
                    print("文件不存在!")
                    continue

                output_file = input("输入输出PDF文件名 (默认: handwritten_output.pdf): ").strip()
                if not output_file:
                    output_file = "handwritten_output.pdf"

                self.process_pdf(pdf_file, output_file)

            elif choice == "3":
                self.adjust_parameters()

            elif choice == "4":
                print("感谢使用PDF手写模拟器!")
                break
            else:
                print("无效选项,请重新选择")

    def adjust_parameters(self):
        """调整手写效果参数"""
        print("\n当前参数:")
        for key, value in self.style_params.items():
            print(f"  {key}: {value}")

        print("\n可以调整的参数:")
        print("  1. 字体大小变化 (size_variation)")
        print("  2. 旋转角度变化 (rotation_variation)")
        print("  3. 颜色变化 (color_variation)")
        print("  4. 抖动强度 (jitter_intensity)")
        print("  5. 墨水扩散 (ink_bleed)")
        print("  6. 背景纹理 (background_texture)")

        param_choice = input("\n选择要调整的参数 (1-6): ").strip()

        param_map = {
            "1": "size_variation",
            "2": "rotation_variation",
            "3": "color_variation",
            "4": "jitter_intensity",
            "5": "ink_bleed",
            "6": "background_texture"
        }

        if param_choice in param_map:
            param_name = param_map[param_choice]
            current_value = self.style_params[param_name]

            if param_name == "background_texture":
                new_value = not current_value
                self.style_params[param_name] = new_value
                print(f"{param_name} 已设置为: {new_value}")
            else:
                try:
                    new_value = float(input(f"输入新值 (当前: {current_value}): "))
                    self.style_params[param_name] = new_value
                    print(f"{param_name} 已设置为: {new_value}")
                except ValueError:
                    print("无效输入,保持原值")
        else:
            print("无效选项")

# Usage example: build a sample image, then hand over to the text menu.
if __name__ == "__main__":
    demo = HandwritingSimulator()

    # Generate the sample with the default output file name.
    print("正在创建手写样本...")
    demo.create_sample_handwriting()

    # Enter the interactive loop; it returns when the user picks "exit".
    demo.interactive_demo()

使用说明

安装依赖

首先需要安装必要的Python库:

pip install PyMuPDF pillow reportlab opencv-python matplotlib numpy scipy

主要功能

手写效果模拟

  • 字体大小随机变化
  • 字符旋转角度变化
  • 墨水颜色微妙变化
  • 字符间距和行间距变化
  • 垂直方向轻微抖动

纸张效果

  • 纸张纹理模拟
  • 轻微噪声添加
  • 自然纸张颜色

墨水效果

  • 墨水扩散效果
  • 笔压变化模拟
  • 墨水不足效果

PDF处理

  • PDF转图像
  • 批量处理多页PDF
  • 输出为手写风格的PDF

使用方法

直接运行演示

python pdf_handwriting_simulator.py

处理PDF文件

simulator = HandwritingSimulator()
simulator.process_pdf("input.pdf", "handwritten_output.pdf")

创建手写样本

simulator.create_sample_handwriting("my_handwriting.png")

注意事项

  • 字体路径需要根据系统进行调整。
  • 对于中文字体,可能需要安装额外的中文字体文件。
  • 处理大文件可能需要较长时间和较多内存。
  • 实际应用中可能需要集成OCR功能来识别文本并重新渲染。

这个模拟器提供了基础的手写效果,可以根据需要进一步调整参数以获得更自然的效果。