下面我将创建一个PDF手写模拟器,它能将PDF文档转换为具有手写效果的图像或PDF。
import os
import random
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageOps, ImageFilter
import fitz # PyMuPDF
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from io import BytesIO
import cv2
from scipy import interpolate
import matplotlib.pyplot as plt
from matplotlib import font_manager
import warnings
warnings.filterwarnings('ignore')
class HandwritingSimulator:
    """Render text as pseudo-handwriting and give PDF pages a hand-written look.

    Two independent pipelines are provided:

    * ``create_handwritten_line`` / ``create_sample_handwriting`` draw text one
      glyph at a time with random size, rotation, colour, spacing and vertical
      jitter.
    * ``process_pdf`` rasterises every PDF page and applies image filters (ink
      blur, paper texture, brightness noise). It does not OCR or re-render the
      text on the page.

    The class expects the module-level imports ``os``, ``random``, ``np``,
    ``BytesIO``, ``fitz``, ``canvas``/``letter`` (reportlab) and the PIL names
    ``Image``, ``ImageDraw``, ``ImageFont``, ``ImageFilter`` to be in scope.
    """

    # Font files probed on macOS, Windows and Linux; only existing ones are kept.
    _CHINESE_FONT_CANDIDATES = (
        '/System/Library/Fonts/PingFang.ttc',  # macOS
        '/System/Library/Fonts/STHeiti Light.ttc',  # macOS
        'C:/Windows/Fonts/simhei.ttf',  # Windows
        'C:/Windows/Fonts/simsun.ttc',  # Windows
        '/usr/share/fonts/truetype/wqy/wqy-microhei.ttc',  # Linux
    )
    _ENGLISH_FONT_CANDIDATES = (
        '/System/Library/Fonts/Apple Chancery.ttf',  # macOS
        '/System/Library/Fonts/Comic Sans MS.ttf',  # macOS
        'C:/Windows/Fonts/comic.ttf',  # Windows
        'C:/Windows/Fonts/BRUSHSCI.ttf',  # Windows
        '/usr/share/fonts/truetype/msttcorefonts/Comic_Sans_MS.ttf',  # Linux
    )

    # Off-white colour used for the paper background.
    _PAPER_COLOR = (248, 246, 240)

    # Style parameters that are passed to APIs requiring integers
    # (``random.randint``, font sizes); user input for them is rounded.
    _INTEGER_PARAMS = frozenset({'base_font_size', 'color_variation'})

    def __init__(self):
        """Locate usable fonts and install the default style parameters."""
        self.chinese_font_paths = self._find_chinese_fonts()
        self.english_font_paths = self._find_english_fonts()
        self.style_params = {
            'base_font_size': 24,
            'size_variation': 0.2,  # relative font-size spread per glyph
            'rotation_variation': 2,  # max glyph rotation, in degrees
            'color_variation': 10,  # max per-channel ink colour offset
            'line_spacing_variation': 0.1,  # not read by any method of this class
            'word_spacing_variation': 0.1,  # relative spread of glyph spacing
            'jitter_intensity': 0.5,  # scales the vertical glyph jitter
            'ink_bleed': 0.3,  # blur strength used to mimic ink spreading
            'background_texture': True,  # toggle for the paper texture
        }

    # ------------------------------------------------------------------
    # Pure helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _existing_fonts(candidates):
        """Return the candidate paths that exist, or ``[None]`` if none do."""
        found = [path for path in candidates if os.path.exists(path)]
        return found or [None]

    @staticmethod
    def _glyph_canvas(bbox, padding=5):
        """Compute a canvas size and drawing origin that fully contain a glyph.

        Args:
            bbox: ``(left, top, right, bottom)`` as returned by the font's
                ``getbbox``; coordinates are relative to the drawing origin.
            padding: Blank border, in pixels, kept on every side.

        Returns:
            ``((width, height), (origin_x, origin_y))``. Drawing the text at
            the origin keeps the whole bounding box inside the canvas, and the
            origin stays at ``(padding, padding)`` unless the box extends into
            negative coordinates.
        """
        left, top, right, bottom = bbox
        min_x = int(min(left, 0))
        min_y = int(min(top, 0))
        width = int(max(right, 1)) - min_x + 2 * padding
        height = int(max(bottom, 1)) - min_y + 2 * padding
        return (width, height), (padding - min_x, padding - min_y)

    @staticmethod
    def _paper_texture_array(width, height, base_color=(248, 246, 240),
                             noise_sigma=3.0):
        """Return an ``(height, width, 3)`` uint8 array of noisy paper colour.

        The noise is added in floating point and clipped before the cast to
        ``uint8`` so that negative samples cannot wrap around to ~255.
        """
        noise = np.random.normal(0.0, noise_sigma, (height, width, 3))
        textured = np.asarray(base_color, dtype=np.float64) + noise
        return np.clip(textured, 0, 255).astype(np.uint8)

    @classmethod
    def _coerce_param(cls, param_name, raw_text):
        """Parse user input for a numeric style parameter.

        Raises:
            ValueError: If ``raw_text`` is not a number.
            OverflowError: If an integer parameter is given an infinite value.
        """
        number = float(raw_text)
        if param_name in cls._INTEGER_PARAMS:
            return int(round(number))
        return number

    @staticmethod
    def _load_font(font_path, size):
        """Load ``font_path`` at ``size``, falling back to PIL's default font."""
        if font_path:
            try:
                return ImageFont.truetype(font_path, size)
            except OSError:
                pass  # unreadable or unsupported font file: use the default
        default_font = ImageFont.load_default()
        try:
            return default_font.font_variant(size=size)
        except (AttributeError, OSError):
            # The default font object may not support resizing.
            return default_font

    # ------------------------------------------------------------------
    # Font discovery
    # ------------------------------------------------------------------
    def _find_chinese_fonts(self):
        """Return existing CJK font paths, or ``[None]`` when none is found."""
        return self._existing_fonts(self._CHINESE_FONT_CANDIDATES)

    def _find_english_fonts(self):
        """Return existing handwriting-style Latin font paths, or ``[None]``."""
        return self._existing_fonts(self._ENGLISH_FONT_CANDIDATES)

    # ------------------------------------------------------------------
    # PDF rasterisation
    # ------------------------------------------------------------------
    def pdf_to_images(self, pdf_path, dpi=150):
        """Render every page of ``pdf_path`` to an RGB PIL image.

        Args:
            pdf_path: Path of the PDF to open with PyMuPDF.
            dpi: Render resolution; the zoom factor passed to PyMuPDF is
                ``dpi / 72``.

        Returns:
            A list with one ``PIL.Image`` per page, in page order.
        """
        zoom = dpi / 72
        images = []
        doc = fitz.open(pdf_path)
        try:
            for page_num in range(len(doc)):
                page = doc.load_page(page_num)
                pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
                img_data = pix.tobytes("ppm")
                images.append(Image.open(BytesIO(img_data)).convert("RGB"))
        finally:
            # Close the document even when rendering a page fails.
            doc.close()
        return images

    # ------------------------------------------------------------------
    # Glyph rendering
    # ------------------------------------------------------------------
    def apply_handwriting_effect(self, text, is_chinese=False):
        """Render each character of ``text`` as its own RGBA image.

        Every glyph gets a random font size, ink colour and (usually) a small
        rotation. One font file is chosen at random for the whole call.

        Args:
            text: The string to render.
            is_chinese: Prefer the CJK fonts when at least one was found.

        Returns:
            A list of RGBA ``PIL.Image`` objects, one per character.
        """
        if is_chinese and self.chinese_font_paths[0]:
            font_path = random.choice(self.chinese_font_paths)
        else:
            font_path = random.choice(self.english_font_paths)

        base_size = self.style_params['base_font_size']
        size_variation = self.style_params['size_variation']
        # randint needs integers; the parameter may have been set to a float.
        color_variation = max(0, int(round(self.style_params['color_variation'])))
        rotation_variation = self.style_params['rotation_variation']

        char_images = []
        for char in text:
            scale = 1 + random.uniform(-size_variation, size_variation)
            # Fonts cannot be loaded at a size below 1.
            size = max(1, int(base_size * scale))
            char_font = self._load_font(font_path, size)

            # Size the canvas from the glyph's real bounding box and shift the
            # origin when needed so no part of the glyph is cut off.
            canvas_size, origin = self._glyph_canvas(char_font.getbbox(char))
            char_img = Image.new('RGBA', canvas_size, (255, 255, 255, 0))
            draw = ImageDraw.Draw(char_img)

            # Near-black ink with a small random offset per channel.
            r = max(0, min(255, random.randint(-color_variation, color_variation)))
            g = max(0, min(255, random.randint(-color_variation, color_variation)))
            b = max(0, min(255, random.randint(-color_variation, color_variation // 2)))

            rotation = random.uniform(-rotation_variation, rotation_variation)

            draw.text(origin, char, fill=(r, g, b), font=char_font)

            # Skip tiny angles: rotating resamples the glyph for no visible gain.
            if abs(rotation) > 0.5:
                char_img = char_img.rotate(
                    rotation, expand=True, fillcolor=(255, 255, 255, 0)
                )
            char_images.append(char_img)
        return char_images

    def create_handwritten_line(self, text, is_chinese=False, line_width=800):
        """Compose one line of handwriting-style text as an RGBA image.

        Args:
            text: The string to render.
            is_chinese: Forwarded to ``apply_handwriting_effect``.
            line_width: Accepted for interface compatibility; currently unused
                (no wrapping is performed).

        Returns:
            An RGBA ``PIL.Image`` cropped to the visible ink, or the uncropped
            transparent canvas when nothing visible was drawn.
        """
        char_images = self.apply_handwriting_effect(text, is_chinese)
        max_height = max((img.height for img in char_images), default=0)

        # Gap before each glyph: a quarter of the previous glyph's width,
        # randomly varied; the first glyph has no gap.
        spacing_variation = self.style_params['word_spacing_variation']
        char_spacings = []
        for index in range(len(char_images)):
            if index == 0:
                char_spacings.append(0)
                continue
            base_spacing = char_images[index - 1].width // 4
            variation = int(
                base_spacing * random.uniform(-spacing_variation, spacing_variation)
            )
            char_spacings.append(base_spacing + variation)

        total_width = sum(img.width for img in char_images) + sum(char_spacings)

        # Vertical jitter in pixels; the default intensity 0.5 yields +/-2.
        max_jitter = int(round(abs(self.style_params['jitter_intensity']) * 4))
        # The margin must absorb the largest possible upward/downward shift.
        margin = max(10, max_jitter)

        line_img = Image.new(
            'RGBA',
            (total_width + 2 * margin, max_height + 2 * margin),
            (255, 255, 255, 0),
        )
        x_offset = margin
        for char_img, spacing in zip(char_images, char_spacings):
            x_offset += spacing
            y_offset = margin + random.randint(-max_jitter, max_jitter)
            line_img.paste(char_img, (x_offset, y_offset), char_img)
            x_offset += char_img.width

        # Crop by the alpha channel so the result does not depend on how the
        # installed Pillow treats transparent-but-coloured pixels in getbbox().
        ink_bbox = line_img.getchannel('A').getbbox()
        return line_img.crop(ink_bbox) if ink_bbox else line_img

    # ------------------------------------------------------------------
    # Image filters
    # ------------------------------------------------------------------
    def add_ink_effects(self, image):
        """Return a grey-scale (stored as RGB) copy with a slight ink bleed.

        The image is converted to mode ``L``; when ``ink_bleed`` is positive a
        Gaussian-blurred copy (radius ``ink_bleed * 2``) is blended in at 30 %.
        Colour information of the input is discarded.
        """
        gray = image.convert('L') if image.mode != 'L' else image.copy()

        ink_bleed = self.style_params['ink_bleed']
        if ink_bleed > 0:
            blurred = gray.filter(ImageFilter.GaussianBlur(ink_bleed * 2))
            result = Image.blend(gray, blurred, alpha=0.3)
        else:
            result = gray
        return result.convert('RGB')

    def add_paper_texture(self, image):
        """Blend ``image`` (90 %) with a noisy off-white paper texture (10 %).

        Returns ``image`` unchanged when ``background_texture`` is disabled.
        RGBA input keeps its alpha channel; any other non-RGB mode is converted
        to RGB first because ``Image.blend`` requires matching modes.
        """
        if not self.style_params['background_texture']:
            return image

        width, height = image.size
        texture = Image.fromarray(
            self._paper_texture_array(width, height, self._PAPER_COLOR)
        )

        if image.mode == 'RGBA':
            alpha = image.getchannel('A')
            blended_rgb = Image.blend(texture, image.convert('RGB'), alpha=0.9)
            return Image.merge('RGBA', (*blended_rgb.split(), alpha))

        rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
        return Image.blend(texture, rgb_image, alpha=0.9)

    def add_handwriting_artifacts(self, image):
        """Apply per-pixel brightness noise to mimic uneven pen pressure.

        Multi-channel images have their colour channels multiplied by noise
        drawn from N(1.0, 0.05); an alpha channel, if present, is left intact.
        Single-channel images are returned as an unmodified copy.
        """
        pixels = np.array(image)
        if pixels.ndim != 3:
            return image.copy()

        height, width, channels = pixels.shape
        color_channels = channels - 1 if image.mode in ('RGBA', 'LA') else channels
        pressure_mask = np.random.normal(1.0, 0.05, (height, width, 1))

        scaled = pixels.astype(np.float64)
        scaled[..., :color_channels] *= pressure_mask
        return Image.fromarray(np.clip(scaled, 0, 255).astype(np.uint8))

    def convert_page_to_handwriting(self, image):
        """Apply ink, paper-texture and pressure filters to one page image.

        This is a filter-only simplification: the page text is not recognised
        or re-rendered glyph by glyph.
        """
        result = self.add_ink_effects(image)
        result = self.add_paper_texture(result)
        return self.add_handwriting_artifacts(result)

    # ------------------------------------------------------------------
    # High-level operations
    # ------------------------------------------------------------------
    def create_sample_handwriting(self, output_path="handwriting_sample.png"):
        """Render a Chinese and an English sample line and save them as PNG.

        Args:
            output_path: Destination file; the image is always written in PNG
                format.

        Returns:
            The composed ``PIL.Image``.
        """
        chinese_text = "基于Python的PDF手写模拟器,可以生成自然的手写效果。"
        english_text = "This is a Python-based PDF handwriting simulator that creates natural-looking handwritten documents."

        chinese_line = self.create_handwritten_line(chinese_text, is_chinese=True)
        english_line = self.create_handwritten_line(english_text, is_chinese=False)

        # Lay the lines out first so the canvas is guaranteed to contain them.
        line_spacing = int(self.style_params['base_font_size'] * 1.5)
        chinese_y = 80
        english_y = chinese_y + chinese_line.height + line_spacing
        total_height = english_y + english_line.height + line_spacing
        max_width = max(chinese_line.width, english_line.width)

        result = Image.new('RGB', (max_width + 40, total_height),
                           color=self._PAPER_COLOR)

        draw = ImageDraw.Draw(result)
        title_font = self._load_font(self.chinese_font_paths[0], 28)
        draw.text((20, 20), "手写效果示例", fill=(50, 50, 50), font=title_font)

        # The lines are RGBA; use them as their own paste mask.
        result.paste(chinese_line, (20, chinese_y), chinese_line)
        result.paste(english_line, (20, english_y), english_line)

        result = self.add_paper_texture(result)

        result.save(output_path, "PNG")
        print(f"手写样本已保存到: {output_path}")
        return result

    def process_pdf(self, pdf_path, output_path="handwritten_output.pdf", dpi=150):
        """Filter every page of a PDF and write the result to a new PDF.

        Args:
            pdf_path: Source PDF.
            output_path: Destination PDF.
            dpi: Resolution used to rasterise the source pages.

        Returns:
            The list of processed page images.
        """
        print(f"正在处理PDF文件: {pdf_path}")
        print("将PDF转换为图像...")
        images = self.pdf_to_images(pdf_path, dpi=dpi)

        print(f"处理 {len(images)} 页...")
        processed_images = []
        for i, img in enumerate(images):
            print(f"处理第 {i+1}/{len(images)} 页...")
            processed_images.append(self.convert_page_to_handwriting(img))

        print("生成PDF文件...")
        self.images_to_pdf(processed_images, output_path)
        print(f"处理完成! 输出文件: {output_path}")
        return processed_images

    def images_to_pdf(self, images, output_path):
        """Write ``images`` to ``output_path``, one letter-size page per image.

        Each image is scaled to fit the page while keeping its aspect ratio.
        Nothing is written when ``images`` is empty.
        """
        if not images:
            return

        # drawImage accepts a file name or an ImageReader, not a raw byte
        # stream, so in-memory PNG data has to be wrapped.
        from reportlab.lib.utils import ImageReader

        c = canvas.Canvas(output_path, pagesize=letter)
        width, height = letter
        last_index = len(images) - 1
        for i, img in enumerate(images):
            img_buffer = BytesIO()
            img.save(img_buffer, format="PNG")
            img_buffer.seek(0)
            c.drawImage(ImageReader(img_buffer), 0, 0, width, height,
                        preserveAspectRatio=True)
            # save() finishes the final page, so only break between pages.
            if i < last_index:
                c.showPage()
        c.save()

    # ------------------------------------------------------------------
    # Console UI
    # ------------------------------------------------------------------
    def interactive_demo(self):
        """Run a console menu until the user chooses to exit.

        Failures while generating a sample or processing a PDF are reported
        and the menu keeps running.
        """
        print("=" * 50)
        print("PDF手写模拟器 - 交互式演示")
        print("=" * 50)
        while True:
            print("\n请选择操作:")
            print("1. 创建手写样本")
            print("2. 处理PDF文件")
            print("3. 调整手写参数")
            print("4. 退出")
            choice = input("\n请输入选项 (1-4): ").strip()
            if choice == "1":
                output_file = input("输入输出文件名 (默认: handwriting_sample.png): ").strip()
                if not output_file:
                    output_file = "handwriting_sample.png"
                try:
                    self.create_sample_handwriting(output_file)
                except Exception as exc:  # UI boundary: report and keep the menu alive
                    print(f"生成失败: {exc}")
            elif choice == "2":
                pdf_file = input("输入PDF文件路径: ").strip()
                if not os.path.exists(pdf_file):
                    print("文件不存在!")
                    continue
                output_file = input("输入输出PDF文件名 (默认: handwritten_output.pdf): ").strip()
                if not output_file:
                    output_file = "handwritten_output.pdf"
                try:
                    self.process_pdf(pdf_file, output_file)
                except Exception as exc:  # UI boundary: report and keep the menu alive
                    print(f"处理失败: {exc}")
            elif choice == "3":
                self.adjust_parameters()
            elif choice == "4":
                print("感谢使用PDF手写模拟器!")
                break
            else:
                print("无效选项,请重新选择")

    def adjust_parameters(self):
        """Interactively change one style parameter.

        ``background_texture`` is toggled; every other parameter is read as a
        number, and parameters that must be integers are rounded. Invalid
        input leaves the current value untouched.
        """
        print("\n当前参数:")
        for key, value in self.style_params.items():
            print(f" {key}: {value}")
        print("\n可以调整的参数:")
        print(" 1. 字体大小变化 (size_variation)")
        print(" 2. 旋转角度变化 (rotation_variation)")
        print(" 3. 颜色变化 (color_variation)")
        print(" 4. 抖动强度 (jitter_intensity)")
        print(" 5. 墨水扩散 (ink_bleed)")
        print(" 6. 背景纹理 (background_texture)")
        param_choice = input("\n选择要调整的参数 (1-6): ").strip()
        param_map = {
            "1": "size_variation",
            "2": "rotation_variation",
            "3": "color_variation",
            "4": "jitter_intensity",
            "5": "ink_bleed",
            "6": "background_texture",
        }
        if param_choice not in param_map:
            print("无效选项")
            return

        param_name = param_map[param_choice]
        current_value = self.style_params[param_name]
        if param_name == "background_texture":
            new_value = not current_value
        else:
            try:
                new_value = self._coerce_param(
                    param_name, input(f"输入新值 (当前: {current_value}): ")
                )
            except (ValueError, OverflowError):
                print("无效输入,保持原值")
                return
        self.style_params[param_name] = new_value
        print(f"{param_name} 已设置为: {new_value}")
# Usage example
def main():
    """Write the default handwriting sample, then open the interactive menu."""
    demo = HandwritingSimulator()
    # Produce the sample image first so there is output even if the user
    # leaves the menu immediately.
    print("正在创建手写样本...")
    demo.create_sample_handwriting()
    # Hand control to the console menu.
    demo.interactive_demo()


if __name__ == "__main__":
    main()
首先需要安装必要的Python库:
pip install PyMuPDF pillow reportlab opencv-python matplotlib numpy scipy
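手写效果模拟:逐字符随机改变字号、旋转角度、墨色、字间距和垂直位置。
纸张效果:在米白色背景上叠加轻微的随机噪声纹理。
墨水效果:将高斯模糊后的图像与原图混合,模拟墨水扩散。
PDF处理:用PyMuPDF将每页渲染为图像,处理后用ReportLab重新生成PDF。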
直接运行演示:
python pdf_handwriting_simulator.py
处理PDF文件:
simulator = HandwritingSimulator()
simulator.process_pdf("input.pdf", "handwritten_output.pdf")
创建手写样本:
simulator.create_sample_handwriting("my_handwriting.png")
这个模拟器提供了基础的手写效果,可以根据需要进一步调整参数以获得更自然的效果。注意:process_pdf 目前只对整页图像应用墨水、纸张和笔压滤镜,并不会识别页面中的文字并重新以手写体渲染。