- 实现 API 端点 /api/config 用于获取和保存配置 - 添加配置信息脱敏功能,保护 API 密钥等敏感数据 - 集成配置验证逻辑,确保时区等参数有效性 - 在前端添加完整的配置管理页面界面 - 实现配置项的动态编辑和保存功能 - 添加连接设置功能用于 WebSocket 连接配置 - 提供多标签页界面分别管理不同配置模块 - 实现配置变更后的实时预览和保存确认
109 lines
3.7 KiB
Python
109 lines
3.7 KiB
Python
import fitz
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml.ns import qn
from docx.shared import Pt, Cm, RGBColor
|
||
|
||
# Source PDF and destination .docx (absolute paths on the author's machine).
pdf_path = r'C:\Users\qwer\.picobot\media\ws\aa56c052-ea10-4bc1-aed4-7d06770b6fd9_夜读 _ 明白了这4点,就不难养出有主体性的孩子.pdf'
output_path = r'C:\Users\qwer\.picobot\media\夜读_明白了这4点_就不难养出有主体性的孩子.docx'

pdf_doc = fitz.open(pdf_path)
doc = Document()

# Page margins: 2.54 cm top/bottom, 3.18 cm left/right on every section.
for section in doc.sections:
    section.top_margin = Cm(2.54)
    section.bottom_margin = Cm(2.54)
    section.left_margin = Cm(3.18)
    section.right_margin = Cm(3.18)

# Body text style: 12 pt SimSun, 1.5 line spacing, 24 pt first-line indent.
style = doc.styles['Normal']
style.font.name = '宋体'
# Font.name only fills the Latin (ascii/hAnsi) font slots; Chinese characters
# are rendered with the eastAsia slot, so it has to be set explicitly.
style.element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
style.font.size = Pt(12)
style.paragraph_format.line_spacing = 1.5
style.paragraph_format.first_line_indent = Pt(24)
|
||
|
||
def add_run(paragraph, text, bold=False, size=None, color=None, italic=False, font_name=None):
    """Append a formatted run to ``paragraph`` and return it.

    Args:
        paragraph: Object exposing ``add_run(text)`` (a python-docx paragraph).
        text: Text content of the new run.
        bold: Value assigned to ``run.bold``.
        size: Font size in points; left untouched when falsy.
        color: ``(r, g, b)`` tuple unpacked into ``RGBColor``; left untouched
            when falsy.
        italic: Value assigned to ``run.italic``.
        font_name: Typeface name; left untouched when falsy.

    Returns:
        The run created by ``paragraph.add_run(text)``.
    """
    run = paragraph.add_run(text)
    run.bold = bold
    if size:
        run.font.size = Pt(size)
    if color:
        run.font.color.rgb = RGBColor(*color)
    run.italic = italic
    if font_name:
        run.font.name = font_name
        # Font.name only fills the Latin (ascii/hAnsi) font slots; without the
        # eastAsia slot a CJK typeface such as '黑体' is ignored for Chinese text.
        run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
    return run
|
||
|
||
# Gather the text of every page, dropping pages that are empty after stripping.
page_texts = (page.get_text().strip() for page in pdf_doc)
full_text = [chunk for chunk in page_texts if chunk]

# Flatten into individual non-blank lines with surrounding whitespace removed.
all_text = '\n'.join(full_text)
lines = [stripped for stripped in map(str.strip, all_text.split('\n')) if stripped]
|
||
|
||
# Lines that exactly equal one of these are rendered as section headings.
sections_headers = ['塑教育,提倡积极养育', '懂互动,给予丰盈幸福',
                    '有边界,养出人生底气', '稳情绪,才能赢得孩子']
# Lines that exactly equal one of these (advert boilerplate) are dropped.
skip_lines = ['南方都市报电商官方账号。', '南都甄选', '公众号']


def _is_quote_line(line):
    """Return True when ``line`` should be rendered as a pull quote.

    A line wrapped in straight double quotes always qualifies. A line wrapped
    in curly double quotes (U+201C ... U+201D) qualifies only when it is
    shorter than 60 characters. The curly quotes are written as escapes so the
    check survives editors that normalise quote characters.
    """
    straight = line.startswith('"') and line.endswith('"')
    curly = (line.startswith('\u201c') and line.endswith('\u201d')
             and len(line) < 60)
    return straight or curly


def _add_flush_paragraph(text, alignment=None, space_before=None,
                         space_after=None, **run_kwargs):
    """Add a paragraph without first-line indent holding one formatted run.

    ``alignment``, ``space_before`` and ``space_after`` (the latter two in
    points) are applied only when given; ``run_kwargs`` go to ``add_run``.
    """
    p = doc.add_paragraph()
    if alignment is not None:
        p.alignment = alignment
    p.paragraph_format.first_line_indent = Pt(0)
    if space_before is not None:
        p.paragraph_format.space_before = Pt(space_before)
    if space_after is not None:
        p.paragraph_format.space_after = Pt(space_after)
    add_run(p, text, **run_kwargs)
    return p


first = True
for line in lines:
    # Skip advert lines.
    if line in skip_lines:
        continue

    if first:
        # The first kept line is the main title.
        _add_flush_paragraph(line, alignment=WD_PARAGRAPH_ALIGNMENT.CENTER,
                             space_after=12,
                             bold=True, size=22, font_name='黑体')
        first = False

    elif _is_quote_line(line):
        # Pull quote: centred grey italics.
        _add_flush_paragraph(line, alignment=WD_PARAGRAPH_ALIGNMENT.CENTER,
                             space_before=6, space_after=6,
                             italic=True, size=12, color=(102, 102, 102))

    elif line in ('2026年6月14日 22:28 广东', '南方都市报'):
        # Publication date / source: small, grey, right-aligned.
        _add_flush_paragraph(line, alignment=WD_PARAGRAPH_ALIGNMENT.RIGHT,
                             size=10.5, color=(128, 128, 128))

    elif line in sections_headers:
        # One of the four section headings.
        _add_flush_paragraph(line, space_before=18, space_after=6,
                             bold=True, size=15, font_name='黑体')

    elif line.startswith(('作者:', '统筹:', '图片:', '投稿邮箱:')):
        # Credits (author, coordinator, pictures, submission e-mail).
        _add_flush_paragraph(line, size=10.5, color=(128, 128, 128))

    elif '转载自' in line or '把世界当成' in line:
        # Closing notes at the end of the article.
        _add_flush_paragraph(line, space_before=6,
                             size=10.5, color=(102, 102, 102))

    else:
        # Ordinary body text uses the 'Normal' style unchanged.
        doc.add_paragraph(line)

pdf_doc.close()
doc.save(output_path)
print('转换完成!')
|