- 移除 TodoItem 中的 priority、created_at 和 updated_at 字段 - 强制每个任务都必须有唯一 id,且由用户负责生成 - 修改合并模式逻辑,merge=true 下保留未提及的旧任务 - 支持已完成和已取消任务重新激活(状态改回 pending 或 in_progress) - 禁止 in_progress 状态退回到 pending,必须标记为 completed 或 cancelled - 优化状态转换校验,允许特定状态间合法切换 - 简化任务变更消息,移除详细的新增/更新/移除统计 - 更新文档和示例,明确 id 必须由用户生成和使用 - 修复和补充测试,增强状态转换和合并模式验证 - 调整任务时间戳生成逻辑,统一使用当前时间及索引 - 该变更提供更合理的任务状态机械及管理模式,提升稳定性和易用性
971 lines
36 KiB
Python
971 lines
36 KiB
Python
#!/usr/bin/env python3
|
||
# Copyright (c) 2026 Lark Technologies Pte. Ltd.
|
||
# SPDX-License-Identifier: MIT
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import re
|
||
import sys
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
|
||
SKILL_ROOT = Path(__file__).resolve().parent.parent
|
||
ASSETS_DIR = SKILL_ROOT / "assets"
|
||
REFERENCES_DIR = SKILL_ROOT / "references"
|
||
TEMPLATES_DIR = ASSETS_DIR / "templates"
|
||
CATALOG_PATH = REFERENCES_DIR / "template-catalog.md"
|
||
DEFAULT_INDEX_PATH = REFERENCES_DIR / "template-index.json"
|
||
LIGHTWEIGHT_INDEX_SCHEMA_VERSION = "1.1.0"
|
||
|
||
|
||
class TemplateToolError(Exception):
|
||
pass
|
||
|
||
|
||
def fail(message: str) -> None:
|
||
raise TemplateToolError(message)
|
||
|
||
|
||
def read_file(file_path: str | Path) -> str:
|
||
return Path(file_path).read_text(encoding="utf-8")
|
||
|
||
|
||
def normalize_whitespace(value: str) -> str:
|
||
return re.sub(r"\s+", " ", value).strip()
|
||
|
||
|
||
def strip_markdown(value: str) -> str:
|
||
return normalize_whitespace(
|
||
re.sub(r"^-\s*", "", re.sub(r"`([^`]+)`", r"\1", value.replace("**", "")))
|
||
)
|
||
|
||
|
||
def strip_xml(value: str) -> str:
|
||
stripped = re.sub(r"<!\[CDATA\[([\s\S]*?)\]\]>", r"\1", value)
|
||
stripped = re.sub(r"<[^>]+>", " ", stripped)
|
||
stripped = stripped.replace(" ", " ")
|
||
stripped = stripped.replace("&", "&")
|
||
stripped = stripped.replace("<", "<")
|
||
stripped = stripped.replace(">", ">")
|
||
stripped = stripped.replace(""", '"')
|
||
stripped = stripped.replace("'", "'")
|
||
return normalize_whitespace(stripped)
|
||
|
||
|
||
def compact_object(value: dict[str, Any]) -> dict[str, Any]:
|
||
return {key: entry for key, entry in value.items() if entry is not None}
|
||
|
||
|
||
def tokenize_query(value: str) -> list[str]:
|
||
normalized = normalize_whitespace(value.lower())
|
||
if not normalized:
|
||
return []
|
||
|
||
def append_unique(target: list[str], token: str) -> None:
|
||
token = token.strip()
|
||
if token and token not in target:
|
||
target.append(token)
|
||
|
||
tokens: list[str] = []
|
||
for item in [item.strip() for item in re.split(r"[\s,/|,。;;::()()【】\[\]《》<>]+", normalized) if item.strip()]:
|
||
append_unique(tokens, item)
|
||
|
||
# Chinese prompts are often complete sentences without separators, e.g.
|
||
# "帮我做一个季度工作汇报PPT". Add CJK n-grams so domain phrases such as
|
||
# "工作汇报" or "季度复盘" can still match catalog metadata.
|
||
for phrase in re.findall(r"[\u3400-\u9fff]+", normalized):
|
||
if len(phrase) < 2:
|
||
continue
|
||
max_size = min(6, len(phrase))
|
||
for size in range(max_size, 1, -1):
|
||
for start in range(0, len(phrase) - size + 1):
|
||
append_unique(tokens, phrase[start : start + size])
|
||
|
||
synonym_tokens = {
|
||
"浅色": ["light"],
|
||
"白底": ["light"],
|
||
"明亮": ["light"],
|
||
"深色": ["dark"],
|
||
"黑底": ["dark"],
|
||
"暗色": ["dark"],
|
||
"多彩": ["colorful"],
|
||
"活泼": ["colorful", "casual"],
|
||
"正式": ["formal"],
|
||
"商务": ["formal"],
|
||
"轻松": ["casual"],
|
||
"创意": ["creative"],
|
||
}
|
||
for token in list(tokens):
|
||
for keyword, aliases in synonym_tokens.items():
|
||
if keyword in token:
|
||
for alias in aliases:
|
||
append_unique(tokens, alias)
|
||
|
||
unique_tokens: list[str] = []
|
||
for token in tokens or [normalized]:
|
||
if token not in unique_tokens:
|
||
unique_tokens.append(token)
|
||
return unique_tokens
|
||
|
||
|
||
def parse_range_spec(range_spec: str) -> list[int]:
|
||
if not range_spec or not range_spec.strip():
|
||
fail("range is required")
|
||
numbers: set[int] = set()
|
||
for part in range_spec.split(","):
|
||
trimmed = part.strip()
|
||
if not trimmed:
|
||
continue
|
||
match = re.fullmatch(r"(\d+)(?:-(\d+))?", trimmed)
|
||
if not match:
|
||
fail(f"invalid range token: {trimmed}")
|
||
start = int(match.group(1))
|
||
end = int(match.group(2) or match.group(1))
|
||
if start < 1 or end < start:
|
||
fail(f"invalid range token: {trimmed}")
|
||
for index in range(start, end + 1):
|
||
numbers.add(index)
|
||
return sorted(numbers)
|
||
|
||
|
||
def compress_numbers(numbers: list[int]) -> str:
|
||
if not numbers:
|
||
return ""
|
||
parts: list[str] = []
|
||
start = numbers[0]
|
||
previous = numbers[0]
|
||
for current in numbers[1:]:
|
||
if current == previous + 1:
|
||
previous = current
|
||
continue
|
||
parts.append(f"{start}" if start == previous else f"{start}-{previous}")
|
||
start = current
|
||
previous = current
|
||
parts.append(f"{start}" if start == previous else f"{start}-{previous}")
|
||
return ",".join(parts)
|
||
|
||
|
||
def slice_array(items: list[Any], limit: int) -> list[Any]:
|
||
return items[: max(limit, 0)]
|
||
|
||
|
||
def count_tag(xml: str, tag_name: str) -> int:
|
||
return len(re.findall(fr"<{tag_name}\b", xml))
|
||
|
||
|
||
def extract_attribute(tag_source: str, name: str) -> str | None:
|
||
match = re.search(fr'{re.escape(name)}="([^"]+)"', tag_source)
|
||
return match.group(1) if match else None
|
||
|
||
|
||
def extract_numeric_attribute(tag_source: str, name: str) -> int | float | None:
|
||
raw = extract_attribute(tag_source, name)
|
||
if raw is None:
|
||
return None
|
||
try:
|
||
value = float(raw)
|
||
except ValueError:
|
||
return None
|
||
return int(value) if value.is_integer() else value
|
||
|
||
|
||
def sort_regions(regions: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||
return sorted(regions, key=lambda region: (region["y"], region["x"], region["kind"]))
|
||
|
||
|
||
def extract_slide_regions(slide_xml: str) -> list[dict[str, Any]]:
|
||
regions: list[dict[str, Any]] = []
|
||
for match in re.finditer(r"<shape\b([^>]*)>([\s\S]*?)</shape>", slide_xml):
|
||
attrs, content = match.group(1), match.group(2)
|
||
x = extract_numeric_attribute(attrs, "topLeftX")
|
||
y = extract_numeric_attribute(attrs, "topLeftY")
|
||
width = extract_numeric_attribute(attrs, "width")
|
||
height = extract_numeric_attribute(attrs, "height")
|
||
if all(value is not None for value in [x, y, width, height]):
|
||
text_type_match = re.search(r'textType="([^"]+)"', content)
|
||
regions.append(
|
||
{
|
||
"kind": "shape",
|
||
"type": extract_attribute(attrs, "type") or "shape",
|
||
"text_type": text_type_match.group(1) if text_type_match else None,
|
||
"x": x,
|
||
"y": y,
|
||
"width": width,
|
||
"height": height,
|
||
"area": width * height,
|
||
}
|
||
)
|
||
|
||
for match in re.finditer(r"<(img|table|chart)\b([^>]*)/?>", slide_xml):
|
||
kind, attrs = match.group(1), match.group(2)
|
||
x = extract_numeric_attribute(attrs, "topLeftX")
|
||
y = extract_numeric_attribute(attrs, "topLeftY")
|
||
width = extract_numeric_attribute(attrs, "width")
|
||
height = extract_numeric_attribute(attrs, "height")
|
||
if all(value is not None for value in [x, y, width, height]):
|
||
regions.append(
|
||
{
|
||
"kind": kind,
|
||
"type": kind,
|
||
"text_type": None,
|
||
"x": x,
|
||
"y": y,
|
||
"width": width,
|
||
"height": height,
|
||
"area": width * height,
|
||
}
|
||
)
|
||
return sort_regions(regions)
|
||
|
||
|
||
def build_bbox_summary(
|
||
regions: list[dict[str, Any]], slide_width: int | float | None, slide_height: int | float | None
|
||
) -> dict[str, Any]:
|
||
if not regions:
|
||
return {
|
||
"region_count": 0,
|
||
"occupied_bounds": None,
|
||
"regions": [],
|
||
"canvas": compact_object({"width": slide_width, "height": slide_height}),
|
||
}
|
||
|
||
min_x = regions[0]["x"]
|
||
min_y = regions[0]["y"]
|
||
max_x = regions[0]["x"] + regions[0]["width"]
|
||
max_y = regions[0]["y"] + regions[0]["height"]
|
||
for region in regions:
|
||
min_x = min(min_x, region["x"])
|
||
min_y = min(min_y, region["y"])
|
||
max_x = max(max_x, region["x"] + region["width"])
|
||
max_y = max(max_y, region["y"] + region["height"])
|
||
|
||
return {
|
||
"region_count": len(regions),
|
||
"canvas": compact_object({"width": slide_width, "height": slide_height}),
|
||
"occupied_bounds": {
|
||
"x": min_x,
|
||
"y": min_y,
|
||
"width": max_x - min_x,
|
||
"height": max_y - min_y,
|
||
},
|
||
"regions": slice_array(
|
||
[
|
||
compact_object(
|
||
{
|
||
"id": f'{region["kind"]}-{index + 1}',
|
||
"kind": region["kind"],
|
||
"type": region["type"],
|
||
"text_type": region.get("text_type"),
|
||
"x": region["x"],
|
||
"y": region["y"],
|
||
"width": region["width"],
|
||
"height": region["height"],
|
||
}
|
||
)
|
||
for index, region in enumerate(regions)
|
||
],
|
||
8,
|
||
),
|
||
}
|
||
|
||
|
||
def build_editable_regions(regions: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||
sorted_regions = sorted(regions, key=lambda region: region["area"], reverse=True)
|
||
return slice_array(
|
||
[
|
||
compact_object(
|
||
{
|
||
"id": f'{region["kind"]}-{index + 1}',
|
||
"kind": region["kind"],
|
||
"role": "image"
|
||
if region["kind"] == "img"
|
||
else region.get("text_type") or region["type"] or region["kind"],
|
||
"x": region["x"],
|
||
"y": region["y"],
|
||
"width": region["width"],
|
||
"height": region["height"],
|
||
}
|
||
)
|
||
for index, region in enumerate(sorted_regions)
|
||
],
|
||
8,
|
||
)
|
||
|
||
|
||
def detect_slide_layout_tags(
|
||
slide_xml: str, regions: list[dict[str, Any]], slide_width: int | float | None, slide_height: int | float | None
|
||
) -> list[str]:
|
||
tags: set[str] = set()
|
||
text_regions = [region for region in regions if region["kind"] == "shape" and region["type"] == "text"]
|
||
image_regions = [region for region in regions if region["kind"] == "img"]
|
||
table_regions = [region for region in regions if region["kind"] == "table"]
|
||
|
||
if table_regions:
|
||
tags.add("comparison-table")
|
||
|
||
if "<fillImg" in slide_xml:
|
||
tags.add("image-backed")
|
||
if text_regions:
|
||
tags.add("full-bleed-image-caption")
|
||
|
||
if text_regions and len(image_regions) == 1:
|
||
biggest_text = sorted(text_regions, key=lambda region: region["area"], reverse=True)[0]
|
||
biggest_image = image_regions[0]
|
||
if (
|
||
slide_width and biggest_image["width"] >= slide_width * 0.75
|
||
) or (
|
||
slide_height and biggest_image["height"] >= slide_height * 0.75
|
||
):
|
||
tags.add("full-bleed-image-caption")
|
||
elif biggest_text["x"] <= biggest_image["x"]:
|
||
tags.add("hero-text-left-image-right")
|
||
else:
|
||
tags.add("hero-image-left-text-right")
|
||
|
||
if len(text_regions) >= 4 and not image_regions:
|
||
distinct_x = len({round(region["x"] / 20) for region in text_regions})
|
||
distinct_y = len({round(region["y"] / 20) for region in text_regions})
|
||
if distinct_x >= 2 and distinct_y >= 2:
|
||
tags.add("2x2-metric-cards")
|
||
|
||
if len(text_regions) >= 2 and not image_regions:
|
||
width = slide_width or 960
|
||
left = any(region["x"] < width / 2 - 40 for region in text_regions)
|
||
right = any(region["x"] + region["width"] > width / 2 + 40 for region in text_regions)
|
||
if left and right:
|
||
tags.add("two-column-text")
|
||
|
||
if len(text_regions) <= 2 and not image_regions:
|
||
top_most = sorted(text_regions, key=lambda region: region["y"])[0] if text_regions else None
|
||
if top_most and top_most["y"] <= 120 and top_most["height"] <= 140:
|
||
tags.add("section-divider")
|
||
|
||
if not tags and text_regions:
|
||
tags.add("text-focused")
|
||
|
||
return sorted(tags)
|
||
|
||
|
||
def parse_theme_summary(theme_xml: str | None) -> dict[str, Any]:
|
||
if not theme_xml:
|
||
return {"has_theme_node": False, "text_styles": []}
|
||
|
||
text_styles_block = re.search(r"<textStyles>([\s\S]*?)</textStyles>", theme_xml)
|
||
text_styles: list[dict[str, Any]] = []
|
||
if text_styles_block:
|
||
for match in re.finditer(r"<(title|headline|sub-headline|body|caption)\b([^>]*)/?>", text_styles_block.group(1)):
|
||
text_styles.append(
|
||
compact_object(
|
||
{
|
||
"text_type": match.group(1),
|
||
"font_color": extract_attribute(match.group(2), "fontColor"),
|
||
"font_size": extract_attribute(match.group(2), "fontSize"),
|
||
"font_family": extract_attribute(match.group(2), "fontFamily"),
|
||
}
|
||
)
|
||
)
|
||
return {"has_theme_node": True, "text_styles": text_styles}
|
||
|
||
|
||
def extract_background_hint(slide_xml: str) -> str | None:
|
||
fill_color_match = re.search(r'<fillColor color="([^"]+)"', slide_xml)
|
||
if fill_color_match:
|
||
return fill_color_match.group(1)
|
||
fill_img_match = re.search(r'<fillImg src="([^"]+)"', slide_xml)
|
||
if fill_img_match:
|
||
return f"fillImg:{fill_img_match.group(1)}"
|
||
return None
|
||
|
||
|
||
def extract_title_hint(slide_xml: str) -> dict[str, str] | None:
|
||
type_priority = {"title": 5, "headline": 4, "sub-headline": 3, "body": 2, "caption": 1}
|
||
content_pattern = re.compile(
|
||
r'<content\b([^>]*)textType="(title|headline|sub-headline|body|caption)"([^>]*)>([\s\S]*?)</content>'
|
||
)
|
||
candidates: list[dict[str, Any]] = []
|
||
for match in content_pattern.finditer(slide_xml):
|
||
attrs = f"{match.group(1)} {match.group(3)}"
|
||
text = strip_xml(match.group(4))
|
||
if text:
|
||
candidates.append(
|
||
{
|
||
"text_type": match.group(2),
|
||
"text": text[:80],
|
||
"font_size": int(extract_attribute(attrs, "fontSize") or "0"),
|
||
"priority": type_priority.get(match.group(2), 0),
|
||
}
|
||
)
|
||
candidates.sort(key=lambda item: (-item["priority"], -item["font_size"], -len(item["text"])))
|
||
if candidates:
|
||
return {"text_type": candidates[0]["text_type"], "text": candidates[0]["text"]}
|
||
return None
|
||
|
||
|
||
def summarize_slide(slide_xml: str, slide_number: int, presentation_info: dict[str, Any] | None = None) -> dict[str, Any]:
|
||
presentation_info = presentation_info or {}
|
||
raw_width = presentation_info.get("width")
|
||
raw_height = presentation_info.get("height")
|
||
slide_width = int(float(raw_width)) if raw_width else None
|
||
slide_height = int(float(raw_height)) if raw_height else None
|
||
regions = extract_slide_regions(slide_xml)
|
||
return {
|
||
"slide_number": slide_number,
|
||
"title_hint": extract_title_hint(slide_xml),
|
||
"background_hint": extract_background_hint(slide_xml),
|
||
"layout_tags": detect_slide_layout_tags(slide_xml, regions, slide_width, slide_height),
|
||
"bbox_summary": build_bbox_summary(regions, slide_width, slide_height),
|
||
"editable_regions": build_editable_regions(regions),
|
||
"element_counts": {
|
||
"shape": count_tag(slide_xml, "shape"),
|
||
"img": count_tag(slide_xml, "img"),
|
||
"table": count_tag(slide_xml, "table"),
|
||
"chart": count_tag(slide_xml, "chart"),
|
||
"icon": count_tag(slide_xml, "icon"),
|
||
"line": count_tag(slide_xml, "line"),
|
||
"polyline": count_tag(slide_xml, "polyline"),
|
||
},
|
||
}
|
||
|
||
|
||
def aggregate_slides(slide_summaries: list[dict[str, Any]]) -> dict[str, Any]:
|
||
totals = {"shape": 0, "img": 0, "table": 0, "chart": 0, "icon": 0, "line": 0, "polyline": 0}
|
||
title_hints: list[str] = []
|
||
background_hints: list[str] = []
|
||
layout_tags: list[str] = []
|
||
for slide in slide_summaries:
|
||
for key, value in slide["element_counts"].items():
|
||
totals[key] += value
|
||
if slide.get("title_hint") and slide["title_hint"]["text"] not in title_hints:
|
||
title_hints.append(slide["title_hint"]["text"])
|
||
if slide.get("background_hint") and slide["background_hint"] not in background_hints:
|
||
background_hints.append(slide["background_hint"])
|
||
for tag in slide.get("layout_tags") or []:
|
||
if tag not in layout_tags:
|
||
layout_tags.append(tag)
|
||
return {
|
||
"slide_count": len(slide_summaries),
|
||
"title_hints": slice_array(title_hints, 4),
|
||
"background_hints": slice_array(background_hints, 4),
|
||
"layout_tags": layout_tags,
|
||
"element_totals": totals,
|
||
}
|
||
|
||
|
||
def parse_template_xml(template_path: str | Path) -> dict[str, Any]:
|
||
xml = read_file(template_path)
|
||
presentation_match = re.search(r"<presentation\b([^>]*)>", xml)
|
||
if not presentation_match:
|
||
fail(f"template missing presentation root: {template_path}")
|
||
opening_tag = presentation_match.group(0)
|
||
title_xml_match = re.search(r"<title>[\s\S]*?</title>", xml)
|
||
theme_xml_match = re.search(r"<theme>[\s\S]*?</theme>", xml)
|
||
slides = re.findall(r"<slide\b[\s\S]*?</slide>", xml)
|
||
slide_summaries = [
|
||
summarize_slide(
|
||
slide_xml,
|
||
index + 1,
|
||
{
|
||
"width": extract_attribute(presentation_match.group(1), "width"),
|
||
"height": extract_attribute(presentation_match.group(1), "height"),
|
||
},
|
||
)
|
||
for index, slide_xml in enumerate(slides)
|
||
]
|
||
return {
|
||
"xml": xml,
|
||
"opening_tag": opening_tag,
|
||
"width": extract_attribute(presentation_match.group(1), "width"),
|
||
"height": extract_attribute(presentation_match.group(1), "height"),
|
||
"title_xml": title_xml_match.group(0) if title_xml_match else None,
|
||
"title_text": strip_xml(title_xml_match.group(0)) if title_xml_match else None,
|
||
"theme_xml": theme_xml_match.group(0) if theme_xml_match else None,
|
||
"theme_summary": parse_theme_summary(theme_xml_match.group(0) if theme_xml_match else None),
|
||
"slides": slides,
|
||
"slide_summaries": slide_summaries,
|
||
}
|
||
|
||
|
||
def finalize_catalog_entry(entry: dict[str, Any] | None) -> dict[str, Any] | None:
|
||
if not entry:
|
||
return None
|
||
filename_stem = re.sub(r"\.xml$", "", entry["filename"])
|
||
template_id = f'{entry["category"]}--{filename_stem}'
|
||
return {
|
||
"template_id": template_id,
|
||
"filename": f"{template_id}.xml",
|
||
"category": entry["category"],
|
||
"category_label": entry["category_label"],
|
||
"scene": entry["scene"],
|
||
"is_general_template": entry["is_general_template"],
|
||
"catalog_slide_count": entry["catalog_slide_count"],
|
||
"tone": entry["tone"],
|
||
"formality": entry["formality"],
|
||
"palette": entry["palette"],
|
||
"structure": entry["structure"],
|
||
"page_types": entry["page_types"],
|
||
"use_cases": entry["use_cases"],
|
||
"ranges": entry["ranges"],
|
||
}
|
||
|
||
|
||
def parse_catalog(catalog_path: str | Path = CATALOG_PATH) -> list[dict[str, Any]]:
|
||
lines = read_file(catalog_path).splitlines()
|
||
entries: list[dict[str, Any]] = []
|
||
current_category: str | None = None
|
||
current_category_label: str | None = None
|
||
current_entry: dict[str, Any] | None = None
|
||
|
||
def push_current() -> None:
|
||
nonlocal current_entry
|
||
finalized = finalize_catalog_entry(current_entry)
|
||
if finalized:
|
||
entries.append(finalized)
|
||
current_entry = None
|
||
|
||
for raw_line in lines:
|
||
line = raw_line.rstrip()
|
||
if line.startswith("## 快速匹配索引"):
|
||
break
|
||
|
||
category_match = re.match(r"^##\s+([a-z]+)\s+—\s+(.+)$", line)
|
||
if category_match:
|
||
push_current()
|
||
current_category = category_match.group(1)
|
||
current_category_label = category_match.group(2).strip()
|
||
continue
|
||
|
||
template_match = re.match(r"^###\s+(⭐\s+)?([^ ]+\.xml)\s+—\s+(.+)$", line)
|
||
if template_match:
|
||
push_current()
|
||
current_entry = {
|
||
"category": current_category,
|
||
"category_label": current_category_label,
|
||
"filename": template_match.group(2).strip(),
|
||
"scene": template_match.group(3).strip(),
|
||
"is_general_template": bool(template_match.group(1)),
|
||
"catalog_slide_count": None,
|
||
"tone": None,
|
||
"formality": None,
|
||
"palette": None,
|
||
"structure": None,
|
||
"page_types": None,
|
||
"use_cases": None,
|
||
"ranges": [],
|
||
}
|
||
continue
|
||
|
||
if not current_entry:
|
||
continue
|
||
|
||
plain = strip_markdown(line)
|
||
slide_count_match = re.search(r"(\d+)\s*页", plain)
|
||
if slide_count_match:
|
||
current_entry["catalog_slide_count"] = int(slide_count_match.group(1))
|
||
|
||
tone_match = re.search(r"色调:([^|]+)\|\s*正式度:(.+)$", plain)
|
||
if tone_match:
|
||
current_entry["tone"] = tone_match.group(1).strip()
|
||
current_entry["formality"] = tone_match.group(2).strip()
|
||
continue
|
||
|
||
if plain.startswith("配色:"):
|
||
current_entry["palette"] = plain[len("配色:") :].strip()
|
||
continue
|
||
|
||
if plain.startswith("结构:"):
|
||
current_entry["structure"] = plain[len("结构:") :].strip()
|
||
continue
|
||
|
||
if plain.startswith("页面类型:"):
|
||
current_entry["page_types"] = plain[len("页面类型:") :].strip()
|
||
continue
|
||
|
||
if plain.startswith("页型索引"):
|
||
_, _, ranges_raw = plain.partition(":")
|
||
ranges: list[dict[str, Any]] = []
|
||
for item in [part.strip() for part in ranges_raw.split("|") if part.strip()]:
|
||
match = re.match(r"^(.+?)\s+([0-9,\-\s无]+)$", item)
|
||
if not match:
|
||
ranges.append({"label": item, "range": "", "slide_numbers": []})
|
||
continue
|
||
range_text = normalize_whitespace(match.group(2))
|
||
range_text = "" if range_text == "无" else range_text
|
||
ranges.append(
|
||
{
|
||
"label": match.group(1).strip(),
|
||
"range": range_text,
|
||
"slide_numbers": parse_range_spec(range_text) if range_text else [],
|
||
}
|
||
)
|
||
current_entry["ranges"] = ranges
|
||
continue
|
||
|
||
if plain.startswith("适用:"):
|
||
current_entry["use_cases"] = plain[len("适用:") :].strip()
|
||
|
||
push_current()
|
||
return entries
|
||
|
||
|
||
def build_search_text(entry: dict[str, Any]) -> str:
|
||
values: list[str] = [
|
||
entry.get("template_id"),
|
||
entry.get("category"),
|
||
entry.get("category_label"),
|
||
entry.get("scene"),
|
||
entry.get("tone"),
|
||
entry.get("formality"),
|
||
entry.get("palette"),
|
||
entry.get("structure"),
|
||
entry.get("page_types"),
|
||
*(entry.get("layout_tags") or []),
|
||
entry.get("use_cases"),
|
||
*[f'{entry_range["label"]} {entry_range["range"]}' for entry_range in entry.get("ranges", [])],
|
||
]
|
||
return " ".join(str(value) for value in values if value).lower()
|
||
|
||
|
||
def build_index_data() -> dict[str, Any]:
|
||
catalog_entries = parse_catalog()
|
||
templates: list[dict[str, Any]] = []
|
||
for entry in catalog_entries:
|
||
template_path = TEMPLATES_DIR / entry["filename"]
|
||
xml_info = parse_template_xml(template_path)
|
||
layout_tags = sorted({tag for slide in xml_info["slide_summaries"] for tag in slide.get("layout_tags", [])})
|
||
templates.append(
|
||
{
|
||
"template_id": entry["template_id"],
|
||
"category": entry["category"],
|
||
"category_label": entry["category_label"],
|
||
"scene": entry["scene"],
|
||
"tone": entry["tone"],
|
||
"formality": entry["formality"],
|
||
"is_general_template": entry["is_general_template"],
|
||
"slide_count": len(xml_info["slides"]),
|
||
"presentation_title": xml_info["title_text"],
|
||
"palette": entry["palette"],
|
||
"structure": entry["structure"],
|
||
"page_types": entry["page_types"],
|
||
"layout_tags": layout_tags,
|
||
"use_cases": entry["use_cases"],
|
||
"ranges": [{"label": entry_range["label"], "range": entry_range["range"]} for entry_range in entry["ranges"]],
|
||
}
|
||
)
|
||
|
||
return {
|
||
"schema_version": LIGHTWEIGHT_INDEX_SCHEMA_VERSION,
|
||
"generated_at": datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z"),
|
||
"template_count": len(templates),
|
||
"templates": templates,
|
||
}
|
||
|
||
|
||
def load_index(index_path: str | Path = DEFAULT_INDEX_PATH) -> dict[str, Any]:
|
||
index_path = Path(index_path)
|
||
if index_path.exists():
|
||
existing = json.loads(read_file(index_path))
|
||
first_template = existing.get("templates", [None])[0] if existing.get("templates") else None
|
||
if first_template and first_template.get("layout_tags") and "bbox_summary" not in first_template:
|
||
return existing
|
||
return build_index_data()
|
||
|
||
|
||
def catalog_filename(entry: dict[str, Any]) -> str:
|
||
return f'{entry["template_id"].split("--", 1)[-1]}.xml'
|
||
|
||
|
||
def build_external_template_entry(template_path: Path) -> dict[str, Any]:
|
||
xml_info = parse_template_xml(template_path)
|
||
return {
|
||
"template_id": template_path.stem,
|
||
"scene": None,
|
||
"tone": None,
|
||
"formality": None,
|
||
"slide_count": len(xml_info["slides"]),
|
||
"presentation_title": xml_info["title_text"],
|
||
"palette": None,
|
||
"structure": None,
|
||
"page_types": [],
|
||
"layout_tags": sorted(
|
||
{tag for slide in xml_info["slide_summaries"] for tag in slide.get("layout_tags", [])}
|
||
),
|
||
"use_cases": None,
|
||
"theme_summary": xml_info["theme_summary"],
|
||
"ranges": [],
|
||
}
|
||
|
||
|
||
def find_template_entry(
|
||
index_data: dict[str, Any],
|
||
selector: str,
|
||
*,
|
||
fail_on_ambiguous: bool = True,
|
||
) -> dict[str, Any] | None:
|
||
normalized = re.sub(r"\.xml$", "", selector)
|
||
matches = [
|
||
entry
|
||
for entry in index_data["templates"]
|
||
if entry["template_id"] == normalized
|
||
or f'{entry["template_id"]}.xml' == selector
|
||
or catalog_filename(entry) == selector
|
||
or catalog_filename(entry) == f"{normalized}.xml"
|
||
]
|
||
if len(matches) > 1 and fail_on_ambiguous:
|
||
template_ids = ", ".join(entry["template_id"] for entry in matches)
|
||
fail(f"template selector is ambiguous: {selector}; use one of: {template_ids}")
|
||
if len(matches) > 1:
|
||
return None
|
||
return matches[0] if matches else None
|
||
|
||
|
||
def resolve_template_reference(index_data: dict[str, Any], template_selector: str) -> dict[str, Any]:
|
||
if not template_selector:
|
||
fail("template selector is required")
|
||
|
||
input_path = Path(template_selector)
|
||
as_path = input_path
|
||
if not input_path.is_absolute():
|
||
as_path = (Path.cwd() / as_path).resolve()
|
||
if as_path.exists():
|
||
entry = find_template_entry(
|
||
index_data,
|
||
as_path.name,
|
||
fail_on_ambiguous=False,
|
||
) or build_external_template_entry(as_path)
|
||
return {"entry": entry, "path": as_path}
|
||
if input_path.is_absolute() or input_path.parent != Path("."):
|
||
fail(f"template not found: {template_selector}")
|
||
|
||
selector_name = Path(template_selector).name
|
||
entry = find_template_entry(index_data, selector_name)
|
||
if entry:
|
||
return {"entry": entry, "path": get_template_path(entry)}
|
||
|
||
fail(f"template not found: {template_selector}")
|
||
|
||
|
||
def resolve_template_entry(index_data: dict[str, Any], template_selector: str) -> dict[str, Any]:
|
||
return resolve_template_reference(index_data, template_selector)["entry"]
|
||
|
||
|
||
def resolve_range_selection(entry: dict[str, Any], options: dict[str, Any]) -> dict[str, Any]:
|
||
if options.get("label"):
|
||
matched_range = next((item for item in entry["ranges"] if item["label"] == options["label"]), None)
|
||
if not matched_range:
|
||
fail(f'range label not found: {options["label"]}')
|
||
slide_numbers = parse_range_spec(matched_range["range"]) if matched_range["range"] else []
|
||
if not slide_numbers:
|
||
fail(f'range label has no slides: {options["label"]}')
|
||
return {"label": matched_range["label"], "range": matched_range["range"], "slide_numbers": slide_numbers}
|
||
|
||
if not options.get("range"):
|
||
fail("either --range or --label is required")
|
||
slide_numbers = parse_range_spec(options["range"])
|
||
return {"label": None, "range": compress_numbers(slide_numbers), "slide_numbers": slide_numbers}
|
||
|
||
|
||
def get_template_path(entry: dict[str, Any]) -> Path:
|
||
return TEMPLATES_DIR / f'{entry["template_id"]}.xml'
|
||
|
||
|
||
def summarize_selection(index_data: dict[str, Any], template_selector: str, options: dict[str, Any]) -> dict[str, Any]:
|
||
reference = resolve_template_reference(index_data, template_selector)
|
||
entry = reference["entry"]
|
||
selection = resolve_range_selection(entry, options)
|
||
xml_info = parse_template_xml(reference["path"])
|
||
slide_summaries = [
|
||
xml_info["slide_summaries"][slide_number - 1]
|
||
for slide_number in selection["slide_numbers"]
|
||
if 0 < slide_number <= len(xml_info["slide_summaries"])
|
||
]
|
||
return {
|
||
"template": {
|
||
"template_id": entry["template_id"],
|
||
"scene": entry["scene"],
|
||
"tone": entry["tone"],
|
||
"formality": entry["formality"],
|
||
"slide_count": len(xml_info["slides"]),
|
||
"presentation_title": xml_info["title_text"],
|
||
"palette": entry["palette"],
|
||
"structure": entry["structure"],
|
||
"page_types": entry["page_types"],
|
||
"layout_tags": sorted({tag for slide in xml_info["slide_summaries"] for tag in slide.get("layout_tags", [])}),
|
||
"use_cases": entry["use_cases"],
|
||
},
|
||
"selection": selection,
|
||
"theme_summary": xml_info["theme_summary"],
|
||
"summary": aggregate_slides(slide_summaries),
|
||
"slides": slide_summaries,
|
||
}
|
||
|
||
|
||
def extract_selection_xml(index_data: dict[str, Any], template_selector: str, options: dict[str, Any]) -> str:
|
||
reference = resolve_template_reference(index_data, template_selector)
|
||
entry = reference["entry"]
|
||
selection = resolve_range_selection(entry, options)
|
||
xml_info = parse_template_xml(reference["path"])
|
||
selected_slides: list[str] = []
|
||
for slide_number in selection["slide_numbers"]:
|
||
if slide_number - 1 >= len(xml_info["slides"]) or slide_number <= 0:
|
||
fail(f"slide {slide_number} is out of range for {entry['template_id']}")
|
||
selected_slides.append(xml_info["slides"][slide_number - 1])
|
||
|
||
chunks = [xml_info["opening_tag"]]
|
||
if xml_info["title_xml"]:
|
||
chunks.append(f' {xml_info["title_xml"]}')
|
||
if xml_info["theme_xml"]:
|
||
chunks.append(f' {xml_info["theme_xml"]}')
|
||
chunks.extend(selected_slides)
|
||
chunks.append("</presentation>")
|
||
return "\n".join(chunks)
|
||
|
||
|
||
def search_templates(index_data: dict[str, Any], options: dict[str, Any]) -> list[dict[str, Any]]:
|
||
query = options.get("query", "") or ""
|
||
tokens = tokenize_query(query)
|
||
tone = options.get("tone")
|
||
formality = options.get("formality")
|
||
category = options.get("category")
|
||
layout_tag = options.get("layoutTag") or options.get("layout-tag")
|
||
limit = int(options.get("limit", 5))
|
||
|
||
ranked: list[dict[str, Any]] = []
|
||
for entry in index_data["templates"]:
|
||
if tone and entry["tone"] != tone:
|
||
continue
|
||
if formality and entry["formality"] != formality:
|
||
continue
|
||
if category and entry["category"] != category:
|
||
continue
|
||
if layout_tag and layout_tag not in (entry.get("layout_tags") or []):
|
||
continue
|
||
|
||
score = 0
|
||
if not query:
|
||
score = 1
|
||
else:
|
||
search_text = build_search_text(entry)
|
||
exact_id = entry["template_id"].lower() == query.lower()
|
||
if exact_id:
|
||
score += 100
|
||
for token in tokens:
|
||
if token in search_text:
|
||
score += len(token) * 10 if re.search(r"[\u3400-\u9fff]", token) else len(token) * 6
|
||
if entry.get("scene") and token in entry["scene"]:
|
||
score += 12
|
||
if entry.get("use_cases") and token in entry["use_cases"]:
|
||
score += 8
|
||
if entry.get("scene") and query in entry["scene"]:
|
||
score += 40
|
||
if entry.get("use_cases") and query in entry["use_cases"]:
|
||
score += 30
|
||
if score == 0:
|
||
continue
|
||
|
||
if entry.get("is_general_template"):
|
||
score -= 5
|
||
ranked.append(
|
||
{
|
||
"template_id": entry["template_id"],
|
||
"scene": entry["scene"],
|
||
"tone": entry["tone"],
|
||
"formality": entry["formality"],
|
||
"is_general_template": entry["is_general_template"],
|
||
"use_cases": entry["use_cases"],
|
||
"layout_tags": entry.get("layout_tags") or [],
|
||
"slide_count": entry["slide_count"],
|
||
"ranges": entry["ranges"],
|
||
"score": score,
|
||
}
|
||
)
|
||
|
||
ranked.sort(key=lambda item: (-item["score"], item["template_id"]))
|
||
return ranked[:limit]
|
||
|
||
|
||
def parse_cli_args(argv: list[str]) -> tuple[str | None, dict[str, Any]]:
|
||
if not argv:
|
||
return None, {}
|
||
command, *rest = argv
|
||
options: dict[str, Any] = {}
|
||
index = 0
|
||
while index < len(rest):
|
||
token = rest[index]
|
||
if not token.startswith("--"):
|
||
fail(f"unexpected argument: {token}")
|
||
key = token[2:]
|
||
next_token = rest[index + 1] if index + 1 < len(rest) else None
|
||
if next_token is None or next_token.startswith("--"):
|
||
options[key] = True
|
||
index += 1
|
||
continue
|
||
options[key] = next_token
|
||
index += 2
|
||
return command, options
|
||
|
||
|
||
def print_usage() -> None:
|
||
usage = [
|
||
"Usage:",
|
||
" python3 template_tool.py build-index [--out <path>]",
|
||
" python3 template_tool.py search --query <text> [--tone light|dark|colorful] [--formality formal|casual|creative] [--layout-tag <tag>] [--limit 3]",
|
||
" python3 template_tool.py summarize --template <template-id|path> (--range 1-2,5 | --label 封面)",
|
||
" python3 template_tool.py extract --template <template-id|path> (--range 1-2,5 | --label 封面) [--with-summary] [--out <path>]",
|
||
]
|
||
print("\n".join(usage), file=sys.stderr)
|
||
|
||
|
||
def write_json(value: Any) -> None:
|
||
print(json.dumps(value, ensure_ascii=False, indent=2))
|
||
|
||
|
||
def run_cli(argv: list[str] | None = None) -> None:
|
||
command, options = parse_cli_args(argv or sys.argv[1:])
|
||
if not command or command in {"--help", "help"}:
|
||
print_usage()
|
||
raise SystemExit(0)
|
||
|
||
if command == "build-index":
|
||
index_data = build_index_data()
|
||
output_path = Path(options["out"]).resolve() if options.get("out") else DEFAULT_INDEX_PATH
|
||
output_path.write_text(f'{json.dumps(index_data, ensure_ascii=False, indent=2)}\n', encoding="utf-8")
|
||
print(output_path)
|
||
return
|
||
|
||
if command == "search":
|
||
write_json(search_templates(load_index(), options))
|
||
return
|
||
|
||
if command == "summarize":
|
||
write_json(summarize_selection(load_index(), options.get("template"), options))
|
||
return
|
||
|
||
if command == "extract":
|
||
index_data = load_index()
|
||
xml = extract_selection_xml(index_data, options.get("template"), options)
|
||
if options.get("with-summary"):
|
||
summary = summarize_selection(index_data, options.get("template"), options)
|
||
write_json({"xml": xml, "selection": summary["selection"], "summary": summary["summary"], "slides": summary["slides"]})
|
||
return
|
||
if options.get("out"):
|
||
output_path = Path(options["out"]).resolve()
|
||
output_path.write_text(f"{xml}\n", encoding="utf-8")
|
||
print(output_path)
|
||
return
|
||
sys.stdout.write(f"{xml}\n")
|
||
return
|
||
|
||
print_usage()
|
||
fail(f"unknown command: {command}")
|
||
|
||
|
||
if __name__ == "__main__":
|
||
try:
|
||
run_cli()
|
||
except TemplateToolError as error:
|
||
print(f"template-tool error: {error}", file=sys.stderr)
|
||
raise SystemExit(1) from error
|