feat: 实现Word文档标题格式自动调整功能

- 新增WordFormatter工具,支持多种编号格式(中文/阿拉伯/罗马/字母)
- 支持用户自定义格式输入(如"一. 一.1 一.1.1")
- 支持从参考Word文档提取格式
- 在project new命令中集成格式调整交互
- 新增UserFeedbackNode支持目录优化循环
- 优化TocAgent工作流,支持用户反馈优化

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
sladro 2025-09-30 15:39:39 +08:00
parent 922c43e65b
commit 5d16d8badc
7 changed files with 812 additions and 156 deletions

View File

@ -5,8 +5,8 @@
"technical_count": 6,
"commercial_count": 3,
"deviation_count": 0,
"chapter_count": 8,
"execution_time": 259.9994738101959,
"chapter_count": 7,
"execution_time": 415.01412439346313,
"warnings": [
"AI审查: 5条优化建议"
],
@ -58,79 +58,29 @@
"deviation_items": [],
"chapters": [
{
"id": "evaluation_index",
"id": "chapter_1",
"title": "评标索引表(技术评分完全对应)",
"level": 1,
"score": null,
"template_placeholder": "{{evaluation_index_content}}",
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_1_after_sales_service",
"title": "售后服务",
"level": 1,
"score": 3.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_2_1_warranty_service",
"title": "质保期服务承诺",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_3_1_warranty_scope",
"title": "质保期限及范围",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_2_service_response",
"title": "质保期内服务响应机制",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_3_fault_handling",
"title": "质保期故障处理流程",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_4_service_guarantee",
"title": "质保期服务保障措施",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
}
]
}
]
},
{
"id": "chapter_1_equipment_specification",
"title": "设备规格",
"id": "chapter_2",
"title": "设备规格 (8.0分)",
"level": 1,
"score": 8.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_2_1_hardware_config",
"id": "chapter_2_1",
"title": "硬件配置方案",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_3_1_core_hardware",
"id": "chapter_2_1_1",
"title": "核心硬件技术参数",
"level": 3,
"score": 0.0,
@ -138,7 +88,7 @@
"children": []
},
{
"id": "chapter_3_2_performance_indicators",
"id": "chapter_2_1_2",
"title": "设备性能指标说明",
"level": 3,
"score": 0.0,
@ -146,16 +96,16 @@
"children": []
},
{
"id": "chapter_3_3_hardware_compatibility",
"title": "硬件兼容性与扩展性",
"id": "chapter_2_1_3",
"title": "硬件兼容性分析",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_4_config_optimization",
"title": "硬件配置优化方案",
"id": "chapter_2_1_4",
"title": "配置方案优势阐述",
"level": 3,
"score": 0.0,
"template_placeholder": null,
@ -166,71 +116,62 @@
]
},
{
"id": "chapter_1_software_design",
"title": "软件功能设计",
"id": "chapter_3",
"title": "技术方案 (13.0分)",
"level": 1,
"score": 0.0,
"score": 13.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_2_1_software_functions",
"id": "chapter_3_1",
"title": "软件功能设计",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_3_1_core_modules",
"title": "核心功能模块",
"id": "chapter_3_1_1",
"title": "核心功能模块设计",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_2_user_interaction",
"title": "用户交互功能",
"id": "chapter_3_1_2",
"title": "用户交互界面设计",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_3_data_management",
"title": "数据管理功能",
"id": "chapter_3_1_3",
"title": "数据处理与分析功能",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_4_system_management",
"title": "系统管理功能",
"id": "chapter_3_1_4",
"title": "系统集成与接口设计",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
}
]
}
]
},
{
"id": "chapter_1_system_architecture",
"title": "系统架构设计",
"level": 1,
"score": 0.0,
"template_placeholder": null,
"children": [
},
{
"id": "chapter_2_1_architecture_design",
"id": "chapter_3_2",
"title": "系统架构设计",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_3_1_overall_architecture",
"id": "chapter_3_2_1",
"title": "总体架构设计",
"level": 3,
"score": 0.0,
@ -238,16 +179,16 @@
"children": []
},
{
"id": "chapter_3_2_technology_selection",
"title": "技术架构选型",
"id": "chapter_3_2_2",
"title": "技术选型与框架设计",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_3_deployment_architecture",
"title": "系统部署架构",
"id": "chapter_3_2_3",
"title": "性能与扩展性设计",
"level": 3,
"score": 0.0,
"template_placeholder": null,
@ -258,87 +199,54 @@
]
},
{
"id": "chapter_1_project_implementation",
"title": "项目实施方案",
"level": 1,
"score": 0.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_2_1_project_plan",
"title": "项目计划",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_2_2_implementation_team",
"title": "实施团队",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_2_3_schedule_arrangement",
"title": "进度安排",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": []
}
]
},
{
"id": "chapter_1_quality_security",
"title": "质量安全",
"id": "chapter_4",
"title": "质量安全 (5.0分)",
"level": 1,
"score": 5.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_2_1_network_security",
"id": "chapter_4_1",
"title": "网络安全防护体系",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_3_1_border_security",
"title": "网络边界安全防护",
"id": "chapter_4_1_1",
"title": "网络安全架构设计",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_2_intrusion_detection",
"title": "入侵检测与防御系统",
"id": "chapter_4_1_2",
"title": "网络安全防护措施",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_3_security_audit",
"title": "网络安全审计与监控",
"id": "chapter_4_1_3",
"title": "网络安全监控与预警",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_4_access_control",
"title": "网络访问控制策略",
"id": "chapter_4_1_4",
"title": "网络安全应急响应",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_5_security_emergency",
"title": "网络安全应急预案",
"id": "chapter_4_1_5",
"title": "网络安全管理制度",
"level": 3,
"score": 0.0,
"template_placeholder": null,
@ -349,21 +257,21 @@
]
},
{
"id": "chapter_1_compliance_response",
"title": "合规响应",
"id": "chapter_5",
"title": "合规响应 (3.0分)",
"level": 1,
"score": 3.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_2_1_technical_strength",
"id": "chapter_5_1",
"title": "技术实力",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_3_1_core_team",
"id": "chapter_5_1_1",
"title": "核心技术团队介绍",
"level": 3,
"score": 0.0,
@ -371,24 +279,214 @@
"children": []
},
{
"id": "chapter_3_2_technical_certification",
"title": "技术资质与认证",
"id": "chapter_5_1_2",
"title": "技术研发能力展示",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_3_rd_capability",
"title": "技术研发能力",
"id": "chapter_5_1_3",
"title": "技术设备与工具配置",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_3_4_implementation_experience",
"title": "技术实施经验",
"id": "chapter_5_1_4",
"title": "技术成果与专利情况",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_5_1_5",
"title": "技术质量管理体系",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
}
]
}
]
},
{
"id": "chapter_6",
"title": "培训服务",
"level": 1,
"score": 0.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_6_1",
"title": "培训计划",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_6_2",
"title": "培训内容",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_6_3",
"title": "培训方式",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": []
}
]
},
{
"id": "chapter_7",
"title": "售后服务 (3.0分)",
"level": 1,
"score": 3.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_7_1",
"title": "质保期服务承诺",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_7_1_1",
"title": "质保期限与范围",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_7_1_2",
"title": "质保期内服务内容",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_7_1_3",
"title": "质保期响应机制",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_7_1_4",
"title": "质保期满后服务延续方案",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
}
]
},
{
"id": "chapter_7_2",
"title": "技术支持服务",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_7_2_1",
"title": "技术支持团队配置",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_7_2_2",
"title": "技术支持响应时间",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_7_2_3",
"title": "技术支持服务方式",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
}
]
},
{
"id": "chapter_7_3",
"title": "维护服务",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_7_3_1",
"title": "定期维护计划",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_7_3_2",
"title": "故障处理流程",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_7_3_3",
"title": "备品备件保障",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
}
]
},
{
"id": "chapter_7_4",
"title": "客户服务保障",
"level": 2,
"score": 0.0,
"template_placeholder": null,
"children": [
{
"id": "chapter_7_4_1",
"title": "客户服务热线",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_7_4_2",
"title": "客户满意度调查",
"level": 3,
"score": 0.0,
"template_placeholder": null,
"children": []
},
{
"id": "chapter_7_4_3",
"title": "客户投诉处理机制",
"level": 3,
"score": 0.0,
"template_placeholder": null,

View File

@ -18,6 +18,8 @@ from ...nodes.toc import (
AdjustChaptersNode,
FinalizeChaptersNode
)
from ...nodes.toc.user_feedback import UserFeedbackNode
from ...nodes.toc.optimize_with_feedback import OptimizeWithFeedbackNode
from ...nodes.toc.base_mixins import WorkflowUtilsMixin
logger = logging.getLogger(__name__)
@ -49,7 +51,9 @@ class TocAgentBuilder(AgentBuilder):
.add_node(ReviewStructureNode()) \
.add_node(ApplyReviewSuggestionsNode()) \
.add_node(AdjustChaptersNode()) \
.add_node(FinalizeChaptersNode())
.add_node(FinalizeChaptersNode()) \
.add_node(UserFeedbackNode()) \
.add_node(OptimizeWithFeedbackNode())
# 设置入口点
builder.set_entry("group_criteria")
@ -99,8 +103,28 @@ class TocAgentBuilder(AgentBuilder):
{"continue": "finalize_chapters", "end": "END"}
)
# 最后一个节点直接结束
self.add_edge("finalize_chapters", "END")
# finalize_chapters连接到user_feedback
self.add_conditional_edge(
"finalize_chapters",
should_continue,
{"continue": "user_feedback", "end": "END"}
)
# user_feedback的条件边
def check_needs_optimization(state: Dict[str, Any]) -> str:
    """Pick the branch to follow after the user-feedback node.

    Args:
        state: Workflow state; only the "needs_optimization" entry is read.

    Returns:
        "optimize" when "needs_optimization" is truthy, otherwise "end"
        (a missing key is treated as False).
    """
    wants_changes = state.get("needs_optimization", False)
    return "optimize" if wants_changes else "end"
self.add_conditional_edge(
"user_feedback",
check_needs_optimization,
{"optimize": "optimize_with_feedback", "end": "END"}
)
# 优化后循环回finalize_chapters
self.add_edge("optimize_with_feedback", "finalize_chapters")
class TocAgent(BaseAgent, BaseAgentFactory):

View File

@ -282,9 +282,54 @@ def new(mode: str, presets: str):
if not success:
console.print("⚠️ Word模板生成失败但项目已创建", style="yellow")
else:
# Word生成成功后询问是否调整标题格式
if click.confirm("\n📝 是否需要调整Word文档的标题编号格式", default=False):
from ..tools.word_formatter import WordFormatter
console.print("\n请选择格式来源:", style="blue")
console.print(" 1. 默认格式 (1. / 1.1 / 1.1.1)")
console.print(" 2. 自定义输入格式")
console.print(" 3. 从参考Word文件提取")
format_choice = click.prompt("请选择", type=click.IntRange(1, 3), default=1)
format_dict = None
if format_choice == 1:
format_dict = WordFormatter.get_default_format()
console.print("✓ 使用默认格式", style="green")
elif format_choice == 2:
format_input = click.prompt(
"请输入格式规则例如1. 章 1.1 节 1.1.1 小节)",
default="1. 1.1 1.1.1"
)
format_dict = WordFormatter.parse_user_format(format_input)
console.print(f"✓ 解析格式: {format_dict}", style="green")
elif format_choice == 3:
reference_file = click.prompt("请输入参考Word文档路径", type=str)
if Path(reference_file).exists():
format_dict = WordFormatter.extract_format_from_word(reference_file)
console.print(f"✓ 提取格式: {format_dict}", style="green")
else:
console.print("⚠️ 文件不存在,使用默认格式", style="yellow")
format_dict = WordFormatter.get_default_format()
# 应用格式
with console.status("正在调整标题格式..."):
format_success = WordFormatter.apply_format_to_document(
str(template_file),
format_dict
)
if format_success:
console.print("✅ 标题格式调整成功!", style="green")
else:
console.print("⚠️ 格式调整失败", style="yellow")
# 显示创建结果
console.print("✅ 项目创建成功!", style="green")
console.print("\n✅ 项目创建成功!", style="green")
# 显示统计信息
console.print(f"\n💰 识别到商务评分项: {result.commercial_count}项(已排除)")

View File

@ -11,6 +11,8 @@ from .review_structure import ReviewStructureNode
from .apply_suggestions import ApplyReviewSuggestionsNode
from .adjust_chapters import AdjustChaptersNode
from .finalize_chapters import FinalizeChaptersNode
from .user_feedback import UserFeedbackNode
from .optimize_with_feedback import OptimizeWithFeedbackNode
# 辅助组件
from .factories import ChapterFactory
@ -32,6 +34,8 @@ __all__ = [
"ApplyReviewSuggestionsNode",
"AdjustChaptersNode",
"FinalizeChaptersNode",
"UserFeedbackNode",
"OptimizeWithFeedbackNode",
# 辅助组件
"ChapterFactory",

View File

@ -0,0 +1,138 @@
"""用户反馈收集节点
在目录生成完成后收集用户对目录结构的满意度和修改意见
"""
import logging
from typing import Dict, List, Any
from rich.console import Console
from ..base import BaseNode, NodeContext
from ...tools.parser import DocumentChapter
from .base_mixins import TocNodeBase
logger = logging.getLogger(__name__)
class UserFeedbackNode(BaseNode, TocNodeBase):
    """Workflow node that shows the generated outline and asks the user for feedback.

    Every exit path updates the state (through ``self._update_state``) with
    three keys: ``should_continue``, ``user_feedback`` and
    ``needs_optimization``.
    """

    @property
    def name(self) -> str:
        """Name of this node."""
        return "user_feedback"

    @property
    def description(self) -> str:
        """Short description of this node."""
        return "收集用户对目录的反馈"

    def execute(self, state: Dict[str, Any], context: NodeContext) -> Dict[str, Any]:
        """Run the feedback step by delegating to ``self.safe_execute``.

        NOTE(review): ``safe_execute`` is defined outside this class
        (presumably on a base class or mixin); its error handling is not
        visible here. ``context`` is not used by this node.
        """
        return self.safe_execute(self._do_user_feedback, state, "收集用户对目录的反馈")

    def _do_user_feedback(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Display the outline, ask whether it is acceptable and collect comments.

        Args:
            state: Workflow state. Must contain "final_chapters"; may contain
                an "interaction_handler" callable that is invoked with the
                keyword arguments ``interaction_type``, ``prompt``, ``key``
                (and ``default`` for confirmations).

        Returns:
            The result of ``self._update_state``. ``needs_optimization`` is
            True only when the user rejected the outline and typed a
            non-blank comment.

        Raises:
            ValueError: If ``validate_required_fields`` reports that
                "final_chapters" is missing.
        """
        if not self.validate_required_fields(state, ["final_chapters"]):
            raise ValueError("缺少最终章节数据")

        final_chapters = state.get("final_chapters", [])

        interaction_handler = state.get("interaction_handler")
        if not interaction_handler:
            # No way to talk to the user (silent / programmatic run): finish.
            logger.info("无交互处理器,跳过用户反馈收集")
            return self._finish_without_optimization(state)

        # Show the outline between two separator rules.
        console = Console()
        console.print("\n" + "="*60, style="dim")
        console.print("📚 生成的标书目录结构:", style="bold blue")
        console.print("="*60, style="dim")
        console.print(self._format_chapters_for_display(final_chapters))
        console.print("="*60 + "\n", style="dim")

        satisfied = interaction_handler(
            interaction_type="confirm",
            prompt="您对上述生成的目录结构满意吗?",
            default=True,
            key="user_satisfied_with_toc"
        )
        if satisfied:
            logger.info("用户对目录结构满意,结束工作流")
            return self._finish_without_optimization(state)

        feedback = interaction_handler(
            interaction_type="text",
            prompt="请输入您的修改意见(例如:调整章节顺序、增加缺失内容、优化章节标题等)",
            key="user_toc_feedback"
        )
        if not feedback or not feedback.strip():
            # A rejection without any concrete comment is treated as acceptance.
            logger.info("用户未提供具体意见,默认满意")
            return self._finish_without_optimization(state)

        logger.info(f"收集到用户反馈: {feedback[:50]}...")
        return self._update_state(state,
                                  should_continue=True,
                                  user_feedback=feedback,
                                  needs_optimization=True)

    def _finish_without_optimization(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Update the state for every path that needs no further optimization."""
        return self._update_state(state,
                                  should_continue=False,
                                  user_feedback="",
                                  needs_optimization=False)

    def _format_chapters_for_display(self, chapters: List[DocumentChapter], indent_level: int = 0) -> str:
        """Render a chapter tree as indented text, one chapter per line.

        Top-level chapters are marked "1.", "2.", ...; second-level chapters
        "1)", "2)", ...; deeper levels use "-". A positive score is appended
        as " (x.x分)" unless the title already ends with exactly that text,
        which would otherwise show the score twice.

        Args:
            chapters: Chapters to render; ``title``, ``score`` and
                ``children`` are read from each one.
            indent_level: Nesting depth of ``chapters`` (0 for the top level).

        Returns:
            The rendered lines joined with newlines.
        """
        indent = " " * indent_level
        lines = []
        for position, chapter in enumerate(chapters, 1):
            if indent_level == 0:
                marker = f"{position}."
            elif indent_level == 1:
                marker = f"{position})"
            else:
                marker = "-"

            score_text = ""
            if chapter.score and chapter.score > 0:
                candidate = f" ({chapter.score:.1f}分)"
                # Titles may already carry their score, e.g. "设备规格 (8.0分)".
                if not chapter.title.rstrip().endswith(candidate.strip()):
                    score_text = candidate

            lines.append(f"{indent}{marker} {chapter.title}{score_text}")

            if chapter.children:
                lines.append(
                    self._format_chapters_for_display(chapter.children, indent_level + 1)
                )
        return "\n".join(lines)

View File

@ -55,12 +55,17 @@ class WordProcessor:
return False
def _add_chapters_to_doc(self, doc: Document, chapters: List[DocumentChapter]):
"""递归添加章节到文档(不含编号)"""
"""递归添加章节到文档"""
for chapter in chapters:
# 提取标题文本(去除现有编号)
title_text = self._extract_title_text(chapter.title)
# 检查标题是否已包含编号(如 "1." "1.1" 开头)
title_text = chapter.title
has_numbering = self._check_has_numbering(title_text)
# 使用Word的标题样式让Word自动编号
# 如果没有编号,尝试提取纯标题文本
if not has_numbering:
title_text = self._extract_title_text(title_text)
# 使用Word的标题样式
if chapter.level <= 9: # Word支持Heading 1-9
heading = doc.add_heading(title_text, level=chapter.level)
else:
@ -94,6 +99,28 @@ class WordProcessor:
if chapter.level == 1:
doc.add_paragraph() # 添加空行
def _check_has_numbering(self, title: str) -> bool:
"""检查标题是否包含编号
Args:
title: 标题文本
Returns:
True 如果标题以编号开头
"""
import re
# 匹配各种编号格式
patterns = [
r'^(\d+(?:\.\d+)*\.?\s+)', # 1. / 1.1 / 1.1.1
r'^([一二三四五六七八九十]+、\s+)', # 中文数字
r'^(\([0-9]+\)\s+)', # (1)
r'^([A-Z]\.\s+)', # A.
]
for pattern in patterns:
if re.match(pattern, title):
return True
return False
def _extract_title_text(self, title: str) -> str:
"""提取标题文本,去除编号"""
import re

View File

@ -0,0 +1,320 @@
"""Word文档标题格式调整工具
读取已生成的Word文档根据用户输入或参考文件调整标题编号格式
"""
import logging
import re
from enum import Enum
from pathlib import Path
from typing import Any, Dict, Optional

from docx import Document
logger = logging.getLogger(__name__)


class NumberingStyle(str, Enum):
    """Numbering styles in which a heading number can be rendered."""

    ARABIC = "arabic"              # 1, 2, 3
    CHINESE = "chinese"            # 一, 二, 三
    ROMAN_UPPER = "roman_upper"    # I, II, III
    ROMAN_LOWER = "roman_lower"    # i, ii, iii
    LETTER_UPPER = "letter_upper"  # A, B, C
    LETTER_LOWER = "letter_lower"  # a, b, c


# Chinese numerals for 1-10; the character at index i stands for i + 1.
_CHINESE_DIGITS = "一二三四五六七八九十"

# Chinese numeral -> integer value (1-10).
CHINESE_NUMBERS = {char: value for value, char in enumerate(_CHINESE_DIGITS, 1)}

# Regex for the leading component of a numbering pattern, with its style.
# Order matters: Roman numerals are tried before single letters, so an
# ambiguous "I." or "C." is read as Roman.
_LEAD_STYLES = (
    (r'[一二三四五六七八九十]+', NumberingStyle.CHINESE),
    (r'[IVXLCDM]+', NumberingStyle.ROMAN_UPPER),
    (r'[ivxlcdm]+', NumberingStyle.ROMAN_LOWER),
    (r'[A-Z]', NumberingStyle.LETTER_UPPER),
    (r'[a-z]', NumberingStyle.LETTER_LOWER),
    (r'\d+', NumberingStyle.ARABIC),
)

# One user-supplied pattern such as "一.", "1.1" or "一.1.1": a leading
# component, optional ".<digits>" components, then an optional "." / "、"
# suffix (captured as group 1).
_FORMAT_PART_PATTERNS = tuple(
    (re.compile(rf'{lead}(?:\.\d+)*([\.、]?)'), style)
    for lead, style in _LEAD_STYLES
)

# Numbering prefixes looked for at the start of reference-document headings.
_HEADING_PREFIX_PATTERNS = (
    (re.compile(r'^([一二三四五六七八九十]+)([\.、])'), NumberingStyle.CHINESE),
    (re.compile(r'^([IVXLCDM]+)([\.、])'), NumberingStyle.ROMAN_UPPER),
    (re.compile(r'^(\d+(?:\.\d+)*)([\.、]?)'), NumberingStyle.ARABIC),
)

# Prefixes stripped from a heading before it is renumbered. They cover
# everything _build_numbering can produce, so formatting a document twice
# does not stack numbers. Matching is case-insensitive.
_NUMBERING_PREFIXES = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'^\d+(?:\.\d+)*[\.、]?\s+',                     # "1. " / "1.1 " / "1、 "
        r'^[一二三四五六七八九十]+(?:\.\d+)+[\.、]?\s+',   # "一.1 " / "一.1.1 "
        r'^[一二三四五六七八九十]+(?:、|\.(?!\d))\s*',      # "一、" / "一. "
        r'^[IVXLCDM]+(?:(?:\.\d+)+[\.、]?|[\.、])\s+',    # "I. " / "IV.2 "
        r'^[A-Z](?:(?:\.\d+)+[\.、]?|[\.、])\s+',         # "A. " / "A.1 "
        r'^\([0-9]+\)\s+',                               # "(1) "
    )
)


class WordFormatter:
    """Rewrites the numbering prefix of the headings in a Word document."""

    @staticmethod
    def parse_user_format(format_str: str) -> Dict[int, Dict[str, Any]]:
        """Parse a whitespace-separated list of per-level numbering patterns.

        The n-th pattern configures heading level n. The style of a level
        is the style of the pattern's *leading* component, because that is
        the component ``_build_numbering`` renders in the configured style;
        all following components are Arabic.

        Args:
            format_str: For example "一. 一.1 一.1.1" or "1. 1.1 1.1.1".

        Returns:
            Mapping of level to ``{"style", "suffix", "pattern"}``; for
            "一. 一.1 一.1.1"::

                {
                    1: {"style": CHINESE, "suffix": ".", "pattern": "一."},
                    2: {"style": CHINESE, "suffix": "",  "pattern": "一.1"},
                    3: {"style": CHINESE, "suffix": "",  "pattern": "一.1.1"},
                }

            Unrecognized patterns are skipped with a warning. If nothing
            is recognized the default format is returned.
        """
        format_dict: Dict[int, Dict[str, Any]] = {}

        for level, part in enumerate(format_str.split(), 1):
            for pattern, style in _FORMAT_PART_PATTERNS:
                match = pattern.fullmatch(part)
                if match is None:
                    continue
                suffix = match.group(1)
                format_dict[level] = {
                    "style": style,
                    "suffix": suffix,
                    "pattern": part,
                }
                logger.info(f"识别第{level}级: {part} -> 样式:{style}, 后缀:{suffix}")
                break
            else:
                logger.warning(f"无法识别格式: {part}")

        if not format_dict:
            logger.warning("未识别到任何格式,使用默认")
            return WordFormatter.get_default_format()

        return format_dict

    @staticmethod
    def get_default_format() -> Dict[int, Dict[str, Any]]:
        """Return the default three-level format: "1." / "1.1" / "1.1.1"."""
        return {
            1: {"style": NumberingStyle.ARABIC, "suffix": ".", "pattern": "1."},
            2: {"style": NumberingStyle.ARABIC, "suffix": "", "pattern": "1.1"},
            3: {"style": NumberingStyle.ARABIC, "suffix": "", "pattern": "1.1.1"}
        }

    @staticmethod
    def apply_format_to_document(
        doc_path: str,
        format_dict: Dict[int, Dict[str, Any]],
        output_path: Optional[str] = None
    ) -> bool:
        """Renumber every "Heading N" paragraph of a Word document.

        Args:
            doc_path: Path of the document to read.
            format_dict: Per-level format as returned by
                ``parse_user_format``. A level without an entry uses the
                level-1 entry; if that is missing too (or ``format_dict``
                is empty/None) the default format is used.
            output_path: Where to save the result; ``None`` overwrites
                ``doc_path``.

        Returns:
            True on success, False if any exception occurred (it is logged).
        """
        try:
            doc = Document(doc_path)
            formats = format_dict or WordFormatter.get_default_format()
            fallback = formats.get(1) or WordFormatter.get_default_format()[1]

            counters: Dict[int, int] = {}  # {level: current number}
            for para in doc.paragraphs:
                level = WordFormatter._heading_level(para.style.name)
                if level is None:
                    continue

                counters[level] = counters.get(level, 0) + 1
                # A new heading restarts the numbering of all deeper levels.
                for deeper in [lv for lv in counters if lv > level]:
                    counters[deeper] = 0

                numbering = WordFormatter._build_numbering(
                    level,
                    counters,
                    formats.get(level, fallback)
                )
                clean_text = WordFormatter._remove_numbering(para.text)

                # Assigning para.text replaces the paragraph's runs, so
                # run-level formatting inside the heading is not preserved
                # (python-docx limitation).
                para.text = f"{numbering} {clean_text}"
                logger.debug(f"设置标题: {para.text}")

            save_path = output_path or doc_path
            doc.save(save_path)
            logger.info(f"格式应用成功: {save_path}")
            return True

        except Exception as e:
            logger.error(f"应用格式失败: {e}", exc_info=True)
            return False

    @staticmethod
    def _heading_level(style_name: Optional[str]) -> Optional[int]:
        """Return N for a style named "Heading N" (N >= 1), otherwise None."""
        match = re.fullmatch(r'Heading\s*(\d+)', style_name or "")
        if match is None:
            return None
        level = int(match.group(1))
        return level if level >= 1 else None

    @staticmethod
    def _build_numbering(
        level: int,
        counters: Dict[int, int],
        format_config: Dict[str, Any]
    ) -> str:
        """Build the numbering string for one heading.

        Args:
            level: Level of the heading (1 = top level).
            counters: Current counter per level. A missing or zero counter
                (a level skipped in the document) is rendered as 1, so a
                skipped level never yields "1.0.1".
            format_config: ``{"style", "suffix", ...}`` for this heading.

        Returns:
            For level 1 the styled number plus the suffix, e.g. "一.".
            For deeper levels the styled first number followed by Arabic
            numbers joined with ".", e.g. "一.1.1"; the suffix is not
            appended there.
        """
        style = format_config.get("style", NumberingStyle.ARABIC)
        suffix = format_config.get("suffix", ".")

        numbers = [counters.get(lv) or 1 for lv in range(1, level + 1)]
        if not numbers:
            return ""

        lead = WordFormatter._format_number(numbers[0], style)
        if level == 1:
            return f"{lead}{suffix}"
        return '.'.join([lead, *map(str, numbers[1:])])

    @staticmethod
    def _format_number(num: int, style: NumberingStyle) -> str:
        """Render ``num`` in the given numbering style.

        Args:
            num: Number to render.
            style: Target style.

        Returns:
            The rendered number. Chinese covers 1-99 and falls back to
            Arabic digits outside that range; letters cover 1-26 and fall
            back to "(num)".
        """
        if style == NumberingStyle.CHINESE:
            return WordFormatter._int_to_chinese(num)
        if style == NumberingStyle.ROMAN_UPPER:
            return WordFormatter._int_to_roman(num).upper()
        if style == NumberingStyle.ROMAN_LOWER:
            return WordFormatter._int_to_roman(num).lower()
        if style == NumberingStyle.LETTER_UPPER:
            return chr(64 + num) if 1 <= num <= 26 else f"({num})"  # A = 65
        if style == NumberingStyle.LETTER_LOWER:
            return chr(96 + num) if 1 <= num <= 26 else f"({num})"  # a = 97
        return str(num)  # ARABIC

    @staticmethod
    def _int_to_chinese(num: int) -> str:
        """Render 1-99 as a Chinese numeral (十一, 二十, ...); else Arabic digits."""
        if not 1 <= num <= 99:
            return str(num)
        tens, ones = divmod(num, 10)
        ones_text = _CHINESE_DIGITS[ones - 1] if ones else ""
        if tens == 0:
            return ones_text
        # 10-19 are written without a leading "一": 十, 十一, ...
        tens_text = "" if tens == 1 else _CHINESE_DIGITS[tens - 1]
        return f"{tens_text}十{ones_text}"

    @staticmethod
    def _int_to_roman(num: int) -> str:
        """Convert a positive integer to an upper-case Roman numeral.

        Returns an empty string for ``num <= 0``.
        """
        if num <= 0:
            return ''
        numerals = (
            (1000, 'M'), (900, 'CM'), (500, 'D'), (400, 'CD'),
            (100, 'C'), (90, 'XC'), (50, 'L'), (40, 'XL'),
            (10, 'X'), (9, 'IX'), (5, 'V'), (4, 'IV'), (1, 'I'),
        )
        pieces = []
        for value, symbol in numerals:
            count, num = divmod(num, value)
            pieces.append(symbol * count)
        return ''.join(pieces)

    @staticmethod
    def _remove_numbering(text: str) -> str:
        """Strip a leading numbering prefix from a heading.

        Each prefix pattern is applied in turn to the start of the text,
        then surrounding whitespace is removed. Text without a recognized
        prefix is returned stripped but otherwise unchanged.
        """
        for pattern in _NUMBERING_PREFIXES:
            text = pattern.sub('', text)
        return text.strip()

    @staticmethod
    def extract_format_from_word(reference_file: str) -> Dict[int, Dict[str, Any]]:
        """Derive the per-level format from the headings of a reference document.

        For each heading level, the first "Heading N" paragraph whose text
        starts with a Chinese, upper-case Roman or Arabic number decides
        that level's format.

        Args:
            reference_file: Path of the reference Word document.

        Returns:
            The extracted format; the default format if no heading carries
            a recognizable number or the document cannot be read.
        """
        try:
            doc = Document(reference_file)
            format_patterns: Dict[int, Dict[str, Any]] = {}

            for para in doc.paragraphs:
                level = WordFormatter._heading_level(para.style.name)
                if level is None or level in format_patterns:
                    continue

                text = para.text.strip()
                for pattern, style in _HEADING_PREFIX_PATTERNS:
                    match = pattern.match(text)
                    if match is None:
                        continue
                    number_part, suffix = match.group(1), match.group(2)
                    format_patterns[level] = {
                        "style": style,
                        "suffix": suffix,
                        "pattern": f"{number_part}{suffix}"
                    }
                    logger.info(f"从参考文档提取: 级别{level} = {number_part}{suffix}")
                    break

            if not format_patterns:
                logger.warning(f"未从 {reference_file} 提取到格式")
                return WordFormatter.get_default_format()

            return format_patterns

        except Exception as e:
            logger.error(f"提取Word格式失败: {e}")
            return WordFormatter.get_default_format()