refactor: 使用Word原生编号功能替代手动编号

- 删除WordProcessor中的手动编号逻辑
- 添加_extract_title_text提取纯文本标题
- 修改TocGeneratorAgent生成不含编号的标题
- 让Word通过Heading样式自动管理编号
- 支持Word原生目录功能

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
sladro 2025-09-27 10:39:53 +08:00
parent 2ff96ea544
commit 24880dcb71
2 changed files with 49 additions and 92 deletions

View File

@ -138,7 +138,7 @@ def generate_first_level_node(state: TocGeneratorState) -> TocGeneratorState:
total_score = sum(c.max_score for c in criteria_list)
chapter_id = f"chapter_{chapter_index:02d}_{category}"
category_name = CATEGORY_NAMES.get(category, category)
chapter_title = f"{chapter_index}. {category_name}"
chapter_title = category_name
if total_score > 0:
chapter_title += f" ({total_score}分)"
@ -306,20 +306,12 @@ def finalize_chapters_node(state: TocGeneratorState) -> TocGeneratorState:
if not has_index:
index_chapter = DocumentChapter(
id="evaluation_index",
title="1. 评标索引表(技术评分完全对应)",
title="评标索引表(技术评分完全对应)", # 不包含编号
level=1,
template_placeholder="{{evaluation_index_content}}"
)
final_chapters.insert(0, index_chapter)
# 重新编号
for i, chapter in enumerate(final_chapters[1:], 2):
# 安全处理章节编号
title_parts = chapter.title.split(".")
if len(title_parts) >= 2:
old_number = title_parts[0]
chapter.title = chapter.title.replace(f"{old_number}.", f"{i}.", 1)
state["final_chapters"] = final_chapters
state["current_step"] = "finalize"
state["should_continue"] = False
@ -352,9 +344,9 @@ def _generate_ai_sub_chapters(criteria_list: List[ScoringCriteria],
{chr(10).join(criteria_info)}
生成要求
1. 为每个评分项生成对应的二级标题
2. 重要评分项可添加三级子标题
3. 格式规范符合标书要求
1. 为每个评分项生成对应的子标题名称不要包含编号
2. 重要评分项可添加三级子标题不要包含编号
3. 只返回标题文本编号由Word自动管理
返回JSON格式
{{
@ -381,23 +373,14 @@ def _generate_ai_sub_chapters(criteria_list: List[ScoringCriteria],
sub_chapters_data = result_data.get("sub_chapters", [])
sub_chapters = []
# 安全处理章节号提取
parent_parts = parent_chapter.title.split(".")
parent_number = parent_parts[0] if parent_parts else "1"
for i, sub_data in enumerate(sub_chapters_data, 1):
# 清理标题,移除可能的错误编号
# 直接使用标题文本,不添加编号
title = sub_data.get("title", f"子标题{i}")
if "." in title and title[0].isdigit():
parts = title.split(" ", 1)
if len(parts) > 1:
title = parts[1]
correct_title = f"{parent_number}.{i} {title}"
sub_chapter = DocumentChapter(
id=f"{parent_chapter.id}_sub_{i:02d}",
title=correct_title,
title=title, # 不添加编号
level=sub_data.get("level", 2),
score=sub_data.get("score", 0),
template_placeholder=f"{{{{{parent_chapter.id}_sub_{i:02d}_content}}}}"
@ -406,16 +389,10 @@ def _generate_ai_sub_chapters(criteria_list: List[ScoringCriteria],
# 处理三级标题
for j, child_data in enumerate(sub_data.get("children", []), 1):
child_title = child_data.get("title", f"三级标题{j}")
if "." in child_title and child_title[0].isdigit():
parts = child_title.split(" ", 1)
if len(parts) > 1:
child_title = parts[1]
correct_child_title = f"{parent_number}.{i}.{j} {child_title}"
child_chapter = DocumentChapter(
id=f"{parent_chapter.id}_sub_{i:02d}_{j:02d}",
title=correct_child_title,
title=child_title, # 不添加编号
level=child_data.get("level", 3),
template_placeholder=f"{{{{{parent_chapter.id}_sub_{i:02d}_{j:02d}_content}}}}"
)
@ -438,30 +415,23 @@ def _generate_template_sub_chapters(criteria: ScoringCriteria,
parent_chapter: DocumentChapter,
template_file: Optional[str]) -> List[DocumentChapter]:
"""基于模板生成子标题"""
# 提供默认结构
# 从父章节标题中提取章节号,处理 "2. 技术方案 (10分)" 这种格式
parent_title = parent_chapter.title
if "." in parent_title:
parent_number = parent_title.split(".")[0].strip()
else:
parent_number = "1"
# 提供默认结构(不包含编号)
default_sub_chapters = [
DocumentChapter(
id=f"{parent_chapter.id}_def_01",
title=f"{parent_number}.1 方案概述",
title="方案概述", # 不包含编号
level=2,
template_placeholder=f"{{{{{parent_chapter.id}_def_01_content}}}}"
),
DocumentChapter(
id=f"{parent_chapter.id}_def_02",
title=f"{parent_number}.2 具体实施",
title="具体实施", # 不包含编号
level=2,
template_placeholder=f"{{{{{parent_chapter.id}_def_02_content}}}}"
),
DocumentChapter(
id=f"{parent_chapter.id}_def_03",
title=f"{parent_number}.3 保障措施",
title="保障措施", # 不包含编号
level=2,
template_placeholder=f"{{{{{parent_chapter.id}_def_03_content}}}}"
)

View File

@ -26,28 +26,28 @@ class WordProcessor:
def create_template_from_chapters(self, chapters: List[DocumentChapter], output_path: str, project_name: str = "标书项目") -> bool:
"""根据章节结构创建Word模板"""
try:
# 在生成Word之前重新编号所有章节
self._renumber_chapters_for_word(chapters)
doc = Document()
# 配置多级编号样式
self._setup_numbering_styles(doc)
# 设置文档标题
title = doc.add_heading(project_name, 0)
title.alignment = WD_ALIGN_PARAGRAPH.CENTER
# 添加目录占位符
doc.add_paragraph()
toc_para = doc.add_paragraph("【此处为目录请在Word中按Ctrl+A全选后按F9更新")
toc_para = doc.add_paragraph("【此处为目录请在Word中插入目录:引用→目录→自动目录")
toc_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
doc.add_page_break()
# 递归添加章节
# 递归添加章节(不进行手动编号)
self._add_chapters_to_doc(doc, chapters)
# 保存文档
doc.save(output_path)
logger.info(f"Word模板已生成: {output_path}")
logger.info(f"Word模板已生成(使用原生编号): {output_path}")
return True
except Exception as e:
@ -55,15 +55,18 @@ class WordProcessor:
return False
def _add_chapters_to_doc(self, doc: Document, chapters: List[DocumentChapter]):
"""递归添加章节到文档"""
"""递归添加章节到文档(不含编号)"""
for chapter in chapters:
# 添加标题
if chapter.level <= 3:
heading = doc.add_heading(chapter.title, level=chapter.level)
# 提取标题文本(去除现有编号)
title_text = self._extract_title_text(chapter.title)
# 使用Word的标题样式让Word自动编号
if chapter.level <= 9: # Word支持Heading 1-9
heading = doc.add_heading(title_text, level=chapter.level)
else:
# 超过3级的用普通段落加粗
# 超过9级用普通段落加粗
para = doc.add_paragraph()
run = para.add_run(chapter.title)
run = para.add_run(title_text)
run.bold = True
# 为有内容的章节添加占位符
@ -91,44 +94,28 @@ class WordProcessor:
if chapter.level == 1:
doc.add_paragraph() # 添加空行
def _renumber_chapters_for_word(self, chapters: List[DocumentChapter]):
"""为Word生成重新编号所有章节"""
logger.info("开始为Word生成重新编号章节...")
for i, chapter in enumerate(chapters, 1):
# 重新编号一级章节
chapter.title = self._update_chapter_number(chapter.title, str(i))
logger.info(f"一级章节重编号: {chapter.title}")
# 重新编号子章节
for j, sub_chapter in enumerate(chapter.children, 1):
sub_chapter.title = self._update_chapter_number(sub_chapter.title, f"{i}.{j}")
logger.info(f" 子章节重编号: {sub_chapter.title}")
# 重新编号三级章节
for k, child_chapter in enumerate(sub_chapter.children, 1):
child_chapter.title = self._update_chapter_number(child_chapter.title, f"{i}.{j}.{k}")
logger.info(f" 三级章节重编号: {child_chapter.title}")
logger.info("章节重编号完成")
def _update_chapter_number(self, title: str, new_number: str) -> str:
"""更新章节标题中的编号,保持标题内容不变"""
def _extract_title_text(self, title: str) -> str:
"""提取标题文本,去除编号"""
import re
# 匹配并移除开头的编号(如 "1. ", "1.1 ", "1.1.1 "
pattern = r'^(\d+(?:\.\d+)*\.?\s+)'
return re.sub(pattern, '', title.strip())
# 使用正则表达式匹配各种编号格式
# 匹配: 1. | 1.1 | 1.1.1 | 1.1.1. 等开头的编号格式
number_pattern = r'^(\d+(?:\.\d+)*\.?)\s*(.*)$'
match = re.match(number_pattern, title.strip())
def _setup_numbering_styles(self, doc: Document):
"""配置标题的多级编号样式"""
# 确保标题样式存在并配置编号格式
styles = doc.styles
if match:
# 提取原有内容(去掉编号)
content = match.group(2).strip()
new_title = f"{new_number} {content}"
logger.info(f"编号转换: '{title}''{new_title}'")
return new_title
# 没有找到编号格式,直接添加编号
new_title = f"{new_number} {title.strip()}"
logger.info(f"编号添加: '{title}''{new_title}'")
return new_title
# 为Heading 1-3配置编号格式提示
# 注意python-docx不直接支持编号格式设置
# 但通过使用标题样式Word会自动应用其默认的多级列表
for i in range(1, 4):
style_name = f'Heading {i}'
try:
if style_name in styles:
style = styles[style_name]
# 确保样式不被隐藏
style.hidden = False
style.quick_style = True
except Exception as e:
logger.debug(f"配置样式 {style_name} 时出错: {e}")