fix: 修复章节编号错乱问题并优化子标题生成

## 主要修复

- 修复子标题编号与父章节编号不匹配的问题
- 从Agent层移除编号处理逻辑，改为在Word生成时动态计算编号
- 优化编号识别算法，支持多级编号格式（1.、1.1、1.1.1等）

## 技术改进

- 在WordProcessor中实现动态章节编号功能
- 使用正则表达式准确提取编号和内容
- 简化Agent工作流，专注于内容生成而非格式化
- 添加original_index字段到ScoringCriteria模型

## 工作流优化

- 调整LangGraph节点执行顺序：生成章节→映射→生成子标题→AI审核→最终确定
- 章节顺序严格遵循招标文件评分表原始顺序
- 职责分离：Agent专注内容，Word处理器专注格式

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-26 09:52:38 +08:00 · 2025-09-26 09:52:38 +08:00 · a01fa47a00
commit a01fa47a00
parent 43f56eeffa
3 changed files with 1224 additions and 5 deletions
--- a/src/bidmaster/agents/analysis.py
+++ b/src/bidmaster/agents/analysis.py
--- a/src/bidmaster/tools/parser.py
+++ b/src/bidmaster/tools/parser.py
@ -40,6 +40,7 @@ class ScoringCriteria(BaseModel):
    description: str = Field(default="", description="评分要求描述")
    category: TechnicalCategory = Field(default=TechnicalCategory.OTHER, description="技术类别")
    chapter_id: str = Field(..., description="对应章节ID")
+    original_index: int = Field(default=0, description="在评分表中的原始出现顺序")


 class DeviationItem(BaseModel):
@ -142,6 +143,9 @@ class BidStructure(BaseModel):
    deviation_file: str = Field(default="", description="偏离表文件路径")
    template_file: str = Field(default="", description="模板文件路径")

+    # AI审查结果
+    structure_review: dict[str, Any] = Field(default_factory=dict, description="AI结构审查结果")
+

 class BidParser:
    """招标文件解析器"""
@ -264,9 +268,17 @@ class BidParser:
            if pd.isna(row[columns['item_name']]):
                continue

+            # 安全地处理max_score，防止NaN或None值
+            max_score_raw = row[columns['max_score']]
+            try:
+                max_score = float(max_score_raw) if pd.notna(max_score_raw) else 0.0
+            except (ValueError, TypeError):
+                max_score = 0.0
+                logger.warning(f"无法解析评分项'{row[columns['item_name']]}'的分值: {max_score_raw}")
+
            criterion = ScoringCriteria(
                item_name=str(row[columns['item_name']]).strip(),
-                max_score=float(row[columns['max_score']]),
+                max_score=max_score,
                description=str(row.get(columns.get('description', ''), '')).strip(),
                category=TechnicalCategory.OTHER,  # 表格解析默认为OTHER，AI会重新分类
                chapter_id=f"chapter_{i+1:02d}"
@ -402,12 +414,21 @@ class BidParser:
                    except ValueError:
                        category_enum = TechnicalCategory.OTHER

+                    # 安全地处理max_score，防止None值
+                    max_score_raw = item.get("max_score", 0)
+                    try:
+                        max_score = float(max_score_raw) if max_score_raw is not None else 0.0
+                    except (ValueError, TypeError):
+                        max_score = 0.0
+                        logger.warning(f"无法解析评分项'{item.get('item_name', '')}'的分值: {max_score_raw}")
+
                    criterion = ScoringCriteria(
                        item_name=item.get("item_name", ""),
-                        max_score=float(item.get("max_score", 0)),
+                        max_score=max_score,
                        description=item.get("description", ""),
                        category=category_enum,
-                        chapter_id=f"chapter_{i+1:02d}"
+                        chapter_id=f"chapter_{i+1:02d}",
+                        original_index=i  # 保存在评分表中的原始出现顺序
                    )
                    criteria.append(criterion)

--- a/src/bidmaster/tools/word.py
+++ b/src/bidmaster/tools/word.py
@ -26,6 +26,9 @@ class WordProcessor:
    def create_template_from_chapters(self, chapters: List[DocumentChapter], output_path: str, project_name: str = "标书项目") -> bool:
        """根据章节结构创建Word模板"""
        try:
+            # 在生成Word之前，重新编号所有章节
+            self._renumber_chapters_for_word(chapters)
+
            doc = Document()

            # 设置文档标题
@ -86,4 +89,46 @@ class WordProcessor:

            # 一级章节后添加适当间距
            if chapter.level == 1:
-                doc.add_paragraph()  # 添加空行
+                doc.add_paragraph()  # 添加空行
+
+    def _renumber_chapters_for_word(self, chapters: List[DocumentChapter]):
+        """Rewrite the number prefix of every chapter title before Word output.
+
+        Titles are updated in place on the given chapter objects, three
+        levels deep: top-level chapters get ``1``, ``2``, ...; their children
+        get ``1.1``, ``1.2``, ...; and the children of those get ``1.1.1``,
+        ``1.1.2``, ... Numbers follow the current list order (1-based).
+
+        Args:
+            chapters: Top-level chapters; each exposes ``title`` and
+                ``children``.
+        """
+        logger.info("开始为Word生成重新编号章节...")
+
+        for top_no, top in enumerate(chapters, start=1):
+            # Level 1: "1", "2", ...
+            top_label = str(top_no)
+            top.title = self._update_chapter_number(top.title, top_label)
+            logger.info(f"一级章节重编号: {top.title}")
+
+            for sub_no, sub in enumerate(top.children, start=1):
+                # Level 2: "<parent>.<n>"
+                sub_label = f"{top_label}.{sub_no}"
+                sub.title = self._update_chapter_number(sub.title, sub_label)
+                logger.info(f"  子章节重编号: {sub.title}")
+
+                for leaf_no, leaf in enumerate(sub.children, start=1):
+                    # Level 3: "<parent>.<n>.<m>"
+                    leaf_label = f"{sub_label}.{leaf_no}"
+                    leaf.title = self._update_chapter_number(leaf.title, leaf_label)
+                    logger.info(f"    三级章节重编号: {leaf.title}")
+
+        logger.info("章节重编号完成")
+
+    def _update_chapter_number(self, title: str, new_number: str) -> str:
+        """Replace the leading chapter number of a title with ``new_number``.
+
+        A leading number is recognised only when it is clearly a numbering
+        prefix:
+
+        * a dotted multi-level number such as ``1.1`` or ``1.1.1`` (optionally
+          followed by ``.`` or ``、``), even when the text follows directly;
+        * a single integer followed by ``.`` or ``、`` (``1.``, ``1、``);
+        * a single integer followed by whitespace or the end of the title.
+
+        A bare integer glued to the text (``5G网络方案``, ``2025年度计划``,
+        ``3D建模``) is treated as part of the title and kept; the new number
+        is simply put in front of it.
+
+        Args:
+            title: Chapter title, with or without an existing number.
+            new_number: Number to put in front, e.g. ``"2"`` or ``"2.3.1"``.
+
+        Returns:
+            ``"<new_number> <content>"``, or just ``new_number`` when the
+            title has no text besides its old number.
+        """
+        import re
+
+        stripped = title.strip()
+        number_pattern = (
+            r'^(?:\d+(?:\.\d+)+[.、]?'  # 1.1 | 1.1.1 | 1.1.1.
+            r'|\d+[.、]'  # 1. | 1、
+            r'|\d+(?=\s|$))'  # bare integer, only before whitespace / end
+            r'\s*(.*)$'
+        )
+        # DOTALL lets the content group span a line break inside the title,
+        # so such a title is still recognised as numbered.
+        match = re.match(number_pattern, stripped, re.DOTALL)
+
+        if match:
+            # Keep the original text, drop the old number.
+            content = match.group(1).strip()
+            # rstrip avoids a dangling space when there is no text left.
+            new_title = f"{new_number} {content}".rstrip()
+            logger.info(f"编号转换: '{title}' → '{new_title}'")
+            return new_title
+
+        # No numbering prefix found: put the new number in front.
+        new_title = f"{new_number} {stripped}".rstrip()
+        logger.info(f"编号添加: '{title}' → '{new_title}'")
+        return new_title