fix: 修复章节编号错乱问题并优化子标题生成

## 主要修复
- 修复子标题编号与父章节编号不匹配的问题
- 从Agent层移除编号处理逻辑,改为在Word生成时动态计算编号
- 优化编号识别算法,支持多级编号格式(1.、1.1、1.1.1等)

## 技术改进
- 在WordProcessor中实现动态章节编号功能
- 使用正则表达式准确提取编号和内容
- 简化Agent工作流,专注于内容生成而非格式化
- 添加original_index字段到ScoringCriteria模型

## 工作流优化
- 调整LangGraph节点执行顺序:生成章节→映射→生成子标题→AI审核→最终确定
- 章节顺序严格遵循招标文件评分表原始顺序
- 职责分离:Agent专注内容,Word处理器专注格式

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
sladro 2025-09-26 09:52:38 +08:00
parent 43f56eeffa
commit a01fa47a00
3 changed files with 1224 additions and 5 deletions

File diff suppressed because it is too large Load Diff

View File

@ -40,6 +40,7 @@ class ScoringCriteria(BaseModel):
description: str = Field(default="", description="评分要求描述")
category: TechnicalCategory = Field(default=TechnicalCategory.OTHER, description="技术类别")
chapter_id: str = Field(..., description="对应章节ID")
original_index: int = Field(default=0, description="在评分表中的原始出现顺序")
class DeviationItem(BaseModel):
@ -142,6 +143,9 @@ class BidStructure(BaseModel):
deviation_file: str = Field(default="", description="偏离表文件路径")
template_file: str = Field(default="", description="模板文件路径")
# AI审查结果
structure_review: dict[str, Any] = Field(default_factory=dict, description="AI结构审查结果")
class BidParser:
"""招标文件解析器"""
@ -264,9 +268,17 @@ class BidParser:
if pd.isna(row[columns['item_name']]):
continue
# 安全地处理max_score,防止NaN或None值
max_score_raw = row[columns['max_score']]
try:
max_score = float(max_score_raw) if pd.notna(max_score_raw) else 0.0
except (ValueError, TypeError):
max_score = 0.0
logger.warning(f"无法解析评分项'{row[columns['item_name']]}'的分值: {max_score_raw}")
criterion = ScoringCriteria(
item_name=str(row[columns['item_name']]).strip(),
max_score=float(row[columns['max_score']]),
max_score=max_score,
description=str(row.get(columns.get('description', ''), '')).strip(),
category=TechnicalCategory.OTHER, # 表格解析默认为OTHERAI会重新分类
chapter_id=f"chapter_{i+1:02d}"
@ -402,12 +414,21 @@ class BidParser:
except ValueError:
category_enum = TechnicalCategory.OTHER
# 安全地处理max_score,防止None值
max_score_raw = item.get("max_score", 0)
try:
max_score = float(max_score_raw) if max_score_raw is not None else 0.0
except (ValueError, TypeError):
max_score = 0.0
logger.warning(f"无法解析评分项'{item.get('item_name', '')}'的分值: {max_score_raw}")
criterion = ScoringCriteria(
item_name=item.get("item_name", ""),
max_score=float(item.get("max_score", 0)),
max_score=max_score,
description=item.get("description", ""),
category=category_enum,
chapter_id=f"chapter_{i+1:02d}"
chapter_id=f"chapter_{i+1:02d}",
original_index=i # 保存在评分表中的原始出现顺序
)
criteria.append(criterion)

View File

@ -26,6 +26,9 @@ class WordProcessor:
def create_template_from_chapters(self, chapters: List[DocumentChapter], output_path: str, project_name: str = "标书项目") -> bool:
"""根据章节结构创建Word模板"""
try:
# 在生成Word之前重新编号所有章节
self._renumber_chapters_for_word(chapters)
doc = Document()
# 设置文档标题
@ -86,4 +89,46 @@ class WordProcessor:
# 一级章节后添加适当间距
if chapter.level == 1:
doc.add_paragraph() # 添加空行
doc.add_paragraph() # 添加空行
def _renumber_chapters_for_word(self, chapters: List[DocumentChapter]):
    """Rewrite the numeric prefix of every chapter title, three levels deep.

    Top-level chapters are numbered "1", "2", ...; their children "1.1",
    "1.2", ...; and grandchildren "1.1.1", "1.1.2", .... Each number is the
    chapter's 1-based position in the list that contains it. Titles are
    reassigned on the chapter objects themselves and nothing is returned.
    Chapters nested deeper than the third level are not visited.

    Args:
        chapters: Top-level chapters; each is expected to expose ``title``
            and an iterable ``children``.
    """
    logger.info("开始为Word生成重新编号章节...")

    for top_pos, top in enumerate(chapters, start=1):
        # Level 1: plain ordinal.
        top_no = str(top_pos)
        top.title = self._update_chapter_number(top.title, top_no)
        logger.info(f"一级章节重编号: {top.title}")

        for mid_pos, mid in enumerate(top.children, start=1):
            # Level 2: "<parent>.<ordinal>".
            mid_no = f"{top_no}.{mid_pos}"
            mid.title = self._update_chapter_number(mid.title, mid_no)
            logger.info(f" 子章节重编号: {mid.title}")

            for leaf_pos, leaf in enumerate(mid.children, start=1):
                # Level 3: "<parent>.<ordinal>" built on the level-2 number.
                leaf_no = f"{mid_no}.{leaf_pos}"
                leaf.title = self._update_chapter_number(leaf.title, leaf_no)
                logger.info(f" 三级章节重编号: {leaf.title}")

    logger.info("章节重编号完成")
def _update_chapter_number(self, title: str, new_number: str) -> str:
    """Replace (or add) the leading section number of a chapter title.

    A leading number is recognised only in the forms below, so digits that
    belong to the title text itself (e.g. "5G网络方案", "2024年实施计划")
    are kept rather than mistaken for a section number:

    * multi-level numbers, with or without a trailing dot: "1.1", "1.1.1."
    * a single number followed by a delimiter: "1.", "1、", "1."
    * a single number followed by whitespace or the end of the title:
      "1 概述" (this is also the form this method produces, so calling it
      again on its own output replaces the number instead of stacking it)

    Args:
        title: Chapter title, optionally starting with a section number.
        new_number: Number to place in front of the title, e.g. "2.3".

    Returns:
        ``"<new_number> <content>"``, where content is the title without
        its old number; just ``new_number`` when no content remains.
    """
    import re

    number_pattern = (
        r'^(?:'
        r'\d+(?:\.\d+)+\.?'   # multi-level: 1.1 / 1.1.1 / 1.1.1.
        r'|\d+[.、.]'         # single level with delimiter: 1. / 1、 / 1.
        r'|\d+(?=\s|$)'        # bare number only before whitespace / end
        r')\s*(.*)$'
    )

    stripped_title = title.strip()
    match = re.match(number_pattern, stripped_title)

    if match:
        # Keep only the text that followed the old number.
        content = match.group(1).strip()
        # rstrip avoids a dangling space when the title was only a number.
        new_title = f"{new_number} {content}".rstrip()
        logger.info(f"编号转换: '{title}' -> '{new_title}'")
        return new_title

    # No recognised number prefix: prepend the new number to the title as-is.
    new_title = f"{new_number} {stripped_title}".rstrip()
    logger.info(f"编号添加: '{title}' -> '{new_title}'")
    return new_title