feat: enhance chapter prompt planning

Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
This commit is contained in:
sladro 2025-11-18 16:02:00 +08:00
parent d887e7c313
commit 6f785c9f2c
8 changed files with 664 additions and 75 deletions

View File

@ -265,13 +265,30 @@ content_prompts:
generate_with_rag: |
你是一个专业的标书撰写助手。请根据以下信息生成标书章节内容:
章节标题:{title}
评分要求:{requirements}{emphasis_part}{rag_part}
当前章节: 《{title}》
章节定位: {chapter_path}
分值关注: {score_info}
评分要点:
{rubric_points}
写作目标:
{objectives}
一致性约束:
{consistency_rules}
上下文参考:
{context_summary}
评分说明:
{requirements_summary}{emphasis_part}{rag_part}
要求:
1. 内容专业、详实,符合招标文件要求
2. 突出技术优势和实施能力
3. 语言正式、逻辑清晰
4. 字数控制在500-800字
5. 严禁新增任何章/节级标题或“商务条款、技术偏差、响应情况”等模板段,如需结构化仅使用普通段落或加粗语句
6. 开头不得出现“经认真研读招标文件要求”“偏差说明如下”等跨章节套话,内容必须围绕《{title}》本身展开
请直接输出章节内容,不要包含章节标题。

View File

@ -8,6 +8,7 @@ from typing import Any, Dict, Optional
from ..base import BaseNode, NodeContext
from ...config.settings import get_settings
from ...utils.prompt_planner import PromptPlanner
logger = logging.getLogger(__name__)
settings = get_settings()
@ -51,6 +52,7 @@ class GenerateContentNode(BaseNode):
# 获取章节配置(继承父章节)
config = self._get_chapter_config(state, chapter)
planner = PromptPlanner(state)
# 查找当前章节的所有子标题
sub_chapters = self._find_sub_chapters(state, chapter_id, max_level=settings.max_sub_chapter_level)
@ -60,12 +62,12 @@ class GenerateContentNode(BaseNode):
logger.info(f"章节 {chapter_id} 包含 {len(sub_chapters)} 个子标题,逐个生成")
content_parts = []
for sub in sub_chapters:
sub_content = self._generate_with_rag(sub, config, state)
sub_content = self._generate_with_rag(sub, config, state, planner)
content_parts.append(f"## {sub['title']}\n\n{sub_content}")
content = "\n\n".join(content_parts)
else:
# 无子标题:直接生成
content = self._generate_with_rag(chapter, config, state)
content = self._generate_with_rag(chapter, config, state, planner)
# 保存生成结果
state.setdefault("generated_contents", {})[chapter_id] = content
@ -167,7 +169,11 @@ class GenerateContentNode(BaseNode):
return sub_chapters
def _generate_with_rag(
self, chapter: Dict[str, Any], config: Dict[str, Any], state: Dict[str, Any]
self,
chapter: Dict[str, Any],
config: Dict[str, Any],
state: Dict[str, Any],
planner: PromptPlanner,
) -> str:
"""使用RAG生成内容
@ -184,18 +190,19 @@ class GenerateContentNode(BaseNode):
if not rag_tool:
raise ValueError("RAGTool未初始化请检查InitConfigNode配置")
# 构建生成上下文
generation_context = {
"title": chapter["title"],
"level": chapter["level"],
"requirements": chapter.get("requirements", ""),
"emphasis": config.get("emphasis", ""),
}
prompt_spec = planner.build_prompt_spec(chapter)
generation_context = dict(prompt_spec)
# 如果启用RAG添加上下文信息
if config.get("rag_enabled"):
# 检索相关内容
query = f"{chapter['title']} {config.get('emphasis', '')}"
query_fragments = [chapter["title"]]
if prompt_spec.get("emphasis"):
query_fragments.append(prompt_spec["emphasis"])
if prompt_spec.get("requirements_summary"):
query_fragments.append(prompt_spec["requirements_summary"][:120])
query = " ".join(fragment for fragment in query_fragments if fragment)
search_results = rag_tool.search(query, k=settings.rag_search_top_k)
if search_results:
@ -208,11 +215,6 @@ class GenerateContentNode(BaseNode):
else:
generation_context["rag_context"] = ""
# 添加父章节上下文
parent_context = state.get("last_generated_content", "")
if parent_context and chapter["level"] > 1:
generation_context["parent_context"] = parent_context[:settings.parent_context_length]
# 调用生成方法
try:
content = rag_tool.generate_content(chapter["id"], generation_context)

View File

@ -5,13 +5,18 @@
import json
import logging
import re
import unicodedata
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional
from ..base import BaseNode, NodeContext
logger = logging.getLogger(__name__)
MAX_HEADING_LEVEL = 9
class InitConfigNode(BaseNode):
"""初始化配置节点
@ -52,10 +57,28 @@ class InitConfigNode(BaseNode):
# 直接从Word文档读取章节结构
chapters = self._load_from_word_document(word_file)
# 可选注入评分元数据
metadata_lookup = self._load_chapter_metadata(Path(word_file))
chapter_metadata = self._apply_metadata_to_chapters(chapters, metadata_lookup)
# 深度优先遍历生成队列
chapter_queue = self._build_depth_first_queue(chapters)
logger.info(f"生成章节队列,共{len(chapter_queue)}个章节")
chapter_map = {ch["id"]: ch for ch in chapters}
children_map: defaultdict[str, List[str]] = defaultdict(list)
for ch in chapters:
parent_id = ch.get("parent_id")
if parent_id:
children_map[parent_id].append(ch["id"])
preview = ", ".join(
f"{ch['id']}:{ch['title']}" for ch in chapter_queue[:5]
) or "N/A"
logger.info(
"章节队列已生成,共 %s 个章节,预览: %s",
len(chapter_queue),
preview,
)
# 初始化RAGTool实例单例全局共享
from ...tools.rag import RAGTool
@ -67,8 +90,11 @@ class InitConfigNode(BaseNode):
return self._update_state(
state,
chapter_queue=chapter_queue,
chapter_map=chapter_map,
chapter_children_map=dict(children_map),
chapter_configs={},
expanded_configs=expanded_configs, # 预展开的配置继承映射
chapter_metadata=chapter_metadata,
generated_contents={},
completed_chapters=[],
current_chapter_index=0,
@ -85,51 +111,50 @@ class InitConfigNode(BaseNode):
章节列表
"""
from docx import Document
import re
doc = Document(word_file)
chapters = []
parent_stack = [] # 栈存储各层级的父章节ID
chapters: List[Dict[str, Any]] = []
parent_stack: List[Dict[str, Any]] = []
level_counters: defaultdict[int, int] = defaultdict(int)
for para in doc.paragraphs:
style_name = para.style.name
# 只处理Heading 1/2/3
style_name = getattr(para.style, 'name', '') or ''
if not style_name.startswith('Heading'):
continue
# 提取层级
level_match = re.match(r'Heading\s+(\d+)', style_name)
if not level_match:
continue
level = int(level_match.group(1))
if level > 3: # 只处理到3级标题
if level < 1 or level > MAX_HEADING_LEVEL:
continue
# 提取标题文本和编号
title_text = para.text.strip()
if not title_text:
raw_heading = para.text.strip()
if not raw_heading:
continue
# 从标题中提取编号(如"2.1.3 标题" → "2_1_3"
number_match = re.match(r'^([\d\.]+)\s+(.+)$', title_text)
if number_match:
number_str = number_match.group(1).rstrip('.')
title = number_match.group(2)
chapter_id = f"chapter_{number_str.replace('.', '_')}"
else:
# 无编号,使用索引
chapter_id = f"chapter_{len(chapters) + 1}"
title = title_text
heading_number, title = self._split_heading_components(raw_heading)
level_counters[level] += 1
for deeper_level in list(level_counters.keys()):
if deeper_level > level:
level_counters[deeper_level] = 0
# 确定父章节ID
while parent_stack and parent_stack[-1]['level'] >= level:
parent_stack.pop()
parent_id = parent_stack[-1]['id'] if parent_stack else None
parent_path: List[str] = parent_stack[-1]['path'] if parent_stack else []
if heading_number:
id_path = heading_number.strip('.').split('.')
else:
segment = self._format_unlabeled_segment(level_counters[level])
id_path = [*parent_path, segment] if parent_path else [segment]
chapter_id = f"chapter_{'_'.join(id_path)}"
# 创建章节信息
chapter_info = {
"id": chapter_id,
"title": title,
@ -138,17 +163,58 @@ class InitConfigNode(BaseNode):
"requirements": "",
"score": 0,
"parent_id": parent_id,
"raw_heading": raw_heading,
"normalized_title": self._normalize_heading_text(title),
"heading_number": heading_number,
"order_index": level_counters[level],
"style_name": style_name,
}
chapters.append(chapter_info)
parent_stack.append({'id': chapter_id, 'level': level, 'path': id_path})
# 将当前章节加入栈
parent_stack.append({'id': chapter_id, 'level': level})
logger.info(f"从Word文档读取到 {len(chapters)} 个章节")
sample = ', '.join(f"{ch['id']}:{ch['title']}" for ch in chapters[:5]) or "N/A"
logger.info(
"从Word文档读取到 %s 个章节,样例: %s",
len(chapters),
sample,
)
return chapters
@staticmethod
def _normalize_heading_text(text: str) -> str:
"""标准化标题文本,消除匹配差异"""
if not text:
return ""
normalized = unicodedata.normalize("NFKC", text)
normalized = normalized.strip()
normalized = re.sub(r"\s+", "", normalized)
return normalized.lower()
def _split_heading_components(self, raw_heading: str) -> tuple[Optional[str], str]:
"""拆分原始标题,返回(编号, 标题)"""
match = re.match(r"^(\d+(?:\.\d+)*)\s+(.+)$", raw_heading)
if match:
number = match.group(1).rstrip('.')
title = match.group(2).strip()
return number, title or raw_heading
return None, raw_heading
def _format_unlabeled_segment(self, order_index: int) -> str:
"""将序号转换为字母序列用于无编号标题ID"""
index = max(order_index, 1)
chars: List[str] = []
while index > 0:
index, remainder = divmod(index - 1, 26)
chars.append(chr(ord('a') + remainder))
return ''.join(reversed(chars))
def _build_depth_first_queue(self, chapters: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""深度优先遍历生成章节队列
@ -171,20 +237,6 @@ class InitConfigNode(BaseNode):
return queue
def _extract_chapter_numbers(self, chapter_id: str) -> List[int]:
"""从章节ID提取数字编号用于排序
Args:
chapter_id: 章节ID "chapter_1_2_3"
Returns:
数字列表 [1, 2, 3]
"""
import re
# 提取所有数字
numbers = re.findall(r'\d+', chapter_id)
return [int(n) for n in numbers]
def _dfs_traverse(
self,
chapter: Dict[str, Any],
@ -202,12 +254,9 @@ class InitConfigNode(BaseNode):
"""
queue.append(chapter)
# 查找子章节
chapter_id = chapter["id"]
children = [ch for ch in all_chapters if ch.get("parent_id") == chapter_id]
# 按数字编号排序确保正确顺序(修复:避免字符串排序导致 1_10 < 1_2
children.sort(key=lambda x: self._extract_chapter_numbers(x["id"]))
children.sort(key=lambda x: x.get("order_index", 0))
# 递归遍历子章节
for child in children:
@ -247,3 +296,193 @@ class InitConfigNode(BaseNode):
logger.info(f"预展开配置继承链,共{len(expanded)}个章节")
return expanded
def _load_chapter_metadata(self, word_path: Path) -> Dict[str, Dict[str, Any]]:
    """Load optional scoring metadata from files beside the Word document.

    Prefers ``analysis_result.json``; falls back to ``tasks.json`` when
    the former is absent or yields no entries. Returns a mapping keyed
    by normalized heading text, or an empty dict when nothing is found.
    """
    project_dir = word_path.parent
    metadata: Dict[str, Dict[str, Any]] = {}
    analysis_file = project_dir / "analysis_result.json"
    tasks_file = project_dir / "tasks.json"
    if analysis_file.exists():
        metadata = self._parse_analysis_metadata(analysis_file)
        if metadata:
            logger.info("已加载 analysis_result.json 评分元数据")
            return metadata
    if tasks_file.exists():
        metadata = self._parse_tasks_metadata(tasks_file)
        if metadata:
            logger.info("已加载 tasks.json 评分元数据")
    # Empty dict when neither file exists or neither produced entries.
    return metadata
def _parse_analysis_metadata(self, file_path: Path) -> Dict[str, Dict[str, Any]]:
    """Parse ``analysis_result.json`` into metadata keyed by normalized title.

    Merges technical and commercial criteria; repeated criteria for the
    same title accumulate requirements text and rubric points. Returns
    an empty dict when the file cannot be read or parsed.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as exc:
        # Best-effort: metadata is optional, so log and continue without it.
        logger.warning("读取 %s 失败: %s", file_path, exc)
        return {}
    chapter_titles: Dict[str, str] = {}

    def _collect_titles(nodes: List[Dict[str, Any]] | None) -> None:
        # Recursively index chapter titles by id so criteria below can be
        # resolved to their chapter titles.
        if not nodes:
            return
        for node in nodes:
            chapter_id = node.get("id")
            title = node.get("title", "")
            if chapter_id and title:
                chapter_titles[chapter_id] = title
            _collect_titles(node.get("children"))

    _collect_titles(data.get("chapters", []))
    metadata: Dict[str, Dict[str, Any]] = {}
    criteria = (data.get("technical_criteria") or []) + (data.get("commercial_criteria") or [])
    for item in criteria:
        chapter_id = item.get("chapter_id")
        # Prefer the chapter title; fall back to the criterion's own name.
        title = chapter_titles.get(chapter_id) or item.get("item_name") or ""
        normalized = self._normalize_heading_text(title)
        if not normalized:
            continue
        entry = metadata.setdefault(
            normalized,
            {
                "title": title,
                "score": None,
                "category": None,
                "requirements": [],
                "rubric_points": [],
                "source": "analysis_result",
                "chapter_id_source": chapter_id,
            },
        )
        # Later criteria for the same title overwrite score/category…
        if item.get("max_score") is not None:
            entry["score"] = item["max_score"]
        if item.get("category"):
            entry["category"] = item["category"]
        description = (item.get("description") or "").strip()
        if description:
            # …but descriptions accumulate; deduplication happens in
            # _finalize_metadata_entries.
            entry["requirements"].append(description)
            entry["rubric_points"].extend(self._split_rubric_points(description))
    return self._finalize_metadata_entries(metadata)
def _parse_tasks_metadata(self, file_path: Path) -> Dict[str, Dict[str, Any]]:
    """Parse ``tasks.json`` (a list of task dicts) into metadata keyed by
    normalized title.

    Same output shape as ``_parse_analysis_metadata``; returns an empty
    dict when the file cannot be read or parsed.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            tasks = json.load(f)
    except Exception as exc:
        # Best-effort: metadata is optional, so log and continue without it.
        logger.warning("读取 %s 失败: %s", file_path, exc)
        return {}
    metadata: Dict[str, Dict[str, Any]] = {}
    for task in tasks:
        title = task.get("title", "")
        normalized = self._normalize_heading_text(title)
        if not normalized:
            continue
        # Unlike the analysis parser, score/category are taken from the
        # first task seen for a title (setdefault keeps the initial values).
        entry = metadata.setdefault(
            normalized,
            {
                "title": title,
                "score": task.get("score"),
                "category": task.get("category"),
                "requirements": [],
                "rubric_points": [],
                "source": "tasks",
                "chapter_id_source": task.get("chapter_id"),
            },
        )
        description = (task.get("description") or "").strip()
        if description:
            entry["requirements"].append(description)
            entry["rubric_points"].extend(self._split_rubric_points(description))
    return self._finalize_metadata_entries(metadata)
def _split_rubric_points(self, text: str) -> List[str]:
if not text:
return []
parts = re.split(r"[\n;。]+", text)
clean_parts = [p.strip().strip(":·•") for p in parts if len(p.strip()) >= 3]
return clean_parts
def _finalize_metadata_entries(self, metadata: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
finalized: Dict[str, Dict[str, Any]] = {}
for key, entry in metadata.items():
requirements_list = entry.get("requirements") or []
dedup_reqs = []
for req in requirements_list:
if req and req not in dedup_reqs:
dedup_reqs.append(req)
rubric_points = entry.get("rubric_points") or []
dedup_points: List[str] = []
for point in rubric_points:
if point and point not in dedup_points:
dedup_points.append(point)
finalized[key] = {
"title": entry.get("title", ""),
"score": entry.get("score"),
"category": entry.get("category"),
"requirements": "\n".join(dedup_reqs).strip(),
"rubric_points": dedup_points,
"source": entry.get("source"),
"chapter_id_source": entry.get("chapter_id_source"),
}
return finalized
def _apply_metadata_to_chapters(
    self,
    chapters: List[Dict[str, Any]],
    metadata_lookup: Dict[str, Dict[str, Any]],
) -> Dict[str, Dict[str, Any]]:
    """Inject scoring metadata into matching chapters.

    Matches on the chapter's precomputed ``normalized_title``, falling
    back to normalizing the full ``raw_heading`` (which may still carry
    numbering/score text). Mutates matched chapters in place
    (requirements, score, category) and returns a chapter-id ->
    enriched-metadata mapping for the workflow state.
    """
    if not metadata_lookup:
        return {}
    chapter_metadata: Dict[str, Dict[str, Any]] = {}
    for chapter in chapters:
        normalized = chapter.get("normalized_title")
        meta = metadata_lookup.get(normalized)
        if not meta and chapter.get("raw_heading"):
            # Second-chance match against the unsplit heading text.
            meta = metadata_lookup.get(self._normalize_heading_text(chapter["raw_heading"]))
        if not meta:
            continue
        enriched = {
            "title": chapter.get("title"),
            "score": meta.get("score"),
            "category": meta.get("category"),
            "requirements": meta.get("requirements", ""),
            "rubric_points": meta.get("rubric_points", []),
            "source": meta.get("source"),
            "chapter_id_source": meta.get("chapter_id_source"),
        }
        # Overwrite the chapter's placeholder fields; score 0/None and
        # empty category are left untouched.
        chapter["requirements"] = enriched["requirements"]
        if enriched["score"] is not None:
            chapter["score"] = enriched["score"]
        if enriched["category"]:
            chapter["category"] = enriched["category"]
        chapter_metadata[chapter["id"]] = enriched
    logger.info("已为 %s 个章节注入评分元数据", len(chapter_metadata))
    return chapter_metadata

View File

@ -75,7 +75,7 @@ class InteractWithUserNode(BaseNode):
interaction_type="choice",
prompt="是否使用RAG知识库辅助生成内容",
options=["", ""],
default="",
default="",
key=f"use_rag_{chapter_id}",
)

View File

@ -209,7 +209,6 @@ class RAGTool:
# 从上下文中提取任务信息
task_title = context.get('title', '任务')
task_requirements = context.get('requirements', '')
emphasis = context.get('emphasis', '')
rag_context = context.get('rag_context', '')
@ -217,20 +216,29 @@ class RAGTool:
emphasis_part = f'\n特别强调:{emphasis}' if emphasis else ''
rag_part = f'\n\n参考资料:\n{rag_context}' if rag_context else ''
prompt_variables = {
"title": task_title,
"chapter_path": context.get('chapter_path', task_title),
"score_info": context.get('score_info', '目标得分:未明确'),
"requirements_summary": context.get('requirements_summary', ''),
"rubric_points": context.get('rubric_points', '- 无明确评分要点'),
"objectives": context.get('objectives', '1. 围绕章节主题输出详实内容'),
"consistency_rules": context.get('consistency_rules', '1. 保持章节语气与格式一致'),
"context_summary": context.get('context_summary', '(暂无可引用的上下文)'),
"emphasis_part": emphasis_part,
"rag_part": rag_part,
}
# 从配置获取提示词
prompt_manager = get_prompt_manager()
prompt = prompt_manager.get_content_prompt(
"generate_with_rag",
title=task_title,
requirements=task_requirements,
emphasis_part=emphasis_part,
rag_part=rag_part
)
prompt = prompt_manager.get_content_prompt("generate_with_rag", **prompt_variables)
# 调用LLM生成
client = OpenAI(
api_key=self.settings.api_key,
base_url=self.settings.base_url
base_url=self.settings.base_url,
timeout=180,
max_retries=2,
)
response = client.chat.completions.create(

View File

@ -0,0 +1,199 @@
"""章节提示词规划器
根据章节上下文评分元数据和用户配置构建结构化写作brief
"""
from __future__ import annotations
import textwrap
from typing import Any, Dict, List, Optional
class PromptPlanner:
    """Build a structured writing brief ("prompt spec") for a chapter.

    Combines the chapter hierarchy, optional scoring metadata, inherited
    user emphasis and previously generated content from the workflow
    state into the template variables consumed by the content-generation
    prompt.
    """

    def __init__(self, state: Dict[str, Any]):
        # Typed views over the shared workflow state; every lookup
        # tolerates a missing key so the planner also works on the
        # minimal states built in tests.
        self.state = state
        self.chapter_queue: List[Dict[str, Any]] = state.get("chapter_queue", [])
        # Derive id -> chapter from the queue when the state did not
        # provide a prebuilt map.
        self.chapter_map: Dict[str, Dict[str, Any]] = state.get("chapter_map") or {
            ch["id"]: ch for ch in self.chapter_queue
        }
        self.chapter_children_map: Dict[str, List[str]] = state.get("chapter_children_map", {})
        self.chapter_metadata: Dict[str, Dict[str, Any]] = state.get("chapter_metadata", {})
        self.chapter_configs: Dict[str, Dict[str, Any]] = state.get("chapter_configs", {})
        self.expanded_configs: Dict[str, str] = state.get("expanded_configs", {})
        self.generated_contents: Dict[str, str] = state.get("generated_contents", {})
        # Write the derived map back so downstream nodes can reuse it.
        state.setdefault("chapter_map", self.chapter_map)

    def build_prompt_spec(self, chapter: Dict[str, Any]) -> Dict[str, Any]:
        """Return the template-variable dict for *chapter*.

        Degrades gracefully when metadata is missing: requirements fall
        back to the chapter's own field, then to an outline of its
        children; every formatted block carries a placeholder default.
        """
        chapter_id = chapter["id"]
        metadata = self.chapter_metadata.get(chapter_id)
        requirements = (
            (metadata or {}).get("requirements")
            or chapter.get("requirements")
            or self._build_child_outline(chapter_id)
            or ""
        )
        rubric_points = (metadata or {}).get("rubric_points") or self._split_requirements(requirements)
        score_info = self._format_score_info(chapter, metadata)
        emphasis = self._resolve_emphasis(chapter_id)
        chapter_path = self._build_chapter_path(chapter)
        parent_context = self._collect_parent_context(chapter)
        sibling_outline = self._collect_sibling_outline(chapter)
        objectives = self._build_objectives(requirements, metadata, emphasis)
        consistency_rules = self._build_consistency_rules(chapter_path, parent_context)
        context_parts: List[str] = []
        if parent_context:
            context_parts.append(f"父章节摘要:{parent_context}")
        if sibling_outline:
            context_parts.append(f"同级章节定位:{sibling_outline}")
        context_summary = "\n".join(context_parts) or "(暂无可引用的上文,可直接围绕本章节展开)"
        spec = {
            "title": chapter.get("title", chapter_id),
            "chapter_path": chapter_path,
            "score_info": score_info,
            "requirements_summary": requirements or "该章节未提供评分描述,需结合项目背景补足内容。",
            "rubric_points": self._format_bullet_block(rubric_points, "- 无明确评分要点,仍需提供详实内容"),
            "objectives": self._format_ordered_block(objectives, "1. 围绕章节主题提供专业、详实且可执行的方案描述"),
            "consistency_rules": self._format_ordered_block(
                consistency_rules,
                "1. 语言保持正式且与全篇一致,不得创建新的章/节标题",
            ),
            "context_summary": context_summary,
            "emphasis": emphasis or "",
            "category": (metadata or {}).get("category"),
        }
        return spec

    def _build_chapter_path(self, chapter: Dict[str, Any]) -> str:
        """Return the "root > ... > leaf" breadcrumb for *chapter*."""
        segments: List[str] = []
        current = chapter
        # Walk parent links toward the root; an unknown parent id ends
        # the walk (chapter_map.get returns None).
        while current:
            title = current.get("title") or current.get("raw_heading") or current.get("id")
            if title:
                segments.append(title)
            parent_id = current.get("parent_id")
            current = self.chapter_map.get(parent_id)
        return " > ".join(reversed(segments))

    def _resolve_emphasis(self, chapter_id: str) -> str:
        """Return the user-configured emphasis for the chapter.

        Prefers the chapter's own config; otherwise follows
        expanded_configs (chapter -> ancestor whose config it inherits).
        """
        config = self.chapter_configs.get(chapter_id)
        if config and config.get("emphasis"):
            return config["emphasis"].strip()
        source_id = self.expanded_configs.get(chapter_id)
        if source_id:
            parent_config = self.chapter_configs.get(source_id)
            if parent_config and parent_config.get("emphasis"):
                return parent_config["emphasis"].strip()
        return ""

    def _collect_parent_context(self, chapter: Dict[str, Any]) -> str:
        """Return a truncated excerpt of the parent's generated content."""
        parent_id = chapter.get("parent_id")
        if not parent_id:
            return ""
        parent_content = self.generated_contents.get(parent_id, "")
        # 420 chars keeps the brief compact while giving enough context.
        return self._truncate(parent_content, 420)

    def _collect_sibling_outline(self, chapter: Dict[str, Any]) -> str:
        """Return up to four sibling titles for positioning context."""
        parent_id = chapter.get("parent_id")
        if not parent_id:
            return ""
        siblings = self.chapter_children_map.get(parent_id, [])
        titles = [self.chapter_map[sid]["title"] for sid in siblings if sid != chapter["id"] and sid in self.chapter_map]
        if not titles:
            return ""
        preview = "、".join(titles[:4])
        return preview

    def _build_child_outline(self, chapter_id: str) -> str:
        """Return up to six child titles; used as fallback requirements."""
        child_ids = self.chapter_children_map.get(chapter_id, [])
        titles = [self.chapter_map[ch_id]["title"] for ch_id in child_ids if ch_id in self.chapter_map]
        if not titles:
            return ""
        return "、".join(titles[:6])

    def _build_objectives(
        self,
        requirements: str,
        metadata: Optional[Dict[str, Any]],
        emphasis: str,
    ) -> List[str]:
        """Assemble the ordered list of writing objectives."""
        objectives: List[str] = []
        if requirements:
            objectives.append(f"完整覆盖评分描述:{self._truncate(requirements, 160)}")
        score = (metadata or {}).get("score")
        if score:
            objectives.append(f"明确呈现可支撑 {score} 分评价的量化亮点")
        if emphasis:
            objectives.append(f"突出用户强调内容:{emphasis}")
        # Always close with the generic objective (the membership test is
        # always true here since earlier entries are prefixed differently).
        base_objective = "围绕章节主题提供结构化、工程化的内容"
        if base_objective not in objectives:
            objectives.append(base_objective)
        return objectives

    def _build_consistency_rules(self, chapter_path: str, parent_context: str) -> List[str]:
        """Assemble the consistency constraints for the prompt."""
        rules = [
            f"严格聚焦章节路径 {chapter_path},不得跨章节展开",
            "禁止创建新的章/节级标题,仅可使用段落或加粗语句",
            "语言保持正式、数据化、工程化,避免模板化口号",
        ]
        if parent_context:
            rules.append("与父章节内容保持逻辑衔接,避免信息冲突")
        return rules

    def _format_score_info(self, chapter: Dict[str, Any], metadata: Optional[Dict[str, Any]]) -> str:
        """Format the score line; metadata wins over the chapter's field.

        Note: a score of 0 (or None) is treated as "not specified".
        """
        score = (metadata or {}).get("score")
        if not score:
            chapter_score = chapter.get("score")
            if chapter_score:
                score = chapter_score
        category = (metadata or {}).get("category")
        if score:
            info = f"目标得分:{score}"
        else:
            info = "目标得分:未明确,但需保持与整体方案一致"
        if category:
            info = f"{info}(类别:{category})"
        return info

    def _split_requirements(self, requirements: str) -> List[str]:
        """Split requirements text into stripped, non-empty lines."""
        if not requirements:
            return []
        raw = [seg.strip() for seg in requirements.replace("\r", "").split("\n") if seg.strip()]
        return raw

    def _format_bullet_block(self, items: List[str], fallback: str) -> str:
        """Render at most eight items as "- item" lines, or *fallback*."""
        valid = [item for item in items if item]
        if not valid:
            return fallback
        return "\n".join(f"- {item}" for item in valid[:8])

    def _format_ordered_block(self, items: List[str], fallback: str) -> str:
        """Render at most eight items as numbered lines, or *fallback*."""
        if not items:
            return fallback
        return "\n".join(f"{idx}. {item}" for idx, item in enumerate(items[:8], start=1))

    def _truncate(self, text: str, limit: int) -> str:
        """Collapse whitespace and shorten to *limit* chars ("..." suffix)."""
        if not text:
            return ""
        cleaned = " ".join(text.split())
        return textwrap.shorten(cleaned, width=limit, placeholder="...")

View File

@ -0,0 +1,68 @@
from pathlib import Path
from docx import Document
from bidmaster.nodes.content.init_config import InitConfigNode
def _create_deep_heading_doc(path: Path) -> None:
    """Create a .docx fixture with headings nested five levels deep.

    The deepest heading intentionally carries no dotted numbering so the
    parser must synthesize a letter-based id segment for it.
    """
    doc = Document()
    doc.add_heading("8 服务方案", level=1)
    doc.add_heading("8.1 大南湖七矿VR智能培训系统及智能体感设备设计方案", level=2)
    doc.add_heading("8.1.1 VR智能培训中心", level=3)
    doc.add_heading("8.1.1.1 煤矿典型事故案例VR教学系统", level=4)
    doc.add_heading("煤矿重大灾害VR虚拟体验与逃生系统", level=5)
    doc.save(path)
def test_load_from_word_document_handles_deep_headings(tmp_path) -> None:
    """Headings up to level 5 are parsed; the unnumbered level-5 heading
    gets a letter-suffixed id derived from its parent's numeric path."""
    doc_path = tmp_path / "deep.docx"
    _create_deep_heading_doc(doc_path)
    node = InitConfigNode()
    chapters = node._load_from_word_document(str(doc_path))
    assert len(chapters) == 5
    assert chapters[0]["id"] == "chapter_8"
    assert chapters[1]["id"] == "chapter_8_1"
    assert chapters[2]["parent_id"] == chapters[1]["id"]
    deepest = chapters[-1]
    assert deepest["level"] == 5
    # No dotted numbering in the raw heading -> heading_number is None
    # and the id ends with the synthesized "a" segment.
    assert deepest["heading_number"] is None
    assert deepest["id"] == "chapter_8_1_1_1_a"
    assert deepest["parent_id"] == chapters[-2]["id"]
def test_apply_metadata_to_chapters_enriches_requirements() -> None:
    """Metadata matched by normalized title is written back onto the
    chapter dict (requirements/score) and returned keyed by chapter id."""
    node = InitConfigNode()
    title = "1.1 技术方案-基本要求 (3.0分)"
    normalized = node._normalize_heading_text(title)
    chapters = [
        {
            "id": "chapter_1_1",
            "title": title,
            "raw_heading": title,
            "normalized_title": normalized,
            "requirements": "",
            "score": 0,
        }
    ]
    metadata_lookup = {
        normalized: {
            "title": title,
            "score": 3.0,
            "category": "technical_solution",
            "requirements": "需覆盖总体架构、性能指标与技术栈选择。",
            "rubric_points": ["总体架构完整", "性能指标可量化"],
            "source": "analysis_result",
            "chapter_id_source": "tech_solution_01_01",
        }
    }
    enriched = node._apply_metadata_to_chapters(chapters, metadata_lookup)
    # Chapter dict mutated in place; enriched mapping mirrors the metadata.
    assert chapters[0]["requirements"].startswith("需覆盖")
    assert chapters[0]["score"] == 3.0
    assert enriched["chapter_1_1"]["rubric_points"][0] == "总体架构完整"

View File

@ -0,0 +1,56 @@
from bidmaster.utils.prompt_planner import PromptPlanner
def test_prompt_planner_uses_metadata_and_parent_context() -> None:
    """The spec surfaces rubric points from metadata, a parent-content
    summary, and the emphasis inherited via expanded_configs."""
    parent = {"id": "chapter_1", "title": "1. 总体技术方案", "level": 1}
    child = {
        "id": "chapter_1_1",
        "title": "1.1 技术方案-基本要求 (3.0分)",
        "level": 2,
        "parent_id": "chapter_1",
    }
    state = {
        "chapter_queue": [parent, child],
        "chapter_metadata": {
            "chapter_1_1": {
                "title": child["title"],
                "score": 3.0,
                "category": "technical_solution",
                "requirements": "方案需覆盖总体架构;提供性能指标;描述安全策略。",
                "rubric_points": ["总体架构完整", "性能指标量化"],
            }
        },
        "chapter_children_map": {"chapter_1": ["chapter_1_1"]},
        "generated_contents": {"chapter_1": "本章节描述总体设计与安全架构要点。"},
        # Child has no config of its own; it inherits chapter_1's emphasis.
        "chapter_configs": {"chapter_1": {"emphasis": "突出安全策略"}},
        "expanded_configs": {"chapter_1": "chapter_1", "chapter_1_1": "chapter_1"},
    }
    planner = PromptPlanner(state)
    spec = planner.build_prompt_spec(child)
    assert "总体架构完整" in spec["rubric_points"]
    assert "父章节摘要" in spec["context_summary"]
    assert "突出安全策略" in spec["objectives"]
def test_prompt_planner_fallback_without_metadata() -> None:
    """Without metadata the requirements summary falls back to the child
    outline and objectives fall back to the generic default."""
    root = {"id": "chapter_root", "title": "1. 服务方案", "level": 1}
    child = {
        "id": "chapter_child",
        "title": "1.1 现场服务体系",
        "level": 2,
        "parent_id": "chapter_root",
    }
    state = {
        "chapter_queue": [root, child],
        "chapter_children_map": {"chapter_root": ["chapter_child"]},
    }
    planner = PromptPlanner(state)
    spec = planner.build_prompt_spec(root)
    assert "1.1 现场服务体系" in spec["requirements_summary"]
    assert "围绕章节主题" in spec["objectives"]