From 6f785c9f2cdbeec748ef04377294ad201fdf4e67 Mon Sep 17 00:00:00 2001 From: sladro Date: Tue, 18 Nov 2025 16:02:00 +0800 Subject: [PATCH] feat: enhance chapter prompt planning Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- config/prompts.yaml | 21 +- .../nodes/content/generate_content.py | 34 +- src/bidmaster/nodes/content/init_config.py | 333 +++++++++++++++--- src/bidmaster/nodes/content/interact_user.py | 2 +- src/bidmaster/tools/rag.py | 26 +- src/bidmaster/utils/prompt_planner.py | 199 +++++++++++ tests/unit/test_init_config.py | 68 ++++ tests/unit/test_prompt_planner.py | 56 +++ 8 files changed, 664 insertions(+), 75 deletions(-) create mode 100644 src/bidmaster/utils/prompt_planner.py create mode 100644 tests/unit/test_init_config.py create mode 100644 tests/unit/test_prompt_planner.py diff --git a/config/prompts.yaml b/config/prompts.yaml index c1a815d..f19d48e 100644 --- a/config/prompts.yaml +++ b/config/prompts.yaml @@ -265,13 +265,30 @@ content_prompts: generate_with_rag: | 你是一个专业的标书撰写助手。请根据以下信息生成标书章节内容: - 章节标题:{title} - 评分要求:{requirements}{emphasis_part}{rag_part} + 当前章节: 《{title}》 + 章节定位: {chapter_path} + 分值关注: {score_info} + 评分要点: + {rubric_points} + + 写作目标: + {objectives} + + 一致性约束: + {consistency_rules} + + 上下文参考: + {context_summary} + + 评分说明: + {requirements_summary}{emphasis_part}{rag_part} 要求: 1. 内容专业、详实,符合招标文件要求 2. 突出技术优势和实施能力 3. 语言正式、逻辑清晰 4. 字数控制在500-800字 + 5. 严禁新增任何章/节级标题或“商务条款、技术偏差、响应情况”等模板段,如需结构化仅使用普通段落或加粗语句 + 6. 开头不得出现“经认真研读招标文件要求”“偏差说明如下”等跨章节套话,内容必须围绕《{title}》本身展开 请直接输出章节内容,不要包含章节标题。 diff --git a/src/bidmaster/nodes/content/generate_content.py b/src/bidmaster/nodes/content/generate_content.py index 81b4bb7..8b9a042 100644 --- a/src/bidmaster/nodes/content/generate_content.py +++ b/src/bidmaster/nodes/content/generate_content.py @@ -8,6 +8,7 @@ from typing import Any, Dict, Optional from ..base import BaseNode, NodeContext from ...config.settings import get_settings +from ...utils.prompt_planner import PromptPlanner logger = logging.getLogger(__name__) settings = get_settings() @@ -51,6 +52,7 @@ class GenerateContentNode(BaseNode): # 获取章节配置(继承父章节) config = self._get_chapter_config(state, chapter) + planner = PromptPlanner(state) # 查找当前章节的所有子标题 sub_chapters = self._find_sub_chapters(state, chapter_id, max_level=settings.max_sub_chapter_level) @@ -60,12 +62,12 @@ class GenerateContentNode(BaseNode): logger.info(f"章节 {chapter_id} 包含 {len(sub_chapters)} 个子标题,逐个生成") content_parts = [] for sub in sub_chapters: - sub_content = self._generate_with_rag(sub, config, state) + sub_content = self._generate_with_rag(sub, config, state, planner) content_parts.append(f"## {sub['title']}\n\n{sub_content}") content = "\n\n".join(content_parts) else: # 无子标题:直接生成 - content = self._generate_with_rag(chapter, config, state) + content = self._generate_with_rag(chapter, config, state, planner) # 保存生成结果 state.setdefault("generated_contents", {})[chapter_id] = content @@ -167,7 +169,11 @@ class GenerateContentNode(BaseNode): return sub_chapters def _generate_with_rag( - self, chapter: Dict[str, Any], config: Dict[str, Any], state: Dict[str, Any] + self, + chapter: Dict[str, Any], + config: Dict[str, Any], + state: Dict[str, Any], + planner: PromptPlanner, ) -> str: """使用RAG生成内容 @@ -184,18 +190,19 @@ class GenerateContentNode(BaseNode): if not rag_tool: raise ValueError("RAGTool未初始化,请检查InitConfigNode配置") - # 构建生成上下文 - generation_context = { - "title": chapter["title"], - "level": chapter["level"], - "requirements": chapter.get("requirements", ""), - "emphasis": config.get("emphasis", ""), - } + prompt_spec = planner.build_prompt_spec(chapter) + generation_context = dict(prompt_spec) # 如果启用RAG,添加上下文信息 if config.get("rag_enabled"): # 检索相关内容 - query = f"{chapter['title']} {config.get('emphasis', '')}" + query_fragments = [chapter["title"]] + if prompt_spec.get("emphasis"): + query_fragments.append(prompt_spec["emphasis"]) + if prompt_spec.get("requirements_summary"): + query_fragments.append(prompt_spec["requirements_summary"][:120]) + + query = " ".join(fragment for fragment in query_fragments if fragment) search_results = rag_tool.search(query, k=settings.rag_search_top_k) if search_results: @@ -208,11 +215,6 @@ class GenerateContentNode(BaseNode): else: generation_context["rag_context"] = "" - # 添加父章节上下文 - parent_context = state.get("last_generated_content", "") - if parent_context and chapter["level"] > 1: - generation_context["parent_context"] = parent_context[:settings.parent_context_length] - # 调用生成方法 try: content = rag_tool.generate_content(chapter["id"], generation_context) diff --git a/src/bidmaster/nodes/content/init_config.py b/src/bidmaster/nodes/content/init_config.py index 51b31ef..bd0626f 100644 --- a/src/bidmaster/nodes/content/init_config.py +++ b/src/bidmaster/nodes/content/init_config.py @@ -5,13 +5,18 @@ import json import logging +import re +import unicodedata +from collections import defaultdict from pathlib import Path -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from ..base import BaseNode, NodeContext logger = logging.getLogger(__name__) +MAX_HEADING_LEVEL = 9 + class InitConfigNode(BaseNode): """初始化配置节点 @@ -52,10 +57,28 @@ class InitConfigNode(BaseNode): # 直接从Word文档读取章节结构 chapters = self._load_from_word_document(word_file) + # 可选注入评分元数据 + metadata_lookup = self._load_chapter_metadata(Path(word_file)) + chapter_metadata = self._apply_metadata_to_chapters(chapters, metadata_lookup) + # 深度优先遍历生成队列 chapter_queue = self._build_depth_first_queue(chapters) - logger.info(f"生成章节队列,共{len(chapter_queue)}个章节") + chapter_map = {ch["id"]: ch for ch in chapters} + children_map: defaultdict[str, List[str]] = defaultdict(list) + for ch in chapters: + parent_id = ch.get("parent_id") + if parent_id: + children_map[parent_id].append(ch["id"]) + + preview = ", ".join( + f"{ch['id']}:{ch['title']}" for ch in chapter_queue[:5] + ) or "N/A" + logger.info( + "章节队列已生成,共 %s 个章节,预览: %s", + len(chapter_queue), + preview, + ) # 初始化RAGTool实例(单例,全局共享) from ...tools.rag import RAGTool @@ -67,8 +90,11 @@ class InitConfigNode(BaseNode): return self._update_state( state, chapter_queue=chapter_queue, + chapter_map=chapter_map, + chapter_children_map=dict(children_map), chapter_configs={}, expanded_configs=expanded_configs, # 预展开的配置继承映射 + chapter_metadata=chapter_metadata, generated_contents={}, completed_chapters=[], current_chapter_index=0, @@ -85,51 +111,50 @@ class InitConfigNode(BaseNode): 章节列表 """ from docx import Document - import re doc = Document(word_file) - chapters = [] - parent_stack = [] # 栈:存储各层级的父章节ID + chapters: List[Dict[str, Any]] = [] + parent_stack: List[Dict[str, Any]] = [] + level_counters: defaultdict[int, int] = defaultdict(int) for para in doc.paragraphs: - style_name = para.style.name - - # 只处理Heading 1/2/3 + style_name = getattr(para.style, 'name', '') or '' if not style_name.startswith('Heading'): continue - # 提取层级 level_match = re.match(r'Heading\s+(\d+)', style_name) if not level_match: continue level = int(level_match.group(1)) - if level > 3: # 只处理到3级标题 + if level < 1 or level > MAX_HEADING_LEVEL: continue - # 提取标题文本和编号 - title_text = para.text.strip() - if not title_text: + raw_heading = para.text.strip() + if not raw_heading: continue - # 从标题中提取编号(如"2.1.3 标题" → "2_1_3") - number_match = re.match(r'^([\d\.]+)\s+(.+)$', title_text) - if number_match: - number_str = number_match.group(1).rstrip('.') - title = number_match.group(2) - chapter_id = f"chapter_{number_str.replace('.', '_')}" - else: - # 无编号,使用索引 - chapter_id = f"chapter_{len(chapters) + 1}" - title = title_text + heading_number, title = self._split_heading_components(raw_heading) + + level_counters[level] += 1 + for deeper_level in list(level_counters.keys()): + if deeper_level > level: + level_counters[deeper_level] = 0 - # 确定父章节ID while parent_stack and parent_stack[-1]['level'] >= level: parent_stack.pop() parent_id = parent_stack[-1]['id'] if parent_stack else None + parent_path: List[str] = parent_stack[-1]['path'] if parent_stack else [] + + if heading_number: + id_path = heading_number.strip('.').split('.') + else: + segment = self._format_unlabeled_segment(level_counters[level]) + id_path = [*parent_path, segment] if parent_path else [segment] + + chapter_id = f"chapter_{'_'.join(id_path)}" - # 创建章节信息 chapter_info = { "id": chapter_id, "title": title, @@ -138,17 +163,58 @@ class InitConfigNode(BaseNode): "requirements": "", "score": 0, "parent_id": parent_id, + "raw_heading": raw_heading, + "normalized_title": self._normalize_heading_text(title), + "heading_number": heading_number, + "order_index": level_counters[level], + "style_name": style_name, } chapters.append(chapter_info) + parent_stack.append({'id': chapter_id, 'level': level, 'path': id_path}) - # 将当前章节加入栈 - parent_stack.append({'id': chapter_id, 'level': level}) - - logger.info(f"从Word文档读取到 {len(chapters)} 个章节") + sample = ', '.join(f"{ch['id']}:{ch['title']}" for ch in chapters[:5]) or "N/A" + logger.info( + "从Word文档读取到 %s 个章节,样例: %s", + len(chapters), + sample, + ) return chapters + @staticmethod + def _normalize_heading_text(text: str) -> str: + """标准化标题文本,消除匹配差异""" + + if not text: + return "" + + normalized = unicodedata.normalize("NFKC", text) + normalized = normalized.strip() + normalized = re.sub(r"\s+", "", normalized) + return normalized.lower() + + def _split_heading_components(self, raw_heading: str) -> tuple[Optional[str], str]: + """拆分原始标题,返回(编号, 标题)""" + + match = re.match(r"^(\d+(?:\.\d+)*)\s+(.+)$", raw_heading) + if match: + number = match.group(1).rstrip('.') + title = match.group(2).strip() + return number, title or raw_heading + + return None, raw_heading + + def _format_unlabeled_segment(self, order_index: int) -> str: + """将序号转换为字母序列,用于无编号标题ID""" + + index = max(order_index, 1) + chars: List[str] = [] + while index > 0: + index, remainder = divmod(index - 1, 26) + chars.append(chr(ord('a') + remainder)) + return ''.join(reversed(chars)) + def _build_depth_first_queue(self, chapters: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """深度优先遍历生成章节队列 @@ -171,20 +237,6 @@ class InitConfigNode(BaseNode): return queue - def _extract_chapter_numbers(self, chapter_id: str) -> List[int]: - """从章节ID提取数字编号用于排序 - - Args: - chapter_id: 章节ID,如 "chapter_1_2_3" - - Returns: - 数字列表,如 [1, 2, 3] - """ - import re - # 提取所有数字 - numbers = re.findall(r'\d+', chapter_id) - return [int(n) for n in numbers] - def _dfs_traverse( self, chapter: Dict[str, Any], @@ -202,12 +254,9 @@ class InitConfigNode(BaseNode): """ queue.append(chapter) - # 查找子章节 chapter_id = chapter["id"] children = [ch for ch in all_chapters if ch.get("parent_id") == chapter_id] - - # 按数字编号排序确保正确顺序(修复:避免字符串排序导致 1_10 < 1_2) - children.sort(key=lambda x: self._extract_chapter_numbers(x["id"])) + children.sort(key=lambda x: x.get("order_index", 0)) # 递归遍历子章节 for child in children: @@ -247,3 +296,193 @@ class InitConfigNode(BaseNode): logger.info(f"预展开配置继承链,共{len(expanded)}个章节") return expanded + def _load_chapter_metadata(self, word_path: Path) -> Dict[str, Dict[str, Any]]: + """读取项目目录下的章节元数据(可选)""" + + project_dir = word_path.parent + metadata: Dict[str, Dict[str, Any]] = {} + + analysis_file = project_dir / "analysis_result.json" + tasks_file = project_dir / "tasks.json" + + if analysis_file.exists(): + metadata = self._parse_analysis_metadata(analysis_file) + if metadata: + logger.info("已加载 analysis_result.json 评分元数据") + return metadata + + if tasks_file.exists(): + metadata = self._parse_tasks_metadata(tasks_file) + if metadata: + logger.info("已加载 tasks.json 评分元数据") + + return metadata + + def _parse_analysis_metadata(self, file_path: Path) -> Dict[str, Dict[str, Any]]: + try: + with open(file_path, "r", encoding="utf-8") as f: + data = json.load(f) + except Exception as exc: + logger.warning("读取 %s 失败: %s", file_path, exc) + return {} + + chapter_titles: Dict[str, str] = {} + + def _collect_titles(nodes: List[Dict[str, Any]] | None) -> None: + if not nodes: + return + for node in nodes: + chapter_id = node.get("id") + title = node.get("title", "") + if chapter_id and title: + chapter_titles[chapter_id] = title + _collect_titles(node.get("children")) + + _collect_titles(data.get("chapters", [])) + + metadata: Dict[str, Dict[str, Any]] = {} + criteria = (data.get("technical_criteria") or []) + (data.get("commercial_criteria") or []) + for item in criteria: + chapter_id = item.get("chapter_id") + title = chapter_titles.get(chapter_id) or item.get("item_name") or "" + normalized = self._normalize_heading_text(title) + if not normalized: + continue + + entry = metadata.setdefault( + normalized, + { + "title": title, + "score": None, + "category": None, + "requirements": [], + "rubric_points": [], + "source": "analysis_result", + "chapter_id_source": chapter_id, + }, + ) + + if item.get("max_score") is not None: + entry["score"] = item["max_score"] + + if item.get("category"): + entry["category"] = item["category"] + + description = (item.get("description") or "").strip() + if description: + entry["requirements"].append(description) + entry["rubric_points"].extend(self._split_rubric_points(description)) + + return self._finalize_metadata_entries(metadata) + + def _parse_tasks_metadata(self, file_path: Path) -> Dict[str, Dict[str, Any]]: + try: + with open(file_path, "r", encoding="utf-8") as f: + tasks = json.load(f) + except Exception as exc: + logger.warning("读取 %s 失败: %s", file_path, exc) + return {} + + metadata: Dict[str, Dict[str, Any]] = {} + for task in tasks: + title = task.get("title", "") + normalized = self._normalize_heading_text(title) + if not normalized: + continue + + entry = metadata.setdefault( + normalized, + { + "title": title, + "score": task.get("score"), + "category": task.get("category"), + "requirements": [], + "rubric_points": [], + "source": "tasks", + "chapter_id_source": task.get("chapter_id"), + }, + ) + + description = (task.get("description") or "").strip() + if description: + entry["requirements"].append(description) + entry["rubric_points"].extend(self._split_rubric_points(description)) + + return self._finalize_metadata_entries(metadata) + + def _split_rubric_points(self, text: str) -> List[str]: + if not text: + return [] + + parts = re.split(r"[\n;;。]+", text) + clean_parts = [p.strip().strip("::·•") for p in parts if len(p.strip()) >= 3] + return clean_parts + + def _finalize_metadata_entries(self, metadata: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]: + finalized: Dict[str, Dict[str, Any]] = {} + for key, entry in metadata.items(): + requirements_list = entry.get("requirements") or [] + dedup_reqs = [] + for req in requirements_list: + if req and req not in dedup_reqs: + dedup_reqs.append(req) + + rubric_points = entry.get("rubric_points") or [] + dedup_points: List[str] = [] + for point in rubric_points: + if point and point not in dedup_points: + dedup_points.append(point) + + finalized[key] = { + "title": entry.get("title", ""), + "score": entry.get("score"), + "category": entry.get("category"), + "requirements": "\n".join(dedup_reqs).strip(), + "rubric_points": dedup_points, + "source": entry.get("source"), + "chapter_id_source": entry.get("chapter_id_source"), + } + + return finalized + + def _apply_metadata_to_chapters( + self, + chapters: List[Dict[str, Any]], + metadata_lookup: Dict[str, Dict[str, Any]], + ) -> Dict[str, Dict[str, Any]]: + if not metadata_lookup: + return {} + + chapter_metadata: Dict[str, Dict[str, Any]] = {} + + for chapter in chapters: + normalized = chapter.get("normalized_title") + meta = metadata_lookup.get(normalized) + + if not meta and chapter.get("raw_heading"): + meta = metadata_lookup.get(self._normalize_heading_text(chapter["raw_heading"])) + + if not meta: + continue + + enriched = { + "title": chapter.get("title"), + "score": meta.get("score"), + "category": meta.get("category"), + "requirements": meta.get("requirements", ""), + "rubric_points": meta.get("rubric_points", []), + "source": meta.get("source"), + "chapter_id_source": meta.get("chapter_id_source"), + } + + chapter["requirements"] = enriched["requirements"] + if enriched["score"] is not None: + chapter["score"] = enriched["score"] + if enriched["category"]: + chapter["category"] = enriched["category"] + + chapter_metadata[chapter["id"]] = enriched + + logger.info("已为 %s 个章节注入评分元数据", len(chapter_metadata)) + return chapter_metadata + diff --git a/src/bidmaster/nodes/content/interact_user.py b/src/bidmaster/nodes/content/interact_user.py index 7a3ce12..e5ecd9a 100644 --- a/src/bidmaster/nodes/content/interact_user.py +++ b/src/bidmaster/nodes/content/interact_user.py @@ -75,7 +75,7 @@ class InteractWithUserNode(BaseNode): interaction_type="choice", prompt="是否使用RAG知识库辅助生成内容", options=["是", "否"], - default="是", + default="否", key=f"use_rag_{chapter_id}", ) diff --git a/src/bidmaster/tools/rag.py b/src/bidmaster/tools/rag.py index ae3b7be..1cb052e 100644 --- a/src/bidmaster/tools/rag.py +++ b/src/bidmaster/tools/rag.py @@ -209,7 +209,6 @@ class RAGTool: # 从上下文中提取任务信息 task_title = context.get('title', '任务') - task_requirements = context.get('requirements', '') emphasis = context.get('emphasis', '') rag_context = context.get('rag_context', '') @@ -217,20 +216,29 @@ class RAGTool: emphasis_part = f'\n特别强调:{emphasis}' if emphasis else '' rag_part = f'\n\n参考资料:\n{rag_context}' if rag_context else '' + prompt_variables = { + "title": task_title, + "chapter_path": context.get('chapter_path', task_title), + "score_info": context.get('score_info', '目标得分:未明确'), + "requirements_summary": context.get('requirements_summary', ''), + "rubric_points": context.get('rubric_points', '- 无明确评分要点'), + "objectives": context.get('objectives', '1. 围绕章节主题输出详实内容'), + "consistency_rules": context.get('consistency_rules', '1. 保持章节语气与格式一致'), + "context_summary": context.get('context_summary', '(暂无可引用的上下文)'), + "emphasis_part": emphasis_part, + "rag_part": rag_part, + } + # 从配置获取提示词 prompt_manager = get_prompt_manager() - prompt = prompt_manager.get_content_prompt( - "generate_with_rag", - title=task_title, - requirements=task_requirements, - emphasis_part=emphasis_part, - rag_part=rag_part - ) + prompt = prompt_manager.get_content_prompt("generate_with_rag", **prompt_variables) # 调用LLM生成 client = OpenAI( api_key=self.settings.api_key, - base_url=self.settings.base_url + base_url=self.settings.base_url, + timeout=180, + max_retries=2, ) response = client.chat.completions.create( diff --git a/src/bidmaster/utils/prompt_planner.py b/src/bidmaster/utils/prompt_planner.py new file mode 100644 index 0000000..671ffb5 --- /dev/null +++ b/src/bidmaster/utils/prompt_planner.py @@ -0,0 +1,199 @@ +"""章节提示词规划器 + +根据章节上下文、评分元数据和用户配置构建结构化写作brief。 +""" + +from __future__ import annotations + +import textwrap +from typing import Any, Dict, List, Optional + + +class PromptPlanner: + """构建章节写作brief的工具""" + + def __init__(self, state: Dict[str, Any]): + self.state = state + self.chapter_queue: List[Dict[str, Any]] = state.get("chapter_queue", []) + self.chapter_map: Dict[str, Dict[str, Any]] = state.get("chapter_map") or { + ch["id"]: ch for ch in self.chapter_queue + } + self.chapter_children_map: Dict[str, List[str]] = state.get("chapter_children_map", {}) + self.chapter_metadata: Dict[str, Dict[str, Any]] = state.get("chapter_metadata", {}) + self.chapter_configs: Dict[str, Dict[str, Any]] = state.get("chapter_configs", {}) + self.expanded_configs: Dict[str, str] = state.get("expanded_configs", {}) + self.generated_contents: Dict[str, str] = state.get("generated_contents", {}) + + # 回写以便后续节点直接使用 + state.setdefault("chapter_map", self.chapter_map) + + def build_prompt_spec(self, chapter: Dict[str, Any]) -> Dict[str, Any]: + chapter_id = chapter["id"] + metadata = self.chapter_metadata.get(chapter_id) + + requirements = ( + (metadata or {}).get("requirements") + or chapter.get("requirements") + or self._build_child_outline(chapter_id) + or "" + ) + rubric_points = (metadata or {}).get("rubric_points") or self._split_requirements(requirements) + score_info = self._format_score_info(chapter, metadata) + emphasis = self._resolve_emphasis(chapter_id) + chapter_path = self._build_chapter_path(chapter) + parent_context = self._collect_parent_context(chapter) + sibling_outline = self._collect_sibling_outline(chapter) + + objectives = self._build_objectives(requirements, metadata, emphasis) + consistency_rules = self._build_consistency_rules(chapter_path, parent_context) + + context_parts: List[str] = [] + if parent_context: + context_parts.append(f"父章节摘要:{parent_context}") + if sibling_outline: + context_parts.append(f"同级章节定位:{sibling_outline}") + context_summary = "\n".join(context_parts) or "(暂无可引用的上文,可直接围绕本章节展开)" + + spec = { + "title": chapter.get("title", chapter_id), + "chapter_path": chapter_path, + "score_info": score_info, + "requirements_summary": requirements or "该章节未提供评分描述,需结合项目背景补足内容。", + "rubric_points": self._format_bullet_block(rubric_points, "- 无明确评分要点,仍需提供详实内容"), + "objectives": self._format_ordered_block(objectives, "1. 围绕章节主题提供专业、详实且可执行的方案描述"), + "consistency_rules": self._format_ordered_block( + consistency_rules, + "1. 语言保持正式且与全篇一致,不得创建新的章/节标题", + ), + "context_summary": context_summary, + "emphasis": emphasis or "", + "category": (metadata or {}).get("category"), + } + + return spec + + def _build_chapter_path(self, chapter: Dict[str, Any]) -> str: + segments: List[str] = [] + current = chapter + while current: + title = current.get("title") or current.get("raw_heading") or current.get("id") + if title: + segments.append(title) + parent_id = current.get("parent_id") + current = self.chapter_map.get(parent_id) + return " > ".join(reversed(segments)) + + def _resolve_emphasis(self, chapter_id: str) -> str: + config = self.chapter_configs.get(chapter_id) + if config and config.get("emphasis"): + return config["emphasis"].strip() + + source_id = self.expanded_configs.get(chapter_id) + if source_id: + parent_config = self.chapter_configs.get(source_id) + if parent_config and parent_config.get("emphasis"): + return parent_config["emphasis"].strip() + + return "" + + def _collect_parent_context(self, chapter: Dict[str, Any]) -> str: + parent_id = chapter.get("parent_id") + if not parent_id: + return "" + parent_content = self.generated_contents.get(parent_id, "") + return self._truncate(parent_content, 420) + + def _collect_sibling_outline(self, chapter: Dict[str, Any]) -> str: + parent_id = chapter.get("parent_id") + if not parent_id: + return "" + siblings = self.chapter_children_map.get(parent_id, []) + titles = [self.chapter_map[sid]["title"] for sid in siblings if sid != chapter["id"] and sid in self.chapter_map] + if not titles: + return "" + preview = "、".join(titles[:4]) + return preview + + def _build_child_outline(self, chapter_id: str) -> str: + child_ids = self.chapter_children_map.get(chapter_id, []) + titles = [self.chapter_map[ch_id]["title"] for ch_id in child_ids if ch_id in self.chapter_map] + if not titles: + return "" + return ";".join(titles[:6]) + + def _build_objectives( + self, + requirements: str, + metadata: Optional[Dict[str, Any]], + emphasis: str, + ) -> List[str]: + objectives: List[str] = [] + if requirements: + objectives.append(f"完整覆盖评分描述:{self._truncate(requirements, 160)}") + + score = (metadata or {}).get("score") + if score: + objectives.append(f"明确呈现可支撑 {score} 分评价的量化亮点") + + if emphasis: + objectives.append(f"突出用户强调内容:{emphasis}") + + base_objective = "围绕章节主题提供结构化、工程化的内容" + if base_objective not in objectives: + objectives.append(base_objective) + + return objectives + + def _build_consistency_rules(self, chapter_path: str, parent_context: str) -> List[str]: + rules = [ + f"严格聚焦章节路径 {chapter_path},不得跨章节展开", + "禁止创建新的章/节级标题,仅可使用段落或加粗语句", + "语言保持正式、数据化、工程化,避免模板化口号", + ] + + if parent_context: + rules.append("与父章节内容保持逻辑衔接,避免信息冲突") + + return rules + + def _format_score_info(self, chapter: Dict[str, Any], metadata: Optional[Dict[str, Any]]) -> str: + score = (metadata or {}).get("score") + if not score: + chapter_score = chapter.get("score") + if chapter_score: + score = chapter_score + + category = (metadata or {}).get("category") + + if score: + info = f"目标得分:{score}分" + else: + info = "目标得分:未明确,但需保持与整体方案一致" + + if category: + info = f"{info}(类别:{category})" + + return info + + def _split_requirements(self, requirements: str) -> List[str]: + if not requirements: + return [] + raw = [seg.strip() for seg in requirements.replace("\r", "").split("\n") if seg.strip()] + return raw + + def _format_bullet_block(self, items: List[str], fallback: str) -> str: + valid = [item for item in items if item] + if not valid: + return fallback + return "\n".join(f"- {item}" for item in valid[:8]) + + def _format_ordered_block(self, items: List[str], fallback: str) -> str: + if not items: + return fallback + return "\n".join(f"{idx}. {item}" for idx, item in enumerate(items[:8], start=1)) + + def _truncate(self, text: str, limit: int) -> str: + if not text: + return "" + cleaned = " ".join(text.split()) + return textwrap.shorten(cleaned, width=limit, placeholder="...") diff --git a/tests/unit/test_init_config.py b/tests/unit/test_init_config.py new file mode 100644 index 0000000..25f597f --- /dev/null +++ b/tests/unit/test_init_config.py @@ -0,0 +1,68 @@ +from pathlib import Path + +from docx import Document + +from bidmaster.nodes.content.init_config import InitConfigNode + + +def _create_deep_heading_doc(path: Path) -> None: + doc = Document() + doc.add_heading("8 服务方案", level=1) + doc.add_heading("8.1 大南湖七矿VR智能培训系统及智能体感设备设计方案", level=2) + doc.add_heading("8.1.1 VR智能培训中心", level=3) + doc.add_heading("8.1.1.1 煤矿典型事故案例VR教学系统", level=4) + doc.add_heading("煤矿重大灾害VR虚拟体验与逃生系统", level=5) + doc.save(path) + + +def test_load_from_word_document_handles_deep_headings(tmp_path) -> None: + doc_path = tmp_path / "deep.docx" + _create_deep_heading_doc(doc_path) + + node = InitConfigNode() + chapters = node._load_from_word_document(str(doc_path)) + + assert len(chapters) == 5 + assert chapters[0]["id"] == "chapter_8" + assert chapters[1]["id"] == "chapter_8_1" + assert chapters[2]["parent_id"] == chapters[1]["id"] + + deepest = chapters[-1] + assert deepest["level"] == 5 + assert deepest["heading_number"] is None + assert deepest["id"] == "chapter_8_1_1_1_a" + assert deepest["parent_id"] == chapters[-2]["id"] + + +def test_apply_metadata_to_chapters_enriches_requirements() -> None: + node = InitConfigNode() + title = "1.1 技术方案-基本要求 (3.0分)" + normalized = node._normalize_heading_text(title) + chapters = [ + { + "id": "chapter_1_1", + "title": title, + "raw_heading": title, + "normalized_title": normalized, + "requirements": "", + "score": 0, + } + ] + + metadata_lookup = { + normalized: { + "title": title, + "score": 3.0, + "category": "technical_solution", + "requirements": "需覆盖总体架构、性能指标与技术栈选择。", + "rubric_points": ["总体架构完整", "性能指标可量化"], + "source": "analysis_result", + "chapter_id_source": "tech_solution_01_01", + } + } + + enriched = node._apply_metadata_to_chapters(chapters, metadata_lookup) + + assert chapters[0]["requirements"].startswith("需覆盖") + assert chapters[0]["score"] == 3.0 + assert enriched["chapter_1_1"]["rubric_points"][0] == "总体架构完整" diff --git a/tests/unit/test_prompt_planner.py b/tests/unit/test_prompt_planner.py new file mode 100644 index 0000000..05890dc --- /dev/null +++ b/tests/unit/test_prompt_planner.py @@ -0,0 +1,56 @@ +from bidmaster.utils.prompt_planner import PromptPlanner + + +def test_prompt_planner_uses_metadata_and_parent_context() -> None: + parent = {"id": "chapter_1", "title": "1. 总体技术方案", "level": 1} + child = { + "id": "chapter_1_1", + "title": "1.1 技术方案-基本要求 (3.0分)", + "level": 2, + "parent_id": "chapter_1", + } + + state = { + "chapter_queue": [parent, child], + "chapter_metadata": { + "chapter_1_1": { + "title": child["title"], + "score": 3.0, + "category": "technical_solution", + "requirements": "方案需覆盖总体架构;提供性能指标;描述安全策略。", + "rubric_points": ["总体架构完整", "性能指标量化"], + } + }, + "chapter_children_map": {"chapter_1": ["chapter_1_1"]}, + "generated_contents": {"chapter_1": "本章节描述总体设计与安全架构要点。"}, + "chapter_configs": {"chapter_1": {"emphasis": "突出安全策略"}}, + "expanded_configs": {"chapter_1": "chapter_1", "chapter_1_1": "chapter_1"}, + } + + planner = PromptPlanner(state) + spec = planner.build_prompt_spec(child) + + assert "总体架构完整" in spec["rubric_points"] + assert "父章节摘要" in spec["context_summary"] + assert "突出安全策略" in spec["objectives"] + + +def test_prompt_planner_fallback_without_metadata() -> None: + root = {"id": "chapter_root", "title": "1. 服务方案", "level": 1} + child = { + "id": "chapter_child", + "title": "1.1 现场服务体系", + "level": 2, + "parent_id": "chapter_root", + } + + state = { + "chapter_queue": [root, child], + "chapter_children_map": {"chapter_root": ["chapter_child"]}, + } + + planner = PromptPlanner(state) + spec = planner.build_prompt_spec(root) + + assert "1.1 现场服务体系" in spec["requirements_summary"] + assert "围绕章节主题" in spec["objectives"]