feat: enhance chapter prompt planning
Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
This commit is contained in:
parent
d887e7c313
commit
6f785c9f2c
@ -265,13 +265,30 @@ content_prompts:
|
||||
generate_with_rag: |
|
||||
你是一个专业的标书撰写助手。请根据以下信息生成标书章节内容:
|
||||
|
||||
章节标题:{title}
|
||||
评分要求:{requirements}{emphasis_part}{rag_part}
|
||||
当前章节: 《{title}》
|
||||
章节定位: {chapter_path}
|
||||
分值关注: {score_info}
|
||||
评分要点:
|
||||
{rubric_points}
|
||||
|
||||
写作目标:
|
||||
{objectives}
|
||||
|
||||
一致性约束:
|
||||
{consistency_rules}
|
||||
|
||||
上下文参考:
|
||||
{context_summary}
|
||||
|
||||
评分说明:
|
||||
{requirements_summary}{emphasis_part}{rag_part}
|
||||
|
||||
要求:
|
||||
1. 内容专业、详实,符合招标文件要求
|
||||
2. 突出技术优势和实施能力
|
||||
3. 语言正式、逻辑清晰
|
||||
4. 字数控制在500-800字
|
||||
5. 严禁新增任何章/节级标题或“商务条款、技术偏差、响应情况”等模板段,如需结构化仅使用普通段落或加粗语句
|
||||
6. 开头不得出现“经认真研读招标文件要求”“偏差说明如下”等跨章节套话,内容必须围绕《{title}》本身展开
|
||||
|
||||
请直接输出章节内容,不要包含章节标题。
|
||||
|
||||
@ -8,6 +8,7 @@ from typing import Any, Dict, Optional
|
||||
|
||||
from ..base import BaseNode, NodeContext
|
||||
from ...config.settings import get_settings
|
||||
from ...utils.prompt_planner import PromptPlanner
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
settings = get_settings()
|
||||
@ -51,6 +52,7 @@ class GenerateContentNode(BaseNode):
|
||||
|
||||
# 获取章节配置(继承父章节)
|
||||
config = self._get_chapter_config(state, chapter)
|
||||
planner = PromptPlanner(state)
|
||||
|
||||
# 查找当前章节的所有子标题
|
||||
sub_chapters = self._find_sub_chapters(state, chapter_id, max_level=settings.max_sub_chapter_level)
|
||||
@ -60,12 +62,12 @@ class GenerateContentNode(BaseNode):
|
||||
logger.info(f"章节 {chapter_id} 包含 {len(sub_chapters)} 个子标题,逐个生成")
|
||||
content_parts = []
|
||||
for sub in sub_chapters:
|
||||
sub_content = self._generate_with_rag(sub, config, state)
|
||||
sub_content = self._generate_with_rag(sub, config, state, planner)
|
||||
content_parts.append(f"## {sub['title']}\n\n{sub_content}")
|
||||
content = "\n\n".join(content_parts)
|
||||
else:
|
||||
# 无子标题:直接生成
|
||||
content = self._generate_with_rag(chapter, config, state)
|
||||
content = self._generate_with_rag(chapter, config, state, planner)
|
||||
|
||||
# 保存生成结果
|
||||
state.setdefault("generated_contents", {})[chapter_id] = content
|
||||
@ -167,7 +169,11 @@ class GenerateContentNode(BaseNode):
|
||||
return sub_chapters
|
||||
|
||||
def _generate_with_rag(
|
||||
self, chapter: Dict[str, Any], config: Dict[str, Any], state: Dict[str, Any]
|
||||
self,
|
||||
chapter: Dict[str, Any],
|
||||
config: Dict[str, Any],
|
||||
state: Dict[str, Any],
|
||||
planner: PromptPlanner,
|
||||
) -> str:
|
||||
"""使用RAG生成内容
|
||||
|
||||
@ -184,18 +190,19 @@ class GenerateContentNode(BaseNode):
|
||||
if not rag_tool:
|
||||
raise ValueError("RAGTool未初始化,请检查InitConfigNode配置")
|
||||
|
||||
# 构建生成上下文
|
||||
generation_context = {
|
||||
"title": chapter["title"],
|
||||
"level": chapter["level"],
|
||||
"requirements": chapter.get("requirements", ""),
|
||||
"emphasis": config.get("emphasis", ""),
|
||||
}
|
||||
prompt_spec = planner.build_prompt_spec(chapter)
|
||||
generation_context = dict(prompt_spec)
|
||||
|
||||
# 如果启用RAG,添加上下文信息
|
||||
if config.get("rag_enabled"):
|
||||
# 检索相关内容
|
||||
query = f"{chapter['title']} {config.get('emphasis', '')}"
|
||||
query_fragments = [chapter["title"]]
|
||||
if prompt_spec.get("emphasis"):
|
||||
query_fragments.append(prompt_spec["emphasis"])
|
||||
if prompt_spec.get("requirements_summary"):
|
||||
query_fragments.append(prompt_spec["requirements_summary"][:120])
|
||||
|
||||
query = " ".join(fragment for fragment in query_fragments if fragment)
|
||||
search_results = rag_tool.search(query, k=settings.rag_search_top_k)
|
||||
|
||||
if search_results:
|
||||
@ -208,11 +215,6 @@ class GenerateContentNode(BaseNode):
|
||||
else:
|
||||
generation_context["rag_context"] = ""
|
||||
|
||||
# 添加父章节上下文
|
||||
parent_context = state.get("last_generated_content", "")
|
||||
if parent_context and chapter["level"] > 1:
|
||||
generation_context["parent_context"] = parent_context[:settings.parent_context_length]
|
||||
|
||||
# 调用生成方法
|
||||
try:
|
||||
content = rag_tool.generate_content(chapter["id"], generation_context)
|
||||
|
||||
@ -5,13 +5,18 @@
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import unicodedata
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from ..base import BaseNode, NodeContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_HEADING_LEVEL = 9
|
||||
|
||||
|
||||
class InitConfigNode(BaseNode):
|
||||
"""初始化配置节点
|
||||
@ -52,10 +57,28 @@ class InitConfigNode(BaseNode):
|
||||
# 直接从Word文档读取章节结构
|
||||
chapters = self._load_from_word_document(word_file)
|
||||
|
||||
# 可选注入评分元数据
|
||||
metadata_lookup = self._load_chapter_metadata(Path(word_file))
|
||||
chapter_metadata = self._apply_metadata_to_chapters(chapters, metadata_lookup)
|
||||
|
||||
# 深度优先遍历生成队列
|
||||
chapter_queue = self._build_depth_first_queue(chapters)
|
||||
|
||||
logger.info(f"生成章节队列,共{len(chapter_queue)}个章节")
|
||||
chapter_map = {ch["id"]: ch for ch in chapters}
|
||||
children_map: defaultdict[str, List[str]] = defaultdict(list)
|
||||
for ch in chapters:
|
||||
parent_id = ch.get("parent_id")
|
||||
if parent_id:
|
||||
children_map[parent_id].append(ch["id"])
|
||||
|
||||
preview = ", ".join(
|
||||
f"{ch['id']}:{ch['title']}" for ch in chapter_queue[:5]
|
||||
) or "N/A"
|
||||
logger.info(
|
||||
"章节队列已生成,共 %s 个章节,预览: %s",
|
||||
len(chapter_queue),
|
||||
preview,
|
||||
)
|
||||
|
||||
# 初始化RAGTool实例(单例,全局共享)
|
||||
from ...tools.rag import RAGTool
|
||||
@ -67,8 +90,11 @@ class InitConfigNode(BaseNode):
|
||||
return self._update_state(
|
||||
state,
|
||||
chapter_queue=chapter_queue,
|
||||
chapter_map=chapter_map,
|
||||
chapter_children_map=dict(children_map),
|
||||
chapter_configs={},
|
||||
expanded_configs=expanded_configs, # 预展开的配置继承映射
|
||||
chapter_metadata=chapter_metadata,
|
||||
generated_contents={},
|
||||
completed_chapters=[],
|
||||
current_chapter_index=0,
|
||||
@ -85,51 +111,50 @@ class InitConfigNode(BaseNode):
|
||||
章节列表
|
||||
"""
|
||||
from docx import Document
|
||||
import re
|
||||
|
||||
doc = Document(word_file)
|
||||
chapters = []
|
||||
parent_stack = [] # 栈:存储各层级的父章节ID
|
||||
chapters: List[Dict[str, Any]] = []
|
||||
parent_stack: List[Dict[str, Any]] = []
|
||||
level_counters: defaultdict[int, int] = defaultdict(int)
|
||||
|
||||
for para in doc.paragraphs:
|
||||
style_name = para.style.name
|
||||
|
||||
# 只处理Heading 1/2/3
|
||||
style_name = getattr(para.style, 'name', '') or ''
|
||||
if not style_name.startswith('Heading'):
|
||||
continue
|
||||
|
||||
# 提取层级
|
||||
level_match = re.match(r'Heading\s+(\d+)', style_name)
|
||||
if not level_match:
|
||||
continue
|
||||
|
||||
level = int(level_match.group(1))
|
||||
if level > 3: # 只处理到3级标题
|
||||
if level < 1 or level > MAX_HEADING_LEVEL:
|
||||
continue
|
||||
|
||||
# 提取标题文本和编号
|
||||
title_text = para.text.strip()
|
||||
if not title_text:
|
||||
raw_heading = para.text.strip()
|
||||
if not raw_heading:
|
||||
continue
|
||||
|
||||
# 从标题中提取编号(如"2.1.3 标题" → "2_1_3")
|
||||
number_match = re.match(r'^([\d\.]+)\s+(.+)$', title_text)
|
||||
if number_match:
|
||||
number_str = number_match.group(1).rstrip('.')
|
||||
title = number_match.group(2)
|
||||
chapter_id = f"chapter_{number_str.replace('.', '_')}"
|
||||
else:
|
||||
# 无编号,使用索引
|
||||
chapter_id = f"chapter_{len(chapters) + 1}"
|
||||
title = title_text
|
||||
heading_number, title = self._split_heading_components(raw_heading)
|
||||
|
||||
level_counters[level] += 1
|
||||
for deeper_level in list(level_counters.keys()):
|
||||
if deeper_level > level:
|
||||
level_counters[deeper_level] = 0
|
||||
|
||||
# 确定父章节ID
|
||||
while parent_stack and parent_stack[-1]['level'] >= level:
|
||||
parent_stack.pop()
|
||||
|
||||
parent_id = parent_stack[-1]['id'] if parent_stack else None
|
||||
parent_path: List[str] = parent_stack[-1]['path'] if parent_stack else []
|
||||
|
||||
if heading_number:
|
||||
id_path = heading_number.strip('.').split('.')
|
||||
else:
|
||||
segment = self._format_unlabeled_segment(level_counters[level])
|
||||
id_path = [*parent_path, segment] if parent_path else [segment]
|
||||
|
||||
chapter_id = f"chapter_{'_'.join(id_path)}"
|
||||
|
||||
# 创建章节信息
|
||||
chapter_info = {
|
||||
"id": chapter_id,
|
||||
"title": title,
|
||||
@ -138,17 +163,58 @@ class InitConfigNode(BaseNode):
|
||||
"requirements": "",
|
||||
"score": 0,
|
||||
"parent_id": parent_id,
|
||||
"raw_heading": raw_heading,
|
||||
"normalized_title": self._normalize_heading_text(title),
|
||||
"heading_number": heading_number,
|
||||
"order_index": level_counters[level],
|
||||
"style_name": style_name,
|
||||
}
|
||||
|
||||
chapters.append(chapter_info)
|
||||
parent_stack.append({'id': chapter_id, 'level': level, 'path': id_path})
|
||||
|
||||
# 将当前章节加入栈
|
||||
parent_stack.append({'id': chapter_id, 'level': level})
|
||||
|
||||
logger.info(f"从Word文档读取到 {len(chapters)} 个章节")
|
||||
sample = ', '.join(f"{ch['id']}:{ch['title']}" for ch in chapters[:5]) or "N/A"
|
||||
logger.info(
|
||||
"从Word文档读取到 %s 个章节,样例: %s",
|
||||
len(chapters),
|
||||
sample,
|
||||
)
|
||||
|
||||
return chapters
|
||||
|
||||
@staticmethod
|
||||
def _normalize_heading_text(text: str) -> str:
|
||||
"""标准化标题文本,消除匹配差异"""
|
||||
|
||||
if not text:
|
||||
return ""
|
||||
|
||||
normalized = unicodedata.normalize("NFKC", text)
|
||||
normalized = normalized.strip()
|
||||
normalized = re.sub(r"\s+", "", normalized)
|
||||
return normalized.lower()
|
||||
|
||||
def _split_heading_components(self, raw_heading: str) -> tuple[Optional[str], str]:
|
||||
"""拆分原始标题,返回(编号, 标题)"""
|
||||
|
||||
match = re.match(r"^(\d+(?:\.\d+)*)\s+(.+)$", raw_heading)
|
||||
if match:
|
||||
number = match.group(1).rstrip('.')
|
||||
title = match.group(2).strip()
|
||||
return number, title or raw_heading
|
||||
|
||||
return None, raw_heading
|
||||
|
||||
def _format_unlabeled_segment(self, order_index: int) -> str:
|
||||
"""将序号转换为字母序列,用于无编号标题ID"""
|
||||
|
||||
index = max(order_index, 1)
|
||||
chars: List[str] = []
|
||||
while index > 0:
|
||||
index, remainder = divmod(index - 1, 26)
|
||||
chars.append(chr(ord('a') + remainder))
|
||||
return ''.join(reversed(chars))
|
||||
|
||||
def _build_depth_first_queue(self, chapters: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""深度优先遍历生成章节队列
|
||||
|
||||
@ -171,20 +237,6 @@ class InitConfigNode(BaseNode):
|
||||
|
||||
return queue
|
||||
|
||||
def _extract_chapter_numbers(self, chapter_id: str) -> List[int]:
|
||||
"""从章节ID提取数字编号用于排序
|
||||
|
||||
Args:
|
||||
chapter_id: 章节ID,如 "chapter_1_2_3"
|
||||
|
||||
Returns:
|
||||
数字列表,如 [1, 2, 3]
|
||||
"""
|
||||
import re
|
||||
# 提取所有数字
|
||||
numbers = re.findall(r'\d+', chapter_id)
|
||||
return [int(n) for n in numbers]
|
||||
|
||||
def _dfs_traverse(
|
||||
self,
|
||||
chapter: Dict[str, Any],
|
||||
@ -202,12 +254,9 @@ class InitConfigNode(BaseNode):
|
||||
"""
|
||||
queue.append(chapter)
|
||||
|
||||
# 查找子章节
|
||||
chapter_id = chapter["id"]
|
||||
children = [ch for ch in all_chapters if ch.get("parent_id") == chapter_id]
|
||||
|
||||
# 按数字编号排序确保正确顺序(修复:避免字符串排序导致 1_10 < 1_2)
|
||||
children.sort(key=lambda x: self._extract_chapter_numbers(x["id"]))
|
||||
children.sort(key=lambda x: x.get("order_index", 0))
|
||||
|
||||
# 递归遍历子章节
|
||||
for child in children:
|
||||
@ -247,3 +296,193 @@ class InitConfigNode(BaseNode):
|
||||
logger.info(f"预展开配置继承链,共{len(expanded)}个章节")
|
||||
return expanded
|
||||
|
||||
def _load_chapter_metadata(self, word_path: Path) -> Dict[str, Dict[str, Any]]:
|
||||
"""读取项目目录下的章节元数据(可选)"""
|
||||
|
||||
project_dir = word_path.parent
|
||||
metadata: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
analysis_file = project_dir / "analysis_result.json"
|
||||
tasks_file = project_dir / "tasks.json"
|
||||
|
||||
if analysis_file.exists():
|
||||
metadata = self._parse_analysis_metadata(analysis_file)
|
||||
if metadata:
|
||||
logger.info("已加载 analysis_result.json 评分元数据")
|
||||
return metadata
|
||||
|
||||
if tasks_file.exists():
|
||||
metadata = self._parse_tasks_metadata(tasks_file)
|
||||
if metadata:
|
||||
logger.info("已加载 tasks.json 评分元数据")
|
||||
|
||||
return metadata
|
||||
|
||||
def _parse_analysis_metadata(self, file_path: Path) -> Dict[str, Dict[str, Any]]:
    """Parse ``analysis_result.json`` into metadata keyed by normalized title.

    The file is read as a JSON object with an optional ``chapters`` tree
    (nodes with ``id``, ``title``, ``children``) and optional
    ``technical_criteria`` / ``commercial_criteria`` lists. Each criterion is
    attached to the title of the chapter named by its ``chapter_id``, falling
    back to the criterion's ``item_name``.

    Loading is best-effort: unreadable files and unexpectedly shaped content
    are logged or skipped instead of raising, because this metadata is
    optional.

    Args:
        file_path: Path to the analysis result JSON file.

    Returns:
        Finalized metadata entries keyed by normalized title; ``{}`` when the
        file cannot be read or its top level is not a JSON object.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as exc:  # deliberate best-effort: metadata is optional
        logger.warning("读取 %s 失败: %s", file_path, exc)
        return {}

    if not isinstance(data, dict):
        # Valid JSON with an unexpected shape must not abort initialization.
        logger.warning("读取 %s 失败: %s", file_path, "顶层结构不是JSON对象")
        return {}

    chapter_titles: Dict[str, str] = {}

    def _collect_titles(nodes: Optional[List[Dict[str, Any]]]) -> None:
        # Walk the chapter tree recursively, recording id -> title.
        if not isinstance(nodes, list):
            return
        for node in nodes:
            if not isinstance(node, dict):
                continue
            chapter_id = node.get("id")
            title = node.get("title", "")
            if chapter_id and title:
                chapter_titles[chapter_id] = title
            _collect_titles(node.get("children"))

    _collect_titles(data.get("chapters", []))

    criteria: List[Any] = []
    for section_key in ("technical_criteria", "commercial_criteria"):
        section = data.get(section_key)
        if isinstance(section, list):
            criteria.extend(section)

    metadata: Dict[str, Dict[str, Any]] = {}
    for item in criteria:
        if not isinstance(item, dict):
            continue

        chapter_id = item.get("chapter_id")
        title = chapter_titles.get(chapter_id) or item.get("item_name") or ""
        normalized = self._normalize_heading_text(title)
        if not normalized:
            continue

        # Criteria that resolve to the same title are merged into one entry.
        entry = metadata.setdefault(
            normalized,
            {
                "title": title,
                "score": None,
                "category": None,
                "requirements": [],
                "rubric_points": [],
                "source": "analysis_result",
                "chapter_id_source": chapter_id,
            },
        )

        # Later criteria overwrite score/category of earlier ones.
        if item.get("max_score") is not None:
            entry["score"] = item["max_score"]

        if item.get("category"):
            entry["category"] = item["category"]

        # str() guards against a non-string description value.
        description = str(item.get("description") or "").strip()
        if description:
            entry["requirements"].append(description)
            entry["rubric_points"].extend(self._split_rubric_points(description))

    return self._finalize_metadata_entries(metadata)
|
||||
|
||||
def _parse_tasks_metadata(self, file_path: Path) -> Dict[str, Dict[str, Any]]:
    """Parse ``tasks.json`` into metadata keyed by normalized task title.

    The file is read as a JSON list of task objects with ``title`` and
    optional ``score``, ``category``, ``chapter_id`` and ``description``.
    Tasks sharing a normalized title are merged; score and category come from
    the first such task.

    Loading is best-effort: unreadable files and unexpectedly shaped content
    are logged or skipped instead of raising, because this metadata is
    optional.

    Args:
        file_path: Path to the tasks JSON file.

    Returns:
        Finalized metadata entries keyed by normalized title; ``{}`` when the
        file cannot be read or its top level is not a JSON list.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            tasks = json.load(f)
    except Exception as exc:  # deliberate best-effort: metadata is optional
        logger.warning("读取 %s 失败: %s", file_path, exc)
        return {}

    if not isinstance(tasks, list):
        # Iterating a dict would yield string keys and crash on ``.get``.
        logger.warning("读取 %s 失败: %s", file_path, "顶层结构不是JSON数组")
        return {}

    metadata: Dict[str, Dict[str, Any]] = {}
    for task in tasks:
        if not isinstance(task, dict):
            continue

        title = task.get("title", "")
        normalized = self._normalize_heading_text(title)
        if not normalized:
            continue

        entry = metadata.setdefault(
            normalized,
            {
                "title": title,
                "score": task.get("score"),
                "category": task.get("category"),
                "requirements": [],
                "rubric_points": [],
                "source": "tasks",
                "chapter_id_source": task.get("chapter_id"),
            },
        )

        # str() guards against a non-string description value.
        description = str(task.get("description") or "").strip()
        if description:
            entry["requirements"].append(description)
            entry["rubric_points"].extend(self._split_rubric_points(description))

    return self._finalize_metadata_entries(metadata)
|
||||
|
||||
def _split_rubric_points(self, text: str) -> List[str]:
|
||||
if not text:
|
||||
return []
|
||||
|
||||
parts = re.split(r"[\n;;。]+", text)
|
||||
clean_parts = [p.strip().strip("::·•") for p in parts if len(p.strip()) >= 3]
|
||||
return clean_parts
|
||||
|
||||
def _finalize_metadata_entries(self, metadata: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
|
||||
finalized: Dict[str, Dict[str, Any]] = {}
|
||||
for key, entry in metadata.items():
|
||||
requirements_list = entry.get("requirements") or []
|
||||
dedup_reqs = []
|
||||
for req in requirements_list:
|
||||
if req and req not in dedup_reqs:
|
||||
dedup_reqs.append(req)
|
||||
|
||||
rubric_points = entry.get("rubric_points") or []
|
||||
dedup_points: List[str] = []
|
||||
for point in rubric_points:
|
||||
if point and point not in dedup_points:
|
||||
dedup_points.append(point)
|
||||
|
||||
finalized[key] = {
|
||||
"title": entry.get("title", ""),
|
||||
"score": entry.get("score"),
|
||||
"category": entry.get("category"),
|
||||
"requirements": "\n".join(dedup_reqs).strip(),
|
||||
"rubric_points": dedup_points,
|
||||
"source": entry.get("source"),
|
||||
"chapter_id_source": entry.get("chapter_id_source"),
|
||||
}
|
||||
|
||||
return finalized
|
||||
|
||||
def _apply_metadata_to_chapters(
|
||||
self,
|
||||
chapters: List[Dict[str, Any]],
|
||||
metadata_lookup: Dict[str, Dict[str, Any]],
|
||||
) -> Dict[str, Dict[str, Any]]:
|
||||
if not metadata_lookup:
|
||||
return {}
|
||||
|
||||
chapter_metadata: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
for chapter in chapters:
|
||||
normalized = chapter.get("normalized_title")
|
||||
meta = metadata_lookup.get(normalized)
|
||||
|
||||
if not meta and chapter.get("raw_heading"):
|
||||
meta = metadata_lookup.get(self._normalize_heading_text(chapter["raw_heading"]))
|
||||
|
||||
if not meta:
|
||||
continue
|
||||
|
||||
enriched = {
|
||||
"title": chapter.get("title"),
|
||||
"score": meta.get("score"),
|
||||
"category": meta.get("category"),
|
||||
"requirements": meta.get("requirements", ""),
|
||||
"rubric_points": meta.get("rubric_points", []),
|
||||
"source": meta.get("source"),
|
||||
"chapter_id_source": meta.get("chapter_id_source"),
|
||||
}
|
||||
|
||||
chapter["requirements"] = enriched["requirements"]
|
||||
if enriched["score"] is not None:
|
||||
chapter["score"] = enriched["score"]
|
||||
if enriched["category"]:
|
||||
chapter["category"] = enriched["category"]
|
||||
|
||||
chapter_metadata[chapter["id"]] = enriched
|
||||
|
||||
logger.info("已为 %s 个章节注入评分元数据", len(chapter_metadata))
|
||||
return chapter_metadata
|
||||
|
||||
|
||||
@ -75,7 +75,7 @@ class InteractWithUserNode(BaseNode):
|
||||
interaction_type="choice",
|
||||
prompt="是否使用RAG知识库辅助生成内容",
|
||||
options=["是", "否"],
|
||||
default="是",
|
||||
default="否",
|
||||
key=f"use_rag_{chapter_id}",
|
||||
)
|
||||
|
||||
|
||||
@ -209,7 +209,6 @@ class RAGTool:
|
||||
|
||||
# 从上下文中提取任务信息
|
||||
task_title = context.get('title', '任务')
|
||||
task_requirements = context.get('requirements', '')
|
||||
emphasis = context.get('emphasis', '')
|
||||
rag_context = context.get('rag_context', '')
|
||||
|
||||
@ -217,20 +216,29 @@ class RAGTool:
|
||||
emphasis_part = f'\n特别强调:{emphasis}' if emphasis else ''
|
||||
rag_part = f'\n\n参考资料:\n{rag_context}' if rag_context else ''
|
||||
|
||||
prompt_variables = {
|
||||
"title": task_title,
|
||||
"chapter_path": context.get('chapter_path', task_title),
|
||||
"score_info": context.get('score_info', '目标得分:未明确'),
|
||||
"requirements_summary": context.get('requirements_summary', ''),
|
||||
"rubric_points": context.get('rubric_points', '- 无明确评分要点'),
|
||||
"objectives": context.get('objectives', '1. 围绕章节主题输出详实内容'),
|
||||
"consistency_rules": context.get('consistency_rules', '1. 保持章节语气与格式一致'),
|
||||
"context_summary": context.get('context_summary', '(暂无可引用的上下文)'),
|
||||
"emphasis_part": emphasis_part,
|
||||
"rag_part": rag_part,
|
||||
}
|
||||
|
||||
# 从配置获取提示词
|
||||
prompt_manager = get_prompt_manager()
|
||||
prompt = prompt_manager.get_content_prompt(
|
||||
"generate_with_rag",
|
||||
title=task_title,
|
||||
requirements=task_requirements,
|
||||
emphasis_part=emphasis_part,
|
||||
rag_part=rag_part
|
||||
)
|
||||
prompt = prompt_manager.get_content_prompt("generate_with_rag", **prompt_variables)
|
||||
|
||||
# 调用LLM生成
|
||||
client = OpenAI(
|
||||
api_key=self.settings.api_key,
|
||||
base_url=self.settings.base_url
|
||||
base_url=self.settings.base_url,
|
||||
timeout=180,
|
||||
max_retries=2,
|
||||
)
|
||||
|
||||
response = client.chat.completions.create(
|
||||
|
||||
199
src/bidmaster/utils/prompt_planner.py
Normal file
199
src/bidmaster/utils/prompt_planner.py
Normal file
@ -0,0 +1,199 @@
|
||||
"""章节提示词规划器
|
||||
|
||||
根据章节上下文、评分元数据和用户配置构建结构化写作brief。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import textwrap
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
class PromptPlanner:
    """Build a structured writing brief (prompt spec) for a chapter.

    The planner reads chapter structure, scoring metadata, user configuration
    and already generated content from the workflow ``state`` and condenses
    them into the string fields returned by :meth:`build_prompt_spec`.
    """

    def __init__(self, state: Dict[str, Any]):
        """Capture the parts of ``state`` the planner reads.

        Args:
            state: Workflow state. ``chapter_map`` is derived from
                ``chapter_queue`` when missing or empty and is written back
                with ``setdefault`` when the key is absent.
        """
        self.state = state
        self.chapter_queue: List[Dict[str, Any]] = state.get("chapter_queue", [])
        self.chapter_map: Dict[str, Dict[str, Any]] = state.get("chapter_map") or {
            ch["id"]: ch for ch in self.chapter_queue
        }
        self.chapter_children_map: Dict[str, List[str]] = state.get("chapter_children_map", {})
        self.chapter_metadata: Dict[str, Dict[str, Any]] = state.get("chapter_metadata", {})
        self.chapter_configs: Dict[str, Dict[str, Any]] = state.get("chapter_configs", {})
        self.expanded_configs: Dict[str, str] = state.get("expanded_configs", {})
        self.generated_contents: Dict[str, str] = state.get("generated_contents", {})

        # Write back so later nodes can use the map directly.
        state.setdefault("chapter_map", self.chapter_map)

    def build_prompt_spec(self, chapter: Dict[str, Any]) -> Dict[str, Any]:
        """Assemble the prompt variables for ``chapter``.

        Requirements are taken from scoring metadata, then from the chapter
        itself, then from an outline of the child chapter titles.

        Args:
            chapter: Chapter dictionary; must contain ``id``.

        Returns:
            A dictionary with the keys ``title``, ``chapter_path``,
            ``score_info``, ``requirements_summary``, ``rubric_points``,
            ``objectives``, ``consistency_rules``, ``context_summary``,
            ``emphasis`` and ``category``.
        """
        chapter_id = chapter["id"]
        metadata = self.chapter_metadata.get(chapter_id)

        requirements = (
            (metadata or {}).get("requirements")
            or chapter.get("requirements")
            or self._build_child_outline(chapter_id)
            or ""
        )
        rubric_points = (metadata or {}).get("rubric_points") or self._split_requirements(requirements)
        score_info = self._format_score_info(chapter, metadata)
        emphasis = self._resolve_emphasis(chapter_id)
        chapter_path = self._build_chapter_path(chapter)
        parent_context = self._collect_parent_context(chapter)
        sibling_outline = self._collect_sibling_outline(chapter)

        objectives = self._build_objectives(requirements, metadata, emphasis)
        consistency_rules = self._build_consistency_rules(chapter_path, parent_context)

        context_parts: List[str] = []
        if parent_context:
            context_parts.append(f"父章节摘要:{parent_context}")
        if sibling_outline:
            context_parts.append(f"同级章节定位:{sibling_outline}")
        context_summary = "\n".join(context_parts) or "(暂无可引用的上文,可直接围绕本章节展开)"

        return {
            "title": chapter.get("title", chapter_id),
            "chapter_path": chapter_path,
            "score_info": score_info,
            "requirements_summary": requirements or "该章节未提供评分描述,需结合项目背景补足内容。",
            "rubric_points": self._format_bullet_block(rubric_points, "- 无明确评分要点,仍需提供详实内容"),
            "objectives": self._format_ordered_block(objectives, "1. 围绕章节主题提供专业、详实且可执行的方案描述"),
            "consistency_rules": self._format_ordered_block(
                consistency_rules,
                "1. 语言保持正式且与全篇一致,不得创建新的章/节标题",
            ),
            "context_summary": context_summary,
            "emphasis": emphasis or "",
            "category": (metadata or {}).get("category"),
        }

    def _build_chapter_path(self, chapter: Dict[str, Any]) -> str:
        """Return the ``root > ... > chapter`` title path for ``chapter``.

        Ancestors are resolved through ``parent_id`` in ``chapter_map``. The
        walk stops when a chapter object is met a second time, so a cyclic
        parent chain (for example a chapter whose ``parent_id`` equals its own
        id) cannot loop forever.
        """
        segments: List[str] = []
        seen: set = set()
        current: Optional[Dict[str, Any]] = chapter
        while current and id(current) not in seen:
            seen.add(id(current))
            title = current.get("title") or current.get("raw_heading") or current.get("id")
            if title:
                segments.append(title)
            current = self.chapter_map.get(current.get("parent_id"))
        return " > ".join(reversed(segments))

    def _resolve_emphasis(self, chapter_id: str) -> str:
        """Return the chapter's own emphasis, else the inherited one, else ``""``.

        Inheritance is looked up through ``expanded_configs``, which maps a
        chapter id to the id whose configuration it uses.
        """
        config = self.chapter_configs.get(chapter_id)
        if config and config.get("emphasis"):
            return config["emphasis"].strip()

        source_id = self.expanded_configs.get(chapter_id)
        if source_id:
            parent_config = self.chapter_configs.get(source_id)
            if parent_config and parent_config.get("emphasis"):
                return parent_config["emphasis"].strip()

        return ""

    def _collect_parent_context(self, chapter: Dict[str, Any]) -> str:
        """Return the parent's generated content, cut to 420 characters."""
        parent_id = chapter.get("parent_id")
        if not parent_id:
            return ""
        parent_content = self.generated_contents.get(parent_id, "")
        return self._truncate(parent_content, 420)

    def _collect_sibling_outline(self, chapter: Dict[str, Any]) -> str:
        """Return up to four sibling titles joined by ``、`` (``""`` if none)."""
        parent_id = chapter.get("parent_id")
        if not parent_id:
            return ""
        siblings = self.chapter_children_map.get(parent_id, [])
        titles = [
            self.chapter_map[sid]["title"]
            for sid in siblings
            if sid != chapter["id"] and sid in self.chapter_map
        ]
        if not titles:
            return ""
        return "、".join(titles[:4])

    def _build_child_outline(self, chapter_id: str) -> str:
        """Return up to six child titles joined by ``;`` (``""`` if none)."""
        child_ids = self.chapter_children_map.get(chapter_id, [])
        titles = [self.chapter_map[ch_id]["title"] for ch_id in child_ids if ch_id in self.chapter_map]
        if not titles:
            return ""
        return ";".join(titles[:6])

    def _build_objectives(
        self,
        requirements: str,
        metadata: Optional[Dict[str, Any]],
        emphasis: str,
    ) -> List[str]:
        """List the writing objectives; the generic objective is always last."""
        objectives: List[str] = []
        if requirements:
            objectives.append(f"完整覆盖评分描述:{self._truncate(requirements, 160)}")

        score = (metadata or {}).get("score")
        if score:
            objectives.append(f"明确呈现可支撑 {score} 分评价的量化亮点")

        if emphasis:
            objectives.append(f"突出用户强调内容:{emphasis}")

        # Every other objective carries a prefix, so this one is never a duplicate.
        objectives.append("围绕章节主题提供结构化、工程化的内容")
        return objectives

    def _build_consistency_rules(self, chapter_path: str, parent_context: str) -> List[str]:
        """List the consistency rules; a parent-coherence rule is added when
        parent context is available."""
        rules = [
            f"严格聚焦章节路径 {chapter_path},不得跨章节展开",
            "禁止创建新的章/节级标题,仅可使用段落或加粗语句",
            "语言保持正式、数据化、工程化,避免模板化口号",
        ]

        if parent_context:
            rules.append("与父章节内容保持逻辑衔接,避免信息冲突")

        return rules

    def _format_score_info(self, chapter: Dict[str, Any], metadata: Optional[Dict[str, Any]]) -> str:
        """Describe the target score (metadata first, then the chapter) and,
        when known, the category."""
        score = (metadata or {}).get("score")
        if not score:
            chapter_score = chapter.get("score")
            if chapter_score:
                score = chapter_score

        category = (metadata or {}).get("category")

        if score:
            info = f"目标得分:{score}分"
        else:
            info = "目标得分:未明确,但需保持与整体方案一致"

        if category:
            info = f"{info}(类别:{category})"

        return info

    def _split_requirements(self, requirements: str) -> List[str]:
        """Split requirements into non-empty, stripped lines."""
        if not requirements:
            return []
        return [seg.strip() for seg in requirements.replace("\r", "").split("\n") if seg.strip()]

    def _format_bullet_block(self, items: List[str], fallback: str) -> str:
        """Render up to eight truthy items as ``- item`` lines, or ``fallback``."""
        valid = [item for item in items if item]
        if not valid:
            return fallback
        return "\n".join(f"- {item}" for item in valid[:8])

    def _format_ordered_block(self, items: List[str], fallback: str) -> str:
        """Render up to eight items as a ``1.``-numbered list, or ``fallback``."""
        if not items:
            return fallback
        return "\n".join(f"{idx}. {item}" for idx, item in enumerate(items[:8], start=1))

    def _truncate(self, text: str, limit: int) -> str:
        """Collapse whitespace and cut ``text`` to at most ``limit`` characters.

        The cut is character-based rather than word-based: Chinese text has
        no spaces, and a word-based shortener reduces such text to the bare
        placeholder once it exceeds the limit, discarding the whole summary.

        Args:
            text: Text to shorten; may be empty.
            limit: Maximum length of the result, including the ``...`` suffix.

        Returns:
            The whitespace-collapsed text, ending in ``...`` when it was cut.
        """
        if not text:
            return ""
        cleaned = " ".join(text.split())
        if len(cleaned) <= limit:
            return cleaned

        placeholder = "..."
        if limit <= len(placeholder):
            # No room for the placeholder; return a plain prefix.
            return cleaned[:max(limit, 0)]
        return cleaned[: limit - len(placeholder)].rstrip() + placeholder
|
||||
68
tests/unit/test_init_config.py
Normal file
68
tests/unit/test_init_config.py
Normal file
@ -0,0 +1,68 @@
|
||||
from pathlib import Path
|
||||
|
||||
from docx import Document
|
||||
|
||||
from bidmaster.nodes.content.init_config import InitConfigNode
|
||||
|
||||
|
||||
def _create_deep_heading_doc(path: Path) -> None:
    """Write a .docx to ``path`` with headings nested five levels deep.

    Levels 1-4 carry dotted numbers; the level-5 heading is unnumbered.
    """
    headings = [
        ("8 服务方案", 1),
        ("8.1 大南湖七矿VR智能培训系统及智能体感设备设计方案", 2),
        ("8.1.1 VR智能培训中心", 3),
        ("8.1.1.1 煤矿典型事故案例VR教学系统", 4),
        ("煤矿重大灾害VR虚拟体验与逃生系统", 5),
    ]
    doc = Document()
    for text, depth in headings:
        doc.add_heading(text, level=depth)
    doc.save(path)
|
||||
|
||||
|
||||
def test_load_from_word_document_handles_deep_headings(tmp_path) -> None:
    """Headings deeper than level 3 are parsed, and an unnumbered heading
    gets a letter-based id under its parent's id path."""
    docx_file = tmp_path / "deep.docx"
    _create_deep_heading_doc(docx_file)

    parsed = InitConfigNode()._load_from_word_document(str(docx_file))

    assert len(parsed) == 5
    assert parsed[0]["id"] == "chapter_8"
    assert parsed[1]["id"] == "chapter_8_1"
    assert parsed[2]["parent_id"] == parsed[1]["id"]

    last, second_last = parsed[-1], parsed[-2]
    assert last["level"] == 5
    assert last["heading_number"] is None
    assert last["id"] == "chapter_8_1_1_1_a"
    assert last["parent_id"] == second_last["id"]
|
||||
|
||||
|
||||
def test_apply_metadata_to_chapters_enriches_requirements() -> None:
    """A chapter whose normalized title matches the lookup receives the
    metadata's requirements and score, and is reported under its id."""
    node = InitConfigNode()
    heading = "1.1 技术方案-基本要求 (3.0分)"
    lookup_key = node._normalize_heading_text(heading)

    chapter = {
        "id": "chapter_1_1",
        "title": heading,
        "raw_heading": heading,
        "normalized_title": lookup_key,
        "requirements": "",
        "score": 0,
    }
    lookup = {
        lookup_key: {
            "title": heading,
            "score": 3.0,
            "category": "technical_solution",
            "requirements": "需覆盖总体架构、性能指标与技术栈选择。",
            "rubric_points": ["总体架构完整", "性能指标可量化"],
            "source": "analysis_result",
            "chapter_id_source": "tech_solution_01_01",
        }
    }

    result = node._apply_metadata_to_chapters([chapter], lookup)

    assert chapter["requirements"].startswith("需覆盖")
    assert chapter["score"] == 3.0
    assert result["chapter_1_1"]["rubric_points"][0] == "总体架构完整"
|
||||
56
tests/unit/test_prompt_planner.py
Normal file
56
tests/unit/test_prompt_planner.py
Normal file
@ -0,0 +1,56 @@
|
||||
from bidmaster.utils.prompt_planner import PromptPlanner
|
||||
|
||||
|
||||
def test_prompt_planner_uses_metadata_and_parent_context() -> None:
    """The spec reflects rubric points from metadata, the parent's generated
    content, and the emphasis inherited from the parent's configuration."""
    parent_chapter = {"id": "chapter_1", "title": "1. 总体技术方案", "level": 1}
    child_chapter = {
        "id": "chapter_1_1",
        "title": "1.1 技术方案-基本要求 (3.0分)",
        "level": 2,
        "parent_id": "chapter_1",
    }
    child_metadata = {
        "title": child_chapter["title"],
        "score": 3.0,
        "category": "technical_solution",
        "requirements": "方案需覆盖总体架构;提供性能指标;描述安全策略。",
        "rubric_points": ["总体架构完整", "性能指标量化"],
    }
    state = {
        "chapter_queue": [parent_chapter, child_chapter],
        "chapter_metadata": {"chapter_1_1": child_metadata},
        "chapter_children_map": {"chapter_1": ["chapter_1_1"]},
        "generated_contents": {"chapter_1": "本章节描述总体设计与安全架构要点。"},
        "chapter_configs": {"chapter_1": {"emphasis": "突出安全策略"}},
        "expanded_configs": {"chapter_1": "chapter_1", "chapter_1_1": "chapter_1"},
    }

    spec = PromptPlanner(state).build_prompt_spec(child_chapter)

    assert "总体架构完整" in spec["rubric_points"]
    assert "父章节摘要" in spec["context_summary"]
    assert "突出安全策略" in spec["objectives"]
|
||||
|
||||
|
||||
def test_prompt_planner_fallback_without_metadata() -> None:
    """Without metadata, the requirements summary falls back to the outline
    of child titles and the generic objective is still present."""
    root_chapter = {"id": "chapter_root", "title": "1. 服务方案", "level": 1}
    child_chapter = {
        "id": "chapter_child",
        "title": "1.1 现场服务体系",
        "level": 2,
        "parent_id": "chapter_root",
    }
    state = {
        "chapter_queue": [root_chapter, child_chapter],
        "chapter_children_map": {"chapter_root": ["chapter_child"]},
    }

    spec = PromptPlanner(state).build_prompt_spec(root_chapter)

    assert "1.1 现场服务体系" in spec["requirements_summary"]
    assert "围绕章节主题" in spec["objectives"]
|
||||
Loading…
Reference in New Issue
Block a user