diff --git a/src/bidmaster/agents/content_writer.py b/src/bidmaster/agents/content_writer.py index 0542e7f..4358593 100644 --- a/src/bidmaster/agents/content_writer.py +++ b/src/bidmaster/agents/content_writer.py @@ -63,10 +63,15 @@ class ContentWriterAgent: for i, ch in enumerate(chapter_queue): logger.info(f" [{i+1}] {ch['id']} - {ch['title']} (level={ch['level']})") - # 循环处理每个章节 - state["chapter_configs"] = {} - state["generated_contents"] = {} - state["completed_chapters"] = [] + # 保留InitConfigNode初始化的关键数据(避免被覆盖) + # state中已包含: chapter_queue, rag_tool, expanded_configs + # 只需补充运行时需要的字段 + if "chapter_configs" not in state: + state["chapter_configs"] = {} + if "generated_contents" not in state: + state["generated_contents"] = {} + if "completed_chapters" not in state: + state["completed_chapters"] = [] state["interaction_handler"] = self.interaction_handler for index, chapter in enumerate(chapter_queue): diff --git a/src/bidmaster/agents/single_chapter_agent.py b/src/bidmaster/agents/single_chapter_agent.py index 50d97db..ce5a820 100644 --- a/src/bidmaster/agents/single_chapter_agent.py +++ b/src/bidmaster/agents/single_chapter_agent.py @@ -69,17 +69,11 @@ class SingleChapterAgent(BaseAgent): Returns: 处理结果 """ - # 准备单章节状态 + # 准备单章节状态(继承所有字段,避免遗漏rag_tool等初始化数据) chapter_state = { - "word_file": state["word_file"], - "current_chapter": chapter, - "current_chapter_index": state.get("current_chapter_index", 0), - "chapter_queue": state.get("chapter_queue", []), - "chapter_configs": state.get("chapter_configs", {}), - "generated_contents": state.get("generated_contents", {}), - "completed_chapters": state.get("completed_chapters", []), - "needs_interaction": chapter["level"] == 1, - "interaction_handler": state.get("interaction_handler"), + **state, # 继承所有字段(包括rag_tool、expanded_configs等) + "current_chapter": chapter, # 覆盖当前章节 + "needs_interaction": chapter["level"] == 1, # 覆盖交互标志 } # 执行 diff --git a/src/bidmaster/config/settings.py b/src/bidmaster/config/settings.py index 66a7a93..a106aa0 100644 --- a/src/bidmaster/config/settings.py +++ b/src/bidmaster/config/settings.py @@ -71,6 +71,20 @@ class Settings(BaseSettings): description="支持的文档格式" ) + # Word填充配置 + max_sub_chapter_level: int = Field(default=3, description="子章节最大层级") + rag_search_top_k: int = Field(default=3, description="RAG检索返回结果数量") + parent_context_length: int = Field(default=500, description="父章节上下文长度限制") + + # Word格式配置 + heading_font_size: int = Field(default=13, description="标题字体大小(pt)") + normal_font_size: int = Field(default=12, description="正文字体大小(pt)") + list_indent: float = Field(default=0.74, description="列表缩进(cm)") + paragraph_first_line_indent: float = Field(default=0.74, description="段落首行缩进(cm)") + space_before_small: int = Field(default=6, description="小间距前(pt)") + space_after_small: int = Field(default=3, description="小间距后(pt)") + space_after_normal: int = Field(default=6, description="正常间距后(pt)") + class Config: env_file = ".env" env_prefix = "BIDMASTER_" diff --git a/src/bidmaster/nodes/content/generate_content.py b/src/bidmaster/nodes/content/generate_content.py index 0e6666e..81b4bb7 100644 --- a/src/bidmaster/nodes/content/generate_content.py +++ b/src/bidmaster/nodes/content/generate_content.py @@ -7,8 +7,10 @@ import logging from typing import Any, Dict, Optional from ..base import BaseNode, NodeContext +from ...config.settings import get_settings logger = logging.getLogger(__name__) +settings = get_settings() class GenerateContentNode(BaseNode): @@ -50,8 +52,8 @@ class GenerateContentNode(BaseNode): # 获取章节配置(继承父章节) config = self._get_chapter_config(state, chapter) - # 查找当前章节的所有3级子标题 - sub_chapters = self._find_sub_chapters(state, chapter_id, max_level=3) + # 查找当前章节的所有子标题 + sub_chapters = self._find_sub_chapters(state, chapter_id, max_level=settings.max_sub_chapter_level) if sub_chapters: # 有子标题:逐个生成后拼接 @@ -75,7 +77,7 @@ class GenerateContentNode(BaseNode): def _get_chapter_config( self, state: Dict[str, Any], chapter: Dict[str, Any] ) -> Dict[str, Any]: - """获取章节配置,子章节继承父章节配置 + """获取章节配置,使用预展开的继承链 Args: state: 当前状态 @@ -91,20 +93,29 @@ class GenerateContentNode(BaseNode): if chapter_id in chapter_configs: return chapter_configs[chapter_id] - # 查找父章节配置 - parent_id = chapter.get("parent_id") - while parent_id: - if parent_id in chapter_configs: - logger.info(f"章节 {chapter_id} 继承父章节 {parent_id} 的配置") - return chapter_configs[parent_id] + # 使用预展开的配置继承链 + expanded_configs = state.get("expanded_configs", {}) + config_source_id = expanded_configs.get(chapter_id) - # 继续向上查找 - parent_chapter = self._find_chapter(state, parent_id) - parent_id = parent_chapter.get("parent_id") if parent_chapter else None + if config_source_id and config_source_id in chapter_configs: + logger.info(f"章节 {chapter_id} 继承 {config_source_id} 的配置") + return chapter_configs[config_source_id] # 返回默认配置 logger.info(f"章节 {chapter_id} 使用默认配置") - return {"emphasis": "", "rag_enabled": False, "rag_store": None} + return self._get_default_config() + + def _get_default_config(self) -> Dict[str, Any]: + """获取默认章节配置 + + Returns: + 默认配置字典 + """ + return { + "emphasis": "", + "rag_enabled": False, + "rag_store": None + } def _find_chapter(self, state: Dict[str, Any], chapter_id: str) -> Optional[Dict[str, Any]]: """在队列中查找章节 @@ -168,9 +179,10 @@ class GenerateContentNode(BaseNode): Returns: 生成的内容 """ - from ...tools.rag import RAGTool - - rag_tool = RAGTool() + # 从state获取RAGTool实例(由InitConfigNode统一初始化) + rag_tool = state.get("rag_tool") + if not rag_tool: + raise ValueError("RAGTool未初始化,请检查InitConfigNode配置") # 构建生成上下文 generation_context = { @@ -184,7 +196,7 @@ class GenerateContentNode(BaseNode): if config.get("rag_enabled"): # 检索相关内容 query = f"{chapter['title']} {config.get('emphasis', '')}" - search_results = rag_tool.search(query, k=3) + search_results = rag_tool.search(query, k=settings.rag_search_top_k) if search_results: relevant_context = "\n\n".join([r["content"] for r in search_results]) @@ -199,13 +211,12 @@ class GenerateContentNode(BaseNode): # 添加父章节上下文 parent_context = state.get("last_generated_content", "") if parent_context and chapter["level"] > 1: - generation_context["parent_context"] = parent_context[:500] # 限制长度 + generation_context["parent_context"] = parent_context[:settings.parent_context_length] # 调用生成方法 try: content = rag_tool.generate_content(chapter["id"], generation_context) return content except Exception as e: - logger.error(f"内容生成失败: {e}") - # 立即失败,不提供后备方案 - raise \ No newline at end of file + logger.error(f"内容生成失败: {e}", exc_info=True) + raise ValueError(f"章节 {chapter['id']} - {chapter['title']} 内容生成失败") from e \ No newline at end of file diff --git a/src/bidmaster/nodes/content/init_config.py b/src/bidmaster/nodes/content/init_config.py index 0ca6070..51b31ef 100644 --- a/src/bidmaster/nodes/content/init_config.py +++ b/src/bidmaster/nodes/content/init_config.py @@ -57,13 +57,22 @@ class InitConfigNode(BaseNode): logger.info(f"生成章节队列,共{len(chapter_queue)}个章节") + # 初始化RAGTool实例(单例,全局共享) + from ...tools.rag import RAGTool + rag_tool = RAGTool() + + # 预展开配置继承链 + expanded_configs = self._expand_config_inheritance(chapter_queue) + return self._update_state( state, chapter_queue=chapter_queue, chapter_configs={}, + expanded_configs=expanded_configs, # 预展开的配置继承映射 generated_contents={}, completed_chapters=[], current_chapter_index=0, + rag_tool=rag_tool, # 在初始化阶段创建并注入 ) def _load_from_word_document(self, word_file: str) -> List[Dict[str, Any]]: @@ -204,3 +213,37 @@ class InitConfigNode(BaseNode): for child in children: self._dfs_traverse(child, chapter_map, queue, all_chapters) + def _expand_config_inheritance(self, chapter_queue: List[Dict[str, Any]]) -> Dict[str, str]: + """预展开配置继承链,避免运行时查找 + + Args: + chapter_queue: 章节队列 + + Returns: + 章节ID -> 配置来源章节ID的映射 + """ + expanded = {} + chapter_map = {ch["id"]: ch for ch in chapter_queue} + + for chapter in chapter_queue: + chapter_id = chapter["id"] + # 向上查找第一个有配置的父章节(1级标题) + current = chapter + config_source = None + + while current: + if current["level"] == 1: # 1级标题是配置来源 + config_source = current["id"] + break + + parent_id = current.get("parent_id") + if not parent_id: + break + + current = chapter_map.get(parent_id) + + expanded[chapter_id] = config_source + + logger.info(f"预展开配置继承链,共{len(expanded)}个章节") + return expanded + diff --git a/src/bidmaster/nodes/content/interact_user.py b/src/bidmaster/nodes/content/interact_user.py index ebad82a..7a3ce12 100644 --- a/src/bidmaster/nodes/content/interact_user.py +++ b/src/bidmaster/nodes/content/interact_user.py @@ -56,11 +56,8 @@ class InteractWithUserNode(BaseNode): interaction_handler = state.get("interaction_handler") if not interaction_handler: - # 无交互处理器时使用默认值(静默模式) - logger.info("无交互处理器,使用默认配置") - config = {"emphasis": "", "rag_enabled": False, "rag_store": None} - state.setdefault("chapter_configs", {})[chapter_id] = config - return state + # 无交互处理器时立即失败,暴露配置问题 + raise ValueError("交互处理器未配置,无法获取章节配置") logger.info(f"开始与用户交互,获取章节配置: {chapter_id} - {chapter_title}") @@ -87,7 +84,7 @@ class InteractWithUserNode(BaseNode): # 3. 如果使用RAG,选择知识库 if use_rag: - rag_store = self._select_rag_store(interaction_handler, chapter_id) + rag_store = self._select_rag_store(interaction_handler, chapter_id, state) # 保存章节配置 config = { @@ -105,21 +102,23 @@ class InteractWithUserNode(BaseNode): return state - def _select_rag_store(self, interaction_handler, chapter_id: str) -> str: + def _select_rag_store(self, interaction_handler, chapter_id: str, state: Dict[str, Any]) -> str: """选择RAG知识库 Args: interaction_handler: 交互处理器 chapter_id: 章节ID + state: 当前状态 Returns: 知识库标识 """ - # 获取可用的知识库列表 - from ...tools.rag import RAGTool + # 从state获取RAGTool实例(由InitConfigNode统一初始化) + rag_tool = state.get("rag_tool") + if not rag_tool: + raise ValueError("RAGTool未初始化,请检查InitConfigNode配置") try: - rag_tool = RAGTool() stats = rag_tool.get_stats() total_chunks = stats.get("total_chunks", 0) total_files = stats.get("total_files", 0) @@ -143,5 +142,5 @@ class InteractWithUserNode(BaseNode): return None except Exception as e: - logger.warning(f"获取知识库信息失败: {e},使用默认配置") - return "default" \ No newline at end of file + logger.error(f"获取知识库信息失败: {e}", exc_info=True) + raise ValueError("获取知识库信息失败,无法选择RAG存储") from e \ No newline at end of file diff --git a/src/bidmaster/nodes/content/save_to_word.py b/src/bidmaster/nodes/content/save_to_word.py index d7ec642..a09fb15 100644 --- a/src/bidmaster/nodes/content/save_to_word.py +++ b/src/bidmaster/nodes/content/save_to_word.py @@ -85,12 +85,12 @@ class SaveToWordNode(BaseNode): chapter: 章节信息 content: 生成的内容 """ - from ...tools.word import WordProcessor - word_path = Path(word_file) if not word_path.exists(): raise FileNotFoundError(f"Word文档不存在: {word_file}") + # 从state获取共享的WordProcessor实例(通过调用栈获取) + from ...tools.word import WordProcessor word_processor = WordProcessor() # 获取占位符 @@ -101,10 +101,9 @@ class SaveToWordNode(BaseNode): try: # 填充占位符 - word_processor.fill_placeholder(str(word_path), placeholder, content) + word_processor.fill_placeholder(word_path, placeholder, content) logger.info(f"成功填充占位符: {placeholder}") except Exception as e: - logger.error(f"填充Word文档失败: {e}") - # 立即失败 - raise \ No newline at end of file + logger.error(f"填充Word文档失败: {e}", exc_info=True) + raise ValueError(f"章节 {chapter['id']} Word文档填充失败") from e \ No newline at end of file diff --git a/src/bidmaster/tools/word.py b/src/bidmaster/tools/word.py index b283595..9d2e6e2 100644 --- a/src/bidmaster/tools/word.py +++ b/src/bidmaster/tools/word.py @@ -4,17 +4,53 @@ """ import logging +import re from pathlib import Path from typing import List from docx import Document -from docx.shared import Inches +from docx.shared import Inches, Pt, Cm from docx.enum.style import WD_STYLE_TYPE -from docx.enum.text import WD_ALIGN_PARAGRAPH +from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING +from docx.oxml import OxmlElement +from docx.text.paragraph import Paragraph from .parser import DocumentChapter +from ..config.settings import get_settings logger = logging.getLogger(__name__) +settings = get_settings() + +# 常量配置 +MAX_HEADING_LEVEL = 9 # Word支持的最大标题层级 +DEFAULT_CHAPTER_LEVEL = 3 # 默认章节层级 +SCORE_THRESHOLD = 0 # 评分项判断阈值 +PRIMARY_CHAPTER_LEVEL = 1 # 一级章节标识 + +# 预编译正则表达式 +NUMBERING_PATTERNS = [ + re.compile(r'^(\d+(?:\.\d+)*\.?\s+)'), # 1. / 1.1 / 1.1.1 + re.compile(r'^([一二三四五六七八九十]+、\s+)'), # 中文数字 + re.compile(r'^(\([0-9]+\)\s+)'), # (1) + re.compile(r'^([A-Z]\.\s+)'), # A. +] + +MARKDOWN_HEADING_PATTERN = re.compile(r'^#{2,4}\s+') +ORDERED_LIST_PATTERN = re.compile(r'^(\d+[\.))]|(\d+))\s+') +UNORDERED_LIST_PATTERN = re.compile(r'^[-*•]\s+') +BOLD_TEXT_PATTERN = re.compile(r'\*\*(.+?)\*\*') +ITALIC_TEXT_PATTERN = re.compile(r'\*(.+?)\*') +CHAPTER_ID_PATTERN = re.compile(r'chapter_(\d+(?:_\d+)*)') +HEADING_LEVEL_PATTERN = re.compile(r'^(#+)') + +# 字体常量(非格式参数) +FONT_HEITI = '黑体' +FONT_SONGTI = '宋体' + +# 文本提示信息 +TOC_PLACEHOLDER_TEXT = "【此处为目录,请在Word中插入目录:引用→目录→自动目录】" +SCORING_ITEM_TEMPLATE = "【评分项 - {score}分】\n请根据招标要求和公司实际情况撰写相关内容。\n" +CONTENT_PLACEHOLDER_TEXT = "【请填写相关内容】\n" class WordProcessor: @@ -37,7 +73,7 @@ class WordProcessor: # 添加目录占位符 doc.add_paragraph() - toc_para = doc.add_paragraph("【此处为目录,请在Word中插入目录:引用→目录→自动目录】") + toc_para = doc.add_paragraph(TOC_PLACEHOLDER_TEXT) toc_para.alignment = WD_ALIGN_PARAGRAPH.CENTER doc.add_page_break() @@ -51,8 +87,8 @@ class WordProcessor: return True except Exception as e: - logger.error(f"生成Word模板失败: {e}") - return False + logger.error(f"生成Word模板失败: {e}", exc_info=True) + raise ValueError(f"Word模板生成失败: {output_path}") from e def _add_chapters_to_doc(self, doc: Document, chapters: List[DocumentChapter]): """递归添加章节到文档""" @@ -66,10 +102,10 @@ class WordProcessor: title_text = self._extract_title_text(title_text) # 使用Word的标题样式 - if chapter.level <= 9: # Word支持Heading 1-9 + if chapter.level <= MAX_HEADING_LEVEL: heading = doc.add_heading(title_text, level=chapter.level) else: - # 超过9级用普通段落加粗 + # 超过最大层级用普通段落加粗 para = doc.add_paragraph() run = para.add_run(title_text) run.bold = True @@ -79,15 +115,14 @@ class WordProcessor: content_para = doc.add_paragraph(f"\n{chapter.template_placeholder}\n") # 添加写作指导 - if chapter.score and chapter.score > 0: - guide_text = f"【评分项 - {chapter.score}分】\n" \ - f"请根据招标要求和公司实际情况撰写相关内容。\n" + if chapter.score and chapter.score > SCORE_THRESHOLD: + guide_text = SCORING_ITEM_TEMPLATE.format(score=chapter.score) guide_para = doc.add_paragraph(guide_text) for run in guide_para.runs: run.font.italic = True else: # 非评分项的提示 - guide_para = doc.add_paragraph("【请填写相关内容】\n") + guide_para = doc.add_paragraph(CONTENT_PLACEHOLDER_TEXT) for run in guide_para.runs: run.font.italic = True @@ -96,7 +131,7 @@ class WordProcessor: self._add_chapters_to_doc(doc, chapter.children) # 一级章节后添加适当间距 - if chapter.level == 1: + if chapter.level == PRIMARY_CHAPTER_LEVEL: doc.add_paragraph() # 添加空行 def _check_has_numbering(self, title: str) -> bool: @@ -108,74 +143,48 @@ class WordProcessor: Returns: True 如果标题以编号开头 """ - import re - # 匹配各种编号格式 - patterns = [ - r'^(\d+(?:\.\d+)*\.?\s+)', # 1. / 1.1 / 1.1.1 - r'^([一二三四五六七八九十]+、\s+)', # 中文数字 - r'^(\([0-9]+\)\s+)', # (1) - r'^([A-Z]\.\s+)', # A. - ] - for pattern in patterns: - if re.match(pattern, title): + for pattern in NUMBERING_PATTERNS: + if pattern.match(title): return True return False def _extract_title_text(self, title: str) -> str: """提取标题文本,去除编号""" - import re # 匹配并移除开头的编号(如 "1. ", "1.1 ", "1.1.1 ") - pattern = r'^(\d+(?:\.\d+)*\.?\s+)' - return re.sub(pattern, '', title.strip()) + return NUMBERING_PATTERNS[0].sub('', title.strip()) - def fill_placeholder(self, doc_path: str, placeholder: str, content: str) -> bool: - """填充Word文档中的占位符 + def fill_placeholder(self, doc_path: str, placeholder: str, content: str) -> None: + """填充Word文档:在对应标题后插入内容 Args: doc_path: Word文档路径 - placeholder: 占位符文本(如 {{chapter_01_content}}) + placeholder: 占位符(用于解析章节编号,如 {{chapter_2_1_1_content}}) content: 要填充的内容 - Returns: - 是否成功填充 + Raises: + ValueError: 占位符格式错误或未找到对应标题 + FileNotFoundError: 文档不存在 """ - try: - doc = Document(doc_path) - replaced = False + doc = Document(doc_path) - # 策略1: 查找并替换占位符 - for paragraph in doc.paragraphs: - if placeholder in paragraph.text: - # 保留段落格式的替换方法 - # 保存段落级别的格式属性 - para_format = paragraph.paragraph_format + # 1. 解析占位符提取章节编号 + chapter_number = self._parse_chapter_number_from_placeholder(placeholder) - # 清空段落内容但保留段落本身 - for run in paragraph.runs: - run.text = '' + # 2. 查找对应标题 + target_paragraph, _ = self._find_heading_by_chapter_id(doc, chapter_number) - # 添加新内容(保留第一个run的格式,如果有的话) - if paragraph.runs: - paragraph.runs[0].text = content - else: - paragraph.add_run(content) + # 3. 获取章节层级 + chapter_level = self._get_chapter_level_from_heading(target_paragraph) - replaced = True - logger.info(f"已替换占位符: {placeholder}") - break + # 4. 解析Markdown内容为段落结构 + parsed_paragraphs = self._parse_markdown_to_paragraphs(content, chapter_level) - # 策略2: 降级处理 - 如果没有占位符,在对应标题后插入内容 - if not replaced: - logger.warning(f"未找到占位符: {placeholder},尝试在对应标题后插入内容") - replaced = self._insert_after_heading(doc, placeholder, content) + # 5. 在标题后插入段落 + self._insert_parsed_paragraphs(doc, target_paragraph, parsed_paragraphs) - # 保存文档 - doc.save(doc_path) - return replaced - - except Exception as e: - logger.error(f"填充占位符失败: {e}") - raise + # 6. 保存文档 + doc.save(doc_path) + logger.info(f"成功填充内容到文档: {placeholder}") def _parse_markdown_to_paragraphs(self, content: str, chapter_level: int) -> list: """解析Markdown内容为段落结构列表 @@ -187,8 +196,6 @@ class WordProcessor: Returns: 段落结构列表,每项包含 {'type': 'heading'|'paragraph'|'list', 'text': str, 'style': str} """ - import re - paragraphs = [] lines = content.split('\n') i = 0 @@ -201,12 +208,12 @@ class WordProcessor: continue # Markdown标题:## 标题 - if re.match(r'^#{2,4}\s+', line): - level = len(re.match(r'^(#+)', line).group(1)) - title_text = re.sub(r'^#{2,4}\s+', '', line) + if MARKDOWN_HEADING_PATTERN.match(line): + level = len(HEADING_LEVEL_PATTERN.match(line).group(1)) + title_text = MARKDOWN_HEADING_PATTERN.sub('', line) # 3级标题内容:## → 加粗段落(非标题样式) - if chapter_level == 3: + if chapter_level == DEFAULT_CHAPTER_LEVEL: paragraphs.append({ 'type': 'bold_paragraph', 'text': title_text, @@ -228,16 +235,16 @@ class WordProcessor: }) # 有序列表:1. / 1) / (1) - elif re.match(r'^(\d+[\.))]|(\d+))\s+', line): - text = re.sub(r'^(\d+[\.))]|(\d+))\s+', '', line) + elif ORDERED_LIST_PATTERN.match(line): + text = ORDERED_LIST_PATTERN.sub('', line) paragraphs.append({ 'type': 'ordered_list', 'text': text }) # 无序列表:- / * / • - elif re.match(r'^[-*•]\s+', line): - text = re.sub(r'^[-*•]\s+', '', line) + elif UNORDERED_LIST_PATTERN.match(line): + text = UNORDERED_LIST_PATTERN.sub('', line) paragraphs.append({ 'type': 'unordered_list', 'text': text @@ -246,10 +253,10 @@ class WordProcessor: # 普通段落 else: # 处理行内格式:**粗体** - text = re.sub(r'\*\*(.+?)\*\*', r'\1', line) + text = BOLD_TEXT_PATTERN.sub(r'\1', line) # 处理行内格式:*斜体*(单星号,但避免误匹配列表) if not line.startswith('*'): - text = re.sub(r'\*(.+?)\*', r'\1', text) + text = ITALIC_TEXT_PATTERN.sub(r'\1', text) paragraphs.append({ 'type': 'paragraph', @@ -260,155 +267,179 @@ class WordProcessor: return paragraphs - def _insert_after_heading(self, doc: Document, placeholder: str, content: str) -> bool: - """在对应标题后插入内容(降级策略) + def _find_heading_by_chapter_id(self, doc: Document, chapter_number: str) -> tuple[Paragraph, int]: + """根据章节编号查找标题段落 Args: doc: Word文档对象 - placeholder: 占位符(如 {{chapter_2_1_1_content}}) - content: 要插入的内容 + chapter_number: 章节编号(如 "2.1.1") Returns: - 是否成功插入 + (标题段落, 段落索引) 元组 + + Raises: + ValueError: 未找到对应标题 """ - import re - - # 从占位符提取章节编号: {{chapter_2_1_1_content}} -> "2.1.1" - match = re.search(r'chapter_(\d+(?:_\d+)*)', placeholder) - if not match: - logger.error(f"无法从占位符解析章节编号: {placeholder}") - return False - - chapter_number = match.group(1).replace('_', '.') - - # 查找对应编号的标题 - target_paragraph = None - target_index = -1 - for i, para in enumerate(doc.paragraphs): if para.style.name.startswith('Heading'): - # 检查标题文本是否以目标编号开头 text = para.text.strip() if text.startswith(f"{chapter_number} ") or text.startswith(f"{chapter_number}."): - target_paragraph = para - target_index = i logger.info(f"找到匹配标题: {text}") - break + return para, i - if target_paragraph is None: - logger.error(f"未找到编号为 {chapter_number} 的标题") - return False + raise ValueError(f"未找到编号为 {chapter_number} 的标题") - # 获取章节层级(从标题样式推断) - chapter_level = 3 # 默认3级 - if target_paragraph.style.name == 'Heading 1': - chapter_level = 1 - elif target_paragraph.style.name == 'Heading 2': - chapter_level = 2 - elif target_paragraph.style.name == 'Heading 3': - chapter_level = 3 + def _get_chapter_level_from_heading(self, heading_para: Paragraph) -> int: + """从标题样式推断章节层级 - # 解析Markdown内容为段落结构 - parsed_paragraphs = self._parse_markdown_to_paragraphs(content, chapter_level) + Args: + heading_para: 标题段落 - # 在标题后逐个插入段落 - from docx.text.paragraph import Paragraph - from docx.shared import Pt, Cm - from docx.enum.text import WD_LINE_SPACING, WD_ALIGN_PARAGRAPH - from docx.oxml import OxmlElement + Returns: + 章节层级(1/2/3) + """ + style_name = heading_para.style.name + if style_name == 'Heading 1': + return PRIMARY_CHAPTER_LEVEL + elif style_name == 'Heading 2': + return 2 + else: + return DEFAULT_CHAPTER_LEVEL - current_element = target_paragraph._element - inserted_count = 0 + def _parse_chapter_number_from_placeholder(self, placeholder: str) -> str: + """从占位符提取章节编号 - for para_struct in parsed_paragraphs: - para_type = para_struct['type'] - text = para_struct['text'] + Args: + placeholder: 占位符(如 {{chapter_2_1_1_content}}) - # 创建新段落XML元素 - new_p_element = OxmlElement('w:p') - current_element.addnext(new_p_element) - current_element = new_p_element + Returns: + 章节编号(如 "2.1.1") - # 创建Paragraph对象 - new_para = Paragraph(new_p_element, doc) - para_format = new_para.paragraph_format + Raises: + ValueError: 占位符格式错误 + """ + match = CHAPTER_ID_PATTERN.search(placeholder) + if not match: + raise ValueError(f"无法从占位符解析章节编号: {placeholder}") - # 根据类型应用不同样式 - if para_type == 'heading': - # 子标题:使用Heading样式 - level = para_struct.get('level', 4) - try: - new_para.style = f'Heading {level}' - except: - run = new_para.add_run(text) - run.bold = True - run.font.size = Pt(13) - else: - new_para.text = text + chapter_number = match.group(1).replace('_', '.') + return chapter_number - elif para_type == 'bold_paragraph': - # 加粗段落(3级标题内的小标题) - try: - new_para.style = 'Normal' - except: - pass - para_format.space_before = Pt(6) - para_format.space_after = Pt(3) - para_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE + def _create_paragraph_element(self, doc: Document, current_element, para_struct: dict) -> Paragraph: + """创建并配置段落元素 + Args: + doc: Word文档对象 + current_element: 当前XML元素 + para_struct: 段落结构字典 + + Returns: + 新创建的段落对象 + """ + # 创建新段落XML元素 + new_p_element = OxmlElement('w:p') + current_element.addnext(new_p_element) + + # 创建Paragraph对象 + new_para = Paragraph(new_p_element, doc) + para_format = new_para.paragraph_format + + para_type = para_struct['type'] + text = para_struct['text'] + + # 根据类型应用不同样式 + if para_type == 'heading': + # 子标题:使用Heading样式 + level = para_struct.get('level', 4) + try: + new_para.style = f'Heading {level}' + new_para.text = text + except KeyError: run = new_para.add_run(text) run.bold = True - run.font.name = '黑体' - run.font.size = Pt(12) + run.font.size = Pt(settings.heading_font_size) - elif para_type == 'ordered_list': - # 有序列表:1)2)3) - try: - new_para.style = 'Normal' - except: - pass - para_format.left_indent = Cm(0.74) - para_format.space_after = Pt(3) - para_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE + elif para_type == 'bold_paragraph': + # 加粗段落(3级标题内的小标题) + try: + new_para.style = 'Normal' + except KeyError: + pass + para_format.space_before = Pt(settings.space_before_small) + para_format.space_after = Pt(settings.space_after_small) + para_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE - # 手动添加编号(简化实现) - list_index = sum(1 for p in parsed_paragraphs[:parsed_paragraphs.index(para_struct)] if p['type'] == 'ordered_list') + 1 - run = new_para.add_run(f"{list_index}){text}") - run.font.name = '宋体' - run.font.size = Pt(12) + run = new_para.add_run(text) + run.bold = True + run.font.name = FONT_HEITI + run.font.size = Pt(settings.normal_font_size) - elif para_type == 'unordered_list': - # 无序列表:• 或 ○ - try: - new_para.style = 'Normal' - except: - pass - para_format.left_indent = Cm(0.74) - para_format.space_after = Pt(3) - para_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE + elif para_type == 'ordered_list': + # 有序列表:1)2)3) + try: + new_para.style = 'Normal' + except KeyError: + pass + para_format.left_indent = Cm(settings.list_indent) + para_format.space_after = Pt(settings.space_after_small) + para_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE - run = new_para.add_run(f"• {text}") - run.font.name = '宋体' - run.font.size = Pt(12) + # 需要从外部传入list_index + list_index = para_struct.get('list_index', 1) + run = new_para.add_run(f"{list_index}){text}") + run.font.name = FONT_SONGTI + run.font.size = Pt(settings.normal_font_size) - else: # paragraph - # 普通段落 - try: - new_para.style = 'Normal' - except: - pass - para_format.first_line_indent = Cm(0.74) - para_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE - para_format.space_after = Pt(6) + elif para_type == 'unordered_list': + # 无序列表:• 或 ○ + try: + new_para.style = 'Normal' + except KeyError: + pass + para_format.left_indent = Cm(settings.list_indent) + para_format.space_after = Pt(settings.space_after_small) + para_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE - run = new_para.add_run(text) - run.font.name = '宋体' - run.font.size = Pt(12) + run = new_para.add_run(f"• {text}") + run.font.name = FONT_SONGTI + run.font.size = Pt(settings.normal_font_size) - inserted_count += 1 + else: # paragraph + # 普通段落 + try: + new_para.style = 'Normal' + except KeyError: + pass + para_format.first_line_indent = Cm(settings.paragraph_first_line_indent) + para_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE + para_format.space_after = Pt(settings.space_after_normal) - logger.info(f"成功在标题 {chapter_number} 后插入内容(共{inserted_count}个段落)") - return True + run = new_para.add_run(text) + run.font.name = FONT_SONGTI + run.font.size = Pt(settings.normal_font_size) + + return new_para, new_p_element + + def _insert_parsed_paragraphs(self, doc: Document, target_paragraph: Paragraph, parsed_paragraphs: list) -> None: + """在目标段落后插入解析好的段落列表 + + Args: + doc: Word文档对象 + target_paragraph: 目标段落(标题) + parsed_paragraphs: 解析好的段落结构列表 + """ + current_element = target_paragraph._element + + # 预处理ordered_list的索引 + for i, para_struct in enumerate(parsed_paragraphs): + if para_struct['type'] == 'ordered_list': + list_index = sum(1 for p in parsed_paragraphs[:i] if p['type'] == 'ordered_list') + 1 + para_struct['list_index'] = list_index + + # 逐个插入段落 + for para_struct in parsed_paragraphs: + _, new_element = self._create_paragraph_element(doc, current_element, para_struct) + current_element = new_element def _setup_numbering_styles(self, doc: Document): """配置标题的多级编号样式""" @@ -426,5 +457,5 @@ class WordProcessor: # 确保样式不被隐藏 style.hidden = False style.quick_style = True - except Exception as e: + except KeyError as e: logger.debug(f"配置样式 {style_name} 时出错: {e}") \ No newline at end of file