From 4a86081ade0d54d543cee86946167cf4504d7d18 Mon Sep 17 00:00:00 2001
From: sladro
Date: Mon, 29 Dec 2025 18:21:57 +0800
Subject: [PATCH] =?UTF-8?q?Word=20=E5=A1=AB=E5=85=85=E5=90=8E=E6=96=B0?=
 =?UTF-8?q?=E5=A2=9E=20Markdown=20=E6=B8=85=E7=90=86=E8=8A=82=E7=82=B9?=
 =?UTF-8?q?=EF=BC=8C=E4=BF=AE=E5=A4=8D=E6=BC=8F=E6=8E=89=E6=A0=87=E9=A2=98?=
 =?UTF-8?q?=E7=9A=84bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (placed after the separator, so `git am` keeps them out of the
commit message):

 * Subject, decoded from its RFC 2047 encoded-words and translated:
   "Add a Markdown cleanup node after Word filling; fix the bug of
   missing headings".
 * check_headings.py is a flat script: it opens the hard-coded document
   'mianyang/mianyang.docx' and, for every paragraph whose style name starts
   with 'Heading' and whose stripped text is non-empty, prints the paragraph
   index, the style name and the repr of the first 80 characters.
 * init_config.py: `seen_ids` maps each base chapter ID to the number of
   times it has been produced. The first occurrence keeps the base ID; every
   later occurrence gets `_<count>` appended (starting at `_2`) and a
   warning is logged with the base ID, the new ID and the heading title.
 * NOTE(review): the suffixed IDs are not themselves recorded in `seen_ids`.
   A renamed ID such as `chapter_1_2` could therefore equal the base ID of a
   different chapter if path segments can be plain numbers -- confirm against
   the segment formatting code, which is outside this diff.
 * NOTE(review): the leading indentation of the hunk lines below was
   reconstructed from Python syntax (the text this was recovered from had
   its whitespace collapsed) -- verify against the target file before
   applying.

 check_headings.py                          |  8 ++++++++
 src/bidmaster/nodes/content/init_config.py | 15 ++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 check_headings.py

diff --git a/check_headings.py b/check_headings.py
new file mode 100644
index 0000000..a4aae2a
--- /dev/null
+++ b/check_headings.py
@@ -0,0 +1,8 @@
+from docx import Document
+doc = Document('mianyang/mianyang.docx')
+for i, para in enumerate(doc.paragraphs):
+    style_name = getattr(para.style, 'name', '') or ''
+    if style_name.startswith('Heading'):
+        text = para.text.strip()
+        if text:
+            print(f'[{i}] {style_name}: {repr(text[:80])}')
diff --git a/src/bidmaster/nodes/content/init_config.py b/src/bidmaster/nodes/content/init_config.py
index 885fd56..3889734 100644
--- a/src/bidmaster/nodes/content/init_config.py
+++ b/src/bidmaster/nodes/content/init_config.py
@@ -116,6 +116,7 @@ class InitConfigNode(BaseNode):
         chapters: List[Dict[str, Any]] = []
         parent_stack: List[Dict[str, Any]] = []
         level_counters: defaultdict[int, int] = defaultdict(int)
+        seen_ids: Dict[str, int] = {}  # Maps each base ID already used to its occurrence count
 
         for para in doc.paragraphs:
             style_name = getattr(para.style, 'name', '') or ''
@@ -153,7 +154,19 @@ class InitConfigNode(BaseNode):
                 segment = self._format_unlabeled_segment(level_counters[level])
 
             id_path = [*parent_path, segment] if parent_path else [segment]
-            chapter_id = f"chapter_{'_'.join(id_path)}"
+            base_chapter_id = f"chapter_{'_'.join(id_path)}"
+
+            # Detect duplicate IDs and append a numeric suffix
+            if base_chapter_id in seen_ids:
+                seen_ids[base_chapter_id] += 1
+                chapter_id = f"{base_chapter_id}_{seen_ids[base_chapter_id]}"
+                logger.warning(
+                    "检测到重复章节ID '%s',自动重命名为 '%s' (标题: %s)",
+                    base_chapter_id, chapter_id, title
+                )
+            else:
+                seen_ids[base_chapter_id] = 1
+                chapter_id = base_chapter_id
 
             chapter_info = {
                 "id": chapter_id,