Word 填充后新增 Markdown 清理节点,修复漏掉标题的bug
This commit is contained in:
parent
91a2050b9f
commit
4a86081ade
8
check_headings.py
Normal file
8
check_headings.py
Normal file
@ -0,0 +1,8 @@
|
||||
# Diagnostic script: list every non-empty paragraph of the Word document whose
# style name starts with "Heading", together with its paragraph index.
from docx import Document

doc = Document('mianyang/mianyang.docx')

for index, paragraph in enumerate(doc.paragraphs):
    # The style (or its name) may be missing; fall back to an empty string.
    style_label = getattr(paragraph.style, 'name', '') or ''
    if not style_label.startswith('Heading'):
        continue

    heading = paragraph.text.strip()
    if not heading:
        # Heading-styled but blank paragraphs are not reported.
        continue

    # Show at most the first 80 characters, repr-quoted so stray whitespace
    # and control characters stay visible.
    print(f'[{index}] {style_label}: {heading[:80]!r}')
|
||||
@ -116,6 +116,7 @@ class InitConfigNode(BaseNode):
|
||||
chapters: List[Dict[str, Any]] = []
|
||||
parent_stack: List[Dict[str, Any]] = []
|
||||
level_counters: defaultdict[int, int] = defaultdict(int)
|
||||
seen_ids: Dict[str, int] = {} # 记录已使用的ID及其出现次数
|
||||
|
||||
for para in doc.paragraphs:
|
||||
style_name = getattr(para.style, 'name', '') or ''
|
||||
@ -153,7 +154,19 @@ class InitConfigNode(BaseNode):
|
||||
segment = self._format_unlabeled_segment(level_counters[level])
|
||||
id_path = [*parent_path, segment] if parent_path else [segment]
|
||||
|
||||
chapter_id = f"chapter_{'_'.join(id_path)}"
|
||||
base_chapter_id = f"chapter_{'_'.join(id_path)}"
|
||||
|
||||
# 检测重复ID并添加后缀
|
||||
if base_chapter_id in seen_ids:
|
||||
seen_ids[base_chapter_id] += 1
|
||||
chapter_id = f"{base_chapter_id}_{seen_ids[base_chapter_id]}"
|
||||
logger.warning(
|
||||
"检测到重复章节ID '%s',自动重命名为 '%s' (标题: %s)",
|
||||
base_chapter_id, chapter_id, title
|
||||
)
|
||||
else:
|
||||
seen_ids[base_chapter_id] = 1
|
||||
chapter_id = base_chapter_id
|
||||
|
||||
chapter_info = {
|
||||
"id": chapter_id,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user