# Source: bidmaster-cli/tests/unit/test_document_context.py
# (repository viewer metadata: 71 lines, 2.0 KiB, Python)

from pathlib import Path
from docx import Document
from bidmaster.utils.document_context import (
DocumentContextBuilder,
DocumentContextSearcher,
)
def _create_sample_doc(doc_path: Path) -> None:
    """Write a small .docx fixture to *doc_path*.

    The document contains two level-1 headings, each followed by one
    paragraph, and a 2x2 table (a header row plus one data row). The tests
    in this module build their document context from this file.

    Args:
        doc_path: Destination path of the generated document.
    """
    doc = Document()

    doc.add_heading("第一章 项目总体概述", level=1)
    doc.add_paragraph("本项目聚焦城市智慧照明系统建设,强调云边协同与多维感知能力。")

    doc.add_heading("第二章 建设目标", level=1)
    doc.add_paragraph("目标包括统一管控平台、智能终端、数据中台三大部分。")

    # Table content, row by row; the first row acts as the header.
    table_rows = (
        ("指标", "要求"),
        ("系统稳定性", "7x24小时无故障运行"),
    )
    table = doc.add_table(rows=len(table_rows), cols=2)
    for row, values in zip(table.rows, table_rows):
        for cell, value in zip(row.cells, values):
            cell.text = value

    doc.save(doc_path)
def _dummy_embedding(texts):
return [[float(len(text))] for text in texts]
def test_document_context_builder_creates_chunks(tmp_path: Path) -> None:
    """Building a context from the sample document yields usable chunks.

    Checks that the built context is not empty, that every chunk carries a
    truthy embedding, and that at least one chunk's section mentions the
    first heading of the sample document.
    """
    doc_path = tmp_path / "context.docx"
    _create_sample_doc(doc_path)

    builder = DocumentContextBuilder(
        chunk_size=120,
        chunk_overlap=10,
        embedding_fn=_dummy_embedding,
    )
    context = builder.build(str(doc_path))

    assert not context.is_empty()
    assert all(chunk.embedding for chunk in context.chunks)
    assert any("项目总体概述" in chunk.section for chunk in context.chunks)
def test_document_context_searcher_returns_matches(tmp_path: Path) -> None:
    """Searching a built context returns scored matches and a theme summary.

    Checks that a query produces at least one match, that the first match
    has a positive score, and that the theme summary contains the "第一章"
    marker taken from the sample document's first heading.
    """
    doc_path = tmp_path / "search.docx"
    _create_sample_doc(doc_path)

    builder = DocumentContextBuilder(
        chunk_size=80,
        chunk_overlap=10,
        embedding_fn=_dummy_embedding,
    )
    context = builder.build(str(doc_path))

    searcher = DocumentContextSearcher(
        context,
        embedding_fn=_dummy_embedding,
        top_k=2,
    )

    matches = searcher.search("智慧照明平台")
    assert matches
    assert matches[0].score > 0

    themes = searcher.summarize_themes()
    assert "第一章" in themes