"""Tests for DocumentContextBuilder and DocumentContextSearcher."""

from pathlib import Path

from docx import Document

from bidmaster.utils.document_context import (
    DocumentContextBuilder,
    DocumentContextSearcher,
)


def _create_sample_doc(doc_path: Path) -> None:
    """Write a small sample .docx file to ``doc_path``.

    The document holds two level-1 headings, one paragraph under each,
    and a 2x2 table (a header row plus one data row).
    """
    sample = Document()

    sample.add_heading("第一章 项目总体概述", level=1)
    sample.add_paragraph("本项目聚焦城市智慧照明系统建设,强调云边协同与多维感知能力。")

    sample.add_heading("第二章 建设目标", level=1)
    sample.add_paragraph("目标包括统一管控平台、智能终端、数据中台三大部分。")

    # Cell contents laid out exactly as they appear in the table.
    grid = (
        ("指标", "要求"),
        ("系统稳定性", "7x24小时无故障运行"),
    )
    table = sample.add_table(rows=2, cols=2)
    for row_idx, row_values in enumerate(grid):
        for col_idx, value in enumerate(row_values):
            table.rows[row_idx].cells[col_idx].text = value

    sample.save(doc_path)


def _dummy_embedding(texts):
    """Return one single-element vector per text: its length as a float."""
    return [[float(len(item))] for item in texts]


def test_document_context_builder_creates_chunks(tmp_path):
    """Building from the sample document yields embedded, sectioned chunks."""
    sample_path = tmp_path / "context.docx"
    _create_sample_doc(sample_path)

    context = DocumentContextBuilder(
        chunk_size=120,
        chunk_overlap=10,
        embedding_fn=_dummy_embedding,
    ).build(str(sample_path))

    assert not context.is_empty()
    # Every chunk must carry a truthy embedding.
    assert all(piece.embedding for piece in context.chunks)
    # At least one chunk is attributed to the first heading.
    assert any("项目总体概述" in piece.section for piece in context.chunks)


def test_document_context_searcher_returns_matches(tmp_path):
    """Searching the built context returns scored matches and a theme summary."""
    sample_path = tmp_path / "search.docx"
    _create_sample_doc(sample_path)

    context = DocumentContextBuilder(
        chunk_size=80,
        chunk_overlap=10,
        embedding_fn=_dummy_embedding,
    ).build(str(sample_path))

    searcher = DocumentContextSearcher(
        context,
        embedding_fn=_dummy_embedding,
        top_k=2,
    )

    hits = searcher.search("智慧照明平台")
    assert hits
    assert hits[0].score > 0

    theme_summary = searcher.summarize_themes()
    assert "第一章" in theme_summary