This commit is contained in:
sladro 2026-03-14 08:49:28 +08:00
parent 66c3398996
commit 3f2e613ed5
73 changed files with 1915 additions and 2759 deletions

View File

@ -0,0 +1,28 @@
{
"permissions": {
"allow": [
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python scripts/docx_index.py \"D:/App/test/bidmaster/input/哈密/rfp/招标文件.docx\" 2>&1 | head -100)",
"Bash(.venv/Scripts/python scripts/docx_index.py --docx \"D:/App/test/bidmaster/input/哈密/rfp/招标文件.docx\" --out /tmp/index.json)",
"Read(//tmp/**)",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python scripts/docx_index.py --docx \"D:/App/test/bidmaster/input/哈密/rfp/招标文件.docx\" --out /tmp/index_full.json 2>&1 && cat /tmp/index_full.json | jq '.nodes[] | select\\(.heading_level != null\\) | {node_id, text, heading_level}' | head -150)",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python -c \"\nimport json\nwith open\\('/tmp/index_full.json', 'r', encoding='utf-8'\\) as f:\n data = json.load\\(f\\)\n headings = [n for n in data['nodes'] if n['heading_level'] is not None]\n for h in headings[:50]:\n print\\(f\\\\\"Level {h['heading_level']}: {h['text'][:80]}\\\\\"\\)\n\")",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python scripts/docx_index.py --docx \"D:/App/test/bidmaster/input/哈密/rfp/招标文件.docx\" --out work/index.json 2>&1 && .venv/Scripts/python -c \"\nimport json\nwith open\\('work/index.json', 'r', encoding='utf-8'\\) as f:\n data = json.load\\(f\\)\n headings = [n for n in data['nodes'] if n['heading_level'] is not None]\n for h in headings[:60]:\n indent = ' ' * \\(h['heading_level'] - 1\\)\n print\\(f\\\\\"{indent}L{h['heading_level']}: {h['text'][:70]}\\\\\"\\)\n\")",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python -c \"\nimport json\nwith open\\('work/index.json', 'r', encoding='utf-8'\\) as f:\n data = json.load\\(f\\)\n headings = [n for n in data['nodes'] if n['heading_level'] is not None]\n for i, h in enumerate\\(headings[60:120]\\):\n indent = ' ' * \\(h['heading_level'] - 1\\)\n print\\(f\\\\\"{indent}L{h['heading_level']}: {h['text'][:70]}\\\\\"\\)\n\")",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python scripts/docx_query.py --docx \"D:/App/test/bidmaster/input/哈密/rfp/招标文件.docx\" --query \"评标办法\" --out work/query_eval.json 2>&1 && head -100 work/query_eval.json)",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python scripts/docx_query.py --docx \"D:/App/test/bidmaster/input/哈密/rfp/招标文件.docx\" --query-file work/query.json --out work/query_result.json 2>&1 && .venv/Scripts/python -c \"\nimport json\nwith open\\('work/query_result.json', 'r', encoding='utf-8'\\) as f:\n data = json.load\\(f\\)\n print\\(json.dumps\\(data, ensure_ascii=False, indent=2\\)[:2000]\\)\n\")",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python scripts/docx_query.py --docx \"D:/App/test/bidmaster/input/哈密/rfp/招标文件.docx\" --query-file work/query_eval.json --out work/query_eval_result.json 2>&1 && .venv/Scripts/python -c \"\nimport json\nwith open\\('work/query_eval_result.json', 'r', encoding='utf-8'\\) as f:\n data = json.load\\(f\\)\n print\\('匹配数:', data.get\\('match_count'\\)\\)\n if data.get\\('best_match'\\):\n print\\('最佳匹配:', data['best_match']\\)\n\")",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python scripts/docx_query.py --docx \"D:/App/test/bidmaster/input/哈密/rfp/招标文件.docx\" --query-file work/query_eval.json --out work/query_eval_result.json 2>&1 && .venv/Scripts/python -c \"\nimport json\nwith open\\('work/query_eval_result.json', 'r', encoding='utf-8'\\) as f:\n data = json.load\\(f\\)\n print\\('匹配数:', data.get\\('match_count'\\)\\)\n if data.get\\('matches'\\):\n for m in data['matches'][:5]:\n print\\(f\\\\\" - {m.get\\('text', ''\\)[:60]}\\\\\"\\)\n\")",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python scripts/outline_check.py --outline-file work/outline_level1_technical.json --report work/outline_level1_technical_check.json 2>&1 && .venv/Scripts/python -c \"\nimport json\nwith open\\('work/outline_level1_technical_check.json', 'r', encoding='utf-8'\\) as f:\n data = json.load\\(f\\)\n print\\('技术标检查结果:', data.get\\('status'\\)\\)\n if data.get\\('errors'\\):\n print\\('错误:', data['errors']\\)\n if data.get\\('warnings'\\):\n print\\('警告:', data['warnings'][:200]\\)\n\")",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python scripts/outline_check.py --outline-file work/outline_level1_business.json --report work/outline_level1_business_check.json 2>&1 && .venv/Scripts/python -c \"\nimport json\nwith open\\('work/outline_level1_business_check.json', 'r', encoding='utf-8'\\) as f:\n data = json.load\\(f\\)\n print\\('商务及其他检查结果:', data.get\\('status'\\)\\)\n if data.get\\('errors'\\):\n print\\('错误:', data['errors']\\)\n\")",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python -c \"\nprint\\('[DEBUG] level=1 node=ROOT action=check status=passed'\\)\nprint\\('一级目录检查通过,标记已批准可下钻'\\)\n\")",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python work/generate_level2.py)",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python scripts/outline_check.py --outline-file work/outline_level2_technical.json --report work/outline_level2_technical_check.json 2>&1 && .venv/Scripts/python -c \"\nimport json\nwith open\\('work/outline_level2_technical_check.json', 'r', encoding='utf-8'\\) as f:\n data = json.load\\(f\\)\n print\\('技术标二级检查:', data.get\\('status'\\)\\)\n if data.get\\('errors'\\):\n print\\('错误:', data['errors'][:200]\\)\n\")",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer/work\" && .venv/Scripts/python generate_level2.py 2>&1)",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && python work/generate_level2.py)",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python scripts/outline_check.py --outline-file work/outline_level2_technical.json --report work/outline_level2_technical_check.json 2>&1 && .venv/Scripts/python -c \"\nimport json\nwith open\\('work/outline_level2_technical_check.json', 'r', encoding='utf-8'\\) as f:\n data = json.load\\(f\\)\n print\\('技术标二级检查:', data.get\\('status'\\)\\)\n\")",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python scripts/outline_check.py --outline-file work/outline_level2_business.json --report work/outline_level2_business_check.json 2>&1 && .venv/Scripts/python -c \"\nimport json\nwith open\\('work/outline_level2_business_check.json', 'r', encoding='utf-8'\\) as f:\n data = json.load\\(f\\)\n print\\('商务及其他二级检查:', data.get\\('status'\\)\\)\n\")",
"Bash(cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python -c \"\nprint\\('[DEBUG] level=2 node=ALL action=check status=passed'\\)\nprint\\('二级目录检查通过,标记已批准可下钻'\\)\n\")",
"Bash(mkdir -p \"D:/App/test/bidmaster/input/哈密/final\" && cd \"D:/App/test/bidmaster/cn-it-bid-writer\" && .venv/Scripts/python scripts/outline_export.py --spec-file work/outline_export_config.json --report work/outline_export_report.json 2>&1 && .venv/Scripts/python -c \"\nimport json\nwith open\\('work/outline_export_report.json', 'r', encoding='utf-8'\\) as f:\n data = json.load\\(f\\)\n print\\('导出状态:', data.get\\('status'\\)\\)\n if data.get\\('errors'\\):\n print\\('错误:', data['errors'][:300]\\)\n\")"
]
}
}

113
SKILL.md
View File

@ -21,90 +21,42 @@ description: 面向中文 IT/系统集成类投标项目的投标文件生成与
## 执行规则,严格遵守
1. 必须按照规定workflow执行。
2. 所有输出只能写到当前项目目录下的 `work/`、`reports/`、`final/`。
2. 所有输出只能写到用户提供的目录下的 `work/`、`reports/`、`final/`。所有创建的文件也只能在用户提供的这个目录下。
3. 不能补充任何脚本,只能用项目现有脚本,所有脚本在 scripts 目录下。
4. 脚本执行需要使用本 skill 内 `.venv/` 作为虚拟环境来执行脚本,启动命令是虚拟环境的 python不是 python3。
5. 所有操作 Word 的脚本,使用本 skill 提供的工具脚本。
## 现有工具
以下脚本属于本 skill 当前可用工具:
- `parse-rfp`
入口:`scripts/run_skill.py --mode parse-rfp`
用途:解析当前项目 `rfp/*.docx`,生成 `work/document_graph.json``work/material_inventory.json`
- `scan-project`
入口:`scripts/run_skill.py --mode scan-project`
用途:扫描当前项目资料目录,补充材料盘点。
- `render-outline`
入口:`scripts/run_skill.py --mode render-outline --bundle <technical|business-other> --outline <path>`
用途:把已定稿的双目录事实源分别渲染为目录 Word。
- `render-bid`
入口:`scripts/run_skill.py --mode render-bid --bundle <technical|business-other> --content <path>`
用途:把双正文事实源分别渲染为正式标书。
- `write-large-json`
入口:`scripts/run_skill.py --mode write-large-json --input <source.json> --out <target.json>`
用途:安全写出超长 JSON 文件,统一处理 UTF-8、中文路径、分段写入、临时文件落盘与原子替换。
边界:只负责 JSON 安全写入,不负责目录判断、正文生成或 DOCX 解析。
- `outline_linter.py`
入口:`scripts/outline_linter.py`
用途:负责检查技术标目录是否足够专业和精细,防止目录设计太浅薄
- `scripts/docx_index.py`
用途:读取并索引现有 Word 标书或模板结构,提取标题、段落、列表、表格等节点信息,供 AI 先理解文档结构再决定写入位置。
允许使用的输出路径:
- `scripts/docx_query.py`
用途:按标题、正文文本、锚点等方式查询 Word 中的目标位置,供 AI 在写标书前精确定位章节、段落或表格。
- `work/document_graph.json`
- `work/material_inventory.json`
- `work/rfp_constraints.json`
- `work/evaluation_model.json`
- `work/outline_strategy.json`
- `work/final_outline_technical.json`
- `work/final_outline_business_other.json`
- `work/final_bid_content_technical.json`
- `work/final_bid_content_business_other.json`
- `reports/*.md`
- `final/*.docx`
- `scripts/docx_create.py`
用途:根据结构化输入直接创建新的 Word 文档,适合生成目录版 DOCX、占位稿或空白章节骨架。
## 固定输出顺序
- `scripts/outline_check.py`
用途:对目录阶段的结构化结果做轻量门禁检查,重点检查抽象技术标题是否直接落成叶子节点。
1. `work/document_graph.json`
2. `work/material_inventory.json`
3. `work/rfp_constraints.json`
4. `work/evaluation_model.json`
5. `work/outline_strategy.json`(仅未定稿时)
6. `work/final_outline_technical.json`(仅已定稿时)
7. `work/final_outline_business_other.json`(仅已定稿时)
8. `final/技术标_目录版.docx`
9. `final/商务及其他_目录版.docx`
10. `work/final_bid_content_technical.json`
11. `work/final_bid_content_business_other.json`
12. `final/技术标.docx`
13. `final/商务及其他.docx`
14. `final/技术标.pdf`
15. `final/商务及其他.pdf`
- `scripts/outline_export.py`
用途:在目录门禁通过后,按已完成校验的层级结果导出结构化 JSON并在目录无法继续下钻后生成目录版 DOCX。
## 业务分层
- `scripts/docx_patch.py`
用途:对现有 Word 标书执行插入、替换、删除等修改操作,把已经生成好的标书内容准确写入指定位置。
### 评分项 / 废标项 / 合规项
- `scripts/render_docx.py`
用途:对写入后的 Word 标书做渲染校验,尝试导出 PDF 和页面图,检查文档是否损坏或排版异常。
在理解资料和目录设计阶段,必须先把招标要求按业务风险和评审作用分成三层:
- `scripts/docx_cli.py`
用途:统一调用 DOCX 索引、查询、写入和渲染能力。
1. `废标/否决项`
- 指资格门槛、实质性响应、星号项、无效投标触发项、必须逐条满足的硬约束。
- 这些内容优先级最高,必须先确认是否有承载位、是否有证据、是否存在缺件或高风险。
- 若发现缺失或不确定,不得用泛化正文掩盖,必须显式标记风险、阻塞或占位说明。
2. `合规项`
- 指招标文件明确要求提供、但不一定直接计分的资格、声明、附件、表格、承诺、响应材料。
- 这些内容必须完整进入目录和交付物,不能因为“不加分”而省略。
- 若证据不足,只能按缺件规则处理,不能伪造完成。
3. `评分项`
- 指评标办法中有明确分值、等级、比较维度或加分导向的内容。
- 这些内容必须优先影响目录结构、技术展开深度、图表配置和正文篇幅。
- 同一章节若同时承载评分项与合规项,应优先按评分逻辑组织,再补足合规要求。
具体操作方式、参数说明和示例见 `references/docx-ops.md`
处理顺序:
1. 先锁定 `废标/否决项`,确保不漏项。
2. 再补齐 `合规项`,确保正式交付结构完整。
3. 最后围绕 `评分项` 优化目录颗粒度、技术展开和证据呈现。
## 执行流程
@ -114,9 +66,10 @@ description: 面向中文 IT/系统集成类投标项目的投标文件生成与
- 若主文档不足,继续在当前项目内深挖评分办法、技术规范、附件、分册和其他候选原文。
- 建立项目约束、评分约束、风险约束、三层业务分类和输出边界。
2. outline 阶段
- 在内存中形成一份完整 canonical outline覆盖 `business`、`technical`、`other`。
- 运行现有目录门禁和 linter。
- 门禁通过后拆分并生成:
按照用户要求
- 目录节点统一使用 `heading(level/text/children)` 表达
- 目录阶段的唯一详细流程、循环下钻规则、门禁要求与 Mermaid 流程图,统一以 `references/outline-stage.md` 为准
- 只有在 `references/outline-stage.md` 规定的全部目录门禁通过后,才允许生成:
- `work/final_outline_technical.json`
- `work/final_outline_business_other.json`
- `final/技术标_目录版.docx`
@ -131,20 +84,17 @@ description: 面向中文 IT/系统集成类投标项目的投标文件生成与
- 生成:
- `final/技术标.docx`
- `final/商务及其他.docx`
- 若执行 PDF 导出,同步导出对应两份 PDF。
## 阶段规则
### 1. 理解规则阶段
必须遵守:
- `references/rfp-deconstruction.md`
- `references/evidence-escalation.md`
- `references/understandbid.md`
### 1. outline
必须遵守
- `references/outline-stage.md`
- `references/output-contracts.md`
- `references/tables-and-scoring.md`
- `references/docx-ops.md`
### 2. business
@ -188,9 +138,6 @@ description: 面向中文 IT/系统集成类投标项目的投标文件生成与
8. 若叶子节点仍然是抽象标题,必须先回退目录阶段继续下钻,不得硬写正文。
### 4. other/finalize
1. 只补齐 `workflow_bucket=other` 的正式节点,并写入 `work/final_bid_content_business_other.json`
2. 技术占位节点归 `workflow_bucket=other`,只允许写“详见技术标”等转引说明,不得展开技术实质内容。
3. 不额外创造默认“报价子 workflow”。
4. 通过总体验收前,不得对外宣称“完整投标文件”。
@ -212,14 +159,6 @@ description: 面向中文 IT/系统集成类投标项目的投标文件生成与
## References
按阶段和任务读取,不要一次性全读:
- 商务及其他正文阶段:
- `references/business-track.md`
- `references/output-contracts.md`
- 技术正文阶段:
- `references/technical-track.md`
- `references/tables-and-scoring.md`
- `references/output-contracts.md`
- DOCX 渲染与交付:
- `references/docx-assembly.md`
- `references/output-contracts.md`
- `references/docx-ops.md`

View File

@ -1,4 +1,4 @@
interface:
display_name: "中文标 Skill"
short_description: "由 AI 主导先完成目录门禁,再拆分生成技术标与商务及其他两套交付物。"
default_prompt: "Use $cn-it-bid-writer. Read SKILL.md first. Build a full canonical outline from the current RFP and local materials, pass the outline gates, then split the deliverables into a technical bid and a business-and-other bid. Fill technical content only in the technical bundle, fill business and other content only in the business-and-other bundle, and keep only a technical placeholder in the business-and-other deliverable. Never rely on fixed chapter templates."
display_name: "中文 Skill"
short_description: "面向中文 IT/系统集成类标书写作,当前重点支持目录阶段产物生成,以及对 DOCX 的创建、定位、定点插入或替换和渲染校验。"
default_prompt: "Use $cn-it-bid-writer. Read SKILL.md first. Treat this as a bid-writing skill, not generic office automation. For outline-stage work without templates, first derive a structured outline, run outline-check, then create the directory DOCX with docx_create. Only use index/query/patch when editing an existing DOCX."

17
evals/evals.json Normal file
View File

@ -0,0 +1,17 @@
{
"skill_name": "cn-it-bid-writer",
"evals": [
{
"id": 1,
"prompt": "当前项目只有招标文件,没有现成模板。请先完成目录阶段:生成技术标目录和商务及其他目录,技术目录不要停留在“技术方案/实施方案”这种抽象叶子节点,商务目录中技术部分只保留占位。",
"expected_output": "能先整理结构化目录,执行目录门禁检查,再生成两份目录版 DOCX。",
"files": []
},
{
"id": 2,
"prompt": "请根据当前项目资料只生成技术标目录版 DOCX要求目录层级至少展开到三级覆盖原则、架构、模块、实施、验收、运维等承载位。",
"expected_output": "生成可直接继续写正文的技术标目录,并避免抽象标题直接作为叶子节点。",
"files": []
}
]
}

View File

@ -1,41 +0,0 @@
# 商务及其他节点规则
只用于填写 `work/final_bid_content_business_other.json` 中的已定稿叶子节点。
## 边界
1. 只写 `workflow_bucket=business`,以及商务及其他文件中用于技术转引的 `workflow_bucket=other` 占位节点。
2. 不新增正式章节,不套商务模板。
3. 商务事实只认招标文件明示内容和用户真实材料。
4. 节点标题过于抽象时,先回退目录阶段,不要在正文里硬兜底。
5. 不得在商务及其他文件中补写任何技术实质内容。
## 允许承载的内容
- 营业执照
- 资质证书
- 法人和授权材料
- 类似业绩
- 财务、纳税、社保、报价依据
- 招标文件明确要求的声明、承诺和附表
- 商务及其他文件中的技术转引说明
## 技术占位规则
1. 技术占位节点归 `workflow_bucket=other`
2. 技术占位只允许写转引说明,例如“技术响应内容详见《技术标》文件”。
3. 技术占位不得展开技术目录细节,不得补技术图表、技术方案、技术表格。
4. 若招标文件明确要求技术内容在商务及其他文件中出现位置,则按原位置保留占位。
5. 若发现当前节点其实需要技术正文,应停止当前流程并回到技术标流程处理。
## 缺件处理
1. 缺件时只允许写占位、缺件说明、附件索引空位或阻塞说明。
2. 不得伪造证书编号、合同金额、发证日期、统一社会信用代码等事实。
3. 星号项或资格门槛材料缺失时,应标记高风险或阻塞。
## 一致性
1. 商务正文、附件索引、偏离表、声明表之间不能互相矛盾。
2. 若某节点实际属于报价、技术或其他正式部分,应回到目录归属判断。
3. 商务及其他中的技术转引、技术标目录和技术标正文之间不能互相打架。

View File

@ -1,26 +1,3 @@
# DOCX 组装规则
## 成品结构
本 skill 的正式成品默认拆成两份:
1. 技术标
2. 商务及其他
两份成品都遵循以下结构:
1. 封面
2. 目录
3. 正文主章节
4. 表格章节
5. 附件索引
## 双产物要求
1. 技术标承载全部技术实质内容。
2. 商务及其他承载商务、声明、附件索引和其它正式部分。
3. 商务及其他允许出现技术标占位章节,但该章节只写转引说明,不生成技术正文主体。
4. 两份成品的封面标题、目录标题和默认文件名必须能明确区分归属。
## 排版要求

447
references/docx-ops.md Normal file
View File

@ -0,0 +1,447 @@
# DOCX 操作手册
这个手册只描述脚本接口和数据约定。
`SKILL.md` 负责告诉 AI 什么时候该用这些脚本;真正需要执行时,再读取本手册。
## 运行方式
统一使用本 skill 自带虚拟环境:
```powershell
.venv\Scripts\python.exe scripts\docx_index.py --docx <绝对路径> --out <绝对路径>
.venv\Scripts\python.exe scripts\docx_query.py --docx <绝对路径> --query-file <绝对路径> --out <绝对路径>
.venv\Scripts\python.exe scripts\docx_create.py --spec-file <绝对路径> --report <绝对路径>
.venv\Scripts\python.exe scripts\outline_check.py --outline-file <绝对路径> --report <绝对路径>
.venv\Scripts\python.exe scripts\outline_export.py --spec-file <绝对路径> --report <绝对路径>
.venv\Scripts\python.exe scripts\docx_patch.py --patch-file <绝对路径> --report <绝对路径>
.venv\Scripts\python.exe scripts\render_docx.py --docx <绝对路径> --out-dir <绝对路径> --report <绝对路径>
```
也可以统一走:
```powershell
.venv\Scripts\python.exe scripts\docx_cli.py index ...
.venv\Scripts\python.exe scripts\docx_cli.py query ...
.venv\Scripts\python.exe scripts\docx_cli.py create ...
.venv\Scripts\python.exe scripts\docx_cli.py outline-check ...
.venv\Scripts\python.exe scripts\docx_cli.py outline-export ...
.venv\Scripts\python.exe scripts\docx_cli.py patch ...
.venv\Scripts\python.exe scripts\docx_cli.py render ...
```
## 0. 新建 DOCX
### 命令
```powershell
.venv\Scripts\python.exe scripts\docx_create.py --spec-file D:\work\create.json --report D:\work\create.report.json
```
或统一走 CLI
```powershell
.venv\Scripts\python.exe scripts\docx_cli.py create --spec-file D:\work\create.json --report D:\work\create.report.json
```
### spec JSON
```json
{
"output_docx": "D:/work/generated-outline.docx",
"title": "目录测试",
"blocks": [
{"type": "heading", "level": 1, "text": "技术标目录"},
{"type": "heading", "level": 2, "text": "项目总体方案"},
{"type": "paragraph", "text": "这里是说明文字"},
{"type": "list", "items": ["系统架构设计", "实施部署方案"]},
{"type": "table", "rows": [["章节", "说明"], ["5.1", "总体设计"]]},
{"type": "page_break"}
]
}
```
### 支持的 block 类型
- `heading`
- 必填:`text`
- 可选:`level`,范围 `1-9`
- `paragraph`
- 必填:`text`
- 可选:`style`
- `list`
- 必填:`items`
- 可选:`style`,默认 `List Bullet`
- `table`
- 必填:`rows`,二维数组且列数一致
- 可选:`style`
- `page_break`
### 输出
报告 JSON 包含:
- `status`
- `output_docx`
- `block_count`
- `blocks`
- `final_summary`
## 0.1 目录门禁检查
### 命令
```powershell
.venv\Scripts\python.exe scripts\outline_check.py --outline-file D:\work\outline.json --report D:\work\outline.check.json
```
或统一走 CLI
```powershell
.venv\Scripts\python.exe scripts\docx_cli.py outline-check --outline-file D:\work\outline.json --report D:\work\outline.check.json
```
### 输入约定
- 顶层为 `blocks`
- 目录节点使用 `type=heading`
- 目录层级使用 `level`
- 子节点放在 `children`
最小示例:
```json
{
"blocks": [
{
"type": "heading",
"level": 1,
"text": "技术标目录",
"children": [
{
"type": "heading",
"level": 2,
"text": "总体设计方案",
"children": [
{"type": "heading", "level": 3, "text": "建设目标与原则"}
]
}
]
}
]
}
```
### 当前检查内容
- 抽象标题是否直接作为叶子节点
- `children` 类型是否合法
- block 是否为对象
## 0.2 目录阶段最终导出
本节只描述 `outline_export.py` 的接口,不定义目录阶段 workflow。
目录阶段的循环下钻、逐级检查、逐级写出 JSON 规则,以 `references/outline-stage.md` 为唯一准则。
`outline_export.py` 只在目录已经全部定稿、且无法继续下钻后调用,用于生成最终正式产物。
### 命令
```powershell
.venv\Scripts\python.exe scripts\outline_export.py --spec-file D:\work\outline-export.json --report D:\work\outline-export.report.json
```
或统一走 CLI
```powershell
.venv\Scripts\python.exe scripts\docx_cli.py outline-export --spec-file D:\work\outline-export.json --report D:\work\outline-export.report.json
```
### 输入约定
```json
{
"technical_outline": {
"title": "技术标目录",
"blocks": []
},
"business_outline": {
"title": "商务及其他目录",
"blocks": []
},
"technical_outline_json": "D:/work/final_outline_technical.json",
"business_outline_json": "D:/work/final_outline_business_other.json",
"technical_docx": "D:/final/技术标_目录版.docx",
"business_docx": "D:/final/商务及其他_目录版.docx"
}
```
### 输出
- 写出最终版 `work/final_outline_technical.json`
- 写出最终版 `work/final_outline_business_other.json`
- 写出两份目录版 DOCX
- 返回两份导出报告
## 1. 索引
### 命令
```powershell
.venv\Scripts\python.exe scripts\docx_index.py --docx D:\work\bid.docx --out D:\work\bid.index.json
```
### 输出
输出 JSON 顶层字段:
- `status`
- `docx`
- `summary`
- `nodes`
`nodes` 中每个节点至少包含:
- `node_id`
- `node_type`
- `text`
- `style_name`
- `heading_level`
- `path`
- `ordinal`
- `parent_id`
- `anchor`
当前支持的 `node_type`
- `heading`
- `paragraph`
- `list_item`
- `table`
- `table_row`
- `table_cell`
- `image_placeholder`
### 适用场景
- 给现有模板标书建立可检索结构
- 判断某章是否存在
- 为后续 query / patch 提供稳定锚点
## 2. 查询
### 命令
```powershell
.venv\Scripts\python.exe scripts\docx_query.py --docx D:\work\bid.docx --query-file D:\work\query.json --out D:\work\query.result.json
```
### 查询 JSON
```json
{
"match_mode": "heading_text",
"value": "项目实施方案"
}
```
### 支持的 `match_mode`
- `exact_text`
- `contains_text`
- `regex`
- `heading_path`
- `heading_text`
- `table_title`
- `style_name`
- `node_type`
- `anchor`
- `node_id`
### 常用附加字段
- `node_type`
- `style_name`
- `heading_level`
- `occurrence`
- `allow_multiple`
- `context_window`
### 查询结果
结果 JSON 包含:
- `matches`
- `match_count`
- `ambiguous`
- `best_match`
- `candidate_anchors`
- `errors`
- `warnings`
默认原则:
- 单命中才适合直接 patch
- 多命中默认视为歧义
- 如果需要用第 N 个命中,必须显式传 `occurrence`
## 3. Patch
### 命令
```powershell
.venv\Scripts\python.exe scripts\docx_patch.py --patch-file D:\work\patch.json --report D:\work\patch.report.json --render-check
```
### patch JSON 顶层结构
```json
{
"source_docx": "D:/work/source.docx",
"output_docx": "D:/work/output.docx",
"operations": []
}
```
默认写入新文件。
只有明确要原地修改时,才设置:
```json
{
"in_place": true
}
```
### operation 字段
- `op`
- `target`
- `content`
- `content_type`
- `on_ambiguous`
- `on_missing`
支持的 `op`
- `insert_before`
- `insert_after`
- `replace_node`
- `replace_text`
- `delete_node`
支持的 `content_type`
- `paragraphs`
- `heading`
- `table`
- `list`
### 示例 1在某章节后插入正文
```json
{
"source_docx": "D:/work/source.docx",
"output_docx": "D:/work/output.docx",
"operations": [
{
"op": "insert_after",
"target": {
"match_mode": "heading_text",
"value": "项目实施方案"
},
"content_type": "paragraphs",
"content": [
"本项目实施总体目标是确保系统平滑上线并满足验收要求。",
"实施阶段按照调研、部署、联调、试运行和验收五个步骤推进。"
]
}
]
}
```
### 示例 2替换指定文本
```json
{
"source_docx": "D:/work/source.docx",
"output_docx": "D:/work/output.docx",
"operations": [
{
"op": "replace_text",
"target": {
"match_mode": "contains_text",
"value": "质保期"
},
"old_text": "一年",
"new_text": "三年"
}
]
}
```
### 示例 3替换整个节点
```json
{
"source_docx": "D:/work/source.docx",
"output_docx": "D:/work/output.docx",
"operations": [
{
"op": "replace_node",
"target": {
"match_mode": "heading_text",
"value": "售后服务方案"
},
"content_type": "heading",
"content": {
"text": "售后服务与运维保障",
"level": 2
}
}
]
}
```
## 4. 渲染校验
### 命令
```powershell
.venv\Scripts\python.exe scripts\render_docx.py --docx D:\work\output.docx --out-dir D:\work\render --report D:\work\render.report.json
```
### 行为
脚本会尝试:
1. DOCX 转 PDF
2. PDF 渲染页面图片
3. 输出渲染报告
### 报告字段
- `status`
- `docx`
- `pdf`
- `page_count`
- `images`
- `errors`
- `warnings`
如果系统缺少 `soffice` 或图片渲染依赖,报告会返回 `render_skipped` 或带 warning而不是直接把 patch 结果判定为失败。
## 5. 适合 AI 的使用策略
当 AI 写标书时,优先按下面顺序工作:
1. 先 `index`
2. 再 `query`
3. 确认命中唯一
4. 生成 patch JSON
5. 执行 `patch`
6. 执行 `render`
不要在以下情况下直接 patch
- 查询结果为空
- 查询结果有多个候选但未明确选择
- 还没确认当前章节属于商务标还是技术标
- 需要插入的大段正文还未完成事实校验

View File

@ -1,51 +0,0 @@
# 补证与深挖
## 搜索边界
只能在当前项目目录内深挖。
优先找:
1. `rfp/` 下全部文件。
2. 当前项目根目录下的其他输入材料。
3. 当前项目已有解析结果。
4. 主 DOCX 里的章节引用、附表号、分册名和互引关系。
## 推荐顺序
1. 先通读主文档,找评分、技术规范、附件、分册、附表。
2. 再按近义词搜索:`评分办法`、`评审办法`、`技术规范`、`附件`、`附表`、`分册`。
3. 若主文档是混合件或模板件,再从内部引用关系反推缺失件。
## 什么时候必须继续找
出现以下情况时,不能直接停稿:
1. 找不到评分办法。
2. 找不到技术规范或关键技术指标。
3. 星号项只有总括,没有逐条清单。
4. 原文有展开引导句,但展开项不完整。
5. 文档像模板、节选件或混合件。
6. 标题树里出现 `XX`、示例文本或历史项目残留。
## 推断边界
允许保守推断,但不能把推断当正式事实。
禁止靠推断补成正式事实的内容:
1. 具体分值
2. 具体评分标准
3. 逐条星号项
4. 明示资格门槛
5. 最高限价数值
## 什么时候才能停
只有完成以下动作后,才允许停在候选稿:
1. 已检索当前项目目录下全部候选输入来源。
2. 已复查主文档里的评分、技术规范、附件和分册信号。
3. 已把目录细化到当前证据支持的最深安全层级。
4. 已区分确认层和高风险推断层。
5. 已落盘候选目录事实源和候选目录文件。

View File

@ -2,33 +2,97 @@
## 目标
目录阶段先产出一份完整 canonical outline 用于门禁检查,再拆成两份正式目录:
- `work/final_outline_technical.json`
- `work/final_outline_business_other.json`
可拆成两份正式目录:技术目录和商务目录(包含除技术外的其它部分)。
若未通过定稿门禁,则停止目录输出,不生成任何正式双目录,也不生成目录 Word。
## 目录总门禁 (Outline Master Gates)
## 唯一执行流程
<Rule>
1. 如果该 AI 支持子代理功能,则必须创建子代理来生成 canonical outline 或其技术投影视图。
2. 主代理必须充当质检员。主代理在接收到子代理的目录后,必须执行叶子节点合法性检查。
3. 一票否决权:只要发现技术目录停留在“方案”“概述”等二级节点,未下钻到具体的“原则、架构、内容、模块、流程”,主代理必须将其打回,要求子代理重写,绝不允许放行。
4. 被否决重新修改目录结构时,坚决杜绝为了通过检验而做的适配修改,必须生成符合本 skill 要求、适配招标文件的专业目录结构。
5. 门禁检查对象是 canonical outline 或 `final_outline_technical.json`,不是商务及其他中的技术占位目录。
</Rule>
本文件是目录阶段的唯一流程定义来源。`SKILL.md` 中若有摘要性表述,只用于导航,不再单独定义目录流程。
## 最小流程
### 结构表达
1. 根据读取到的 `rfp/` 主文档和当前项目候选材料,建立评分点、风险点、证据点和正式交付边界,形成 canonical outline 骨架。
2. 继续下钻到当前证据支持的最深安全层级。
3. 子代理或主代理生成 canonical outline 后,主代理运行 `scripts/outline_linter.py` 检查。
4. 若有 ERROR把报告尤其是 `[ERROR][code] 路径 | breadcrumb | message` 原样发回子代理,让它只修目录,不写正文,并重试。
5. 直到 linter 通过,再用 `write-large-json` 落盘:
1. 目录阶段的最小结构化输入统一使用 `heading(level/text/children)`
2. 任何层级的目录都必须先结构化,再检查,再决定是否继续下钻。
### 逐级下钻循环
1. 根据读取到的 `rfp/` 主文档和当前项目候选材料,建立评分点、风险点、证据点和正式交付边界。
2. 目录生成必须严格按层执行,禁止一次性直接生成完整目录。
3. 每一轮只允许生成“当前层级的直接子节点”,不得预先生成孙级及以下节点。
4. 先生成当前层级目录,并写出当前层级中间 JSON。
5. 对当前层级目录执行检查核对。
6. 若检查未通过,则停止后续下钻,禁止生成任何正式目录 JSON 和目录版 Word。
7. 若检查通过,则将当前层级节点记为“已批准可下钻节点”。
8. 下一轮只能基于上一轮“已批准可下钻节点”生成其直接子节点,不得跨节点、跨层级展开。
9. 重复“生成当前层级 -> 写出当前层级 JSON -> 检查核对 -> 标记已批准节点 -> 继续下钻”,直到当前层级所有节点都无法继续下钻为止。
10. 只有在全部层级完成检查核对后,才允许执行最终目录导出。
### 层级执行约束
1. 生成一级目录时,一级节点不得携带任何二级以下 `children`
2. 生成某个一级节点的二级目录时,只允许补充该一级节点的直接 `children`,不得同时补充其三级节点。
3. 生成某个二级节点的三级目录时,只允许补充该二级节点的直接 `children`,不得同时补充其四级节点。
4. 任何节点在其父节点未通过当前轮检查前,不得进入下一层下钻。
5. 未经当前轮检查通过的节点,不得写入最终目录。
6. 不得以“先完整想好再拆回各层”的方式规避逐级流程;若最终目录中的下级节点没有对应上轮批准依据,视为流程违规。
### 中间产物要求
1. 每一轮都必须落盘当前层级的中间 JSON不得只在对话中描述。
2. 中间 JSON 只承载本轮新增的直接子节点,不得混入更深层级内容。
3. 若缺少任一层级的中间 JSON 或检查记录,则视为目录流程未完成,不得导出正式目录。
### 调试输出要求
1. 每完成一轮层级生成后,必须先输出一行简短调试信息,再进入检查步骤。
2. 每完成一轮检查后,必须输出一行检查结果调试信息。
3. 调试输出只允许描述当前轮次、当前节点、执行状态,不得展开成长篇解释。
4. 调试输出应尽量固定格式,便于人工核对逐级流程是否被跳过。
5. 若当前轮次包含多个节点,应逐个节点输出,不得用“本轮已完成”笼统代替。
推荐格式:
```text
[DEBUG] level=1 node=ROOT action=generate status=done
[DEBUG] level=1 node=ROOT action=check status=passed
[DEBUG] level=2 node=八、服务方案 action=generate status=done
[DEBUG] level=2 node=八、服务方案 action=check status=passed
[DEBUG] level=3 node=8.3 VR智能培训中心建设方案 action=generate status=done
[DEBUG] level=3 node=8.3 VR智能培训中心建设方案 action=check status=failed
```
### 收敛与最终产物
1. “无法继续下钻”是指当前层级节点已经到达证据支持的最深安全层级,继续下钻只会制造空泛标题、伪细分或重复切面。
2. 当招标文件、采购清单、技术参数表、分项报价表、供货一览表已经出现可识别的系统、子系统、设备或服务对象时,技术目录必须优先按这些已明示对象继续下钻。
3. “无法继续下钻”必须基于本轮已检查通过的节点逐个判断,不得在上层轮次提前宣告整个目录收敛。
4. 最终目录导出只能发生在循环结束之后,不得在中途为了查看效果提前生成目录版 Word。
5. 最终导出产物为:
- `work/final_outline_technical.json`
- `work/final_outline_business_other.json`
6. 再分别执行 `render-outline --bundle technical``render-outline --bundle business-other`
- `final/技术标_目录版.docx`
- `final/商务及其他_目录版.docx`
### Mermaid 流程图
```mermaid
flowchart TD
A[整理目录结构边界<br/>建立评分点 风险点 证据点] --> B[生成当前层级目录<br/>统一使用 heading(level/text/children)]
B --> C[写出当前层级中间 JSON]
C --> D[执行目录检查核对]
D --> E{检查是否通过}
E -- 否 --> F[停止后续下钻<br/>禁止生成正式目录 JSON 和目录版 Word]
E -- 是 --> G[标记已批准可下钻节点]
G --> H{是否还能继续下钻}
H -- 是 --> I[仅对已批准节点生成下一层直接子节点]
I --> C
H -- 否 --> J[执行最终目录导出]
J --> K[生成 work/final_outline_technical.json]
J --> L[生成 work/final_outline_business_other.json]
J --> M[生成 final/技术标_目录版.docx]
J --> N[生成 final/商务及其他_目录版.docx]
```
## 拆分规则
@ -36,10 +100,6 @@
2. 商务及其他目录保留 business/other 目录,并在技术内容应出现的位置保留技术占位。
3. 若招标文件明确规定分册、分标或顺序,技术占位必须出现在原规定位置。
4. 若招标文件未明确规定位置,则默认在 unified/canonical outline 中技术部分入口位置保留一个一级占位章节。
5. 商务及其他中的技术占位节点必须满足:
- `workflow_bucket=other`
- 稳定占位 ID
- 可渲染为目录与正文中的转引说明
6. 商务及其他中的技术占位不能成为技术门禁放行依据。
## 抽象标题处理与下钻强制约束(核心规则)
@ -95,18 +155,18 @@
2. 技术类章节是否已经下钻到第三级或第四级?(要求:是)
3. 技术方案下,是否同时包含了[原则]、[架构]、[内容/模块](要求:是)
4. 所有的评分点是否都已在目录中体现?(要求:是)
5. 双目录是否都已从同一 canonical outline 拆分,并保持稳定节点 ID(要求:是)
6. 商务及其他中的技术节点是否只保留占位,不承载技术正文?(要求:是)
5. 商务及其他中的技术节点是否只保留占位,不承载技术正文?(要求:是)
6. 招标文件、采购清单、技术参数表、分项报价表、供货一览表已经出现可识别的系统/子系统/设备清单时,技术目录是否已经按照要求下钻?(要求:是)
```
## 定稿门禁
正式双目录必须同时满足:
1. canonical outline 中评分点、风险点、证据点都有承载位。
1. 唯一执行流程中评分点、风险点、证据点都有承载位。
2. 技术标中的抽象标题已下钻,直到能承载正文为止。
3. 商务及其他中的技术部分只保留占位,位置正确。
4. 显式章节、附表、附件承载位没有被无故遗漏。
5. 标题层级、编号、归属关系合法,防止自动或手动编号混淆。
6. 每个最终目录叶子节点都已标注 `workflow_bucket`
6. 最终目录中的每个节点都能追溯到某一轮已通过检查的层级结果,不存在跳过当前层级检查直接写入的后代节点
7. 不得用“目录已经想好”或“逻辑上已定稿”代替文件存在检查。

View File

@ -1,185 +0,0 @@
# 输出契约
## 目录阶段
### `work/outline_strategy.json`
目录阶段工作事实源。至少包含:
```json
{
"project_name": "项目名称",
"status": "candidate",
"blocking_issues": [],
"review_flags": []
}
```
规则:
1. `status` 推荐用 `candidate`、`blocked`、`ready_for_final`。
2. 未过门禁时,不得直接写成正式双目录文件。
### `work/final_outline_technical.json`
技术标正式目录事实源。至少包含:
```json
{
"project_name": "项目名称",
"bundle": "technical",
"doc_title": "技术标(目录版)",
"sections": [
{
"id": "node-001",
"title": "技术标正式目录标题",
"workflow_bucket": "technical",
"children": []
}
]
}
```
规则:
1. 只有通过目录门禁后才允许生成。
2. `id` 必须稳定,并沿用 canonical outline 的稳定节点 ID。
3. `bundle` 固定为 `technical`
4. `workflow_bucket` 只允许 `business`、`technical`、`other`。
5. 技术标目录必须保留完整技术展开结构,不得用技术占位替代。
### `work/final_outline_business_other.json`
商务及其他正式目录事实源。至少包含:
```json
{
"project_name": "项目名称",
"bundle": "business_other",
"doc_title": "商务及其他(目录版)",
"sections": [
{
"id": "node-101",
"title": "商务及其他正式目录标题",
"workflow_bucket": "business",
"children": []
},
{
"id": "placeholder-technical-entry",
"title": "技术标内容说明",
"workflow_bucket": "other",
"placeholder_kind": "technical_redirect",
"children": []
}
]
}
```
规则:
1. 只有通过目录门禁后才允许生成。
2. 真实业务节点沿用 canonical outline 的稳定 ID。
3. 技术占位节点使用稳定占位 ID不得每轮变化。
4. `bundle` 固定为 `business_other`
5. 技术占位优先放在招标文件规定的位置;若招标文件未规定,则放在 unified outline 中技术部分入口位置。
6. 技术占位节点归 `workflow_bucket=other`,只承担转引,不承载技术正文。
7. 双目录首次生成的同一轮,禁止同时生成双正文事实源。
8. 正式目录落盘后,必须先生成:
- `final/技术标_目录版.docx`
- `final/商务及其他_目录版.docx`
再允许进入正文阶段。
## 正文阶段
### `work/final_bid_content_technical.json`
技术标正文事实源。至少包含:
```json
{
"project_name": "项目名称",
"bundle": "technical",
"doc_title": "技术标",
"nodes": [
{
"outline_id": "node-001",
"title": "技术标正式目录标题",
"workflow_bucket": "technical",
"status": "drafted",
"content": {
"paragraphs": [],
"bullets": [],
"tables": [],
"images": []
},
"children": []
}
]
}
```
规则:
1. `outline_id` 必须回链到 `final_outline_technical.json`
2. `bundle` 固定为 `technical`
3. `status` 只允许 `pending`、`drafted`、`blocked`。
4. 只能为 `final_outline_technical.json` 中的叶子节点写正文;若 `outline_id` 对应节点仍有子节点,则不得直接填充为完成稿。
5. 在 `work/final_outline_technical.json``final/技术标_目录版.docx` 不存在时,禁止生成此文件。
### `work/final_bid_content_business_other.json`
商务及其他正文事实源。至少包含:
```json
{
"project_name": "项目名称",
"bundle": "business_other",
"doc_title": "商务及其他",
"nodes": [
{
"outline_id": "node-101",
"title": "商务及其他正式目录标题",
"workflow_bucket": "business",
"status": "drafted",
"content": {
"paragraphs": [],
"bullets": [],
"tables": [],
"images": []
},
"children": []
},
{
"outline_id": "placeholder-technical-entry",
"title": "技术标内容说明",
"workflow_bucket": "other",
"status": "drafted",
"content": {
"paragraphs": [
"技术响应内容详见《技术标》文件。"
],
"bullets": [],
"tables": [],
"images": []
},
"children": []
}
]
}
```
规则:
1. `outline_id` 必须回链到 `final_outline_business_other.json`
2. `bundle` 固定为 `business_other`
3. `status` 只允许 `pending`、`drafted`、`blocked`。
4. business 阶段只更新 `workflow_bucket=business`other/finalize 阶段只更新 `workflow_bucket=other`
5. 技术占位节点只允许写转引说明,不得展开技术正文。
6. 只能为 `final_outline_business_other.json` 中的叶子节点写正文;若 `outline_id` 对应节点仍有子节点,则不得直接填充为完成稿。
7. 在 `work/final_outline_business_other.json``final/商务及其他_目录版.docx` 不存在时,禁止生成此文件。
## 渲染边界
1. 渲染脚本不判断目录是否合法。
2. 渲染脚本不自动扩写正文。
3. 正式稿的双产物归属由 AI 在 `bundle`、标题、封面和默认文件名中明确表达。

View File

@ -1,45 +0,0 @@
# 招标文件拆解
## 目标
把招标文件拆成后续可复用的事实源,服务目录、正文、表格和图表生成。
## 重点信号
重点识别:
- 评分、打分、评审、分值
- 废标、否决、无效投标
- 星号、关键条款、实质性响应
- 架构、部署、接口、安全、数据库、网络
- 营业执照、资质、授权、业绩、报价
## 至少要抽的事实
1. 项目名称和包号
2. 招标范围和建设目标
3. 评分办法和评分点
4. 星号项、否决项、资格门槛
5. 工期、服务期、交付地点
6. 部署、接口、安全、验收等技术边界
7. 商务材料和证明材料要求
## 处理原则
1. 能确定的写入事实源。
2. 不能确定的写入 `review_flags`,不要伪装成硬事实。
3. 看到展开引导句时,把它当目录下钻信号。
4. 看到“应附”“后附”“证明材料顺序对应”时,把它当证据承载信号。
5. 看到 `XX项目`、示例文本、历史项目名、占位字段时,把它当模板污染候选。
6. 看到章节号、分册号、附表号时,保留原始结构信号,不要自行修正。
## 推荐落盘
- `work/document_graph.json`
- `work/rfp_constraints.json`
- `work/evaluation_model.json`
## 和目录阶段的关系
1. 拆解结果必须能支持目录阶段判断“评委先看什么、每个评分点和证据点挂哪章”。
2. 如果拆解结果还停在“有技术方案/实施方案/售后方案”这种抽象层级,就还不能直接定目录。

View File

@ -1,36 +0,0 @@
# 表格与得分表达
## 表格的作用
表格是承载信息的工具,不是自动合法的目录标题。
常见表格包括:
1. 技术响应表
2. 商务偏离表
3. 进度计划表
4. 团队配置表
5. SLA 表
6. 软硬件配置表
7. 附件索引表
## 对目录的约束
1. 发现了表,不等于目录已经够细。
2. 一个标题下如果实际承载多张不同用途的表,通常说明目录还要继续下钻。
3. 不能因为已经列出几张表,就把技术正文停在“总体技术方案”这一层。
## 生成规则
1. 先回答“这张表服务哪个评审主题”。
2. 招标文件有明确条款时,按条款逐项响应。
3. 无明确条款时,使用保守字段,不虚构商务事实。
4. 表格标题要高信息密度避免“表1”“参数表”“响应表”。
5. 表格编号应在章节内顺序编号。
6. 术语必须和正文、图示保持一致。
## 得分表达
1. 对应具体评分点,不要写空泛口号。
2. 能量化就量化到工期、响应时限、巡检频次、培训场次、可用性等。
3. 得分表达必须和正文、附件、偏差表一致。

View File

@ -1,80 +0,0 @@
# 技术节点规则
只用于填写 `work/final_bid_content_technical.json``workflow_bucket=technical` 的已定稿目录节点,不允许删减任何定稿目录,只能新增技术子层级。
## 开写前先想清楚
先回答四个问题:
1. 项目总目标是什么。
2. 建设主线是什么。
3. 哪些是主系统,哪些是支撑系统。
4. 评委最先看哪几个主题。
如果这四个问题没想清楚,不要直接写正文。
## 展开规则
1. “总体方案”不是终点,必须继续展开到架构、模块、流程或接口。
2. “实施方案”不是终点,必须继续展开到阶段、资源、工期、交付件和验收。
3. “培训方案”不是终点,必须继续展开到对象、课程、方式、频次和考核。
4. “售后方案”不是终点,必须继续展开到响应机制、质保、巡检、升级和备件。
5. 没有企业专属事实时,可以补专业方案;不能补伪造业绩、认证、参数或授权。
6. “平台建设方案”“系统建设方案”“中控与教学管理平台建设方案”不是终点,必须继续展开到模块、功能、角色、流程、接口、部署或考核机制。
7. 不允许直接对仍含子节点的父节点写成完整正文并跳过其子节点。
8. 若当前叶子节点仍显抽象,必须回退目录阶段补目录,而不是继续硬写正文。
## 正文最低要求
技术正文不能只写“满足要求”“无偏离”“详见附件”。
对明显需要技术标的项目,正文通常至少要覆盖这些主题中的大部分:
1. 需求理解和建设目标。
2. 总体技术路线和总体架构。
3. 分系统或分模块设计。
4. 关键软硬件、接口和集成关系。
5. 实施组织、阶段、里程碑和联调。
6. 培训、验收、运维和售后。
7. 质量控制、风险控制和应急保障。
## 图表规则
1. 先回答“这张图/表帮评委看什么”,再决定是否生成。
2. 架构图说明系统关系,流程图说明步骤,拓扑图说明部署,进度表说明计划,对照表说明条款或配置。
3. 图表标题必须高信息密度不能叫“系统架构图”“表1”。
4. 图、表、正文必须使用同一套术语和编号。
## 一致性
1. 每个技术节点都应能回链到原文条款、评分点、风险点或材料证据。
2. 架构、部署、实施、培训、验收、运维之间不能互相打架。
3. 若正文仍显得篇幅过小、层级过浅、内容过散,应直接判定未完成。
4. 星号条款响应、偏差表、正文说明、附件证据之间必须互相可回链。
5. 商务及其他中的技术占位不能替代技术标中的任何实质章节。
## 边界
1. 用户资料优先于 AI 自拟内容;与招标文件冲突时,以招标文件为准。
2. 技术正文只能基于已存在的 `work/final_outline_technical.json` 叶子节点展开,不得跳过目录门禁。
3. 不得依赖 `work/final_bid_content_business_other.json` 承载任何技术实质内容。
## 只有同时满足以下条件,技术部分才算完成
- 已明确项目总目标、建设主线、主系统、支撑系统。
- 已明确总体架构、部署关系、系统边界、数据/控制关系。
- 每个主要系统都有独立承载位,不得用一段总述覆盖。
- 已覆盖实施组织、进度阶段、安装调试、联调、试运行。
- 已覆盖培训、验收、运维、售后、质保、风险控制。
- 星号条款和关键评审条款已逐条承载。
### 评委可以直接回答
- 建设什么
- 怎么建设
- 怎么实施
- 怎么验收
- 怎么运维
任一主系统如果仍然只有概述段,没有展开到“功能/流程/模块/接口/实施/验收”层级,则视为未完成。
任一技术父节点若直接被当成正文终点使用,也视为未完成。

View File

@ -0,0 +1,59 @@
# 招标文件拆解
## 目标
把招标文件拆成后续可复用的事实源,服务目录、正文、表格和图表生成。
## 重点信号
重点识别:
- 评分、打分、评审、分值
- 废标、否决、无效投标
- 星号、关键条款、实质性响应
- 架构、部署、接口、安全、数据库、网络
- 营业执照、资质、授权、业绩、报价
## 至少要抽的事实
1. 项目名称和包号
2. 招标范围和建设目标
3. 评分办法和评分点
4. 星号项、否决项、资格门槛
5. 工期、服务期、交付地点
6. 部署、接口、安全、验收等技术边界
7. 商务材料和证明材料要求
## 如何理解标书
### 评分项 / 废标项 / 合规项
在理解资料和目录设计阶段,必须先把招标要求按业务风险和评审作用分成三层:
1. `废标/否决项`
- 指资格门槛、实质性响应、星号项、无效投标触发项、必须逐条满足的硬约束。
- 这些内容优先级最高,必须先确认是否有承载位、是否有证据、是否存在缺件或高风险。
- 若发现缺失或不确定,不得用泛化正文掩盖,必须显式标记风险、阻塞或占位说明。
2. `合规项`
- 指招标文件明确要求提供、但不一定直接计分的资格、声明、附件、表格、承诺、响应材料。
- 这些内容必须完整进入目录和交付物,不能因为“不加分”而省略。
- 若证据不足,只能按缺件规则处理,不能伪造完成。
3. `评分项`
- 指评标办法中有明确分值、等级、比较维度或加分导向的内容。
- 这些内容必须优先影响目录结构、技术展开深度、图表配置和正文篇幅。
- 同一章节若同时承载评分项与合规项,应优先按评分逻辑组织,再补足合规要求。
阅读顺序:
- 先看目录,再看正文
- 先看评分办法,再看正文
- 先看星号项,再看正文
- 先看否决项,再看正文
- 先看商务材料,再看正文
- 先看技术材料,再看正文
处理顺序:
1. 先锁定 `废标/否决项`,确保不漏项。
2. 再补齐 `合规项`,确保正式交付结构完整。
3. 最后围绕 `评分项` 优化目录颗粒度、技术展开和证据呈现。

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,54 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Any
from common import markdown_table, read_json, write_json, write_text
def normalize_spec(spec: Any) -> list[dict[str, Any]]:
    """Coerce a raw spec payload into a list of table dicts.

    Accepts either a mapping with a "tables" key or a bare list; any other
    payload yields an empty list. Non-dict entries are dropped.
    """
    if isinstance(spec, dict):
        candidates = spec.get("tables", [])
    else:
        candidates = spec if isinstance(spec, list) else []
    return [entry for entry in candidates if isinstance(entry, dict)]
def save_table(out_dir: Path, file_name: str, title: str, headers: list[str], rows: list[list[str]]) -> dict[str, Any]:
    """Write one table as a markdown file under *out_dir* and return its manifest entry."""
    body = "\n".join((f"# {title}", "", markdown_table(headers, rows)))
    target = out_dir / file_name
    write_text(target, body)
    return {"title": title, "path": str(target), "headers": headers, "rows": rows}
def main() -> None:
    """CLI entry: read a table spec JSON and emit markdown tables plus a manifest."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--spec", required=True)
    parser.add_argument("--out", required=True)
    args = parser.parse_args()
    out_dir = Path(args.out).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)
    manifest: list[dict[str, Any]] = []
    for position, table_spec in enumerate(normalize_spec(read_json(Path(args.spec).resolve())), start=1):
        manifest.append(
            save_table(
                out_dir,
                table_spec.get("file_name") or f"table_{position}.md",
                table_spec.get("title") or f"表格{position}",
                table_spec.get("headers") or [],
                table_spec.get("rows") or [],
            )
        )
    write_json(out_dir / "tables_manifest.json", manifest)


if __name__ == "__main__":
    main()

View File

@ -1,94 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Any
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from matplotlib.patches import FancyArrowPatch, FancyBboxPatch
from common import find_font_path, read_json, write_json
def get_font(size: int = 12) -> FontProperties | None:
    """Return a FontProperties for the first available candidate font, or None."""
    font_path = find_font_path()
    return FontProperties(fname=str(font_path), size=size) if font_path else None
def add_box(ax, xy, width, height, text, facecolor, font):
    """Draw a rounded, filled box with a centered label.

    Parameters:
        ax: matplotlib Axes to draw on.
        xy: (x, y) lower-left corner of the box.
        width, height: box extent.
        text: label placed at the box center.
        facecolor: box fill color.
        font: FontProperties for the label (its size is honored), or None
            to fall back to matplotlib defaults.
    """
    patch = FancyBboxPatch(
        xy,
        width,
        height,
        boxstyle="round,pad=0.02,rounding_size=0.04",
        facecolor=facecolor,
        edgecolor="#1E3A8A",
        linewidth=1.5,
    )
    ax.add_patch(patch)
    # Fix: no explicit fontsize here — passing fontsize=12 alongside
    # fontproperties overrode the size baked into the caller-supplied font
    # (draw_diagram requests size 11), so the requested size was ignored.
    ax.text(
        xy[0] + width / 2,
        xy[1] + height / 2,
        text,
        ha="center",
        va="center",
        fontproperties=font,
    )
def add_arrow(ax, start, end):
    """Draw a straight navy arrow from *start* to *end* on *ax*."""
    arrow = FancyArrowPatch(start, end, arrowstyle="->", mutation_scale=16, linewidth=1.5, color="#1E3A8A")
    ax.add_patch(arrow)
def normalize_spec(spec: Any) -> list[dict[str, Any]]:
    """Coerce a raw spec payload into a list of diagram dicts.

    A mapping contributes its "diagrams" list; a bare list is used as-is;
    anything else yields []. Non-dict entries are dropped.
    """
    if isinstance(spec, list):
        candidates = spec
    elif isinstance(spec, dict):
        candidates = spec.get("diagrams", [])
    else:
        candidates = []
    return [entry for entry in candidates if isinstance(entry, dict)]
def draw_diagram(diagram: dict[str, Any], out_path: Path) -> None:
    """Render one diagram spec (boxes plus arrows) to an image file."""
    label_font = get_font(11)
    fig, ax = plt.subplots(figsize=(diagram.get("width", 10), diagram.get("height", 6)))
    ax.axis("off")
    for box_spec in diagram.get("boxes", []):
        add_box(
            ax,
            tuple(box_spec.get("xy", [0.1, 0.1])),
            box_spec.get("width", 0.2),
            box_spec.get("height", 0.14),
            box_spec.get("text", ""),
            box_spec.get("facecolor", "#DBEAFE"),
            label_font,
        )
    for arrow_spec in diagram.get("arrows", []):
        add_arrow(ax, tuple(arrow_spec.get("start", [0, 0])), tuple(arrow_spec.get("end", [1, 1])))
    ax.set_title(diagram.get("title", out_path.stem), fontproperties=get_font(14))
    fig.savefig(out_path, dpi=180, bbox_inches="tight")
    plt.close(fig)
def main() -> None:
    """CLI entry: read a diagram spec JSON and render PNGs plus a manifest."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--spec", required=True)
    parser.add_argument("--out", required=True)
    args = parser.parse_args()
    out_dir = Path(args.out).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)
    manifest: list[dict[str, Any]] = []
    for position, diagram in enumerate(normalize_spec(read_json(Path(args.spec).resolve())), start=1):
        target = out_dir / (diagram.get("file_name") or f"diagram_{position}.png")
        draw_diagram(diagram, target)
        manifest.append({"title": diagram.get("title", target.stem), "path": str(target)})
    write_json(out_dir / "diagram_manifest.json", manifest)


if __name__ == "__main__":
    main()

View File

@ -1,247 +0,0 @@
from __future__ import annotations
import json
import os
import re
import tempfile
from pathlib import Path
from typing import Any
import yaml
# Repository layout anchors; this file lives two directory levels below the repo root.
REPO_ROOT = Path(__file__).resolve().parents[2]
INPUT_ROOT = REPO_ROOT / "input"
OUTPUT_ROOT = REPO_ROOT / "output"
# Canonical bundle identifiers plus accepted spelling aliases (underscore tolerated).
VALID_BUNDLES = ("technical", "business-other")
BUNDLE_ALIASES = {
    "technical": "technical",
    "business-other": "business-other",
    "business_other": "business-other",
}
# Per-bundle default artifact file names and document/TOC titles.
BUNDLE_DEFAULTS: dict[str, dict[str, str]] = {
    "technical": {
        "outline_json": "final_outline_technical.json",
        "content_json": "final_bid_content_technical.json",
        "outline_docx": "技术标_目录版.docx",
        "bid_docx": "技术标.docx",
        "outline_doc_title": "技术标(目录版)",
        "outline_toc_title": "目录",
        "bid_doc_title": "技术标",
        "bid_toc_title": "目录",
    },
    "business-other": {
        "outline_json": "final_outline_business_other.json",
        "content_json": "final_bid_content_business_other.json",
        "outline_docx": "商务及其他_目录版.docx",
        "bid_docx": "商务及其他.docx",
        "outline_doc_title": "商务及其他(目录版)",
        "outline_toc_title": "目录",
        "bid_doc_title": "商务及其他",
        "bid_toc_title": "目录",
    },
}
# Hedging/placeholder phrases that should not appear in deliverable text.
BANNED_WORDS = ["可能", "大概", "应该", "我觉得", "AI建议", "待确认"]
# Weak filename hints only. These hints may help AI label discovered files,
# but they must never be treated as workflow routing, directory semantics,
# or mandatory material categories.
MATERIAL_CATALOG = [
    {"key": "business_license", "label": "营业执照副本", "keywords": ["营业执照", "license"]},
    {"key": "qualification_certificate", "label": "资质证书", "keywords": ["资质", "证书", "许可", "qualification"]},
    {"key": "legal_representative_id", "label": "法定代表人身份证明", "keywords": ["法人", "法定代表人", "身份证明"]},
    {"key": "authorization_letter", "label": "授权委托书", "keywords": ["授权", "委托书", "authorization"]},
    {"key": "project_manager_certificate", "label": "项目经理证书", "keywords": ["项目经理", "pmp", "建造师"]},
    {"key": "similar_project_case", "label": "类似项目业绩证明", "keywords": ["业绩", "案例", "合同", "验收", "case"]},
    {"key": "quotation_basis", "label": "报价依据说明", "keywords": ["报价", "清单", "预算", "quote", "price"]},
]
# Project subdirectory names that are internal artifacts, never user materials.
RESERVED_PROJECT_DIRS = {
    "rfp",
    "work",
    "reports",
    "final",
    "__pycache__",
    ".git",
    ".hg",
    ".svn",
    ".idea",
    ".vscode",
    ".venv",
    "venv",
    "node_modules",
}
def ensure_dir(path: Path) -> Path:
    """Create *path* (with parents) if missing, then return it unchanged."""
    if not path.is_dir():
        path.mkdir(parents=True, exist_ok=True)
    return path
def write_text(path: Path, text: str) -> None:
    """Write UTF-8 text to *path* with LF newlines, creating parent dirs first."""
    ensure_dir(path.parent)
    path.write_text(text, newline="\n", encoding="utf-8")
def write_json(path: Path, data: Any) -> None:
    """Serialize *data* as pretty-printed UTF-8 JSON (non-ASCII kept literal)."""
    ensure_dir(path.parent)
    payload = json.dumps(data, indent=2, ensure_ascii=False)
    path.write_text(payload, encoding="utf-8")
def write_json_atomic(path: Path, data: Any, *, indent: int = 2, ensure_ascii: bool = False) -> None:
    """Atomically write *data* as JSON to *path*.

    The payload is streamed to a temp file in the destination directory,
    flushed and fsync'ed, then renamed over the target, so readers never
    observe a partially written file. On any failure the temp file is
    removed and the exception re-raised.
    """
    ensure_dir(path.parent)
    temp_path: Path | None = None
    # iterencode streams chunks instead of building the full serialized text.
    encoder = json.JSONEncoder(ensure_ascii=ensure_ascii, indent=indent)
    try:
        with tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            newline="\n",
            dir=str(path.parent),  # same directory/filesystem so replace() can be atomic
            prefix=f"{path.stem}.",
            suffix=".tmp",
            delete=False,
        ) as temp_file:
            temp_path = Path(temp_file.name)
            for chunk in encoder.iterencode(data):
                temp_file.write(chunk)
            temp_file.flush()
            os.fsync(temp_file.fileno())
        temp_path.replace(path)
    except Exception:
        # Best-effort cleanup of the orphaned temp file before re-raising.
        if temp_path and temp_path.exists():
            temp_path.unlink(missing_ok=True)
        raise
def read_json(path: Path) -> Any:
    """Parse JSON from *path*, tolerating a leading UTF-8 BOM."""
    raw = path.read_text(encoding="utf-8-sig")
    return json.loads(raw)
def load_yaml(path: Path) -> dict[str, Any]:
    """Load a YAML mapping from *path*; missing file or non-mapping yields {}."""
    if not path.exists():
        return {}
    parsed = yaml.safe_load(path.read_text(encoding="utf-8-sig"))
    if isinstance(parsed, dict):
        return parsed
    return {}
def normalize_text(text: str) -> str:
    """Collapse all whitespace runs to single spaces and trim both ends."""
    return " ".join((text or "").split())
def normalize_bundle(bundle: str | None) -> str | None:
    """Map a user-supplied bundle name to its canonical form.

    Returns None for None input; raises ValueError for unknown names.
    """
    if bundle is None:
        return None
    canonical = BUNDLE_ALIASES.get(bundle.strip())
    if not canonical:
        raise ValueError(f"不支持的 bundle: {bundle}。允许值:{', '.join(VALID_BUNDLES)}")
    return canonical
def ensure_output_layout(project_dir: Path) -> dict[str, Path]:
    """Create and return the standard output directory layout for a project.

    Note: several logical keys intentionally alias the same physical
    directory (artifacts/tables/work all point at work/).
    """
    layout = {
        "root": project_dir,
        "final": project_dir / "final",
        "artifacts": project_dir / "work",
        "tables": project_dir / "work",
        "reports": project_dir / "reports",
        "work": project_dir / "work",
    }
    for directory in layout.values():
        ensure_dir(directory)
    return layout
def get_bundle_defaults(bundle: str) -> dict[str, str]:
    """Return the default file names and titles for a canonical bundle."""
    canonical = normalize_bundle(bundle)
    if canonical is None:
        raise ValueError("bundle 不能为空。")
    return BUNDLE_DEFAULTS[canonical]
def get_bundle_outline_path(output_layout: dict[str, Path], bundle: str) -> Path:
    """Path of the bundle's outline JSON under work/."""
    defaults = get_bundle_defaults(bundle)
    return output_layout["work"] / defaults["outline_json"]


def get_bundle_content_path(output_layout: dict[str, Path], bundle: str) -> Path:
    """Path of the bundle's content JSON under work/."""
    defaults = get_bundle_defaults(bundle)
    return output_layout["work"] / defaults["content_json"]


def get_bundle_outline_docx_path(output_layout: dict[str, Path], bundle: str) -> Path:
    """Path of the bundle's outline DOCX under final/."""
    defaults = get_bundle_defaults(bundle)
    return output_layout["final"] / defaults["outline_docx"]


def get_bundle_bid_docx_path(output_layout: dict[str, Path], bundle: str) -> Path:
    """Path of the bundle's bid DOCX under final/."""
    defaults = get_bundle_defaults(bundle)
    return output_layout["final"] / defaults["bid_docx"]
def find_rfp_docx(project_dir: Path) -> Path:
    """Locate the first RFP DOCX (alphabetical order) under <project>/rfp.

    Raises FileNotFoundError when the rfp directory or any .docx is missing.
    """
    rfp_dir = project_dir / "rfp"
    if not rfp_dir.exists():
        raise FileNotFoundError(f"未找到招标文件目录: {rfp_dir}")
    candidates = sorted(rfp_dir.glob("*.docx"))
    if candidates:
        return candidates[0]
    raise FileNotFoundError(f"未找到 DOCX 招标文件: {rfp_dir}")
def get_project_config(project_dir: Path) -> dict[str, Any]:
    """Load <project>/config/project.yaml as a dict ({} when absent)."""
    config_path = project_dir / "config" / "project.yaml"
    return load_yaml(config_path)
def is_reserved_project_entry(path: Path) -> bool:
    """True when the entry name (case-insensitive) is an internal directory."""
    name = path.name.lower()
    return name in RESERVED_PROJECT_DIRS


def is_hidden_project_entry(path: Path) -> bool:
    """True for dotfiles and dot-directories."""
    name = path.name
    return name.startswith(".")
def iter_material_entries(project_dir: Path) -> list[Path]:
    """List non-reserved, non-hidden top-level entries of a project directory."""
    if not project_dir.exists():
        return []
    return [
        entry
        for entry in sorted(project_dir.iterdir())
        if not is_reserved_project_entry(entry) and not is_hidden_project_entry(entry)
    ]
def safe_filename(name: str) -> str:
    """Replace Windows-forbidden filename characters with underscores.

    Runs of forbidden characters collapse to one underscore; leading and
    trailing spaces/dots are stripped; an empty result becomes "untitled".
    """
    cleaned = re.sub(r'[<>:"/\\|?*]+', "_", name)
    cleaned = cleaned.strip(" .")
    return cleaned or "untitled"
def markdown_table(headers: list[str], rows: list[list[str]]) -> str:
    """Render a GitHub-style markdown table.

    Header and cell values are coerced to str, so numeric or other
    non-string cells no longer raise TypeError during join (backward
    compatible: string inputs render exactly as before). Note that '|'
    inside cells is not escaped; callers must pre-escape if needed.
    """
    header_cells = [str(cell) for cell in headers]
    lines = [
        "| " + " | ".join(header_cells) + " |",
        "| " + " | ".join(["---"] * len(header_cells)) + " |",
    ]
    for row in rows:
        lines.append("| " + " | ".join(str(cell) for cell in row) + " |")
    return "\n".join(lines)
def get_font_candidates() -> list[Path]:
    """Preferred CJK-capable Windows font files, in priority order."""
    fonts_dir = Path("C:/Windows/Fonts")
    names = ("msyh.ttc", "msyhbd.ttc", "simhei.ttf", "simsun.ttc")
    return [fonts_dir / name for name in names]
def find_font_path() -> Path | None:
    """Return the first existing candidate font, or None when none are installed."""
    return next((candidate for candidate in get_font_candidates() if candidate.exists()), None)
def list_files(path: Path) -> list[Path]:
    """All files under *path* (recursive, sorted); [] when the path is missing."""
    if not path.exists():
        return []
    return [entry for entry in sorted(path.rglob("*")) if entry.is_file()]

View File

@ -1,35 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from common import ensure_output_layout, get_bundle_bid_docx_path, get_bundle_content_path, normalize_bundle
from render_bid_docx import build_docx
from common import read_json
def main() -> None:
    """CLI entry: render the final bid DOCX from the content fact source."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--project", required=True)
    parser.add_argument("--content")
    parser.add_argument("--out")
    parser.add_argument("--bundle")
    args = parser.parse_args()
    project_dir = Path(args.project).resolve()
    output_layout = ensure_output_layout(project_dir)
    bundle = normalize_bundle(args.bundle)
    # Explicit --content wins; otherwise fall back to the bundle default,
    # then the legacy single-file location.
    if args.content:
        content_path = Path(args.content).resolve()
    elif bundle:
        content_path = get_bundle_content_path(output_layout, bundle)
    else:
        content_path = output_layout["work"] / "final_bid_content.json"
    if not content_path.exists():
        raise FileNotFoundError(f"未找到正文事实源: {content_path}。正文节点内容应由 AI 按已定稿目录填写,然后再调用本脚本渲染。")
    if args.out:
        out_path = Path(args.out).resolve()
    elif bundle:
        out_path = get_bundle_bid_docx_path(output_layout, bundle)
    else:
        out_path = output_layout["final"] / "投标文件.docx"
    build_docx(read_json(content_path), out_path)


if __name__ == "__main__":
    main()

98
scripts/docx_cli.py Normal file
View File

@ -0,0 +1,98 @@
from __future__ import annotations
import argparse
from pathlib import Path
from docx_ops_lib import (
apply_patch_document,
create_docx_document,
export_outline_artifacts,
index_document,
query_nodes,
read_json,
render_docx,
write_json,
)
from outline_check import check_outline
def main() -> None:
    """Unified DOCX operations CLI.

    Subcommands:
      index           index a DOCX into a JSON node list
      query           run a query file against a freshly built index of a DOCX
      create          build a DOCX from a JSON block spec and write a report
      outline-check   validate an outline JSON and write a report
      outline-export  write outline JSONs and render both outline DOCX files
      patch           apply a patch spec; optionally render-check the result
      render          render a DOCX into an output directory
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest="command", required=True)
    index_parser = subparsers.add_parser("index")
    index_parser.add_argument("--docx", required=True)
    index_parser.add_argument("--out", required=True)
    query_parser = subparsers.add_parser("query")
    query_parser.add_argument("--docx", required=True)
    query_parser.add_argument("--query-file", required=True)
    query_parser.add_argument("--out", required=True)
    create_parser = subparsers.add_parser("create")
    create_parser.add_argument("--spec-file", required=True)
    create_parser.add_argument("--report", required=True)
    check_parser = subparsers.add_parser("outline-check")
    check_parser.add_argument("--outline-file", required=True)
    check_parser.add_argument("--report", required=True)
    export_parser = subparsers.add_parser("outline-export")
    export_parser.add_argument("--spec-file", required=True)
    export_parser.add_argument("--report", required=True)
    patch_parser = subparsers.add_parser("patch")
    patch_parser.add_argument("--patch-file", required=True)
    patch_parser.add_argument("--report", required=True)
    patch_parser.add_argument("--render-check", action="store_true")
    patch_parser.add_argument("--render-dir")
    render_parser = subparsers.add_parser("render")
    render_parser.add_argument("--docx", required=True)
    render_parser.add_argument("--out-dir", required=True)
    render_parser.add_argument("--report")
    args = parser.parse_args()
    if args.command == "index":
        write_json(Path(args.out).resolve(), index_document(Path(args.docx).resolve()))
        return
    if args.command == "query":
        # The index is rebuilt from the DOCX on every query invocation.
        index_data = index_document(Path(args.docx).resolve())
        query_data = read_json(Path(args.query_file).resolve())
        write_json(Path(args.out).resolve(), query_nodes(index_data, query_data))
        return
    if args.command == "create":
        spec_data = read_json(Path(args.spec_file).resolve())
        write_json(Path(args.report).resolve(), create_docx_document(spec_data))
        return
    if args.command == "outline-check":
        outline_data = read_json(Path(args.outline_file).resolve())
        write_json(Path(args.report).resolve(), check_outline(outline_data))
        return
    if args.command == "outline-export":
        export_data = read_json(Path(args.spec_file).resolve())
        write_json(Path(args.report).resolve(), export_outline_artifacts(export_data))
        return
    if args.command == "patch":
        patch_data = read_json(Path(args.patch_file).resolve())
        report = apply_patch_document(patch_data)
        if args.render_check:
            output_docx = Path(report["output_docx"]).resolve()
            # Default render dir sits next to the patched file: <stem>_render/.
            render_dir = Path(args.render_dir).resolve() if args.render_dir else output_docx.parent / f"{output_docx.stem}_render"
            report["render"] = render_docx(output_docx, render_dir)
        write_json(Path(args.report).resolve(), report)
        return
    # Fallthrough: only "render" remains; subparsers reject unknown commands.
    report = render_docx(Path(args.docx).resolve(), Path(args.out_dir).resolve())
    if args.report:
        write_json(Path(args.report).resolve(), report)


if __name__ == "__main__":
    main()

21
scripts/docx_create.py Normal file
View File

@ -0,0 +1,21 @@
from __future__ import annotations
import argparse
from pathlib import Path
from docx_ops_lib import create_docx_document, read_json, write_json
def main() -> None:
    """CLI entry: build a DOCX from a JSON spec and write the build report."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--spec-file", required=True)
    parser.add_argument("--report", required=True)
    args = parser.parse_args()
    spec = read_json(Path(args.spec_file).resolve())
    write_json(Path(args.report).resolve(), create_docx_document(spec))


if __name__ == "__main__":
    main()

View File

@ -3,17 +3,16 @@ from __future__ import annotations
import argparse
from pathlib import Path
from common import read_json
from render_outline_docx import build_docx
from docx_ops_lib import index_document, write_json
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--outline", required=True)
parser.add_argument("--docx", required=True)
parser.add_argument("--out", required=True)
args = parser.parse_args()
build_docx(read_json(Path(args.outline).resolve()), Path(args.out).resolve())
write_json(Path(args.out).resolve(), index_document(Path(args.docx).resolve()))
if __name__ == "__main__":

853
scripts/docx_ops_lib.py Normal file
View File

@ -0,0 +1,853 @@
from __future__ import annotations
import json
import re
import shutil
import subprocess
from dataclasses import dataclass
from hashlib import sha1
from pathlib import Path
from typing import Any, Iterator
from docx import Document
from docx.document import Document as DocxDocument
from docx.oxml import OxmlElement
from docx.table import Table, _Cell
from docx.text.paragraph import Paragraph
try:
from pdf2image import convert_from_path
except ImportError: # pragma: no cover
convert_from_path = None
try:
from docx.oxml.ns import qn
except ImportError: # pragma: no cover
qn = None
# XML namespace map for WordprocessingML XPath queries.
NAMESPACES = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}
# Default character window for query context snippets.
TEXT_WINDOW_DEFAULT = 40
@dataclass
class NodeRecord:
    """One indexed node of a DOCX document (heading, paragraph, table part, ...)."""

    node_id: str
    node_type: str
    text: str
    style_name: str | None
    heading_level: int | None
    path: list[str]
    ordinal: int
    parent_id: str | None
    anchor: str
    container: str
    table_index: int | None = None
    row_index: int | None = None
    cell_index: int | None = None
    block_index: int | None = None
    xml_path: str | None = None
    has_image: bool = False
    object_ref: Any = None

    # Fields serialized by to_dict, in output order. object_ref is deliberately
    # excluded: it holds a live python-docx object and is not JSON-safe.
    _EXPORT_FIELDS = (
        "node_id",
        "node_type",
        "text",
        "style_name",
        "heading_level",
        "path",
        "ordinal",
        "parent_id",
        "anchor",
        "container",
        "table_index",
        "row_index",
        "cell_index",
        "block_index",
        "xml_path",
        "has_image",
    )

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable view of this node."""
        return {name: getattr(self, name) for name in self._EXPORT_FIELDS}
class QueryError(RuntimeError):
    """Raised when a DOCX query/spec payload is malformed or unsatisfiable."""
def read_json(path: Path) -> Any:
    """Parse JSON from *path*, tolerating a leading UTF-8 BOM."""
    raw = path.read_text(encoding="utf-8-sig")
    return json.loads(raw)
def write_json(path: Path, payload: Any) -> None:
    """Write pretty-printed UTF-8 JSON (with a trailing newline) to *path*."""
    path.parent.mkdir(parents=True, exist_ok=True)
    text = json.dumps(payload, ensure_ascii=False, indent=2) + "\n"
    with path.open("w", encoding="utf-8", newline="\n") as handle:
        handle.write(text)
def heading_level_for_style(style_name: str | None) -> int | None:
    """Extract the outline level from a Word heading style name.

    Recognizes English "Heading N" and Chinese "标题N" style names; returns
    None for anything else (including missing names and body styles).
    """
    if not style_name:
        return None
    compact = normalize_text(style_name)
    english = re.match(r"Heading\s+(\d+)$", compact, flags=re.IGNORECASE)
    if english:
        return int(english.group(1))
    chinese = re.match(r"标题\s*(\d+)$", compact)
    if chinese:
        return int(chinese.group(1))
    return None
def normalize_text(value: str) -> str:
    """Collapse whitespace runs to single spaces and strip both ends."""
    return " ".join((value or "").split())
def create_docx_document(spec_data: dict[str, Any]) -> dict[str, Any]:
    """Build a DOCX file from a declarative block spec and return a build report.

    spec_data keys:
      - output_docx (required): target path for the rendered file.
      - title (optional): written into the document core properties.
      - blocks (optional): list of block objects; supported types are
        "heading" (with optional nested children), "paragraph", "list",
        "table" and "page_break". An omitted "type" defaults to "paragraph".

    Raises QueryError for malformed specs (non-list blocks, bad heading
    level, ragged table rows, unknown block type). The report includes
    per-block summaries plus the summary of a fresh index of the saved file.
    """
    output_docx = Path(spec_data["output_docx"]).resolve()
    blocks = spec_data.get("blocks", [])
    if not isinstance(blocks, list):
        raise QueryError("blocks must be a list")
    output_docx.parent.mkdir(parents=True, exist_ok=True)
    document = Document()
    title = spec_data.get("title")
    if title:
        document.core_properties.title = str(title)
    block_reports: list[dict[str, Any]] = []
    def render_block(block: dict[str, Any], index_path: list[int]) -> None:
        # index_path is this block's position in the (possibly nested) spec
        # tree; it is dot-joined for error messages and report entries.
        if not isinstance(block, dict):
            raise QueryError(f"block {'.'.join(str(part) for part in index_path)} must be an object")
        block_type = block.get("type", "paragraph")
        if block_type == "heading":
            level = int(block.get("level", 1))
            if level < 1 or level > 9:
                raise QueryError(f"block {'.'.join(str(part) for part in index_path)} heading level must be between 1 and 9")
            text = str(block.get("text", ""))
            paragraph = document.add_paragraph(style=f"Heading {level}")
            paragraph.add_run(text)
            block_reports.append({"index": ".".join(str(part) for part in index_path), "type": block_type, "text": summarize_text(text), "level": level})
            # Headings may nest child blocks, rendered depth-first after the heading.
            children = block.get("children", [])
            if children and not isinstance(children, list):
                raise QueryError(f"block {'.'.join(str(part) for part in index_path)} children must be a list")
            if isinstance(children, list):
                for child_index, child in enumerate(children):
                    render_block(child, index_path + [child_index])
            return
        if block_type == "paragraph":
            text = str(block.get("text", ""))
            paragraph = document.add_paragraph()
            style_name = block.get("style")
            if style_name:
                try:
                    paragraph.style = str(style_name)
                except KeyError:
                    # Unknown style names are ignored rather than failing the build.
                    pass
            paragraph.add_run(text)
            block_reports.append({"index": ".".join(str(part) for part in index_path), "type": block_type, "text": summarize_text(text)})
            return
        if block_type == "list":
            items = block.get("items", [])
            if not isinstance(items, list):
                raise QueryError(f"block {'.'.join(str(part) for part in index_path)} items must be a list")
            style_name = str(block.get("style", "List Bullet"))
            for item in items:
                paragraph = document.add_paragraph()
                try:
                    paragraph.style = style_name
                except KeyError:
                    # Fall back to the default style when the list style is absent.
                    pass
                paragraph.add_run(str(item))
            block_reports.append({"index": ".".join(str(part) for part in index_path), "type": block_type, "item_count": len(items)})
            return
        if block_type == "table":
            rows = block.get("rows", [])
            if not isinstance(rows, list) or not rows or not isinstance(rows[0], list) or not rows[0]:
                raise QueryError(f"block {'.'.join(str(part) for part in index_path)} rows must be a non-empty 2D list")
            # Start with zero rows and append one per spec row; the first row
            # fixes the column count for the whole table.
            table = document.add_table(rows=0, cols=len(rows[0]))
            style_name = block.get("style")
            if style_name:
                try:
                    table.style = str(style_name)
                except KeyError:
                    pass
            for row_values in rows:
                if not isinstance(row_values, list) or len(row_values) != len(rows[0]):
                    raise QueryError(f"block {'.'.join(str(part) for part in index_path)} table rows must have equal column counts")
                row = table.add_row()
                for cell_index, value in enumerate(row_values):
                    row.cells[cell_index].text = str(value)
            block_reports.append({"index": ".".join(str(part) for part in index_path), "type": block_type, "row_count": len(rows), "column_count": len(rows[0])})
            return
        if block_type == "page_break":
            document.add_page_break()
            block_reports.append({"index": ".".join(str(part) for part in index_path), "type": block_type})
            return
        raise QueryError(f"unsupported block type: {block_type}")
    for index, block in enumerate(blocks):
        render_block(block, [index])
    document.save(str(output_docx))
    # Re-index the written file so the report reflects what was actually saved.
    final_index = index_document(output_docx)
    return {
        "status": "ok",
        "output_docx": str(output_docx),
        "block_count": len(blocks),
        "blocks": block_reports,
        "final_summary": final_index["summary"],
    }
def export_outline_artifacts(payload: dict[str, Any]) -> dict[str, Any]:
    """Persist both outline JSONs and render both outline DOCX files.

    Expects payload keys: technical_outline, business_outline plus the four
    output-path keys. Raises QueryError when an outline is not an object or
    lacks a "blocks" list; raises KeyError when a path key is missing.
    """
    technical_outline = payload.get("technical_outline")
    business_outline = payload.get("business_outline")
    technical_json = Path(payload["technical_outline_json"]).resolve()
    business_json = Path(payload["business_outline_json"]).resolve()
    technical_docx = Path(payload["technical_docx"]).resolve()
    business_docx = Path(payload["business_docx"]).resolve()
    # Validate both outlines before writing anything.
    for name, outline in (("technical_outline", technical_outline), ("business_outline", business_outline)):
        if not isinstance(outline, dict):
            raise QueryError(f"{name} must be an object")
        if not isinstance(outline.get("blocks"), list):
            raise QueryError(f"{name}.blocks must be a list")
    write_json(technical_json, technical_outline)
    write_json(business_json, business_outline)
    technical_report = create_docx_document({
        "output_docx": str(technical_docx),
        "title": str(technical_outline.get("title", "技术标目录")),
        "blocks": technical_outline["blocks"],
    })
    business_report = create_docx_document({
        "output_docx": str(business_docx),
        "title": str(business_outline.get("title", "商务及其他目录")),
        "blocks": business_outline["blocks"],
    })
    return {
        "status": "ok",
        "technical_outline_json": str(technical_json),
        "business_outline_json": str(business_json),
        "technical_docx": str(technical_docx),
        "business_docx": str(business_docx),
        "technical_report": technical_report,
        "business_report": business_report,
    }
def slugify_text(value: str, *, limit: int = 32) -> str:
    """Build a lowercase hyphen-separated slug (CJK characters kept), capped at *limit*.

    Empty or fully stripped inputs yield "empty".
    """
    compact = normalize_text(value)
    if not compact:
        return "empty"
    slug = re.sub(r"[^\w\u4e00-\u9fff-]+", "-", compact, flags=re.UNICODE)
    slug = re.sub(r"-+", "-", slug).strip("-").lower()
    return slug[:limit] if slug else "empty"
def summarize_text(value: str, *, limit: int = 80) -> str:
    """First *limit* characters of the whitespace-normalized text."""
    compact = normalize_text(value)
    return compact[:limit]
def iter_block_items(parent: DocxDocument | _Cell) -> Iterator[Paragraph | Table]:
    """Yield paragraphs and tables of *parent* in document order.

    Works for both the document body and an individual table cell.
    """
    container = parent.element.body if isinstance(parent, DocxDocument) else parent._tc
    for element in container.iterchildren():
        tag = element.tag
        if tag.endswith("}p"):
            yield Paragraph(element, parent)
        elif tag.endswith("}tbl"):
            yield Table(element, parent)
def paragraph_has_image(paragraph: Paragraph) -> bool:
    """True when the paragraph XML contains at least one w:drawing element."""
    drawings = paragraph._element.xpath(".//w:drawing")
    return len(drawings) > 0
def paragraph_is_list_item(paragraph: Paragraph) -> bool:
    """Heuristic list detection: list-style name or explicit numbering properties."""
    style = paragraph.style
    style_name = style.name if style else ""
    if style_name.lower().startswith("list"):
        return True
    properties = paragraph._element.pPr
    if properties is None:
        return False
    return properties.numPr is not None
def build_anchor(path: list[str], node_type: str, text: str, ordinal: int) -> str:
    """Build a stable, human-readable anchor id for a node.

    Combines slugs of the heading path and node text with the ordinal and a
    short SHA-1 digest so anchors stay unique even for identical texts.
    """
    seed = "|".join(["/".join(path), node_type, summarize_text(text, limit=32), str(ordinal)])
    digest = sha1(seed.encode("utf-8")).hexdigest()[:10]
    text_slug = slugify_text(text, limit=24)
    path_slug = slugify_text("-".join(path), limit=24)
    return f"{path_slug}:{node_type}:{text_slug}:{ordinal}:{digest}"
def _index_document_core(document: Document) -> list[NodeRecord]:
    """Walk the document body and flatten every block into NodeRecord entries.

    Headings maintain a text stack (path) and a level->node_id map so every
    subsequent node can be parented to the nearest enclosing heading. Tables
    expand into table -> table_row -> table_cell records, and any paragraph
    containing a drawing additionally emits an image_placeholder child node.
    """
    nodes: list[NodeRecord] = []
    heading_stack: list[str] = []  # heading texts from level 1 down to current
    heading_ids: dict[int, str] = {}  # heading level -> node_id of latest heading at that level
    ordinal = 0
    def current_parent_id() -> str | None:
        # Parent is the deepest heading seen so far (None before any heading).
        if not heading_ids:
            return None
        return heading_ids[max(heading_ids)]
    def add_record(
        *,
        node_type: str,
        text: str,
        style_name: str | None,
        heading_level: int | None,
        path: list[str],
        parent_id: str | None,
        container: str,
        object_ref: Any,
        table_index: int | None = None,
        row_index: int | None = None,
        cell_index: int | None = None,
        block_index: int | None = None,
        xml_path: str | None = None,
        has_image: bool = False,
    ) -> NodeRecord:
        # Allocate the next sequential node id (n-00001, n-00002, ...) and append.
        nonlocal ordinal
        ordinal += 1
        node_id = f"n-{ordinal:05d}"
        record = NodeRecord(
            node_id=node_id,
            node_type=node_type,
            text=normalize_text(text),
            style_name=style_name,
            heading_level=heading_level,
            path=path,
            ordinal=ordinal,
            parent_id=parent_id,
            anchor=build_anchor(path, node_type, text, ordinal),
            container=container,
            table_index=table_index,
            row_index=row_index,
            cell_index=cell_index,
            block_index=block_index,
            xml_path=xml_path,
            has_image=has_image,
            object_ref=object_ref,
        )
        nodes.append(record)
        return record
    for block_index, block in enumerate(iter_block_items(document)):
        if isinstance(block, Paragraph):
            text = normalize_text(block.text)
            style_name = block.style.name if block.style else None
            level = heading_level_for_style(style_name)
            if level is not None:
                # Entering a heading: drop stack entries and heading ids at or
                # below this level, then push the new heading text.
                while len(heading_stack) >= level:
                    heading_stack.pop()
                heading_stack.append(text or f"Heading {level}")
                heading_ids = {key: value for key, value in heading_ids.items() if key < level}
                record = add_record(
                    node_type="heading",
                    text=text,
                    style_name=style_name,
                    heading_level=level,
                    path=list(heading_stack),
                    parent_id=heading_ids.get(level - 1),
                    container="document",
                    object_ref=block,
                    block_index=block_index,
                    has_image=paragraph_has_image(block),
                )
                heading_ids[level] = record.node_id
                if record.has_image:
                    # Drawings inside a heading get their own placeholder node.
                    add_record(
                        node_type="image_placeholder",
                        text=text or "[image]",
                        style_name=style_name,
                        heading_level=level,
                        path=list(heading_stack),
                        parent_id=record.node_id,
                        container="document",
                        object_ref=block,
                        block_index=block_index,
                        has_image=True,
                    )
                continue
            record = add_record(
                node_type="list_item" if paragraph_is_list_item(block) else "paragraph",
                text=text,
                style_name=style_name,
                heading_level=None,
                path=list(heading_stack),
                parent_id=current_parent_id(),
                container="document",
                object_ref=block,
                block_index=block_index,
                has_image=paragraph_has_image(block),
            )
            if record.has_image:
                add_record(
                    node_type="image_placeholder",
                    text=text or "[image]",
                    style_name=style_name,
                    heading_level=None,
                    path=list(heading_stack),
                    parent_id=record.node_id,
                    container="document",
                    object_ref=block,
                    block_index=block_index,
                    has_image=True,
                )
        else:
            # Table block: one summary record plus row and cell records.
            table_text = "\n".join(
                " | ".join(normalize_text(cell.text) for cell in row.cells)
                for row in block.rows
            )
            table_record = add_record(
                node_type="table",
                text=table_text,
                style_name=block.style.name if block.style else None,
                heading_level=None,
                path=list(heading_stack),
                parent_id=current_parent_id(),
                container="document",
                object_ref=block,
                block_index=block_index,
                xml_path=f"table[{block_index}]",
            )
            for row_index, row in enumerate(block.rows):
                row_text = " | ".join(normalize_text(cell.text) for cell in row.cells)
                row_record = add_record(
                    node_type="table_row",
                    text=row_text,
                    style_name=table_record.style_name,
                    heading_level=None,
                    path=list(heading_stack),
                    parent_id=table_record.node_id,
                    container="table",
                    object_ref=row,
                    table_index=block_index,
                    row_index=row_index,
                    xml_path=f"table[{block_index}]/row[{row_index}]",
                )
                for cell_index, cell in enumerate(row.cells):
                    add_record(
                        node_type="table_cell",
                        text="\n".join(
                            normalize_text(paragraph.text)
                            for paragraph in cell.paragraphs
                            if normalize_text(paragraph.text)
                        ),
                        style_name=None,
                        heading_level=None,
                        path=list(heading_stack),
                        parent_id=row_record.node_id,
                        container="table",
                        object_ref=cell,
                        table_index=block_index,
                        row_index=row_index,
                        cell_index=cell_index,
                        xml_path=f"table[{block_index}]/row[{row_index}]/cell[{cell_index}]",
                    )
    return nodes
def index_document(docx_path: Path) -> dict[str, Any]:
    """Parse *docx_path* and return a JSON-serializable structural index.

    The payload carries a per-node-type summary plus the flat node list
    produced by _index_document_core.
    """
    document = Document(str(docx_path))
    records = _index_document_core(document)

    def count_of(kind: str) -> int:
        # Tally records of a single node_type for the summary section.
        return sum(1 for record in records if record.node_type == kind)

    summary = {
        "node_count": len(records),
        "heading_count": count_of("heading"),
        "paragraph_count": count_of("paragraph"),
        "list_item_count": count_of("list_item"),
        "table_count": count_of("table"),
        "image_placeholder_count": count_of("image_placeholder"),
    }
    return {
        "status": "ok",
        "docx": str(docx_path),
        "summary": summary,
        "nodes": [record.to_dict() for record in records],
    }
def query_nodes(index_data: dict[str, Any], query: dict[str, Any]) -> dict[str, Any]:
    """Filter the indexed nodes by *query* and return a match report.

    Supported match modes: exact_text, contains_text (default), regex,
    heading_path, heading_text, table_title, style_name, node_type,
    anchor, node_id. Optional structural filters: node_type, style_name,
    heading_level. "occurrence" picks the n-th match; "allow_multiple"
    suppresses the ambiguity flag; "context_window" bounds the context
    excerpt length.

    Raises QueryError when a required value is missing or the mode is unknown.
    """
    nodes = index_data.get("nodes", [])
    mode = query.get("match_mode", "contains_text")
    value = query.get("value")
    # Every mode except node_type needs an explicit comparison value.
    if value is None and mode not in {"node_type"}:
        raise QueryError("query.value is required")
    node_type_filter = query.get("node_type")
    style_name_filter = query.get("style_name")
    heading_level = query.get("heading_level")
    allow_multiple = bool(query.get("allow_multiple", False))
    occurrence = query.get("occurrence")
    window = int(query.get("context_window", TEXT_WINDOW_DEFAULT))

    def node_matches(node: dict[str, Any]) -> bool:
        # Structural filters apply regardless of the chosen match mode.
        if node_type_filter and node.get("node_type") != node_type_filter:
            return False
        if style_name_filter and node.get("style_name") != style_name_filter:
            return False
        if heading_level is not None and node.get("heading_level") != heading_level:
            return False
        node_text = node.get("text", "")
        if mode == "exact_text":
            return node_text == value
        if mode == "contains_text":
            return value in node_text
        if mode == "regex":
            return re.search(value, node_text) is not None
        if mode == "heading_path":
            return node.get("node_type") == "heading" and " > ".join(node.get("path", [])) == value
        if mode == "heading_text":
            return node.get("node_type") == "heading" and node_text == value
        if mode == "table_title":
            # A table's "title" is the innermost heading on its path.
            path_parts = node.get("path", [])
            return node.get("node_type") == "table" and bool(path_parts) and path_parts[-1] == value
        if mode == "style_name":
            return node.get("style_name") == value
        if mode == "node_type":
            return node.get("node_type") == query.get("value")
        if mode == "anchor":
            return node.get("anchor") == value
        if mode == "node_id":
            return node.get("node_id") == value
        raise QueryError(f"unsupported match_mode: {mode}")

    matches = [node for node in nodes if node_matches(node)]
    if occurrence is not None:
        # An out-of-range occurrence yields no matches instead of IndexError.
        matches = [matches[occurrence]] if 0 <= occurrence < len(matches) else []
    ambiguous = len(matches) > 1 and not allow_multiple
    best_match = matches[0] if len(matches) == 1 or (allow_multiple and matches) else None

    def with_context(node: dict[str, Any]) -> dict[str, Any]:
        # NOTE(review): "before"/"after" are the head and tail of the node's
        # own text, not surrounding-document context — confirm intended.
        text = node.get("text", "")
        return {
            **node,
            "context": {
                "before": text[:window],
                "after": text[-window:] if text else "",
            },
        }

    return {
        "status": "ok",
        "query": query,
        "match_count": len(matches),
        "ambiguous": ambiguous,
        "best_match": with_context(best_match) if best_match else None,
        "candidate_anchors": [match["anchor"] for match in matches],
        "matches": [with_context(match) for match in matches],
        "errors": ["query matched multiple nodes"] if ambiguous else [],
        "warnings": [],
    }
def find_records(index_data: dict[str, Any], query: dict[str, Any]) -> list[dict[str, Any]]:
    """Run *query* via query_nodes and enforce its on_ambiguous/on_missing policies.

    Raises QueryError when the query is ambiguous or empty and the
    corresponding policy (default "error") demands failure.
    """
    outcome = query_nodes(index_data, query)
    ambiguous_policy = query.get("on_ambiguous", "error")
    if outcome["ambiguous"] and ambiguous_policy == "error":
        raise QueryError("query matched multiple nodes")
    missing_policy = query.get("on_missing", "error")
    if outcome["match_count"] == 0 and missing_policy == "error":
        raise QueryError("query matched no nodes")
    return outcome["matches"]
def clone_run_format(source_run: Any, target_run: Any) -> None:
    """Copy character formatting from *source_run* onto *target_run*.

    Mirrors bold/italic/underline, font name and size, an explicit RGB
    color when one is set, and the East-Asian font binding (w:eastAsia),
    which python-docx does not expose through its high-level API.
    """
    target_run.bold = source_run.bold
    target_run.italic = source_run.italic
    target_run.underline = source_run.underline
    target_run.font.name = source_run.font.name
    target_run.font.size = source_run.font.size
    # Only copy when an explicit rgb is present (theme colors have no rgb).
    if source_run.font.color and source_run.font.color.rgb:
        target_run.font.color.rgb = source_run.font.color.rgb
    # qn may be falsy — presumably an optional oxml import; confirm at module top.
    if qn and source_run._element.rPr is not None and source_run._element.rPr.rFonts is not None:
        east_asia = source_run._element.rPr.rFonts.get(qn("w:eastAsia"))
        if east_asia:
            # NOTE(review): assumes get_or_add_rPr().rFonts is non-None on the
            # target; if rFonts can be absent this raises — verify against
            # python-docx's oxml API.
            target_run._element.get_or_add_rPr().rFonts.set(qn("w:eastAsia"), east_asia)
def clear_paragraph(paragraph: Paragraph) -> None:
    """Remove every run and hyperlink from *paragraph*, keeping its properties."""
    element = paragraph._element
    removable = [
        child
        for child in element
        if child.tag.endswith("}r") or child.tag.endswith("}hyperlink")
    ]
    for child in removable:
        element.remove(child)
def replace_text_in_paragraph(paragraph: Paragraph, old_text: str, new_text: str) -> bool:
    """Replace the first occurrence of *old_text* in *paragraph* with *new_text*.

    Fast path: when the occurrence lies within a single run, edit that run
    in place so all other runs keep their formatting. Otherwise the
    occurrence spans run boundaries; the paragraph is rebuilt as a single
    run carrying the FULL replaced text, formatted like the first original
    run.

    Returns True when a replacement was made, False when *old_text* does
    not occur in the paragraph at all.
    """
    if old_text not in paragraph.text:
        return False
    for run in paragraph.runs:
        if old_text in run.text:
            run.text = run.text.replace(old_text, new_text, 1)
            return True
    # Spanning case. BUGFIX: rebuild from the whole paragraph text — the
    # previous code added only new_text, silently dropping everything else
    # the paragraph contained.
    merged_text = paragraph.text.replace(old_text, new_text, 1)
    existing_runs = list(paragraph.runs)
    first_run = existing_runs[0] if existing_runs else paragraph.add_run()
    clear_paragraph(paragraph)
    new_run = paragraph.add_run(merged_text)
    if existing_runs:
        clone_run_format(first_run, new_run)
    return True
def delete_block(block: Paragraph | Table) -> None:
    """Detach *block*'s XML element from its parent, removing it from the document."""
    xml_element = block._element
    container = xml_element.getparent()
    if container is None:
        # Already detached; nothing to do.
        return
    container.remove(xml_element)
def insert_paragraph_relative(target: Paragraph | Table, *, after: bool, style_name: str | None = None) -> Paragraph:
    """Create an empty paragraph immediately before/after *target* and return it."""
    new_element = OxmlElement("w:p")
    anchor = target._element
    # addnext/addprevious place the element as a sibling in document order.
    (anchor.addnext if after else anchor.addprevious)(new_element)
    paragraph = Paragraph(new_element, target._parent)
    if style_name:
        try:
            paragraph.style = style_name
        except KeyError:
            # Style missing from the document's style table; keep the default.
            pass
    return paragraph
def append_paragraph_contents(paragraph: Paragraph, text: str, source: Paragraph | None = None) -> None:
    """Append *text* as a run to *paragraph*, mirroring *source*'s formatting when given."""
    if source is not None and source.style is not None:
        paragraph.style = source.style
        src_format = source.paragraph_format
        dst_format = paragraph.paragraph_format
        # Copy the explicit paragraph-level layout attributes one by one.
        dst_format.left_indent = src_format.left_indent
        dst_format.right_indent = src_format.right_indent
        dst_format.first_line_indent = src_format.first_line_indent
        dst_format.space_before = src_format.space_before
        dst_format.space_after = src_format.space_after
        dst_format.line_spacing = src_format.line_spacing
        paragraph.alignment = source.alignment
    new_run = paragraph.add_run(text)
    if source is not None and source.runs:
        # Character formatting comes from the source's first run.
        clone_run_format(source.runs[0], new_run)
def create_table_after(target: Paragraph | Table, rows: list[list[str]], style_name: str | None = None) -> Table:
    """Build a table from *rows* (list of cell-text lists) and move it after *target*."""
    column_count = len(rows[0]) if rows else 1
    table = target._parent.add_table(rows=0, cols=column_count)
    if style_name:
        try:
            table.style = style_name
        except KeyError:
            # Unknown style in this document; leave the default table style.
            pass
    for row_values in rows:
        new_row = table.add_row()
        for position, cell_text in enumerate(row_values):
            new_row.cells[position].text = cell_text
    # add_table appended at the container's end; relocate next to the target.
    target._element.addnext(table._element)
    return table
def build_live_index(document: Document) -> tuple[dict[str, Any], dict[str, NodeRecord]]:
    """Index the in-memory *document* and return (index payload, anchor->record map)."""
    records = _index_document_core(document)
    index_payload = {
        "status": "ok",
        "summary": {"node_count": len(records)},
        "nodes": [record.to_dict() for record in records],
    }
    anchor_map = {record.anchor: record for record in records}
    return index_payload, anchor_map
def insert_blocks(record: NodeRecord, operation: dict[str, Any], *, after: bool) -> None:
    """Insert new content directly before/after *record*'s block.

    operation["content_type"] selects the builder:
      * "paragraphs" (default) — list (or scalar) of body paragraph texts
      * "heading"   — dict {"text", "level"} or plain text
      * "list"      — list of bullet-item texts, styled "List Bullet"
      * "table"     — {"rows": [[...]]} or a plain rows list

    Raises QueryError for non-block targets, missing table rows, or an
    unknown content_type.
    """
    content_type = operation.get("content_type", "paragraphs")
    content = operation.get("content")
    if record.node_type not in {"paragraph", "list_item", "heading", "table"}:
        raise QueryError("insert operations only support block nodes")
    target = record.object_ref
    if content_type == "paragraphs":
        paragraphs = content if isinstance(content, list) else [str(content)]
        previous: Paragraph | Table = target
        for index, paragraph_text in enumerate(paragraphs):
            # Only the first paragraph honors the requested direction; the
            # rest chain after it so the group keeps its order.
            new_paragraph = insert_paragraph_relative(
                previous,
                after=after if index == 0 else True,
                style_name=record.style_name if record.node_type in {"paragraph", "list_item"} else "Normal",
            )
            # Copy formatting from the target only when it is paragraph-like.
            source_paragraph = target if isinstance(target, Paragraph) and record.node_type in {"paragraph", "list_item"} else None
            append_paragraph_contents(new_paragraph, str(paragraph_text), source=source_paragraph)
            previous = new_paragraph
        return
    if content_type == "heading":
        payload = content if isinstance(content, dict) else {"text": str(content)}
        # Default to the target's own heading level when none is given.
        level = int(payload.get("level", record.heading_level or 1))
        new_paragraph = insert_paragraph_relative(target, after=after, style_name=f"Heading {level}")
        append_paragraph_contents(new_paragraph, str(payload.get("text", "")), source=target if isinstance(target, Paragraph) else None)
        try:
            # Re-apply: append_paragraph_contents may have copied the source style.
            new_paragraph.style = f"Heading {level}"
        except KeyError:
            pass
        return
    if content_type == "list":
        items = content if isinstance(content, list) else []
        previous: Paragraph | Table = target
        for index, item in enumerate(items):
            new_paragraph = insert_paragraph_relative(
                previous,
                after=after if index == 0 else True,
                style_name="List Bullet",
            )
            source_paragraph = target if isinstance(target, Paragraph) and record.node_type == "list_item" else None
            append_paragraph_contents(new_paragraph, str(item), source=source_paragraph)
            try:
                new_paragraph.style = "List Bullet"
            except KeyError:
                pass
            previous = new_paragraph
        return
    if content_type == "table":
        rows = content.get("rows") if isinstance(content, dict) else content
        if not isinstance(rows, list) or not rows:
            raise QueryError("table content must provide rows")
        style_name = None
        if isinstance(target, Table) and target.style is not None:
            style_name = target.style.name
        # NOTE(review): tables are always placed after the target; the *after*
        # flag is not honored here — confirm this is intended.
        create_table_after(target, rows, style_name=style_name)
        return
    raise QueryError(f"unsupported content_type: {content_type}")
def replace_block(record: NodeRecord, operation: dict[str, Any]) -> None:
    """Replace *record*'s block: insert the new content just before it,
    then delete the original element."""
    target = record.object_ref
    insert_blocks(record, operation, after=False)
    delete_block(target)
def apply_patch_document(patch_data: dict[str, Any]) -> dict[str, Any]:
    """Apply a list of patch operations to a DOCX and return a run report.

    patch_data keys: source_docx, output_docx (optional), in_place (bool),
    operations — each with "op" (replace_text, delete_node, insert_before,
    insert_after, replace_node), a "target" query, and op-specific fields.
    The live index is rebuilt before every operation so earlier edits
    cannot invalidate later anchors.

    Raises QueryError for output-path conflicts, ambiguous/missing targets
    (per policy), and unsupported ops or node types.
    """
    source_docx = Path(patch_data["source_docx"]).resolve()
    output_docx = Path(patch_data.get("output_docx", source_docx)).resolve()
    in_place = bool(patch_data.get("in_place", False))
    if not in_place and output_docx == source_docx:
        raise QueryError("output_docx must differ from source_docx unless in_place is true")
    output_docx.parent.mkdir(parents=True, exist_ok=True)
    if not in_place:
        # Work on a copy so the source file is never modified.
        shutil.copy2(source_docx, output_docx)
    document = Document(str(output_docx))
    operations = patch_data.get("operations", [])
    operation_reports: list[dict[str, Any]] = []
    for index, operation in enumerate(operations):
        # Re-index the in-memory document; prior ops may have moved nodes.
        live_index, record_map = build_live_index(document)
        matches = find_records(live_index, operation.get("target", {}))
        if len(matches) > 1 and operation.get("on_ambiguous", "error") == "error":
            raise QueryError(f"operation {index} matched multiple nodes")
        selected = matches if operation.get("allow_multiple") else matches[:1]
        if not selected and operation.get("on_missing", "error") == "error":
            raise QueryError(f"operation {index} matched no nodes")
        affected: list[dict[str, Any]] = []
        for match in selected:
            record = record_map[match["anchor"]]
            before_summary = summarize_text(record.text)
            op_name = operation["op"]
            if op_name == "replace_text":
                old_text = operation["old_text"]
                new_text = operation["new_text"]
                if record.node_type not in {"paragraph", "list_item", "heading"}:
                    raise QueryError("replace_text only supports paragraph-like nodes")
                if not replace_text_in_paragraph(record.object_ref, old_text, new_text):
                    raise QueryError(f"text not found in node {record.anchor}")
            elif op_name == "delete_node":
                if record.node_type not in {"paragraph", "list_item", "heading", "table"}:
                    raise QueryError("delete_node only supports block nodes")
                delete_block(record.object_ref)
            elif op_name == "insert_before":
                insert_blocks(record, operation, after=False)
            elif op_name == "insert_after":
                insert_blocks(record, operation, after=True)
            elif op_name == "replace_node":
                replace_block(record, operation)
            else:
                raise QueryError(f"unsupported op: {op_name}")
            affected.append(
                {
                    "anchor": record.anchor,
                    "node_type": record.node_type,
                    "before": before_summary,
                    "op": op_name,
                }
            )
        # NOTE(review): saving after every operation checkpoints partial
        # progress to disk; the final save below would suffice — confirm
        # whether per-operation checkpointing is intended before removing.
        document.save(str(output_docx))
        operation_reports.append(
            {
                "index": index,
                "op": operation["op"],
                "match_count": len(selected),
                "affected": affected,
            }
        )
    document.save(str(output_docx))
    # Re-index the saved file so the report reflects the final structure.
    final_index = index_document(output_docx)
    return {
        "status": "ok",
        "source_docx": str(source_docx),
        "output_docx": str(output_docx),
        "in_place": in_place,
        "operation_count": len(operations),
        "operations": operation_reports,
        "errors": [],
        "warnings": [],
        "final_summary": final_index["summary"],
    }
def render_docx(docx_path: Path, out_dir: Path) -> dict[str, Any]:
    """Convert a DOCX to PDF via headless LibreOffice and rasterize pages to PNG.

    Degrades gracefully: returns status "render_skipped" when soffice is
    not installed, "error" when PDF conversion fails, and "ok" otherwise;
    PNG rasterization problems are reported as warnings, not errors.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    # soffice names its output after the input stem, matching pdf_path.
    pdf_path = out_dir / f"{docx_path.stem}.pdf"
    png_dir = out_dir / "pages"
    png_dir.mkdir(parents=True, exist_ok=True)
    soffice = shutil.which("soffice")
    if not soffice:
        return {
            "status": "render_skipped",
            "docx": str(docx_path),
            "pdf": None,
            "page_count": 0,
            "images": [],
            "errors": [],
            "warnings": ["LibreOffice/soffice not found"],
        }
    process = subprocess.run(
        [soffice, "--headless", "--convert-to", "pdf", "--outdir", str(out_dir), str(docx_path)],
        capture_output=True,
        text=True,
        encoding="utf-8",
    )
    if process.returncode != 0 or not pdf_path.exists():
        return {
            "status": "error",
            "docx": str(docx_path),
            "pdf": str(pdf_path),
            "page_count": 0,
            "images": [],
            "errors": [process.stderr.strip() or "failed to convert docx to pdf"],
            "warnings": [],
        }
    images: list[str] = []
    warnings: list[str] = []
    if convert_from_path is None:
        # pdf2image is optional; skip rasterization but still report the PDF.
        warnings.append("pdf2image not installed")
    else:
        try:
            for page_number, image in enumerate(convert_from_path(str(pdf_path)), start=1):
                image_path = png_dir / f"page-{page_number:03d}.png"
                image.save(str(image_path), "PNG")
                images.append(str(image_path))
        except Exception as exc:  # pragma: no cover
            warnings.append(f"PNG render skipped: {exc}")
    return {
        "status": "ok",
        "docx": str(docx_path),
        "pdf": str(pdf_path),
        "page_count": len(images),
        "images": images,
        "errors": [],
        "warnings": warnings,
    }

27
scripts/docx_patch.py Normal file
View File

@ -0,0 +1,27 @@
from __future__ import annotations
import argparse
from pathlib import Path
from docx_ops_lib import apply_patch_document, read_json, render_docx, write_json
def main() -> None:
    """CLI: apply a JSON patch file to a DOCX and optionally render-check the result."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--patch-file", required=True)
    parser.add_argument("--report", required=True)
    parser.add_argument("--render-check", action="store_true")
    parser.add_argument("--render-dir")
    args = parser.parse_args()

    report = apply_patch_document(read_json(Path(args.patch_file).resolve()))
    if args.render_check:
        output_docx = Path(report["output_docx"]).resolve()
        if args.render_dir:
            render_dir = Path(args.render_dir).resolve()
        else:
            # Default to a sibling "<name>_render" directory next to the output.
            render_dir = output_docx.parent / f"{output_docx.stem}_render"
        report["render"] = render_docx(output_docx, render_dir)
    write_json(Path(args.report).resolve(), report)


if __name__ == "__main__":
    main()

22
scripts/docx_query.py Normal file
View File

@ -0,0 +1,22 @@
from __future__ import annotations
import argparse
from pathlib import Path
from docx_ops_lib import index_document, query_nodes, read_json, write_json
def main() -> None:
    """CLI: index a DOCX, run a JSON query against it, write the result JSON."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--docx", required=True)
    parser.add_argument("--query-file", required=True)
    parser.add_argument("--out", required=True)
    args = parser.parse_args()

    index_data = index_document(Path(args.docx).resolve())
    query_payload = read_json(Path(args.query_file).resolve())
    result = query_nodes(index_data, query_payload)
    write_json(Path(args.out).resolve(), result)


if __name__ == "__main__":
    main()

View File

@ -1,29 +0,0 @@
from __future__ import annotations
import argparse
import shutil
import subprocess
from pathlib import Path
def main() -> None:
    """CLI: export a DOCX to PDF using headless LibreOffice."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--docx", required=True)
    parser.add_argument("--outdir", required=True)
    args = parser.parse_args()

    soffice = shutil.which("soffice")
    if not soffice:
        raise FileNotFoundError("未检测到 LibreOffice/soffice无法导出 PDF。")

    docx_path = Path(args.docx).resolve()
    out_dir = Path(args.outdir).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)
    command = [soffice, "--headless", "--convert-to", "pdf", "--outdir", str(out_dir), str(docx_path)]
    subprocess.run(command, check=True)


if __name__ == "__main__":
    main()

View File

@ -1,68 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from common import ensure_output_layout, find_rfp_docx, write_text
from parse_docx import build_document_graph
from scan_project_materials import build_inventory
# Artifacts written by earlier pipeline versions; deleted on every run so
# stale outputs cannot be mistaken for current ones.
LEGACY_WORK_FILES = [
    "evidence_graph.json",
    "missing_materials.json",
    "outline_candidates.json",
    "outline_final.json",
    "outline_review.md",
    "outline_review_report.json",
    "outline_spec.json",
    "outline_spec.reviewed.json",
    "project_profile.json",
    "rfp_outline.md",
    "source_tables.json",
    "stage_gates.json",
]
def cleanup_legacy_work_files(work_dir: Path, file_names: list[str] | None = None) -> None:
    """Delete obsolete work artifacts from *work_dir*.

    file_names defaults to LEGACY_WORK_FILES; passing an explicit list
    generalizes the helper for other cleanup sets. Uses
    unlink(missing_ok=True) instead of exists()+unlink(), which avoids the
    race when another process removes the file between the two calls.
    """
    targets = LEGACY_WORK_FILES if file_names is None else file_names
    for file_name in targets:
        (work_dir / file_name).unlink(missing_ok=True)
def main() -> None:
    """CLI: parse the project's RFP DOCX and inventory its materials.

    Writes document_graph.json and material_inventory.json into the work
    directory and a Markdown summary into the reports directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--project", required=True)
    parser.add_argument("--out")
    args = parser.parse_args()
    project_dir = Path(args.project).resolve()
    output_layout = ensure_output_layout(project_dir)
    # --out overrides the default work directory from the layout.
    work_dir = Path(args.out).resolve() if args.out else output_layout["work"]
    work_dir.mkdir(parents=True, exist_ok=True)
    cleanup_legacy_work_files(work_dir)
    docx_path = find_rfp_docx(project_dir)
    document_graph = build_document_graph(docx_path)
    inventory = build_inventory(project_dir)
    from common import write_json  # local import preserved from the original layout
    write_json(work_dir / "document_graph.json", document_graph)
    write_json(work_dir / "material_inventory.json", inventory)
    summary = [
        f"# {project_dir.name} 基础解析结果",
        "",
        "- 已完成 DOCX 结构化解析。",
        f"- 原文结构:{work_dir / 'document_graph.json'}",
        f"- 通用材料盘点:{work_dir / 'material_inventory.json'}",
        "",
        "说明:本脚本只负责基础解析与落盘,不负责评分点、目录或正文判断。",
    ]
    write_text(output_layout["reports"] / "parse_summary.md", "\n".join(summary))


if __name__ == "__main__":
    main()

View File

@ -1,113 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Any
from PIL import Image, ImageDraw, ImageFont
from common import ensure_dir, find_font_path, read_json, write_json, write_text
def load_font(size: int):
    """Return a truetype font at *size*, or PIL's builtin default when none is found."""
    font_path = find_font_path()
    if not font_path:
        return ImageFont.load_default()
    return ImageFont.truetype(str(font_path), size=size)
def wrap_text(draw: ImageDraw.ImageDraw, text: str, font, width: int) -> list[str]:
    """Greedy character-level wrap of *text* to at most *width* pixels per line.

    Uses draw.textlength for measurement, so CJK text (no word boundaries)
    wraps correctly. A single character wider than *width* still occupies
    its own line.
    """
    lines: list[str] = []
    buffer = ""
    for char in text:
        tentative = buffer + char
        if draw.textlength(tentative, font=font) > width:
            if buffer:
                lines.append(buffer)
            buffer = char
        else:
            buffer = tentative
    if buffer:
        lines.append(buffer)
    return lines
def render_placeholder(path: Path, title: str, purpose: str, hint: str) -> None:
    """Draw a 1280x720 placeholder card for a missing attachment and save it to *path*.

    The card shows the attachment title, purpose, replacement hint, and a
    prominent "to be replaced" status so it cannot be mistaken for a real
    scanned document.
    """
    width, height = 1280, 720
    image = Image.new("RGB", (width, height), "#F8FAFC")
    draw = ImageDraw.Draw(image)
    title_font = load_font(42)
    body_font = load_font(26)
    small_font = load_font(22)
    # Outer rounded border plus a tinted header band behind the title.
    draw.rounded_rectangle((40, 40, width - 40, height - 40), radius=24, outline="#2563EB", width=6)
    draw.rectangle((80, 90, width - 80, 180), fill="#DBEAFE")
    draw.text((110, 110), f"待补附件占位:{title}", fill="#1D4ED8", font=title_font)
    lines = [
        f"材料用途:{purpose or '待 AI 补充'}",
        f"替换提示:{hint or '待 AI 补充'}",
        "使用方式:请将本占位图替换为真实扫描件或盖章材料。",
        "注意事项:本图仅为占位,不代表已响应事实。",
    ]
    y = 240
    for line in lines:
        # Wrap each logical line to the card's inner width before drawing.
        for segment in wrap_text(draw, line, body_font, width - 220):
            draw.text((110, y), segment, fill="#0F172A", font=body_font)
            y += 48
        y += 12
    draw.text((110, height - 120), "状态:待替换", fill="#B91C1C", font=small_font)
    ensure_dir(path.parent)
    image.save(path)
def normalize_spec(spec: Any) -> list[dict[str, Any]]:
    """Coerce a placeholder spec (dict with "items", bare list, or anything
    else) into a list containing only its dict entries."""
    if isinstance(spec, list):
        candidates = spec
    elif isinstance(spec, dict):
        candidates = spec.get("items", [])
    else:
        candidates = []
    return [entry for entry in candidates if isinstance(entry, dict)]
def main() -> None:
    """CLI: render one placeholder PNG per spec item and write a manifest."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--spec", required=True)
    parser.add_argument("--out", required=True)
    args = parser.parse_args()
    items = normalize_spec(read_json(Path(args.spec).resolve()))
    out_dir = Path(args.out).resolve()
    ensure_dir(out_dir)
    manifest: list[dict[str, Any]] = []
    for index, item in enumerate(items, start=1):
        # Fall back to generated names/labels so a sparse spec still renders.
        file_name = item.get("file_name") or f"placeholder_{index}.png"
        image_path = out_dir / file_name
        title = item.get("label") or item.get("title") or f"附件{index}"
        purpose = item.get("purpose") or ""
        hint = item.get("hint") or item.get("replacement_hint") or ""
        render_placeholder(image_path, title, purpose, hint)
        manifest.append(
            {
                "title": title,
                "path": str(image_path),
                "purpose": purpose,
                "hint": hint,
            }
        )
    # Machine-readable manifest plus a human-readable Markdown summary.
    write_json(out_dir / "placeholder_manifest.json", manifest)
    lines = ["# 占位图清单", ""]
    if manifest:
        # NOTE(review): title and path are concatenated with no separator —
        # possibly a character lost in transit; confirm the intended format.
        lines.extend([f"- {item['title']}{item['path']}" for item in manifest])
    else:
        lines.append("- 未生成占位图。")
    write_text(out_dir / "placeholder_manifest.md", "\n".join(lines))


if __name__ == "__main__":
    main()

View File

@ -1,59 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from docx import Document
from common import write_json, write_text
def extract_docx_text(docx_path: Path) -> str:
    """Flatten a DOCX into plain text: body paragraphs first, then every table cell."""
    document = Document(docx_path)
    pieces: list[str] = []
    for paragraph in document.paragraphs:
        stripped = paragraph.text.strip()
        if stripped:
            pieces.append(stripped)
    for table in document.tables:
        for row in table.rows:
            for cell in row.cells:
                stripped = cell.text.strip()
                if stripped:
                    pieces.append(stripped)
    return "\n".join(pieces)
def main() -> None:
    """CLI: dump a DOCX's plain text plus basic character/line statistics."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--docx", required=True)
    parser.add_argument("--out", required=True)
    args = parser.parse_args()
    docx_path = Path(args.docx).resolve()
    out_dir = Path(args.out).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)
    text = extract_docx_text(docx_path)
    report = {
        "docx": str(docx_path),
        "character_count": len(text),
        # Count only non-blank lines.
        "line_count": len([line for line in text.splitlines() if line.strip()]),
    }
    write_json(out_dir / "docx_text_inspection.json", report)
    write_text(out_dir / "docx_text_dump.txt", text)
    write_text(
        out_dir / "docx_text_inspection.md",
        "\n".join(
            [
                f"# {docx_path.name} 机械文本检查",
                "",
                f"- 文本字符数:{report['character_count']}",
                f"- 非空行数:{report['line_count']}",
                "",
                "说明:本脚本只做 DOCX 文本提取与基础统计,不负责一致性、合规性或投标判断。",
            ]
        ),
    )


if __name__ == "__main__":
    main()

177
scripts/outline_check.py Normal file
View File

@ -0,0 +1,177 @@
from __future__ import annotations
import argparse
import re
from pathlib import Path
from docx_ops_lib import QueryError, read_json, write_json
# Abstract headings that must never appear as leaf nodes in the outline.
ILLEGAL_LEAF_TITLES = {
    "技术方案",
    "服务方案",
    "实施方案",
    "服务保障及措施",
    "售后服务和质保期服务计划",
    "项目理解",
    "解决方案",
    "系统设计",
    "平台建设方案",
    "系统建设方案",
    "总体方案",
    "培训方案",
    "运维方案",
}
# Root headings that mark a subtree as "technical" for the breakdown checks.
TECHNICAL_ROOT_TITLES = {
    "技术标目录",
    "服务方案",
    "技术方案",
    "实施方案",
    "服务保障及措施",
    "售后服务和质保期服务计划",
}
# Regex suffixes that flag a (normalized) heading as still generic boilerplate.
GENERIC_TECHNICAL_PATTERNS = (
    r"方案$",
    r"设计$",
    r"系统$",
    r"平台$",
    r"架构$",
    r"建设内容$",
    r"总体思路$",
    r"总体要求$",
    r"总体架构$",
    r"功能设计$",
    r"集成方案$",
    r"响应方案$",
    r"实施技术方案$",
    r"部署方案$",
    r"管理方案$",
    r"验收方案$",
    r"测试方案$",
    r"试运行方案$",
    r"保障措施$",
    r"服务计划$",
)
# Substrings whose presence in a child heading signals concrete detail
# (subsystem/module/process level rather than another generic layer).
SPECIFIC_CHILD_HINTS = (
    "原则",
    "目标",
    "架构",
    "模块",
    "功能",
    "内容",
    "配置",
    "清单",
    "流程",
    "机制",
    "计划",
    "步骤",
    "标准",
    "参数",
    "接口",
    "部署",
    "测试",
    "验收",
    "培训",
    "应急",
    "风险",
    "保障",
    "响应",
    "巡检",
    "维护",
    "更新",
    "子系统",
)
def _normalize_heading(text: str) -> str:
compact = re.sub(r"\s+", "", text or "")
compact = re.sub(r"^[一二三四五六七八九十0-9]+[、\.]\s*", "", compact)
compact = re.sub(r"^\(?[0-9一二三四五六七八九十]+\)?\s*", "", compact)
compact = re.sub(r"^[0-9]+(\.[0-9]+)*\s*", "", compact)
return compact
def _is_technical_context(path: list[str]) -> bool:
    """True when any ancestor heading on *path* is a known technical-section root."""
    for part in path:
        if _normalize_heading(part) in TECHNICAL_ROOT_TITLES:
            return True
    return False
def _looks_generic_technical_heading(text: str) -> bool:
    """True when the (normalized) heading is abstract or matches a generic suffix pattern."""
    normalized = _normalize_heading(text)
    return normalized in ILLEGAL_LEAF_TITLES or any(
        re.search(pattern, normalized) for pattern in GENERIC_TECHNICAL_PATTERNS
    )
def _has_specific_children(children: list[dict]) -> bool:
    """True when at least one dict child carries a concrete-detail hint and
    is not itself another generic technical heading."""
    for child in children:
        if not isinstance(child, dict):
            continue
        child_text = _normalize_heading(str(child.get("text", "")).strip())
        hinted = any(hint in child_text for hint in SPECIFIC_CHILD_HINTS)
        if hinted and not _looks_generic_technical_heading(child_text):
            return True
    return False
def _walk_blocks(blocks: list[dict], path: list[str], issues: list[dict]) -> None:
    """Depth-first outline validation; appends findings to *issues*.

    Per-block checks:
      * invalid_block — a block that is not a dict
      * illegal_leaf — an abstract title used as a leaf heading
      * invalid_children — "children" present but not a list
      * insufficient_technical_breakdown — inside a technical section, a
        generic heading whose direct sub-headings are all still generic
    """
    for index, block in enumerate(blocks):
        if not isinstance(block, dict):
            issues.append({"type": "invalid_block", "path": " > ".join(path + [str(index)]), "message": "block must be an object"})
            continue
        text = str(block.get("text", "")).strip()
        block_type = block.get("type", "heading")
        children = block.get("children", [])
        # Untitled blocks fall back to their positional index in the path.
        current_path = path + ([text] if text else [str(index)])
        if block_type == "heading":
            if text in ILLEGAL_LEAF_TITLES and not children:
                issues.append(
                    {
                        "type": "illegal_leaf",
                        "path": " > ".join(current_path),
                        "message": f"abstract heading '{text}' cannot be a leaf",
                    }
                )
            if children and not isinstance(children, list):
                issues.append({"type": "invalid_children", "path": " > ".join(current_path), "message": "children must be a list"})
                # Malformed children cannot be recursed into.
                continue
        if isinstance(children, list):
            if _is_technical_context(current_path):
                normalized = _normalize_heading(text)
                direct_heading_children = [child for child in children if isinstance(child, dict) and child.get("type", "heading") == "heading"]
                if _looks_generic_technical_heading(normalized) and direct_heading_children and not _has_specific_children(direct_heading_children):
                    issues.append(
                        {
                            "type": "insufficient_technical_breakdown",
                            "path": " > ".join(current_path),
                            "message": f"technical heading '{text}' is still too generic; expand to subsystem/module/process level",
                        }
                    )
            _walk_blocks(children, current_path, issues)
def check_outline(payload: dict) -> dict:
    """Validate an outline payload; return a report with status and issues.

    Raises QueryError when payload["blocks"] is not a list.
    """
    blocks = payload.get("blocks", [])
    if not isinstance(blocks, list):
        raise QueryError("blocks must be a list")
    issues: list[dict] = []
    _walk_blocks(blocks, [], issues)
    passed = not issues
    return {
        "status": "ok" if passed else "failed",
        "issue_count": len(issues),
        "issues": issues,
    }
def main() -> None:
    """CLI: lint an outline JSON file and write the issue report."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--outline-file", required=True)
    parser.add_argument("--report", required=True)
    args = parser.parse_args()

    outline_payload = read_json(Path(args.outline_file).resolve())
    write_json(Path(args.report).resolve(), check_outline(outline_payload))


if __name__ == "__main__":
    main()

21
scripts/outline_export.py Normal file
View File

@ -0,0 +1,21 @@
from __future__ import annotations
import argparse
from pathlib import Path
from docx_ops_lib import export_outline_artifacts, read_json, write_json
def main() -> None:
    """CLI: export outline artifacts from a spec file and write the report."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--spec-file", required=True)
    parser.add_argument("--report", required=True)
    args = parser.parse_args()

    spec_payload = read_json(Path(args.spec_file).resolve())
    write_json(Path(args.report).resolve(), export_outline_artifacts(spec_payload))


if __name__ == "__main__":
    main()

View File

@ -1,516 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
outline_linter.py
用途
cn-it-bid-writer canonical outline 或技术标目录做目录阶段门禁静态检查
把你在 outline-stage.md 里写的硬规则变成可执行的校验器不通过就退出非 0
特点
- 只依赖 Python 标准库方便放进任意项目仓库
- 尽量兼容不同 JSON 结构支持 title/name/heading/text支持 children/items/sections/nodes
- 输出两种格式text默认/json
- Exit code0 通过2 未通过 ERROR1 运行时错误文件/JSON 读取失败等
基本会拦住这些常见问题
- 技术方案/实施方案/默认非法终点出现在叶子节点
- 技术类叶子节点深度不足默认 <3
- 技术类叶子节点停留在概述/说明等抽象层级
- 抽象标题节点只有 1 个子标题违反下钻不能单一
- 技术方案类根节点缺少原则/架构/模块结构化切面
- 叶子节点缺 workflow_bucket
"""
from __future__ import annotations
import argparse
import json
import re
import sys
from dataclasses import dataclass, asdict
from typing import Any, Dict, List, Optional, Set, Tuple
# -------------------------
# Rule configuration (tune as needed)
# -------------------------
# "Default illegal endpoints" from references/outline-stage.md
# (titles that may not appear as leaf nodes).
FORBIDDEN_LEAF_TITLES: List[str] = [
    "技术方案",
    "服务方案",
    "实施方案",
    "服务保障及措施",
    "售后服务和质保期服务计划",
    "项目理解",
    "解决方案",
    "系统设计",
    "平台建设方案",
    "系统建设方案",
    "总体方案",
    "培训方案",
    "运维方案",
]
# Technical-scheme "core gate": titles whose subtree must exhibit the
# principles/architecture/modules facets together.
TECH_SCHEME_CORE_TITLES: Set[str] = {
    "技术方案",
    "解决方案",
    "系统设计",
    "平台建设方案",
    "系统建设方案",
    "总体方案",
}
# Dimension detection keywords — used to decide whether the
# principles/architecture/modules/process/plan/risk facets appear.
DIMENSION_KEYWORDS: Dict[str, List[str]] = {
    "principles": ["原则", "目标", "策略", "标准", "规范", "总体要求", "建设目标", "设计目标", "设计原则", "总体原则"],
    "architecture": [
        "架构",
        "总体设计",
        "总体架构",
        "逻辑架构",
        "物理架构",
        "数据架构",
        "应用架构",
        "技术架构",
        "安全架构",
        "部署架构",
        "拓扑",
        "选型",
        "技术路线",
        "设计方案",
        "总体设计方案",
    ],
    "modules": ["模块", "功能", "子系统", "组件", "内容", "建设内容", "实现", "接口", "数据流", "能力", "清单", "功能清单", "功能模块"],
    "process": ["流程", "机制", "管理", "运行机制", "管理机制", "运维流程", "业务流程", "实施流程", "变更", "发布", "运维机制", "交付流程", "实施步骤", "实施方法"],
    "plan_acceptance": ["计划", "进度", "里程碑", "阶段", "实施计划", "测试", "验收", "验收标准", "交付", "上线", "试运行", "培训计划", "培训安排", "质量计划"],
    "assurance_risk": ["保障", "风控", "风险", "应急", "预案", "安全保障", "质量保障", "服务保障", "组织保障", "保密", "灾备"],
}
# Technical-context hints — decide whether rules such as the technical-leaf
# depth check apply to a subtree.
TECH_CONTEXT_HINTS: List[str] = [
    "技术部分",
    "技术文件",
    "技术标",
    "技术响应",
    "技术方案",
    "系统设计",
    "系统建设",
    "平台建设",
    "集成方案",
    "总体架构",
    "技术架构",
]
# -------------------------
# JSON structure compatibility (field-name fallbacks)
# -------------------------
TITLE_KEYS = ("title", "name", "heading", "text", "label")
CHILD_KEYS = ("children", "items", "subsections", "sections", "nodes", "sub", "subs")
BUCKET_KEYS = ("workflow_bucket", "bucket", "wf_bucket")
# -------------------------
# 工具函数
# -------------------------
def normalize_title(raw: Any) -> str:
    """Normalize a title for rule matching (e.g. forbidden-leaf lookups).

    Strips leading numbering ("5.1 ", "一、", "(一)", "第1章"), drops a
    short trailing parenthetical note, and collapses runs of whitespace.
    Returns "" for None.
    """
    if raw is None:
        return ""
    t = str(raw).strip()
    prefix_patterns = (
        # (一) / (1) style prefixes
        r"^\s*[(]\s*([一二三四五六七八九十百千万]+|\d+)\s*[)]\s*",
        # 5. / 5.1 / 5) / 5、 style prefixes
        r"^\s*\d+(?:\.\d+)*\s*[、\.\)\]]\s*",
        r"^\s*\d+(?:\.\d+)*\s+",
        # 一、 / 1、 style prefixes
        r"^\s*([一二三四五六七八九十百千万]+|\d+)\s*[、\.]\s*",
        # 第X章/节/部分/篇 style prefixes
        r"^\s*第\s*([一二三四五六七八九十百千万]+|\d+)\s*[章节部分篇]\s*",
    )
    for pattern in prefix_patterns:
        t = re.sub(pattern, "", t)
    t = t.strip()
    # Trailing parenthetical note, capped at 30 chars to avoid eating real text.
    t = re.sub(r"[(][^)]{0,30}[)]\s*$", "", t).strip()
    return re.sub(r"\s+", " ", t).strip()
def get_title(node: Any) -> str:
    """Best-effort title lookup across the known key aliases; "" when absent."""
    if not isinstance(node, dict):
        return ""
    for key in TITLE_KEYS:
        value = node.get(key)
        if isinstance(value, (str, int, float)):
            return str(value)
    return ""
def get_children(node: Any) -> List[Any]:
    """Return the first list-valued child collection under any known alias; [] otherwise."""
    if not isinstance(node, dict):
        return []
    for key in CHILD_KEYS:
        value = node.get(key)
        if isinstance(value, list):
            return value
    return []
def get_bucket(node: Any) -> Optional[str]:
    """Return the node's workflow bucket (stripped), or None when absent/blank."""
    if isinstance(node, dict):
        for key in BUCKET_KEYS:
            value = node.get(key)
            if isinstance(value, str):
                stripped = value.strip()
                return stripped if stripped else None
    return None
def dimensions_from_title(title_clean: str) -> Set[str]:
    """Return the names of all dimensions whose keywords occur in the title."""
    return {
        dimension
        for dimension, keywords in DIMENSION_KEYWORDS.items()
        if any(keyword and keyword in title_clean for keyword in keywords)
    }
def is_tech_context_title(title_clean: str) -> bool:
    """Heuristic: does this normalized title indicate a technical section?"""
    # Exact technical-scheme roots and explicit context hints.
    if title_clean in TECH_SCHEME_CORE_TITLES:
        return True
    if any(h in title_clean for h in TECH_CONTEXT_HINTS):
        return True
    # Fallback: contains "技术" plus a chapter-like suffix.
    # NOTE(review): the empty string "" in this tuple makes the any() always
    # true, so every title containing "技术" passes — looks like a character
    # was lost (mojibake?); confirm the intended literal.
    if "技术" in title_clean and any(s in title_clean for s in ("部分", "文件", "", "方案", "响应", "章节")):
        return True
    return False
def is_shallow_technical_leaf(title_clean: str) -> bool:
    """Detect overview/summary-style titles that are too shallow to be a
    technical leaf node (e.g. "方案概述", "总体说明")."""
    if title_clean in {"概述", "方案概述", "项目概述", "总体概述", "总体说明"}:
        return True
    if title_clean.endswith(("概述", "介绍", "说明")):
        return True
    return "概述" in title_clean and len(title_clean) <= 8
# -------------------------
# 数据结构
# -------------------------
@dataclass
class NodeInfo:
    """Flattened metadata for one outline node, collected during the walk."""

    parent: Optional[int]   # index of the parent NodeInfo in the flat list
    idx_path: List[int]     # positional path from the root, e.g. [0, 2, 1]
    path_no: str            # idx_path joined with dots, for reporting
    breadcrumb: str         # raw titles joined with " > "
    depth: int              # nesting depth (= len(idx_path))
    title_raw: str          # title exactly as found in the JSON
    title_clean: str        # normalize_title() form used for rule matching
    bucket: Optional[str]   # workflow_bucket annotation, if any
    is_leaf: bool           # True when the node has no children
    child_count: int
    tech_context: bool      # node lies inside a technical section
    subtree_dims: Set[str]  # dimension names seen anywhere in the subtree
@dataclass
class Issue:
    """One lint finding; any ERROR-severity issue fails the outline gate."""

    severity: str  # "ERROR" or "WARN"
    code: str      # rule id, e.g. "F001", "B001"
    message: str
    path_no: str
    breadcrumb: str
    depth: int
    bucket: Optional[str] = None
    title: Optional[str] = None
# -------------------------
# 核心 lint 逻辑
# -------------------------
def extract_root_nodes(obj: Any) -> List[Any]:
    """Best-effort extraction of the outline's root node list.

    A top-level list is returned as-is; a dict yields its first list-valued
    field among outline/nodes/children/items/sections; anything else is
    wrapped as a single-node list.
    """
    if isinstance(obj, list):
        return obj
    if not isinstance(obj, dict):
        return [obj]
    for key in ("outline", "nodes", "children", "items", "sections"):
        candidate = obj.get(key)
        if isinstance(candidate, list):
            return candidate
    return [obj]
def lint_outline_obj(obj: Any, *, min_tech_depth: int = 3, strict: bool = False) -> Tuple[List[NodeInfo], List[Issue]]:
    """
    Lint an outline object and return (nodes, issues).

    The outline is considered failed when the number of issues with
    severity == ERROR is > 0.
    """
    # Normalized title sets driving the leaf / abstract / tech-scheme checks.
    forbidden_set: Set[str] = {normalize_title(x) for x in FORBIDDEN_LEAF_TITLES}
    abstract_set: Set[str] = set(forbidden_set)
    tech_scheme_set: Set[str] = {normalize_title(x) for x in TECH_SCHEME_CORE_TITLES}
    nodes = extract_root_nodes(obj)
    infos: List[NodeInfo] = []
    issues: List[Issue] = []
    def walk(node: Any, parent_idx: Optional[int], idx_path: List[int], title_path: List[str], parent_tech: bool) -> Set[str]:
        # Depth-first walk; returns the dimension set found in this subtree.
        if not isinstance(node, dict):
            path_no = ".".join(str(i) for i in idx_path)
            breadcrumb = " > ".join(title_path + [str(node)])
            issues.append(Issue("ERROR", "F001", f"节点不是对象(dict),无法解析: {type(node).__name__}", path_no, breadcrumb, len(idx_path)))
            return set()
        title_raw = get_title(node) or "<NO_TITLE>"
        title_clean = normalize_title(title_raw)
        bucket = get_bucket(node)
        children = get_children(node)
        child_count = len(children)
        is_leaf = child_count == 0
        # Technical context is inherited from ancestors or derived from this node.
        tech_context = parent_tech or (bucket == "technical") or is_tech_context_title(title_clean)
        path_no = ".".join(str(i) for i in idx_path)
        breadcrumb = " > ".join(title_path + [title_raw])
        # Create the info record first (subtree_dims is filled in after recursion).
        info = NodeInfo(
            parent=parent_idx,
            idx_path=idx_path,
            path_no=path_no,
            breadcrumb=breadcrumb,
            depth=len(idx_path),
            title_raw=title_raw,
            title_clean=title_clean,
            bucket=bucket,
            is_leaf=is_leaf,
            child_count=child_count,
            tech_context=tech_context,
            subtree_dims=set(),
        )
        my_idx = len(infos)
        infos.append(info)
        # ---------- leaf-node checks ----------
        if is_leaf:
            # B001: workflow_bucket must be present on leaves.
            if not bucket:
                issues.append(Issue("ERROR", "B001", "叶子节点缺少 workflow_bucket 标注", path_no, breadcrumb, info.depth, bucket, title_clean))
            else:
                # B002: bucket value validity (warn only).
                if bucket not in ("business", "technical", "other"):
                    issues.append(Issue("WARN", "B002", f"workflow_bucket 值不在 business/technical/other 中: {bucket!r}", path_no, breadcrumb, info.depth, bucket, title_clean))
            # L001: abstract titles are forbidden as leaf nodes.
            if title_clean in forbidden_set:
                issues.append(Issue("ERROR", "L001", f"默认非法终点标题出现在叶子节点: {title_clean}", path_no, breadcrumb, info.depth, bucket, title_clean))
            # Technical-leaf depth gate (only in a technical context with
            # bucket=technical, or when the parent context is technical).
            if tech_context and (bucket == "technical" or parent_tech):
                # T001: technical leaf is not deep enough.
                if info.depth < min_tech_depth:
                    issues.append(Issue("ERROR", "T001", f"技术类叶子节点目录深度不足: depth={info.depth} < {min_tech_depth}", path_no, breadcrumb, info.depth, bucket, title_clean))
                # T003: overview-style leaf (the typical shallow mistake).
                if is_shallow_technical_leaf(title_clean):
                    issues.append(Issue("ERROR", "T003", f"技术类叶子节点疑似停留在“概述/说明”等抽象层级,应继续下钻: {title_clean}", path_no, breadcrumb, info.depth, bucket, title_clean))
                # T004: strict mode — technical leaves ending in "方案" are also rejected.
                if strict and title_clean.endswith("方案") and title_clean not in forbidden_set:
                    issues.append(Issue("ERROR", "T004", f"严格模式:技术类叶子节点标题以“方案”结尾,建议继续下钻到原则/架构/模块等: {title_clean}", path_no, breadcrumb, info.depth, bucket, title_clean))
        # ---------- non-leaf checks ----------
        else:
            # A002: an abstract title must not drill down to a single child.
            if title_clean in abstract_set and child_count == 1:
                issues.append(Issue("ERROR", "A002", f"抽象标题节点仅有 1 个子标题,违反“下钻不能是单一的”约束: {title_clean}", path_no, breadcrumb, info.depth, bucket, title_clean))
            # A003: tech-scheme core nodes should have >=3 children (a full cross-section).
            if title_clean in tech_scheme_set and child_count < 3:
                issues.append(Issue("ERROR", "A003", f"技术方案类节点子标题过少(建议>=3),无法形成结构化切面: {title_clean}", path_no, breadcrumb, info.depth, bucket, title_clean))
        # ---------- recursion: gather subtree dimensions first ----------
        subtree_dims = set()
        subtree_dims |= dimensions_from_title(title_clean)
        for i, ch in enumerate(children, start=1):
            subtree_dims |= walk(ch, my_idx, idx_path + [i], title_path + [title_raw], tech_context)
        info.subtree_dims = subtree_dims
        # A001: abstract title used as a leaf (post-recursion safety net; overlaps L001).
        if title_clean in abstract_set and is_leaf:
            issues.append(Issue("ERROR", "A001", f"抽象标题节点被当作叶子节点(禁止作为终点): {title_clean}", path_no, breadcrumb, info.depth, bucket, title_clean))
        # T002: tech-scheme core nodes must cover principles / architecture / modules.
        if title_clean in tech_scheme_set:
            required = {"principles", "architecture", "modules"}
            if strict:
                # Strict mode also requires plan/acceptance and assurance/risk
                # (matching the "structured cross-section" in outline-stage.md).
                required |= {"plan_acceptance", "assurance_risk"}
            missing = required - subtree_dims
            if missing:
                issues.append(Issue("ERROR", "T002", f"技术方案类章节缺少关键结构化切面: 缺 {sorted(missing)}", path_no, breadcrumb, info.depth, bucket, title_clean))
        return subtree_dims
    # Roots are numbered from 1 and start outside any technical context.
    for idx, node in enumerate(nodes, start=1):
        walk(node, None, [idx], [], False)
    return infos, issues
# -------------------------
# 报告输出
# -------------------------
def compute_checklist(issues: List[Issue]) -> Dict[str, Any]:
    """Mirror the automatically checkable items of the outline-stage.md self-check list."""
    seen_codes = {issue.code for issue in issues}
    has_illegal_tech_leaf = any(
        issue.code in ("L001", "A001") and issue.title in ("技术方案", "实施方案")
        for issue in issues
    )
    return {
        "no_illegal_leaf_tech_or_impl": not has_illegal_tech_leaf,
        "tech_depth_ok": "T001" not in seen_codes,
        "tech_scheme_has_pri_arch_mod": "T002" not in seen_codes,
        # Scoring-point coverage needs the evaluation model / mapping rules; not checked here.
        "scoring_covered": None,
    }
def render_text_report(*, outline_path: str, strict: bool, issues: List[Issue], checklist: Dict[str, Any]) -> str:
    """Build the human-readable lint report: checklist summary + sorted issue details."""
    def path_as_ints(path_no: str) -> List[int]:
        # Non-numeric path pieces sort after all numeric ones.
        pieces: List[int] = []
        for piece in path_no.split("."):
            try:
                pieces.append(int(piece))
            except Exception:
                pieces.append(10**9)
        return pieces

    issues_sorted = sorted(
        issues,
        key=lambda i: (0 if i.severity == "ERROR" else 1, path_as_ints(i.path_no), i.code),
    )
    err_cnt = len([i for i in issues if i.severity == "ERROR"])
    warn_cnt = len([i for i in issues if i.severity == "WARN"])
    lines: List[str] = []
    lines.append("outline-linter report")
    lines.append(f"- outline: {outline_path}")
    lines.append(f"- strict: {strict}")
    lines.append("")
    lines.append("【目录深度强制自检】")
    # NOTE(review): the "pass" branch of the three checks below renders an empty
    # string; a character (e.g. 否/是) may have been lost upstream — confirm.
    lines.append(
        f"1. 是否存在直接以“技术方案”或“实施方案”作为叶子节点的章节?(要求:否) => "
        f"{'' if checklist.get('no_illegal_leaf_tech_or_impl') else '是(未通过)'}"
    )
    lines.append(
        f"2. 技术类章节是否已经下钻到第三级或第四级?(要求:是) => "
        f"{'' if checklist.get('tech_depth_ok') else '否(未通过)'}"
    )
    lines.append(
        f"3. 技术方案下,是否同时包含了[原则]、[架构]、[内容/模块](要求:是) => "
        f"{'' if checklist.get('tech_scheme_has_pri_arch_mod') else '否(未通过)'}"
    )
    lines.append("4. 所有的评分点是否都已在目录中体现?(要求:是) => 未检查(需要 evaluation_model / mapping)")
    lines.append("")
    lines.append(f"Issues: ERROR={err_cnt}, WARN={warn_cnt}")
    if issues_sorted:
        lines.append("")
        lines.append("Details:")
        for issue in issues_sorted:
            lines.append(f"[{issue.severity}][{issue.code}] {issue.path_no} | {issue.breadcrumb} | {issue.message}")
    return "\n".join(lines)
def build_json_report(*, outline_path: str, strict: bool, issues: List[Issue], checklist: Dict[str, Any]) -> Dict[str, Any]:
    """Build the machine-readable lint report; passed == no ERROR issues."""
    severities = [issue.severity for issue in issues]
    err_cnt = severities.count("ERROR")
    warn_cnt = severities.count("WARN")
    return {
        "outline": outline_path,
        "strict": strict,
        "passed": err_cnt == 0,
        "error_count": err_cnt,
        "warning_count": warn_cnt,
        "checklist": checklist,
        "issues": [asdict(issue) for issue in issues],
    }
# -------------------------
# CLI
# -------------------------
def main(argv: Optional[List[str]] = None) -> int:
    """CLI entry point; returns an exit code (0 pass, 1 fatal, 2 lint errors)."""
    parser = argparse.ArgumentParser(description="Lint a canonical outline or technical outline against outline-stage gates.")
    parser.add_argument("--outline", required=True, help="Path to canonical outline JSON or work/final_outline_technical.json")
    parser.add_argument("--min-tech-depth", type=int, default=3, help="Minimum depth for technical leaf nodes. Default: 3")
    parser.add_argument("--strict", action="store_true", help="Enable stricter checks (more ERRORs).")
    parser.add_argument("--format", choices=("text", "json"), default="text", help="Output format. Default: text")
    args = parser.parse_args(argv)
    try:
        with open(args.outline, "r", encoding="utf-8") as f:
            obj = json.load(f)
    except FileNotFoundError:
        print(f"[FATAL] outline not found: {args.outline}", file=sys.stderr)
        return 1
    except json.JSONDecodeError as e:
        print(f"[FATAL] invalid JSON: {args.outline}: {e}", file=sys.stderr)
        return 1
    # Node infos are not needed for reporting, only the issues.
    _, issues = lint_outline_obj(obj, min_tech_depth=args.min_tech_depth, strict=args.strict)
    checklist = compute_checklist(issues)
    if args.format == "json":
        report = build_json_report(outline_path=args.outline, strict=args.strict, issues=issues, checklist=checklist)
        print(json.dumps(report, ensure_ascii=False, indent=2))
    else:
        print(render_text_report(outline_path=args.outline, strict=args.strict, issues=issues, checklist=checklist))
    # exit code: 0 pass, 2 fail, 1 runtime error
    has_error = any(i.severity == "ERROR" for i in issues)
    return 2 if has_error else 0
if __name__ == "__main__":
    raise SystemExit(main())

View File

@ -1,171 +0,0 @@
from __future__ import annotations
import argparse
import json
import re
from pathlib import Path
from typing import Any
from docx import Document
from docx.document import Document as DocxDocument
from docx.table import Table
from docx.text.paragraph import Paragraph
from common import write_json
# Chapter headings written with Chinese numerals, e.g. "第三章 总则".
CHAPTER_RE = re.compile(r"^第[一二三四五六七八九十百零]+[章节篇部]\s*(.+)?$")
# Dotted numeric headings like "1.2.3 标题", tolerating ASCII/Chinese separators after the number.
NUMBERED_RE = re.compile(r"^(?P<number>\d+(?:\.\d+){0,8})[.、.\s)]*(?P<title>.+)$")
def normalize_text(value: str) -> str:
    """Collapse all whitespace runs to single spaces and trim the ends; None -> ''."""
    collapsed = re.sub(r"\s+", " ", value or "")
    return collapsed.strip()
def table_to_rows(table: Table) -> list[list[str]]:
    """Extract non-empty rows from a docx table as normalized cell strings."""
    extracted: list[list[str]] = []
    for row in table.rows:
        cells = [normalize_text(cell.text) for cell in row.cells]
        if any(cells):
            extracted.append(cells)
    return extracted
def is_heading_style(style_name: str) -> bool:
    """True for built-in heading styles (English "Heading N" or Chinese "标题N")."""
    compact = normalize_text(style_name).replace(" ", "").lower()
    return compact.startswith(("heading", "标题"))
def parse_heading(text: str, style_name: str) -> dict[str, Any] | None:
    """Classify a paragraph as a chapter / numbered / styled heading, or None.

    Level for numbered headings is the dotted-number depth + 1 (chapters are
    level 1). Non-heading-styled paragraphs only count as numbered headings
    when reasonably short (<= 120 chars after normalization).
    """
    source = normalize_text(text)
    if not source:
        return None
    if CHAPTER_RE.match(source):
        # "第X章/节/篇/部" chapters are always level-1 headings.
        return {
            "level": 1,
            "number": source.split(" ", 1)[0],
            "title": source,
            "kind": "chapter",
        }
    numbered = NUMBERED_RE.match(source)
    if is_heading_style(style_name):
        if numbered:
            return {
                "level": len(numbered.group("number").split(".")) + 1,
                "number": numbered.group("number"),
                "title": normalize_text(numbered.group("title")),
                "kind": "numbered",
            }
        # Heading-styled but unnumbered paragraphs default to level 2.
        return {
            "level": 2,
            "number": "",
            "title": source,
            "kind": "styled",
        }
    if numbered and len(source) <= 120:
        return {
            "level": len(numbered.group("number").split(".")) + 1,
            "number": numbered.group("number"),
            "title": normalize_text(numbered.group("title")),
            "kind": "numbered",
        }
    return None
def iter_blocks(document: DocxDocument) -> list[dict[str, Any]]:
    """Walk the document body in order, collecting paragraph and table blocks."""
    blocks: list[dict[str, Any]] = []
    paragraph_index = 0
    table_index = 0
    # Iterate raw body children so paragraphs and tables keep their document order.
    for child in document.element.body.iterchildren():
        if child.tag.endswith("}p"):  # w:p -> paragraph
            paragraph = Paragraph(child, document)
            text = normalize_text(paragraph.text)
            if not text:
                continue  # skip empty paragraphs entirely
            style_name = paragraph.style.name if paragraph.style else "Normal"
            blocks.append(
                {
                    "id": f"p-{paragraph_index}",
                    "kind": "paragraph",
                    "text": text,
                    "style": style_name,
                    "heading": parse_heading(text, style_name),
                }
            )
            paragraph_index += 1
        elif child.tag.endswith("}tbl"):  # w:tbl -> table
            rows = table_to_rows(Table(child, document))
            if not rows:
                continue  # skip tables whose rows are all empty
            blocks.append(
                {
                    "id": f"t-{table_index}",
                    "kind": "table",
                    "text": "\n".join(" | ".join(row) for row in rows),
                    "rows": rows,
                }
            )
            table_index += 1
    return blocks
def extract_images(document: DocxDocument) -> list[dict[str, Any]]:
    """List the document's image relationships as {id, target_ref} dicts.

    Fix: the parameter was annotated as ``Document``, which in this module is
    the python-docx factory *function*, not the document class; the correct
    type is ``DocxDocument``. The running image counter is also replaced by
    ``len(images)``, which is equivalent and removes the extra state variable.
    """
    images: list[dict[str, Any]] = []
    for rel in document.part.rels.values():
        # Relationships whose target path mentions "image" are treated as pictures.
        target_ref = getattr(rel, "target_ref", "")
        if "image" not in target_ref:
            continue
        images.append(
            {
                "id": f"img-{len(images)}",
                "target_ref": target_ref,
            }
        )
    return images
def build_document_graph(docx_path: Path) -> dict[str, Any]:
    """Parse a .docx into a flat block list plus heading/table/image indexes.

    Fix: the original called extract_images(document) twice (once for the
    "images" field and again for the summary count), scanning the relationship
    table redundantly; the result is now computed once and reused.
    """
    document = Document(docx_path)
    blocks = iter_blocks(document)
    # Headings carry a back-reference to the block they came from.
    headings = [block["heading"] | {"block_id": block["id"]} for block in blocks if block.get("heading")]
    tables = [
        {
            "id": block["id"],
            "rows": block["rows"],
            "row_count": len(block["rows"]),
            "column_count": max(len(row) for row in block["rows"]) if block["rows"] else 0,
        }
        for block in blocks
        if block["kind"] == "table"
    ]
    images = extract_images(document)
    return {
        "source_docx": str(docx_path),
        "blocks": blocks,
        "headings": headings,
        "tables": tables,
        "images": images,
        "summary": {
            "block_count": len(blocks),
            "paragraph_count": len([block for block in blocks if block["kind"] == "paragraph"]),
            "table_count": len(tables),
            "image_count": len(images),
        },
    }
def main() -> None:
    """CLI: parse a .docx into a JSON document graph."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--docx", required=True)
    parser.add_argument("--out", required=True)
    args = parser.parse_args()
    graph = build_document_graph(Path(args.docx).resolve())
    write_json(Path(args.out).resolve(), graph)
if __name__ == "__main__":
    main()

View File

@ -1,206 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Any
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm, Pt, RGBColor
from common import get_bundle_defaults, normalize_bundle, read_json
# Single font applied to both ASCII and east-Asian text in generated documents.
FONT_NAME = "Microsoft YaHei"
def set_run_font(run, size: int = 12, bold: bool = False) -> None:
    """Apply the house font/size/bold to a run, including the east-Asian font."""
    run.font.name = FONT_NAME
    # NOTE(review): the direct rPr.rFonts access relies on the font.name setter
    # above having created rPr/rFonts first — presumably safe, but confirm.
    run._element.rPr.rFonts.set(qn("w:eastAsia"), FONT_NAME)
    run.font.size = Pt(size)
    run.font.bold = bold
def set_style_font(style, size: int, bold: bool = False) -> None:
    """Apply the house font/size/bold to a paragraph style (incl. east-Asian font)."""
    style.font.name = FONT_NAME
    # NOTE(review): relies on style.font.name having created rPr/rFonts first
    # (same pattern as set_run_font) — confirm.
    style._element.rPr.rFonts.set(qn("w:eastAsia"), FONT_NAME)
    style.font.size = Pt(size)
    style.font.bold = bold
def add_toc(doc: Document) -> None:
    """Insert a Word TOC field (heading levels 1-3); the reader refreshes it in Word."""
    paragraph = doc.add_paragraph()
    run = paragraph.add_run()
    # Hand-built field: begin -> instruction -> separator -> placeholder -> end.
    fld_char_begin = OxmlElement("w:fldChar")
    fld_char_begin.set(qn("w:fldCharType"), "begin")
    instr_text = OxmlElement("w:instrText")
    instr_text.set(qn("xml:space"), "preserve")
    instr_text.text = 'TOC \\o "1-3" \\h \\z \\u'
    fld_char_separate = OxmlElement("w:fldChar")
    fld_char_separate.set(qn("w:fldCharType"), "separate")
    placeholder = OxmlElement("w:t")
    placeholder.text = "打开文档后右键更新目录"
    fld_char_end = OxmlElement("w:fldChar")
    fld_char_end.set(qn("w:fldCharType"), "end")
    # Append order inside the run is significant for Word to recognize the field.
    run._r.append(fld_char_begin)
    run._r.append(instr_text)
    run._r.append(fld_char_separate)
    run._r.append(placeholder)
    run._r.append(fld_char_end)
def add_paragraph(doc: Document, text: str, size: int = 12, bold: bool = False, align=WD_ALIGN_PARAGRAPH.JUSTIFY):
    """Append a body paragraph with house spacing/font; returns the paragraph."""
    para = doc.add_paragraph()
    para.alignment = align
    fmt = para.paragraph_format
    fmt.space_after = Pt(6)
    fmt.line_spacing = 1.5
    set_run_font(para.add_run(text), size=size, bold=bold)
    return para
def add_bullets(doc: Document, items: list[str]) -> None:
    """Append one bulleted paragraph per item, using the house font."""
    for entry in items:
        bullet = doc.add_paragraph(style="List Bullet")
        run = bullet.add_run(entry)
        set_run_font(run)
def add_table(doc: Document, title: str, headers: list[str], rows: list[list[str]]) -> None:
    """Append a grid table with a header row, preceded by an optional bold title."""
    if title:
        add_paragraph(doc, title, bold=True)
    table = doc.add_table(rows=1, cols=len(headers))
    table.style = "Table Grid"
    header_cells = table.rows[0].cells
    for position, header in enumerate(headers):
        header_cells[position].text = header
    for row in rows:
        new_cells = table.add_row().cells
        for position, value in enumerate(row):
            new_cells[position].text = value
    # Trailing empty paragraph keeps spacing after the table.
    doc.add_paragraph()
def add_image(doc: Document, path: Path, title: str | None = None, width_cm: float = 16.0) -> None:
    """Insert a picture scaled to width_cm, preceded by an optional bold title."""
    if title:
        add_paragraph(doc, title, bold=True)
    picture_path = str(path)
    doc.add_picture(picture_path, width=Cm(width_cm))
def coerce_content_block(value: Any) -> dict[str, Any]:
    """Normalize free-form content into a dict; a bare string becomes one paragraph."""
    if isinstance(value, dict):
        return value
    if isinstance(value, str):
        return {"paragraphs": [value]}
    return {}
def normalize_section_payload(section: dict[str, Any]) -> dict[str, Any]:
    """Flatten a section dict, falling back to its "content" block per field."""
    content = coerce_content_block(section.get("content"))

    def pick(field: str) -> list:
        # Section-level value wins; otherwise fall back to the content block.
        return list(section.get(field, content.get(field, [])))

    return {
        "title": section.get("title", ""),
        "paragraphs": pick("paragraphs"),
        "bullets": pick("bullets"),
        "tables": pick("tables"),
        "images": pick("images"),
        "children": [normalize_section_payload(child) for child in section.get("children", [])],
    }
def normalize_sections(spec: dict[str, Any]) -> list[dict[str, Any]]:
    """Resolve the section list from a spec, normalizing richer node formats."""
    nodes = spec.get("nodes")
    if isinstance(nodes, list):
        return [normalize_section_payload(node) for node in nodes if isinstance(node, dict)]
    sections = spec.get("sections", [])
    if not isinstance(sections, list):
        return []
    # Any workflow-style key on an item means the whole list needs normalization.
    marker_keys = ("outline_id", "workflow_bucket", "status", "content")
    has_rich_items = any(
        isinstance(item, dict) and any(key in item for key in marker_keys)
        for item in sections
    )
    if has_rich_items:
        return [normalize_section_payload(section) for section in sections if isinstance(section, dict)]
    return [section for section in sections if isinstance(section, dict)]
def add_section(doc: Document, section: dict[str, Any], numbers: tuple[int, ...] = ()) -> None:
    """Render one section (heading, paragraphs, bullets, tables, images) and recurse."""
    title = section.get("title", "")
    if title:
        number_prefix = ".".join(str(number) for number in numbers) + " " if numbers else ""
        # Heading level follows nesting depth but is clamped to 1..3.
        doc.add_heading(number_prefix + title, level=min(max(len(numbers), 1), 3))
    for paragraph_text in section.get("paragraphs", []):
        add_paragraph(doc, paragraph_text)
    bullet_items = section.get("bullets", [])
    if bullet_items:
        add_bullets(doc, bullet_items)
    for table_spec in section.get("tables", []):
        add_table(doc, table_spec.get("title", ""), table_spec.get("headers", []), table_spec.get("rows", []))
    for image_spec in section.get("images", []):
        add_image(
            doc,
            Path(image_spec["path"]).resolve(),
            image_spec.get("title"),
            float(image_spec.get("width_cm", 16)),
        )
    for child_position, child in enumerate(section.get("children", []), start=1):
        add_section(doc, child, numbers + (child_position,))
def build_docx(spec: dict[str, Any], out_path: Path) -> None:
    """Render the full bid document (cover, optional TOC, numbered sections)."""
    doc = Document()
    # Page margins for the whole document.
    section = doc.sections[0]
    section.top_margin = Cm(2.54)
    section.bottom_margin = Cm(2.54)
    section.left_margin = Cm(3.0)
    section.right_margin = Cm(2.5)
    set_style_font(doc.styles["Normal"], 12)
    set_style_font(doc.styles["Heading 1"], 16, True)
    set_style_font(doc.styles["Heading 2"], 14, True)
    set_style_font(doc.styles["Heading 3"], 12, True)
    # Bundle defaults supply titles when the spec does not override them.
    bundle = normalize_bundle(spec.get("bundle"))
    bundle_defaults = get_bundle_defaults(bundle) if bundle else None
    doc_title = spec.get("doc_title") or (bundle_defaults["bid_doc_title"] if bundle_defaults else "投标文件")
    toc_title = spec.get("toc_title") or (bundle_defaults["bid_toc_title"] if bundle_defaults else "目录")
    cover = doc.add_paragraph()
    cover.alignment = WD_ALIGN_PARAGRAPH.CENTER
    # (text, font size, bold) per cover line; the ("", 12, False) entry is a spacer.
    cover_specs = [
        (spec.get("project_name", "项目名称待补充"), 22, True),
        (doc_title, 28, True),
        (spec.get("subtitle", ""), 16, True),
        ("", 12, False),
        (f"投标人:{spec.get('bidder_name', '投标人名称待补充')}", 14, False),
    ]
    for text, size, bold in cover_specs:
        if not text and size != 12:
            continue  # drop empty lines (e.g. missing subtitle) except the 12pt spacer
        run = cover.add_run(text + "\n")
        set_run_font(run, size=size, bold=bold)
        if size >= 22:
            # Large cover lines get the accent blue color.
            run.font.color.rgb = RGBColor(0x1E, 0x3A, 0x8A)
    if spec.get("include_toc", True):
        doc.add_page_break()
        doc.add_heading(toc_title, level=1)
        add_toc(doc)
    # Each top-level section starts on a fresh page.
    for index, section_spec in enumerate(normalize_sections(spec), start=1):
        doc.add_page_break()
        add_section(doc, section_spec, (index,))
    out_path.parent.mkdir(parents=True, exist_ok=True)
    doc.save(out_path)
def main() -> None:
    """CLI: build the bid .docx from a JSON content spec."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--content", required=True)
    parser.add_argument("--out", required=True)
    args = parser.parse_args()
    build_docx(read_json(Path(args.content).resolve()), Path(args.out).resolve())
if __name__ == "__main__":
    main()

22
scripts/render_docx.py Normal file
View File

@ -0,0 +1,22 @@
from __future__ import annotations
import argparse
from pathlib import Path
from docx_ops_lib import render_docx, write_json
def main() -> None:
    """CLI wrapper around docx_ops_lib.render_docx; optionally writes its report as JSON."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--docx", required=True)
    parser.add_argument("--out-dir", required=True)
    parser.add_argument("--report")
    args = parser.parse_args()
    report = render_docx(Path(args.docx).resolve(), Path(args.out_dir).resolve())
    if args.report:
        write_json(Path(args.report).resolve(), report)
if __name__ == "__main__":
    main()

View File

@ -1,50 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Any
from common import read_json, write_text
def render_sections(sections: list[dict[str, Any]]) -> list[str]:
    """Flatten section dicts into markdown lines (heading, paragraphs, bullets).

    Each section is followed by one blank line; paragraph and heading groups
    also get a trailing blank line of their own.
    """
    lines: list[str] = []
    for section in sections:
        heading = section.get("heading")
        if heading:
            lines.append(f"## {heading}")
            lines.append("")
        paragraphs = section.get("paragraphs", [])
        lines.extend(paragraphs)
        if paragraphs:
            lines.append("")
        for item in section.get("bullets", []):
            lines.append(f"- {item}")
        lines.append("")
    return lines
def build_markdown(spec: dict[str, Any]) -> str:
    """Assemble the markdown report; a precomputed "markdown" field wins outright."""
    if "markdown" in spec:
        return str(spec["markdown"])
    lines: list[str] = [f"# {spec.get('title', '报告')}", ""]
    summary = spec.get("summary")
    if summary:
        lines.append(summary)
        lines.append("")
    lines += render_sections(spec.get("sections", []))
    return "\n".join(lines).rstrip() + "\n"
def main() -> None:
    """CLI: render a JSON spec into a markdown file."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--spec", required=True)
    parser.add_argument("--out", required=True)
    args = parser.parse_args()
    spec = read_json(Path(args.spec).resolve())
    write_text(Path(args.out).resolve(), build_markdown(spec))
if __name__ == "__main__":
    main()

View File

@ -1,131 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Any
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Pt
from common import get_bundle_defaults, normalize_bundle, read_json
FONT_NAME = "Microsoft YaHei"  # single font applied to ascii/hAnsi/eastAsia scripts
TOC_HINT = "打开文档后右键更新目录"  # placeholder shown until the TOC field is refreshed
HEADING_FONT_SIZES = {1: 16, 2: 14, 3: 12}  # pt sizes for heading levels 1-3; deeper levels use 12
MAX_HEADING_LEVEL = 9  # deepest "Heading N" style configured/used by this renderer
def set_run_font(run, size: int = 12, bold: bool = False) -> None:
    """Apply the house font to a run, covering ascii/hAnsi/eastAsia scripts."""
    run.bold = bold
    run.font.size = Pt(size)
    run.font.name = FONT_NAME
    rpr = run._element.get_or_add_rPr()
    rfonts = rpr.rFonts
    if rfonts is None:
        # No rFonts element yet: create and attach one.
        rfonts = OxmlElement("w:rFonts")
        rpr.append(rfonts)
    rfonts.set(qn("w:eastAsia"), FONT_NAME)
    rfonts.set(qn("w:ascii"), FONT_NAME)
    rfonts.set(qn("w:hAnsi"), FONT_NAME)
def configure_style(style, size: int, bold: bool = False) -> None:
    """Apply the house font to a named style (same scripts as set_run_font).

    NOTE(review): duplicates the rFonts handling in set_run_font; a shared
    helper would remove the repetition.
    """
    style.font.name = FONT_NAME
    style.font.size = Pt(size)
    style.font.bold = bold
    rpr = style._element.get_or_add_rPr()
    rfonts = rpr.rFonts
    if rfonts is None:
        # No rFonts element yet: create and attach one.
        rfonts = OxmlElement("w:rFonts")
        rpr.append(rfonts)
    rfonts.set(qn("w:eastAsia"), FONT_NAME)
    rfonts.set(qn("w:ascii"), FONT_NAME)
    rfonts.set(qn("w:hAnsi"), FONT_NAME)
def add_toc(paragraph) -> None:
    """Insert a Word TOC field (heading levels 1-9) into the given paragraph."""
    run = paragraph.add_run()
    # Hand-built field: begin -> instruction -> separator -> placeholder text -> end.
    fld_begin = OxmlElement("w:fldChar")
    fld_begin.set(qn("w:fldCharType"), "begin")
    instr = OxmlElement("w:instrText")
    instr.set(qn("xml:space"), "preserve")
    instr.text = 'TOC \\o "1-9" \\h \\z \\u'
    fld_sep = OxmlElement("w:fldChar")
    fld_sep.set(qn("w:fldCharType"), "separate")
    text = OxmlElement("w:t")
    text.text = TOC_HINT
    fld_end = OxmlElement("w:fldChar")
    fld_end.set(qn("w:fldCharType"), "end")
    # Append order inside the run is significant for Word to recognize the field.
    run._r.append(fld_begin)
    run._r.append(instr)
    run._r.append(fld_sep)
    run._r.append(text)
    run._r.append(fld_end)
def add_cover(doc: Document, project_name: str, doc_title: str) -> None:
    """Write the two centered cover lines: project name, then document title."""
    for cover_text, font_size in ((project_name, 18), (doc_title, 22)):
        paragraph = doc.add_paragraph()
        paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        set_run_font(paragraph.add_run(cover_text), font_size, True)
def normalize_outline_item(item: dict[str, Any]) -> dict[str, Any]:
    """Keep only the title plus recursively normalized children (KeyError if no title)."""
    children = item.get("children", [])
    return {
        "title": item["title"],
        "children": [normalize_outline_item(child) for child in children],
    }
def render_outline_nodes(doc: Document, items: list[dict[str, Any]], prefix: tuple[int, ...] = ()) -> None:
    """Emit numbered heading paragraphs for the outline tree, depth-first."""
    for position, item in enumerate(items, start=1):
        numbers = prefix + (position,)
        level = min(len(numbers), MAX_HEADING_LEVEL)
        size = HEADING_FONT_SIZES.get(level, 12)
        label = ".".join(str(number) for number in numbers) + f" {item['title']}"
        heading_para = doc.add_paragraph(style=f"Heading {level}")
        set_run_font(heading_para.add_run(label), size, True)
        render_outline_nodes(doc, item.get("children", []), numbers)
def build_docx(outline_spec: dict[str, Any], out_path: Path) -> None:
    """Render the outline-only bid document: cover, TOC, numbered headings."""
    doc = Document()
    configure_style(doc.styles["Normal"], 12)
    # Headings 1..MAX get their configured sizes (12pt fallback for deep levels).
    for level in range(1, MAX_HEADING_LEVEL + 1):
        configure_style(doc.styles[f"Heading {level}"], HEADING_FONT_SIZES.get(level, 12), True)
    # Bundle defaults supply titles when the spec does not override them.
    bundle = normalize_bundle(outline_spec.get("bundle"))
    bundle_defaults = get_bundle_defaults(bundle) if bundle else None
    doc_title = outline_spec.get("doc_title") or (bundle_defaults["outline_doc_title"] if bundle_defaults else "投标文件(目录版)")
    toc_title = outline_spec.get("toc_title") or (bundle_defaults["outline_toc_title"] if bundle_defaults else "目录")
    add_cover(doc, outline_spec.get("project_name", "项目名称待补充"), doc_title)
    doc.add_page_break()
    title = doc.add_paragraph()
    set_run_font(title.add_run(toc_title), 16, True)
    add_toc(doc.add_paragraph())
    doc.add_page_break()
    sections = [normalize_outline_item(section) for section in outline_spec.get("sections", [])]
    render_outline_nodes(doc, sections)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    doc.save(str(out_path))
def main() -> None:
    """CLI: build the outline-version .docx from an outline JSON spec."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--outline", required=True)
    parser.add_argument("--out", required=True)
    args = parser.parse_args()
    build_docx(read_json(Path(args.outline).resolve()), Path(args.out).resolve())
if __name__ == "__main__":
    main()

View File

@ -1,35 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from common import ensure_output_layout, get_bundle_outline_docx_path, get_bundle_outline_path, normalize_bundle
from render_outline_docx import build_docx
from common import read_json
def main() -> None:
    """CLI: render the AI-produced outline JSON into the TOC-version .docx."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--project", required=True)
    parser.add_argument("--outline")
    parser.add_argument("--out")
    parser.add_argument("--bundle")
    args = parser.parse_args()
    project_dir = Path(args.project).resolve()
    output_layout = ensure_output_layout(project_dir)
    bundle = normalize_bundle(args.bundle)
    # Default locations depend on whether a bundle was selected.
    outline_path = Path(args.outline).resolve() if args.outline else (
        get_bundle_outline_path(output_layout, bundle) if bundle else output_layout["work"] / "final_outline.json"
    )
    if not outline_path.exists():
        raise FileNotFoundError(f"未找到目录事实源: {outline_path}。目录判断应由 AI 完成,然后再调用本脚本渲染。")
    out_path = Path(args.out).resolve() if args.out else (
        get_bundle_outline_docx_path(output_layout, bundle) if bundle else output_layout["final"] / "投标文件_目录版.docx"
    )
    build_docx(read_json(outline_path), out_path)
if __name__ == "__main__":
    main()

View File

@ -1,80 +0,0 @@
from __future__ import annotations
import argparse
import subprocess
import sys
from pathlib import Path
SCRIPT_DIR = Path(__file__).resolve().parent  # directory containing the helper scripts
def run(script_name: str, *extra_args: str) -> None:
    """Run a sibling script with the current interpreter; raises on non-zero exit."""
    subprocess.run([sys.executable, str(SCRIPT_DIR / script_name), *extra_args], check=True)
def main() -> None:
    """Dispatch a single low-level dev helper tool; the workflow itself lives in SKILL.md."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--project", required=True)
    parser.add_argument(
        "--tool",
        required=True,
        help="低层开发辅助操作。真正 workflow 由 SKILL.md 定义,不由本脚本编排。",
    )
    parser.add_argument("--outline")
    parser.add_argument("--content")
    parser.add_argument("--input")
    parser.add_argument("--out")
    parser.add_argument("--bundle")
    args = parser.parse_args()
    project_dir = Path(args.project).resolve()
    # Legacy tool names mapped onto the current ones.
    tool_aliases = {
        "extract": "parse-rfp",
        "review-outline": "render-outline",
        "compose": "render-bid",
        "scan-materials": "scan-project",
    }
    selected_tool = tool_aliases.get(args.tool, args.tool)
    if selected_tool == "parse-rfp":
        run("extract_rfp_docx.py", "--project", str(project_dir))
        return
    if selected_tool == "scan-project":
        run("scan_project_materials.py", "--project", str(project_dir))
        return
    if selected_tool == "render-outline":
        # Optional passthrough flags for the outline renderer.
        extra_args = ["--project", str(project_dir)]
        if args.outline:
            extra_args.extend(["--outline", args.outline])
        if args.out:
            extra_args.extend(["--out", args.out])
        if args.bundle:
            extra_args.extend(["--bundle", args.bundle])
        run("review_outline_and_generate_toc.py", *extra_args)
        return
    if selected_tool == "render-bid":
        # Optional passthrough flags for the bid composer.
        extra_args = ["--project", str(project_dir)]
        if args.content:
            extra_args.extend(["--content", args.content])
        if args.out:
            extra_args.extend(["--out", args.out])
        if args.bundle:
            extra_args.extend(["--bundle", args.bundle])
        run("compose_bid_docx.py", *extra_args)
        return
    if selected_tool == "write-large-json":
        # Both an input and an output path are mandatory for this tool.
        extra_args: list[str] = []
        if not args.input:
            raise ValueError("write-large-json 模式必须提供 --input。")
        if not args.out:
            raise ValueError("write-large-json 模式必须提供 --out。")
        extra_args.extend(["--input", args.input, "--out", args.out])
        run("write_large_json.py", *extra_args)
        return
    allowed = ["parse-rfp", "scan-project", "render-outline", "render-bid", "write-large-json"]
    raise ValueError(f"不支持的 tool: {args.tool}。允许值:{', '.join(allowed)}")
if __name__ == "__main__":
    main()

View File

@ -1,58 +0,0 @@
from __future__ import annotations
import argparse
import subprocess
import sys
from pathlib import Path
def main() -> None:
    """Thin compatibility CLI forwarding --mode onto run_project_pipeline --tool."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--project", required=True)
    parser.add_argument(
        "--mode",
        required=True,
        help="低层开发辅助操作。真正 workflow 由 SKILL.md 定义,不由 CLI 固定。",
    )
    parser.add_argument("--outline")
    parser.add_argument("--content")
    parser.add_argument("--input")
    parser.add_argument("--out")
    parser.add_argument("--bundle")
    args = parser.parse_args()
    # Legacy mode names mapped onto the current ones.
    mode_aliases = {
        "extract": "parse-rfp",
        "review-outline": "render-outline",
        "compose": "render-bid",
        "scan-materials": "scan-project",
        "safe-write-json": "write-large-json",
    }
    selected_mode = mode_aliases.get(args.mode, args.mode)
    valid_modes = {"parse-rfp", "scan-project", "render-outline", "render-bid", "write-large-json"}
    if selected_mode not in valid_modes:
        raise ValueError(f"不支持的 mode: {args.mode}。允许值:{', '.join(sorted(valid_modes))}")
    # Forward everything to run_project_pipeline.py with the same interpreter.
    command = [
        sys.executable,
        str(Path(__file__).resolve().parent / "run_project_pipeline.py"),
        "--project",
        args.project,
        "--tool",
        selected_mode,
    ]
    if args.outline:
        command.extend(["--outline", args.outline])
    if args.content:
        command.extend(["--content", args.content])
    if args.input:
        command.extend(["--input", args.input])
    if args.out:
        command.extend(["--out", args.out])
    if args.bundle:
        command.extend(["--bundle", args.bundle])
    subprocess.run(command, check=True)
if __name__ == "__main__":
    main()

View File

@ -1,97 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Any
from common import MATERIAL_CATALOG, ensure_output_layout, iter_material_entries, list_files, safe_filename, write_json, write_text
def match_catalog(file_name: str) -> dict[str, Any] | None:
    """Return the first catalog entry whose keyword matches the file name (case-insensitive)."""
    lowered = file_name.lower()
    for entry in MATERIAL_CATALOG:
        for keyword in entry["keywords"]:
            if keyword.lower() in lowered:
                return entry
    return None
def serialize_files(project_dir: Path, paths: list[Path], source_root: Path) -> list[dict[str, Any]]:
    """Describe each material file, attaching any weak catalog hint that matches."""
    described: list[dict[str, Any]] = []
    for path in paths:
        hint = match_catalog(path.name)
        described.append(
            {
                "name": path.name,
                "path": str(path),
                "relative_path": str(path.relative_to(project_dir)),
                "source_root": source_root.name,
                "source_root_path": str(source_root),
                "matched_catalog_key": hint["key"] if hint else "",
                "matched_catalog_label": hint["label"] if hint else "",
                "safe_name": safe_filename(path.stem),
            }
        )
    return described
def build_inventory(project_dir: Path) -> dict[str, Any]:
    """Scan the project's material roots and build the inventory payload."""
    scan_roots: list[dict[str, str]] = []
    material_files: list[dict[str, Any]] = []
    for entry in iter_material_entries(project_dir):
        scan_roots.append(
            {
                "name": entry.name,
                "path": str(entry),
                "type": "file" if entry.is_file() else "directory",
            }
        )
        # A root may itself be a file; otherwise collect all files beneath it.
        files = [entry] if entry.is_file() else list_files(entry)
        material_files.extend(serialize_files(project_dir, files, entry))
    material_files.sort(key=lambda item: item["relative_path"])
    return {
        "project_name": project_dir.name,
        "project_dir": str(project_dir),
        "scan_roots": scan_roots,
        "material_files": material_files,
        "summary": {
            "scan_root_count": len(scan_roots),
            "file_count": len(material_files),
            "hinted_count": len([item for item in material_files if item["matched_catalog_key"]]),
        },
    }
def main() -> None:
    """CLI entry point: write the material inventory JSON plus a Markdown report."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--project", required=True)
    parser.add_argument("--out")
    args = parser.parse_args()

    project_dir = Path(args.project).resolve()
    if args.out:
        output_path = Path(args.out).resolve()
    else:
        # Default location inside the project's standard work directory.
        output_path = ensure_output_layout(project_dir)["work"] / "material_inventory.json"

    inventory = build_inventory(project_dir)
    write_json(output_path, inventory)

    summary = inventory["summary"]
    lines = [
        f"# {inventory['project_name']} 材料盘点",
        "",
        f"- 扫描根:{summary['scan_root_count']}",
        f"- 发现材料:{summary['file_count']}",
        f"- 命中弱提示:{summary['hinted_count']}",
        "",
        "## 扫描根",
    ]
    root_lines = [f"- {item['name']} ({item['type']})" for item in inventory["scan_roots"]]
    lines.extend(root_lines or ["- 暂无"])
    lines.extend(["", "## 发现的材料"])
    file_lines = []
    for item in inventory["material_files"]:
        entry = f"- {item['relative_path']}"
        if item["matched_catalog_label"]:
            entry += f" [提示:{item['matched_catalog_label']}]"
        file_lines.append(entry)
    lines.extend(file_lines or ["- 暂无"])
    # The Markdown report sits next to the JSON, same stem.
    write_text(output_path.with_suffix(".md"), "\n".join(lines))
if __name__ == "__main__":
main()

View File

@ -1,76 +0,0 @@
from __future__ import annotations
import argparse
import json
import re
from pathlib import Path
from typing import Any
from common import read_json, write_json
def compile_patterns(contains: list[str], regexes: list[str]) -> tuple[list[str], list[re.Pattern[str]]]:
    """Prepare query filters: substrings pass through, regex strings compile.

    Regexes are compiled case-insensitively; the substring list is returned
    unchanged so both filter kinds travel together.
    """
    compiled_regexes = [re.compile(expression, re.IGNORECASE) for expression in regexes]
    return contains, compiled_regexes
def match_block(block: dict[str, Any], contains: list[str], regexes: list[re.Pattern[str]], kinds: set[str], heading_only: bool, block_ids: set[str]) -> bool:
    """Return True when *block* satisfies every active filter.

    Filters are AND-combined; an empty filter means "match everything" for
    that dimension.

    Args:
        block: Graph node with optional "kind", "id", "heading", "text" keys.
        contains: Substrings that must all occur in the text (case-insensitive).
        regexes: Compiled patterns that must all find a match in the text.
        kinds: Allowed block kinds; empty set disables the check.
        heading_only: When True, only blocks with a truthy "heading" pass.
        block_ids: Allowed block ids; empty set disables the check.
    """
    if kinds and block.get("kind") not in kinds:
        return False
    if block_ids and block.get("id") not in block_ids:
        return False
    if heading_only and not block.get("heading"):
        return False
    text = block.get("text", "")
    # Lowercase the text once; the original re-lowered the full text for
    # every term in `contains`, which is wasted work on large blocks.
    lowered = text.lower()
    if contains and not all(term.lower() in lowered for term in contains):
        return False
    if regexes and not all(regex.search(text) for regex in regexes):
        return False
    return True
def main() -> None:
    """CLI entry point: filter a document graph's blocks by text/kind/id queries."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--graph", required=True)
    parser.add_argument("--contains", action="append", default=[])
    parser.add_argument("--regex", action="append", default=[])
    parser.add_argument("--kind", action="append", default=[])
    parser.add_argument("--heading-only", action="store_true")
    parser.add_argument("--block-id", action="append", default=[])
    parser.add_argument("--limit", type=int, default=20)
    parser.add_argument("--out")
    args = parser.parse_args()

    graph = read_json(Path(args.graph).resolve())
    contains, regexes = compile_patterns(args.contains, args.regex)
    wanted_kinds = set(args.kind)
    wanted_ids = set(args.block_id)

    filtered = [
        block
        for block in graph.get("blocks", [])
        if match_block(block, contains, regexes, wanted_kinds, args.heading_only, wanted_ids)
    ]
    matches = filtered[: args.limit]

    # Echo the query back in the payload so results are self-describing.
    result = {
        "query": {
            "contains": contains,
            "regex": args.regex,
            "kind": args.kind,
            "heading_only": args.heading_only,
            "block_ids": args.block_id,
            "limit": args.limit,
        },
        "matches": matches,
        "count": len(matches),
    }
    if args.out:
        write_json(Path(args.out).resolve(), result)
    else:
        print(json.dumps(result, ensure_ascii=False, indent=2))
if __name__ == "__main__":
main()

View File

@ -1,17 +0,0 @@
$ErrorActionPreference = "Stop"

# Resolve the skill root as the parent of the directory holding this script.
$here = Split-Path -Parent $MyInvocation.MyCommand.Path
$root = Split-Path -Parent $here
$venv = Join-Path $root ".venv"

# Create the virtual environment only on first run.
if (-not (Test-Path $venv)) {
    python -m venv $venv
}

# Install/refresh the dependency set inside the venv's interpreter.
$py = Join-Path $venv "Scripts\python.exe"
& $py -m pip install --upgrade pip
& $py -m pip install python-docx lxml pandas Pillow matplotlib PyYAML

Write-Host "Virtual environment is ready:" $venv

View File

@ -1,37 +0,0 @@
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Any
from common import write_json_atomic
def load_json_input(path: Path) -> Any:
    """Read and parse a JSON document from *path*.

    Accepts UTF-8 with or without a BOM (``utf-8-sig``). Raises
    ``FileNotFoundError`` when the file is missing, and ``ValueError``
    (chained to the decode error) when the content is not valid JSON.
    """
    if not path.exists():
        raise FileNotFoundError(f"未找到输入 JSON 文件: {path}")
    raw = path.read_text(encoding="utf-8-sig")
    try:
        return json.loads(raw)
    except json.JSONDecodeError as exc:
        raise ValueError(f"输入文件不是合法 JSON: {path}") from exc
def write_large_json(input_path: Path, output_path: Path) -> None:
    """Re-serialize the JSON at *input_path* to *output_path* atomically.

    Parsing is delegated to ``load_json_input`` (BOM-tolerant); the write
    goes through ``write_json_atomic`` with 2-space indent and raw Unicode.
    """
    payload = load_json_input(input_path)
    write_json_atomic(output_path, payload, indent=2, ensure_ascii=False)
def main() -> None:
    """CLI entry point: copy a JSON document to a target path, re-serialized."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True, help="UTF-8/UTF-8-SIG JSON 源文件路径")
    parser.add_argument("--out", required=True, help="目标 JSON 文件路径")
    args = parser.parse_args()

    source = Path(args.input).resolve()
    target = Path(args.out).resolve()
    write_large_json(source, target)
if __name__ == "__main__":
main()