Skill-BidCreater/scripts/extract_rfp_docx.py
2026-03-09 22:20:38 +08:00

69 lines
2.0 KiB
Python

from __future__ import annotations
import argparse
from pathlib import Path
from common import ensure_output_layout, find_rfp_docx, write_text
from parse_docx import build_document_graph
from scan_project_materials import build_inventory
# File names that cleanup_legacy_work_files() removes from the work directory
# before a new parse run writes its outputs.
LEGACY_WORK_FILES = [
    "evidence_graph.json",
    "missing_materials.json",
    "outline_candidates.json",
    "outline_final.json",
    "outline_review.md",
    "outline_review_report.json",
    "outline_spec.json",
    "outline_spec.reviewed.json",
    "project_profile.json",
    "rfp_outline.md",
    "source_tables.json",
    "stage_gates.json",
]
def cleanup_legacy_work_files(work_dir: Path) -> None:
    """Delete each file listed in ``LEGACY_WORK_FILES`` that exists in *work_dir*.

    Names for which ``Path.exists()`` is false are skipped; every other
    entry is removed with ``Path.unlink()``.

    Args:
        work_dir: Directory whose listed files should be removed.
    """
    candidates = (work_dir / name for name in LEGACY_WORK_FILES)
    for stale in candidates:
        if not stale.exists():
            # Nothing on disk under this name; move on to the next one.
            continue
        stale.unlink()
def main() -> None:
    """Run the base parse for one project and write its outputs to disk.

    Command-line arguments:
        --project: Project directory (required); resolved to an absolute path.
        --out: Optional work directory. When omitted or empty, the ``"work"``
            entry of the layout returned by ``ensure_output_layout`` is used.

    Steps, in order:
        1. Build the output layout and make sure the work directory exists.
        2. Remove the files named in ``LEGACY_WORK_FILES`` from the work
           directory.
        3. Locate the RFP DOCX with ``find_rfp_docx`` and build its document
           graph; build the material inventory for the project directory.
        4. Write ``document_graph.json`` and ``material_inventory.json`` into
           the work directory.
        5. Write ``parse_summary.md`` into the layout's ``"reports"``
           directory.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--project", required=True)
    cli.add_argument("--out")
    options = cli.parse_args()

    project_dir = Path(options.project).resolve()
    output_layout = ensure_output_layout(project_dir)

    # An explicit --out wins; otherwise fall back to the layout's work dir.
    if options.out:
        work_dir = Path(options.out).resolve()
    else:
        work_dir = output_layout["work"]
    work_dir.mkdir(parents=True, exist_ok=True)
    cleanup_legacy_work_files(work_dir)

    rfp_path = find_rfp_docx(project_dir)
    graph = build_document_graph(rfp_path)
    materials = build_inventory(project_dir)

    # write_json is imported at function scope; the module-level import
    # from ``common`` does not include it.
    from common import write_json

    graph_file = work_dir / "document_graph.json"
    inventory_file = work_dir / "material_inventory.json"
    write_json(graph_file, graph)
    write_json(inventory_file, materials)

    # Markdown summary; the user-facing text is emitted exactly as written.
    report_lines = [
        f"# {project_dir.name} 基础解析结果",
        "",
        "- 已完成 DOCX 结构化解析。",
        f"- 原文结构:{graph_file}",
        f"- 通用材料盘点:{inventory_file}",
        "",
        "说明:本脚本只负责基础解析与落盘,不负责评分点、目录或正文判断。",
    ]
    report_path = output_layout["reports"] / "parse_summary.md"
    write_text(report_path, "\n".join(report_lines))
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()