"""Score a Markdown artifact against the checks named in a task rubric.

Reads prompt.md and rubric.md from the task directory, evaluates the
artifact text with one predicate per rubric key, and writes a JSON
result (score, metrics, and per-check booleans) to the output path.
"""

from __future__ import annotations

import argparse
import json
import re
from pathlib import Path

# Registry of check predicates over the artifact text. Rubric keys must
# match entries here; unknown keys are rejected in main().
CHECKS = {
    "title_line": lambda text: text.lstrip().startswith("# "),
    "when_to_use_section": lambda text: bool(re.search(r"(?m)^## When to Use\s*$", text)),
    "steps_section": lambda text: bool(re.search(r"(?m)^## Steps\s*$", text)),
    "numbered_step": lambda text: bool(re.search(r"(?m)^1\. ", text)),
}


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument("--task-dir", required=True)
    parser.add_argument("--artifact", required=True)
    parser.add_argument("--output", required=True)
    return parser.parse_args()


def load_rubric_keys(rubric_text: str) -> list[str]:
    """Collect check names from rubric bullets of the form '- key: description'."""
    keys: list[str] = []
    for line in rubric_text.splitlines():
        if not line.startswith("- "):
            continue
        key = line[2:].split(":", 1)[0].strip()
        if key:
            keys.append(key)
    return keys


def main() -> int:
    args = parse_args()
    task_dir = Path(args.task_dir).resolve()
    artifact_path = (task_dir / args.artifact).resolve()
    output_path = Path(args.output).resolve()

    prompt_text = (task_dir / "prompt.md").read_text(encoding="utf-8")
    rubric_text = (task_dir / "rubric.md").read_text(encoding="utf-8")
    artifact_text = artifact_path.read_text(encoding="utf-8")

    # Evaluate each rubric key; fail loudly on keys with no registered
    # check rather than silently skipping them.
    checks: dict[str, bool] = {}
    for key in load_rubric_keys(rubric_text):
        evaluator = CHECKS.get(key)
        if evaluator is None:
            raise ValueError(f"unsupported rubric check: {key}")
        checks[key] = evaluator(artifact_text)

    passed_checks = sum(1 for passed in checks.values() if passed)
    total_checks = len(checks)

    # The score is simply the count of passed checks, reported as a float.
    result = {
        "score": float(passed_checks),
        "metrics": {
            "passed_checks": passed_checks,
            "total_checks": total_checks,
            "violation_count": total_checks - passed_checks,
        },
        "details": {
            "prompt": prompt_text.strip(),
            "checks": checks,
        },
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(result, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
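
# ---------------------------------------------------------------------------
# Example (illustrative only; the script and file names below are
# hypothetical, not defined by this module). Given a task directory like:
#
#   tasks/demo/
#     prompt.md
#     rubric.md    # contains e.g. "- title_line: artifact starts with an H1"
#     answer.md
#
# an invocation such as
#
#   python scorer.py --task-dir tasks/demo --artifact answer.md --output out/result.json
#
# would write out/result.json with "score" equal to the number of rubric
# checks that passed and per-check booleans under "details" -> "checks".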