75 lines
2.3 KiB
Python
75 lines
2.3 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
|
|
|
|
# Rubric check name -> predicate. Each predicate receives the full text to
# inspect and returns True when the structural requirement is met.
CHECKS = {
    # After leading whitespace is dropped, the text must open with "# ".
    "title_line": lambda text: text.lstrip()[:2] == "# ",
    # A line consisting of "## When to Use" (trailing whitespace allowed).
    "when_to_use_section": lambda text: (
        re.search(r"(?m)^## When to Use\s*$", text) is not None
    ),
    # A line consisting of "## Steps" (trailing whitespace allowed).
    "steps_section": lambda text: (
        re.search(r"(?m)^## Steps\s*$", text) is not None
    ),
    # At least one line that begins with "1. ".
    "numbered_step": lambda text: (
        re.search(r"(?m)^1\. ", text) is not None
    ),
}
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Parse the command line for this script.

    Returns:
        A namespace with ``task_dir``, ``artifact`` and ``output``; all three
        options are mandatory.
    """
    cli = argparse.ArgumentParser()
    # All three options share the same shape: a required string value.
    for flag in ("--task-dir", "--artifact", "--output"):
        cli.add_argument(flag, required=True)
    return cli.parse_args()
|
|
|
|
|
|
def load_rubric_keys(rubric_text: str) -> list[str]:
    """Extract check names from the bullet lines of a rubric.

    Only lines that begin with ``"- "`` are considered. The name is the text
    between that prefix and the first ``":"`` (or the end of the line when
    there is no colon), with surrounding whitespace removed. Bullets whose
    name is empty are dropped.

    Args:
        rubric_text: Full text of the rubric.

    Returns:
        The names in the order they appear; repeated names are kept.
    """
    bullet_bodies = (
        row[2:] for row in rubric_text.splitlines() if row.startswith("- ")
    )
    # partition(":")[0] is everything before the first colon, or the whole
    # string when no colon is present.
    candidates = (body.partition(":")[0].strip() for body in bullet_bodies)
    return [name for name in candidates if name]
|
|
|
|
|
|
def main() -> int:
    """Evaluate an artifact against the task rubric and write a JSON report.

    Reads ``prompt.md`` and ``rubric.md`` from the task directory, plus the
    artifact (resolved relative to the task directory), runs the ``CHECKS``
    predicate for every rubric key against the artifact text, and writes the
    outcome to the output path, creating parent directories as needed.

    Returns:
        ``0`` once the report has been written.

    Raises:
        ValueError: If the rubric names a check that ``CHECKS`` does not have.
    """
    cli = parse_args()

    root = Path(cli.task_dir).resolve()
    artifact_file = (root / cli.artifact).resolve()
    report_file = Path(cli.output).resolve()

    # Inputs are read in this order: prompt, rubric, artifact.
    prompt_md = root.joinpath("prompt.md").read_text(encoding="utf-8")
    rubric_md = root.joinpath("rubric.md").read_text(encoding="utf-8")
    artifact_body = artifact_file.read_text(encoding="utf-8")

    outcomes: dict[str, bool] = {}
    for name in load_rubric_keys(rubric_md):
        check = CHECKS.get(name)
        if check is None:
            raise ValueError(f"unsupported rubric check: {name}")
        outcomes[name] = check(artifact_body)

    n_total = len(outcomes)
    n_passed = sum(bool(ok) for ok in outcomes.values())

    metrics = {
        "passed_checks": n_passed,
        "total_checks": n_total,
        "violation_count": n_total - n_passed,
    }
    details = {
        "prompt": prompt_md.strip(),
        "checks": outcomes,
    }
    # The score is the number of passed checks expressed as a float.
    report = {
        "score": float(n_passed),
        "metrics": metrics,
        "details": details,
    }

    report_file.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(report, ensure_ascii=False, indent=2)
    report_file.write_text(payload + "\n", encoding="utf-8")
    return 0
|
|
|
|
|
|
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_code = main()
    raise SystemExit(exit_code)
|