from __future__ import annotations

# Command-line entry point: run one task's command, score its output, and
# append the resulting decision record to the task's results file.

import argparse
import json
import sys
from pathlib import Path

# ROOT_DIR is the directory one level above the folder holding this script.
# It is pushed onto sys.path first -- presumably so the `engine` imports below
# resolve when the file is executed directly as a script.
ROOT_DIR = Path(__file__).resolve().parents[1]
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))

from engine.artifact_manager import ArtifactManager
from engine.decision_engine import decide_candidate
from engine.runner import run_command
from engine.scorer import parse_score_output
from engine.task_loader import load_task


def _resolve_repo_path(repo_root: Path, raw_path: str) -> Path:
    """Return *raw_path* resolved, anchoring relative paths at *repo_root*.

    Absolute inputs are resolved as-is; relative inputs are joined onto
    *repo_root* before being resolved.
    """
    candidate = Path(raw_path)
    anchored = candidate if candidate.is_absolute() else repo_root / candidate
    return anchored.resolve()


def parse_args() -> argparse.Namespace:
    """Parse the command line; ``--task`` is the only (required) option."""
    cli = argparse.ArgumentParser()
    cli.add_argument("--task", required=True)
    return cli.parse_args()


def _append_record(
    repo_root: Path,
    results_file: str,
    record: dict[str, object],
) -> None:
    """Append *record* as one JSON line to *results_file*.

    The results path is resolved against *repo_root* and its parent directory
    is created if missing. The file is opened in append mode as UTF-8 with
    newline translation disabled, so each record ends in a literal ``\\n``.
    """
    destination = _resolve_repo_path(repo_root, results_file)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("a", encoding="utf-8", newline="") as sink:
        sink.write(f"{json.dumps(record, ensure_ascii=False)}\n")


def _emit_record(
    repo_root: Path,
    task_id: str,
    results_file: str,
    status: str,
    reason: str,
    candidate_score: float | None,
    diff_summary: str,
) -> int:
    """Persist one result record, echo it to stdout, and return ``0``.

    The record is first appended to the results file and then printed as a
    single JSON line. The return value is always ``0``, whatever *status* is.
    """
    payload = dict(
        task_id=task_id,
        status=status,
        reason=reason,
        candidate_score=candidate_score,
        diff_summary=diff_summary,
    )
    _append_record(repo_root, results_file, payload)
    print(json.dumps(payload, ensure_ascii=False))
    return 0


def main() -> int:
    """Run the task named by ``--task`` and record the outcome.

    Steps, in order:

    1. Load the task definition (path resolved against the repo root).
    2. Take an artifact snapshot and derive its diff summary -- note this
       happens *before* the task command is executed.
    3. Run the task's runner command; a non-zero exit is recorded as "crash".
    4. Run the scorer command from the repo root, reusing the runner's
       timeout; a non-zero exit is recorded as "crash".
    5. Parse the scorer's stdout; a ``KeyError``/``TypeError``/``ValueError``
       is recorded as "crash".
    6. Ask ``decide_candidate`` (with no baseline) for the final status and
       record it.

    Returns:
        The value returned by ``_emit_record`` (always ``0``).
    """
    cli = parse_args()
    repo_root = ROOT_DIR.resolve()
    task = load_task(_resolve_repo_path(repo_root, cli.task))

    manager = ArtifactManager(task)
    diff_summary = manager.diff_summary(manager.snapshot())

    def finish(status: str, reason: str, score: float | None) -> int:
        # Every exit path shares the same task/file/diff context.
        return _emit_record(
            repo_root=repo_root,
            task_id=task.id,
            results_file=task.logging.results_file,
            status=status,
            reason=reason,
            candidate_score=score,
            diff_summary=diff_summary,
        )

    runner_cfg = task.runner
    run_result = run_command(
        runner_cfg.command,
        _resolve_repo_path(repo_root, runner_cfg.cwd),
        runner_cfg.timeout_seconds,
    )
    if run_result.exit_code != 0:
        return finish(
            "crash",
            f"command failed with exit code {run_result.exit_code}",
            None,
        )

    # The scorer runs from the repo root and shares the runner's timeout.
    scorer_result = run_command(
        task.scorer.command,
        repo_root,
        runner_cfg.timeout_seconds,
    )
    if scorer_result.exit_code != 0:
        return finish(
            "crash",
            f"scorer failed with exit code {scorer_result.exit_code}",
            None,
        )

    try:
        score_result = parse_score_output(
            scorer_result.stdout,
            score_field=task.scorer.parse.score_field,
            metrics_field=task.scorer.parse.metrics_field,
        )
    except (KeyError, TypeError, ValueError) as exc:
        return finish("crash", f"score parse failed: {exc}", None)

    verdict = decide_candidate(
        baseline=None,
        candidate=score_result,
        objective=task.objective,
        constraints=task.constraints,
        tie_breakers=task.policy.tie_breakers,
        run_result=run_result,
    )
    return finish(verdict.status, verdict.reason, verdict.candidate_score)


if __name__ == "__main__":
    raise SystemExit(main())