163 lines
4.8 KiB
Python
163 lines
4.8 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Directory two levels above this file (the parent of the folder containing
# this script).
ROOT_DIR = Path(__file__).resolve().parents[1]

# Put ROOT_DIR at the front of the import search path, once, before the
# ``engine.*`` imports below run.
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))
|
|
|
|
from engine.artifact_manager import ArtifactManager
|
|
from engine.decision_engine import decide_candidate
|
|
from engine.runner import run_command
|
|
from engine.scorer import parse_score_output
|
|
from engine.task_loader import load_task
|
|
|
|
|
|
def _resolve_repo_path(repo_root: Path, raw_path: str) -> Path:
|
|
path = Path(raw_path)
|
|
if path.is_absolute():
|
|
return path.resolve()
|
|
return (repo_root / path).resolve()
|
|
|
|
|
|
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options for this script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the no-argument call did
            before this parameter existed.

    Returns:
        A namespace whose ``task`` attribute holds the required ``--task``
        value.

    Raises:
        SystemExit: Raised by argparse when ``--task`` is missing or an
            unknown option is supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--task",
        required=True,
        help="path to the task file; relative paths are resolved against the repository root",
    )
    return parser.parse_args(argv)
|
|
|
|
|
|
def _append_record(repo_root: Path, results_file: str, record: dict[str, object]) -> None:
    """Append *record* as one JSON line to the results file.

    Args:
        repo_root: Base directory used to resolve a relative *results_file*.
        results_file: Path of the results log, absolute or relative.
        record: JSON-serialisable mapping to write.
    """
    target = _resolve_repo_path(repo_root, results_file)
    # Create any missing parent directories before opening the log.
    target.parent.mkdir(parents=True, exist_ok=True)
    line = f"{json.dumps(record, ensure_ascii=False)}\n"
    # newline="" disables newline translation, so "\n" is written as-is.
    with target.open("a", encoding="utf-8", newline="") as sink:
        sink.write(line)
|
|
|
|
|
|
def _emit_record(
    repo_root: Path,
    task_id: str,
    results_file: str,
    status: str,
    reason: str,
    candidate_score: float | None,
    diff_summary: str,
) -> int:
    """Build the outcome record, append it to the results file, and print it.

    Args:
        repo_root: Base directory used to resolve a relative *results_file*.
        task_id: Identifier stored under ``"task_id"``.
        results_file: Path of the results log.
        status: Outcome label stored under ``"status"``.
        reason: Explanation stored under ``"reason"``.
        candidate_score: Score stored under ``"candidate_score"``; may be ``None``.
        diff_summary: Text stored under ``"diff_summary"``.

    Returns:
        Always ``0``.
    """
    # Keyword order fixes the key order of the emitted JSON.
    payload = dict(
        task_id=task_id,
        status=status,
        reason=reason,
        candidate_score=candidate_score,
        diff_summary=diff_summary,
    )
    _append_record(repo_root, results_file, payload)
    print(json.dumps(payload, ensure_ascii=False))
    return 0
|
|
|
|
|
|
def _finalize_outcome(
    *,
    repo_root: Path,
    task_id: str,
    results_file: str,
    artifact_manager: ArtifactManager,
    snapshot,
    status: str,
    reason: str,
    candidate_score: float | None,
) -> int:
    """Restore artifacts when required, emit the outcome record, and return an exit code.

    Args:
        repo_root: Base directory used to resolve a relative *results_file*.
        task_id: Identifier written into the record.
        results_file: Path of the results log.
        artifact_manager: Provides ``diff_summary`` and ``restore`` for *snapshot*.
        snapshot: Value previously obtained from the artifact manager.
        status: Outcome label; ``"discard"`` and ``"crash"`` trigger a restore.
        reason: Explanation written into the record.
        candidate_score: Score written into the record; may be ``None``.

    Returns:
        ``1`` when *status* is ``"crash"``, otherwise ``0``.
    """
    # The summary is taken before any restore; presumably so it describes the
    # candidate's changes rather than the restored state (confirm against
    # ArtifactManager).
    summary = artifact_manager.diff_summary(snapshot)

    if status in {"crash", "discard"}:
        artifact_manager.restore(snapshot)

    _emit_record(
        repo_root,
        task_id,
        results_file,
        status,
        reason,
        candidate_score,
        summary,
    )
    return 1 if status == "crash" else 0
|
|
|
|
|
|
def main() -> int:
    """Run one task end to end and record the outcome.

    Loads the task named by ``--task``, snapshots its artifacts, runs the
    task's runner command, runs the scorer command from the repository root,
    parses the scorer's stdout, and passes the parsed score to
    ``decide_candidate``. Every exit path goes through ``_finalize_outcome``.

    Returns:
        The exit code returned by ``_finalize_outcome``.
    """
    args = parse_args()
    repo_root = ROOT_DIR.resolve()
    task = load_task(_resolve_repo_path(repo_root, args.task))

    artifact_manager = ArtifactManager(task)
    snapshot = artifact_manager.snapshot()

    def finish(status: str, reason: str, candidate_score: float | None) -> int:
        # Single funnel for every exit path; task fields are read at call time.
        return _finalize_outcome(
            repo_root=repo_root,
            task_id=task.id,
            results_file=task.logging.results_file,
            artifact_manager=artifact_manager,
            snapshot=snapshot,
            status=status,
            reason=reason,
            candidate_score=candidate_score,
        )

    def crash(reason: str) -> int:
        # Crash outcomes never carry a score.
        return finish("crash", reason, None)

    runner_cwd = _resolve_repo_path(repo_root, task.runner.cwd)
    run_result = run_command(
        task.runner.command,
        runner_cwd,
        task.runner.timeout_seconds,
    )
    if run_result.exit_code != 0:
        return crash(f"command failed with exit code {run_result.exit_code}")

    # The scorer runs from the repository root and reuses the runner's timeout.
    scorer_result = run_command(
        task.scorer.command,
        repo_root,
        task.runner.timeout_seconds,
    )
    if scorer_result.exit_code != 0:
        return crash(f"scorer failed with exit code {scorer_result.exit_code}")

    try:
        score_result = parse_score_output(
            scorer_result.stdout,
            score_field=task.scorer.parse.score_field,
            metrics_field=task.scorer.parse.metrics_field,
        )
    except (KeyError, TypeError, ValueError) as exc:
        return crash(f"score parse failed: {exc}")

    # No baseline is supplied to the decision step.
    decision = decide_candidate(
        baseline=None,
        candidate=score_result,
        objective=task.objective,
        constraints=task.constraints,
        tie_breakers=task.policy.tie_breakers,
        run_result=run_result,
    )
    return finish(decision.status, decision.reason, decision.candidate_score)
|
|
|
|
|
|
# Script entry point: the value returned by main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|