Fix crash rollback and exit semantics
This commit is contained in:
parent
141f6e138b
commit
1632aefa85
@ -49,6 +49,33 @@ def _emit_record(repo_root: Path, task_id: str, results_file: str, status: str,
|
||||
return 0
|
||||
|
||||
|
||||
def _finalize_outcome(
|
||||
*,
|
||||
repo_root: Path,
|
||||
task_id: str,
|
||||
results_file: str,
|
||||
artifact_manager: ArtifactManager,
|
||||
snapshot,
|
||||
status: str,
|
||||
reason: str,
|
||||
candidate_score: float | None,
|
||||
) -> int:
|
||||
diff_summary = artifact_manager.diff_summary(snapshot)
|
||||
if status in {"discard", "crash"}:
|
||||
artifact_manager.restore(snapshot)
|
||||
exit_code = 1 if status == "crash" else 0
|
||||
_emit_record(
|
||||
repo_root=repo_root,
|
||||
task_id=task_id,
|
||||
results_file=results_file,
|
||||
status=status,
|
||||
reason=reason,
|
||||
candidate_score=candidate_score,
|
||||
diff_summary=diff_summary,
|
||||
)
|
||||
return exit_code
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
repo_root = ROOT_DIR.resolve()
|
||||
@ -64,14 +91,15 @@ def main() -> int:
|
||||
task.runner.timeout_seconds,
|
||||
)
|
||||
if run_result.exit_code != 0:
|
||||
return _emit_record(
|
||||
return _finalize_outcome(
|
||||
repo_root=repo_root,
|
||||
task_id=task.id,
|
||||
results_file=task.logging.results_file,
|
||||
artifact_manager=artifact_manager,
|
||||
snapshot=snapshot,
|
||||
status="crash",
|
||||
reason=f"command failed with exit code {run_result.exit_code}",
|
||||
candidate_score=None,
|
||||
diff_summary=artifact_manager.diff_summary(snapshot),
|
||||
)
|
||||
|
||||
scorer_result = run_command(
|
||||
@ -80,14 +108,15 @@ def main() -> int:
|
||||
task.runner.timeout_seconds,
|
||||
)
|
||||
if scorer_result.exit_code != 0:
|
||||
return _emit_record(
|
||||
return _finalize_outcome(
|
||||
repo_root=repo_root,
|
||||
task_id=task.id,
|
||||
results_file=task.logging.results_file,
|
||||
artifact_manager=artifact_manager,
|
||||
snapshot=snapshot,
|
||||
status="crash",
|
||||
reason=f"scorer failed with exit code {scorer_result.exit_code}",
|
||||
candidate_score=None,
|
||||
diff_summary=artifact_manager.diff_summary(snapshot),
|
||||
)
|
||||
|
||||
try:
|
||||
@ -97,14 +126,15 @@ def main() -> int:
|
||||
metrics_field=task.scorer.parse.metrics_field,
|
||||
)
|
||||
except (KeyError, TypeError, ValueError) as exc:
|
||||
return _emit_record(
|
||||
return _finalize_outcome(
|
||||
repo_root=repo_root,
|
||||
task_id=task.id,
|
||||
results_file=task.logging.results_file,
|
||||
artifact_manager=artifact_manager,
|
||||
snapshot=snapshot,
|
||||
status="crash",
|
||||
reason=f"score parse failed: {exc}",
|
||||
candidate_score=None,
|
||||
diff_summary=artifact_manager.diff_summary(snapshot),
|
||||
)
|
||||
|
||||
decision = decide_candidate(
|
||||
@ -116,14 +146,15 @@ def main() -> int:
|
||||
run_result=run_result,
|
||||
)
|
||||
|
||||
return _emit_record(
|
||||
return _finalize_outcome(
|
||||
repo_root=repo_root,
|
||||
task_id=task.id,
|
||||
results_file=task.logging.results_file,
|
||||
artifact_manager=artifact_manager,
|
||||
snapshot=snapshot,
|
||||
status=decision.status,
|
||||
reason=decision.reason,
|
||||
candidate_score=decision.candidate_score,
|
||||
diff_summary=artifact_manager.diff_summary(snapshot),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -137,7 +137,85 @@ class RunTaskCliTest(unittest.TestCase):
|
||||
self.assertEqual(record["task_id"], "skill-quality")
|
||||
self.assertEqual(record["status"], "keep")
|
||||
|
||||
def test_run_task_cli_records_scorer_failure(self) -> None:
|
||||
def test_run_task_cli_restores_artifacts_after_crash(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
temp_root = Path(tmp)
|
||||
self._copy_repo_layout(temp_root)
|
||||
|
||||
task_dir = temp_root / "tasks" / "runner-crash-restores"
|
||||
fixture_dir = task_dir / "fixtures"
|
||||
fixture_dir.mkdir(parents=True)
|
||||
artifact_path = fixture_dir / "SKILL.md"
|
||||
artifact_path.write_text("# Original\n", encoding="utf-8")
|
||||
(task_dir / "task.yaml").write_text(
|
||||
"\n".join(
|
||||
[
|
||||
"id: runner-crash-restores",
|
||||
"description: Runner crash restore fixture.",
|
||||
"artifacts:",
|
||||
" include:",
|
||||
" - fixtures/SKILL.md",
|
||||
" exclude: []",
|
||||
" max_files_per_iteration: 1",
|
||||
"mutation:",
|
||||
" mode: direct_edit",
|
||||
" allowed_file_types:",
|
||||
" - .md",
|
||||
" max_changed_lines: 20",
|
||||
"runner:",
|
||||
" command: python -c \"from pathlib import Path; Path('fixtures/SKILL.md').write_text('# Mutated\\n', encoding='utf-8'); raise SystemExit(9)\"",
|
||||
" cwd: tasks/runner-crash-restores",
|
||||
" timeout_seconds: 30",
|
||||
"scorer:",
|
||||
" type: command",
|
||||
" command: python -c \"print('unused scorer')\"",
|
||||
" parse:",
|
||||
" format: json",
|
||||
" score_field: score",
|
||||
" metrics_field: metrics",
|
||||
"objective:",
|
||||
" primary_metric: score",
|
||||
" direction: maximize",
|
||||
"constraints: []",
|
||||
"policy:",
|
||||
" keep_if: better_primary",
|
||||
" tie_breakers: []",
|
||||
" on_failure: discard",
|
||||
"budget:",
|
||||
" max_iterations: 1",
|
||||
" max_failures: 1",
|
||||
"logging:",
|
||||
" results_file: work/results.jsonl",
|
||||
" candidate_dir: work/candidates",
|
||||
"",
|
||||
]
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
completed = subprocess.run(
|
||||
["uv", "run", "python", "scripts/run_task.py", "--task", "tasks/runner-crash-restores/task.yaml"],
|
||||
cwd=str(temp_root),
|
||||
capture_output=True,
|
||||
text=True,
|
||||
encoding="utf-8",
|
||||
check=False,
|
||||
)
|
||||
|
||||
self.assertNotEqual(completed.returncode, 0, msg=completed.stderr)
|
||||
self.assertEqual(artifact_path.read_text(encoding="utf-8"), "# Original\n")
|
||||
results_path = temp_root / "work" / "results.jsonl"
|
||||
self.assertTrue(results_path.exists())
|
||||
lines = results_path.read_text(encoding="utf-8").splitlines()
|
||||
self.assertEqual(len(lines), 1)
|
||||
record = json.loads(lines[0])
|
||||
self.assertEqual(record["task_id"], "runner-crash-restores")
|
||||
self.assertEqual(record["status"], "crash")
|
||||
self.assertEqual(record["reason"], "command failed with exit code 9")
|
||||
self.assertIn("# Original", record["diff_summary"])
|
||||
self.assertIn("# Mutated", record["diff_summary"])
|
||||
|
||||
def test_run_task_cli_returns_nonzero_on_crash_and_writes_record(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
temp_root = Path(tmp)
|
||||
self._copy_repo_layout(temp_root)
|
||||
@ -201,7 +279,7 @@ class RunTaskCliTest(unittest.TestCase):
|
||||
check=False,
|
||||
)
|
||||
|
||||
self.assertEqual(completed.returncode, 0, msg=completed.stderr)
|
||||
self.assertNotEqual(completed.returncode, 0, msg=completed.stderr)
|
||||
results_path = temp_root / "work" / "results.jsonl"
|
||||
self.assertTrue(results_path.exists())
|
||||
lines = results_path.read_text(encoding="utf-8").splitlines()
|
||||
@ -276,7 +354,7 @@ class RunTaskCliTest(unittest.TestCase):
|
||||
check=False,
|
||||
)
|
||||
|
||||
self.assertEqual(completed.returncode, 0, msg=completed.stderr)
|
||||
self.assertNotEqual(completed.returncode, 0, msg=completed.stderr)
|
||||
results_path = temp_root / "work" / "results.jsonl"
|
||||
self.assertTrue(results_path.exists())
|
||||
lines = results_path.read_text(encoding="utf-8").splitlines()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user