# Source: CommonAutoRearsh/tests/test_execution_pipeline.py
# (file viewer metadata: 108 lines, 4.1 KiB, Python)

import json
from pathlib import Path
import shutil
import subprocess
import tempfile
import unittest
from engine.decision_engine import decide_candidate
from engine.models import ConstraintSpec, ObjectiveSpec, RunResult, ScoreResult
from engine.runner import run_command
from engine.scorer import parse_score_output
class ExecutionPipelineTest(unittest.TestCase):
    """Unit checks for ``run_command``, ``parse_score_output`` and ``decide_candidate``."""

    # NOTE(review): the command strings below rely on a ``python`` executable
    # being resolvable by ``run_command`` -- confirm on python3-only hosts.

    def test_run_command_captures_stdout(self) -> None:
        """A command that prints ``ok`` yields exit code 0 and that text on stdout."""
        with tempfile.TemporaryDirectory() as workdir:
            outcome = run_command(
                "python -c \"print('ok')\"",
                Path(workdir),
                timeout_seconds=5,
            )
            self.assertEqual(outcome.exit_code, 0)
            self.assertIn("ok", outcome.stdout)

    def test_run_command_returns_result_on_timeout(self) -> None:
        """A command outliving its timeout still returns a result describing the timeout."""
        with tempfile.TemporaryDirectory() as workdir:
            # The child sleeps for 2 s while only 1 s is allowed.
            outcome = run_command(
                "python -c \"import time; time.sleep(2)\"",
                Path(workdir),
                timeout_seconds=1,
            )
            self.assertNotEqual(outcome.exit_code, 0)
            self.assertIn("timed out", outcome.stderr.lower())

    def test_parse_score_output_reads_primary_score(self) -> None:
        """The configured score and metrics fields are pulled out of the JSON payload."""
        payload = '{"score": 4.5, "metrics": {"violation_count": 0}}'
        parsed = parse_score_output(
            payload,
            score_field="score",
            metrics_field="metrics",
        )
        self.assertEqual(parsed.primary_score, 4.5)
        self.assertEqual(parsed.metrics["violation_count"], 0)

    def test_decide_candidate_rejects_constraint_failures(self) -> None:
        """A higher-scoring candidate is discarded when it breaks a constraint."""
        # The candidate beats the baseline (5.0 > 3.0) but reports one
        # violation, which the ``<= 0`` constraint below does not allow.
        violating_candidate = ScoreResult(
            primary_score=5.0,
            metrics={"violation_count": 1},
            raw_output={"score": 5.0, "metrics": {"violation_count": 1}},
        )
        maximize_score = ObjectiveSpec(primary_metric="score", direction="maximize")
        no_violations = [ConstraintSpec(metric="violation_count", op="<=", value=0)]
        clean_run = RunResult(
            command="python -c \"print('ok')\"",
            cwd=Path("."),
            exit_code=0,
            runtime_seconds=0.1,
            stdout="ok\n",
            stderr="",
        )

        verdict = decide_candidate(
            baseline=3.0,
            candidate=violating_candidate,
            objective=maximize_score,
            constraints=no_violations,
            tie_breakers=[],
            run_result=clean_run,
        )

        self.assertEqual(verdict.status, "discard")
        self.assertIn("violation_count", verdict.reason)
class RunTaskCliTest(unittest.TestCase):
    """End-to-end check of ``scripts/run_task.py`` run against a copied project tree."""

    def test_run_task_cli_writes_results_jsonl(self) -> None:
        """Running the CLI once writes a single expected record to ``work/results.jsonl``."""
        project_root = Path(__file__).resolve().parents[1]
        skip_caches = shutil.ignore_patterns("__pycache__")
        cli_command = [
            "uv",
            "run",
            "python",
            "scripts/run_task.py",
            "--task",
            "tasks/skill-quality/task.yaml",
        ]

        with tempfile.TemporaryDirectory() as tmp:
            sandbox = Path(tmp)

            # ``engine`` is copied unconditionally; ``scripts`` and ``tasks``
            # are copied only when they exist in the source tree.
            shutil.copytree(project_root / "engine", sandbox / "engine", ignore=skip_caches)
            for optional_name in ("scripts", "tasks"):
                optional_source = project_root / optional_name
                if not optional_source.exists():
                    continue
                shutil.copytree(
                    optional_source,
                    sandbox / optional_name,
                    ignore=skip_caches,
                )

            finished = subprocess.run(
                cli_command,
                cwd=str(sandbox),
                capture_output=True,
                text=True,
                encoding="utf-8",
                check=False,
            )
            # Surface the child's stderr as the failure message.
            self.assertEqual(finished.returncode, 0, msg=finished.stderr)

            results_file = sandbox / "work" / "results.jsonl"
            self.assertTrue(results_file.exists())

            entries = results_file.read_text(encoding="utf-8").splitlines()
            self.assertEqual(len(entries), 1)

            record = json.loads(entries[0])
            expected_fields = {
                "task_id": "skill-quality",
                "status": "keep",
                "reason": "no baseline available",
                "candidate_score": 4.0,
                "diff_summary": "",
            }
            for field_name, expected_value in expected_fields.items():
                self.assertEqual(record[field_name], expected_value)
# Run this module's test cases when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()