From 97ae78f1218b5e5816004253aafd532b66197cee Mon Sep 17 00:00:00 2001
From: sladro
Date: Thu, 2 Apr 2026 11:55:55 +0800
Subject: [PATCH] feat: add execution, scoring, and decision modules

---
 engine/decision_engine.py        | 76 ++++++++++++++++++++++++++++++++
 engine/runner.py                 | 30 +++++++++++++
 engine/scorer.py                 | 21 +++++++++
 tests/test_execution_pipeline.py | 52 ++++++++++++++++++++++
 4 files changed, 179 insertions(+)
 create mode 100644 engine/decision_engine.py
 create mode 100644 engine/runner.py
 create mode 100644 engine/scorer.py
 create mode 100644 tests/test_execution_pipeline.py

diff --git a/engine/decision_engine.py b/engine/decision_engine.py
new file mode 100644
index 0000000..c396ea0
--- /dev/null
+++ b/engine/decision_engine.py
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+from engine.models import ConstraintSpec, DecisionResult, ObjectiveSpec, RunResult, ScoreResult
+
+
+def _constraint_failed(candidate: ScoreResult, constraint: ConstraintSpec) -> bool:
+    value = candidate.metrics.get(constraint.metric)
+    if value is None:
+        return True
+
+    if constraint.op == "<=":
+        return value > constraint.value
+    if constraint.op == ">=":
+        return value < constraint.value
+    if constraint.op == "==":
+        return value != constraint.value
+    return True
+
+
+def decide_candidate(
+    baseline: float | None,
+    candidate: ScoreResult,
+    objective: ObjectiveSpec,
+    constraints: list[ConstraintSpec],
+    tie_breakers: list[dict[str, str]],
+    run_result: RunResult,
+) -> DecisionResult:
+    if run_result.exit_code != 0:
+        return DecisionResult(
+            status="crash",
+            reason=f"command failed with exit code {run_result.exit_code}",
+            baseline_score=baseline,
+            candidate_score=candidate.primary_score,
+        )
+
+    failed_constraints = [
+        constraint.metric
+        for constraint in constraints
+        if _constraint_failed(candidate, constraint)
+    ]
+    if failed_constraints:
+        return DecisionResult(
+            status="discard",
+            reason=f"constraint failure: {', '.join(failed_constraints)}",
+            baseline_score=baseline,
+            candidate_score=candidate.primary_score,
+            constraint_failures=failed_constraints,
+        )
+
+    if baseline is None:
+        return DecisionResult(
+            status="keep",
+            reason="no baseline available",
+            baseline_score=None,
+            candidate_score=candidate.primary_score,
+        )
+
+    if objective.direction == "maximize":
+        better = candidate.primary_score > baseline
+    else:
+        better = candidate.primary_score < baseline
+
+    if better:
+        return DecisionResult(
+            status="keep",
+            reason="candidate improved primary score",
+            baseline_score=baseline,
+            candidate_score=candidate.primary_score,
+        )
+
+    return DecisionResult(
+        status="discard",
+        reason="candidate did not improve primary score",
+        baseline_score=baseline,
+        candidate_score=candidate.primary_score,
+    )
diff --git a/engine/runner.py b/engine/runner.py
new file mode 100644
index 0000000..9f21061
--- /dev/null
+++ b/engine/runner.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+import subprocess
+import time
+from pathlib import Path
+
+from engine.models import RunResult
+
+
+def run_command(command: str, cwd: Path, timeout_seconds: int) -> RunResult:
+    start = time.perf_counter()
+    completed = subprocess.run(
+        command,
+        cwd=str(cwd),
+        shell=True,
+        capture_output=True,
+        text=True,
+        encoding="utf-8",
+        timeout=timeout_seconds,
+        check=False,
+    )
+    runtime_seconds = time.perf_counter() - start
+    return RunResult(
+        command=command,
+        cwd=cwd,
+        exit_code=completed.returncode,
+        runtime_seconds=runtime_seconds,
+        stdout=completed.stdout,
+        stderr=completed.stderr,
+    )
diff --git a/engine/scorer.py b/engine/scorer.py
new file mode 100644
index 0000000..c3a1bf4
--- /dev/null
+++ b/engine/scorer.py
@@ -0,0 +1,21 @@
+from __future__ import annotations
+
+import json
+
+from engine.models import ScoreResult
+
+
+def parse_score_output(output: str, score_field: str, metrics_field: str) -> ScoreResult:
+    parsed = json.loads(output)
+    if not isinstance(parsed, dict):
+        raise ValueError("score output must be a JSON object")
+
+    metrics = parsed.get(metrics_field, {})
+    if not isinstance(metrics, dict):
+        raise ValueError(f"{metrics_field} must be a JSON object")
+
+    return ScoreResult(
+        primary_score=float(parsed[score_field]),
+        metrics=metrics,
+        raw_output=parsed,
+    )
diff --git a/tests/test_execution_pipeline.py b/tests/test_execution_pipeline.py
new file mode 100644
index 0000000..5201fa4
--- /dev/null
+++ b/tests/test_execution_pipeline.py
@@ -0,0 +1,52 @@
+from pathlib import Path
+import tempfile
+import unittest
+
+from engine.decision_engine import decide_candidate
+from engine.models import ConstraintSpec, ObjectiveSpec, RunResult, ScoreResult
+from engine.runner import run_command
+from engine.scorer import parse_score_output
+
+
+class ExecutionPipelineTest(unittest.TestCase):
+    def test_run_command_captures_stdout(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            result = run_command("python -c \"print('ok')\"", Path(tmp), timeout_seconds=5)
+            self.assertEqual(result.exit_code, 0)
+            self.assertIn("ok", result.stdout)
+
+    def test_parse_score_output_reads_primary_score(self) -> None:
+        score = parse_score_output(
+            '{"score": 4.5, "metrics": {"violation_count": 0}}',
+            score_field="score",
+            metrics_field="metrics",
+        )
+        self.assertEqual(score.primary_score, 4.5)
+        self.assertEqual(score.metrics["violation_count"], 0)
+
+    def test_decide_candidate_rejects_constraint_failures(self) -> None:
+        decision = decide_candidate(
+            baseline=3.0,
+            candidate=ScoreResult(
+                primary_score=5.0,
+                metrics={"violation_count": 1},
+                raw_output={"score": 5.0, "metrics": {"violation_count": 1}},
+            ),
+            objective=ObjectiveSpec(primary_metric="score", direction="maximize"),
+            constraints=[ConstraintSpec(metric="violation_count", op="<=", value=0)],
+            tie_breakers=[],
+            run_result=RunResult(
+                command="python -c \"print('ok')\"",
+                cwd=Path("."),
+                exit_code=0,
+                runtime_seconds=0.1,
+                stdout="ok\n",
+                stderr="",
+            ),
+        )
+        self.assertEqual(decision.status, "discard")
+        self.assertIn("violation_count", decision.reason)
+
+
+if __name__ == "__main__":
+    unittest.main()