feat: add execution, scoring, and decision modules
parent 3172ce264b
commit 97ae78f121

engine/decision_engine.py (new file, 76 lines)
@@ -0,0 +1,76 @@
from __future__ import annotations

from engine.models import ConstraintSpec, DecisionResult, ObjectiveSpec, RunResult, ScoreResult


def _constraint_failed(candidate: ScoreResult, constraint: ConstraintSpec) -> bool:
    value = candidate.metrics.get(constraint.metric)
    if value is None:
        return True

    if constraint.op == "<=":
        return value > constraint.value
    if constraint.op == ">=":
        return value < constraint.value
    if constraint.op == "==":
        return value != constraint.value
    return True


def decide_candidate(
    baseline: float | None,
    candidate: ScoreResult,
    objective: ObjectiveSpec,
    constraints: list[ConstraintSpec],
    tie_breakers: list[dict[str, str]],
    run_result: RunResult,
) -> DecisionResult:
    if run_result.exit_code != 0:
        return DecisionResult(
            status="crash",
            reason=f"command failed with exit code {run_result.exit_code}",
            baseline_score=baseline,
            candidate_score=candidate.primary_score,
        )

    failed_constraints = [
        constraint.metric
        for constraint in constraints
        if _constraint_failed(candidate, constraint)
    ]
    if failed_constraints:
        return DecisionResult(
            status="discard",
            reason=f"constraint failure: {', '.join(failed_constraints)}",
            baseline_score=baseline,
            candidate_score=candidate.primary_score,
            constraint_failures=failed_constraints,
        )

    if baseline is None:
        return DecisionResult(
            status="keep",
            reason="no baseline available",
            baseline_score=None,
            candidate_score=candidate.primary_score,
        )

    if objective.direction == "maximize":
        better = candidate.primary_score > baseline
    else:
        better = candidate.primary_score < baseline

    if better:
        return DecisionResult(
            status="keep",
            reason="candidate improved primary score",
            baseline_score=baseline,
            candidate_score=candidate.primary_score,
        )

    return DecisionResult(
        status="discard",
        reason="candidate did not improve primary score",
        baseline_score=baseline,
        candidate_score=candidate.primary_score,
    )
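Reviewer note: a minimal sketch of the intended call pattern for decide_candidate, for context. The ScoreResult, ObjectiveSpec, ConstraintSpec, and RunResult constructor arguments mirror the test added below; anything beyond those field names is an assumption, not part of this commit.

from pathlib import Path

from engine.decision_engine import decide_candidate
from engine.models import ConstraintSpec, ObjectiveSpec, RunResult, ScoreResult

decision = decide_candidate(
    baseline=3.0,
    candidate=ScoreResult(primary_score=3.5, metrics={"violation_count": 0}, raw_output={}),
    objective=ObjectiveSpec(primary_metric="score", direction="maximize"),
    constraints=[ConstraintSpec(metric="violation_count", op="<=", value=0)],
    tie_breakers=[],  # accepted by the signature but not yet consulted anywhere in this diff
    run_result=RunResult(command="true", cwd=Path("."), exit_code=0,
                         runtime_seconds=0.0, stdout="", stderr=""),
)
# 3.5 > 3.0 under "maximize" and violation_count <= 0 holds, so status == "keep".
print(decision.status)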
engine/runner.py (new file, 30 lines)

@@ -0,0 +1,30 @@
from __future__ import annotations

import subprocess
import time
from pathlib import Path

from engine.models import RunResult


def run_command(command: str, cwd: Path, timeout_seconds: int) -> RunResult:
    start = time.perf_counter()
    completed = subprocess.run(
        command,
        cwd=str(cwd),
        shell=True,
        capture_output=True,
        text=True,
        encoding="utf-8",
        timeout=timeout_seconds,
        check=False,
    )
    runtime_seconds = time.perf_counter() - start
    return RunResult(
        command=command,
        cwd=cwd,
        exit_code=completed.returncode,
        runtime_seconds=runtime_seconds,
        stdout=completed.stdout,
        stderr=completed.stderr,
    )
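One behavior worth flagging in review: subprocess.run raises subprocess.TimeoutExpired rather than returning when the timeout elapses, so run_command propagates that exception instead of producing a RunResult. If a timeout is meant to surface as a failed run, a caller-side wrapper along these lines would work (the -1 exit code and the stderr text are assumed conventions, not part of this commit):

import subprocess
from pathlib import Path

from engine.models import RunResult
from engine.runner import run_command


def run_command_safe(command: str, cwd: Path, timeout_seconds: int) -> RunResult:
    """Like run_command, but converts a timeout into a synthetic failed RunResult."""
    try:
        return run_command(command, cwd, timeout_seconds)
    except subprocess.TimeoutExpired:
        # Assumed convention: report the timeout as a nonzero exit code so
        # decide_candidate classifies the run as a crash.
        return RunResult(
            command=command,
            cwd=cwd,
            exit_code=-1,
            runtime_seconds=float(timeout_seconds),
            stdout="",
            stderr=f"timed out after {timeout_seconds}s",
        )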
engine/scorer.py (new file, 21 lines)

@@ -0,0 +1,21 @@
from __future__ import annotations

import json

from engine.models import ScoreResult


def parse_score_output(output: str, score_field: str, metrics_field: str) -> ScoreResult:
    parsed = json.loads(output)
    if not isinstance(parsed, dict):
        raise ValueError("score output must be a JSON object")

    metrics = parsed.get(metrics_field, {})
    if not isinstance(metrics, dict):
        raise ValueError(f"{metrics_field} must be a JSON object")

    return ScoreResult(
        primary_score=float(parsed[score_field]),
        metrics=metrics,
        raw_output=parsed,
    )
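For context, the expected scorer payload is sketched below (the shape matches the test that follows; nothing beyond it is assumed). Note that a missing score_field surfaces as a KeyError from parsed[score_field] rather than a ValueError, which may be worth unifying in a follow-up.

from engine.scorer import parse_score_output

# Expected shape: a top-level JSON object with a numeric score field and a
# nested metrics object; both field names are configurable via the parameters.
score = parse_score_output(
    '{"score": 4.5, "metrics": {"violation_count": 0}}',
    score_field="score",
    metrics_field="metrics",
)
assert score.primary_score == 4.5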
tests/test_execution_pipeline.py (new file, 52 lines)

@@ -0,0 +1,52 @@
from pathlib import Path
import tempfile
import unittest

from engine.decision_engine import decide_candidate
from engine.models import ConstraintSpec, ObjectiveSpec, RunResult, ScoreResult
from engine.runner import run_command
from engine.scorer import parse_score_output


class ExecutionPipelineTest(unittest.TestCase):
    def test_run_command_captures_stdout(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            result = run_command("python -c \"print('ok')\"", Path(tmp), timeout_seconds=5)
            self.assertEqual(result.exit_code, 0)
            self.assertIn("ok", result.stdout)

    def test_parse_score_output_reads_primary_score(self) -> None:
        score = parse_score_output(
            '{"score": 4.5, "metrics": {"violation_count": 0}}',
            score_field="score",
            metrics_field="metrics",
        )
        self.assertEqual(score.primary_score, 4.5)
        self.assertEqual(score.metrics["violation_count"], 0)

    def test_decide_candidate_rejects_constraint_failures(self) -> None:
        decision = decide_candidate(
            baseline=3.0,
            candidate=ScoreResult(
                primary_score=5.0,
                metrics={"violation_count": 1},
                raw_output={"score": 5.0, "metrics": {"violation_count": 1}},
            ),
            objective=ObjectiveSpec(primary_metric="score", direction="maximize"),
            constraints=[ConstraintSpec(metric="violation_count", op="<=", value=0)],
            tie_breakers=[],
            run_result=RunResult(
                command="python -c \"print('ok')\"",
                cwd=Path("."),
                exit_code=0,
                runtime_seconds=0.1,
                stdout="ok\n",
                stderr="",
            ),
        )
        self.assertEqual(decision.status, "discard")
        self.assertIn("violation_count", decision.reason)


if __name__ == "__main__":
    unittest.main()
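The tests use only the standard library, so they should run with the stock unittest runner, assuming the repository root is on the import path:

python -m unittest tests.test_execution_pipeline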