feat: add execution, scoring, and decision modules

sladro 2026-04-02 11:55:55 +08:00
parent 3172ce264b
commit 97ae78f121
4 changed files with 179 additions and 0 deletions

76
engine/decision_engine.py Normal file

@@ -0,0 +1,76 @@
from __future__ import annotations

from engine.models import ConstraintSpec, DecisionResult, ObjectiveSpec, RunResult, ScoreResult


def _constraint_failed(candidate: ScoreResult, constraint: ConstraintSpec) -> bool:
    """Return True when the candidate's metric is missing or violates the constraint."""
    value = candidate.metrics.get(constraint.metric)
    if value is None:
        return True
    if constraint.op == "<=":
        return value > constraint.value
    if constraint.op == ">=":
        return value < constraint.value
    if constraint.op == "==":
        return value != constraint.value
    # Unknown operators are treated as failures rather than silently passing.
    return True


def decide_candidate(
    baseline: float | None,
    candidate: ScoreResult,
    objective: ObjectiveSpec,
    constraints: list[ConstraintSpec],
    tie_breakers: list[dict[str, str]],
    run_result: RunResult,
) -> DecisionResult:
    """Decide whether to keep or discard a candidate run.

    Checks run in order: crash, constraint failures, missing baseline, then
    primary-score comparison. tie_breakers is accepted but not yet applied.
    """
    if run_result.exit_code != 0:
        return DecisionResult(
            status="crash",
            reason=f"command failed with exit code {run_result.exit_code}",
            baseline_score=baseline,
            candidate_score=candidate.primary_score,
        )
    failed_constraints = [
        constraint.metric
        for constraint in constraints
        if _constraint_failed(candidate, constraint)
    ]
    if failed_constraints:
        return DecisionResult(
            status="discard",
            reason=f"constraint failure: {', '.join(failed_constraints)}",
            baseline_score=baseline,
            candidate_score=candidate.primary_score,
            constraint_failures=failed_constraints,
        )
    if baseline is None:
        return DecisionResult(
            status="keep",
            reason="no baseline available",
            baseline_score=None,
            candidate_score=candidate.primary_score,
        )
    if objective.direction == "maximize":
        better = candidate.primary_score > baseline
    else:
        better = candidate.primary_score < baseline
    if better:
        return DecisionResult(
            status="keep",
            reason="candidate improved primary score",
            baseline_score=baseline,
            candidate_score=candidate.primary_score,
        )
    return DecisionResult(
        status="discard",
        reason="candidate did not improve primary score",
        baseline_score=baseline,
        candidate_score=candidate.primary_score,
    )
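For reference, a minimal sketch (not part of this commit) of the "keep" path, which the tests below do not exercise; the model fields match those used above:

from pathlib import Path

from engine.decision_engine import decide_candidate
from engine.models import ObjectiveSpec, RunResult, ScoreResult

decision = decide_candidate(
    baseline=3.0,
    candidate=ScoreResult(primary_score=4.0, metrics={}, raw_output={"score": 4.0}),
    objective=ObjectiveSpec(primary_metric="score", direction="maximize"),
    constraints=[],
    tie_breakers=[],
    run_result=RunResult(
        command="echo ok",
        cwd=Path("."),
        exit_code=0,
        runtime_seconds=0.0,
        stdout="ok\n",
        stderr="",
    ),
)
assert decision.status == "keep"  # 4.0 > 3.0 on a maximize objective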

30
engine/runner.py Normal file

@@ -0,0 +1,30 @@
from __future__ import annotations

import subprocess
import time
from pathlib import Path

from engine.models import RunResult


def run_command(command: str, cwd: Path, timeout_seconds: int) -> RunResult:
    """Run a shell command and capture exit code, output, and wall-clock runtime.

    A non-zero exit code does not raise (check=False); subprocess.TimeoutExpired
    propagates to the caller if the command exceeds timeout_seconds.
    """
    start = time.perf_counter()
    completed = subprocess.run(
        command,
        cwd=str(cwd),
        shell=True,
        capture_output=True,
        text=True,
        encoding="utf-8",
        timeout=timeout_seconds,
        check=False,
    )
    runtime_seconds = time.perf_counter() - start
    return RunResult(
        command=command,
        cwd=cwd,
        exit_code=completed.returncode,
        runtime_seconds=runtime_seconds,
        stdout=completed.stdout,
        stderr=completed.stderr,
    )
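Because run_command lets subprocess.TimeoutExpired propagate, a caller that wants to treat a timeout as a failed run has to catch it; a rough sketch under that assumption (the handling shown is illustrative, not code from this commit):

import subprocess
from pathlib import Path

from engine.runner import run_command

try:
    # "sleep 60" assumes a POSIX shell; any long-running command works the same way.
    result = run_command("sleep 60", Path("."), timeout_seconds=5)
except subprocess.TimeoutExpired:
    # No RunResult exists for a timed-out command, so the caller decides what
    # to do, e.g. discard the candidate the same way a crash is discarded.
    result = None
    print("run timed out; discarding candidate")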

21
engine/scorer.py Normal file

@@ -0,0 +1,21 @@
from __future__ import annotations

import json

from engine.models import ScoreResult


def parse_score_output(output: str, score_field: str, metrics_field: str) -> ScoreResult:
    """Parse a scoring command's JSON output into a ScoreResult.

    Raises ValueError if the output is not a JSON object or the metrics field
    is not an object; raises KeyError if score_field is missing.
    """
    parsed = json.loads(output)
    if not isinstance(parsed, dict):
        raise ValueError("score output must be a JSON object")
    metrics = parsed.get(metrics_field, {})
    if not isinstance(metrics, dict):
        raise ValueError(f"{metrics_field} must be a JSON object")
    return ScoreResult(
        primary_score=float(parsed[score_field]),
        metrics=metrics,
        raw_output=parsed,
    )
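A scoring command therefore only needs to print a single JSON object to stdout whose field names match whatever the caller passes as score_field and metrics_field; a small sketch of such output (the script is illustrative, not part of this commit):

import json

# Printed by a hypothetical scoring script; matches the shape used in the tests below.
print(json.dumps({"score": 4.5, "metrics": {"violation_count": 0}}))
# parse_score_output(stdout, score_field="score", metrics_field="metrics") would
# then return a ScoreResult with primary_score=4.5 and metrics={"violation_count": 0}.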


@@ -0,0 +1,52 @@
from pathlib import Path
import tempfile
import unittest

from engine.decision_engine import decide_candidate
from engine.models import ConstraintSpec, ObjectiveSpec, RunResult, ScoreResult
from engine.runner import run_command
from engine.scorer import parse_score_output


class ExecutionPipelineTest(unittest.TestCase):
    def test_run_command_captures_stdout(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            result = run_command("python -c \"print('ok')\"", Path(tmp), timeout_seconds=5)
        self.assertEqual(result.exit_code, 0)
        self.assertIn("ok", result.stdout)

    def test_parse_score_output_reads_primary_score(self) -> None:
        score = parse_score_output(
            '{"score": 4.5, "metrics": {"violation_count": 0}}',
            score_field="score",
            metrics_field="metrics",
        )
        self.assertEqual(score.primary_score, 4.5)
        self.assertEqual(score.metrics["violation_count"], 0)

    def test_decide_candidate_rejects_constraint_failures(self) -> None:
        decision = decide_candidate(
            baseline=3.0,
            candidate=ScoreResult(
                primary_score=5.0,
                metrics={"violation_count": 1},
                raw_output={"score": 5.0, "metrics": {"violation_count": 1}},
            ),
            objective=ObjectiveSpec(primary_metric="score", direction="maximize"),
            constraints=[ConstraintSpec(metric="violation_count", op="<=", value=0)],
            tie_breakers=[],
            run_result=RunResult(
                command="python -c \"print('ok')\"",
                cwd=Path("."),
                exit_code=0,
                runtime_seconds=0.1,
                stdout="ok\n",
                stderr="",
            ),
        )
        self.assertEqual(decision.status, "discard")
        self.assertIn("violation_count", decision.reason)


if __name__ == "__main__":
    unittest.main()
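Putting the three modules together, one iteration of an end-to-end loop might look like the sketch below; the scoring command name is hypothetical and this wiring is an illustration, not code from this commit:

from pathlib import Path

from engine.decision_engine import decide_candidate
from engine.models import ConstraintSpec, ObjectiveSpec, ScoreResult
from engine.runner import run_command
from engine.scorer import parse_score_output

# Hypothetical scoring command: it must print a JSON object such as
# {"score": ..., "metrics": {...}} to stdout.
result = run_command("python score_candidate.py", Path("."), timeout_seconds=60)

if result.exit_code == 0:
    candidate = parse_score_output(result.stdout, score_field="score", metrics_field="metrics")
else:
    # decide_candidate reports the crash from run_result alone, but it still
    # expects a ScoreResult, so the caller supplies a placeholder here.
    candidate = ScoreResult(primary_score=0.0, metrics={}, raw_output={})

decision = decide_candidate(
    baseline=3.0,
    candidate=candidate,
    objective=ObjectiveSpec(primary_metric="score", direction="maximize"),
    constraints=[ConstraintSpec(metric="violation_count", op="<=", value=0)],
    tie_breakers=[],
    run_result=result,
)
print(decision.status, decision.reason)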