140 lines
4.7 KiB
Python
140 lines
4.7 KiB
Python
from __future__ import annotations
|
|
|
|
from engine.models import ConstraintSpec, DecisionResult, ObjectiveSpec, RunResult, ScoreResult
|
|
|
|
|
|
def _constraint_failed(candidate: ScoreResult, constraint: ConstraintSpec) -> bool:
    """Return True when ``candidate`` violates ``constraint``.

    The metric named by ``constraint.metric`` is looked up in
    ``candidate.metrics``. The constraint counts as violated when:

    * the metric is absent (or stored as ``None``), or
    * ``<metric value> <op> <constraint.value>`` does not hold.

    A NaN metric value never satisfies ``<=``, ``>=`` or ``==`` and is
    therefore reported as a violation.

    Args:
        candidate: Score whose ``metrics`` mapping is inspected.
        constraint: Provides ``metric`` (name), ``op`` (one of ``"<="``,
            ``">="``, ``"=="``) and ``value`` (the bound).

    Returns:
        True if the constraint is violated, False if it is satisfied.

    Raises:
        ValueError: If the metric is present and ``constraint.op`` is not one
            of the supported operators.
    """
    value = candidate.metrics.get(constraint.metric)
    if value is None:
        # A metric that was never reported cannot be shown to satisfy anything.
        return True

    # Negate the *satisfied* comparison rather than testing the opposite one:
    # every ordering comparison involving NaN is False, so ``value > limit``
    # would let a NaN metric slip through a ``<=`` constraint unnoticed.
    if constraint.op == "<=":
        return not value <= constraint.value
    if constraint.op == ">=":
        return not value >= constraint.value
    if constraint.op == "==":
        return value != constraint.value
    raise ValueError(f"unsupported constraint operator: {constraint.op}")
|
|
|
|
|
|
def _baseline_primary_score(baseline: float | ScoreResult | None) -> float | None:
    """Reduce a baseline to its bare primary score.

    Args:
        baseline: ``None``, a plain score, or a ``ScoreResult``.

    Returns:
        ``baseline.primary_score`` for a ``ScoreResult``; otherwise the
        argument itself (so ``None`` stays ``None`` and a plain number is
        passed through untouched).
    """
    # Only a full ScoreResult needs unwrapping; everything else is already
    # in the shape the caller wants.
    if baseline is not None and isinstance(baseline, ScoreResult):
        return baseline.primary_score
    return baseline
|
|
|
|
|
|
def _apply_tie_breakers(
    baseline: ScoreResult,
    candidate: ScoreResult,
    tie_breakers: list[dict[str, str]],
) -> tuple[str, str] | None:
    """Resolve a primary-score tie using secondary metrics, in order.

    Each entry of ``tie_breakers`` names a metric under the key ``"lower"``
    (smaller is better) or ``"higher"`` (larger is better). Entries are
    tried in sequence and the first one whose metric differs between
    ``baseline`` and ``candidate`` decides the outcome.

    An entry is skipped when it has neither key, when either side lacks the
    metric (or holds ``None`` for it), or when both sides compare as neither
    smaller nor larger. If an entry carries both keys, only ``"lower"`` is
    consulted.

    Returns:
        ``("keep", reason)`` if the candidate wins, ``("discard", reason)``
        if it loses, or ``None`` when no entry was decisive.
    """
    for entry in tie_breakers:
        # Work out which direction applies and prepare both possible verdicts.
        if "lower" in entry:
            metric = entry["lower"]
            smaller_is_better = True
            win = ("keep", f"candidate won tie-breaker: lower {metric}")
            loss = ("discard", f"candidate lost tie-breaker: lower {metric}")
        elif "higher" in entry:
            metric = entry["higher"]
            smaller_is_better = False
            win = ("keep", f"candidate won tie-breaker: higher {metric}")
            loss = ("discard", f"candidate lost tie-breaker: higher {metric}")
        else:
            continue

        reference = baseline.metrics.get(metric)
        challenger = candidate.metrics.get(metric)
        if reference is None or challenger is None:
            continue

        is_smaller = challenger < reference
        is_larger = challenger > reference
        if smaller_is_better:
            candidate_won, candidate_lost = is_smaller, is_larger
        else:
            candidate_won, candidate_lost = is_larger, is_smaller

        if candidate_won:
            return win
        if candidate_lost:
            return loss
        # Still tied on this metric: fall through to the next entry.

    return None
|
|
|
|
|
|
def decide_candidate(
    baseline: float | ScoreResult | None,
    candidate: ScoreResult,
    objective: ObjectiveSpec,
    constraints: list[ConstraintSpec],
    tie_breakers: list[dict[str, str]],
    run_result: RunResult,
) -> DecisionResult:
    """Decide whether ``candidate`` should replace ``baseline``.

    The checks run in this order and the first one that applies wins:

    1. Non-zero ``run_result.exit_code`` -> status ``"crash"`` (the candidate
       score is reported as ``None``).
    2. Any violated constraint -> ``"discard"``, listing the offending
       metric names in ``constraint_failures``.
    3. No baseline score -> ``"keep"``.
    4. Primary score strictly better -> ``"keep"``; strictly worse ->
       ``"discard"``. "Better" means larger when ``objective.direction`` is
       ``"maximize"`` and smaller for any other direction value.
    5. Otherwise the scores are treated as tied. If ``baseline`` is a
       ``ScoreResult`` the tie-breakers are consulted; when they are not
       decisive (or the baseline is a bare number) the result is
       ``"discard"``.

    NOTE: scores that compare as neither greater nor smaller than the
    baseline (equal values, but also NaN) reach step 5.
    """
    reference_score = _baseline_primary_score(baseline)

    exit_code = run_result.exit_code
    if exit_code != 0:
        # Built directly: a crashed run reports no candidate score at all.
        return DecisionResult(
            status="crash",
            reason=f"command failed with exit code {exit_code}",
            baseline_score=reference_score,
            candidate_score=None,
        )

    def verdict(status: str, reason: str, **extra) -> DecisionResult:
        # Every non-crash outcome reports the same pair of scores.
        return DecisionResult(
            status=status,
            reason=reason,
            baseline_score=reference_score,
            candidate_score=candidate.primary_score,
            **extra,
        )

    violated = []
    for spec in constraints:
        if _constraint_failed(candidate, spec):
            violated.append(spec.metric)
    if violated:
        return verdict(
            "discard",
            f"constraint failure: {', '.join(violated)}",
            constraint_failures=violated,
        )

    if reference_score is None:
        return verdict("keep", "no baseline available")

    score = candidate.primary_score
    above = score > reference_score
    below = score < reference_score
    if objective.direction == "maximize":
        improved, regressed = above, below
    else:
        improved, regressed = below, above

    if improved:
        return verdict("keep", "candidate improved primary score")
    if regressed:
        return verdict("discard", "candidate did not improve primary score")

    # Tie on the primary score: secondary metrics exist only when the
    # baseline is a full ScoreResult rather than a bare number.
    if isinstance(baseline, ScoreResult):
        outcome = _apply_tie_breakers(baseline, candidate, tie_breakers)
        if outcome is not None:
            status, reason = outcome
            return verdict(status, reason)

    return verdict(
        "discard",
        "candidate tied primary score and did not improve tie-breakers",
    )
|