---
# Task definition `skill-quality`: runs an evaluation command against a single
# Markdown artifact and scores the result with a second command.
#
# NOTE(review): the original indentation of this file was lost (it was copied
# from a file viewer, together with line-count/size residue and `|` gutter
# lines). The nesting below is reconstructed from key order and section
# headers; confirm it against the consuming tool's schema, in particular the
# placement of `max_files_per_iteration`, `max_changed_lines`, `parse`, and
# the top-level `constraints` list.

id: skill-quality
description: Deterministic sample task for scoring a skill document.

# Files the task is allowed to operate on.
artifacts:
  include:
    - fixtures/SKILL.md
  exclude: []
  max_files_per_iteration: 1

# How candidate changes are produced and how large they may be.
mutation:
  mode: direct_edit
  allowed_file_types:
    - .md
  max_changed_lines: 20

# Command that evaluates the artifact. Relative to `cwd`, the `--output` path
# `../../work/skill-run.json` resolves to `work/skill-run.json` two levels up,
# which is the same relative path the scorer command reads below.
runner:
  # Folded with `>-`: the lines are joined by single spaces into one command
  # string with no trailing newline.
  command: >-
    python ../../scripts/evaluate_skill_task.py
    --task-dir .
    --artifact fixtures/SKILL.md
    --output ../../work/skill-run.json
  cwd: tasks/skill-quality
  timeout_seconds: 30

# Command that turns the runner output into a score.
# NOTE(review): no `cwd` is given here; presumably it runs from the repository
# root so that `work/skill-run.json` matches the runner's output — confirm.
scorer:
  type: command
  command: python scripts/score_skill_task.py --input work/skill-run.json
  parse:
    format: json
    score_field: score
    metrics_field: metrics

# Metric to optimize and the direction of improvement.
objective:
  primary_metric: score
  direction: maximize

# Conditions on reported metrics; here `violation_count <= 0`.
constraints:
  - metric: violation_count
    # Quoted defensively so the operator is always read as a plain string.
    op: '<='
    value: 0

# Rules for keeping or discarding a candidate.
policy:
  keep_if: better_primary
  tie_breakers: []
  on_failure: discard

# Limits on the run.
budget:
  max_iterations: 1
  max_failures: 1

# Where results and candidates are written.
logging:
  results_file: work/results.jsonl
  candidate_dir: work/candidates