254 lines
8.3 KiB
Plaintext
254 lines
8.3 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ohuujbmsz7",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Autoresearch Experiment Analysis\n",
|
|
"\n",
|
|
"Analysis of autonomous hyperparameter tuning results from `results.tsv`."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "v3r8c77lxhs",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
"# Read results.tsv (tab-separated; expected columns: commit, val_bpb, memory_gb,\n",
"# status, description) and normalise it in a single chained expression:\n",
"#   * val_bpb / memory_gb are coerced to numbers (unparseable entries become NaN)\n",
"#   * status labels are whitespace-trimmed and upper-cased\n",
"df = pd.read_csv(\"results.tsv\", sep=\"\\t\").assign(\n",
"    val_bpb=lambda frame: pd.to_numeric(frame[\"val_bpb\"], errors=\"coerce\"),\n",
"    memory_gb=lambda frame: pd.to_numeric(frame[\"memory_gb\"], errors=\"coerce\"),\n",
"    status=lambda frame: frame[\"status\"].str.strip().str.upper(),\n",
")\n",
"\n",
"# Report what was loaded, then preview the first rows.\n",
"print(f\"Total experiments: {len(df)}\")\n",
"print(f\"Columns: {list(df.columns)}\")\n",
"df.head(10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0v37bji707o",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Tally how many experiments ended in each status.\n",
"counts = df[\"status\"].value_counts()\n",
"print(\"Experiment outcomes:\")\n",
"print(counts.to_string())\n",
"\n",
"# A status that never occurs counts as zero.\n",
"n_keep, n_discard, n_crash = (\n",
"    counts.get(label, 0) for label in (\"KEEP\", \"DISCARD\", \"CRASH\")\n",
")\n",
"\n",
"# The keep rate is computed over KEEP + DISCARD runs only.\n",
"n_decided = n_keep + n_discard\n",
"if n_decided > 0:\n",
"    print(f\"\\nKeep rate: {n_keep}/{n_decided} = {n_keep / n_decided:.1%}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "j887idiuu5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# List every KEEP experiment (the improvements that stuck), labelled with its\n",
"# row index in df.\n",
"kept = df.loc[df[\"status\"] == \"KEEP\"].copy()\n",
"print(f\"KEPT experiments ({len(kept)} total):\\n\")\n",
"for row_label, record in kept.iterrows():\n",
"    val_bpb = record[\"val_bpb\"]\n",
"    mem_gb = record[\"memory_gb\"]\n",
"    summary = record[\"description\"]\n",
"    print(f\" #{row_label:3d} bpb={val_bpb:.6f} mem={mem_gb:.1f}GB {summary}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "99l0xlw0lv",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Val BPB Over Time\n",
|
|
"\n",
|
|
"Track how the best (kept) val_bpb evolves as experiments progress. The running minimum shows the \"frontier\" -- the best result achieved so far."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "79jh74veqg9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Progress figure: val_bpb per experiment, kept runs highlighted, with a step\n",
"# line tracing the best kept val_bpb so far. Saved to progress.png.\n",
"fig, ax = plt.subplots(figsize=(16, 8))\n",
"\n",
"# Filter out crashes for plotting. The original df index is kept (no\n",
"# reset_index), so x positions are the same experiment numbers that the other\n",
"# cells print and that n_total in the title counts.\n",
"valid = df[df[\"status\"] != \"CRASH\"].copy()\n",
"\n",
"# Baseline = val_bpb of the first experiment that did not crash\n",
"baseline_bpb = valid[\"val_bpb\"].iloc[0]\n",
"\n",
"# Only plot points at or slightly above baseline (the interesting region);\n",
"# the tolerance keeps points that sit a hair above the baseline visible.\n",
"BASELINE_TOLERANCE = 0.0005\n",
"below = valid[valid[\"val_bpb\"] <= baseline_bpb + BASELINE_TOLERANCE]\n",
"\n",
"# Plot discarded as faint background dots\n",
"disc = below[below[\"status\"] == \"DISCARD\"]\n",
"ax.scatter(disc.index, disc[\"val_bpb\"],\n",
"           c=\"#cccccc\", s=12, alpha=0.5, zorder=2, label=\"Discarded\")\n",
"\n",
"# Plot kept experiments as prominent green dots\n",
"kept_v = below[below[\"status\"] == \"KEEP\"]\n",
"ax.scatter(kept_v.index, kept_v[\"val_bpb\"],\n",
"           c=\"#2ecc71\", s=50, zorder=4, label=\"Kept\", edgecolors=\"black\", linewidths=0.5)\n",
"\n",
"# Running minimum step line: best kept val_bpb seen so far at each kept run\n",
"kept_mask = valid[\"status\"] == \"KEEP\"\n",
"kept_idx = valid.index[kept_mask]\n",
"kept_bpb = valid.loc[kept_mask, \"val_bpb\"]\n",
"running_min = kept_bpb.cummin()\n",
"ax.step(kept_idx, running_min, where=\"post\", color=\"#27ae60\",\n",
"        linewidth=2, alpha=0.7, zorder=3, label=\"Running best\")\n",
"\n",
"# Label each kept experiment with its description (truncated to 45 characters)\n",
"for idx, bpb in zip(kept_idx, kept_bpb):\n",
"    desc = str(valid.loc[idx, \"description\"]).strip()\n",
"    if len(desc) > 45:\n",
"        desc = desc[:42] + \"...\"\n",
"\n",
"    ax.annotate(desc, (idx, bpb),\n",
"                textcoords=\"offset points\",\n",
"                xytext=(6, 6), fontsize=6.0,\n",
"                color=\"#1a7a3a\", alpha=0.9,\n",
"                rotation=30, ha=\"left\", va=\"bottom\")\n",
"\n",
"# Reference lines\n",
"ax.axhline(y=baseline_bpb, color=\"#e74c3c\", linewidth=1,\n",
"           linestyle=\"--\", alpha=0.5, label=f\"Baseline ({baseline_bpb:.4f})\")\n",
"best = kept_bpb.min()  # NaN when there are no kept experiments\n",
"if np.isfinite(best):\n",
"    ax.axhline(y=best, color=\"#27ae60\", linewidth=1,\n",
"               linestyle=\"--\", alpha=0.5, label=f\"Best ({best:.4f})\")\n",
"\n",
"n_total = len(df)\n",
"n_kept = len(df[df[\"status\"] == \"KEEP\"])\n",
"ax.set_xlabel(\"Experiment #\", fontsize=12)\n",
"ax.set_ylabel(\"Validation BPB (lower is better)\", fontsize=12)\n",
"ax.set_title(f\"Autoresearch Progress: {n_total} Experiments, {n_kept} Kept Improvements\", fontsize=14)\n",
"ax.legend(loc=\"upper right\", fontsize=9)\n",
"ax.grid(True, alpha=0.2)\n",
"\n",
"# Y-axis: from just below best to just above baseline. Left on autoscale when\n",
"# the margin is NaN (no kept runs) or not positive (best is not below the\n",
"# baseline), because those limits would be invalid, zero-height or inverted.\n",
"margin = (baseline_bpb - best) * 0.15\n",
"if np.isfinite(margin) and margin > 0:\n",
"    ax.set_ylim(best - margin, baseline_bpb + margin)\n",
"\n",
"plt.tight_layout()\n",
"plt.savefig(\"progress.png\", dpi=150, bbox_inches=\"tight\")\n",
"plt.show()\n",
"print(\"Saved to progress.png\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ce48phivyou",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Summary Statistics"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "re1f8za8oj9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Summary stats: baseline vs. best kept val_bpb, then one line per kept run.\n",
"kept = df[df[\"status\"] == \"KEEP\"].copy()\n",
"\n",
"if kept.empty:\n",
"    # Without kept rows there is no best run to report (idxmin would raise).\n",
"    print(\"No KEEP experiments found; nothing to summarize.\")\n",
"else:\n",
"    # Baseline = first experiment that did not crash, the same definition the\n",
"    # progress plot uses, so a crashed first row is never taken as the baseline.\n",
"    baseline_bpb = df.loc[df[\"status\"] != \"CRASH\", \"val_bpb\"].iloc[0]\n",
"    best_bpb = kept[\"val_bpb\"].min()\n",
"    best_row = kept.loc[kept[\"val_bpb\"].idxmin()]\n",
"\n",
"    print(f\"Baseline val_bpb: {baseline_bpb:.6f}\")\n",
"    print(f\"Best val_bpb: {best_bpb:.6f}\")\n",
"    print(f\"Total improvement: {baseline_bpb - best_bpb:.6f} ({(baseline_bpb - best_bpb) / baseline_bpb * 100:.2f}%)\")\n",
"    print(f\"Best experiment: {best_row['description']}\")\n",
"    print()\n",
"\n",
"    # How many experiments to find each improvement: the df row index of each\n",
"    # kept run is printed as its experiment number.\n",
"    print(\"Cumulative effort per improvement:\")\n",
"    for exp_no, row in kept.iterrows():\n",
"        desc = str(row[\"description\"]).strip()\n",
"        print(f\" Experiment #{exp_no:3d}: bpb={row['val_bpb']:.6f} {desc}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "oxri9h5c9gs",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Top Hits (Kept Experiments by Improvement)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "q86hxu10djk",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Each kept experiment's delta is measured vs the previous kept experiment's bpb\n",
"# (since experiments are cumulative -- each one builds on the last kept state)\n",
"kept = df[df[\"status\"] == \"KEEP\"].copy()\n",
"kept[\"prev_bpb\"] = kept[\"val_bpb\"].shift(1)\n",
"kept[\"delta\"] = kept[\"prev_bpb\"] - kept[\"val_bpb\"]\n",
"\n",
"# Drop the first kept row: it has no predecessor, so its delta is NaN.\n",
"# NOTE(review): this assumes the first kept row is the baseline run -- confirm.\n",
"hits = kept.iloc[1:].copy()\n",
"\n",
"# Sort by delta improvement (biggest first)\n",
"hits = hits.sort_values(\"delta\", ascending=False)\n",
"\n",
"# Header, rows and total share the same field widths (4 / 9 / 10) so the\n",
"# columns line up; a signed 6-decimal delta needs 9 characters.\n",
"print(f\"{'Rank':>4} {'Delta':>9} {'BPB':>10} Description\")\n",
"print(\"-\" * 80)\n",
"for rank, (_, row) in enumerate(hits.iterrows(), 1):\n",
"    print(f\"{rank:4d} {row['delta']:>+9.6f} {row['val_bpb']:>10.6f} {row['description']}\")\n",
"\n",
"print(f\"\\n{'':>4} {hits['delta'].sum():>+9.6f} {'':>10} TOTAL improvement over baseline\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f9bffe89",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|