feat(chart): benchmark scores with production disclaimer
This commit is contained in:
51
src/charts/benchmark_disclaimer.py
Normal file
51
src/charts/benchmark_disclaimer.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""Benchmark Scores with Production Disclaimer (Optional/Secondary)"""
|
||||
import sys
|
||||
from pathlib import Path
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
from src.data.agent_adoption import benchmark_scores_with_disclaimer
|
||||
from src.utils.styling import get_theme, EXPORT_DPI, BUBBLE_ZONE, WARNING_ZONE, GRAY_LIGHT
|
||||
|
||||
def plot_benchmark_disclaimer(
    output_path: str = "output/charts/12b_benchmarks_with_disclaimer.png",
) -> str:
    """Render the SWE-bench bar chart with its lab-only disclaimer and save it.

    Draws one horizontal bar per entry of ``benchmark_scores_with_disclaimer``
    (reading each entry's ``"model"`` and ``"swe_bench_verified_percent"``
    keys), labels every bar with its score, overlays a boxed disclaimer and
    writes the figure to ``output_path``.

    Args:
        output_path: Destination image file. A relative path is resolved
            against the current working directory. Missing parent
            directories are created before saving.

    Returns:
        ``output_path``, so callers can report where the chart was written.
    """
    plt.rcParams.update(get_theme())
    fig, ax = plt.subplots(figsize=(10, 5))

    # Close the figure even if drawing or saving fails, so repeated calls
    # (e.g. from a batch runner) do not accumulate open figures.
    try:
        models = [d["model"] for d in benchmark_scores_with_disclaimer]
        scores = [
            d["swe_bench_verified_percent"]
            for d in benchmark_scores_with_disclaimer
        ]

        # Scores below 90 get WARNING_ZONE; 90 and above get BUBBLE_ZONE.
        colors = [WARNING_ZONE if s < 90 else BUBBLE_ZONE for s in scores]
        bars = ax.barh(models, scores, color=colors, edgecolor="white", height=0.5)

        # Print each score just right of its bar, vertically centred on it.
        for bar, val in zip(bars, scores):
            ax.text(
                val + 1,
                bar.get_y() + bar.get_height() / 2,
                f"{val}%",
                va="center",
                fontsize=12,
                fontweight="bold",
            )

        ax.set_xlabel("SWE-bench Verified Score (%)", fontsize=11)
        ax.set_title(
            "SWE-bench Scores — Lab Benchmark Only",
            fontsize=14,
            fontweight="bold",
        )
        ax.set_xlim(0, 100)
        ax.grid(True, alpha=0.3, axis="x")

        # LARGE DISCLAIMER — must be very prominent.
        # NOTE(review): the box is anchored at y=0.12 in figure coordinates and
        # no bottom margin is reserved here — confirm the theme leaves enough
        # room so the box does not cover the x-axis label.
        fig.text(
            0.5,
            0.12,
            "⚠️ LAB BENCHMARK ONLY ⚠️\n"
            "Does NOT measure production capability, debugging, architecture,\n"
            "or code quality. Real-world performance may differ significantly.\n"
            "See chart 12_developer_ai_reality.png for real-world data.",
            ha="center",
            fontsize=12,
            fontweight="bold",
            color=BUBBLE_ZONE,
            bbox=dict(
                boxstyle="round,pad=0.8",
                facecolor=GRAY_LIGHT,
                edgecolor=BUBBLE_ZONE,
                linewidth=3,
            ),
        )

        # savefig does not create directories; make sure the target exists so
        # a fresh checkout does not fail with FileNotFoundError.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(
            output_path,
            dpi=EXPORT_DPI,
            facecolor=fig.get_facecolor(),
            edgecolor="none",
        )
    finally:
        plt.close(fig)

    return output_path
|
||||
|
||||
def main():
    """Generate the benchmark disclaimer chart and print where it was saved."""
    saved_to = plot_benchmark_disclaimer()
    print(f"Chart saved: {saved_to}")
|
||||
|
||||
# Export the chart only when this file is executed as a script, so importing
# the module (e.g. to reuse plot_benchmark_disclaimer) has no side effect here.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user