"""Benchmark Scores with Production Disclaimer (Optional/Secondary).

Draws a horizontal bar chart of SWE-bench Verified scores and overlays a
prominent disclaimer box stating that the numbers are lab benchmarks only.
The chart is written to disk as a PNG; nothing is shown interactively.
"""
import sys
from pathlib import Path

# Put the directory two levels above this file's folder on sys.path so the
# `src.*` imports below resolve when this module is executed as a script.
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))

import matplotlib

# The backend must be selected before pyplot is imported.
matplotlib.use("Agg")
import matplotlib.pyplot as plt

from src.data.agent_adoption import benchmark_scores_with_disclaimer
from src.utils.styling import get_theme, EXPORT_DPI, BUBBLE_ZONE, WARNING_ZONE, GRAY_LIGHT

# Single source of truth for where the chart is written by default.
DEFAULT_OUTPUT_PATH = "output/charts/12b_benchmarks_with_disclaimer.png"


def plot_benchmark_disclaimer(output_path: str = DEFAULT_OUTPUT_PATH) -> str:
    """Render the benchmark bar chart with its disclaimer and save it as a PNG.

    Args:
        output_path: Destination file for the image. Defaults to
            ``DEFAULT_OUTPUT_PATH``. A relative path is resolved against the
            current working directory. Missing parent directories are created.

    Returns:
        ``output_path`` unchanged, i.e. the location the chart was saved to.

    Raises:
        OSError: If the output directory cannot be created or the file
            cannot be written.
    """
    plt.rcParams.update(get_theme())
    fig, ax = plt.subplots(figsize=(10, 5))

    # pyplot keeps every open figure registered until it is closed, so make
    # sure the figure is released even when drawing or saving fails.
    try:
        models = [d["model"] for d in benchmark_scores_with_disclaimer]
        scores = [d["swe_bench_verified_percent"] for d in benchmark_scores_with_disclaimer]

        # Scores below 90 are drawn in WARNING_ZONE, all others in BUBBLE_ZONE.
        colors = [WARNING_ZONE if s < 90 else BUBBLE_ZONE for s in scores]
        bars = ax.barh(models, scores, color=colors, edgecolor="white", height=0.5)

        # Put the numeric value just to the right of each bar, vertically centred.
        for bar, val in zip(bars, scores):
            ax.text(val + 1, bar.get_y() + bar.get_height() / 2, f"{val}%",
                    va="center", fontsize=12, fontweight="bold")

        ax.set_xlabel("SWE-bench Verified Score (%)", fontsize=11)
        ax.set_title("SWE-bench Scores — Lab Benchmark Only", fontsize=14, fontweight="bold")
        ax.set_xlim(0, 100)
        ax.grid(True, alpha=0.3, axis="x")

        # LARGE DISCLAIMER — must be very prominent
        # NOTE(review): the box is anchored at y=0.12 in figure coordinates and
        # no bottom margin is reserved here; unless get_theme() adjusts the
        # subplot layout it may overlap the x-axis label — confirm visually.
        fig.text(0.5, 0.12,
                 "⚠️ LAB BENCHMARK ONLY ⚠️\n"
                 "Does NOT measure production capability, debugging, architecture,\n"
                 "or code quality. Real-world performance may differ significantly.\n"
                 "See chart 12_developer_ai_reality.png for real-world data.",
                 ha="center", fontsize=12, fontweight="bold", color=BUBBLE_ZONE,
                 bbox=dict(boxstyle="round,pad=0.8", facecolor=GRAY_LIGHT,
                           edgecolor=BUBBLE_ZONE, linewidth=3))

        # savefig does not create missing directories; do it explicitly so a
        # fresh checkout without output/charts/ does not fail.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(output_path, dpi=EXPORT_DPI,
                    facecolor=fig.get_facecolor(), edgecolor="none")
    finally:
        plt.close(fig)
    return output_path


def main():
    """Generate the chart at the default location and print where it was saved."""
    path = plot_benchmark_disclaimer()
    print(f"Chart saved: {path}")


if __name__ == "__main__":
    main()