feat(data): add agent adoption surveys and real-world developer AI data

2026-06-04 16:58:41 -05:00
parent 579d7af709
commit 3e81f901fe
1 changed files with 391 additions and 0 deletions
--- a/src/data/agent_adoption.py
+++ b/src/data/agent_adoption.py
@@ -0,0 +1,391 @@
+"""Agent Adoption Surveys and Real-World Developer AI Data
+
+Source: LangChain, McKinsey, PwC surveys; GitHub, JetBrains, Stack Overflow, DX DevCycle;
+        academic and industry studies; Omdia, BCC Research, MarketsandMarkets, Grand View Research.
+Retrieved: June 2026
+
+IMPORTANT: This module prioritizes REAL-WORLD data over lab benchmarks.
+Benchmark scores are included only with heavy disclaimers.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+
+# ---------------------------------------------------------------------------
+# Module metadata
+# ---------------------------------------------------------------------------
+MODULE_NAME: str = "agent_adoption"  # matches the file name src/data/agent_adoption.py
+MODULE_VERSION: str = "1.0.0"
+DATA_RETRIEVED: str = "June 2026"  # free-text month/year label, not an ISO date
+MODULE_DISCLAIMER: str = (  # same wording as the IMPORTANT note in the module docstring
+    "This module prioritizes REAL-WORLD data over lab benchmarks. "
+    "Benchmark scores are included only with heavy disclaimers."
+)
+
+
+# ---------------------------------------------------------------------------
+# Dataset J: Agent Adoption Surveys
+# ---------------------------------------------------------------------------
+
+agent_survey_data: dict[str, dict[str, Any]] = {  # survey id -> {field: value}; numeric fields are percentages except sample_size
+    # Source: LangChain State of Agent Engineering (Nov-Dec 2025)
+    # 1,340 respondents surveyed on agent engineering practices.
+    "langchain_2025": {
+        "production": 57.3,  # % deploying agents in production (key has no _percent suffix, unlike the barrier_* keys)
+        "observability_implemented": 89,  # % with observability in place
+        "full_tracing_prod": 71.5,  # % with full tracing in production
+        "multi_model_deployments": 75,  # % using multi-model deployments
+        "barrier_quality_percent": 32,  # % citing quality as top barrier
+        "barrier_security_enterprise_percent": 24.9,  # % citing security for enterprise
+        "barrier_latency_percent": 20,  # % citing latency as barrier
+        "sample_size": 1340,  # respondent count
+        "date": "2025-11 to 2025-12",  # free-text range; the other two surveys use a single "YYYY-MM"
+        "source": "LangChain State of Agent Engineering",
+    },
+    # Source: McKinsey State of AI 2025 (Nov 2025)
+    # 1,993 executives surveyed on AI adoption and scaling.
+    "mckinsey_2025": {
+        "overall_ai_adoption": 88,  # % of respondents adopting AI
+        "agentic_ai_scaling": 23,  # % scaling agentic AI
+        "agentic_ai_experimenting": 39,  # % experimenting with agentic AI
+        "in_experimentation_stage": 32,  # % in experimentation stage
+        "in_piloting_stage": 30,  # % in piloting stage
+        "ai_scaling_enterprise_wide": 31,  # % scaling enterprise-wide
+        "expect_workforce_decrease": 32,  # % expecting workforce decrease
+        "expect_no_change": 43,  # % expecting no workforce change
+        "expect_workforce_increase": 13,  # % expecting workforce increase (the three expect_* values sum to 88, not 100)
+        "sample_size": 1993,  # respondent count
+        "date": "2025-11",
+        "source": "McKinsey State of AI 2025",
+    },
+    # Source: PwC AI Agent Survey (Apr 2025)
+    # 308 business leaders surveyed on AI agent adoption.
+    "pwc_2025": {
+        "plan_increase_ai_budgets": 88,  # % planning to increase AI budgets
+        "ai_agents_already_adopted": 79,  # % already adopting AI agents
+        "measurable_productivity_value": 66,  # % reporting measurable productivity value
+        "cost_savings_reported": 57,  # % reporting cost savings
+        "faster_decision_making": 55,  # % experiencing faster decision making
+        "improved_customer_experience": 54,  # % reporting improved customer experience
+        "agents_reshape_workplace_more_than_internet": 75,  # % saying agents will reshape workplace more than the internet
+        "sample_size": 308,  # respondent count
+        "date": "2025-04",
+        "source": "PwC AI Agent Survey",
+    },
+}
+
+# ---------------------------------------------------------------------------
+# Agent Market Forecasts
+# ---------------------------------------------------------------------------
+# Sources: Omdia, BCC Research, MarketsandMarkets, Grand View Research.
+# All *_billions figures are USD billions; cagr_percent is a stored literal, not computed from the year_* endpoints.
+
+agent_market_forecasts: list[dict[str, Any]] = [  # one record per publisher; keys are not uniform across records
+    {
+        "source": "Omdia",
+        "category": "Enterprise Agentic AI",
+        "year_2025_billions": 1.5,
+        "year_2030_billions": 41.8,
+        "cagr_percent": 175,  # NOTE(review): 1.5 -> 41.8 over 2025-2030 compounds at ~94-95%/yr; confirm 175 against Omdia
+        "date": "2025-09",  # only this record has a "date" key
+    },
+    {
+        "source": "BCC Research",
+        "category": "AI Agents",
+        "year_2025_billions": 5.7,
+        "year_2030_billions": 48.3,
+        "cagr_percent": 43.3,  # NOTE(review): 5.7 -> 48.3 over 2025-2030 compounds at ~53%/yr; confirm 43.3 and the base year
+    },
+    {
+        "source": "MarketsandMarkets",  # NOTE(review): no "category" key in this record
+        "year_2025_billions": 7.84,
+        "year_2030_billions": 52.62,
+        "cagr_percent": 46.3,  # consistent with the 2025 -> 2030 endpoints above
+    },
+    {
+        "source": "Grand View Research",  # NOTE(review): no "category" key in this record
+        "year_2025_billions": 7.63,
+        "year_2033_billions": 182.97,  # horizon is 2033 here; this record has no year_2030_billions key
+        "cagr_percent": 49.6,
+    },
+]
+
+# ---------------------------------------------------------------------------
+# GitHub Framework Stats (qualitative — no exact star counts available) and paid AI coding tool market share
+# ---------------------------------------------------------------------------
+
+github_framework_stats: dict[str, Any] = {  # mixed shape: framework name -> dict, plus flat market_share_* -> int
+    "CrewAI": {
+        "position": "top agent framework",
+        "notes": "rapidly growing within LangChain ecosystem",  # NOTE(review): identical to the LangGraph note; confirm it applies to CrewAI
+    },
+    "LangGraph": {
+        "position": "top agent framework",
+        "notes": "rapidly growing within LangChain ecosystem",
+    },
+    "AutoGen": {
+        "position": "top agent framework",
+        "notes": "Microsoft-backed multi-agent framework",
+    },
+    # Market share of paid AI coding tools (flat int entries stored alongside the framework dicts above)
+    "market_share_copilot": 42,  # % of paid AI coding tools
+    "market_share_cursor": 18,  # % of paid AI coding tools, per the group comment
+    "market_share_amazon_q": 11,  # % of paid AI coding tools, per the group comment
+}
+
+
+# ---------------------------------------------------------------------------
+# Dataset K: Real-World Developer AI Data
+# ---------------------------------------------------------------------------
+
+developer_ai_adoption: list[dict[str, Any]] = [  # one record per metric; "value" is a count or a percentage depending on "metric"
+    {
+        "source": "GitHub",
+        "metric": "all_time_copilot_users",
+        "value": 20_000_000,  # absolute user count, not a percentage
+        "date": "2025-07",
+        "note": "includes free/student",
+    },
+    {
+        "source": "GitHub",
+        "metric": "paid_copilot_subscribers",
+        "value": 4_700_000,  # absolute subscriber count, not a percentage
+        "date": "2026-01",
+    },
+    {
+        "source": "GitHub",
+        "metric": "fortune_100_adoption_percent",
+        "value": 90,
+        "date": "2025",
+    },
+    {
+        "source": "JetBrains 2025",
+        "metric": "regular_ai_usage_percent",
+        "value": 85,
+        "date": "2025",
+    },
+    {
+        "source": "JetBrains 2025",
+        "metric": "rely_on_coding_assistant_percent",
+        "value": 62,
+        "date": "2025",
+    },
+    {
+        "source": "Stack Overflow 2025",
+        "metric": "use_or_plan_ai_tools_percent",
+        "value": 84,
+        "date": "2025",
+    },
+    {
+        "source": "Stack Overflow 2025",
+        "metric": "professional_devs_using_ai_daily",
+        "value": 51,  # presumably a percentage although the metric name has no _percent suffix — confirm
+        "date": "2025",
+    },
+    {
+        "source": "DX DevCycle Q4 2025",
+        "metric": "ai_adoption_in_active_repos",
+        "value": 91,  # presumably a percentage although the metric name has no _percent suffix — confirm
+        "date": "2025-Q4",  # quarter label; other records use "YYYY" or "YYYY-MM"
+    },
+    {
+        "source": "DX DevCycle Q4 2025",
+        "metric": "merged_code_ai_authored_percent",
+        "value": 22,
+        "date": "2025-Q4",
+    },
+]
+
+code_acceptance_rates: list[dict[str, Any]] = [  # one record per tool/cohort; rates are percentages per the key names
+    {
+        "tool": "GitHub Copilot",
+        "acceptance_rate_percent": 30,
+        "code_retention_percent": 88,  # only present on this record
+        "source": "GitHub/Microsoft study",
+        "date": "2025",
+    },
+    {
+        "tool": "GitHub Copilot (heavy users)",  # cohort-specific record; has no code_retention_percent key
+        "acceptance_rate_percent": 29.73,
+        "source": "GitHub/Microsoft study",
+        "date": "2025",
+    },
+]
+
+real_world_productivity_impact: list[dict[str, Any]] = [  # records are not uniform: the figure is under "value_percent" or "value"
+    {
+        "company": "Accenture RCT",
+        "system": "GitHub Copilot",
+        "metric": "PRs_per_developer_increase",
+        "value_percent": 8.69,
+        "note": "randomized controlled trial",
+        "source": "Accenture study",
+        "date": "2025",  # only the first Accenture record carries "date" and "note"
+    },
+    {
+        "company": "Accenture RCT",
+        "system": "GitHub Copilot",
+        "metric": "PR_merge_rate_increase",
+        "value_percent": 11,
+        "source": "Accenture study",
+    },
+    {
+        "company": "Accenture RCT",
+        "system": "GitHub Copilot",
+        "metric": "successful_builds_increase",
+        "value_percent": 84,
+        "source": "Accenture study",
+    },
+    {
+        "company": "Google",  # no "system" key on this record
+        "metric": "code_now_ai_assisted_percent",
+        "value": 21,  # percent per the metric name; key is "value", not "value_percent"
+        "date": "2025",
+        "source": "Google internal",
+    },
+    {
+        "company": "Microsoft Research",  # no "system" or "date" key on this record
+        "metric": "productivity_improvement_range",
+        "value": "20-45%",  # string range, unlike the numeric figures in the other records
+        "source": "Microsoft Research 2024-2025",
+    },
+]
+
+code_quality_in_production: list[dict[str, Any]] = [  # free-text findings; figures live inside the "finding" strings, not numeric fields
+    {
+        "finding": "29.1% of Python AI-generated code contains security weaknesses",
+        "source": "Academic study (733 code snippets)",
+        "confidence": "HIGH",  # labels used here are HIGH/MEDIUM; grading criteria are not defined in this module
+        "cwe_categories": 43,  # presumably the number of distinct CWE categories observed — confirm against the study
+    },
+    {
+        "finding": "24.2% of JavaScript AI-generated code has security weaknesses",
+        "source": "Same academic study",  # i.e. the preceding record's source; meaning depends on list order
+        "confidence": "HIGH",
+    },
+    {
+        "finding": "48% of AI-generated code contains potential security vulnerabilities",
+        "source": "Multiple industry analyses",
+        "confidence": "MEDIUM",
+    },
+    {
+        "finding": "40% of Copilot-generated programs flagged for insecure code",
+        "source": "GitHub Copilot research",
+        "confidence": "HIGH",
+    },
+    {
+        "finding": "AI-coauthored PRs have ~1.7x more issues",
+        "source": "CodeRabbit Dec 2025 / DX DevCycle",
+        "confidence": "HIGH",
+    },
+    {
+        "finding": "6.4% secret leakage rate in Copilot repos (40% higher than 4.6% baseline)",  # 6.4 / 4.6 ~= 1.39, consistent with "40% higher"
+        "source": "Academic security research",
+        "confidence": "MEDIUM",
+    },
+    {
+        "finding": "Google DORA 2024: AI use causes 7.2% drop in delivery stability",  # NOTE(review): causal wording; confirm the report's own phrasing
+        "source": "Google DORA report",
+        "confidence": "HIGH",
+    },
+]
+
+failure_modes: list[dict[str, Any]] = [  # each record: category slug, rate_percent, source, confidence label, note restating the figure
+    {
+        "category": "pilot_to_production_failure",
+        "rate_percent": 72,
+        "source": "McKinsey State of AI 2025",
+        "confidence": "HIGH",
+        "note": "72% of AI initiatives fail to reach production",
+    },
+    {
+        "category": "ai_pilots_zero_roi",
+        "rate_percent": 95,
+        "source": "MIT Media Lab 2025",
+        "confidence": "HIGH",
+        "note": "95% of corporate AI pilots deliver zero measurable return",
+    },
+    {
+        "category": "companies_abandoned_ai",
+        "rate_percent": 42,
+        "source": "S&P Global 2025",
+        "confidence": "HIGH",
+        "note": "42% of companies abandoned most AI initiatives in 2025",
+    },
+    {
+        "category": "projects_fail_to_profit",
+        "rate_percent": 48,  # share of IT leaders reporting unprofitable projects (per the note), not a share of projects
+        "source": "Microsoft 2025 market study",
+        "confidence": "MEDIUM",
+        "note": "48% of IT leaders said AI projects were NOT profitable",
+    },
+    {
+        "category": "ai_projects_overall_fail",
+        "rate_percent": 80,  # the note says "Over 80%", so this stored value is a lower bound
+        "source": "RAND Corporation 2025",
+        "confidence": "MEDIUM",
+        "note": "Over 80% of AI projects fail — twice non-AI rate",
+    },
+]
+
+developer_sentiment: list[dict[str, Any]] = [  # free-text findings; the 84/85/62 figures repeat values in developer_ai_adoption
+    {
+        "survey": "Stack Overflow 2025",
+        "finding": "84% use or plan to use AI tools",
+        "sample_size": "~70,000",  # approximate, stored as a string (agent_survey_data stores sample_size as an int)
+    },
+    {
+        "survey": "JetBrains 2025",
+        "finding": "85% regular AI usage, 62% rely on at least one coding assistant",
+        "sample_size": "~30,000",  # approximate, stored as a string
+    },
+    {
+        "survey": "Accenture RCT",
+        "finding": "90% felt more fulfilled, 91% enjoyed coding more with Copilot",
+        "sample_size": "RCT participants",  # descriptive text; no participant count is given
+    },
+    {
+        "survey": "Various",  # no single named survey; record carries "confidence" instead of "sample_size"
+        "finding": "71% of developers do NOT merge AI code without manual review",
+        "confidence": "MEDIUM",
+    },
+    {
+        "survey": "Various",  # no single named survey; record carries "confidence" instead of "sample_size"
+        "finding": "97% use AI tools before company policies allow (shadow IT)",
+        "confidence": "MEDIUM",
+    },
+]
+
+# ---------------------------------------------------------------------------
+# Benchmark Scores (HEAVY DISCLAIMER APPLIES)
+# ---------------------------------------------------------------------------
+#
+# !!! LAB BENCHMARK ONLY — Does not measure production capability,
+# !!! debugging, architecture, or code quality.
+# !!! Real-world performance may differ significantly.
+# !!! These numbers should NOT be used as proxies for real-world coding ability.
+#
+benchmark_scores_with_disclaimer: list[dict[str, Any]] = [  # each record repeats the same disclaimer text next to its score
+    {
+        "model": "Claude Opus 4.5",
+        "swe_bench_verified_percent": 80.9,  # SWE-bench Verified score in percent, per the key name
+        "disclaimer": (
+            "LAB BENCHMARK ONLY — Does not measure production capability, "
+            "debugging, architecture, or code quality. "
+            "Real-world performance may differ significantly."
+        ),
+        "date": "2025",
+    },
+    {
+        "model": "Claude Mythos Preview",
+        "swe_bench_verified_percent": 93.9,  # SWE-bench Verified score in percent, per the key name
+        "disclaimer": (
+            "LAB BENCHMARK ONLY — Does not measure production capability, "
+            "debugging, architecture, or code quality. "
+            "Real-world performance may differ significantly."
+        ),
+        "date": "2025",  # NOTE(review): confirm the year — module data was retrieved June 2026
+    },
+]