feat(data): add agent adoption surveys and real-world developer AI data

2026-06-04 16:58:41 -05:00
parent 79660ad67a
commit acd8cf20a2
1 changed files with 391 additions and 0 deletions
--- a/src/data/agent_adoption.py
+++ b/src/data/agent_adoption.py
@@ -0,0 +1,391 @@
 """Agent Adoption Surveys and Real-World Developer AI Data
 Source: LangChain, McKinsey, PwC surveys; GitHub, JetBrains, DX DevCycle;
        academic studies; Omdia, BCC Research, MarketsandMarkets, Grand View Research.
 Retrieved: June 2026
 IMPORTANT: This module prioritizes REAL-WORLD data over lab benchmarks.
 Benchmark scores are included only with heavy disclaimers.
 """
 from __future__ import annotations
 from typing import Any
 # ---------------------------------------------------------------------------
 # Module metadata
 # ---------------------------------------------------------------------------
 # Identification constants for this data module.
 MODULE_NAME: str = "agent_adoption"
 MODULE_VERSION: str = "1.0.0"
 # Retrieval month of the data, kept as free-form text rather than a date object.
 DATA_RETRIEVED: str = "June 2026"
 # Caveat meant to travel with the data; the two sentences are joined by one space.
 MODULE_DISCLAIMER: str = " ".join(
    (
        "This module prioritizes REAL-WORLD data over lab benchmarks.",
        "Benchmark scores are included only with heavy disclaimers.",
    )
 )
 # ---------------------------------------------------------------------------
 # Dataset J: Agent Adoption Surveys
 # ---------------------------------------------------------------------------
 # Each survey is written out as its own mapping and then registered below under
 # a "publisher_year" key. Unless a trailing comment says otherwise, numeric
 # fields are percentages of respondents; "sample_size" is a respondent count.
 #
 # LangChain State of Agent Engineering (Nov-Dec 2025): 1,340 respondents
 # surveyed on agent engineering practices.
 _LANGCHAIN_2025: dict[str, Any] = {
    "production": 57.3,  # deploying agents in production
    "observability_implemented": 89,  # observability in place
    "full_tracing_prod": 71.5,  # full tracing in production
    "multi_model_deployments": 75,  # using multi-model deployments
    "barrier_quality_percent": 32,  # quality cited as top barrier
    "barrier_security_enterprise_percent": 24.9,  # security cited (enterprise)
    "barrier_latency_percent": 20,  # latency cited as barrier
    "sample_size": 1340,
    "date": "2025-11 to 2025-12",
    "source": "LangChain State of Agent Engineering",
 }
 # McKinsey State of AI 2025 (Nov 2025): 1,993 executives surveyed on AI
 # adoption and scaling.
 _MCKINSEY_2025: dict[str, Any] = {
    "overall_ai_adoption": 88,  # adopting AI
    "agentic_ai_scaling": 23,  # scaling agentic AI
    "agentic_ai_experimenting": 39,  # experimenting with agentic AI
    "in_experimentation_stage": 32,  # in experimentation stage
    "in_piloting_stage": 30,  # in piloting stage
    "ai_scaling_enterprise_wide": 31,  # scaling enterprise-wide
    "expect_workforce_decrease": 32,  # expecting workforce decrease
    "expect_no_change": 43,  # expecting no workforce change
    "expect_workforce_increase": 13,  # expecting workforce increase
    "sample_size": 1993,
    "date": "2025-11",
    "source": "McKinsey State of AI 2025",
 }
 # PwC AI Agent Survey (Apr 2025): 308 business leaders surveyed on AI agent
 # adoption.
 _PWC_2025: dict[str, Any] = {
    "plan_increase_ai_budgets": 88,  # planning to increase AI budgets
    "ai_agents_already_adopted": 79,  # already adopting AI agents
    "measurable_productivity_value": 66,  # reporting measurable productivity value
    "cost_savings_reported": 57,  # reporting cost savings
    "faster_decision_making": 55,  # experiencing faster decision making
    "improved_customer_experience": 54,  # reporting improved customer experience
    # saying agents will reshape the workplace more than the internet did
    "agents_reshape_workplace_more_than_internet": 75,
    "sample_size": 308,
    "date": "2025-04",
    "source": "PwC AI Agent Survey",
 }
 agent_survey_data: dict[str, dict[str, Any]] = {
    "langchain_2025": _LANGCHAIN_2025,
    "mckinsey_2025": _MCKINSEY_2025,
    "pwc_2025": _PWC_2025,
 }
 # ---------------------------------------------------------------------------
 # Agent Market Forecasts
 # ---------------------------------------------------------------------------
 # Analyst projections from Omdia, BCC Research, MarketsandMarkets and
 # Grand View Research. Market sizes are in USD billions and the key name
 # carries the year. Records are not uniform: "category" and "date" are present
 # only on some entries, and Grand View Research projects to 2033, not 2030,
 # so consumers should read optional keys with .get().
 _OMDIA_FORECAST: dict[str, Any] = {
    "source": "Omdia",
    "category": "Enterprise Agentic AI",
    "year_2025_billions": 1.5,
    "year_2030_billions": 41.8,
    # NOTE(review): growing from 1.5 to 41.8 over five years compounds at
    # roughly 95% per year, not 175% — confirm against the Omdia report.
    "cagr_percent": 175,
    "date": "2025-09",
 }
 _BCC_FORECAST: dict[str, Any] = {
    "source": "BCC Research",
    "category": "AI Agents",
    "year_2025_billions": 5.7,
    "year_2030_billions": 48.3,
    # NOTE(review): 5.7 to 48.3 over five years is roughly 53% per year; the
    # published CAGR may use a different base year — confirm.
    "cagr_percent": 43.3,
 }
 _MARKETSANDMARKETS_FORECAST: dict[str, Any] = {
    "source": "MarketsandMarkets",
    "year_2025_billions": 7.84,
    "year_2030_billions": 52.62,
    "cagr_percent": 46.3,
 }
 _GRAND_VIEW_FORECAST: dict[str, Any] = {
    "source": "Grand View Research",
    "year_2025_billions": 7.63,
    "year_2033_billions": 182.97,  # eight-year horizon, unlike the others
    "cagr_percent": 49.6,
 }
 agent_market_forecasts: list[dict[str, Any]] = [
    _OMDIA_FORECAST,
    _BCC_FORECAST,
    _MARKETSANDMARKETS_FORECAST,
    _GRAND_VIEW_FORECAST,
 ]
 # ---------------------------------------------------------------------------
 # GitHub Framework Stats (qualitative — no exact star counts available)
 # ---------------------------------------------------------------------------
 # Two kinds of entries share this mapping:
 #   * a framework name maps to a {"position", "notes"} qualitative description
 #   * a "market_share_*" key maps to an integer percent share of paid AI
 #     coding tools
 # Consumers iterating the mapping must therefore check the value type.
 github_framework_stats: dict[str, Any] = {
    "CrewAI": {
        "position": "top agent framework",
        # CrewAI is a standalone framework and not part of the LangChain
        # ecosystem, so it must not share LangGraph's note.
        "notes": "independent multi-agent orchestration framework",
    },
    "LangGraph": {
        "position": "top agent framework",
        "notes": "rapidly growing within LangChain ecosystem",
    },
    "AutoGen": {
        "position": "top agent framework",
        "notes": "Microsoft-backed multi-agent framework",
    },
    # Market share of paid AI coding tools (percent)
    "market_share_copilot": 42,
    "market_share_cursor": 18,
    "market_share_amazon_q": 11,
 }
 # ---------------------------------------------------------------------------
 # Dataset K: Real-World Developer AI Data
 # ---------------------------------------------------------------------------
 # One row per published adoption figure: (source, metric, value, date, note).
 # "note" is None when the record carries no note; such records are emitted
 # without a "note" key at all. Whether a value is a head count or a percentage
 # is indicated by the metric name.
 _DEVELOPER_AI_ADOPTION_ROWS = (
    ("GitHub", "all_time_copilot_users", 20_000_000, "2025-07", "includes free/student"),
    ("GitHub", "paid_copilot_subscribers", 4_700_000, "2026-01", None),
    ("GitHub", "fortune_100_adoption_percent", 90, "2025", None),
    ("JetBrains 2025", "regular_ai_usage_percent", 85, "2025", None),
    ("JetBrains 2025", "rely_on_coding_assistant_percent", 62, "2025", None),
    ("Stack Overflow 2025", "use_or_plan_ai_tools_percent", 84, "2025", None),
    ("Stack Overflow 2025", "professional_devs_using_ai_daily", 51, "2025", None),
    ("DX DevCycle Q4 2025", "ai_adoption_in_active_repos", 91, "2025-Q4", None),
    ("DX DevCycle Q4 2025", "merged_code_ai_authored_percent", 22, "2025-Q4", None),
 )
 developer_ai_adoption: list[dict[str, Any]] = [
    {
        "source": publisher,
        "metric": metric_name,
        "value": figure,
        "date": period,
        # Only attach "note" when the row actually has one.
        **({"note": remark} if remark is not None else {}),
    }
    for publisher, metric_name, figure, period, remark in _DEVELOPER_AI_ADOPTION_ROWS
 ]
 # Suggestion acceptance figures for GitHub Copilot. Both records cite the same
 # study and year, so those two fields are shared; only the first record also
 # reports a code retention percentage.
 _COPILOT_STUDY_PROVENANCE: dict[str, Any] = {
    "source": "GitHub/Microsoft study",
    "date": "2025",
 }
 code_acceptance_rates: list[dict[str, Any]] = [
    {
        "tool": "GitHub Copilot",
        "acceptance_rate_percent": 30,
        "code_retention_percent": 88,
        **_COPILOT_STUDY_PROVENANCE,
    },
    {
        "tool": "GitHub Copilot (heavy users)",
        "acceptance_rate_percent": 29.73,
        **_COPILOT_STUDY_PROVENANCE,
    },
 ]
 # Measured productivity effects reported by named organisations.
 # Schema varies by record: the Accenture entries use "value_percent" and carry
 # a "system" field, while the Google and Microsoft Research entries use
 # "value" (the latter as a string range). "note" and "date" are optional.
 _ACCENTURE_COPILOT_TRIAL: dict[str, Any] = {
    "company": "Accenture RCT",
    "system": "GitHub Copilot",
 }
 real_world_productivity_impact: list[dict[str, Any]] = [
    {
        **_ACCENTURE_COPILOT_TRIAL,
        "metric": "PRs_per_developer_increase",
        "value_percent": 8.69,
        "note": "randomized controlled trial",
        "source": "Accenture study",
        "date": "2025",
    },
    {
        **_ACCENTURE_COPILOT_TRIAL,
        "metric": "PR_merge_rate_increase",
        "value_percent": 11,
        "source": "Accenture study",
    },
    {
        **_ACCENTURE_COPILOT_TRIAL,
        "metric": "successful_builds_increase",
        "value_percent": 84,
        "source": "Accenture study",
    },
    {
        "company": "Google",
        "metric": "code_now_ai_assisted_percent",
        "value": 21,
        "date": "2025",
        "source": "Google internal",
    },
    {
        "company": "Microsoft Research",
        "metric": "productivity_improvement_range",
        "value": "20-45%",  # a textual range, not a number
        "source": "Microsoft Research 2024-2025",
    },
 ]
 # Code-quality and security findings about AI-generated code.
 # Each row is (finding, source, confidence, extra), where "extra" holds any
 # additional keys for that record (only the first row has one). Confidence
 # labels are the strings "HIGH" or "MEDIUM" as recorded with each finding.
 _CODE_QUALITY_ROWS = (
    (
        "29.1% of Python AI-generated code contains security weaknesses",
        "Academic study (733 code snippets)",
        "HIGH",
        {"cwe_categories": 43},
    ),
    (
        "24.2% of JavaScript AI-generated code has security weaknesses",
        "Same academic study",
        "HIGH",
        {},
    ),
    (
        "48% of AI-generated code contains potential security vulnerabilities",
        "Multiple industry analyses",
        "MEDIUM",
        {},
    ),
    (
        "40% of Copilot-generated programs flagged for insecure code",
        "GitHub Copilot research",
        "HIGH",
        {},
    ),
    (
        "AI-coauthored PRs have ~1.7x more issues",
        "CodeRabbit Dec 2025 / DX DevCycle",
        "HIGH",
        {},
    ),
    (
        "6.4% secret leakage rate in Copilot repos (40% higher than 4.6% baseline)",
        "Academic security research",
        "MEDIUM",
        {},
    ),
    (
        "Google DORA 2024: AI use causes 7.2% drop in delivery stability",
        "Google DORA report",
        "HIGH",
        {},
    ),
 )
 code_quality_in_production: list[dict[str, Any]] = [
    {"finding": statement, "source": origin, "confidence": level, **extra}
    for statement, origin, level, extra in _CODE_QUALITY_ROWS
 ]
 # Reported failure and abandonment rates for AI initiatives.
 # Every record has the same five fields, so the data is kept as plain rows
 # and paired with the field names below; row order is the published order.
 _FAILURE_MODE_FIELDS = ("category", "rate_percent", "source", "confidence", "note")
 _FAILURE_MODE_ROWS = (
    (
        "pilot_to_production_failure",
        72,
        "McKinsey State of AI 2025",
        "HIGH",
        "72% of AI initiatives fail to reach production",
    ),
    (
        "ai_pilots_zero_roi",
        95,
        "MIT Media Lab 2025",
        "HIGH",
        "95% of corporate AI pilots deliver zero measurable return",
    ),
    (
        "companies_abandoned_ai",
        42,
        "S&P Global 2025",
        "HIGH",
        "42% of companies abandoned most AI initiatives in 2025",
    ),
    (
        "projects_fail_to_profit",
        48,
        "Microsoft 2025 market study",
        "MEDIUM",
        "48% of IT leaders said AI projects were NOT profitable",
    ),
    (
        "ai_projects_overall_fail",
        80,
        "RAND Corporation 2025",
        "MEDIUM",
        "Over 80% of AI projects fail — twice non-AI rate",
    ),
 )
 failure_modes: list[dict[str, Any]] = [
    dict(zip(_FAILURE_MODE_FIELDS, row)) for row in _FAILURE_MODE_ROWS
 ]
 # Developer sentiment findings. Records come in two shapes:
 #   * named surveys, which report a "sample_size" (always a string here)
 #   * findings attributed to "Various", which carry a "confidence" label
 #     of "MEDIUM" instead of a sample size
 _SIZED_SURVEY_FINDINGS = (
    ("Stack Overflow 2025", "84% use or plan to use AI tools", "~70,000"),
    (
        "JetBrains 2025",
        "85% regular AI usage, 62% rely on at least one coding assistant",
        "~30,000",
    ),
    (
        "Accenture RCT",
        "90% felt more fulfilled, 91% enjoyed coding more with Copilot",
        "RCT participants",
    ),
 )
 _VARIOUS_SOURCE_FINDINGS = (
    "71% of developers do NOT merge AI code without manual review",
    "97% use AI tools before company policies allow (shadow IT)",
 )
 developer_sentiment: list[dict[str, Any]] = [
    *(
        {"survey": survey_name, "finding": statement, "sample_size": respondents}
        for survey_name, statement, respondents in _SIZED_SURVEY_FINDINGS
    ),
    *(
        {"survey": "Various", "finding": statement, "confidence": "MEDIUM"}
        for statement in _VARIOUS_SOURCE_FINDINGS
    ),
 ]
 # ---------------------------------------------------------------------------
 # Benchmark Scores (HEAVY DISCLAIMER APPLIES)
 # ---------------------------------------------------------------------------
 #
 # !!! LAB BENCHMARK ONLY — these scores do not measure production capability,
 # !!! debugging, architecture, or code quality.
 # !!! Real-world performance may differ significantly.
 # !!! Do NOT use these numbers as proxies for real-world coding ability.
 #
 # The same disclaimer text is attached to every record so that it travels with
 # the score wherever a single record is passed around.
 _LAB_BENCHMARK_DISCLAIMER: str = (
    "LAB BENCHMARK ONLY — Does not measure production capability, "
    "debugging, architecture, or code quality. "
    "Real-world performance may differ significantly."
 )
 benchmark_scores_with_disclaimer: list[dict[str, Any]] = [
    {
        "model": model_name,
        "swe_bench_verified_percent": score,
        "disclaimer": _LAB_BENCHMARK_DISCLAIMER,
        "date": "2025",
    }
    for model_name, score in (
        ("Claude Opus 4.5", 80.9),
        ("Claude Mythos Preview", 93.9),
    )
 ]