"""Agent Adoption Surveys and Real-World Developer AI Data

Source: LangChain, McKinsey, PwC surveys; GitHub, JetBrains, DX DevCycle;
        academic studies; Omdia, BCC Research, MarketsandMarkets, Grand View Research.
Retrieved: June 2026

IMPORTANT: This module prioritizes REAL-WORLD data over lab benchmarks.
Benchmark scores are included only with heavy disclaimers.

The module is pure data: it defines module-level constants only and has no
functions or import-time side effects.

Schema notes for consumers:
    * Records are plain dicts and are NOT uniform.  Keys such as ``date``,
      ``category``, ``confidence`` and ``note`` are present on some records
      and absent on others, so read optional keys with ``dict.get``.
    * Percentages are stored as numbers on a 0-100 scale unless the value is
      an explicit string (e.g. ``"20-45%"``).
"""

from __future__ import annotations

from typing import Any


# ---------------------------------------------------------------------------
# Module metadata
# ---------------------------------------------------------------------------
MODULE_NAME: str = "agent_adoption"
MODULE_VERSION: str = "1.0.0"
DATA_RETRIEVED: str = "June 2026"
MODULE_DISCLAIMER: str = (
    "This module prioritizes REAL-WORLD data over lab benchmarks. "
    "Benchmark scores are included only with heavy disclaimers."
)


# ---------------------------------------------------------------------------
# Dataset J: Agent Adoption Surveys
# ---------------------------------------------------------------------------

# Keyed by "<publisher>_<year>"; every entry carries sample_size, date, source.
agent_survey_data: dict[str, dict[str, Any]] = {
    # Source: LangChain State of Agent Engineering (Nov-Dec 2025)
    # 1,340 respondents surveyed on agent engineering practices.
    "langchain_2025": {
        "production": 57.3,  # % deploying agents in production
        "observability_implemented": 89,  # % with observability in place
        "full_tracing_prod": 71.5,  # % with full tracing in production
        "multi_model_deployments": 75,  # % using multi-model deployments
        "barrier_quality_percent": 32,  # % citing quality as top barrier
        "barrier_security_enterprise_percent": 24.9,  # % citing security for enterprise
        "barrier_latency_percent": 20,  # % citing latency as barrier
        "sample_size": 1340,
        "date": "2025-11 to 2025-12",
        "source": "LangChain State of Agent Engineering",
    },
    # Source: McKinsey State of AI 2025 (Nov 2025)
    # 1,993 executives surveyed on AI adoption and scaling.
    "mckinsey_2025": {
        "overall_ai_adoption": 88,  # % of respondents adopting AI
        "agentic_ai_scaling": 23,  # % scaling agentic AI
        "agentic_ai_experimenting": 39,  # % experimenting with agentic AI
        "in_experimentation_stage": 32,  # % in experimentation stage
        "in_piloting_stage": 30,  # % in piloting stage
        "ai_scaling_enterprise_wide": 31,  # % scaling enterprise-wide
        "expect_workforce_decrease": 32,  # % expecting workforce decrease
        "expect_no_change": 43,  # % expecting no workforce change
        "expect_workforce_increase": 13,  # % expecting workforce increase
        "sample_size": 1993,
        "date": "2025-11",
        "source": "McKinsey State of AI 2025",
    },
    # Source: PwC AI Agent Survey (Apr 2025)
    # 308 business leaders surveyed on AI agent adoption.
    "pwc_2025": {
        "plan_increase_ai_budgets": 88,  # % planning to increase AI budgets
        "ai_agents_already_adopted": 79,  # % already adopting AI agents
        "measurable_productivity_value": 66,  # % reporting measurable productivity value
        "cost_savings_reported": 57,  # % reporting cost savings
        "faster_decision_making": 55,  # % experiencing faster decision making
        "improved_customer_experience": 54,  # % reporting improved customer experience
        # % saying agents will reshape workplace more than the internet
        "agents_reshape_workplace_more_than_internet": 75,
        "sample_size": 308,
        "date": "2025-04",
        "source": "PwC AI Agent Survey",
    },
}

# ---------------------------------------------------------------------------
# Agent Market Forecasts
# ---------------------------------------------------------------------------
# Sources: Omdia, BCC Research, MarketsandMarkets, Grand View Research.
# All figures in USD billions unless noted.
#
# Only "source", "year_2025_billions" and "cagr_percent" appear on every
# entry; "category" and "date" are optional, and the Grand View Research
# entry reports a 2033 horizon ("year_2033_billions") instead of 2030.

agent_market_forecasts: list[dict[str, Any]] = [
    {
        "source": "Omdia",
        "category": "Enterprise Agentic AI",
        "year_2025_billions": 1.5,
        "year_2030_billions": 41.8,
        # NOTE(review): 1.5 -> 41.8 over 2025-2030 works out to roughly 95%
        # compound annual growth, not 175 -- confirm against the Omdia report.
        "cagr_percent": 175,
        "date": "2025-09",
    },
    {
        "source": "BCC Research",
        "category": "AI Agents",
        # NOTE(review): 5.7 -> 48.3 over 2025-2030 works out to roughly 53%
        # compound annual growth vs. the stated 43.3 -- possibly the base-year
        # figure belongs to a different year; confirm against the report.
        "year_2025_billions": 5.7,
        "year_2030_billions": 48.3,
        "cagr_percent": 43.3,
    },
    {
        "source": "MarketsandMarkets",
        "year_2025_billions": 7.84,
        "year_2030_billions": 52.62,
        "cagr_percent": 46.3,
    },
    {
        "source": "Grand View Research",
        "year_2025_billions": 7.63,
        "year_2033_billions": 182.97,  # 2033 horizon, unlike the 2030 entries
        "cagr_percent": 49.6,
    },
]

# ---------------------------------------------------------------------------
# GitHub Framework Stats (qualitative — no exact star counts available)
# ---------------------------------------------------------------------------
# Mixed-value mapping: framework names map to descriptive dicts, while the
# "market_share_*" keys map directly to numeric percentages.  Check the value
# type before treating every item as a framework record.

github_framework_stats: dict[str, Any] = {
    "CrewAI": {
        "position": "top agent framework",
        # NOTE(review): this text is identical to the LangGraph entry below;
        # presumably a copy-paste -- confirm it is accurate for CrewAI.
        "notes": "rapidly growing within LangChain ecosystem",
    },
    "LangGraph": {
        "position": "top agent framework",
        "notes": "rapidly growing within LangChain ecosystem",
    },
    "AutoGen": {
        "position": "top agent framework",
        "notes": "Microsoft-backed multi-agent framework",
    },
    # Market share of paid AI coding tools
    "market_share_copilot": 42,  # % of paid AI coding tools
    "market_share_cursor": 18,
    "market_share_amazon_q": 11,
}


# ---------------------------------------------------------------------------
# Dataset K: Real-World Developer AI Data
# ---------------------------------------------------------------------------

# One record per (source, metric) pair; "value" is an absolute count for the
# user/subscriber metrics and a percentage for the "*_percent"-style metrics.
developer_ai_adoption: list[dict[str, Any]] = [
    {
        "source": "GitHub",
        "metric": "all_time_copilot_users",
        "value": 20_000_000,
        "date": "2025-07",
        "note": "includes free/student",
    },
    {
        "source": "GitHub",
        "metric": "paid_copilot_subscribers",
        "value": 4_700_000,
        "date": "2026-01",
    },
    {
        "source": "GitHub",
        "metric": "fortune_100_adoption_percent",
        "value": 90,
        "date": "2025",
    },
    {
        "source": "JetBrains 2025",
        "metric": "regular_ai_usage_percent",
        "value": 85,
        "date": "2025",
    },
    {
        "source": "JetBrains 2025",
        "metric": "rely_on_coding_assistant_percent",
        "value": 62,
        "date": "2025",
    },
    {
        "source": "Stack Overflow 2025",
        "metric": "use_or_plan_ai_tools_percent",
        "value": 84,
        "date": "2025",
    },
    {
        "source": "Stack Overflow 2025",
        "metric": "professional_devs_using_ai_daily",
        "value": 51,
        "date": "2025",
    },
    {
        "source": "DX DevCycle Q4 2025",
        "metric": "ai_adoption_in_active_repos",
        "value": 91,
        "date": "2025-Q4",
    },
    {
        "source": "DX DevCycle Q4 2025",
        "metric": "merged_code_ai_authored_percent",
        "value": 22,
        "date": "2025-Q4",
    },
]

# Suggestion acceptance figures; "code_retention_percent" is only present on
# the first entry.
code_acceptance_rates: list[dict[str, Any]] = [
    {
        "tool": "GitHub Copilot",
        "acceptance_rate_percent": 30,
        "code_retention_percent": 88,
        "source": "GitHub/Microsoft study",
        "date": "2025",
    },
    {
        "tool": "GitHub Copilot (heavy users)",
        "acceptance_rate_percent": 29.73,
        "source": "GitHub/Microsoft study",
        "date": "2025",
    },
]

# Measured productivity effects.  The Accenture records store their number
# under "value_percent"; the Google and Microsoft Research records use
# "value" (the latter as a string range), so the value key is not uniform.
real_world_productivity_impact: list[dict[str, Any]] = [
    {
        "company": "Accenture RCT",
        "system": "GitHub Copilot",
        "metric": "PRs_per_developer_increase",
        "value_percent": 8.69,
        "note": "randomized controlled trial",
        "source": "Accenture study",
        "date": "2025",
    },
    {
        "company": "Accenture RCT",
        "system": "GitHub Copilot",
        "metric": "PR_merge_rate_increase",
        "value_percent": 11,
        "source": "Accenture study",
    },
    {
        "company": "Accenture RCT",
        "system": "GitHub Copilot",
        "metric": "successful_builds_increase",
        "value_percent": 84,
        "source": "Accenture study",
    },
    {
        "company": "Google",
        "metric": "code_now_ai_assisted_percent",
        "value": 21,
        "date": "2025",
        "source": "Google internal",
    },
    {
        "company": "Microsoft Research",
        "metric": "productivity_improvement_range",
        "value": "20-45%",
        "source": "Microsoft Research 2024-2025",
    },
]

# Free-text findings about AI-generated code quality, each tagged with a
# "confidence" label ("HIGH" or "MEDIUM").
code_quality_in_production: list[dict[str, Any]] = [
    {
        "finding": "29.1% of Python AI-generated code contains security weaknesses",
        "source": "Academic study (733 code snippets)",
        "confidence": "HIGH",
        "cwe_categories": 43,
    },
    {
        "finding": "24.2% of JavaScript AI-generated code has security weaknesses",
        "source": "Same academic study",
        "confidence": "HIGH",
    },
    {
        "finding": "48% of AI-generated code contains potential security vulnerabilities",
        "source": "Multiple industry analyses",
        "confidence": "MEDIUM",
    },
    {
        "finding": "40% of Copilot-generated programs flagged for insecure code",
        "source": "GitHub Copilot research",
        "confidence": "HIGH",
    },
    {
        "finding": "AI-coauthored PRs have ~1.7x more issues",
        "source": "CodeRabbit Dec 2025 / DX DevCycle",
        "confidence": "HIGH",
    },
    {
        "finding": "6.4% secret leakage rate in Copilot repos (40% higher than 4.6% baseline)",
        "source": "Academic security research",
        "confidence": "MEDIUM",
    },
    {
        "finding": "Google DORA 2024: AI use causes 7.2% drop in delivery stability",
        "source": "Google DORA report",
        "confidence": "HIGH",
    },
]

# Reported failure rates for AI initiatives; "note" restates the headline
# claim behind each "rate_percent".
failure_modes: list[dict[str, Any]] = [
    {
        "category": "pilot_to_production_failure",
        "rate_percent": 72,
        "source": "McKinsey State of AI 2025",
        "confidence": "HIGH",
        "note": "72% of AI initiatives fail to reach production",
    },
    {
        "category": "ai_pilots_zero_roi",
        "rate_percent": 95,
        "source": "MIT Media Lab 2025",
        "confidence": "HIGH",
        "note": "95% of corporate AI pilots deliver zero measurable return",
    },
    {
        "category": "companies_abandoned_ai",
        "rate_percent": 42,
        "source": "S&P Global 2025",
        "confidence": "HIGH",
        "note": "42% of companies abandoned most AI initiatives in 2025",
    },
    {
        "category": "projects_fail_to_profit",
        "rate_percent": 48,
        "source": "Microsoft 2025 market study",
        "confidence": "MEDIUM",
        "note": "48% of IT leaders said AI projects were NOT profitable",
    },
    {
        "category": "ai_projects_overall_fail",
        "rate_percent": 80,
        "source": "RAND Corporation 2025",
        "confidence": "MEDIUM",
        "note": "Over 80% of AI projects fail — twice non-AI rate",
    },
]

# Survey-level sentiment summaries.  "sample_size" here is a descriptive
# string (e.g. "~70,000"), unlike the integer sample sizes in
# agent_survey_data, and is missing from the "Various" entries.
developer_sentiment: list[dict[str, Any]] = [
    {
        "survey": "Stack Overflow 2025",
        "finding": "84% use or plan to use AI tools",
        "sample_size": "~70,000",
    },
    {
        "survey": "JetBrains 2025",
        "finding": "85% regular AI usage, 62% rely on at least one coding assistant",
        "sample_size": "~30,000",
    },
    {
        "survey": "Accenture RCT",
        "finding": "90% felt more fulfilled, 91% enjoyed coding more with Copilot",
        "sample_size": "RCT participants",
    },
    {
        "survey": "Various",
        "finding": "71% of developers do NOT merge AI code without manual review",
        "confidence": "MEDIUM",
    },
    {
        "survey": "Various",
        "finding": "97% use AI tools before company policies allow (shadow IT)",
        "confidence": "MEDIUM",
    },
]

# ---------------------------------------------------------------------------
# Benchmark Scores (HEAVY DISCLAIMER APPLIES)
# ---------------------------------------------------------------------------
#
# !!! LAB BENCHMARK ONLY — Does not measure production capability,
# !!! debugging, architecture, or code quality.
# !!! Real-world performance may differ significantly.
# !!! These numbers should NOT be used as proxies for real-world coding ability.
#

# Single definition of the per-record disclaimer so the wording cannot drift
# between benchmark entries.
_BENCHMARK_DISCLAIMER: str = (
    "LAB BENCHMARK ONLY — Does not measure production capability, "
    "debugging, architecture, or code quality. "
    "Real-world performance may differ significantly."
)

benchmark_scores_with_disclaimer: list[dict[str, Any]] = [
    {
        "model": "Claude Opus 4.5",
        "swe_bench_verified_percent": 80.9,
        "disclaimer": _BENCHMARK_DISCLAIMER,
        "date": "2025",
    },
    {
        "model": "Claude Mythos Preview",
        "swe_bench_verified_percent": 93.9,
        "disclaimer": _BENCHMARK_DISCLAIMER,
        "date": "2025",
    },
]