feat(data): add agent adoption surveys and real-world developer AI data

This commit is contained in:
Orchestrator
2026-06-04 16:58:41 -05:00
parent 79660ad67a
commit acd8cf20a2

391
src/data/agent_adoption.py Normal file
View File

@@ -0,0 +1,391 @@
"""Agent Adoption Surveys and Real-World Developer AI Data
Source: LangChain, McKinsey, PwC surveys; GitHub, JetBrains, DX DevCycle;
academic studies; Omdia, BCC Research, MarketsandMarkets, Grand View Research.
Retrieved: June 2026
IMPORTANT: This module prioritizes REAL-WORLD data over lab benchmarks.
Benchmark scores are included only with heavy disclaimers.
"""
from __future__ import annotations
from typing import Any
# ---------------------------------------------------------------------------
# Module metadata
# ---------------------------------------------------------------------------
# Identity and version of this data module.
MODULE_NAME: str = "agent_adoption"
MODULE_VERSION: str = "1.0.0"

# Retrieval period of the figures, matching the module docstring.
DATA_RETRIEVED: str = "June 2026"

# Module-wide caveat: real-world data first, benchmarks only with disclaimers.
MODULE_DISCLAIMER: str = (
    "This module prioritizes REAL-WORLD data over lab benchmarks. "
    + "Benchmark scores are included only with heavy disclaimers."
)
# ---------------------------------------------------------------------------
# Dataset J: Agent Adoption Surveys
# ---------------------------------------------------------------------------
# Survey results keyed by "<publisher>_<year>". Within each survey every
# numeric field other than "sample_size" is a percentage of respondents.
agent_survey_data: dict[str, dict[str, Any]] = {
    # LangChain "State of Agent Engineering", fielded Nov-Dec 2025,
    # 1,340 respondents, on agent engineering practices.
    "langchain_2025": {
        "production": 57.3,  # agents deployed in production
        "observability_implemented": 89,  # observability in place
        "full_tracing_prod": 71.5,  # full tracing in production
        "multi_model_deployments": 75,  # multi-model deployments
        "barrier_quality_percent": 32,  # quality cited as top barrier
        "barrier_security_enterprise_percent": 24.9,  # security cited (enterprise)
        "barrier_latency_percent": 20,  # latency cited as a barrier
        "sample_size": 1340,
        "date": "2025-11 to 2025-12",
        "source": "LangChain State of Agent Engineering",
    },
    # McKinsey "State of AI 2025", Nov 2025, 1,993 executives, on AI
    # adoption and scaling.
    "mckinsey_2025": {
        "overall_ai_adoption": 88,  # respondents adopting AI
        "agentic_ai_scaling": 23,  # scaling agentic AI
        "agentic_ai_experimenting": 39,  # experimenting with agentic AI
        "in_experimentation_stage": 32,  # in the experimentation stage
        "in_piloting_stage": 30,  # in the piloting stage
        "ai_scaling_enterprise_wide": 31,  # scaling enterprise-wide
        "expect_workforce_decrease": 32,  # expect the workforce to shrink
        "expect_no_change": 43,  # expect no workforce change
        "expect_workforce_increase": 13,  # expect the workforce to grow
        "sample_size": 1993,
        "date": "2025-11",
        "source": "McKinsey State of AI 2025",
    },
    # PwC "AI Agent Survey", Apr 2025, 308 business leaders, on AI agent
    # adoption.
    "pwc_2025": {
        "plan_increase_ai_budgets": 88,  # plan to increase AI budgets
        "ai_agents_already_adopted": 79,  # already adopting AI agents
        "measurable_productivity_value": 66,  # measurable productivity value
        "cost_savings_reported": 57,  # report cost savings
        "faster_decision_making": 55,  # faster decision making
        "improved_customer_experience": 54,  # improved customer experience
        # Say agents will reshape the workplace more than the internet did.
        "agents_reshape_workplace_more_than_internet": 75,
        "sample_size": 308,
        "date": "2025-04",
        "source": "PwC AI Agent Survey",
    },
}
# ---------------------------------------------------------------------------
# Agent Market Forecasts
# ---------------------------------------------------------------------------
# Sources: Omdia, BCC Research, MarketsandMarkets, Grand View Research.
# All figures in USD billions unless noted.
# One record per analyst forecast. Every record carries "source", "category",
# a 2025 base-year market size, a horizon-year market size (both in USD
# billions) and "cagr_percent"; "date" is present only where known.
# The horizon differs by source: Grand View Research forecasts to 2033
# ("year_2033_billions"), the other three to 2030 ("year_2030_billions"),
# so consumers must not assume a "year_2030_billions" key on every record.
agent_market_forecasts: list[dict[str, Any]] = [
    {
        "source": "Omdia",
        "category": "Enterprise Agentic AI",
        "year_2025_billions": 1.5,
        "year_2030_billions": 41.8,
        # NOTE(review): 1.5 -> 41.8 over five years compounds at roughly 95%
        # per year, not 175% — confirm which period the 175 figure covers.
        "cagr_percent": 175,
        "date": "2025-09",
    },
    {
        "source": "BCC Research",
        "category": "AI Agents",
        # NOTE(review): 5.7 -> 48.3 over five years implies ~53% CAGR; the
        # stated 43.3% would match a 2025 base of about 8.0. Verify whether
        # 5.7 is really the 2025 figure.
        "year_2025_billions": 5.7,
        "year_2030_billions": 48.3,
        "cagr_percent": 43.3,
    },
    {
        "source": "MarketsandMarkets",
        # Added so every record exposes "category"; label follows the
        # "Agent Market Forecasts" scope of this list — confirm against the
        # report title.
        "category": "AI Agents",
        "year_2025_billions": 7.84,
        "year_2030_billions": 52.62,
        "cagr_percent": 46.3,
    },
    {
        "source": "Grand View Research",
        # Added so every record exposes "category"; label follows the
        # "Agent Market Forecasts" scope of this list — confirm against the
        # report title.
        "category": "AI Agents",
        "year_2025_billions": 7.63,
        "year_2033_billions": 182.97,  # 2033 horizon, unlike the others
        "cagr_percent": 49.6,
    },
]
# ---------------------------------------------------------------------------
# GitHub Framework Stats (qualitative — no exact star counts available),
# plus market share of paid AI coding tools
# ---------------------------------------------------------------------------
# Heterogeneous mapping: the three framework names map to qualitative
# {"position", "notes"} records, while the "market_share_*" keys map directly
# to integer percentages. Callers iterating the values must handle both shapes.
github_framework_stats: dict[str, Any] = {
    # --- Agent frameworks (qualitative only) ---
    # NOTE(review): the CrewAI notes text is identical to LangGraph's —
    # confirm it is intended and not a copy-paste.
    "CrewAI": {
        "position": "top agent framework",
        "notes": "rapidly growing within LangChain ecosystem",
    },
    "LangGraph": {
        "position": "top agent framework",
        "notes": "rapidly growing within LangChain ecosystem",
    },
    "AutoGen": {
        "position": "top agent framework",
        "notes": "Microsoft-backed multi-agent framework",
    },
    # --- Share of the paid AI coding tool market, in percent ---
    "market_share_copilot": 42,
    "market_share_cursor": 18,
    "market_share_amazon_q": 11,
}
# ---------------------------------------------------------------------------
# Dataset K: Real-World Developer AI Data
# ---------------------------------------------------------------------------
# Developer adoption metrics, one record per (source, metric) pair.
# Each row below expands to a dict with "source", "metric", "value" and
# "date" keys, in that order; the trailing dict carries optional extras
# (currently only a "note") appended after them.
developer_ai_adoption: list[dict[str, Any]] = [
    {"source": origin, "metric": metric, "value": value, "date": period, **extras}
    for origin, metric, value, period, extras in (
        (
            "GitHub",
            "all_time_copilot_users",
            20_000_000,
            "2025-07",
            {"note": "includes free/student"},
        ),
        ("GitHub", "paid_copilot_subscribers", 4_700_000, "2026-01", {}),
        ("GitHub", "fortune_100_adoption_percent", 90, "2025", {}),
        ("JetBrains 2025", "regular_ai_usage_percent", 85, "2025", {}),
        ("JetBrains 2025", "rely_on_coding_assistant_percent", 62, "2025", {}),
        ("Stack Overflow 2025", "use_or_plan_ai_tools_percent", 84, "2025", {}),
        ("Stack Overflow 2025", "professional_devs_using_ai_daily", 51, "2025", {}),
        ("DX DevCycle Q4 2025", "ai_adoption_in_active_repos", 91, "2025-Q4", {}),
        ("DX DevCycle Q4 2025", "merged_code_ai_authored_percent", 22, "2025-Q4", {}),
    )
]
# Suggestion acceptance figures per tool / user cohort, in percent.
# "code_retention_percent" is recorded only for the first entry.
code_acceptance_rates: list[dict[str, Any]] = [
    # All users.
    {
        "tool": "GitHub Copilot",
        "acceptance_rate_percent": 30,
        "code_retention_percent": 88,
        "source": "GitHub/Microsoft study",
        "date": "2025",
    },
    # Heavy-user cohort; no retention figure recorded.
    {
        "tool": "GitHub Copilot (heavy users)",
        "acceptance_rate_percent": 29.73,
        "source": "GitHub/Microsoft study",
        "date": "2025",
    },
]
# Measured or reported productivity effects of AI coding tools.
# NOTE(review): the measurement lives under "value_percent" for the Accenture
# records but under "value" for the Google and Microsoft Research records
# (the latter as a "20-45%" string), so readers must check both keys.
real_world_productivity_impact: list[dict[str, Any]] = [
    # --- Accenture randomized controlled trial of GitHub Copilot ---
    {
        "company": "Accenture RCT",
        "system": "GitHub Copilot",
        "metric": "PRs_per_developer_increase",
        "value_percent": 8.69,
        "note": "randomized controlled trial",
        "source": "Accenture study",
        "date": "2025",
    },
    {
        "company": "Accenture RCT",
        "system": "GitHub Copilot",
        "metric": "PR_merge_rate_increase",
        "value_percent": 11,
        "source": "Accenture study",
    },
    {
        "company": "Accenture RCT",
        "system": "GitHub Copilot",
        "metric": "successful_builds_increase",
        "value_percent": 84,
        "source": "Accenture study",
    },
    # --- Company-reported figures ---
    {
        "company": "Google",
        "metric": "code_now_ai_assisted_percent",
        "value": 21,
        "date": "2025",
        "source": "Google internal",
    },
    {
        "company": "Microsoft Research",
        "metric": "productivity_improvement_range",
        "value": "20-45%",  # a range, kept as text rather than a number
        "source": "Microsoft Research 2024-2025",
    },
]
# Findings on the quality and security of AI-generated code. Each record is
# a free-text "finding" with its "source" and a "confidence" label
# ("HIGH" or "MEDIUM"); the first record also carries a "cwe_categories" count.
code_quality_in_production: list[dict[str, Any]] = [
    # --- Security weaknesses in generated code ---
    {
        "finding": "29.1% of Python AI-generated code contains security weaknesses",
        "source": "Academic study (733 code snippets)",
        "confidence": "HIGH",
        "cwe_categories": 43,
    },
    {
        "finding": "24.2% of JavaScript AI-generated code has security weaknesses",
        "source": "Same academic study",
        "confidence": "HIGH",
    },
    {
        "finding": "48% of AI-generated code contains potential security vulnerabilities",
        "source": "Multiple industry analyses",
        "confidence": "MEDIUM",
    },
    {
        "finding": "40% of Copilot-generated programs flagged for insecure code",
        "source": "GitHub Copilot research",
        "confidence": "HIGH",
    },
    # --- Downstream effects in repositories and delivery ---
    {
        "finding": "AI-coauthored PRs have ~1.7x more issues",
        "source": "CodeRabbit Dec 2025 / DX DevCycle",
        "confidence": "HIGH",
    },
    {
        "finding": "6.4% secret leakage rate in Copilot repos (40% higher than 4.6% baseline)",
        "source": "Academic security research",
        "confidence": "MEDIUM",
    },
    {
        "finding": "Google DORA 2024: AI use causes 7.2% drop in delivery stability",
        "source": "Google DORA report",
        "confidence": "HIGH",
    },
]
# Reported failure rates for AI initiatives. Every record has the same five
# fields, so the rows are zipped against one shared key tuple.
# "rate_percent" is the headline number from the accompanying "note"; for
# the RAND row the note reads "Over 80%", so 80 is a lower bound there.
failure_modes: list[dict[str, Any]] = [
    dict(zip(("category", "rate_percent", "source", "confidence", "note"), row))
    for row in (
        (
            "pilot_to_production_failure",
            72,
            "McKinsey State of AI 2025",
            "HIGH",
            "72% of AI initiatives fail to reach production",
        ),
        (
            "ai_pilots_zero_roi",
            95,
            "MIT Media Lab 2025",
            "HIGH",
            "95% of corporate AI pilots deliver zero measurable return",
        ),
        (
            "companies_abandoned_ai",
            42,
            "S&P Global 2025",
            "HIGH",
            "42% of companies abandoned most AI initiatives in 2025",
        ),
        (
            "projects_fail_to_profit",
            48,
            "Microsoft 2025 market study",
            "MEDIUM",
            "48% of IT leaders said AI projects were NOT profitable",
        ),
        (
            "ai_projects_overall_fail",
            80,
            "RAND Corporation 2025",
            "MEDIUM",
            "Over 80% of AI projects fail — twice non-AI rate",
        ),
    )
]
# Developer sentiment findings as free text. Named surveys carry a
# "sample_size" string; the aggregated "Various" records carry a
# "confidence" label instead.
developer_sentiment: list[dict[str, Any]] = [
    # --- Named surveys / studies ---
    {
        "survey": "Stack Overflow 2025",
        "finding": "84% use or plan to use AI tools",
        "sample_size": "~70,000",
    },
    {
        "survey": "JetBrains 2025",
        "finding": "85% regular AI usage, 62% rely on at least one coding assistant",
        "sample_size": "~30,000",
    },
    {
        "survey": "Accenture RCT",
        "finding": "90% felt more fulfilled, 91% enjoyed coding more with Copilot",
        "sample_size": "RCT participants",
    },
    # --- Aggregated from unnamed sources ---
    {
        "survey": "Various",
        "finding": "71% of developers do NOT merge AI code without manual review",
        "confidence": "MEDIUM",
    },
    {
        "survey": "Various",
        "finding": "97% use AI tools before company policies allow (shadow IT)",
        "confidence": "MEDIUM",
    },
]
# ---------------------------------------------------------------------------
# Benchmark Scores (HEAVY DISCLAIMER APPLIES)
# ---------------------------------------------------------------------------
#
# !!! LAB BENCHMARK ONLY — Does not measure production capability,
# !!! debugging, architecture, or code quality.
# !!! Real-world performance may differ significantly.
# !!! These numbers should NOT be used as proxies for real-world coding ability.
#
# SWE-bench Verified scores. Every record embeds the same lab-benchmark
# disclaimer so the caveat travels with the number wherever it is used.
# NOTE(review): both rows are dated "2025" while the module's data was
# retrieved in June 2026 — confirm the date of the second entry.
benchmark_scores_with_disclaimer: list[dict[str, Any]] = [
    {
        "model": model_name,
        "swe_bench_verified_percent": score,
        "disclaimer": (
            "LAB BENCHMARK ONLY — Does not measure production capability, "
            "debugging, architecture, or code quality. "
            "Real-world performance may differ significantly."
        ),
        "date": reported,
    }
    for model_name, score, reported in (
        ("Claude Opus 4.5", 80.9, "2025"),
        ("Claude Mythos Preview", 93.9, "2025"),
    )
]