feat(data): add agent productivity case studies and failure mode data

This commit is contained in:
Orchestrator
2026-06-04 16:59:09 -05:00
committed by marsultor
parent 3e81f901fe
commit c801c0c831

230
src/data/productivity.py Normal file
View File

@@ -0,0 +1,230 @@
"""Enterprise AI Agent Productivity Case Studies and Failure Modes
Source: Company case studies, vendor reports, research studies
Retrieved: June 2026
IMPORTANT: This module presents both successes AND failures honestly.
Many 'productivity gains' are self-reported by vendors and need
independent verification.
"""
# One record per publicly reported deployment.  Every record carries the keys
# company, system, metrics, source, date, confidence and caveat; only the
# Klarna record additionally carries "source_url".

# Klarna: figures taken from a customer case study published by LangChain.
# NOTE(review): rated HIGH even though the caveat marks it as vendor-reported;
# confirm that rating is intended.
_KLARNA_CASE = {
    "company": "Klarna",
    "system": "AI Assistant (LangGraph + LangSmith)",
    "metrics": {
        "active_users": 85_000_000,
        "daily_transactions": 2_500_000,
        "fte_equivalent": 700,
        "resolution_time_reduction_percent": 80,
        "task_automation_percent": 70,
        "conversations_handled": 2_500_000,
    },
    "source": "LangChain case study (Feb 2025)",
    "source_url": "https://www.langchain.com/blog/customers-klarna",
    "date": "2025-02",
    "confidence": "HIGH",
    "caveat": "Vendor case study — metrics from LangChain's official blog",
}

# JPMorgan Chase: COiN contract-review system, launched in 2017.
# "error_rate_after_percent" is the string "~0", unlike the other (integer)
# metrics in this record.
# NOTE(review): "date" says the metrics are current through 2024 while the
# caveat calls them 8+ years old; confirm which is right.
_JPMORGAN_COIN_CASE = {
    "company": "JPMorgan Chase",
    "system": "COiN (Contract Intelligence)",
    "metrics": {
        "hours_saved_annually": 360_000,
        "contracts_processed_annually": 12_000,
        "attributes_per_document": 150,
        "error_rate_before_percent": 5,
        "error_rate_after_percent": "~0",
        "annual_value_usd": 150_000_000,
        "fte_equivalent": 173,
    },
    "source": "Multiple sources including JPMorgan executive quotes",
    "date": "2017-launched, metrics current through 2024",
    "confidence": "HIGH",
    "caveat": "Metrics are 8+ years old; system has evolved significantly",
}

# ServiceNow: numbers reported by the partner SnowGeek Solutions for a
# mid-size manufacturer, not by ServiceNow itself.
_SERVICENOW_PARTNER_CASE = {
    "company": "ServiceNow (Partner Case — SnowGeek Solutions)",
    "system": "Now Assist + Agentic AI for IT Operations",
    "metrics": {
        "midnight_escalation_reduction_percent": 73,
        "mttr_improvement_percent": 65,
        "annual_downtime_savings_usd": 2_300_000,
        "engineering_hours_reclaimed": 1_840,
        "repeat_incident_reduction_percent": 62,
        "self_healing_incident_percent": 40,
    },
    "source": "SnowGeek Solutions partner case study (Q4 2025)",
    "date": "2025-Q4",
    "confidence": "MEDIUM",
    "caveat": (
        "Partner-reported metrics for mid-size manufacturer"
        " — not directly from ServiceNow"
    ),
}

# Morgan Stanley: a single widely repeated figure with no verifiable origin.
_MORGAN_STANLEY_CASE = {
    "company": "Morgan Stanley",
    "system": "DevGen.AI Developer Assistant",
    "metrics": {"developer_hours_saved": 280_000},
    "source": "Widely-reported claim",
    "date": "Unknown",
    "confidence": "LOW",
    "caveat": "Could NOT be independently verified. Treat as unconfirmed.",
}

# Amazon Q / CodeWhisperer: kept as a placeholder; the metrics dict is empty
# because no figure could be sourced.
_AMAZON_Q_CASE = {
    "company": "Amazon Q / CodeWhisperer",
    "system": "Developer Productivity Tools",
    "metrics": {},
    "source": (
        "AWS has published various studies "
        "but specific metrics could not be sourced"
    ),
    "date": "Unknown",
    "confidence": "LOW",
    "caveat": (
        "Could NOT be independently verified. "
        "AWS has claimed 55% faster task completion "
        "but no primary source found."
    ),
}

# Public list, ordered from highest to lowest confidence rating.
case_studies: list[dict] = [
    _KLARNA_CASE,
    _JPMORGAN_COIN_CASE,
    _SERVICENOW_PARTNER_CASE,
    _MORGAN_STANLEY_CASE,
    _AMAZON_Q_CASE,
]
# ---------------------------------------------------------------------------
# Failure Modes
# ---------------------------------------------------------------------------
# Sourced from academic research, consulting reports, and industry analyses.
# These rates underscore the gap between AI hype and measurable outcomes.
# ---------------------------------------------------------------------------
def _failure_mode(
    category: str,
    source: str,
    confidence: str,
    detail: str,
    *,
    rate_percent=None,
    scope=None,
) -> dict:
    """Build one failure-mode record with a fixed key order.

    Keys are emitted as category, rate_percent, source, confidence, detail,
    scope.  ``rate_percent`` and ``scope`` are left out of the record entirely
    when not supplied, so records without a single headline rate or a stated
    sample simply lack those keys.
    """
    record = {"category": category}
    if rate_percent is not None:
        record["rate_percent"] = rate_percent
    record["source"] = source
    record["confidence"] = confidence
    record["detail"] = detail
    if scope is not None:
        record["scope"] = scope
    return record


failure_modes: list[dict] = [
    # MIT Media Lab 2025: share of corporate AI pilots with no measurable return.
    _failure_mode(
        category="ai_pilots_zero_roi",
        rate_percent=95,
        source="MIT Media Lab 2025",
        confidence="HIGH",
        detail=(
            "95% of corporate AI pilots deliver zero measurable return; "
            "only 5% reach production with impact"
        ),
        scope="300+ initiatives, 52 org interviews, 153 executive surveys",
    ),
    # S&P Global 2025: companies walking away from AI initiatives.
    _failure_mode(
        category="companies_abandoned_ai",
        rate_percent=42,
        source="S&P Global 2025",
        confidence="HIGH",
        detail=(
            "42% of companies abandoned most AI initiatives in 2025 "
            "(up from 17% in 2024); "
            "46% of PoCs scrapped before production"
        ),
    ),
    # RAND Corporation 2025: AI versus non-AI project failure rates.
    _failure_mode(
        category="ai_projects_overall_fail",
        rate_percent=80,
        source="RAND Corporation 2025",
        confidence="MEDIUM",
        detail=(
            "Over 80% of AI projects fail — "
            "twice the failure rate of non-AI technology projects"
        ),
    ),
    # Gartner May 2026: headcount cuts versus ROI (no rate_percent key).
    _failure_mode(
        category="layoffs_unrelated_to_roi",
        source="Gartner May 2026",
        confidence="MEDIUM",
        detail=(
            "~80% of autonomous-AI deployers cut headcount; "
            "ZERO correlation between layoffs and ROI"
        ),
        scope="350 global executives",
    ),
    # Gartner prediction: agentic AI projects cancelled by end of 2027.
    _failure_mode(
        category="agentic_ai_projects_cancelled_by_2027",
        rate_percent=40,
        source="Gartner prediction",
        confidence="MEDIUM",
        detail=(
            "Over 40% of agentic AI projects will be canceled "
            "by end of 2027 due to escalating costs, unclear value, "
            "or inadequate risk controls"
        ),
    ),
    # McKinsey State of AI 2025: adoption versus scaling (no rate_percent key).
    _failure_mode(
        category="pilot_purgatory",
        source="McKinsey State of AI 2025",
        confidence="HIGH",
        detail=(
            "88% AI adoption but only 31% scaling — "
            "vast majority stuck in pilots"
        ),
    ),
    # MIT Media Lab 2025: external partnerships versus internal builds
    # (no rate_percent key).
    _failure_mode(
        category="build_vs_buy_success",
        source="MIT Media Lab 2025",
        confidence="MEDIUM",
        detail=(
            "External partnership deployments succeed at ~67% "
            "vs ~33% for internal builds"
        ),
    ),
    # Multiple sources: employees using personal AI tools (no rate_percent key).
    _failure_mode(
        category="shadow_ai_adoption",
        source="Multiple sources",
        confidence="MEDIUM",
        detail=(
            "90%+ of companies have employees using personal AI tools; "
            "only 40% have official licensing"
        ),
    ),
]
# ---------------------------------------------------------------------------
# Additional Known Successes (from failure-mode research sources)
# ---------------------------------------------------------------------------
# These surfaced while researching failure rates but are not
# among the primary case studies above.
# ---------------------------------------------------------------------------
# All three entries cite the same article, so the citation is spelled once.
_WORKOS_SOURCE = "WorkOS article"

# Short-form records: company, optional savings_usd, a metric label, source.
known_successes_outside_main: list[dict] = [
    {
        "company": "Lumen",
        "savings_usd": 50_000_000,
        "metric": "research_time_4hrs_to_15min",
        "source": _WORKOS_SOURCE,
    },
    {
        # No savings_usd key: only an automation figure is recorded here.
        "company": "Air India",
        "metric": "97%_automation_on_4M_queries",
        "source": _WORKOS_SOURCE,
    },
    {
        "company": "Microsoft",
        "savings_usd": 500_000_000,
        "metric": "call_center_ai_savings",
        "source": _WORKOS_SOURCE,
    },
]
# ---------------------------------------------------------------------------
# Metadata
# ---------------------------------------------------------------------------
# Per-confidence tallies of the entries in ``case_studies``.  They are named
# so that the total below is computed from the breakdown instead of being
# typed in separately.
_HIGH_CONFIDENCE_COUNT = 2  # Klarna, JPMorgan Chase
_MEDIUM_CONFIDENCE_COUNT = 1  # ServiceNow partner case
_LOW_CONFIDENCE_COUNT = 2  # Morgan Stanley, Amazon Q / CodeWhisperer

# Summary of the case-study list: counts, source labels and retrieval date.
case_studies_meta = {
    "total_cases": (
        _HIGH_CONFIDENCE_COUNT + _MEDIUM_CONFIDENCE_COUNT + _LOW_CONFIDENCE_COUNT
    ),
    "high_confidence_cases": _HIGH_CONFIDENCE_COUNT,
    "medium_confidence_cases": _MEDIUM_CONFIDENCE_COUNT,
    "low_confidence_cases": _LOW_CONFIDENCE_COUNT,
    "sources": [
        "LangChain case study",
        "JPMorgan executive quotes",
        "SnowGeek Solutions",
        "widely-reported claims",
    ],
    "retrieved": "2026-06-04",
}