feat(data): add agent productivity case studies and failure mode data

2026-06-04 16:59:09 -05:00
parent acd8cf20a2
commit 5c8becdb08
1 changed files with 230 additions and 0 deletions
--- a/src/data/productivity.py
+++ b/src/data/productivity.py
@@ -0,0 +1,230 @@
 """Enterprise AI Agent Productivity Case Studies and Failure Modes.
 Sources: company case studies, vendor reports, and research studies.
 Retrieved: June 2026
 IMPORTANT: This module presents both successes AND failures honestly.
 Many 'productivity gains' are self-reported by vendors and need
 independent verification; check each case study's "confidence" and
 "caveat" fields before quoting a figure.
 """
 # Enterprise case studies, one dict per company. Every entry carries the
 # keys company / system / metrics / source / date / confidence / caveat;
 # only the Klarna entry also has a "source_url".
 case_studies: list[dict] = [
    # Klarna: numbers are taken from LangChain's customer write-up,
    # i.e. they are vendor-published.
    {
        "company": "Klarna",
        "system": "AI Assistant (LangGraph + LangSmith)",
        "metrics": {
            "active_users": 85_000_000,
            "daily_transactions": 2_500_000,
            "fte_equivalent": 700,
            "resolution_time_reduction_percent": 80,
            "task_automation_percent": 70,
            "conversations_handled": 2_500_000,
        },
        "source": "LangChain case study (Feb 2025)",
        "source_url": "https://www.langchain.com/blog/customers-klarna",
        "date": "2025-02",
        "confidence": "HIGH",
        "caveat": "Vendor case study — metrics from LangChain's official blog",
    },
    # JPMorgan Chase: COiN contract-review system, first launched in 2017.
    {
        "company": "JPMorgan Chase",
        "system": "COiN (Contract Intelligence)",
        "metrics": {
            "hours_saved_annually": 360_000,
            "contracts_processed_annually": 12_000,
            "attributes_per_document": 150,
            "error_rate_before_percent": 5,
            # NOTE: kept as the string "~0" (approximate), unlike the
            # numeric metrics around it.
            "error_rate_after_percent": "~0",
            "annual_value_usd": 150_000_000,
            "fte_equivalent": 173,
        },
        "source": "Multiple sources including JPMorgan executive quotes",
        "date": "2017-launched, metrics current through 2024",
        "confidence": "HIGH",
        "caveat": "Metrics are 8+ years old; system has evolved significantly",
    },
    # ServiceNow: figures reported by the partner SnowGeek Solutions for a
    # mid-size manufacturer, not by ServiceNow itself.
    {
        "company": "ServiceNow (Partner Case — SnowGeek Solutions)",
        "system": "Now Assist + Agentic AI for IT Operations",
        "metrics": {
            "midnight_escalation_reduction_percent": 73,
            "mttr_improvement_percent": 65,
            "annual_downtime_savings_usd": 2_300_000,
            "engineering_hours_reclaimed": 1_840,
            "repeat_incident_reduction_percent": 62,
            "self_healing_incident_percent": 40,
        },
        "source": "SnowGeek Solutions partner case study (Q4 2025)",
        "date": "2025-Q4",
        "confidence": "MEDIUM",
        "caveat": (
            "Partner-reported metrics for mid-size manufacturer "
            "— not directly from ServiceNow"
        ),
    },
    # Morgan Stanley: a single DevGen.AI figure that remains unverified.
    {
        "company": "Morgan Stanley",
        "system": "DevGen.AI Developer Assistant",
        "metrics": {"developer_hours_saved": 280_000},
        "source": "Widely-reported claim",
        "date": "Unknown",
        "confidence": "LOW",
        "caveat": "Could NOT be independently verified. Treat as unconfirmed.",
    },
    # Amazon Q / CodeWhisperer: metrics left empty because no figure
    # could be traced to a primary source.
    {
        "company": "Amazon Q / CodeWhisperer",
        "system": "Developer Productivity Tools",
        "metrics": {},
        "source": (
            "AWS has published various studies "
            "but specific metrics could not be sourced"
        ),
        "date": "Unknown",
        "confidence": "LOW",
        "caveat": (
            "Could NOT be independently verified. "
            "AWS has claimed 55% faster task completion "
            "but no primary source found."
        ),
    },
 ]
 # ---------------------------------------------------------------------------
 # Failure Modes
 # ---------------------------------------------------------------------------
 # Sourced from academic research, consulting reports, and industry analyses.
 # These rates underscore the gap between AI hype and measurable outcomes.
 # ---------------------------------------------------------------------------
 # Reported failure and abandonment findings, one dict per finding. Every
 # entry has category / source / confidence / detail; "rate_percent" and
 # "scope" appear only on some entries, so read them with .get().
 failure_modes: list[dict] = [
    # MIT Media Lab 2025: share of corporate AI pilots with no return.
    {
        "category": "ai_pilots_zero_roi",
        "rate_percent": 95,
        "source": "MIT Media Lab 2025",
        "confidence": "HIGH",
        "detail": (
            "95% of corporate AI pilots deliver zero measurable "
            "return; only 5% reach production with impact"
        ),
        "scope": "300+ initiatives, 52 org interviews, 153 executive surveys",
    },
    # S&P Global 2025: companies walking away from AI initiatives.
    {
        "category": "companies_abandoned_ai",
        "rate_percent": 42,
        "source": "S&P Global 2025",
        "confidence": "HIGH",
        "detail": (
            "42% of companies abandoned most AI initiatives "
            "in 2025 (up from 17% in 2024); "
            "46% of PoCs scrapped before production"
        ),
    },
    # RAND Corporation 2025: AI projects versus non-AI technology projects.
    {
        "category": "ai_projects_overall_fail",
        "rate_percent": 80,
        "source": "RAND Corporation 2025",
        "confidence": "MEDIUM",
        "detail": (
            "Over 80% of AI projects fail — "
            "twice the failure rate of non-AI technology projects"
        ),
    },
    # Gartner May 2026: headcount cuts versus ROI (no rate_percent key).
    {
        "category": "layoffs_unrelated_to_roi",
        "source": "Gartner May 2026",
        "confidence": "MEDIUM",
        "detail": (
            "~80% of autonomous-AI deployers cut "
            "headcount; ZERO correlation between layoffs and ROI"
        ),
        "scope": "350 global executives",
    },
    # Gartner prediction: a forecast about cancellations, not a measurement.
    {
        "category": "agentic_ai_projects_cancelled_by_2027",
        "rate_percent": 40,
        "source": "Gartner prediction",
        "confidence": "MEDIUM",
        "detail": (
            "Over 40% of agentic AI projects will be canceled "
            "by end of 2027 due to escalating costs, "
            "unclear value, or inadequate risk controls"
        ),
    },
    # McKinsey State of AI 2025: adoption versus scaling (no rate_percent key).
    {
        "category": "pilot_purgatory",
        "source": "McKinsey State of AI 2025",
        "confidence": "HIGH",
        "detail": (
            "88% AI adoption but only 31% scaling — "
            "vast majority stuck in pilots"
        ),
    },
    # MIT Media Lab 2025: external partnerships versus internal builds
    # (no rate_percent key).
    {
        "category": "build_vs_buy_success",
        "source": "MIT Media Lab 2025",
        "confidence": "MEDIUM",
        "detail": (
            "External partnership deployments succeed "
            "at ~67% vs ~33% for internal builds"
        ),
    },
    # Multiple sources: employees using personal AI tools
    # (no rate_percent key).
    {
        "category": "shadow_ai_adoption",
        "source": "Multiple sources",
        "confidence": "MEDIUM",
        "detail": (
            "90%+ of companies have employees using personal "
            "AI tools; only 40% have official licensing"
        ),
    },
 ]
 # ---------------------------------------------------------------------------
 # Additional Known Successes (from failure-mode research sources)
 # ---------------------------------------------------------------------------
 # These surfaced while researching failure rates but are not
 # among the primary case studies above.
 # ---------------------------------------------------------------------------
 # Success stories that all cite the same "WorkOS article" source.
 # "savings_usd" is optional: the Air India entry does not have it.
 known_successes_outside_main: list[dict] = [
    {
        "company": "Lumen",
        "savings_usd": 50_000_000,
        "metric": "research_time_4hrs_to_15min",
        "source": "WorkOS article",
    },
    {
        "company": "Air India",
        "metric": "97%_automation_on_4M_queries",
        "source": "WorkOS article",
    },
    {
        "company": "Microsoft",
        "savings_usd": 500_000_000,
        "metric": "call_center_ai_savings",
        "source": "WorkOS article",
    },
 ]
 # ---------------------------------------------------------------------------
 # Metadata
 # ---------------------------------------------------------------------------
 # Summary of the case_studies list. The tallies are hand-maintained
 # literals, so revise them whenever a case study is added, removed or
 # re-rated.
 case_studies_meta = {
    # Counts per "confidence" label:
    #   HIGH   -> Klarna, JPMorgan
    #   MEDIUM -> ServiceNow partner
    #   LOW    -> Morgan Stanley, Amazon Q
    "total_cases": 5,
    "high_confidence_cases": 2,
    "medium_confidence_cases": 1,
    "low_confidence_cases": 2,
    "sources": [
        "LangChain case study", "JPMorgan executive quotes",
        "SnowGeek Solutions", "widely-reported claims",
    ],
    # ISO date string recording when the data was retrieved.
    "retrieved": "2026-06-04",
 }