diff --git a/src/battlecards/claim_extractor.py b/src/battlecards/claim_extractor.py index 3f7a080..730da5c 100644 --- a/src/battlecards/claim_extractor.py +++ b/src/battlecards/claim_extractor.py @@ -6,9 +6,12 @@ claim/evidence/implication triples suitable for FIA card assembly. from __future__ import annotations +import importlib +import importlib.util +import json import re from pathlib import Path -from typing import Optional +from typing import Any, Optional class ClaimExtractor: @@ -16,12 +19,14 @@ class ClaimExtractor: Methods ------- - extract_from_narrative(narrative_path: str) -> list[dict] + parse_narrative(narrative_path: str) -> list[dict] Parse a narrative Markdown file for claim triples. extract_from_data(data_module_path: str) -> list[dict] Extract quantified claims from a Python data module. map_to_cards(claims: list[dict]) -> dict Map extracted claims to card numbers (1-8). + export_cards(cards_path: str) -> dict + Read claims.json and return structured card data. Claim dict format ----------------- @@ -31,26 +36,36 @@ class ClaimExtractor: "claim": str, "evidence": str, "source": str, + "confidence": str, # optional } """ # Card number to topic mapping for heuristic assignment - _CARD_TOPICS = { - 1: ("valuation", "cape", "market cap", "shiller", "p/e"), - 2: ("infrastructure", "data center", "hyperscaler", "capex"), - 3: ("gpu", "utilization", "tensor", "compute"), - 4: ("startup", "funding", "venture", "valuation disconnect"), - 5: ("enterprise", "deployment", "adoption", "production"), - 6: ("developer", "coding", "programming", "ide"), - 7: ("quality", "security", "vulnerability", "bug"), - 8: ("productivity", "long-term", "trajectory", "efficiency"), + _CARD_TOPICS: dict[int, tuple[str, ...]] = { + 1: ("valuation", "cape", "market cap", "shiller", "p/e", "dividend"), + 2: ("infrastructure", "data center", "hyperscaler", "capex", "nvidia"), + 3: ("gpu", "utilization", "tensor", "compute", "idle"), + 4: ("startup", "funding", "venture", 
"openai", "anthropic", "mistral"), + 5: ("enterprise", "deployment", "klarna", "jpmorgan", "servicenow", "production"), + 6: ("developer", "coding", "programming", "ide", "copilot", "github"), + 7: ("quality", "security", "vulnerability", "bug", "dora"), + 8: ( + "productivity", + "long-term", + "trajectory", + "efficiency", + "accenture", + "microsoft research", + ), } - def extract_from_narrative( - self, narrative_path: str - ) -> list[dict]: + def parse_narrative(self, narrative_path: str) -> list[dict]: """Parse a Markdown narrative for claim/evidence/implication triples. + Reads the narrative file and extracts bullet points and key + statements that contain quantitative data, classifying each + into fact, impact, or act sections and mapping to card numbers. + Parameters ---------- narrative_path : str @@ -103,9 +118,7 @@ class ClaimExtractor: return claims - def extract_from_data( - self, data_module_path: str - ) -> list[dict]: + def extract_from_data(self, data_module_path: str) -> list[dict]: """Extract quantified claims from a Python data module. Reads module-level list[dict] or dict constants and @@ -163,6 +176,62 @@ class ClaimExtractor: return claims + def extract_from_data_modules( + self, + market_bubbles_module: Optional[str] = None, + ai_infra_module: Optional[str] = None, + agent_adoption_module: Optional[str] = None, + productivity_module: Optional[str] = None, + ) -> list[dict]: + """Extract cross-referenced data points from data modules. + + Dynamically imports the specified data modules and extracts + key numeric values as claims with proper source attribution. + + Parameters + ---------- + market_bubbles_module : str, optional + Module path for market_bubbles data. + ai_infra_module : str, optional + Module path for ai_infrastructure data. + agent_adoption_module : str, optional + Module path for agent_adoption data. + productivity_module : str, optional + Module path for productivity data. 
def export_cards(self, cards_path: str) -> dict:
    """Load the card data stored in a claims JSON file.

    Parameters
    ----------
    cards_path : str
        Path to the claims.json file.

    Returns
    -------
    dict
        The decoded JSON document (expected to be an object), or an
        empty dict when ``cards_path`` does not point to a regular file.

    Raises
    ------
    json.JSONDecodeError
        If the file exists but does not contain valid JSON.
    """
    path = Path(cards_path)

    # is_file() rather than exists(): a directory "exists" but opening it
    # for reading raises, which would break the documented "missing input
    # yields {}" behaviour.
    if not path.is_file():
        return {}

    with path.open("r", encoding="utf-8") as handle:
        return json.load(handle)
def _import_module(self, module_path: str) -> Optional[Any]:
    """Load a Python source file as a module object, best-effort.

    The file's top-level code is executed in-process, so only paths to
    trusted data modules should be passed. The loaded module is returned
    to the caller; this method does not register it in ``sys.modules``.

    Parameters
    ----------
    module_path : str
        Filesystem path of the Python file to load.

    Returns
    -------
    module or None
        The executed module, or ``None`` when the path is not a regular
        file, no import spec/loader can be built for it, or loading
        raises. Failures are logged as warnings instead of being
        silently discarded.
    """
    import logging

    log = logging.getLogger(__name__)

    try:
        path = Path(module_path)
        if not path.is_file():
            return None

        spec = importlib.util.spec_from_file_location(path.stem, str(path))
        if spec is None or spec.loader is None:
            return None

        mod = importlib.util.module_from_spec(spec)
        # NOTE(security): this runs arbitrary code from the file.
        spec.loader.exec_module(mod)
    except Exception:
        # Module-level code can raise anything (SyntaxError, ImportError,
        # errors in the data definitions). Extraction stays best-effort,
        # but the reason is recorded so a broken data module is visible.
        log.warning("Could not load data module %r", module_path, exc_info=True)
        return None
    return mod
'N/A')}, " + f"historical mean: {mean_val}, peak: {peak_val} " + f"(year {peak_year})", + "evidence": f"CAPE {latest.get('year', '?')}: " + f"{latest.get('value', 'N/A')}", + "source": "market_bubbles.shiller_cape", + "confidence": cape_meta.get("confidence", "HIGH"), + } + ) + + # Buffett Indicator + buffett_data = getattr(mod, "buffett_indicator", None) + buffett_meta = getattr(mod, "buffett_indicator_meta", {}) + if buffett_data and isinstance(buffett_data, list): + latest = buffett_data[-1] if buffett_data else {} + claims.append( + { + "card_number": 1, + "section": "fact", + "claim": f"Buffett Indicator: {latest.get('value', 'N/A')}% " + f"(200% danger threshold)", + "evidence": f"{latest.get('year', '?')}: " + f"{latest.get('value', 'N/A')}%", + "source": "market_bubbles.buffett_indicator", + "confidence": buffett_meta.get("confidence", "MEDIUM-HIGH"), + } + ) + + # S&P 500 P/E + pe_data = getattr(mod, "sp500_pe", None) + pe_meta = getattr(mod, "sp500_pe_meta", {}) + if pe_data and isinstance(pe_data, list): + latest = pe_data[-1] if pe_data else {} + mean_val = pe_meta.get("historical_mean", "N/A") + claims.append( + { + "card_number": 1, + "section": "fact", + "claim": f"S&P 500 P/E: {latest.get('value', 'N/A')} " + f"(mean: {mean_val})", + "evidence": f"{latest.get('year', '?')}: " + f"{latest.get('value', 'N/A')}", + "source": "market_bubbles.sp500_pe", + "confidence": pe_meta.get("confidence", "HIGH"), + } + ) + + # Dividend Yield + div_data = getattr(mod, "sp500_dividend_yield", None) + div_meta = getattr(mod, "sp500_dividend_yield_meta", {}) + if div_data and isinstance(div_data, list): + latest = div_data[-1] if div_data else {} + mean_val = div_meta.get("historical_mean", "N/A") + claims.append( + { + "card_number": 1, + "section": "fact", + "claim": f"S&P 500 dividend yield: " + f"{latest.get('value', 'N/A')}% (mean: {mean_val}%)", + "evidence": f"{latest.get('year', '?')}: " + f"{latest.get('value', 'N/A')}%", + "source": 
"market_bubbles.sp500_dividend_yield", + "confidence": div_meta.get("confidence", "HIGH"), + } + ) + + # Debt ratios + debt_data = getattr(mod, "us_debt_ratios", None) + if debt_data and isinstance(debt_data, list): + latest = debt_data[-1] if debt_data else {} + claims.append( + { + "card_number": 1, + "section": "fact", + "claim": f"Federal debt/GDP: " + f"{latest.get('federal_debt_gdp_percent', 'N/A')}% " + f"(household: " + f"{latest.get('household_debt_gdp_percent', 'N/A')}%)", + "evidence": f"{latest.get('year', '?')}: federal " + f"{latest.get('federal_debt_gdp_percent', 'N/A')}%, " + f"household " + f"{latest.get('household_debt_gdp_percent', 'N/A')}%", + "source": "market_bubbles.us_debt_ratios", + "confidence": "HIGH", + } + ) + + return claims + + def _extract_infrastructure_claims(self, mod: Any) -> list[dict]: + """Extract claims from ai_infrastructure data module.""" + claims: list[dict] = [] + + # Hyperscaler capex + capex_data = getattr(mod, "hyperscaler_capex_annual", None) + if capex_data and isinstance(capex_data, list): + # Sum 2020 and 2026 totals + years = {} + for entry in capex_data: + year = entry.get("year") + if year not in years: + years[year] = 0.0 + years[year] += entry.get("capex_billions", 0) + + y2020 = years.get(2020, 0) + y2026 = years.get(2026, 0) + claims.append( + { + "card_number": 2, + "section": "fact", + "claim": f"Hyperscaler combined capex: " + f"${y2020:.1f}B (2020) -> " + f"${y2026:.0f}B (2026 projected)", + "evidence": f"2020: ${y2020:.1f}B, 2026: ${y2026:.0f}B", + "source": "ai_infrastructure.hyperscaler_capex_annual", + "confidence": "HIGH", + } + ) + + # AI capex share + ai_share = getattr(mod, "hyperscaler_ai_capex_share", None) + if ai_share and isinstance(ai_share, dict): + latest_year = max(ai_share.keys()) + share = ai_share[latest_year] + claims.append( + { + "card_number": 2, + "section": "fact", + "claim": f"AI capex share: " + f"{share.get('low', 'N/A')}-{share.get('high', 'N/A')}% " + f"of hyperscaler 
def _extract_adoption_claims(self, mod: Any) -> list[dict]:
    """Build card claims from an agent_adoption data module.

    Four optional module attributes are read:

    * ``developer_ai_adoption`` (list of dicts) -> card 6, only for
      entries whose ``metric`` contains one of the developer-adoption
      keywords.
    * ``agent_survey_data`` (dict of survey name -> dict) -> card 5,
      for surveys that report a ``production`` value.
    * ``code_quality_in_production`` (list of dicts) -> card 7, one
      claim per entry.
    * ``failure_modes`` (list of dicts) -> card 8, for entries that
      carry a ``rate_percent``.

    Attributes that are missing, empty, or of the wrong container type
    are ignored. Individual records that are not dicts are skipped so
    that one malformed record does not abort the whole extraction.

    Parameters
    ----------
    mod : Any
        Imported data module (any object exposing the attributes above).

    Returns
    -------
    list[dict]
        Claim dicts with ``card_number``, ``section``, ``claim``,
        ``evidence``, ``source`` and ``confidence`` keys, in the order
        the attributes are listed above.
    """
    dev_keywords = ("copilot", "daily", "use_or_plan", "regular_ai")
    claims: list[dict] = []

    def dict_entries(attr_name: str) -> list[dict]:
        """Return the dict items of a list-valued module attribute."""
        data = getattr(mod, attr_name, None)
        if not isinstance(data, list):
            return []
        return [item for item in data if isinstance(item, dict)]

    # Developer AI adoption -> card 6
    for entry in dict_entries("developer_ai_adoption"):
        metric = entry.get("metric", "")
        # A non-string metric cannot match any keyword; skip it rather
        # than fail on the substring test.
        if not isinstance(metric, str):
            continue
        if not any(kw in metric for kw in dev_keywords):
            continue
        value = entry.get("value", 0)
        source = entry.get("source", "")
        claims.append(
            {
                "card_number": 6,
                "section": "fact",
                "claim": f"{source}: {metric} = {value}",
                "evidence": str(value),
                "source": "agent_adoption.developer_ai_adoption",
                "confidence": "HIGH",
            }
        )

    # Agent survey data -> card 5
    survey_data = getattr(mod, "agent_survey_data", None)
    if isinstance(survey_data, dict):
        for survey_name, metrics in survey_data.items():
            if not isinstance(metrics, dict):
                continue
            prod_rate = metrics.get("production")
            if prod_rate is None:
                continue
            claims.append(
                {
                    "card_number": 5,
                    "section": "fact",
                    "claim": f"{survey_name}: "
                    f"{prod_rate}% deploying agents in production",
                    "evidence": f"{prod_rate}%",
                    "source": f"agent_adoption.agent_survey_data.{survey_name}",
                    "confidence": "HIGH",
                }
            )

    # Code quality issues -> card 7
    for entry in dict_entries("code_quality_in_production"):
        claims.append(
            {
                "card_number": 7,
                "section": "fact",
                "claim": entry.get("finding", ""),
                "evidence": entry.get("source", "N/A"),
                "source": "agent_adoption.code_quality_in_production",
                "confidence": entry.get("confidence", "MEDIUM"),
            }
        )

    # Failure modes -> card 8
    for entry in dict_entries("failure_modes"):
        rate = entry.get("rate_percent")
        if rate is None:
            continue
        category = entry.get("category", "")
        source = entry.get("source", "")
        claims.append(
            {
                "card_number": 8,
                "section": "fact",
                "claim": f"{source}: {category} - "
                f"{rate}% failure/abandonment rate",
                "evidence": f"{rate}%",
                "source": "agent_adoption.failure_modes",
                "confidence": entry.get("confidence", "MEDIUM"),
            }
        )

    return claims
"would", + "may lead", + "potential", ] ): return "impact" if any( kw in lower for kw in [ - "should", "recommend", "act", "take action", - "consider", "monitor", "hedge", + "should", + "recommend", + "act", + "take action", + "consider", + "monitor", + "hedge", ] ): return "act" diff --git a/src/battlecards/claims.json b/src/battlecards/claims.json new file mode 100644 index 0000000..22ef2c9 --- /dev/null +++ b/src/battlecards/claims.json @@ -0,0 +1,530 @@ +{ + "cards": { + "1": { + "title": "Market Valuation Extremes", + "cluster": "bubble", + "summary": "The US stock market is trading at historic valuation extremes that mirror previous bubble periods across multiple metrics.", + "fact": [ + { + "claim": "The Shiller CAPE ratio stands at 40.03, more than 2x the historical mean of 17.39 since 1881.", + "evidence": "Yale/Shiller data, 1881-2026 (147 annual data points). Historical mean: 17.39. 2026 value: 40.03. Second-highest in 147-year record after 2000 dot-com peak of 43.77.", + "source": "Yale/Shiller CAPE dataset, retrieved 2026-06-04", + "confidence": "HIGH" + }, + { + "claim": "The Buffett Indicator (US equity market cap / GDP) is at 219%, well above the 200% danger threshold.", + "evidence": "Composite from CEIC, currentmarketvaluation.com, and thebuffettindicator.com. 2026 value: 219%. 1996 warning level: ~105%. 2000 dot-com peak: 147.38%. Series above 200% since 2024.", + "source": "CEIC + currentmarketvaluation.com + thebuffettindicator.com, 2026", + "confidence": "MEDIUM-HIGH" + }, + { + "claim": "The S&P 500 trailing P/E ratio is 29.6 against a historical mean of 17.9.", + "evidence": "multpl.com/Shiller data, 1950-2026. Current 29.6 vs mean 17.9 represents a 65% premium over long-term average. 
Above 20 for most of the past six years.", + "source": "multpl.com/Shiller S&P 500 P/E ratio, 2026-06-04", + "confidence": "HIGH" + }, + { + "claim": "The S&P 500 dividend yield has fallen to 1.04%, the lowest since the series began in 1950.", + "evidence": "multpl.com/Shiller data, 1950-2026. Current: 1.04%. Historical mean: 3.15%. Lowest reading since 1950.", + "source": "multpl.com/Shiller dividend yield, 2026-06-04", + "confidence": "HIGH" + }, + { + "claim": "Federal debt rose from 33% of GDP in 1980 to approximately 122.6% in 2025.", + "evidence": "FRED series GFDEGDQ188S. Key inflection points: 1980 (33%), 2007 (61%), 2020 (125%), 2025 (122.6%). Limits monetary policy flexibility during a correction.", + "source": "FRED/Macrotrends, 2026-06-04", + "confidence": "HIGH" + } + ], + "impact": [ + { + "claim": "When the CAPE exceeds 30, subsequent 10-year annualized returns tend to be significantly lower than historical averages.", + "evidence": "Dot-com bubble period (CAPE above 40 in 1999-2000) was followed by a 20% decline in nominal terms over the next decade. Current CAPE of 40.03 signals similarly depressed future returns.", + "source": "Shiller CAPE historical analysis", + "confidence": "HIGH" + }, + { + "claim": "The combination of elevated equity valuations and high sovereign debt creates a fragile macroeconomic environment.", + "evidence": "Federal debt at 122.6% of GDP constrains government ability to deploy stimulus. If AI bubble corrects sharply, policy tools are limited, potentially amplifying the severity of any correction.", + "source": "FRED debt data + macroeconomic analysis", + "confidence": "HIGH" + }, + { + "claim": "AI spending is amplifying the existing market bubble by driving speculative capital into technology equities.", + "evidence": "AI startup valuations (OpenAI $840B, Anthropic $380B) are priced into broader market indices. 
The narrative of inevitable AI disruption justifies extraordinary valuations across the tech sector.", + "source": "CB Insights Q1 2026, market analysis", + "confidence": "MEDIUM" + } + ], + "act": [ + { + "claim": "Lead with valuation data as the primary signal of bubble conditions.", + "evidence": "Multiple converging metrics (CAPE 40.03, Buffett 219%, P/E 29.6, dividend yield 1.04%) all independently point to overvaluation. No single metric is sufficient, but together they paint an unambiguous picture.", + "source": "Synthesis of market_bubbles.py datasets A, B, C, D, H", + "confidence": "HIGH" + }, + { + "claim": "Key question: Is the AI revenue growth actually justifying current market pricing?", + "evidence": "The narrative of AI-driven disruption has justified extraordinary valuations. However, the disconnect between price and underlying value remains significant. AI companies collectively have not yet generated revenue commensurate with their combined valuations.", + "source": "CB Insights valuation data + revenue analysis", + "confidence": "HIGH" + }, + { + "claim": "Counter-argument: Dot-com parallel suggests infrastructure built during the bubble will endure.", + "evidence": "Internet and telecom bubbles of the 1990s left behind foundational infrastructure (fiber optic cables, cellular networks) that enabled subsequent decades of innovation. 
The AI infrastructure buildout may follow a similar pattern.", + "source": "Historical precedent analysis, Section 4 of narrative", + "confidence": "HIGH" + } + ] + }, + "2": { + "title": "AI Infrastructure Buildout", + "cluster": "bubble", + "summary": "Combined hyperscaler capital expenditure has surged tenfold from 2020 to 2026, representing one of the largest capital deployment cycles in technology history.", + "fact": [ + { + "claim": "Combined hyperscaler capex grew from $55.3B in 2020 to a projected $605B in 2026.", + "evidence": "Microsoft $100B, Alphabet $175-185B, Meta $115-135B, Amazon $200B projected for 2026. Tenfold increase in six years. Q1 2026 already exceeded $130B combined (run rate >$520B annually).", + "source": "ValueAddVC, SEC filings, ai_infrastructure.py Dataset E, 2026-06", + "confidence": "HIGH" + }, + { + "claim": "AI-related capex is estimated at 85-90% of total hyperscaler spending in 2026.", + "evidence": "Roughly $514-545B of the projected $605B is devoted to AI infrastructure. Up from 50-60% in 2023.", + "source": "ValueAddVC estimates, ai_infrastructure.py hyperscaler_ai_capex_share", + "confidence": "MEDIUM" + }, + { + "claim": "NVIDIA data center revenue climbed from $1.57B in FY2020 Q1 to $75.2B in FY2027 Q1.", + "evidence": "FY2020-Q1: $1.57B. FY2024-Q4: $18.72B. FY2025-Q4: $39.25B. FY2026-Q4: $62.3B. FY2027-Q1 (new segments): compute $60.4B + networking $14.8B + edge $6.4B = $81.62B total. Year-over-year growth decelerating from 364% (2023) to ~83% (2027 projected).", + "source": "SEC 10-Q filings, NVIDIA IR, ai_infrastructure.py Dataset F", + "confidence": "HIGH" + }, + { + "claim": "Tech debt surged to $121B in 2025, approximately four times the five-year average.", + "evidence": "Accelerated pace of AI infrastructure deployment has generated significant technical debt through shortcuts, temporary solutions, and deferred maintenance. 
Creates structural risk for future innovation and security.", + "source": "Narrative Section 3, chart 06_tech_debt.png", + "confidence": "HIGH" + } + ], + "impact": [ + { + "claim": "Massive capital commitment creates an infrastructure overhang regardless of valuation outcomes.", + "evidence": "The GPU clusters, data centers, and networking fabric being deployed today will exist regardless of what happens to current valuations. Parallel to telecom and internet infrastructure buildouts of previous eras.", + "source": "Narrative Section 4, historical precedent analysis", + "confidence": "HIGH" + }, + { + "claim": "Diminishing returns are likely as the infrastructure buildout matures.", + "evidence": "NVIDIA growth deceleration from 364% to ~83% signals potential plateau. While still representing substantial growth, the rate of acceleration is declining, suggesting the easy-growth phase of infrastructure investment may be ending.", + "source": "ai_infrastructure.py Dataset F growth rate analysis", + "confidence": "MEDIUM" + }, + { + "claim": "The accelerated deployment pace generates compounding technical debt.", + "evidence": "$121B tech debt spike represents shortcuts in codebases and systems. Creates structural risk: may slow future innovation, increase vulnerability to security incidents, and amplify correction costs.", + "source": "Narrative Section 3, tech debt analysis", + "confidence": "HIGH" + } + ], + "act": [ + { + "claim": "Question the efficiency of capital allocation given the scale of spending.", + "evidence": "$605B in projected 2026 capex with 85-90% devoted to AI infrastructure. 
The economic justification requires scrutiny: is this level of spending generating proportional returns, or is it driven by competitive anxiety and FOMO?", + "source": "ValueAddVC projections + utilization analysis", + "confidence": "HIGH" + }, + { + "claim": "Compare to dot-com infrastructure buildout for historical context.", + "evidence": "Dot-com bubble saw massive investment in fiber optic cables, data centers, and networking infrastructure. Most companies failed, but the infrastructure became the backbone of the digital economy. Similar pattern likely in AI.", + "source": "Narrative Section 4, historical precedent", + "confidence": "HIGH" + } + ] + }, + "3": { + "title": "GPU Utilization Paradox", + "cluster": "bubble", + "summary": "Approximately $295B has been spent on AI infrastructure at ~5% average GPU utilization, implying ~$280B in idle computing capacity.", + "fact": [ + { + "claim": "Over $295B has been spent on AI-related infrastructure at an average GPU utilization rate of approximately 5%.", + "evidence": "Aggregate infrastructure spending estimate across hyperscaler capex, enterprise AI purchases, and GPU procurement. 5% utilization rate derived from industry surveys and data center monitoring.", + "source": "Narrative Section 3, GPU Utilization Paradox subsection", + "confidence": "MEDIUM" + }, + { + "claim": "Approximately $280B in computing capacity sits largely idle in data centers worldwide.", + "evidence": "$295B total spend minus ~5% utilization = ~$280B effectively wasted. 
This represents one of the largest capital inefficiencies in recent technology history.", + "source": "Narrative Section 3, utilization analysis", + "confidence": "MEDIUM" + }, + { + "claim": "Underutilization stems from overprovisioning, training-inference imbalance, organizational friction, and economic moat building.", + "evidence": "Four primary drivers: (1) companies buying capacity to secure supply rather than for current workloads; (2) GPU clusters optimized for training not efficiently used for inference; (3) enterprises lack talent/processes to deploy effectively; (4) hyperscalers building competitive barriers regardless of economics.", + "source": "Narrative Section 3, four-factor analysis", + "confidence": "HIGH" + } + ], + "impact": [ + { + "claim": "Enormous capital waste undermines the economic case for continued AI infrastructure spending.", + "evidence": "$280B in idle capacity represents misallocated capital that could have generated returns elsewhere. If the investment cannot be justified by actual utilization, the economic basis for continued spending becomes increasingly precarious.", + "source": "Narrative Section 3, economic analysis", + "confidence": "HIGH" + }, + { + "claim": "The utilization gap represents a significant ROI crisis for AI infrastructure investors.", + "evidence": "5% utilization means 95% of purchased capacity generates no revenue. For infrastructure investors and hyperscalers, this represents an enormous gap between capital deployed and revenue generated.", + "source": "GPU utilization analysis + hyperscaler capex data", + "confidence": "HIGH" + }, + { + "claim": "GPU utilization paradox is perhaps the clearest single indicator of the bubble.", + "evidence": "The infrastructure buildout is being driven more by speculation and competitive anxiety than by genuine demand for computing resources. 
If demand does not materialize, correction will be severe.", + "source": "Narrative Section 3, concluding analysis", + "confidence": "HIGH" + } + ], + "act": [ + { + "claim": "Highlight the utilization gap as a critical risk indicator.", + "evidence": "5% utilization on $295B of infrastructure spending is the single most concrete evidence of overinvestment. This metric cuts through the narrative of inevitable growth and exposes the fundamental disconnect between spending and demand.", + "source": "GPU utilization data synthesis", + "confidence": "HIGH" + }, + { + "claim": "Question the efficiency of AI spending in light of underutilization.", + "evidence": "If only 5% of purchased GPU capacity is being utilized, organizations should be examining whether alternative approaches (cloud rental, inference optimization, workload scheduling) would deliver better ROI than outright infrastructure ownership.", + "source": "Utilization analysis + industry best practices", + "confidence": "HIGH" + } + ] + }, + "4": { + "title": "Startup Valuation Disconnect", + "cluster": "bubble", + "summary": "AI startup valuations have reached extraordinary levels with revenue multiples of 31x-40x, historically unprecedented for pre-profit companies.", + "fact": [ + { + "claim": "OpenAI is valued at $840B with a 31x revenue multiple; Anthropic at $380B with 40x revenue.", + "evidence": "CB Insights Q1 2026 data. OpenAI: $840B valuation, 31x revenue. Anthropic: $380B, 40x revenue. Perplexity AI: $5.3B, 27x. Scale AI: $14B, 7x. Mistral AI: $8B, 40x.", + "source": "CB Insights, Q1 2026, narrative Section 2", + "confidence": "MEDIUM" + }, + { + "claim": "Revenue multiples of 31x-40x are historically unprecedented for pre-profit companies.", + "evidence": "During the dot-com bubble, even the most speculative internet companies rarely sustained revenue multiples above 50x. Those valuations were quickly corrected. 
AI companies are pricing in multi-decade market dominance assumptions.", + "source": "Dot-com historical comparison, narrative Section 2", + "confidence": "HIGH" + }, + { + "claim": "The AI sector is effectively pricing in the assumption that these companies will dominate a multi-trillion-dollar market for decades.", + "evidence": "Combined AI startup valuations exceed $1.2T (OpenAI $840B + Anthropic $380B + others). Current combined revenue is a fraction of this. The implied future revenue trajectory required to justify these valuations is extraordinary.", + "source": "CB Insights valuation data + revenue analysis", + "confidence": "MEDIUM" + } + ], + "impact": [ + { + "claim": "Valuations are fundamentally detached from near-term financial fundamentals.", + "evidence": "31x-40x revenue multiples for companies that are not yet profitable represent a complete disconnect between price and value. If growth disappoints even slightly, the repricing could be devastating.", + "source": "CB Insights data + financial analysis", + "confidence": "HIGH" + }, + { + "claim": "Crash risk is elevated if growth projections fail to materialize.", + "evidence": "Dot-com companies with similar multiples saw rapid corrections. Pets.com, WebVan, and others lost nearly all their value within months. AI startups face the same risk if they cannot demonstrate sustainable revenue growth.", + "source": "Dot-com historical comparison", + "confidence": "HIGH" + } + ], + "act": [ + { + "claim": "Compare AI startup valuations to dot-com era benchmarks.", + "evidence": "1999-2000: internet companies with 50x+ revenue multiples collapsed. 2026: AI companies with 31-40x multiples face similar overvaluation. 
The historical parallel suggests inevitable correction.", + "source": "Dot-com bubble historical data", + "confidence": "HIGH" + }, + { + "claim": "Highlight the revenue reality against the valuation narrative.", + "evidence": "OpenAI's $840B valuation implies annual revenue of ~$27B at 31x multiple. Anthropic's $380B at 40x implies ~$9.5B. Both companies are nowhere near these revenue levels, making current valuations unsustainable without exponential growth.", + "source": "Revenue multiple analysis", + "confidence": "HIGH" + } + ] + }, + "5": { + "title": "Real-World Enterprise Deployment", + "cluster": "utility", + "summary": "AI agents are moving beyond experimentation into genuine production deployment, with verified productivity gains in specific use cases.", + "fact": [ + { + "claim": "Klarna's AI assistant handles 2.5M daily transactions with ~700 FTE equivalent capacity.", + "evidence": "LangGraph + LangSmith deployment. 85M active users, 80% reduction in resolution time, 70% task automation. HIGH confidence based on LangChain official documentation.", + "source": "LangChain case study, Feb 2025, productivity.py case_studies[0]", + "confidence": "HIGH" + }, + { + "claim": "JPMorgan COiN processes 12,000 contracts annually, saving ~$150M per year.", + "evidence": "Extracts 150 attributes per document with near-zero error rates. Saves approximately 360,000 hours per year (173 FTE equivalent). Launched 2017, widely cited across multiple sources.", + "source": "JPMorgan executive quotes, productivity.py case_studies[1]", + "confidence": "HIGH" + }, + { + "claim": "ServiceNow partner case shows 73% reduction in midnight escalations and $2.3M annual downtime savings.", + "evidence": "SnowGeek Solutions (mid-size manufacturer) deploying Now Assist + Agentic AI for IT operations. 65% improvement in MTTR. 
MEDIUM confidence from partner rather than ServiceNow directly.", + "source": "SnowGeek Solutions partner case study, Q4 2025, productivity.py case_studies[2]", + "confidence": "MEDIUM" + }, + { + "claim": "57.3% of organizations report deploying agents in production with mature engineering practices.", + "evidence": "LangChain State of Agent Engineering, Nov-Dec 2025 (1,340 respondents). 89% have observability, 71.5% have full tracing, 75% using multi-model deployments.", + "source": "LangChain State of Agent Engineering 2025, agent_adoption.py agent_survey_data", + "confidence": "HIGH" + } + ], + "impact": [ + { + "claim": "Real ROI exists in specific, well-defined deployments.", + "evidence": "Klarna ($60M equivalent), JPMorgan ($150M/year), and ServiceNow ($2.3M/year) demonstrate measurable productivity gains. These case studies represent the leading edge of AI deployment.", + "source": "Case study synthesis, productivity.py", + "confidence": "HIGH" + }, + { + "claim": "Production maturity is accelerating with observability and multi-model strategies.", + "evidence": "High rates of observability (89%), full tracing (71.5%), and multi-model deployment (75%) suggest organizations are moving past superficial experimentation toward serious engineering practices.", + "source": "LangChain State of Agent Engineering 2025", + "confidence": "HIGH" + }, + { + "claim": "Productivity gains are measurable and quantifiable.", + "evidence": "Concrete metrics: 700 FTE equivalent (Klarna), 173 FTE equivalent (JPMorgan), 73% escalation reduction (ServiceNow). These are not abstract claims but documented operational improvements.", + "source": "Case study metrics compilation", + "confidence": "HIGH" + } + ], + "act": [ + { + "claim": "Use verified case studies as evidence of genuine AI utility.", + "evidence": "The Klarna and JPMorgan cases carry HIGH confidence ratings based on publicly documented sources. 
These represent the most credible evidence of AI productivity gains in production environments.", + "source": "productivity.py case_studies meta analysis", + "confidence": "HIGH" + }, + { + "claim": "Focus on verified metrics rather than vendor self-reports.", + "evidence": "Morgan Stanley's 280K developer hours saved claim carries LOW confidence and could not be independently verified. The distinction between verified and unverified claims is critical for honest assessment.", + "source": "Narrative Section 5, confidence analysis", + "confidence": "HIGH" + } + ] + }, + "6": { + "title": "Developer Adoption Reality", + "cluster": "utility", + "summary": "AI tool adoption among software developers is now pervasive, with 84% using or planning to use AI tools and 22% of merged code being AI-authored.", + "fact": [ + { + "claim": "GitHub Copilot has 20M users (4.7M paid) with 90% Fortune 100 adoption.", + "evidence": "GitHub all-time users: 20,000,000. Paid subscribers: 4,700,000 (Jan 2026). 90% of Fortune 100 companies have adopted GitHub Copilot.", + "source": "GitHub data, agent_adoption.py developer_ai_adoption", + "confidence": "HIGH" + }, + { + "claim": "84% of developers use or plan to use AI tools; 51% use them daily.", + "evidence": "Stack Overflow 2025 (~70,000 respondents): 84% use or plan to use, 51% daily use. JetBrains 2025 (~30,000 respondents): 85% regular AI usage, 62% rely on at least one coding assistant.", + "source": "Stack Overflow 2025 + JetBrains 2025 surveys, agent_adoption.py", + "confidence": "HIGH" + }, + { + "claim": "22% of merged code is AI-authored, with ~30% acceptance rate for Copilot suggestions.", + "evidence": "DX DevCycle Q4 2025: 22% of merged code is AI-authored. 91% of active repositories show AI adoption. 
GitHub Copilot acceptance rate ~30%, with 88% of accepted code retained.", + "source": "DX DevCycle Q4 2025, GitHub/Microsoft study, agent_adoption.py", + "confidence": "HIGH" + }, + { + "claim": "Accenture RCT found measurable productivity improvements with GitHub Copilot.", + "evidence": "8.69% increase in PRs per developer, 11% increase in PR merge rate, 84% increase in successful builds. Randomized controlled trial methodology provides empirical grounding.", + "source": "Accenture RCT study, agent_adoption.py real_world_productivity_impact", + "confidence": "HIGH" + } + ], + "impact": [ + { + "claim": "AI tool adoption among developers is real and accelerating.", + "evidence": "Multiple independent surveys (Stack Overflow, JetBrains, GitHub, DX DevCycle) all converge on high adoption rates. 91% of active repos show AI adoption. The trend is not a niche phenomenon but industry-wide.", + "source": "Multi-source survey convergence", + "confidence": "HIGH" + }, + { + "claim": "Quality concerns persist despite high adoption rates.", + "evidence": "~30% acceptance rate means 70% of AI suggestions are rejected. 71% of developers do not merge AI code without manual review. 97% use AI tools before company policies allow (shadow IT).", + "source": "developer_sentiment data, agent_adoption.py", + "confidence": "MEDIUM" + } + ], + "act": [ + { + "claim": "Present adoption data honestly with quality caveats.", + "evidence": "High adoption (84% of developers) does not equal high trust. The 30% acceptance rate and 71% manual review rate indicate that developers remain skeptical of AI-generated code quality.", + "source": "Adoption data + quality metrics synthesis", + "confidence": "HIGH" + }, + { + "claim": "Acknowledge that AI is an assistive tool, not a replacement for skilled engineering.", + "evidence": "Productivity gains are real but bounded. Accenture RCT shows ~9% PR increase, not a 10x improvement. 
AI excels at code completion, boilerplate, and documentation but cannot replace architecture, debugging, and system design.", + "source": "Accenture RCT + narrative Section 5 analysis", + "confidence": "HIGH" + } + ] + }, + "7": { + "title": "Code Quality and Security Caveats", + "cluster": "risk", + "summary": "AI-generated code introduces significant security vulnerabilities and quality issues, with 48% of AI-generated code containing potential vulnerabilities.", + "fact": [ + { + "claim": "48% of AI-generated code contains potential security vulnerabilities.", + "evidence": "Multiple industry analyses. 29.1% of AI-generated Python code contains security weaknesses spanning 43 CWE categories. 24.2% of AI-generated JavaScript code has security weaknesses.", + "source": "Academic study of 733 code snippets, agent_adoption.py code_quality_in_production", + "confidence": "HIGH" + }, + { + "claim": "AI-coauthored pull requests have approximately 1.7x more issues than non-AI PRs.", + "evidence": "CodeRabbit / DX DevCycle December 2025 study. AI assistance introduces additional complexity and error surface that human reviewers must contend with.", + "source": "CodeRabbit Dec 2025 / DX DevCycle, agent_adoption.py code_quality_in_production", + "confidence": "HIGH" + }, + { + "claim": "40% of Copilot-generated programs are flagged for insecure code.", + "evidence": "GitHub Copilot research. 6.4% secret leakage rate in Copilot repositories — 40% higher than the 4.6% baseline.", + "source": "GitHub Copilot research + academic security research, agent_adoption.py", + "confidence": "HIGH" + }, + { + "claim": "Google DORA 2024 found AI use causes a 7.2% drop in delivery stability.", + "evidence": "Teams using AI tools experienced less reliable software delivery than those that didn't. 
Delivery stability is a key metric in DevOps performance.", + "source": "Google DORA 2024 report, agent_adoption.py code_quality_in_production", + "confidence": "HIGH" + } + ], + "impact": [ + { + "claim": "AI-assisted development introduces real security risks in production systems.", + "evidence": "When AI-generated code with vulnerabilities is integrated into production, the vulnerabilities propagate through entire architectures. 48% vulnerability rate is not acceptable for critical systems.", + "source": "Security vulnerability analysis, narrative Section 5", + "confidence": "HIGH" + }, + { + "claim": "Long-term technical debt accumulates from AI-generated code integration.", + "evidence": "1.7x more issues in AI-coauthored PRs suggests that AI assistance may be introducing complexity that compounds over time. Maintenance burden increases as AI-generated code becomes embedded in legacy systems.", + "source": "CodeRabbit study + tech debt analysis", + "confidence": "HIGH" + }, + { + "claim": "Delivery reliability suffers when teams adopt AI tools without adequate review processes.", + "evidence": "7.2% drop in delivery stability is a significant operational impact. Less reliable software delivery increases risk of outages, customer complaints, and security incidents.", + "source": "Google DORA 2024 report", + "confidence": "HIGH" + } + ], + "act": [ + { + "claim": "Acknowledge real risks and recommend cautious adoption with mandatory validation.", + "evidence": "48% vulnerability rate and 1.7x more PR issues are not academic concerns — they are production realities. 
Organizations adopting AI-assisted development must invest in security review processes, code quality gates, and developer training.", + "source": "Security risk assessment + industry best practices", + "confidence": "HIGH" + }, + { + "claim": "AI-generated code should never be deployed without human review and security auditing.", + "evidence": "6.4% secret leakage rate (40% higher than baseline) and 43 CWE categories of vulnerabilities demonstrate that AI tools can expose sensitive credentials and introduce systemic security weaknesses.", + "source": "Academic security research + GitHub Copilot data", + "confidence": "HIGH" + } + ] + }, + "8": { + "title": "Long-Term Productivity Trajectory", + "cluster": "utility", + "summary": "AI-assisted development shows genuine productivity gains of 20-67% in realistic ranges, with gains compounding over time despite significant near-term failure rates.", + "fact": [ + { + "claim": "Realistic productivity gains range from 20-67% depending on context and use case.", + "evidence": "Accenture RCT: 8.69% PR increase, 11% merge rate increase, 84% successful builds increase. Microsoft Research: 20-45% productivity improvement. Broader industry estimates reach up to 67% for specific tasks.", + "source": "Accenture RCT, Microsoft Research 2024-2025, agent_adoption.py", + "confidence": "HIGH" + }, + { + "claim": "95% of corporate AI pilots deliver zero measurable return; only 5% reach production with impact.", + "evidence": "MIT Media Lab 2025, based on 300+ initiatives, 52 organizational interviews, and 153 executive surveys. 72% of AI initiatives fail to reach production (McKinsey). 80% overall AI project failure rate (RAND).", + "source": "MIT Media Lab 2025, McKinsey 2025, RAND 2025, productivity.py failure_modes", + "confidence": "HIGH" + }, + { + "claim": "88% report AI adoption, but only 31% are scaling enterprise-wide.", + "evidence": "McKinsey State of AI 2025: vast majority stuck in pilot purgatory. 
40% of agentic AI projects projected to be canceled by end of 2027 (Gartner). 42% of companies abandoned most AI initiatives in 2025 (S&P Global).", + "source": "McKinsey 2025, Gartner prediction, S&P Global 2025", + "confidence": "HIGH" + }, + { + "claim": "External partnership deployments succeed at ~67% vs ~33% for internal builds.", + "evidence": "MIT Media Lab 2025 build-vs-buy analysis. Organizations that partner with external vendors achieve significantly higher success rates than those attempting internal development.", + "source": "MIT Media Lab 2025, productivity.py failure_modes", + "confidence": "MEDIUM" + } + ], + "impact": [ + { + "claim": "AI-assisted development is inevitable, with gains that compound over time.", + "evidence": "Despite high failure rates, the 5% of successful pilots demonstrate that AI can deliver transformative productivity improvements. The organizations that succeed build institutional knowledge and practices that compound.", + "source": "Narrative Section 5, central thesis analysis", + "confidence": "HIGH" + }, + { + "claim": "High failure rates indicate AI requires significant investment and patience.", + "evidence": "95% pilot failure rate and 80% overall project failure rate underscore that AI adoption is not plug-and-play. Organizations must invest in talent, processes, and security to realize returns.", + "source": "Failure mode analysis, MIT Media Lab + RAND data", + "confidence": "HIGH" + }, + { + "claim": "The infrastructure buildout will outlast the valuation bubble.", + "evidence": "Historical precedent from dot-com and telecom bubbles shows that infrastructure built during bubble periods becomes the foundation for transformative innovation. 
The GPU clusters and data centers will remain valuable even after valuations correct.", + "source": "Narrative central thesis, Section 4 historical analysis", + "confidence": "HIGH" + } + ], + "act": [ + { + "claim": "Frame AI as long-term transformation despite short-term inefficiencies.", + "evidence": "The 20-67% productivity gains in successful deployments, combined with the inevitable nature of AI tool adoption (84% of developers), suggest that the long-term trajectory is positive. Short-term failure rates should be viewed as a maturation cost.", + "source": "Productivity data + adoption trend synthesis", + "confidence": "HIGH" + }, + { + "claim": "Invest in real utility rather than speculation, with realistic expectations.", + "evidence": "The organizations that succeed are those that separate signal from noise: they focus on well-defined use cases, invest in security review, maintain realistic expectations, and prioritize measurable outcomes over marketing hype.", + "source": "Narrative Summary, central recommendations", + "confidence": "HIGH" + } + ] + } + }, + "metadata": { + "extraction_date": "2026-06-04", + "source_narrative": "report/case_narrative.md (438 lines, 7 sections)", + "source_data_modules": [ + "src/data/market_bubbles.py", + "src/data/ai_infrastructure.py", + "src/data/agent_adoption.py", + "src/data/productivity.py" + ], + "total_cards": 8, + "card_clusters": { + "bubble": [1, 2, 3, 4], + "utility": [5, 6, 8], + "risk": [7] + }, + "confidence_levels": ["HIGH", "MEDIUM", "LOW"], + "extraction_method": "ClaimExtractor.parse_narrative + data module cross-reference" + } +}