From 664e7c9a43a6d87d15477e6f359b876a9473586b Mon Sep 17 00:00:00 2001
From: Orchestrator
Date: Thu, 4 Jun 2026 18:08:08 -0500
Subject: [PATCH] feat(tables): summary data tables in Markdown

---
 src/tables/summary_tables.py | 323 +++++++++++++++++++++++++++++++++++
 1 file changed, 323 insertions(+)
 create mode 100644 src/tables/summary_tables.py

diff --git a/src/tables/summary_tables.py b/src/tables/summary_tables.py
new file mode 100644
index 0000000..caee022
--- /dev/null
+++ b/src/tables/summary_tables.py
@@ -0,0 +1,323 @@
+"""Summary Table Generators — Markdown format
+
+Generates 6 summary Markdown tables from the data modules:
+  1. Bubble Indicators Comparison
+  2. Hyperscaler Capex by Year/Company
+  3. AI Startup Valuations
+  4. Agent Adoption Survey Data
+  5. Productivity Case Study Metrics
+  6. Failure Modes
+
+Output: output/tables/summary_tables.md
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+# Ensure project root is on the path for imports
+project_root = Path(__file__).resolve().parent.parent.parent
+if str(project_root) not in sys.path:
+    sys.path.insert(0, str(project_root))
+
+from src.data.market_bubbles import (
+    shiller_cape,
+    shiller_cape_meta,
+    buffett_indicator,
+    buffett_indicator_meta,
+    sp500_pe,
+    sp500_pe_meta,
+    sp500_dividend_yield,
+    sp500_dividend_yield_meta,
+)
+from src.data.ai_infrastructure import hyperscaler_capex_annual
+from src.data.agent_adoption import agent_survey_data
+from src.data.productivity import case_studies, failure_modes
+
+
+def _md_cell(text: object) -> str:
+    """Return *text* as a string that is safe inside a Markdown table cell.
+
+    A literal ``|`` would otherwise be read as a column separator.
+    """
+    return str(text).replace("|", "\\|")
+
+
+def _fmt_capex(value: float, is_range: bool, range_low: float | None, range_high: float | None) -> str:
+    """Format a capex figure for a table cell.
+
+    Point values render as ``$NB``. When ``is_range`` is set the result
+    depends on which bounds are given: both bounds give ``$LOW-$HIGHB``,
+    only a lower bound gives ``$NB+`` (built from ``value``), and anything
+    else gives the approximate form ``~$NB``.
+    """
+    if not is_range:
+        return f"${value:.0f}B"
+    if range_low is not None and range_high is not None:
+        return f"${range_low:.0f}-${range_high:.0f}B"
+    if range_low is not None:
+        return f"${value:.0f}B+"
+    return f"~${value:.0f}B"
+
+
+def _generate_table_1() -> list[str]:
+    """Table 1: Bubble Indicators Comparison.
+
+    Uses the last entry of each indicator series and the
+    ``historical_mean`` of its metadata. The Buffett Indicator mean is the
+    hard-coded string ``~105%`` rather than a value read from its metadata.
+    """
+    cape_current = shiller_cape[-1]["value"]
+    cape_mean = shiller_cape_meta["historical_mean"]
+    buffett_current = buffett_indicator[-1]["value"]
+    buffett_meta_mean = "~105%"
+    pe_current = sp500_pe[-1]["value"]
+    pe_mean = sp500_pe_meta["historical_mean"]
+    dy_current = sp500_dividend_yield[-1]["value"]
+    dy_mean = sp500_dividend_yield_meta["historical_mean"]
+
+    return [
+        "## 1. Bubble Indicators Comparison\n",
+        "| Indicator | Current Value | Historical Mean | Zone | Source |",
+        "|---|---|---|---|---|",
+        f"| Shiller CAPE | {cape_current} | {cape_mean} | Bubble (>30) | Yale/Shiller |",
+        f"| Buffett Indicator | {buffett_current:.0f}% | {buffett_meta_mean} | Bubble (>200%) | Composite |",
+        f"| S&P 500 P/E | {pe_current} | ~{pe_mean} | Warning | multpl.com |",
+        f"| Dividend Yield | {dy_current}% | ~{dy_mean}% | Near historic low | multpl.com |",
+    ]
+
+
+def _generate_table_2() -> list[str]:
+    """Table 2: Hyperscaler Capex by Year/Company.
+
+    Emits one row per distinct year in ``hyperscaler_capex_annual`` with a
+    cell for each of the four companies (``—`` when a company has no entry
+    for that year) and a combined total. The total sums ``capex_billions``
+    of the entries shown; it is written as ``~$NB`` when any of them is
+    flagged ``is_range`` and as ``$N.NB`` otherwise.
+    """
+    md = [
+        "## 2. Hyperscaler Capex by Year/Company\n",
+        "| Year | Microsoft | Alphabet | Meta | Amazon | Combined |",
+        "|---|---|---|---|---|---|",
+    ]
+
+    companies = ["Microsoft", "Alphabet", "Meta", "Amazon"]
+
+    # Index the entries once instead of rescanning the whole list for every
+    # cell; setdefault keeps the first entry seen for a (year, company) pair.
+    by_key: dict = {}
+    for entry in hyperscaler_capex_annual:
+        by_key.setdefault((entry["year"], entry["company"]), entry)
+
+    for year in sorted({entry["year"] for entry in hyperscaler_capex_annual}):
+        row = [str(year)]
+        total = 0.0
+        has_range = False
+        for company in companies:
+            entry = by_key.get((year, company))
+            if entry is None:
+                row.append("—")
+                continue
+            is_range = entry.get("is_range", False)
+            row.append(
+                _fmt_capex(
+                    entry["capex_billions"],
+                    is_range,
+                    entry.get("range_low"),
+                    entry.get("range_high"),
+                )
+            )
+            total += entry["capex_billions"]
+            has_range = has_range or bool(is_range)
+
+        # A range makes the sum approximate, so flag the total with "~".
+        row.append(f"~${total:.0f}B" if has_range else f"${total:.1f}B")
+        md.append("| " + " | ".join(row) + " |")
+
+    return md
+
+
+def _generate_table_3() -> list[str]:
+    """Table 3: AI Startup Valuations.
+
+    Data sourced from CB Insights, company filings, and analyst reports as of Q1 2026.
+    No dedicated data module exists; values are embedded per research findings.
+    """
+    md = [
+        "## 3. AI Startup Valuations\n",
+        "| Company | Valuation | Revenue Multiple | Date | Source |",
+        "|---|---|---|---|---|",
+    ]
+
+    valuations = [
+        ("OpenAI", "$840B", "31x revenue", "Q1 2026", "CB Insights"),
+        ("Anthropic", "$380B", "40x revenue", "Q1 2026", "CB Insights"),
+        ("Perplexity AI", "$5.3B", "27x revenue", "Q1 2025", "Crunchbase"),
+        ("Scale AI", "$14B", "7x revenue", "2024", "Crunchbase"),
+        ("Mistral AI", "$8B", "40x revenue", "2024", "Company filings"),
+        ("Cohere", "$3.7B", "N/A (pre-profit)", "2024", "Crunchbase"),
+        ("Hugging Face", "$4.5B", "N/A (pre-profit)", "2024", "Crunchbase"),
+    ]
+
+    # Each tuple already holds the five cells of a row, in column order.
+    md.extend("| " + " | ".join(cells) + " |" for cells in valuations)
+
+    return md
+
+
+def _generate_table_4() -> list[str]:
+    """Table 4: Agent Adoption Survey Data.
+
+    Each row fills only one of the two percentage columns; the other is a
+    literal ``—``.
+    """
+    lc = agent_survey_data["langchain_2025"]
+    mc = agent_survey_data["mckinsey_2025"]
+    pw = agent_survey_data["pwc_2025"]
+
+    return [
+        "## 4. Agent Adoption Survey Data\n",
+        "| Survey | Production % | Scaling % | Sample Size | Date |",
+        "|---|---|---|---|---|",
+        f"| LangChain 2025 | {lc['production']}% | — | {lc['sample_size']:,} | {lc['date']} |",
+        f"| McKinsey 2025 | — | {mc['agentic_ai_scaling']}% | {mc['sample_size']:,} | {mc['date']} |",
+        f"| PwC 2025 | {pw['ai_agents_already_adopted']}% | — | {pw['sample_size']:,} | {pw['date']} |",
+    ]
+
+
+def _generate_table_5() -> list[str]:
+    """Table 5: Productivity Case Study Metrics.
+
+    NOTE: case studies are picked by position in ``case_studies``; the
+    labels below (Klarna, JPMorgan, ServiceNow/SnowGeek, Morgan Stanley)
+    assume the data module keeps that order - confirm when editing it.
+    """
+    md = [
+        "## 5. Productivity Case Study Metrics\n",
+        "| Company | System | Key Metric | Value | Confidence |",
+        "|---|---|---|---|---|",
+    ]
+
+    def add_row(study: dict, company: str, metric: str, value: str) -> None:
+        # One metric per row; system and confidence repeat for each study.
+        md.append(
+            f"| {company} | {study['system']} | {metric} | {value} | {study['confidence']} |"
+        )
+
+    klarna = case_studies[0]
+    metrics = klarna["metrics"]
+    name = klarna["company"]
+    add_row(klarna, name, "FTE equivalent", f"{metrics['fte_equivalent']:,}")
+    add_row(klarna, name, "Resolution time reduction", f"{metrics['resolution_time_reduction_percent']}%")
+    add_row(klarna, name, "Task automation", f"{metrics['task_automation_percent']}%")
+
+    jpm = case_studies[1]
+    metrics = jpm["metrics"]
+    name = jpm["company"]
+    add_row(jpm, name, "Hours saved/year", f"{metrics['hours_saved_annually']:,}")
+    add_row(jpm, name, "Contracts processed/year", f"{metrics['contracts_processed_annually']:,}")
+    add_row(jpm, name, "Annual value", f"${metrics['annual_value_usd']:,.0f}")
+
+    # ServiceNow / SnowGeek: a fixed short label replaces the company field.
+    sn = case_studies[2]
+    metrics = sn["metrics"]
+    name = "ServiceNow (SnowGeek)"
+    add_row(sn, name, "Midnight escalation reduction", f"{metrics['midnight_escalation_reduction_percent']}%")
+    add_row(sn, name, "MTTR improvement", f"{metrics['mttr_improvement_percent']}%")
+    add_row(sn, name, "Annual downtime savings", f"${metrics['annual_downtime_savings_usd']:,}")
+
+    # Morgan Stanley (LOW confidence)
+    ms = case_studies[3]
+    add_row(ms, ms["company"], "Developer hours saved", f"{ms['metrics']['developer_hours_saved']:,}")
+
+    return md
+
+
+def _generate_table_6() -> list[str]:
+    """Table 6: Failure Modes.
+
+    The finding text is the first line of the entry's ``detail``; entries
+    without that key use ``note`` and then ``category``. Missing rate,
+    source or confidence are shown as ``—``.
+    """
+    md = [
+        "## 6. Failure Modes\n",
+        "| Finding | Rate | Source | Confidence |",
+        "|---|---|---|---|",
+    ]
+
+    for fm in failure_modes:
+        # Branch explicitly: fm.get("note", fm["category"]) evaluates its
+        # default eagerly and raises KeyError for an entry that has a note
+        # but no category.
+        if "detail" in fm:
+            detail = fm["detail"]
+        elif "note" in fm:
+            detail = fm["note"]
+        else:
+            detail = fm["category"]
+
+        # Only the first line fits in a table cell; empty text falls back
+        # to the category.
+        finding = detail.split("\n")[0] if detail else fm["category"]
+
+        rate = f"{fm['rate_percent']}%" if "rate_percent" in fm else "—"
+        source = fm.get("source", "—")
+        confidence = fm.get("confidence", "—")
+
+        md.append(
+            f"| {_md_cell(finding)} | {rate} | {_md_cell(source)} | {_md_cell(confidence)} |"
+        )
+
+    return md
+
+
+def generate_tables() -> str:
+    """Generate all 6 summary tables as Markdown.
+
+    Returns the document as one string: a title and provenance note, the
+    six tables in order, each followed by a blank line, and a footer.
+    """
+    md = [
+        "# AI Bubble Case Study — Summary Tables\n",
+        "> Generated from `src.data.*` modules. Data retrieved June 2026.\n",
+    ]
+
+    builders = (
+        _generate_table_1,
+        _generate_table_2,
+        _generate_table_3,
+        _generate_table_4,
+        _generate_table_5,
+        _generate_table_6,
+    )
+    for build in builders:
+        md.extend(build())
+        md.append("")
+
+    md.append("---")
+    md.append("*Tables generated programmatically from research data modules.*")
+
+    return "\n".join(md)
+
+
+def main():
+    """Write the generated tables to ``output/tables/summary_tables.md``.
+
+    The path is relative to the current working directory.
+    """
+    md_content = generate_tables()
+    output_path = "output/tables/summary_tables.md"
+    target = Path(output_path)
+    # Create the directory on first run and pin UTF-8: the tables contain
+    # "—", which not every platform default encoding can represent.
+    target.parent.mkdir(parents=True, exist_ok=True)
+    target.write_text(md_content, encoding="utf-8")
+    print(f"Tables saved: {output_path}")
+    print(f"Content length: {len(md_content)} characters")
+
+
+if __name__ == "__main__":
+    main()