#!/usr/bin/env python3
"""
C8: Calibrate Monte Carlo to OECD Peers
=========================================
The probability cone in 07-probability-cone.html uses volatility calibrated
to Turkey/Venezuela (high-volatility cases). This script computes volatility
from different peer groups and re-runs Monte Carlo simulations.

Uses ONLY Python stdlib.
"""

import csv
import math
import random
import statistics
from collections import defaultdict

random.seed(42)

# ---------------------------------------------------------------------------
# 1. LOAD DATA
# ---------------------------------------------------------------------------
DATA_PATH = "/tmp/pt-data/political-topology-flat.csv"

rows = []
with open(DATA_PATH, newline="") as f:
    reader = csv.DictReader(f)
    for r in reader:
        rows.append({
            "country": r["country"],
            "iso3": r["iso3"],
            "region": r["region"],
            "year": int(r["year"]),
            "liberty": float(r["liberty"]),
            "status": r["status"],
        })

country_series = defaultdict(list)
for r in rows:
    country_series[r["country"]].append(r)
for c in country_series:
    country_series[c].sort(key=lambda x: x["year"])

print(f"Loaded {len(rows)} rows, {len(country_series)} countries")

# ---------------------------------------------------------------------------
# 2. COMPUTE ANNUAL DELTAS FOR EACH COUNTRY
# ---------------------------------------------------------------------------
def compute_annual_deltas(pts):
    """Compute annualized liberty changes for a country's time series"""
    deltas = []
    for i in range(len(pts) - 1):
        yr_gap = pts[i + 1]["year"] - pts[i]["year"]
        if yr_gap > 0:
            delta = (pts[i + 1]["liberty"] - pts[i]["liberty"]) / yr_gap
            deltas.append({
                "delta": delta,
                "L_start": pts[i]["liberty"],
                "year_start": pts[i]["year"],
                "yr_gap": yr_gap,
            })
    return deltas

country_deltas = {}
for country, pts in country_series.items():
    country_deltas[country] = compute_annual_deltas(pts)

# Compute country-level mean liberty for group classification
country_mean_liberty = {}
for country, pts in country_series.items():
    country_mean_liberty[country] = statistics.mean([p["liberty"] for p in pts])

# ---------------------------------------------------------------------------
# 3. DEFINE PEER GROUPS
# ---------------------------------------------------------------------------

# Compute recent liberty (post-2000 max, or latest available) for each country
country_recent_max = {}
country_latest_L = {}
for country, pts in country_series.items():
    recent = [p for p in pts if p["year"] >= 2000]
    if recent:
        country_recent_max[country] = max(p["liberty"] for p in recent)
    else:
        country_recent_max[country] = max(p["liberty"] for p in pts)
    country_latest_L[country] = pts[-1]["liberty"]

# OECD democracies: named list of established OECD members present in the dataset
# We include them by name regardless of mean (many have long pre-democratic histories)
OECD_CANDIDATES = [
    "United States", "United Kingdom", "France", "Germany", "Italy", "Japan",
    "Canada", "Australia", "Netherlands", "Belgium", "Sweden", "Norway",
    "Denmark", "Switzerland", "Austria", "Spain", "Portugal", "Greece",
    "Ireland", "New Zealand", "Finland", "South Korea", "Chile", "Israel",
    "Poland", "Czech Republic",
]

# Turkey/Venezuela/Hungary (thesis calibration)
THESIS_CALIBRATION = ["Turkey", "Venezuela", "Hungary"]

# Build groups with flexible criteria
def get_group_deltas(country_list=None, recent_max_min=None, decline_threshold=None,
                     delta_filter_min_year=None):
    """Get all annual deltas for a group of countries.

    Args:
        country_list: explicit list of country names
        recent_max_min: include countries whose post-2000 max liberty >= this value
        decline_threshold: include countries that dropped this many points from peak
        delta_filter_min_year: only include deltas from this year onward
    """
    deltas = []
    matched_countries = []
    for country, pts in country_series.items():
        include = False
        if country_list is not None and country in country_list:
            include = True
        elif recent_max_min is not None and country_recent_max.get(country, 0) >= recent_max_min:
            include = True
        elif decline_threshold is not None:
            # Check if country ever dropped by threshold or more from any peak
            max_L = max(p["liberty"] for p in pts)
            min_L_after_max = None
            max_found = False
            for p in pts:
                if p["liberty"] == max_L:
                    max_found = True
                elif max_found and (min_L_after_max is None or p["liberty"] < min_L_after_max):
                    min_L_after_max = p["liberty"]
            if min_L_after_max is not None and (max_L - min_L_after_max) >= decline_threshold:
                include = True

        if include:
            matched_countries.append(country)
            country_d = country_deltas.get(country, [])
            if delta_filter_min_year:
                country_d = [d for d in country_d if d["year_start"] >= delta_filter_min_year]
            deltas.extend(country_d)
    return deltas, matched_countries


# Group 1: OECD democracies by name (present in dataset, excluding US itself)
oecd_in_dataset = [c for c in OECD_CANDIDATES if c in country_series and c != "United States"]
oecd_deltas, oecd_countries = get_group_deltas(country_list=oecd_in_dataset)

# Group 2: All democracies -- countries with recent (post-2000) max liberty >= 70
all_dem_deltas, all_dem_countries = get_group_deltas(recent_max_min=70)

# Group 3: Declining democracies (dropped 10+ points from peak)
declining_deltas, declining_countries = get_group_deltas(decline_threshold=10)

# Group 4: Turkey/Venezuela/Hungary
thesis_deltas, thesis_countries = get_group_deltas(country_list=THESIS_CALIBRATION)

groups = {
    "OECD Democracies": {"deltas": oecd_deltas, "countries": oecd_countries},
    "All Democracies (L>70)": {"deltas": all_dem_deltas, "countries": all_dem_countries},
    "Declining Democracies (-10pt)": {"deltas": declining_deltas, "countries": declining_countries},
    "Turkey/Venezuela/Hungary": {"deltas": thesis_deltas, "countries": thesis_countries},
}

# ---------------------------------------------------------------------------
# 4. COMPUTE VOLATILITY PARAMETERS FOR EACH GROUP
# ---------------------------------------------------------------------------
print(f"\n{'='*80}")
print("VOLATILITY PARAMETERS BY PEER GROUP")
print(f"{'='*80}")

group_params = {}
for gname, gdata in groups.items():
    deltas = [d["delta"] for d in gdata["deltas"]]
    countries = gdata["countries"]

    if len(deltas) < 3:
        print(f"\n{gname}: Too few observations ({len(deltas)})")
        group_params[gname] = None
        continue

    drift = statistics.mean(deltas)
    sigma = statistics.stdev(deltas)
    max_decline = min(deltas)
    sorted_deltas = sorted(deltas)
    p5_idx = max(0, int(len(sorted_deltas) * 0.05))
    p5 = sorted_deltas[p5_idx]
    median_delta = statistics.median(deltas)

    group_params[gname] = {
        "drift": drift,
        "sigma": sigma,
        "max_decline": max_decline,
        "p5_tail": p5,
        "median": median_delta,
        "n_obs": len(deltas),
        "n_countries": len(countries),
        "countries": countries,
    }

    print(f"\n{gname} ({len(countries)} countries, {len(deltas)} obs):")
    print(f"  Countries: {', '.join(sorted(countries)[:10])}{'...' if len(countries) > 10 else ''}")
    print(f"  Drift (mean dL/yr):    {drift:+.4f}")
    print(f"  Sigma (std dev):        {sigma:.4f}")
    print(f"  Max single-yr decline: {max_decline:+.4f}")
    print(f"  5th percentile:        {p5:+.4f}")
    print(f"  Median:                {median_delta:+.4f}")


# ---------------------------------------------------------------------------
# 5. MONTE CARLO SIMULATION
# ---------------------------------------------------------------------------
N_PATHS = 1000
HORIZON = 10  # years
L_START = 48.0
L_MIN = 0.0
L_MAX = 100.0

def run_monte_carlo(drift, sigma, n_paths=N_PATHS, horizon=HORIZON, l_start=L_START):
    """Run Monte Carlo simulation, return paths and statistics"""
    random.seed(42)  # reproducible
    paths = []
    for _ in range(n_paths):
        path = [l_start]
        L = l_start
        for t in range(1, horizon + 1):
            dL = drift + sigma * random.gauss(0, 1)
            L = max(L_MIN, min(L_MAX, L + dL))
            path.append(L)
        paths.append(path)

    # Compute statistics at each time step
    stats = []
    for t in range(horizon + 1):
        vals = sorted([p[t] for p in paths])
        n = len(vals)
        stats.append({
            "year": 2026 + t,
            "p5": vals[int(n * 0.05)],
            "p10": vals[int(n * 0.10)],
            "p25": vals[int(n * 0.25)],
            "median": vals[int(n * 0.50)],
            "p75": vals[int(n * 0.75)],
            "p90": vals[int(n * 0.90)],
            "p95": vals[int(n * 0.95)],
            "mean": statistics.mean(vals),
        })

    # Compute key probabilities
    final_vals = [p[horizon] for p in paths]
    five_yr_vals = [p[min(5, horizon)] for p in paths]

    p_below_30_5yr = sum(1 for v in five_yr_vals if v < 30) / n_paths
    p_above_60_5yr = sum(1 for v in five_yr_vals if v > 60) / n_paths
    p_below_30_10yr = sum(1 for v in final_vals if v < 30) / n_paths
    p_above_60_10yr = sum(1 for v in final_vals if v > 60) / n_paths
    median_5yr = statistics.median(five_yr_vals)
    median_10yr = statistics.median(final_vals)

    return {
        "paths": paths,
        "stats": stats,
        "p_below_30_5yr": p_below_30_5yr,
        "p_above_60_5yr": p_above_60_5yr,
        "p_below_30_10yr": p_below_30_10yr,
        "p_above_60_10yr": p_above_60_10yr,
        "median_5yr": median_5yr,
        "median_10yr": median_10yr,
    }


print(f"\n{'='*80}")
print("MONTE CARLO RESULTS (1,000 paths, 10-year horizon, L_start=48)")
print(f"{'='*80}")

mc_results = {}
for gname, params in group_params.items():
    if params is None:
        continue
    mc = run_monte_carlo(params["drift"], params["sigma"])
    mc_results[gname] = mc

    print(f"\n{gname}:")
    print(f"  Drift={params['drift']:+.4f}, Sigma={params['sigma']:.4f}")
    print(f"  P(L<30 | 5yr):  {mc['p_below_30_5yr']:.1%}")
    print(f"  P(L>60 | 5yr):  {mc['p_above_60_5yr']:.1%}")
    print(f"  P(L<30 | 10yr): {mc['p_below_30_10yr']:.1%}")
    print(f"  P(L>60 | 10yr): {mc['p_above_60_10yr']:.1%}")
    print(f"  Median at 5yr:  {mc['median_5yr']:.1f}")
    print(f"  Median at 10yr: {mc['median_10yr']:.1f}")


# ---------------------------------------------------------------------------
# 6. GENERATE RESULTS MARKDOWN
# ---------------------------------------------------------------------------
lines = []
lines.append("# C8: Monte Carlo Calibration to OECD Peers")
lines.append("")
lines.append("## Problem Statement")
lines.append("")
lines.append("The probability cone in 07-probability-cone.html uses volatility calibrated to")
lines.append("Turkey/Venezuela (high-volatility cases), which may embed the conclusion that the")
lines.append("US will decline rapidly. This analysis recalibrates using multiple peer groups.")
lines.append("")

# Volatility parameters table
lines.append("## 1. Volatility Parameters by Peer Group")
lines.append("")
lines.append("| Peer Group | N Countries | N Obs | Drift (mean/yr) | Sigma (std) | Max Decline | 5th Pctile |")
lines.append("|------------|------------|-------|-----------------|-------------|-------------|------------|")
for gname in groups:
    p = group_params[gname]
    if p is None:
        lines.append(f"| {gname} | -- | -- | -- | -- | -- | -- |")
    else:
        lines.append(f"| {gname} | {p['n_countries']} | {p['n_obs']} | {p['drift']:+.4f} | {p['sigma']:.4f} | {p['max_decline']:+.4f} | {p['p5_tail']:+.4f} |")
lines.append("")

# Country lists
lines.append("### Countries in Each Group")
lines.append("")
for gname in groups:
    p = group_params[gname]
    if p is not None:
        lines.append(f"**{gname}** ({p['n_countries']}): {', '.join(sorted(p['countries']))}")
        lines.append("")

# Monte Carlo results
lines.append("## 2. Monte Carlo Results (1,000 paths, 10-year horizon, L_start=48)")
lines.append("")
lines.append("| Calibration | P(L<30 | 5yr) | P(L>60 | 5yr) | P(L<30 | 10yr) | P(L>60 | 10yr) | Median 5yr | Median 10yr |")
lines.append("|-------------|---------------|---------------|----------------|----------------|------------|-------------|")
for gname in mc_results:
    mc = mc_results[gname]
    lines.append(f"| {gname} | {mc['p_below_30_5yr']:.1%} | {mc['p_above_60_5yr']:.1%} | {mc['p_below_30_10yr']:.1%} | {mc['p_above_60_10yr']:.1%} | {mc['median_5yr']:.1f} | {mc['median_10yr']:.1f} |")
lines.append("")

# Trajectory comparison
lines.append("## 3. Median Trajectory Comparison (Year-by-Year)")
lines.append("")
header = "| Year |"
for gname in mc_results:
    short = gname.split("(")[0].strip()
    header += f" {short} |"
lines.append(header)
sep = "|------|"
for _ in mc_results:
    sep += "--------|"
lines.append(sep)
for t in range(11):
    row = f"| {2026 + t} |"
    for gname in mc_results:
        row += f" {mc_results[gname]['stats'][t]['median']:.1f} |"
    lines.append(row)
lines.append("")

# Probability cone comparison at 5yr and 10yr
lines.append("## 4. Probability Cone Comparison (5th-95th percentile)")
lines.append("")
lines.append("### At 5 Years (2031)")
lines.append("")
lines.append("| Calibration | 5th Pctile | 25th Pctile | Median | 75th Pctile | 95th Pctile |")
lines.append("|-------------|-----------|------------|--------|------------|------------|")
for gname in mc_results:
    s = mc_results[gname]["stats"][5]
    lines.append(f"| {gname} | {s['p5']:.1f} | {s['p25']:.1f} | {s['median']:.1f} | {s['p75']:.1f} | {s['p95']:.1f} |")
lines.append("")

lines.append("### At 10 Years (2036)")
lines.append("")
lines.append("| Calibration | 5th Pctile | 25th Pctile | Median | 75th Pctile | 95th Pctile |")
lines.append("|-------------|-----------|------------|--------|------------|------------|")
for gname in mc_results:
    s = mc_results[gname]["stats"][10]
    lines.append(f"| {gname} | {s['p5']:.1f} | {s['p25']:.1f} | {s['median']:.1f} | {s['p75']:.1f} | {s['p95']:.1f} |")
lines.append("")

# Analysis and interpretation
lines.append("## 5. Analysis")
lines.append("")

# Compare OECD vs thesis calibration
if "OECD Democracies" in mc_results and "Turkey/Venezuela/Hungary" in mc_results:
    oecd_mc = mc_results["OECD Democracies"]
    thesis_mc = mc_results["Turkey/Venezuela/Hungary"]
    oecd_p = group_params["OECD Democracies"]
    thesis_p = group_params["Turkey/Venezuela/Hungary"]

    sigma_ratio = thesis_p["sigma"] / oecd_p["sigma"] if oecd_p["sigma"] > 0 else float('inf')

    lines.append(f"### OECD vs. Thesis Calibration")
    lines.append("")
    lines.append(f"- **Sigma** (volatility): OECD={oecd_p['sigma']:.2f}, Thesis={thesis_p['sigma']:.2f} (ratio: {sigma_ratio:.2f}x)")
    lines.append(f"- Volatility is comparable across groups; sigma is NOT the main driver of divergence")
    lines.append(f"- **Drift** (mean annual change): OECD={oecd_p['drift']:+.4f}/yr, Thesis={thesis_p['drift']:+.4f}/yr")
    lines.append(f"- **The drift parameter is the critical difference.** OECD democracies on average drift")
    lines.append(f"  upward (+0.57/yr), while Turkey/Venezuela/Hungary drift downward (-0.21/yr).")
    lines.append(f"  Using negative drift embeds the assumption that the US is on a declining trajectory.")
    lines.append("")
    lines.append(f"### Impact on Projections")
    lines.append("")
    lines.append(f"- Under OECD calibration, P(L<30 | 5yr) = {oecd_mc['p_below_30_5yr']:.1%} vs thesis {thesis_mc['p_below_30_5yr']:.1%}")
    lines.append(f"- Under OECD calibration, P(L>60 | 5yr) = {oecd_mc['p_above_60_5yr']:.1%} vs thesis {thesis_mc['p_above_60_5yr']:.1%}")
    lines.append(f"- Median 10yr outcome: OECD={oecd_mc['median_10yr']:.1f}, Thesis={thesis_mc['median_10yr']:.1f}")
    lines.append("")

lines.append("### Key Findings")
lines.append("")
lines.append("1. **Volatility choice matters enormously**: The selection of Turkey/Venezuela/Hungary")
lines.append("   as calibration peers produces dramatically different projections than OECD democracies.")
lines.append("   This is a significant methodological choice that should be disclosed and justified.")
lines.append("")
lines.append("2. **Peer group selection embeds assumptions**: Using high-volatility declining democracies")
lines.append("   as the calibration set implicitly assumes the US is already on a Turkey/Venezuela-like")
lines.append("   trajectory. This may or may not be warranted, but it is not a neutral choice.")
lines.append("")
lines.append("3. **Recommendation**: The probability cone should present multiple calibrations side-by-side,")
lines.append("   or at minimum disclose that the volatility parameters come from high-volatility cases")
lines.append("   and show how results change under OECD-peer calibration.")
lines.append("")

lines.append("---")
lines.append(f"*Generated by c8_oecd_monte_carlo.py | {N_PATHS} paths, {HORIZON}-year horizon*")

output_md = "\n".join(lines)
with open("/tmp/pt-data/c8-oecd-monte-carlo-results.md", "w") as f:
    f.write(output_md)

print(f"\nResults saved to /tmp/pt-data/c8-oecd-monte-carlo-results.md")


# ---------------------------------------------------------------------------
# 7. GENERATE HTML SECTION FOR 07-probability-cone.html
# ---------------------------------------------------------------------------

# Build the HTML callout section
html_lines = []
html_lines.append('')
html_lines.append('<section class="volatility-calibration" style="background: #e8f5e9; border-left: 4px solid #2e7d32; padding: 20px; margin: 32px 0; border-radius: 4px;">')
html_lines.append('<h3 style="color: #2e7d32; margin-top: 0;">Volatility Calibration Sensitivity</h3>')
html_lines.append('<p style="font-style: italic; color: #666; margin-bottom: 16px;">Monte Carlo projections are highly sensitive to the choice of calibration peer group. The following table compares results under four different volatility assumptions, all starting at L=48.</p>')
html_lines.append('')

# Summary table
html_lines.append('<table style="width: 100%; border-collapse: collapse; margin-bottom: 16px; font-size: 12px;">')
html_lines.append('<thead>')
html_lines.append('<tr style="border-bottom: 2px solid #2e7d32;">')
html_lines.append('<th style="text-align: left; padding: 8px; font-size: 10px; text-transform: uppercase; color: #2e7d32;">Calibration Group</th>')
html_lines.append('<th style="text-align: center; padding: 8px; font-size: 10px; text-transform: uppercase; color: #2e7d32;">Sigma</th>')
html_lines.append('<th style="text-align: center; padding: 8px; font-size: 10px; text-transform: uppercase; color: #2e7d32;">Drift</th>')
html_lines.append('<th style="text-align: center; padding: 8px; font-size: 10px; text-transform: uppercase; color: #2e7d32;">P(L&lt;30 | 5yr)</th>')
html_lines.append('<th style="text-align: center; padding: 8px; font-size: 10px; text-transform: uppercase; color: #2e7d32;">P(L&gt;60 | 5yr)</th>')
html_lines.append('<th style="text-align: center; padding: 8px; font-size: 10px; text-transform: uppercase; color: #2e7d32;">Median 5yr</th>')
html_lines.append('<th style="text-align: center; padding: 8px; font-size: 10px; text-transform: uppercase; color: #2e7d32;">Median 10yr</th>')
html_lines.append('</tr>')
html_lines.append('</thead>')
html_lines.append('<tbody>')

for gname in mc_results:
    p = group_params[gname]
    mc = mc_results[gname]
    is_thesis = "Turkey" in gname
    style = ' style="background: #fff3e0;"' if is_thesis else ''
    marker = " *" if is_thesis else ""
    html_lines.append(f'<tr{style}>')
    html_lines.append(f'<td style="padding: 8px; border-bottom: 1px solid #c8e6c9; font-weight: 600;">{gname}{marker}</td>')
    html_lines.append(f'<td style="padding: 8px; border-bottom: 1px solid #c8e6c9; text-align: center;">{p["sigma"]:.2f}</td>')
    html_lines.append(f'<td style="padding: 8px; border-bottom: 1px solid #c8e6c9; text-align: center;">{p["drift"]:+.3f}</td>')
    html_lines.append(f'<td style="padding: 8px; border-bottom: 1px solid #c8e6c9; text-align: center;">{mc["p_below_30_5yr"]:.1%}</td>')
    html_lines.append(f'<td style="padding: 8px; border-bottom: 1px solid #c8e6c9; text-align: center;">{mc["p_above_60_5yr"]:.1%}</td>')
    html_lines.append(f'<td style="padding: 8px; border-bottom: 1px solid #c8e6c9; text-align: center;">{mc["median_5yr"]:.1f}</td>')
    html_lines.append(f'<td style="padding: 8px; border-bottom: 1px solid #c8e6c9; text-align: center;">{mc["median_10yr"]:.1f}</td>')
    html_lines.append('</tr>')

html_lines.append('</tbody>')
html_lines.append('</table>')
html_lines.append('<p style="font-size: 11px; color: #666;">* Highlighted row indicates the calibration used in the main probability cone above.</p>')
html_lines.append('')

# Key interpretation
html_lines.append('<div style="margin-top: 12px; padding: 12px; background: #fff; border-radius: 4px;">')
html_lines.append('<p style="font-size: 12px; color: #333; line-height: 1.6; margin: 0;">')
html_lines.append('<strong>Interpretation:</strong> ')

# Dynamic interpretation based on results
if "OECD Democracies" in mc_results and "Turkey/Venezuela/Hungary" in mc_results:
    oecd_mc = mc_results["OECD Democracies"]
    thesis_mc = mc_results["Turkey/Venezuela/Hungary"]
    oecd_p = group_params["OECD Democracies"]
    thesis_p = group_params["Turkey/Venezuela/Hungary"]

    html_lines.append(f'Volatility (sigma) is comparable across peer groups ')
    html_lines.append(f'(OECD: {oecd_p["sigma"]:.2f}, Turkey/Ven./Hungary: {thesis_p["sigma"]:.2f}). ')
    html_lines.append(f'The critical difference is <strong>drift</strong>: OECD democracies average ')
    html_lines.append(f'{oecd_p["drift"]:+.2f} pts/yr (slight upward tendency) vs. {thesis_p["drift"]:+.2f} pts/yr ')
    html_lines.append(f'for Turkey/Venezuela/Hungary (negative drift embeds a declining trajectory assumption). ')
    html_lines.append(f'Under OECD calibration, P(L&lt;30 within 5 years) = {oecd_mc["p_below_30_5yr"]:.1%} ')
    html_lines.append(f'and the median 10-year outcome is L={oecd_mc["median_10yr"]:.0f} ')
    html_lines.append(f'(vs. L={thesis_mc["median_10yr"]:.0f} under thesis calibration). ')
    html_lines.append('The choice of calibration peer group -- particularly its drift parameter -- ')
    html_lines.append('is the single largest driver of projection differences. ')
    html_lines.append('Readers should evaluate which peer group best represents the US trajectory.')

html_lines.append('</p>')
html_lines.append('</div>')
html_lines.append('</section>')

html_section = "\n".join(html_lines)

# Save the HTML section for insertion
with open("/tmp/pt-data/c8-html-section.html", "w") as f:
    f.write(html_section)

print(f"\nHTML section saved to /tmp/pt-data/c8-html-section.html")
print("Ready for insertion into 07-probability-cone.html")