#!/usr/bin/env python3
"""
RP-02: US Liberty Score Validation (Tests T1, T4, T5)
RP-04: Tyranny Validation (Tests T1-T5)

Nobel-caliber quantitative research using ONLY Python stdlib.
Dataset: /tmp/pt-data/political-topology-flat.csv
"""

import csv
import math
import random
import statistics
from collections import defaultdict

random.seed(42)

# ============================================================
# LOAD DATA
# ============================================================
DATA = []
with open("/tmp/pt-data/political-topology-flat.csv", "r") as f:
    reader = csv.DictReader(f)
    for row in reader:
        DATA.append({
            "country": row["country"],
            "iso3": row["iso3"],
            "region": row["region"],
            "year": int(row["year"]),
            "liberty": float(row["liberty"]),
            "tyranny": float(row["tyranny"]),
            "chaos": float(row["chaos"]),
            "status": row["status"],
            "event_horizon_below": row["event_horizon_below"],
            "data_source_period": row["data_source_period"],
        })

print(f"Loaded {len(DATA)} observations")

# US data
US_DATA = sorted([d for d in DATA if d["country"] == "United States"], key=lambda x: x["year"])
print(f"US observations: {len(US_DATA)}")

# ============================================================
# HELPER FUNCTIONS
# ============================================================

def mean(vals):
    if not vals:
        return 0.0
    return sum(vals) / len(vals)

def stdev(vals):
    if len(vals) < 2:
        return 0.0
    m = mean(vals)
    return math.sqrt(sum((v - m) ** 2 for v in vals) / (len(vals) - 1))

def percentile(vals, p):
    """Compute p-th percentile (0-100)."""
    s = sorted(vals)
    n = len(s)
    k = (p / 100.0) * (n - 1)
    f = math.floor(k)
    c = math.ceil(k)
    if f == c:
        return s[int(k)]
    return s[f] * (c - k) + s[c] * (k - f)

def correlation(x, y):
    """Pearson correlation coefficient."""
    n = len(x)
    if n < 2:
        return 0.0
    mx, my = mean(x), mean(y)
    sx, sy = stdev(x), stdev(y)
    if sx == 0 or sy == 0:
        return 0.0
    return sum((xi - mx) * (yi - my) for xi, yi in zip(x, y)) / ((n - 1) * sx * sy)

def skewness(vals):
    """Fisher-Pearson coefficient of skewness."""
    n = len(vals)
    if n < 3:
        return 0.0
    m = mean(vals)
    s = stdev(vals)
    if s == 0:
        return 0.0
    return (n / ((n - 1) * (n - 2))) * sum(((v - m) / s) ** 3 for v in vals)

def cohens_kappa(a, b):
    """Cohen's kappa for two binary lists."""
    n = len(a)
    # Contingency table
    tp = sum(1 for i in range(n) if a[i] and b[i])
    tn = sum(1 for i in range(n) if not a[i] and not b[i])
    fp = sum(1 for i in range(n) if not a[i] and b[i])
    fn = sum(1 for i in range(n) if a[i] and not b[i])
    po = (tp + tn) / n  # observed agreement
    # expected agreement
    p_a1 = (tp + fn) / n
    p_a0 = (fp + tn) / n
    p_b1 = (tp + fp) / n
    p_b0 = (fn + tn) / n
    pe = p_a1 * p_b1 + p_a0 * p_b0
    if pe == 1.0:
        return 1.0
    return (po - pe) / (1 - pe)

def get_stage(L):
    """Map Liberty score to stage S1-S8."""
    if L >= 90:
        return "S1 (Consolidated Democracy)"
    elif L >= 80:
        return "S2 (Mature Democracy)"
    elif L >= 70:
        return "S3 (Electoral Democracy)"
    elif L >= 60:
        return "S4 (Hybrid/Transitional)"
    elif L >= 50:
        return "S5 (Competitive Authoritarian)"
    elif L >= 40:
        return "S6 (Electoral Autocracy)"
    elif L >= 25:
        return "S7 (Closed Autocracy)"
    else:
        return "S8 (Failed State/Total Autocracy)"

def below_event_horizon(L):
    return L < 53.5  # Canonical Event Horizon range: L≈52-55; using midpoint 53.5

def ar1_predict(L):
    """AR(1) predicted next-year L: L(t+1) = 3.56 + 0.956*L(t)"""
    return 3.56 + 0.956 * L

def liberty_yield(L):
    """Liberty-yield regression: Y = 33.05 - 0.35*L"""
    return 33.05 - 0.35 * L

# ============================================================
# OUTPUT BUFFER
# ============================================================
OUT = []

def w(line=""):
    OUT.append(line)

def table_row(cells, widths=None):
    if widths is None:
        return "| " + " | ".join(str(c) for c in cells) + " |"
    parts = []
    for c, wid in zip(cells, widths):
        parts.append(str(c).ljust(wid))
    return "| " + " | ".join(parts) + " |"

def table_sep(widths):
    return "|" + "|".join("-" * (w + 2) for w in widths) + "|"


# ============================================================
# RP-02: US LIBERTY SCORE VALIDATION
# ============================================================

w("# RP-02 & RP-04: US Liberty Score & Tyranny Validation")
w()
w("---")
w()
w("# RP-02: US LIBERTY SCORE VALIDATION")
w()

# ----------------------------------------------------------
# T1: Cross-Index Reconstruction
# ----------------------------------------------------------
w("## T1: Cross-Index Reconstruction")
w()

us_latest = US_DATA[-1]
w(f"**Most recent US observation:** year={us_latest['year']}, L={us_latest['liberty']}, T={us_latest['tyranny']}, C={us_latest['chaos']}")
w()

# Freedom House 2024: 83/100 -> L scale
# FH scale is 0-100 (aggregate freedom score). L scale conversion:
# FH 100 -> L ~94-95 (top democracies), FH 0 -> L ~3-5
# Linear mapping: L = FH * 0.95 (rough, based on observed calibration)
# More precisely: calibrate from known anchors
# US FH 2024 = 83. Top democracies (Finland, Norway) FH ~100, L ~95
# Partly free countries at FH ~50 get L ~45-55
# So L ≈ FH * 0.94 roughly, or we can use the dataset's own calibration
# US at 2023 had L=84, FH 2023 was ~83. So FH ≈ L for the US historically.
# Actually FH scores historically tracked L closely for the US.
# Let's use a principled conversion:
# FH is 0-100 political rights + civil liberties aggregate
# L scale appears to be 0-100 where top democracies ~92-95, bottom ~3-5
# Simple linear: L_FH = FH * (95/100) ≈ 0.95 * FH
fh_score = 83
L_from_FH = fh_score * 0.95
w(f"**Freedom House 2024:** Score = {fh_score}/100")
w(f"  - Conversion: L_FH = {fh_score} x 0.95 = **{L_from_FH:.1f}**")
w(f"  - Rationale: FH 100 maps to ~L=95 (top democracies), linear scaling")
w()

# V-Dem Sep 2025 reclassified US as "electoral autocracy"
# V-Dem Liberal Democracy Index (LDI) ranges 0-1
# Electoral autocracy in V-Dem is typically LDI 0.20-0.42
# For a country just reclassified, estimate LDI ≈ 0.40 (upper bound of category)
# LDI to L: L ≈ LDI * 100 (rough), so LDI 0.40 -> L ≈ 40
# More calibrated: V-Dem LDI 0.80+ = liberal democracy (L~80+),
# LDI 0.42-0.80 = electoral democracy, LDI 0.20-0.42 = electoral autocracy
# Just-reclassified means near boundary: LDI ≈ 0.40-0.42
vdem_ldi_est = 0.41  # just below electoral democracy threshold
L_from_VDem = vdem_ldi_est * 100
w(f"**V-Dem (Sep 2025):** Reclassified US as 'electoral autocracy'")
w(f"  - Estimated LDI ≈ {vdem_ldi_est} (just below electoral democracy threshold of 0.42)")
w(f"  - Conversion: L_VDem = LDI x 100 = **{L_from_VDem:.1f}**")
w(f"  - Rationale: V-Dem category boundaries map approximately to L scale")
w()

# TCF Democracy Meter 2025: 57/100
tcf_score = 57
# TCF is 0-100, appears to already be on similar scale
# TCF calibration: TCF 57 for US. TCF 100 = full democracy ≈ L=95
# L_TCF = TCF * 0.95 or closer to 1:1 since TCF is explicitly 0-100
L_from_TCF = tcf_score * 0.95
w(f"**TCF Democracy Meter 2025:** Score = {tcf_score}/100")
w(f"  - Conversion: L_TCF = {tcf_score} x 0.95 = **{L_from_TCF:.1f}**")
w()

# Cross-index consensus
cross_index_Ls = [L_from_FH, L_from_VDem, L_from_TCF]
cross_mean = mean(cross_index_Ls)
cross_min = min(cross_index_Ls)
cross_max = max(cross_index_Ls)
cross_range = cross_max - cross_min

thesis_L = 48
deviation = thesis_L - cross_mean

w(f"### Cross-Index Summary")
w()
w(f"| Index | Raw Score | Converted L |")
w(f"|-------|-----------|-------------|")
w(f"| Freedom House 2024 | {fh_score}/100 | {L_from_FH:.1f} |")
w(f"| V-Dem 2025 (est.) | LDI {vdem_ldi_est} | {L_from_VDem:.1f} |")
w(f"| TCF Democracy Meter 2025 | {tcf_score}/100 | {L_from_TCF:.1f} |")
w(f"| **Cross-Index Mean** | | **{cross_mean:.1f}** |")
w(f"| **Thesis L (2025)** | | **{thesis_L}** |")
w()
w(f"- Cross-index range: {cross_min:.1f} to {cross_max:.1f} (span = {cross_range:.1f} points)")
w(f"- Cross-index mean: {cross_mean:.1f}")
w(f"- Thesis L=48 deviation from cross-index mean: **{deviation:+.1f} points**")
w()

if abs(deviation) < 10:
    w(f"**VERDICT:** L=48 is within {abs(deviation):.1f} points of cross-index consensus -- **broadly consistent** given index methodology differences.")
elif abs(deviation) < 20:
    w(f"**VERDICT:** L=48 is {abs(deviation):.1f} points below cross-index consensus -- **moderately aggressive** but within the range spanned by V-Dem's reclassification.")
else:
    w(f"**VERDICT:** L=48 is {abs(deviation):.1f} points from cross-index consensus -- **significantly divergent** from external benchmarks.")
w()

# Note: V-Dem's own assessment (L~41) is actually *below* thesis L=48
w(f"**Key observation:** V-Dem's reclassification implies L ~ {L_from_VDem:.0f}, which is *below* the thesis L=48.")
w(f"The thesis value sits between Freedom House ({L_from_FH:.0f}) and V-Dem ({L_from_VDem:.0f}), closer to TCF ({L_from_TCF:.0f}).")
w()

# ----------------------------------------------------------
# T4: Sensitivity Propagation Analysis
# ----------------------------------------------------------
w("## T4: Sensitivity Propagation Analysis")
w()

L_values = [38, 43, 48, 53, 58, 65, 70, 77, 84]

# Historical base rate computation: what % of countries at L recovered above 80 within 15 years?
def compute_recovery_rate(L_target, window=5, recovery_threshold=80, recovery_years=15):
    """For countries that were at L within +/-window of L_target,
    what fraction recovered above recovery_threshold within recovery_years?"""
    # Build country time series
    country_series = defaultdict(list)
    for d in DATA:
        country_series[d["country"]].append((d["year"], d["liberty"]))

    episodes = 0
    recoveries = 0
    for country, series in country_series.items():
        series.sort()
        for i, (yr, lib) in enumerate(series):
            if abs(lib - L_target) <= window:
                # Check if recovered within recovery_years
                for j in range(i + 1, len(series)):
                    yr_j, lib_j = series[j]
                    if yr_j - yr <= recovery_years:
                        if lib_j >= recovery_threshold:
                            recoveries += 1
                            break
                    else:
                        break
                episodes += 1
    return recoveries, episodes

# Key thesis conclusions at L=48 to check which flip
def thesis_conclusions(L):
    """Return dict of key thesis conclusions and whether they hold at this L."""
    return {
        "Below Event Horizon (L<52-55)": L < 53.5,  # Canonical EH range: L≈52-55
        "Classified as autocracy (L<40)": L < 40,
        "In democracy basin (L>70)": L > 70,
        "AR(1) predicts further decline": ar1_predict(L) < L,
        "Liberty-yield > 15% (high risk premium)": liberty_yield(L) > 15,
        "Status: NOT FREE (L<40)": L < 40,
        "Status: PARTLY FREE (40<=L<70)": 40 <= L < 70,
        "Status: FREE (L>=70)": L >= 70,
    }

# Reference conclusions at L=48
ref_conclusions = thesis_conclusions(48)

w("### Full Sensitivity Table")
w()

# Header
w("| L | Stage | Below EH? | AR(1) L(t+1) | Recovery Rate (15yr) | Velocity from L=94 | Liberty-Yield | Conclusions Flipped vs L=48 |")
w("|---|-------|-----------|--------------|----------------------|---------------------|---------------|----------------------------|")

for L in L_values:
    stage = get_stage(L)
    eh = "YES" if below_event_horizon(L) else "NO"
    ar1 = ar1_predict(L)

    # Recovery rate
    recs, eps = compute_recovery_rate(L, window=5)
    rate_str = f"{recs}/{eps} ({100*recs/eps:.1f}%)" if eps > 0 else "N/A"

    # Velocity from L=94 in 2015 to L in 2025 (10 years)
    velocity = (L - 94) / 10

    # Liberty yield
    ly = liberty_yield(L)

    # Conclusions that flip
    current = thesis_conclusions(L)
    flipped = [k for k in ref_conclusions if current[k] != ref_conclusions[k]]
    flip_str = "; ".join(flipped) if flipped else "None"

    w(f"| {L} | {stage} | {eh} | {ar1:.1f} | {rate_str} | {velocity:+.1f}/yr | {ly:.1f}% | {flip_str} |")

w()

# Detailed narrative for key L values
w("### Detailed Sensitivity Narratives")
w()

for L in L_values:
    stage = get_stage(L)
    eh = below_event_horizon(L)
    ar1 = ar1_predict(L)
    ar1_direction = "further decline" if ar1 < L else "recovery"
    velocity = (L - 94) / 10
    ly = liberty_yield(L)
    recs, eps = compute_recovery_rate(L, window=5)
    rate = (100 * recs / eps) if eps > 0 else 0

    w(f"**L = {L}:**")
    w(f"- Stage: {stage}")
    w(f"- Event Horizon: {'BELOW (trapped)' if eh else 'ABOVE (escape possible)'}")
    w(f"- AR(1) next-year prediction: {ar1:.1f} ({'declining' if ar1 < L else 'recovering'})")
    w(f"- Historical recovery to L>80 within 15yr: {recs}/{eps} episodes ({rate:.1f}%)")
    w(f"- Required velocity from 2015 (L=94): {velocity:+.1f} pts/year")
    w(f"- Liberty-yield (risk premium): {ly:.1f}%")
    w()

# ----------------------------------------------------------
# T5: Historical US Velocity Context
# ----------------------------------------------------------
w("## T5: Historical US Velocity Context")
w()

# Compute all 10-year declines (or closest to 10 years)
country_series = defaultdict(list)
for d in DATA:
    country_series[d["country"]].append((d["year"], d["liberty"]))

for c in country_series:
    country_series[c].sort()

declines_10yr = []  # (country, year_start, year_end, L_start, L_end, decline, years)

for country, series in country_series.items():
    for i in range(len(series)):
        yr_i, L_i = series[i]
        for j in range(i + 1, len(series)):
            yr_j, L_j = series[j]
            span = yr_j - yr_i
            if 8 <= span <= 12:  # "exactly 10 years or closest"
                decline = L_i - L_j
                if decline > 0:  # only actual declines
                    declines_10yr.append((country, yr_i, yr_j, L_i, L_j, decline, span))

# Sort by decline magnitude (largest first)
declines_10yr.sort(key=lambda x: -x[5])

w(f"Total 10-year decline episodes found (8-12 year windows): **{len(declines_10yr)}**")
w()

# Top 20
w("### Top 20 Fastest 10-Year Declines")
w()
w("| Rank | Country | Period | L_start | L_end | Decline | Years |")
w("|------|---------|--------|---------|-------|---------|-------|")
for rank, (country, y1, y2, L1, L2, dec, span) in enumerate(declines_10yr[:20], 1):
    w(f"| {rank} | {country} | {y1}-{y2} | {L1:.0f} | {L2:.0f} | {dec:.0f} | {span} |")
w()

# Where does US 94->48 rank?
us_decline_46 = 46  # 94 - 48
rank_46 = sum(1 for d in declines_10yr if d[5] >= us_decline_46)
total_declines = len(declines_10yr)
w(f"### US Decline Rankings")
w()
w(f"**94 -> 48 (decline = 46 points, 2015-2025):**")
w(f"- Rank: **{rank_46}** out of {total_declines} episodes")
w(f"- Percentile: top {100 * rank_46 / total_declines:.1f}% of all 10-year declines")
w()

us_decline_17 = 17  # 94 - 77
rank_17 = sum(1 for d in declines_10yr if d[5] >= us_decline_17)
w(f"**94 -> 77 (decline = 17 points):**")
w(f"- Rank: **{rank_17}** out of {total_declines} episodes")
w(f"- Percentile: top {100 * rank_17 / total_declines:.1f}%")
w()

us_decline_29 = 29  # 94 - 65
rank_29 = sum(1 for d in declines_10yr if d[5] >= us_decline_29)
w(f"**94 -> 65 (decline = 29 points):**")
w(f"- Rank: **{rank_29}** out of {total_declines} episodes")
w(f"- Percentile: top {100 * rank_29 / total_declines:.1f}%")
w()

# Context: what are comparable declines?
w("### Context: Countries with similar 10-year declines to US 94->48 (decline >= 40)")
w()
big_declines = [(c, y1, y2, L1, L2, d, s) for c, y1, y2, L1, L2, d, s in declines_10yr if d >= 40]
if big_declines:
    w("| Country | Period | L_start | L_end | Decline | Context |")
    w("|---------|--------|---------|-------|---------|---------|")
    for country, y1, y2, L1, L2, dec, span in big_declines:
        context = ""
        if "Afghanistan" in country:
            context = "Soviet invasion / Taliban"
        elif "Iraq" in country:
            context = "Saddam consolidation / invasion"
        elif "Chile" in country:
            context = "Pinochet coup"
        elif "Iran" in country:
            context = "Islamic Revolution"
        elif "Venezuela" in country:
            context = "Chavez consolidation"
        elif "Russia" in country:
            context = "Putin consolidation"
        elif "Myanmar" in country or "Burma" in country:
            context = "Military coup"
        elif "Hungary" in country:
            context = "Orban consolidation"
        elif "Nicaragua" in country:
            context = "Ortega consolidation"
        elif "United States" in country:
            context = "Per thesis scoring"
        else:
            context = "Regime change"
        w(f"| {country} | {y1}-{y2} | {L1:.0f} | {L2:.0f} | {dec:.0f} | {context} |")
    w()
else:
    w("No 10-year declines >= 40 points found.")
    w()

# Also find the actual US data point pair closest to 10 years from L=94
us_series = [(d["year"], d["liberty"]) for d in US_DATA]
w("### US Liberty Score Trajectory (full history)")
w()
w("| Year | L | Stage | Event Horizon (L≈52-55) |")
w("|------|---|-------|--------------------------|")
for yr, lib in us_series:
    w(f"| {yr} | {lib:.0f} | {get_stage(lib)} | {'BELOW' if lib < 53.5 else 'IN RANGE' if lib < 55 else 'ABOVE'} |")
w()

# Compute actual 10-year US change from 2015
us_2015 = None
us_2025 = None
for yr, lib in us_series:
    if yr == 2015:
        us_2015 = lib
    if yr == 2025:
        us_2025 = lib

if us_2015 and us_2025:
    actual_decline = us_2015 - us_2025
    w(f"**Actual US 10-year change (2015-2025):** {us_2015:.0f} -> {us_2025:.0f} = **-{actual_decline:.0f} points**")
    # But from peak
    us_2010 = None
    for yr, lib in us_series:
        if yr == 2010:
            us_2010 = lib
    if us_2010:
        w(f"**From peak (2010, L={us_2010:.0f}) to 2025:** {us_2010:.0f} -> {us_2025:.0f} = **-{us_2010 - us_2025:.0f} points** over 15 years")
    w()

# ============================================================
# RP-04: TYRANNY VALIDATION
# ============================================================

w("---")
w()
w("# RP-04: TYRANNY VALIDATION")
w()

# ----------------------------------------------------------
# T1: Error Propagation Analysis (Monte Carlo)
# ----------------------------------------------------------
w("## T1: Error Propagation Analysis (Monte Carlo, 10,000 draws)")
w()

# Get most recent observation per country
latest_by_country = {}
for d in DATA:
    c = d["country"]
    if c not in latest_by_country or d["year"] > latest_by_country[c]["year"]:
        latest_by_country[c] = d

N_MC = 10000

mc_results = {}  # country -> {mean_T, std_T, ci_lo, ci_hi, original_T}
countries_high_uncertainty = 0
countries_reclassified = 0
T_THRESHOLD = 50

for country, obs in sorted(latest_by_country.items()):
    L_orig = obs["liberty"]
    C_orig = obs["chaos"]
    T_orig = obs["tyranny"]

    T_samples = []
    for _ in range(N_MC):
        L_noisy = L_orig + random.uniform(-5, 5)
        C_noisy = C_orig + random.uniform(-5, 5)
        T_noisy = 100.0 - L_noisy - C_noisy
        T_noisy = max(0, min(100, T_noisy))
        T_samples.append(T_noisy)

    m = mean(T_samples)
    s = stdev(T_samples)
    ci_lo = percentile(T_samples, 5)
    ci_hi = percentile(T_samples, 95)

    mc_results[country] = {
        "mean_T": m,
        "std_T": s,
        "ci_lo": ci_lo,
        "ci_hi": ci_hi,
        "original_T": T_orig,
        "L_orig": L_orig,
        "C_orig": C_orig,
    }

    if s > 12:
        countries_high_uncertainty += 1

    # Check reclassification: original side of T=50 vs possibility of crossing
    orig_above = T_orig > T_THRESHOLD
    # If CI spans the threshold, reclassification is possible
    if (ci_lo < T_THRESHOLD < ci_hi):
        countries_reclassified += 1

w(f"**Countries analyzed:** {len(mc_results)}")
w(f"**Monte Carlo draws per country:** {N_MC}")
w(f"**Error injection:** L += U(-5,+5), C += U(-5,+5)")
w()
w(f"**Countries with T uncertainty (std) > 12 points:** {countries_high_uncertainty}")
w(f"**Countries where T could cross T=50 threshold (90% CI spans 50):** {countries_reclassified}")
w()

# Show sample of results - top 20 by uncertainty
w("### Countries with highest T uncertainty (top 20 by std)")
w()
w("| Country | L | C | T_orig | T_mean | T_std | 90% CI |")
w("|---------|---|---|--------|--------|-------|--------|")
sorted_by_std = sorted(mc_results.items(), key=lambda x: -x[1]["std_T"])
for country, r in sorted_by_std[:20]:
    w(f"| {country} | {r['L_orig']:.0f} | {r['C_orig']:.0f} | {r['original_T']:.0f} | {r['mean_T']:.1f} | {r['std_T']:.1f} | [{r['ci_lo']:.1f}, {r['ci_hi']:.1f}] |")
w()

# Countries near threshold that could be reclassified
w("### Countries vulnerable to reclassification around T=50")
w()
reclassifiable = [(c, r) for c, r in mc_results.items() if r["ci_lo"] < T_THRESHOLD < r["ci_hi"]]
reclassifiable.sort(key=lambda x: abs(x[1]["original_T"] - 50))
if reclassifiable:
    w("| Country | T_orig | T_mean | 90% CI | Distance from 50 |")
    w("|---------|--------|--------|--------|-------------------|")
    for country, r in reclassifiable[:20]:
        dist = abs(r["original_T"] - 50)
        w(f"| {country} | {r['original_T']:.0f} | {r['mean_T']:.1f} | [{r['ci_lo']:.1f}, {r['ci_hi']:.1f}] | {dist:.0f} |")
w()

# Theoretical analysis of error propagation
w("### Theoretical Error Propagation")
w()
w("Since T = 100 - L - C, and errors are U(-5,+5) on both L and C:")
w("- Var(L_noise) = Var(U(-5,5)) = (10)^2/12 = 8.33")
w("- Var(C_noise) = 8.33")
w("- T_noisy = 100 - (L+eL) - (C+eC) = T - eL - eC")
w("- Var(T_noisy) = Var(eL) + Var(eC) = 16.67 (assuming independence)")
w(f"- Theoretical std(T_noisy) = sqrt(16.67) = **{math.sqrt(16.67):.2f}**")
w("- This is constant across all countries (error does not depend on T level)")
w()
actual_stds = [r["std_T"] for r in mc_results.values()]
w(f"- Empirical mean std(T) across countries: **{mean(actual_stds):.2f}** (should be ~4.08)")
w(f"- Empirical std of std(T): {stdev(actual_stds):.3f}")
w()
w(f"**VERDICT:** T uncertainty from +/-5 measurement error is ~{mean(actual_stds):.1f} points (moderate). "
  f"Only {countries_reclassified} countries have 90% CIs spanning the T=50 autocracy threshold. "
  f"{'Zero' if countries_high_uncertainty == 0 else str(countries_high_uncertainty)} countries have std > 12.")
w()

# ----------------------------------------------------------
# T2: Internal Consistency Check
# ----------------------------------------------------------
w("## T2: Internal Consistency Check")
w()

# Verify L + T + C = 100
sums = []
violations = 0
for d in DATA:
    s = d["liberty"] + d["tyranny"] + d["chaos"]
    sums.append(s)
    if abs(s - 100) > 0.01:
        violations += 1

w(f"**Constraint: L + T + C = 100**")
w(f"- Observations checked: {len(DATA)}")
w(f"- Violations (|sum - 100| > 0.01): **{violations}**")
w(f"- Sum range: [{min(sums):.2f}, {max(sums):.2f}]")
w(f"- Mean sum: {mean(sums):.4f}")
w()

# Correlations
all_L = [d["liberty"] for d in DATA]
all_T = [d["tyranny"] for d in DATA]
all_C = [d["chaos"] for d in DATA]

r_LT = correlation(all_L, all_T)
r_LC = correlation(all_L, all_C)
r_TC = correlation(all_T, all_C)

w(f"**Pairwise Correlations (n={len(DATA)}):**")
w()
w(f"| Pair | Pearson r | Interpretation |")
w(f"|------|-----------|---------------|")
w(f"| L vs T | {r_LT:.4f} | {'Near-perfect anti-correlation' if r_LT < -0.90 else 'Strong negative' if r_LT < -0.70 else 'Moderate negative' if r_LT < -0.40 else 'Weak'} |")
w(f"| L vs C | {r_LC:.4f} | {'Strong negative' if r_LC < -0.70 else 'Moderate negative' if r_LC < -0.40 else 'Weak negative' if r_LC < 0 else 'Positive'} |")
w(f"| T vs C | {r_TC:.4f} | {'Strong negative' if r_TC < -0.70 else 'Moderate negative' if r_TC < -0.40 else 'Weak negative' if r_TC < 0 else 'Positive'} |")
w()

# Assess whether system is effectively bivariate
w(f"**Is the system effectively bivariate?**")
w()
if r_LT < -0.90:
    w(f"- r(L,T) = {r_LT:.4f} < -0.90: **YES, T is near-redundant with 100-L**")
    w(f"  T adds almost no information beyond what L already provides.")
    w(f"  The ternary system is effectively a bivariate L-C system.")
elif r_LT < -0.80:
    w(f"- r(L,T) = {r_LT:.4f}: **PARTIALLY** - T is highly predictable from L but C introduces some independent variation.")
else:
    w(f"- r(L,T) = {r_LT:.4f}: **NO** - C provides enough independent variation that T is not simply 100-L.")
w()

# Check C variance
w(f"**Chaos (C) distribution summary:**")
w(f"- Mean(C) = {mean(all_C):.2f}")
w(f"- Std(C) = {stdev(all_C):.2f}")
w(f"- Range: [{min(all_C):.0f}, {max(all_C):.0f}]")
w(f"- If C has low variance, then T ≈ 100 - L and system is bivariate.")
w()

# ----------------------------------------------------------
# T3: Classification Concordance
# ----------------------------------------------------------
w("## T3: Classification Concordance")
w()

# Use most recent observation per country
latest_obs = list(latest_by_country.values())

autocracy_by_L = [d["liberty"] < 40 for d in latest_obs]
autocracy_by_T = [d["tyranny"] > 50 for d in latest_obs]

n_agree = sum(1 for a, b in zip(autocracy_by_L, autocracy_by_T) if a == b)
n_disagree = sum(1 for a, b in zip(autocracy_by_L, autocracy_by_T) if a != b)
n_total = len(latest_obs)

kappa = cohens_kappa(autocracy_by_L, autocracy_by_T)

w(f"**Classification definitions:**")
w(f"- Definition A (Liberty-based): Autocracy if L < 40")
w(f"- Definition B (Tyranny-based): Autocracy if T > 50")
w()
w(f"**Results (most recent observation per country, n={n_total}):**")
w(f"- Agree: {n_agree} ({100*n_agree/n_total:.1f}%)")
w(f"- Disagree: {n_disagree} ({100*n_disagree/n_total:.1f}%)")
w(f"- Cohen's Kappa: **{kappa:.4f}**")
w()

# Contingency table
tp = sum(1 for a, b in zip(autocracy_by_L, autocracy_by_T) if a and b)
tn = sum(1 for a, b in zip(autocracy_by_L, autocracy_by_T) if not a and not b)
fp = sum(1 for a, b in zip(autocracy_by_L, autocracy_by_T) if not a and b)
fn = sum(1 for a, b in zip(autocracy_by_L, autocracy_by_T) if a and not b)

w(f"**Contingency Table:**")
w(f"```")
w(f"                 T > 50 (autocracy)  T <= 50 (not)")
w(f"L < 40 (autoc)        {tp:3d}                 {fn:3d}")
w(f"L >= 40 (not)         {fp:3d}                 {tn:3d}")
w(f"```")
w()

# Interpretation
if kappa > 0.80:
    kappa_interp = "Almost perfect agreement"
elif kappa > 0.60:
    kappa_interp = "Substantial agreement"
elif kappa > 0.40:
    kappa_interp = "Moderate agreement"
elif kappa > 0.20:
    kappa_interp = "Fair agreement"
else:
    kappa_interp = "Slight/poor agreement"

w(f"**Kappa interpretation:** {kappa_interp}")
w()

# Show disagreement countries
w("### Countries classified differently by L<40 vs T>50")
w()
disagree_countries = []
for d in latest_obs:
    a = d["liberty"] < 40
    b = d["tyranny"] > 50
    if a != b:
        disagree_countries.append(d)

if disagree_countries:
    disagree_countries.sort(key=lambda x: x["country"])
    w("| Country | L | T | C | L<40? | T>50? |")
    w("|---------|---|---|---|-------|-------|")
    for d in disagree_countries:
        w(f"| {d['country']} | {d['liberty']:.0f} | {d['tyranny']:.0f} | {d['chaos']:.0f} | {'Yes' if d['liberty'] < 40 else 'No'} | {'Yes' if d['tyranny'] > 50 else 'No'} |")
    w()
else:
    w("No countries classified differently.")
    w()

# ----------------------------------------------------------
# T4: Do Key Findings Survive Alternative T?
# ----------------------------------------------------------
w("## T4: Alternative T Definition -- Do Key Findings Survive?")
w()

w("**Original:** T = 100 - L - C")
w("**Alternative:** T_alt = 100 - L (ignoring chaos entirely)")
w()

# Attractor basin analysis with original T
# Democracy basin: L > 70, Autocracy basin: L < 30
# These are L-based, so T definition doesn't change them!
# But let's check distribution shape

# Get latest L for each country
latest_Ls = [d["liberty"] for d in latest_obs]

# Bimodal distribution check
# Bin L values into 10-point bins
bins = list(range(0, 101, 10))
bin_counts = [0] * len(bins)
for L in latest_Ls:
    for i in range(len(bins) - 1):
        if bins[i] <= L < bins[i + 1]:
            bin_counts[i] += 1
            break
    if L == 100:
        bin_counts[-2] += 1  # put 100 in 90-100 bin

w("### L Distribution (latest observation per country)")
w()
w("| L Range | Count (T=100-L-C) | Count (T_alt=100-L) |")
w("|---------|-------------------|---------------------|")
for i in range(len(bins) - 1):
    w(f"| {bins[i]:2d}-{bins[i+1]:2d} | {bin_counts[i]:3d} | {bin_counts[i]:3d} |")
w()
w("*Note: Since attractor basins are defined by L thresholds (L>70 = democracy, L<30 = autocracy),")
w("the T definition does NOT affect basin membership. The bimodal structure is preserved identically.*")
w()

# But check: does T_alt vs T_orig change the dynamics?
# Compute T and T_alt for all observations
all_T_orig = [d["tyranny"] for d in DATA]
all_T_alt = [100 - d["liberty"] for d in DATA]
r_T_Talt = correlation(all_T_orig, all_T_alt)

w(f"**Correlation between T and T_alt:** r = {r_T_Talt:.4f}")
w()

# Count democracy / autocracy basin populations
n_demo_basin = sum(1 for L in latest_Ls if L > 70)
n_auto_basin = sum(1 for L in latest_Ls if L < 30)
n_middle = sum(1 for L in latest_Ls if 30 <= L <= 70)

w(f"**Basin populations (latest obs):**")
w(f"- Democracy basin (L > 70): {n_demo_basin} countries")
w(f"- Autocracy basin (L < 30): {n_auto_basin} countries")
w(f"- Middle zone (30-70): {n_middle} countries")
w()

# Check if bimodal by looking at bin structure
demo_mass = sum(bin_counts[7:])  # 70+
auto_mass = sum(bin_counts[:3])  # 0-29
middle_mass = sum(bin_counts[3:7])  # 30-69

w(f"**Bimodality test:**")
w(f"- Upper mode (L >= 70): {demo_mass} countries ({100*demo_mass/len(latest_Ls):.1f}%)")
w(f"- Lower mode (L < 30): {auto_mass} countries ({100*auto_mass/len(latest_Ls):.1f}%)")
w(f"- Valley (30-69): {middle_mass} countries ({100*middle_mass/len(latest_Ls):.1f}%)")
w()

if demo_mass > middle_mass and auto_mass > 0:
    w("**VERDICT:** Bimodal structure IS preserved regardless of T definition, because basins are L-defined.")
else:
    w("**VERDICT:** Distribution does not show strong bimodality.")
w(f"Using T_alt = 100-L changes T values but **does not alter** any L-based findings (stages, event horizon, basins).")
w(f"Since r(T, T_alt) = {r_T_Talt:.4f}, the alternative T is {'nearly identical' if r_T_Talt > 0.95 else 'similar but distinct' if r_T_Talt > 0.80 else 'meaningfully different'}.")
w()

# ----------------------------------------------------------
# T5: Chaos Distribution Analysis
# ----------------------------------------------------------
w("## T5: Chaos Distribution Analysis")
w()

all_C = [d["chaos"] for d in DATA]

c_mean = mean(all_C)
c_std = stdev(all_C)
c_min = min(all_C)
c_max = max(all_C)
c_skew = skewness(all_C)
c_median = percentile(all_C, 50)
c_iqr_lo = percentile(all_C, 25)
c_iqr_hi = percentile(all_C, 75)

w(f"**Chaos (C) Distribution Statistics (n={len(all_C)}):**")
w()
w(f"| Statistic | Value |")
w(f"|-----------|-------|")
w(f"| Mean | {c_mean:.2f} |")
w(f"| Std Dev | {c_std:.2f} |")
w(f"| Min | {c_min:.0f} |")
w(f"| Max | {c_max:.0f} |")
w(f"| Median | {c_median:.1f} |")
w(f"| IQR | [{c_iqr_lo:.1f}, {c_iqr_hi:.1f}] |")
w(f"| Skewness | {c_skew:.3f} |")
w()

# Histogram of C
c_bins = list(range(0, 81, 5))
c_hist = [0] * len(c_bins)
for c_val in all_C:
    for i in range(len(c_bins) - 1):
        if c_bins[i] <= c_val < c_bins[i + 1]:
            c_hist[i] += 1
            break
    if c_val >= c_bins[-1]:
        c_hist[-1] += 1

w("### Chaos Histogram")
w()
w("| C Range | Count | Bar |")
w("|---------|-------|-----|")
max_count = max(c_hist) if max(c_hist) > 0 else 1
for i in range(len(c_bins) - 1):
    bar = "#" * int(40 * c_hist[i] / max_count)
    w(f"| {c_bins[i]:2d}-{c_bins[i+1]:2d} | {c_hist[i]:4d} | {bar} |")
if c_hist[-1] > 0:
    bar = "#" * int(40 * c_hist[-1] / max_count)
    w(f"| {c_bins[-1]:2d}+ | {c_hist[-1]:4d} | {bar} |")
w()

# Check if C is near-constant (low variance)
w("### Is the system effectively bivariate?")
w()
cv = c_std / c_mean if c_mean > 0 else 0  # coefficient of variation
w(f"- Coefficient of variation (CV) = std/mean = {cv:.3f}")
w(f"- If CV < 0.3, C is relatively stable -> system is approximately bivariate")
w(f"- CV = {cv:.3f}: C is {'relatively stable' if cv < 0.3 else 'moderately variable' if cv < 0.6 else 'highly variable'}")
w()

# Partial correlation of T with L, controlling for...
# Since T = 100-L-C, the partial correlation of T with some external Y controlling for L
# would capture only the C component. We don't have external Y, but we can compute
# the partial correlation of T and C controlling for L.

# Partial corr(T,C|L) = (r_TC - r_TL*r_CL) / sqrt((1-r_TL^2)*(1-r_CL^2))
r_TL = correlation(all_T, all_L)
r_TC_val = correlation(all_T, all_C)
r_CL = correlation(all_C, all_L)

denom = math.sqrt(max(0.0001, (1 - r_TL**2) * (1 - r_CL**2)))
partial_r_TC_given_L = (r_TC_val - r_TL * r_CL) / denom

w(f"**Partial correlation r(T, C | L) = {partial_r_TC_given_L:.4f}**")
w(f"- This captures how much T varies with C *after* removing L's influence")
w(f"- Since T = 100-L-C by construction, partial_r(T,C|L) should be strongly negative")
w(f"  (higher C means lower T, given fixed L)")
w()

# What fraction of T variance is explained by L alone?
r2_TL = r_TL ** 2
w(f"**R-squared of T ~ L: {r2_TL:.4f}**")
w(f"- L alone explains {100*r2_TL:.1f}% of T variance")
w(f"- Remaining {100*(1-r2_TL):.1f}% is attributable to C variation")
w()

if r2_TL > 0.90:
    verdict = "T is >90% determined by L. The ternary system is EFFECTIVELY BIVARIATE. T adds negligible independent information."
elif r2_TL > 0.75:
    verdict = f"T is {100*r2_TL:.0f}% determined by L. C contributes meaningfully but the system is predominantly bivariate."
else:
    verdict = f"T is only {100*r2_TL:.0f}% determined by L. C contributes substantial independent variation. The ternary system is genuinely trivariate."

w(f"**VERDICT:** {verdict}")
w()

# ============================================================
# FINAL SYNTHESIS
# ============================================================
w("---")
w()
w("# SYNTHESIS: Key Findings")
w()
w("## RP-02 Summary")
w()
w(f"1. **Cross-index consensus** places US at L={cross_mean:.0f} (range {cross_min:.0f}-{cross_max:.0f}). "
  f"The thesis L=48 sits within this range, between FH ({L_from_FH:.0f}) and V-Dem ({L_from_VDem:.0f}).")
w()
w(f"2. **Sensitivity analysis** shows the event horizon (L≈52-55) is the critical threshold. "
  f"At L=48, the US is below it; at L=65+, it is above. "
  f"The AR(1) model predicts {'continued decline' if ar1_predict(48) < 48 else 'slight recovery'} from L=48 "
  f"(next-year prediction: {ar1_predict(48):.1f}).")
w()

# Find the actual rank for the narrative
w(f"3. **Historical velocity context:** A 46-point decline in 10 years (94->48) ranks "
  f"#{rank_46} out of {total_declines} episodes. "
  f"{'This is historically unprecedented for an established democracy.' if rank_46 <= 10 else 'This is historically rare but not unprecedented.'}")
w()

w("## RP-04 Summary")
w()
w(f"4. **Error propagation:** +/-5 point measurement error produces ~{mean(actual_stds):.1f} point T uncertainty. "
  f"{countries_reclassified} countries have CIs spanning the T=50 autocracy threshold.")
w()
w(f"5. **Internal consistency:** L+T+C=100 holds exactly ({violations} violations). "
  f"r(L,T)={r_LT:.3f} -- {'T is nearly redundant with L' if r_LT < -0.90 else 'T captures some independent variation from C'}.")
w()
w(f"6. **Classification concordance:** L<40 vs T>50 agree on {100*n_agree/n_total:.0f}% of countries "
  f"(kappa={kappa:.3f}, {kappa_interp.lower()}).")
w()
w(f"7. **Alternative T (ignoring C):** r(T, T_alt)={r_T_Talt:.3f}. "
  f"Bimodal attractor structure is {'preserved' if r_T_Talt > 0.90 else 'altered'} because basins are L-defined.")
w()
w(f"8. **Chaos distribution:** C has mean={c_mean:.1f}, std={c_std:.1f}, range [{c_min:.0f},{c_max:.0f}]. "
  f"L explains {100*r2_TL:.0f}% of T variance. "
  f"{'The system is effectively bivariate.' if r2_TL > 0.85 else 'C contributes meaningful independent variation.'}")
w()

# ============================================================
# WRITE OUTPUT
# ============================================================
output = "\n".join(OUT)
with open("/tmp/pt-data/rp02-rp04-results.md", "w") as f:
    f.write(output)

print("\n" + "=" * 60)
print("RESULTS WRITTEN TO: /tmp/pt-data/rp02-rp04-results.md")
print("=" * 60)
print()
print(output)