#!/usr/bin/env python3
"""
RP-02: US Liberty Score Validation (Tests T1, T4, T5)
RP-04: Tyranny Validation (Tests T1-T5)

Nobel-caliber quantitative research using ONLY Python stdlib.
Dataset: /tmp/pt-data/political-topology-flat.csv
"""
import csv
import math
import random
import statistics
from collections import defaultdict

random.seed(42)

# ============================================================
# LOAD DATA
# ============================================================
DATA = []
with open("/tmp/pt-data/political-topology-flat.csv", "r") as f:
    reader = csv.DictReader(f)
    for row in reader:
        DATA.append({
            "country": row["country"],
            "iso3": row["iso3"],
            "region": row["region"],
            "year": int(row["year"]),
            "liberty": float(row["liberty"]),
            "tyranny": float(row["tyranny"]),
            "chaos": float(row["chaos"]),
            "status": row["status"],
            "event_horizon_below": row["event_horizon_below"],
            "data_source_period": row["data_source_period"],
        })

print(f"Loaded {len(DATA)} observations")

# US data
US_DATA = sorted([d for d in DATA if d["country"] == "United States"], key=lambda x: x["year"])
print(f"US observations: {len(US_DATA)}")

# ============================================================
# HELPER FUNCTIONS
# ============================================================

def mean(vals):
    if not vals:
        return 0.0
    return sum(vals) / len(vals)

def stdev(vals):
    if len(vals) < 2:
        return 0.0
    m = mean(vals)
    return math.sqrt(sum((v - m) ** 2 for v in vals) / (len(vals) - 1))

def percentile(vals, p):
    """Compute p-th percentile (0-100) with linear interpolation."""
    s = sorted(vals)
    n = len(s)
    k = (p / 100.0) * (n - 1)
    f = math.floor(k)
    c = math.ceil(k)
    if f == c:
        return s[int(k)]
    return s[f] * (c - k) + s[c] * (k - f)

def correlation(x, y):
    """Pearson correlation coefficient."""
    n = len(x)
    if n < 2:
        return 0.0
    mx, my = mean(x), mean(y)
    sx, sy = stdev(x), stdev(y)
    if sx == 0 or sy == 0:
        return 0.0
    return sum((xi - mx) * (yi - my) for xi, yi in zip(x, y)) / ((n - 1) * sx * sy)

def skewness(vals):
    """Fisher-Pearson coefficient of skewness."""
    n = len(vals)
    if n < 3:
        return 0.0
    m = mean(vals)
    s = stdev(vals)
    if s == 0:
        return 0.0
    return (n / ((n - 1) * (n - 2))) * sum(((v - m) / s) ** 3 for v in vals)

def cohens_kappa(a, b):
    """Cohen's kappa for two binary lists."""
    n = len(a)
    # Contingency table
    tp = sum(1 for i in range(n) if a[i] and b[i])
    tn = sum(1 for i in range(n) if not a[i] and not b[i])
    fp = sum(1 for i in range(n) if not a[i] and b[i])
    fn = sum(1 for i in range(n) if a[i] and not b[i])
    po = (tp + tn) / n  # observed agreement
    # Expected (chance) agreement
    p_a1 = (tp + fn) / n
    p_a0 = (fp + tn) / n
    p_b1 = (tp + fp) / n
    p_b0 = (fn + tn) / n
    pe = p_a1 * p_b1 + p_a0 * p_b0
    if pe == 1.0:
        return 1.0
    return (po - pe) / (1 - pe)

def get_stage(L):
    """Map Liberty score to stage S1-S8."""
    if L >= 90:
        return "S1 (Consolidated Democracy)"
    elif L >= 80:
        return "S2 (Mature Democracy)"
    elif L >= 70:
        return "S3 (Electoral Democracy)"
    elif L >= 60:
        return "S4 (Hybrid/Transitional)"
    elif L >= 50:
        return "S5 (Competitive Authoritarian)"
    elif L >= 40:
        return "S6 (Electoral Autocracy)"
    elif L >= 25:
        return "S7 (Closed Autocracy)"
    else:
        return "S8 (Failed State/Total Autocracy)"

def below_event_horizon(L):
    return L < 53.5  # Canonical Event Horizon range: L≈52-55; using midpoint 53.5

def ar1_predict(L):
    """AR(1) predicted next-year L: L(t+1) = 3.56 + 0.956*L(t)."""
    return 3.56 + 0.956 * L

def liberty_yield(L):
    """Liberty-yield regression: Y = 33.05 - 0.35*L."""
    return 33.05 - 0.35 * L
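# Optional sanity checks for the helpers above (added illustration; the expected
# values follow directly from the definitions and are not part of the report output).
assert abs(mean([1, 2, 3]) - 2.0) < 1e-9
assert abs(correlation([1, 2, 3], [2, 4, 6]) - 1.0) < 1e-9  # perfectly linear pair
assert abs(percentile([0, 10], 50) - 5.0) < 1e-9            # midpoint interpolation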
# ============================================================
# OUTPUT BUFFER
# ============================================================
OUT = []

def w(line=""):
    OUT.append(line)

def table_row(cells, widths=None):
    if widths is None:
        return "| " + " | ".join(str(c) for c in cells) + " |"
    parts = []
    for c, wid in zip(cells, widths):
        parts.append(str(c).ljust(wid))
    return "| " + " | ".join(parts) + " |"

def table_sep(widths):
    return "|" + "|".join("-" * (wd + 2) for wd in widths) + "|"

# ============================================================
# RP-02: US LIBERTY SCORE VALIDATION
# ============================================================
w("# RP-02 & RP-04: US Liberty Score & Tyranny Validation")
w()
w("---")
w()
w("# RP-02: US LIBERTY SCORE VALIDATION")
w()

# ----------------------------------------------------------
# T1: Cross-Index Reconstruction
# ----------------------------------------------------------
w("## T1: Cross-Index Reconstruction")
w()

us_latest = US_DATA[-1]
w(f"**Most recent US observation:** year={us_latest['year']}, L={us_latest['liberty']}, T={us_latest['tyranny']}, C={us_latest['chaos']}")
w()

# Freedom House 2024: 83/100 -> L scale.
# FH is a 0-100 aggregate of political rights + civil liberties. The L scale is also
# 0-100, but top democracies (Finland, Norway; FH ~100) sit around L 92-95 and the
# bottom around L 3-5; partly-free countries at FH ~50 land near L 45-55. The US had
# L=84 in 2023 against FH ~83, so FH has historically tracked L closely for the US.
# Simple linear conversion: L_FH = FH * (95/100) = 0.95 * FH.
fh_score = 83
L_from_FH = fh_score * 0.95
w(f"**Freedom House 2024:** Score = {fh_score}/100")
w(f" - Conversion: L_FH = {fh_score} x 0.95 = **{L_from_FH:.1f}**")
w(f" - Rationale: FH 100 maps to ~L=95 (top democracies), linear scaling")
w()

# V-Dem (Sep 2025) reclassified the US as an "electoral autocracy".
# The V-Dem Liberal Democracy Index (LDI) runs 0-1: LDI 0.80+ ~ liberal democracy,
# 0.42-0.80 ~ electoral democracy, 0.20-0.42 ~ electoral autocracy. A country just
# reclassified sits near the upper boundary of its new category, so estimate
# LDI ≈ 0.40-0.42 and convert roughly as L ≈ LDI * 100.
vdem_ldi_est = 0.41  # just below the electoral-democracy threshold of 0.42
L_from_VDem = vdem_ldi_est * 100
w(f"**V-Dem (Sep 2025):** Reclassified US as 'electoral autocracy'")
w(f" - Estimated LDI ≈ {vdem_ldi_est} (just below electoral democracy threshold of 0.42)")
w(f" - Conversion: L_VDem = LDI x 100 = **{L_from_VDem:.1f}**")
w(f" - Rationale: V-Dem category boundaries map approximately to L scale")
w()
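# Illustrative helper (an assumption/sketch, not part of the original analysis): the
# FH and TCF conversions above reduce to a linear rescaling in which the source
# index's top score maps to roughly L=95. A generic form of that mapping, unused below:
def rescale_to_L(score, src_top=100.0, L_top=95.0):
    """Map a 0..src_top index score onto the L scale, anchoring src_top -> L_top."""
    return score * (L_top / src_top)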
# TCF Democracy Meter 2025: 57/100.
# TCF is explicitly 0-100; calibrate it the same way as FH (TCF 100 = full democracy
# ≈ L=95), so L_TCF = TCF * 0.95 (arguably close to 1:1).
tcf_score = 57
L_from_TCF = tcf_score * 0.95
w(f"**TCF Democracy Meter 2025:** Score = {tcf_score}/100")
w(f" - Conversion: L_TCF = {tcf_score} x 0.95 = **{L_from_TCF:.1f}**")
w()

# Cross-index consensus
cross_index_Ls = [L_from_FH, L_from_VDem, L_from_TCF]
cross_mean = mean(cross_index_Ls)
cross_min = min(cross_index_Ls)
cross_max = max(cross_index_Ls)
cross_range = cross_max - cross_min
thesis_L = 48
deviation = thesis_L - cross_mean

w(f"### Cross-Index Summary")
w()
w(f"| Index | Raw Score | Converted L |")
w(f"|-------|-----------|-------------|")
w(f"| Freedom House 2024 | {fh_score}/100 | {L_from_FH:.1f} |")
w(f"| V-Dem 2025 (est.) | LDI {vdem_ldi_est} | {L_from_VDem:.1f} |")
w(f"| TCF Democracy Meter 2025 | {tcf_score}/100 | {L_from_TCF:.1f} |")
w(f"| **Cross-Index Mean** | | **{cross_mean:.1f}** |")
w(f"| **Thesis L (2025)** | | **{thesis_L}** |")
w()
w(f"- Cross-index range: {cross_min:.1f} to {cross_max:.1f} (span = {cross_range:.1f} points)")
w(f"- Cross-index mean: {cross_mean:.1f}")
w(f"- Thesis L=48 deviation from cross-index mean: **{deviation:+.1f} points**")
w()

if abs(deviation) < 10:
    w(f"**VERDICT:** L=48 is within {abs(deviation):.1f} points of cross-index consensus -- **broadly consistent** given index methodology differences.")
elif abs(deviation) < 20:
    w(f"**VERDICT:** L=48 is {abs(deviation):.1f} points below cross-index consensus -- **moderately aggressive** but within the range spanned by V-Dem's reclassification.")
else:
    w(f"**VERDICT:** L=48 is {abs(deviation):.1f} points from cross-index consensus -- **significantly divergent** from external benchmarks.")
w()

# Note: V-Dem's own assessment (L ~ 41) is actually *below* thesis L=48
w(f"**Key observation:** V-Dem's reclassification implies L ~ {L_from_VDem:.0f}, which is *below* the thesis L=48.")
w(f"The thesis value sits between Freedom House ({L_from_FH:.0f}) and V-Dem ({L_from_VDem:.0f}), closer to TCF ({L_from_TCF:.0f}).")
w()

# ----------------------------------------------------------
# T4: Sensitivity Propagation Analysis
# ----------------------------------------------------------
w("## T4: Sensitivity Propagation Analysis")
w()

L_values = [38, 43, 48, 53, 58, 65, 70, 77, 84]
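# Added note (derived from the ar1_predict() coefficients above, not from the dataset):
# the AR(1) model L(t+1) = 3.56 + 0.956*L(t) has its fixed point at
# L* = 3.56 / (1 - 0.956) ≈ 80.9, so it predicts further decline only for L above ~81
# and (slight) mean-reversion upward below that. Illustrative constant, unused below:
AR1_FIXED_POINT = 3.56 / (1 - 0.956)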
# Historical base rate computation: what % of countries at a given L recovered above 80
# within 15 years?
def compute_recovery_rate(L_target, window=5, recovery_threshold=80, recovery_years=15):
    """For countries that were at L within +/-window of L_target, what fraction
    recovered above recovery_threshold within recovery_years?"""
    # Build country time series
    country_series = defaultdict(list)
    for d in DATA:
        country_series[d["country"]].append((d["year"], d["liberty"]))
    episodes = 0
    recoveries = 0
    for country, series in country_series.items():
        series.sort()
        for i, (yr, lib) in enumerate(series):
            if abs(lib - L_target) <= window:
                # Check if recovered within recovery_years
                for j in range(i + 1, len(series)):
                    yr_j, lib_j = series[j]
                    if yr_j - yr <= recovery_years:
                        if lib_j >= recovery_threshold:
                            recoveries += 1
                            break
                    else:
                        break
                episodes += 1
    return recoveries, episodes

# Key thesis conclusions at L=48, used to check which flip at other L values
def thesis_conclusions(L):
    """Return dict of key thesis conclusions and whether each holds at this L."""
    return {
        "Below Event Horizon (L<52-55)": L < 53.5,  # Canonical EH range: L≈52-55
        "Classified as autocracy (L<40)": L < 40,
        "In democracy basin (L>70)": L > 70,
        "AR(1) predicts further decline": ar1_predict(L) < L,
        "Liberty-yield > 15% (high risk premium)": liberty_yield(L) > 15,
        "Status: NOT FREE (L<40)": L < 40,
        "Status: PARTLY FREE (40<=L<70)": 40 <= L < 70,
        "Status: FREE (L>=70)": L >= 70,
    }

# Reference conclusions at L=48
ref_conclusions = thesis_conclusions(48)

w("### Full Sensitivity Table")
w()
# Header
w("| L | Stage | Below EH? | AR(1) L(t+1) | Recovery Rate (15yr) | Velocity from L=94 | Liberty-Yield | Conclusions Flipped vs L=48 |")
w("|---|-------|-----------|--------------|----------------------|---------------------|---------------|----------------------------|")
for L in L_values:
    stage = get_stage(L)
    eh = "YES" if below_event_horizon(L) else "NO"
    ar1 = ar1_predict(L)
    # Recovery rate
    recs, eps = compute_recovery_rate(L, window=5)
    rate_str = f"{recs}/{eps} ({100*recs/eps:.1f}%)" if eps > 0 else "N/A"
    # Velocity from L=94 in 2015 to L in 2025 (10 years)
    velocity = (L - 94) / 10
    # Liberty yield
    ly = liberty_yield(L)
    # Conclusions that flip
    current = thesis_conclusions(L)
    flipped = [k for k in ref_conclusions if current[k] != ref_conclusions[k]]
    flip_str = "; ".join(flipped) if flipped else "None"
    w(f"| {L} | {stage} | {eh} | {ar1:.1f} | {rate_str} | {velocity:+.1f}/yr | {ly:.1f}% | {flip_str} |")
w()

# Detailed narrative for key L values
w("### Detailed Sensitivity Narratives")
w()
for L in L_values:
    stage = get_stage(L)
    eh = below_event_horizon(L)
    ar1 = ar1_predict(L)
    velocity = (L - 94) / 10
    ly = liberty_yield(L)
    recs, eps = compute_recovery_rate(L, window=5)
    rate = (100 * recs / eps) if eps > 0 else 0
    w(f"**L = {L}:**")
    w(f"- Stage: {stage}")
    w(f"- Event Horizon: {'BELOW (trapped)' if eh else 'ABOVE (escape possible)'}")
    w(f"- AR(1) next-year prediction: {ar1:.1f} ({'declining' if ar1 < L else 'recovering'})")
    w(f"- Historical recovery to L>80 within 15yr: {recs}/{eps} episodes ({rate:.1f}%)")
    w(f"- Required velocity from 2015 (L=94): {velocity:+.1f} pts/year")
    w(f"- Liberty-yield (risk premium): {ly:.1f}%")
    w()

# ----------------------------------------------------------
# T5: Historical US Velocity Context
# ----------------------------------------------------------
w("## T5: Historical US Velocity Context")
w()

# Compute all 10-year declines (or closest to 10 years)
country_series = defaultdict(list)
for d in DATA:
    country_series[d["country"]].append((d["year"], d["liberty"]))
for c in country_series:
    country_series[c].sort()
declines_10yr = []  # (country, year_start, year_end, L_start, L_end, decline, years)
for country, series in country_series.items():
    for i in range(len(series)):
        yr_i, L_i = series[i]
        for j in range(i + 1, len(series)):
            yr_j, L_j = series[j]
            span = yr_j - yr_i
            if 8 <= span <= 12:  # "exactly 10 years or closest"
                decline = L_i - L_j
                if decline > 0:  # only actual declines
                    declines_10yr.append((country, yr_i, yr_j, L_i, L_j, decline, span))

# Sort by decline magnitude (largest first)
declines_10yr.sort(key=lambda x: -x[5])

w(f"Total 10-year decline episodes found (8-12 year windows): **{len(declines_10yr)}**")
w()

# Top 20
w("### Top 20 Fastest 10-Year Declines")
w()
w("| Rank | Country | Period | L_start | L_end | Decline | Years |")
w("|------|---------|--------|---------|-------|---------|-------|")
for rank, (country, y1, y2, L1, L2, dec, span) in enumerate(declines_10yr[:20], 1):
    w(f"| {rank} | {country} | {y1}-{y2} | {L1:.0f} | {L2:.0f} | {dec:.0f} | {span} |")
w()

# Where does US 94 -> 48 rank?
us_decline_46 = 46  # 94 - 48
rank_46 = sum(1 for d in declines_10yr if d[5] >= us_decline_46)
total_declines = len(declines_10yr)

w(f"### US Decline Rankings")
w()
w(f"**94 -> 48 (decline = 46 points, 2015-2025):**")
w(f"- Rank: **{rank_46}** out of {total_declines} episodes")
w(f"- Percentile: top {100 * rank_46 / total_declines:.1f}% of all 10-year declines")
w()

us_decline_17 = 17  # 94 - 77
rank_17 = sum(1 for d in declines_10yr if d[5] >= us_decline_17)
w(f"**94 -> 77 (decline = 17 points):**")
w(f"- Rank: **{rank_17}** out of {total_declines} episodes")
w(f"- Percentile: top {100 * rank_17 / total_declines:.1f}%")
w()

us_decline_29 = 29  # 94 - 65
rank_29 = sum(1 for d in declines_10yr if d[5] >= us_decline_29)
w(f"**94 -> 65 (decline = 29 points):**")
w(f"- Rank: **{rank_29}** out of {total_declines} episodes")
w(f"- Percentile: top {100 * rank_29 / total_declines:.1f}%")
w()

# Context: what are comparable declines?
w("### Context: Countries with similar 10-year declines to US 94->48 (decline >= 40)") w() big_declines = [(c, y1, y2, L1, L2, d, s) for c, y1, y2, L1, L2, d, s in declines_10yr if d >= 40] if big_declines: w("| Country | Period | L_start | L_end | Decline | Context |") w("|---------|--------|---------|-------|---------|---------|") for country, y1, y2, L1, L2, dec, span in big_declines: context = "" if "Afghanistan" in country: context = "Soviet invasion / Taliban" elif "Iraq" in country: context = "Saddam consolidation / invasion" elif "Chile" in country: context = "Pinochet coup" elif "Iran" in country: context = "Islamic Revolution" elif "Venezuela" in country: context = "Chavez consolidation" elif "Russia" in country: context = "Putin consolidation" elif "Myanmar" in country or "Burma" in country: context = "Military coup" elif "Hungary" in country: context = "Orban consolidation" elif "Nicaragua" in country: context = "Ortega consolidation" elif "United States" in country: context = "Per thesis scoring" else: context = "Regime change" w(f"| {country} | {y1}-{y2} | {L1:.0f} | {L2:.0f} | {dec:.0f} | {context} |") w() else: w("No 10-year declines >= 40 points found.") w() # Also find the actual US data point pair closest to 10 years from L=94 us_series = [(d["year"], d["liberty"]) for d in US_DATA] w("### US Liberty Score Trajectory (full history)") w() w("| Year | L | Stage | Event Horizon (L≈52-55) |") w("|------|---|-------|--------------------------|") for yr, lib in us_series: w(f"| {yr} | {lib:.0f} | {get_stage(lib)} | {'BELOW' if lib < 53.5 else 'IN RANGE' if lib < 55 else 'ABOVE'} |") w() # Compute actual 10-year US change from 2015 us_2015 = None us_2025 = None for yr, lib in us_series: if yr == 2015: us_2015 = lib if yr == 2025: us_2025 = lib if us_2015 and us_2025: actual_decline = us_2015 - us_2025 w(f"**Actual US 10-year change (2015-2025):** {us_2015:.0f} -> {us_2025:.0f} = **-{actual_decline:.0f} points**") # But from peak us_2010 = None for yr, lib in us_series: if yr == 2010: us_2010 = lib if us_2010: w(f"**From peak (2010, L={us_2010:.0f}) to 2025:** {us_2010:.0f} -> {us_2025:.0f} = **-{us_2010 - us_2025:.0f} points** over 15 years") w() # ============================================================ # RP-04: TYRANNY VALIDATION # ============================================================ w("---") w() w("# RP-04: TYRANNY VALIDATION") w() # ---------------------------------------------------------- # T1: Error Propagation Analysis (Monte Carlo) # ---------------------------------------------------------- w("## T1: Error Propagation Analysis (Monte Carlo, 10,000 draws)") w() # Get most recent observation per country latest_by_country = {} for d in DATA: c = d["country"] if c not in latest_by_country or d["year"] > latest_by_country[c]["year"]: latest_by_country[c] = d N_MC = 10000 mc_results = {} # country -> {mean_T, std_T, ci_lo, ci_hi, original_T} countries_high_uncertainty = 0 countries_reclassified = 0 T_THRESHOLD = 50 for country, obs in sorted(latest_by_country.items()): L_orig = obs["liberty"] C_orig = obs["chaos"] T_orig = obs["tyranny"] T_samples = [] for _ in range(N_MC): L_noisy = L_orig + random.uniform(-5, 5) C_noisy = C_orig + random.uniform(-5, 5) T_noisy = 100.0 - L_noisy - C_noisy T_noisy = max(0, min(100, T_noisy)) T_samples.append(T_noisy) m = mean(T_samples) s = stdev(T_samples) ci_lo = percentile(T_samples, 5) ci_hi = percentile(T_samples, 95) mc_results[country] = { "mean_T": m, "std_T": s, "ci_lo": ci_lo, "ci_hi": ci_hi, "original_T": T_orig, 
"L_orig": L_orig, "C_orig": C_orig, } if s > 12: countries_high_uncertainty += 1 # Check reclassification: original side of T=50 vs possibility of crossing orig_above = T_orig > T_THRESHOLD # If CI spans the threshold, reclassification is possible if (ci_lo < T_THRESHOLD < ci_hi): countries_reclassified += 1 w(f"**Countries analyzed:** {len(mc_results)}") w(f"**Monte Carlo draws per country:** {N_MC}") w(f"**Error injection:** L += U(-5,+5), C += U(-5,+5)") w() w(f"**Countries with T uncertainty (std) > 12 points:** {countries_high_uncertainty}") w(f"**Countries where T could cross T=50 threshold (90% CI spans 50):** {countries_reclassified}") w() # Show sample of results - top 20 by uncertainty w("### Countries with highest T uncertainty (top 20 by std)") w() w("| Country | L | C | T_orig | T_mean | T_std | 90% CI |") w("|---------|---|---|--------|--------|-------|--------|") sorted_by_std = sorted(mc_results.items(), key=lambda x: -x[1]["std_T"]) for country, r in sorted_by_std[:20]: w(f"| {country} | {r['L_orig']:.0f} | {r['C_orig']:.0f} | {r['original_T']:.0f} | {r['mean_T']:.1f} | {r['std_T']:.1f} | [{r['ci_lo']:.1f}, {r['ci_hi']:.1f}] |") w() # Countries near threshold that could be reclassified w("### Countries vulnerable to reclassification around T=50") w() reclassifiable = [(c, r) for c, r in mc_results.items() if r["ci_lo"] < T_THRESHOLD < r["ci_hi"]] reclassifiable.sort(key=lambda x: abs(x[1]["original_T"] - 50)) if reclassifiable: w("| Country | T_orig | T_mean | 90% CI | Distance from 50 |") w("|---------|--------|--------|--------|-------------------|") for country, r in reclassifiable[:20]: dist = abs(r["original_T"] - 50) w(f"| {country} | {r['original_T']:.0f} | {r['mean_T']:.1f} | [{r['ci_lo']:.1f}, {r['ci_hi']:.1f}] | {dist:.0f} |") w() # Theoretical analysis of error propagation w("### Theoretical Error Propagation") w() w("Since T = 100 - L - C, and errors are U(-5,+5) on both L and C:") w("- Var(L_noise) = Var(U(-5,5)) = (10)^2/12 = 8.33") w("- Var(C_noise) = 8.33") w("- T_noisy = 100 - (L+eL) - (C+eC) = T - eL - eC") w("- Var(T_noisy) = Var(eL) + Var(eC) = 16.67 (assuming independence)") w(f"- Theoretical std(T_noisy) = sqrt(16.67) = **{math.sqrt(16.67):.2f}**") w("- This is constant across all countries (error does not depend on T level)") w() actual_stds = [r["std_T"] for r in mc_results.values()] w(f"- Empirical mean std(T) across countries: **{mean(actual_stds):.2f}** (should be ~4.08)") w(f"- Empirical std of std(T): {stdev(actual_stds):.3f}") w() w(f"**VERDICT:** T uncertainty from +/-5 measurement error is ~{mean(actual_stds):.1f} points (moderate). " f"Only {countries_reclassified} countries have 90% CIs spanning the T=50 autocracy threshold. 
" f"{'Zero' if countries_high_uncertainty == 0 else str(countries_high_uncertainty)} countries have std > 12.") w() # ---------------------------------------------------------- # T2: Internal Consistency Check # ---------------------------------------------------------- w("## T2: Internal Consistency Check") w() # Verify L + T + C = 100 sums = [] violations = 0 for d in DATA: s = d["liberty"] + d["tyranny"] + d["chaos"] sums.append(s) if abs(s - 100) > 0.01: violations += 1 w(f"**Constraint: L + T + C = 100**") w(f"- Observations checked: {len(DATA)}") w(f"- Violations (|sum - 100| > 0.01): **{violations}**") w(f"- Sum range: [{min(sums):.2f}, {max(sums):.2f}]") w(f"- Mean sum: {mean(sums):.4f}") w() # Correlations all_L = [d["liberty"] for d in DATA] all_T = [d["tyranny"] for d in DATA] all_C = [d["chaos"] for d in DATA] r_LT = correlation(all_L, all_T) r_LC = correlation(all_L, all_C) r_TC = correlation(all_T, all_C) w(f"**Pairwise Correlations (n={len(DATA)}):**") w() w(f"| Pair | Pearson r | Interpretation |") w(f"|------|-----------|---------------|") w(f"| L vs T | {r_LT:.4f} | {'Near-perfect anti-correlation' if r_LT < -0.90 else 'Strong negative' if r_LT < -0.70 else 'Moderate negative' if r_LT < -0.40 else 'Weak'} |") w(f"| L vs C | {r_LC:.4f} | {'Strong negative' if r_LC < -0.70 else 'Moderate negative' if r_LC < -0.40 else 'Weak negative' if r_LC < 0 else 'Positive'} |") w(f"| T vs C | {r_TC:.4f} | {'Strong negative' if r_TC < -0.70 else 'Moderate negative' if r_TC < -0.40 else 'Weak negative' if r_TC < 0 else 'Positive'} |") w() # Assess whether system is effectively bivariate w(f"**Is the system effectively bivariate?**") w() if r_LT < -0.90: w(f"- r(L,T) = {r_LT:.4f} < -0.90: **YES, T is near-redundant with 100-L**") w(f" T adds almost no information beyond what L already provides.") w(f" The ternary system is effectively a bivariate L-C system.") elif r_LT < -0.80: w(f"- r(L,T) = {r_LT:.4f}: **PARTIALLY** - T is highly predictable from L but C introduces some independent variation.") else: w(f"- r(L,T) = {r_LT:.4f}: **NO** - C provides enough independent variation that T is not simply 100-L.") w() # Check C variance w(f"**Chaos (C) distribution summary:**") w(f"- Mean(C) = {mean(all_C):.2f}") w(f"- Std(C) = {stdev(all_C):.2f}") w(f"- Range: [{min(all_C):.0f}, {max(all_C):.0f}]") w(f"- If C has low variance, then T ≈ 100 - L and system is bivariate.") w() # ---------------------------------------------------------- # T3: Classification Concordance # ---------------------------------------------------------- w("## T3: Classification Concordance") w() # Use most recent observation per country latest_obs = list(latest_by_country.values()) autocracy_by_L = [d["liberty"] < 40 for d in latest_obs] autocracy_by_T = [d["tyranny"] > 50 for d in latest_obs] n_agree = sum(1 for a, b in zip(autocracy_by_L, autocracy_by_T) if a == b) n_disagree = sum(1 for a, b in zip(autocracy_by_L, autocracy_by_T) if a != b) n_total = len(latest_obs) kappa = cohens_kappa(autocracy_by_L, autocracy_by_T) w(f"**Classification definitions:**") w(f"- Definition A (Liberty-based): Autocracy if L < 40") w(f"- Definition B (Tyranny-based): Autocracy if T > 50") w() w(f"**Results (most recent observation per country, n={n_total}):**") w(f"- Agree: {n_agree} ({100*n_agree/n_total:.1f}%)") w(f"- Disagree: {n_disagree} ({100*n_disagree/n_total:.1f}%)") w(f"- Cohen's Kappa: **{kappa:.4f}**") w() # Contingency table tp = sum(1 for a, b in zip(autocracy_by_L, autocracy_by_T) if a and b) tn = sum(1 for a, b in 
fp = sum(1 for a, b in zip(autocracy_by_L, autocracy_by_T) if not a and b)
fn = sum(1 for a, b in zip(autocracy_by_L, autocracy_by_T) if a and not b)

w(f"**Contingency Table:**")
w(f"```")
w(f"                  T > 50 (autocracy)   T <= 50 (not)")
w(f"L < 40 (autoc)    {tp:3d}                  {fn:3d}")
w(f"L >= 40 (not)     {fp:3d}                  {tn:3d}")
w(f"```")
w()

# Interpretation
if kappa > 0.80:
    kappa_interp = "Almost perfect agreement"
elif kappa > 0.60:
    kappa_interp = "Substantial agreement"
elif kappa > 0.40:
    kappa_interp = "Moderate agreement"
elif kappa > 0.20:
    kappa_interp = "Fair agreement"
else:
    kappa_interp = "Slight/poor agreement"
w(f"**Kappa interpretation:** {kappa_interp}")
w()

# Show disagreement countries
w("### Countries classified differently by L<40 vs T>50")
w()
disagree_countries = []
for d in latest_obs:
    a = d["liberty"] < 40
    b = d["tyranny"] > 50
    if a != b:
        disagree_countries.append(d)
if disagree_countries:
    disagree_countries.sort(key=lambda x: x["country"])
    w("| Country | L | T | C | L<40? | T>50? |")
    w("|---------|---|---|---|-------|-------|")
    for d in disagree_countries:
        w(f"| {d['country']} | {d['liberty']:.0f} | {d['tyranny']:.0f} | {d['chaos']:.0f} | {'Yes' if d['liberty'] < 40 else 'No'} | {'Yes' if d['tyranny'] > 50 else 'No'} |")
    w()
else:
    w("No countries classified differently.")
    w()

# ----------------------------------------------------------
# T4: Do Key Findings Survive Alternative T?
# ----------------------------------------------------------
w("## T4: Alternative T Definition -- Do Key Findings Survive?")
w()
w("**Original:** T = 100 - L - C")
w("**Alternative:** T_alt = 100 - L (ignoring chaos entirely)")
w()

# Attractor basin analysis with original T:
# democracy basin L > 70, autocracy basin L < 30.
# These are L-based, so the T definition doesn't change them --
# but check the distribution shape anyway.

# Get latest L for each country
latest_Ls = [d["liberty"] for d in latest_obs]

# Bimodal distribution check: bin L values into 10-point bins
bins = list(range(0, 101, 10))
bin_counts = [0] * len(bins)
for L in latest_Ls:
    for i in range(len(bins) - 1):
        if bins[i] <= L < bins[i + 1]:
            bin_counts[i] += 1
            break
    if L == 100:
        bin_counts[-2] += 1  # put 100 in the 90-100 bin

w("### L Distribution (latest observation per country)")
w()
w("| L Range | Count (T=100-L-C) | Count (T_alt=100-L) |")
w("|---------|-------------------|---------------------|")
for i in range(len(bins) - 1):
    w(f"| {bins[i]:2d}-{bins[i+1]:2d} | {bin_counts[i]:3d} | {bin_counts[i]:3d} |")
w()
w("*Note: Since attractor basins are defined by L thresholds (L>70 = democracy, L<30 = autocracy),")
w("the T definition does NOT affect basin membership. The bimodal structure is preserved identically.*")
w()

# But check: does T_alt vs T_orig change the dynamics?
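# Added note (pure algebra from the definitions above, not a dataset result):
# T = 100 - L - C and T_alt = 100 - L differ by exactly C (T = T_alt - C), so
# r(T, T_alt) will be close to 1 whenever C varies little relative to L.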
# Compute T and T_alt for all observations
all_T_orig = [d["tyranny"] for d in DATA]
all_T_alt = [100 - d["liberty"] for d in DATA]
r_T_Talt = correlation(all_T_orig, all_T_alt)

w(f"**Correlation between T and T_alt:** r = {r_T_Talt:.4f}")
w()

# Count democracy / autocracy basin populations
n_demo_basin = sum(1 for L in latest_Ls if L > 70)
n_auto_basin = sum(1 for L in latest_Ls if L < 30)
n_middle = sum(1 for L in latest_Ls if 30 <= L <= 70)

w(f"**Basin populations (latest obs):**")
w(f"- Democracy basin (L > 70): {n_demo_basin} countries")
w(f"- Autocracy basin (L < 30): {n_auto_basin} countries")
w(f"- Middle zone (30-70): {n_middle} countries")
w()

# Check bimodality by looking at the bin structure
demo_mass = sum(bin_counts[7:])     # 70+
auto_mass = sum(bin_counts[:3])     # 0-29
middle_mass = sum(bin_counts[3:7])  # 30-69

w(f"**Bimodality test:**")
w(f"- Upper mode (L >= 70): {demo_mass} countries ({100*demo_mass/len(latest_Ls):.1f}%)")
w(f"- Lower mode (L < 30): {auto_mass} countries ({100*auto_mass/len(latest_Ls):.1f}%)")
w(f"- Valley (30-69): {middle_mass} countries ({100*middle_mass/len(latest_Ls):.1f}%)")
w()
if demo_mass > middle_mass and auto_mass > 0:
    w("**VERDICT:** Bimodal structure IS preserved regardless of T definition, because basins are L-defined.")
else:
    w("**VERDICT:** Distribution does not show strong bimodality.")
w(f"Using T_alt = 100-L changes T values but **does not alter** any L-based findings (stages, event horizon, basins).")
w(f"Since r(T, T_alt) = {r_T_Talt:.4f}, the alternative T is {'nearly identical' if r_T_Talt > 0.95 else 'similar but distinct' if r_T_Talt > 0.80 else 'meaningfully different'}.")
w()

# ----------------------------------------------------------
# T5: Chaos Distribution Analysis
# ----------------------------------------------------------
w("## T5: Chaos Distribution Analysis")
w()

all_C = [d["chaos"] for d in DATA]
c_mean = mean(all_C)
c_std = stdev(all_C)
c_min = min(all_C)
c_max = max(all_C)
c_skew = skewness(all_C)
c_median = percentile(all_C, 50)
c_iqr_lo = percentile(all_C, 25)
c_iqr_hi = percentile(all_C, 75)

w(f"**Chaos (C) Distribution Statistics (n={len(all_C)}):**")
w()
w(f"| Statistic | Value |")
w(f"|-----------|-------|")
w(f"| Mean | {c_mean:.2f} |")
w(f"| Std Dev | {c_std:.2f} |")
w(f"| Min | {c_min:.0f} |")
w(f"| Max | {c_max:.0f} |")
w(f"| Median | {c_median:.1f} |")
w(f"| IQR | [{c_iqr_lo:.1f}, {c_iqr_hi:.1f}] |")
w(f"| Skewness | {c_skew:.3f} |")
w()

# Histogram of C
c_bins = list(range(0, 81, 5))
c_hist = [0] * len(c_bins)
for c_val in all_C:
    for i in range(len(c_bins) - 1):
        if c_bins[i] <= c_val < c_bins[i + 1]:
            c_hist[i] += 1
            break
    if c_val >= c_bins[-1]:
        c_hist[-1] += 1

w("### Chaos Histogram")
w()
w("| C Range | Count | Bar |")
w("|---------|-------|-----|")
max_count = max(c_hist) if max(c_hist) > 0 else 1
for i in range(len(c_bins) - 1):
    bar = "#" * int(40 * c_hist[i] / max_count)
    w(f"| {c_bins[i]:2d}-{c_bins[i+1]:2d} | {c_hist[i]:4d} | {bar} |")
if c_hist[-1] > 0:
    bar = "#" * int(40 * c_hist[-1] / max_count)
    w(f"| {c_bins[-1]:2d}+ | {c_hist[-1]:4d} | {bar} |")
w()

# Check whether C is near-constant (low variance)
w("### Is the system effectively bivariate?")
w()
cv = c_std / c_mean if c_mean > 0 else 0  # coefficient of variation
w(f"- Coefficient of variation (CV) = std/mean = {cv:.3f}")
w(f"- If CV < 0.3, C is relatively stable -> system is approximately bivariate")
w(f"- CV = {cv:.3f}: C is {'relatively stable' if cv < 0.3 else 'moderately variable' if cv < 0.6 else 'highly variable'}")
w()
# Partial correlation of T and C, controlling for L.
# Since T = 100-L-C, the partial correlation of T with some external Y controlling for L
# would capture only the C component. We don't have an external Y, but we can compute
# the partial correlation of T and C controlling for L:
# partial_r(T,C|L) = (r_TC - r_TL*r_CL) / sqrt((1-r_TL^2)*(1-r_CL^2))
r_TL = correlation(all_T, all_L)
r_TC_val = correlation(all_T, all_C)
r_CL = correlation(all_C, all_L)
denom = math.sqrt(max(0.0001, (1 - r_TL**2) * (1 - r_CL**2)))
partial_r_TC_given_L = (r_TC_val - r_TL * r_CL) / denom

w(f"**Partial correlation r(T, C | L) = {partial_r_TC_given_L:.4f}**")
w(f"- This captures how much T varies with C *after* removing L's influence")
w(f"- Since T = 100-L-C by construction, partial_r(T,C|L) should be strongly negative")
w(f"  (higher C means lower T, given fixed L)")
w()

# What fraction of T variance is explained by L alone?
r2_TL = r_TL ** 2
w(f"**R-squared of T ~ L: {r2_TL:.4f}**")
w(f"- L alone explains {100*r2_TL:.1f}% of T variance")
w(f"- Remaining {100*(1-r2_TL):.1f}% is attributable to C variation")
w()
if r2_TL > 0.90:
    verdict = "T is >90% determined by L. The ternary system is EFFECTIVELY BIVARIATE. T adds negligible independent information."
elif r2_TL > 0.75:
    verdict = f"T is {100*r2_TL:.0f}% determined by L. C contributes meaningfully but the system is predominantly bivariate."
else:
    verdict = f"T is only {100*r2_TL:.0f}% determined by L. C contributes substantial independent variation. The ternary system is genuinely trivariate."
w(f"**VERDICT:** {verdict}")
w()

# ============================================================
# FINAL SYNTHESIS
# ============================================================
w("---")
w()
w("# SYNTHESIS: Key Findings")
w()
w("## RP-02 Summary")
w()
w(f"1. **Cross-index consensus** places US at L={cross_mean:.0f} (range {cross_min:.0f}-{cross_max:.0f}). "
  f"The thesis L=48 sits within this range, between FH ({L_from_FH:.0f}) and V-Dem ({L_from_VDem:.0f}).")
w()
w(f"2. **Sensitivity analysis** shows the event horizon (L≈52-55) is the critical threshold. "
  f"At L=48, the US is below it; at L=58 and above, it is above. "
  f"The AR(1) model predicts {'continued decline' if ar1_predict(48) < 48 else 'slight recovery'} from L=48 "
  f"(next-year prediction: {ar1_predict(48):.1f}).")
w()
# Use the computed rank for the narrative
w(f"3. **Historical velocity context:** A 46-point decline in 10 years (94->48) ranks "
  f"#{rank_46} out of {total_declines} episodes. "
  f"{'This is historically unprecedented for an established democracy.' if rank_46 <= 10 else 'This is historically rare but not unprecedented.'}")
w()
w("## RP-04 Summary")
w()
w(f"4. **Error propagation:** +/-5 point measurement error produces ~{mean(actual_stds):.1f} point T uncertainty. "
  f"{countries_reclassified} countries have CIs spanning the T=50 autocracy threshold.")
w()
w(f"5. **Internal consistency:** L+T+C=100 holds exactly ({violations} violations). "
  f"r(L,T)={r_LT:.3f} -- {'T is nearly redundant with L' if r_LT < -0.90 else 'T captures some independent variation from C'}.")
w()
w(f"6. **Classification concordance:** L<40 vs T>50 agree on {100*n_agree/n_total:.0f}% of countries "
  f"(kappa={kappa:.3f}, {kappa_interp.lower()}).")
w()
w(f"7. **Alternative T (ignoring C):** r(T, T_alt)={r_T_Talt:.3f}. "
  f"Bimodal attractor structure is {'preserved' if r_T_Talt > 0.90 else 'altered'} because basins are L-defined.")
w()
w(f"8. **Chaos distribution:** C has mean={c_mean:.1f}, std={c_std:.1f}, range [{c_min:.0f},{c_max:.0f}]. "
  f"L explains {100*r2_TL:.0f}% of T variance. "
  f"{'The system is effectively bivariate.' if r2_TL > 0.85 else 'C contributes meaningful independent variation.'}")
w()

# ============================================================
# WRITE OUTPUT
# ============================================================
output = "\n".join(OUT)
with open("/tmp/pt-data/rp02-rp04-results.md", "w") as f:
    f.write(output)

print("\n" + "=" * 60)
print("RESULTS WRITTEN TO: /tmp/pt-data/rp02-rp04-results.md")
print("=" * 60)
print()
print(output)