#!/usr/bin/env python3
"""
Entropy-based observable-diameter estimator on complex projective space CP^n.

Interpretation
--------------
We identify CP^n with the projective pure-state space of C^(n+1).  To define
an entanglement entropy observable we choose a factorization

    n + 1 = d_A * d_B,

so the projective space is CP^(d_A d_B - 1).  For a projective point [psi],
represented by a unit vector psi in C^(d_A d_B), define the observable

    S_A([psi]) = -Tr(rho_A log_2 rho_A),
    rho_A = Tr_B |psi><psi|.

The true observable diameter ObsDiam(X; -kappa) is the supremum over all
1-Lipschitz observables.  This script only uses the von Neumann entropy
observable, so it reports:

1) the partial diameter of the push-forward entropy distribution,
2) an optional Lipschitz-normalized proxy obtained by dividing by an empirical
   Lipschitz constant estimated with the Fubini-Study metric.

Hence the output is best interpreted as an entropy-based observable-diameter
proxy, not as the exact observable diameter of CP^n.

Hayden-inspired comparison
--------------------------
Hayden/Leung/Winter show that the entanglement entropy of a Haar-random pure
state is highly concentrated in high dimension.  The script overlays two
useful theoretical guides:

- a one-sided lower-tail cutoff derived from the standard Hayden bound,
- a Levy/Hayden scaling width of order (log d_A)/sqrt(d_A d_B), centered at
  the empirical median, to visualize concentration-of-measure decay.

Sampling method
---------------
A Haar-random pure state on C^(d_A d_B) can be generated by normalizing a
complex Gaussian vector.  Equivalently, we sample a complex Gaussian matrix
G in C^(d_A x d_B); then vec(G)/||G|| is Haar-random and
rho_A = G G^* / Tr(G G^*).

Outputs
-------
The script writes:
- a CSV summary table,
- per-system entropy histograms,
- a concentration summary plot across dimensions,
- a normalized observable-proxy plot if Lipschitz estimation is enabled,
- a tail plot for the largest system.
"""

from __future__ import annotations

import argparse
import csv
import math
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, List, Sequence, Tuple

import matplotlib.pyplot as plt
import numpy as np

from tqdm import tqdm

# A commonly used explicit constant in expositions of Hayden's concentration
# bound in natural logs.  We keep the entropy in bits, in which the same
# constant remains after the base conversion in the exponent.
HAYDEN_C = 1.0 / (8.0 * math.pi ** 2)


def parse_dims(spec: str) -> List[Tuple[int, int]]:
    dims: List[Tuple[int, int]] = []
    for item in spec.split(","):
        token = item.strip().lower()
        if not token:
            continue
        if "x" not in token:
            raise ValueError(f"Bad dimension token '{item}'. Use forms like 4x8,8x16.")
        a_str, b_str = token.split("x", 1)
        d_a = int(a_str)
        d_b = int(b_str)
        if d_a <= 1 or d_b <= 1:
            raise ValueError("Both subsystem dimensions must be >= 2.")
        if d_a > d_b:
            d_a, d_b = d_b, d_a
        dims.append((d_a, d_b))
    if not dims:
        raise ValueError("No dimensions were parsed.")
    return dims


def haar_matrix(d_a: int, d_b: int, rng: np.random.Generator) -> np.ndarray:
    real = rng.normal(size=(d_a, d_b))
    imag = rng.normal(size=(d_a, d_b))
    return (real + 1j * imag) / math.sqrt(2.0)


def reduced_density_from_matrix(g: np.ndarray) -> np.ndarray:
    rho = g @ g.conj().T
    tr = float(np.trace(rho).real)
    rho /= tr
    return rho


def entropy_bits_from_rho(rho: np.ndarray, tol: float = 1e-14) -> float:
    eigvals = np.linalg.eigvalsh(rho)
    eigvals = np.clip(eigvals.real, 0.0, 1.0)
    eigvals = eigvals[eigvals > tol]
    if eigvals.size == 0:
        return 0.0
    return float(-np.sum(eigvals * np.log2(eigvals)))


def random_state_and_entropy(
    d_a: int, d_b: int, rng: np.random.Generator
) -> Tuple[np.ndarray, float]:
    g = haar_matrix(d_a, d_b, rng)
    rho_a = reduced_density_from_matrix(g)
    entropy_bits = entropy_bits_from_rho(rho_a)
    psi = g.reshape(-1)
    psi /= np.linalg.norm(psi)
    return psi, entropy_bits


def partial_diameter(samples: np.ndarray, mass: float) -> Tuple[float, float, float]:
    if not 0.0 < mass <= 1.0:
        raise ValueError("mass must lie in (0, 1].")
    x = np.sort(np.asarray(samples, dtype=float))
    n = x.size
    if n == 0:
        raise ValueError("samples must be non-empty")
    if n == 1:
        return 0.0, float(x[0]), float(x[0])
    m = int(math.ceil(mass * n))
    if m <= 1:
        return 0.0, float(x[0]), float(x[0])
    widths = x[m - 1 :] - x[: n - m + 1]
    idx = int(np.argmin(widths))
    left = float(x[idx])
    right = float(x[idx + m - 1])
    return float(right - left), left, right


def fubini_study_distance(psi: np.ndarray, phi: np.ndarray) -> float:
    overlap = abs(np.vdot(psi, phi))
    overlap = min(1.0, max(0.0, float(overlap)))
    return float(math.acos(overlap))


def empirical_lipschitz_constant(
    states: Sequence[np.ndarray],
    values: np.ndarray,
    rng: np.random.Generator,
    num_pairs: int,
) -> Tuple[float, float]:
    n = len(states)
    if n < 2 or num_pairs <= 0:
        return float("nan"), float("nan")
    ratios = []
    values = np.asarray(values, dtype=float)
    for _ in range(num_pairs):
        i = int(rng.integers(0, n))
        j = int(rng.integers(0, n - 1))
        if j >= i:
            j += 1
        d_fs = fubini_study_distance(states[i], states[j])
        if d_fs < 1e-12:
            continue
        ratio = abs(values[i] - values[j]) / d_fs
        ratios.append(ratio)
    if not ratios:
        return float("nan"), float("nan")
    arr = np.asarray(ratios, dtype=float)
    return float(np.max(arr)), float(np.quantile(arr, 0.99))


def hayden_mean_lower_bound_bits(d_a: int, d_b: int) -> float:
    return math.log2(d_a) - d_a / (2.0 * math.log(2.0) * d_b)


def hayden_beta_bits(d_a: int, d_b: int) -> float:
    return d_a / (math.log(2.0) * d_b)


def hayden_alpha_bits(d_a: int, d_b: int, kappa: float) -> float:
    dim = d_a * d_b
    return (math.log2(d_a) / math.sqrt(HAYDEN_C * (dim - 1.0))) * math.sqrt(math.log(1.0 / kappa))


def hayden_one_sided_width_bits(d_a: int, d_b: int, kappa: float) -> float:
    return hayden_beta_bits(d_a, d_b) + hayden_alpha_bits(d_a, d_b, kappa)


def hayden_lower_cutoff_bits(d_a: int, d_b: int, kappa: float) -> float:
    return math.log2(d_a) - hayden_one_sided_width_bits(d_a, d_b, kappa)


def levy_hayden_scaling_width_bits(d_a: int, d_b: int, kappa: float) -> float:
    dim = d_a * d_b
    half_width = (math.log2(d_a) / math.sqrt(HAYDEN_C * (dim - 1.0))) * math.sqrt(math.log(2.0 / kappa))
    return 2.0 * half_width


def hayden_deficit_tail_bound_bits(d_a: int, d_b: int, deficits_bits: np.ndarray) -> np.ndarray:
    beta = hayden_beta_bits(d_a, d_b)
    dim = d_a * d_b
    log_term = math.log2(d_a)
    shifted = np.maximum(np.asarray(deficits_bits, dtype=float) - beta, 0.0)
    exponent = -(dim - 1.0) * HAYDEN_C * (shifted ** 2) / (log_term ** 2)
    bound = np.exp(exponent)
    bound[deficits_bits <= beta] = 1.0
    return np.clip(bound, 0.0, 1.0)


def page_average_entropy_bits(d_a: int, d_b: int) -> float:
    # Exact Page formula in bits for d_b >= d_a.
    harmonic_tail = sum(1.0 / k for k in range(d_b + 1, d_a * d_b + 1))
    nats = harmonic_tail - (d_a - 1.0) / (2.0 * d_b)
    return nats / math.log(2.0)


@dataclass
class SystemResult:
    d_a: int
    d_b: int
    projective_dim: int
    num_samples: int
    kappa: float
    mass: float
    entropy_bits: np.ndarray
    partial_diameter_bits: float
    interval_left_bits: float
    interval_right_bits: float
    mean_bits: float
    median_bits: float
    std_bits: float
    page_average_bits: float
    hayden_mean_lower_bits: float
    hayden_cutoff_bits: float
    hayden_one_sided_width_bits: float
    levy_scaling_width_bits: float
    empirical_lipschitz_max: float
    empirical_lipschitz_q99: float
    normalized_proxy_max: float
    normalized_proxy_q99: float


def simulate_system(
    d_a: int,
    d_b: int,
    num_samples: int,
    kappa: float,
    rng: np.random.Generator,
    lipschitz_pairs: int,
) -> Tuple[SystemResult, List[np.ndarray]]:
    entropies = np.empty(num_samples, dtype=float)
    states: List[np.ndarray] = []
    for idx in tqdm(range(num_samples),desc=f"Simulating system for {d_a}x{d_b} with kappa={kappa}", unit="samples"):
        psi, s_bits = random_state_and_entropy(d_a, d_b, rng)
        entropies[idx] = s_bits
        states.append(psi)

    mass = 1.0 - kappa
    width, left, right = partial_diameter(entropies, mass)
    lip_max, lip_q99 = empirical_lipschitz_constant(states, entropies, rng, lipschitz_pairs)

    normalized_proxy_max = width / lip_max if lip_max == lip_max and lip_max > 0 else float("nan")
    normalized_proxy_q99 = width / lip_q99 if lip_q99 == lip_q99 and lip_q99 > 0 else float("nan")

    result = SystemResult(
        d_a=d_a,
        d_b=d_b,
        projective_dim=d_a * d_b - 1,
        num_samples=num_samples,
        kappa=kappa,
        mass=mass,
        entropy_bits=entropies,
        partial_diameter_bits=width,
        interval_left_bits=left,
        interval_right_bits=right,
        mean_bits=float(np.mean(entropies)),
        median_bits=float(np.median(entropies)),
        std_bits=float(np.std(entropies, ddof=1)) if num_samples > 1 else 0.0,
        page_average_bits=page_average_entropy_bits(d_a, d_b),
        hayden_mean_lower_bits=hayden_mean_lower_bound_bits(d_a, d_b),
        hayden_cutoff_bits=hayden_lower_cutoff_bits(d_a, d_b, kappa),
        hayden_one_sided_width_bits=hayden_one_sided_width_bits(d_a, d_b, kappa),
        levy_scaling_width_bits=levy_hayden_scaling_width_bits(d_a, d_b, kappa),
        empirical_lipschitz_max=lip_max,
        empirical_lipschitz_q99=lip_q99,
        normalized_proxy_max=normalized_proxy_max,
        normalized_proxy_q99=normalized_proxy_q99,
    )
    return result, states


def write_summary_csv(results: Sequence[SystemResult], out_path: Path) -> None:
    fieldnames = [
        "d_a",
        "d_b",
        "projective_dim",
        "num_samples",
        "kappa",
        "mass",
        "partial_diameter_bits",
        "interval_left_bits",
        "interval_right_bits",
        "mean_bits",
        "median_bits",
        "std_bits",
        "page_average_bits",
        "hayden_mean_lower_bits",
        "hayden_cutoff_bits",
        "hayden_one_sided_width_bits",
        "levy_scaling_width_bits",
        "empirical_lipschitz_max_bits_per_rad",
        "empirical_lipschitz_q99_bits_per_rad",
        "normalized_proxy_max_rad",
        "normalized_proxy_q99_rad",
    ]
    with out_path.open("w", newline="") as fh:
        writer = csv.DictWriter(fh, fieldnames=fieldnames)
        writer.writeheader()
        for r in results:
            writer.writerow(
                {
                    "d_a": r.d_a,
                    "d_b": r.d_b,
                    "projective_dim": r.projective_dim,
                    "num_samples": r.num_samples,
                    "kappa": r.kappa,
                    "mass": r.mass,
                    "partial_diameter_bits": r.partial_diameter_bits,
                    "interval_left_bits": r.interval_left_bits,
                    "interval_right_bits": r.interval_right_bits,
                    "mean_bits": r.mean_bits,
                    "median_bits": r.median_bits,
                    "std_bits": r.std_bits,
                    "page_average_bits": r.page_average_bits,
                    "hayden_mean_lower_bits": r.hayden_mean_lower_bits,
                    "hayden_cutoff_bits": r.hayden_cutoff_bits,
                    "hayden_one_sided_width_bits": r.hayden_one_sided_width_bits,
                    "levy_scaling_width_bits": r.levy_scaling_width_bits,
                    "empirical_lipschitz_max_bits_per_rad": r.empirical_lipschitz_max,
                    "empirical_lipschitz_q99_bits_per_rad": r.empirical_lipschitz_q99,
                    "normalized_proxy_max_rad": r.normalized_proxy_max,
                    "normalized_proxy_q99_rad": r.normalized_proxy_q99,
                }
            )


def plot_histogram(result: SystemResult, outdir: Path) -> Path:
    plt.figure(figsize=(8.5, 5.5))
    ent = result.entropy_bits
    plt.hist(ent, bins=40, density=True, alpha=0.75)
    plt.axvline(math.log2(result.d_a), linestyle="--", linewidth=2, label=r"$\log_2 d_A$")
    plt.axvline(result.mean_bits, linestyle="-.", linewidth=2, label="empirical mean")
    plt.axvline(result.page_average_bits, linestyle=":", linewidth=2, label="Page average")
    local_min = float(np.min(ent))
    local_max = float(np.max(ent))
    local_range = max(local_max - local_min, 1e-9)
    if result.hayden_cutoff_bits >= local_min - 0.15 * local_range:
        plt.axvline(result.hayden_cutoff_bits, linestyle="-", linewidth=2, label="Hayden cutoff")
    plt.axvspan(result.interval_left_bits, result.interval_right_bits, alpha=0.18, label=f"shortest {(result.mass):.0%} interval")
    plt.xlim(local_min - 0.12 * local_range, local_max + 0.35 * local_range)
    plt.xlabel("Entropy of entanglement S_A (bits)")
    plt.ylabel("Empirical density")
    plt.title(
        f"Entropy distribution on CP^{result.projective_dim} via C^{result.d_a} ⊗ C^{result.d_b}"
    )
    plt.legend(frameon=False)
    plt.tight_layout()
    out_path = outdir / f"entropy_histogram_{result.d_a}x{result.d_b}.png"
    plt.savefig(out_path, dpi=180)
    plt.close()
    return out_path


def plot_tail(result: SystemResult, outdir: Path) -> Path:
    deficits = math.log2(result.d_a) - np.sort(result.entropy_bits)
    n = deficits.size
    ccdf = 1.0 - (np.arange(1, n + 1) / n)
    ccdf = np.maximum(ccdf, 1.0 / n)
    x_grid = np.linspace(0.0, max(float(np.max(deficits)), result.hayden_one_sided_width_bits) * 1.05, 250)
    bound = hayden_deficit_tail_bound_bits(result.d_a, result.d_b, x_grid)

    plt.figure(figsize=(8.5, 5.5))
    plt.semilogy(deficits, ccdf, marker="o", linestyle="none", markersize=3, alpha=0.5, label="empirical tail")
    plt.semilogy(x_grid, bound, linewidth=2, label="Hayden lower-tail bound")
    plt.axvline(hayden_beta_bits(result.d_a, result.d_b), linestyle="--", linewidth=1.8, label=r"$\beta$")
    plt.xlabel(r"Entropy deficit $\log_2 d_A - S_A$ (bits)")
    plt.ylabel(r"Tail probability $\Pr[\log_2 d_A - S_A > t]$")
    plt.title(f"Entropy-deficit tail for C^{result.d_a} ⊗ C^{result.d_b}")
    plt.legend(frameon=False)
    plt.tight_layout()
    out_path = outdir / f"entropy_tail_{result.d_a}x{result.d_b}.png"
    plt.savefig(out_path, dpi=180)
    plt.close()
    return out_path


def plot_concentration_summary(results: Sequence[SystemResult], outdir: Path) -> Path:
    x = np.array([r.projective_dim for r in results], dtype=float)
    partial_width = np.array([r.partial_diameter_bits for r in results], dtype=float)
    std = np.array([r.std_bits for r in results], dtype=float)
    mean_deficit = np.array([math.log2(r.d_a) - r.mean_bits for r in results], dtype=float)

    plt.figure(figsize=(8.5, 5.5))
    plt.plot(x, partial_width, marker="o", linewidth=2, label=r"shortest $(1-\kappa)$ entropy interval")
    plt.plot(x, std, marker="s", linewidth=2, label="empirical standard deviation")
    plt.plot(x, mean_deficit, marker="^", linewidth=2, label=r"mean deficit $\log_2 d_A - \mathbb{E}S_A$")
    plt.xlabel(r"Projective dimension $n = d_A d_B - 1$")
    plt.ylabel(r"Bits")
    plt.title("Empirical concentration of the entropy observable on CP^n")
    plt.legend(frameon=False)
    plt.tight_layout()
    out_path = outdir / "entropy_partial_diameter_vs_projective_dimension.png"
    plt.savefig(out_path, dpi=180)
    plt.close()
    return out_path


def plot_normalized_proxy(results: Sequence[SystemResult], outdir: Path) -> Path | None:
    good = [r for r in results if r.normalized_proxy_q99 == r.normalized_proxy_q99]
    if not good:
        return None
    x = np.array([r.projective_dim for r in good], dtype=float)
    y_max = np.array([r.normalized_proxy_max for r in good], dtype=float)
    y_q99 = np.array([r.normalized_proxy_q99 for r in good], dtype=float)

    plt.figure(figsize=(8.5, 5.5))
    plt.plot(x, y_max, marker="o", linewidth=2, label="width / sampled Lipschitz max")
    plt.plot(x, y_q99, marker="s", linewidth=2, label="width / sampled Lipschitz q99")
    plt.xlabel(r"Projective dimension $n = d_A d_B - 1$")
    plt.ylabel("Empirical normalized proxy (radians)")
    plt.title("Lipschitz-normalized entropy proxy for observable diameter")
    plt.legend(frameon=False)
    plt.tight_layout()
    out_path = outdir / "normalized_entropy_proxy_vs_projective_dimension.png"
    plt.savefig(out_path, dpi=180)
    plt.close()
    return out_path


def print_console_summary(results: Sequence[SystemResult]) -> None:
    print("dA dB  CP^n     mean(bits)  part_diam(bits)  Page(bits)  Hayden_cutoff(bits)  L_emp_q99")
    for r in results:
        lip_q99 = f"{r.empirical_lipschitz_q99:.4f}" if r.empirical_lipschitz_q99 == r.empirical_lipschitz_q99 else "nan"
        print(
            f"{r.d_a:2d} {r.d_b:2d}  {r.projective_dim:5d}  "
            f"{r.mean_bits:10.6f}  {r.partial_diameter_bits:15.6f}  "
            f"{r.page_average_bits:10.6f}  {r.hayden_cutoff_bits:20.6f}  {lip_q99}"
        )


def build_argument_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--dims",
        default="4x4,8x8,12x12,16x16,32x32,64x64,128x128",
        help="Comma-separated subsystem sizes, e.g. 4x4,8x8,8x16",
    )
    parser.add_argument("--samples", type=int, default=10**6, help="Samples per system")
    parser.add_argument("--kappa", type=float, default=1e-3, help="Observable-diameter loss parameter kappa")
    parser.add_argument(
        "--lipschitz-pairs",
        type=int,
        default=6000,
        help="Number of random state pairs used for empirical Lipschitz estimation",
    )
    parser.add_argument("--seed", type=int, default=7, help="RNG seed")
    parser.add_argument(
        "--outdir",
        type=str,
        default="cpn_entropy_output",
        help="Output directory for CSV and plots",
    )
    return parser


def main() -> None:
    parser = build_argument_parser()
    args = parser.parse_args()

    if not 0.0 < args.kappa < 1.0:
        raise ValueError("kappa must lie in (0, 1)")
    if args.samples < 10:
        raise ValueError("Use at least 10 samples per system")

    dims = parse_dims(args.dims)
    rng = np.random.default_rng(args.seed)

    outdir = Path(args.outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    results: List[SystemResult] = []
    for d_a, d_b in dims:
        result, _states = simulate_system(
            d_a=d_a,
            d_b=d_b,
            num_samples=args.samples,
            kappa=args.kappa,
            rng=rng,
            lipschitz_pairs=args.lipschitz_pairs,
        )
        results.append(result)
        plot_histogram(result, outdir)

    results = sorted(results, key=lambda r: r.projective_dim)
    write_summary_csv(results, outdir / "entropy_observable_summary.csv")
    plot_concentration_summary(results, outdir)
    plot_normalized_proxy(results, outdir)
    plot_tail(results[-1], outdir)
    print_console_summary(results)
    print(f"\nWrote results to: {outdir.resolve()}")


if __name__ == "__main__":
    main()