% chapters/chap1.tex
\documentclass[../main.tex]{subfiles}
% If this chapter is compiled *by itself*, we must load the shared bibliography
% and print this chapter's bibliography at the end.
\ifSubfilesClassLoaded{
\addbibresource{../main.bib}
}
\usepackage{amsmath, amsfonts, amsthm}
\usepackage{fancyhdr,parskip}
\usepackage{fullpage}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% add special notation supports
\usepackage[mathscr]{euscript}
\usepackage{mathtools}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% add image package and directory
\usepackage{graphicx}
\usepackage{tikz}
\graphicspath{{../images/}}
\begin{document}
\chapter{Concentration of Measure And Quantum Entanglement}
First, we will build the mathematical model describing the behavior of quantum systems and explain why it makes sense to physicists and is meaningful to the general public.
\section{Motivation}
We begin with a motivation for bringing non-commutative probability theory into the study of quantum mechanics. This section is mainly based on the book~\cite{kummer1998elements}.
\subsection{Light polarization and the violation of Bell's inequality}
Light that passes through a polarizing filter is polarized in a fixed direction. If we fix a first filter and rotate a second one behind it, we observe that the intensity of the transmitted light changes with $\alpha$, the angle between the two filter directions: it decreases as $\alpha$ increases and vanishes at $\alpha=\pi/2$.
Now consider a system of three polarizing filters $F_1,F_2,F_3$ with directions $\alpha_1,\alpha_2,\alpha_3$. Placing them on the optical bench in pairs yields three $\{0,1\}$-valued random variables $P_1,P_2,P_3$, where $P_i=1$ means that the photon passes filter $F_i$.
\begin{figure}[h]
\centering
\includegraphics[width=0.7\textwidth]{Filter_figure.png}
\caption{The light polarization experiment, image from \cite{kummer1998elements}}
\label{fig:Filter_figure}
\end{figure}
\begin{theorem}
\label{theorem:Bell's_3_variable_inequality}
Bell's three-variable inequality:
For any three $\{0,1\}$-valued random variables $P_1,P_2,P_3$ on a classical probability space, we have
$$
\operatorname{Prob}(P_1=1,P_3=0)\leq \operatorname{Prob}(P_1=1,P_2=0)+\operatorname{Prob}(P_2=1,P_3=0)
$$
\end{theorem}
\begin{proof}
If no light is observed passing the filter pair $F_i,F_j$, the photon was blocked either by $F_i$ or by $F_j$. Hence, decomposing over the value of $P_2$ by the law of total probability,
$$
\begin{aligned}
\operatorname{Prob}(P_1=1,P_3=0)&=\operatorname{Prob}(P_1=1,P_2=0,P_3=0)\\
&+\operatorname{Prob}(P_1=1,P_2=1,P_3=0)\\
&\leq\operatorname{Prob}(P_1=1,P_2=0)+\operatorname{Prob}(P_2=1,P_3=0)
\end{aligned}
$$
where the last step drops one event from each joint probability, which can only increase the probability.
\end{proof}
However, according to our experimental measurements, for any pair of polarizers $F_i,F_j$, by the complement rule we have
$$
\begin{aligned}
\operatorname{Prob}(P_i=1,P_j=0)&=\operatorname{Prob}(P_i=1)-\operatorname{Prob}(P_i=1,P_j=1)\\
&=\frac{1}{2}-\frac{1}{2}\cos^2(\alpha_i-\alpha_j)\\
&=\frac{1}{2}\sin^2(\alpha_i-\alpha_j)
\end{aligned}
$$
This leads to a contradiction if we apply the inequality to the experimental data.
$$
\frac{1}{2}\sin^2(\alpha_1-\alpha_3)\leq\frac{1}{2}\sin^2(\alpha_1-\alpha_2)+\frac{1}{2}\sin^2(\alpha_2-\alpha_3)
$$
If $\alpha_1=0$, $\alpha_2=\frac{\pi}{6}$, $\alpha_3=\frac{\pi}{3}$, the inequality would require
$$
\begin{aligned}
\frac{1}{2}\sin^2\left(-\frac{\pi}{3}\right)&\leq\frac{1}{2}\sin^2\left(-\frac{\pi}{6}\right)+\frac{1}{2}\sin^2\left(\frac{\pi}{6}-\frac{\pi}{3}\right)\\
\frac{3}{8}&\leq\frac{1}{8}+\frac{1}{8}=\frac{1}{4}
\end{aligned}
$$
which is false, since $\frac{3}{8}>\frac{1}{4}$.
Other, refined experiments (e.g., Aspect's experiment and the calcium entangled-photon experiment) have also been conducted, and the inequality is still violated.
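As a sanity check, the violation is easy to verify numerically. The following Python sketch (an illustration, not part of the thesis code; the function names are mine) evaluates both sides of Bell's inequality under the observed rule $\operatorname{Prob}(P_i=1,P_j=0)=\frac{1}{2}\sin^2(\alpha_i-\alpha_j)$ and scans a grid of angle triples for violations.
\begin{verbatim}
import numpy as np

# Observed rule: Prob(P_i = 1, P_j = 0) = sin^2(a_i - a_j) / 2.
def lhs(a1, a2, a3):          # left-hand side of Bell's inequality
    return 0.5 * np.sin(a1 - a3) ** 2

def rhs(a1, a2, a3):          # right-hand side of Bell's inequality
    return 0.5 * np.sin(a1 - a2) ** 2 + 0.5 * np.sin(a2 - a3) ** 2

# The triple from the text: lhs = 3/8 > 1/4 = rhs.
print(lhs(0, np.pi / 6, np.pi / 3), rhs(0, np.pi / 6, np.pi / 3))

# Scan a grid of angles to see how common violations are.
grid = np.linspace(0, np.pi, 50)
viol = sum(lhs(a1, a2, a3) > rhs(a1, a2, a3) + 1e-12
           for a1 in grid for a2 in grid for a3 in grid)
print(viol, "violating triples out of", 50 ** 3)
\end{verbatim}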
\subsection{The true model of light polarization}
The full description of the light polarization is given below:
State of polarization of a photon: a unit vector $\psi=a|0\rangle+b|1\rangle$, $a,b\in\mathbb{C}$ with $|a|^2+|b|^2=1$, where $|0\rangle$ and $|1\rangle$ are two orthogonal polarization states in $\mathbb{C}^2$.
Polarization filter (generalized 0,1 valued random variable): orthogonal projection $P_\alpha$ on $\mathbb{C}^2$ corresponding to the direction $\alpha$ (operator satisfies $P_\alpha^*=P_\alpha=P_\alpha^2$).
The matrix representation of $P_\alpha$ is given by
$$
P_\alpha=\begin{pmatrix}
\cos^2(\alpha) & \cos(\alpha)\sin(\alpha)\\
\cos(\alpha)\sin(\alpha) & \sin^2(\alpha)
\end{pmatrix}
$$
Probability of a photon passing through the filter $P_\alpha$ is given by $\langle P_\alpha\psi,\psi\rangle$; this is $\cos^2(\alpha)$ if we set $\psi=|0\rangle$.
Since the projections $P_{\alpha_1},P_{\alpha_2},P_{\alpha_3}$ do not commute, the three filter observables admit no joint distribution, and it is impossible to discuss $\operatorname{Prob}(P_1=1,P_3=0)$ in the classical setting.
We now show how the experimentally observed probability
$$
\frac{1}{2}\sin^2(\alpha_i-\alpha_j)
$$
arises from the operator model.
Assume the incoming light is \emph{unpolarized}. It is therefore described by
the density matrix
$$
\rho=\frac{1}{2} I .
$$
Let $P_{\alpha_i}$ and $P_{\alpha_j}$ be the orthogonal projections corresponding
to the two polarization filters with angles $\alpha_i$ and $\alpha_j$.
The probability that a photon passes the first filter $P_{\alpha_i}$ is given by the Born rule:
$$
\operatorname{Prob}(P_i=1)
=\operatorname{tr}(\rho P_{\alpha_i})
=\frac{1}{2} \operatorname{tr}(P_{\alpha_i})
=\frac{1}{2}
$$
If the photon passes the first filter, the post-measurement state is given by the L\"uders rule:
$$
\rho \longmapsto
\rho_i
=\frac{P_{\alpha_i}\rho P_{\alpha_i}}{\operatorname{tr}(\rho P_{\alpha_i})}
= P_{\alpha_i}.
$$
The probability that the photon then passes the second filter is
$$
\operatorname{Prob}(P_j=1 \mid P_i=1)
=\operatorname{tr}(P_{\alpha_i} P_{\alpha_j})
=\cos^2(\alpha_i-\alpha_j).
$$
Hence, the probability that the photon passes $P_{\alpha_i}$ and is then blocked by $P_{\alpha_j}$ is
$$
\begin{aligned}
\operatorname{Prob}(P_i=1, P_j=0)
&= \operatorname{Prob}(P_i=1)
- \operatorname{Prob}(P_i=1, P_j=1) \\
&= \frac12 - \frac12 \cos^2(\alpha_i-\alpha_j) \\
&= \frac12 \sin^2(\alpha_i-\alpha_j).
\end{aligned}
$$
This agrees with the experimentally observed transmission probabilities, but it should be emphasized that this quantity corresponds to a \emph{sequential measurement} rather than a joint probability in the classical sense.
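The calculation above is easy to check numerically. Below is a minimal Python/NumPy sketch (an illustration under the model above, not code from the sources) that builds the projections $P_\alpha$ and reproduces $\frac{1}{2}\sin^2(\alpha_i-\alpha_j)$ for the sequential measurement.
\begin{verbatim}
import numpy as np

def proj(a):
    # Orthogonal projection onto the polarization direction a.
    v = np.array([np.cos(a), np.sin(a)])
    return np.outer(v, v)

ai, aj = 0.3, 1.1            # filter angles in radians
rho = np.eye(2) / 2          # unpolarized light: rho = I/2
Pi, Pj = proj(ai), proj(aj)

p_i = np.trace(rho @ Pi)     # Born rule: Prob(P_i = 1) = 1/2
rho_i = Pi @ rho @ Pi / p_i  # Lueders rule: post-measurement state = P_i

# Prob(P_i = 1, P_j = 0) = Prob(P_i = 1) * (1 - tr(rho_i P_j))
p_seq = p_i * (1 - np.trace(rho_i @ Pj))
print(p_seq, 0.5 * np.sin(ai - aj) ** 2)   # both approximately 0.2573
\end{verbatim}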
\section{Concentration of measure phenomenon}
\begin{defn}
$\eta$-Lipschitz function:
Let $(X,\operatorname{dist}_X)$ and $(Y,\operatorname{dist}_Y)$ be two metric spaces. A function $f:X\to Y$ is said to be Lipschitz if there exists a constant $L\geq 0$ such that
\[
\operatorname{dist}_Y(f(x),f(y))\leq L\operatorname{dist}_X(x,y)
\]
for all $x,y\in X$. The smallest such constant, $\eta=\|f\|_{\operatorname{Lip}}=\inf\{L\geq 0: \text{the inequality above holds for all } x,y\in X\}$, is called the Lipschitz constant of $f$, and we say $f$ is $\eta$-Lipschitz.
\end{defn}
Intuitively, this means that $f$ may not stretch the distance between any pair of points of $X$ by more than a factor of $L$.
This is a stronger condition than continuity: every Lipschitz function is continuous, but not every continuous function is Lipschitz (e.g., $x\mapsto\sqrt{x}$ on $[0,1]$).
\begin{lemma}
\label{lemma:isoperimetric_inequality_on_sphere}
Isoperimetric inequality on the sphere:
Let $\sigma^n(\cdot)$ denote the normalized volume measure on the $n$-dimensional sphere $S^n(1)$; that is, $\sigma^n(A)\coloneqq\frac{\operatorname{Area}(A)}{\operatorname{Area}(S^n)}$ for $A\subset S^n(1)$.
Among all subsets of a given measure, spherical caps (metric balls of $S^n(1)$) minimize the measure of their $r$-neighborhoods: for any closed subset $\Omega\subset S^n(1)$, take a metric ball $B_\Omega\subset S^n(1)$ with $\sigma^n(B_\Omega)=\sigma^n(\Omega)$. Then, for every $r>0$,
\[
\sigma^n(U_r(\Omega))\geq \sigma^n(U_r(B_\Omega))
\]
where $U_r(A)=\{x\in S^n(1):d(x,A)< r\}$ and $d$ is the geodesic distance.
\end{lemma}
Intuitively, the lemma says that spherical caps are extremal: among all sets of a given measure, a cap has the smallest $r$-neighborhood, so any set spreads out at least as fast as a cap when thickened.
Proving the lemma requires a good understanding of the Riemannian geometry of the sphere; for now, we take it for granted.
\subsection{Levy's concentration theorem}
\begin{theorem}
\label{theorem:Levy's_concentration_theorem}
Levy's concentration theorem:
An arbitrary 1-Lipschitz function $f:S^n\to \mathbb{R}$ concentrates near a single value $a_0\in \mathbb{R}$ as strongly as the distance function does.
That is,
\[
\mu\{x\in S^n: |f(x)-a_0|\geq\epsilon\} < \kappa_n(\epsilon)\leq 2\exp\left(-\frac{(n-1)\epsilon^2}{2}\right)
\]
where
\[
\kappa_n(\epsilon)=\frac{\int_\epsilon^{\frac{\pi}{2}}\cos^{n-1}(t)dt}{\int_0^{\frac{\pi}{2}}\cos^{n-1}(t)dt}
\]
$a_0$ is the \textbf{Levy mean} of the function $f$; that is, the level set $f^{-1}(a_0)$ divides the sphere into two halves, each of measure at least one half, characterized by the following inequalities:
\[
\mu(f^{-1}(-\infty,a_0])\geq \frac{1}{2} \text{ and } \mu(f^{-1}[a_0,\infty))\geq \frac{1}{2}
\]
\end{theorem}
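Before the proof, the phenomenon can be observed empirically. The following Python sketch (my own illustration; it samples $S^{n-1}\subset\mathbb{R}^n$, so the exponent is shifted by one relative to the statement) estimates how often the 1-Lipschitz coordinate function $f(x)=x_1$, whose Levy mean is $0$, deviates from that value.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)

def first_coordinate_samples(n, n_samples):
    # The first coordinate of a uniform point on S^{n-1} has the
    # same law as z / sqrt(z^2 + w), z ~ N(0,1), w ~ chi^2_{n-1}.
    z = rng.normal(size=n_samples)
    w = rng.chisquare(n - 1, size=n_samples)
    return z / np.sqrt(z ** 2 + w)

eps = 0.1
for n in (10, 100, 1000, 10000):
    f = first_coordinate_samples(n, 200_000)     # f(x) = x_1, 1-Lipschitz
    frac = np.mean(np.abs(f) >= eps)             # empirical deviation rate
    bound = 2 * np.exp(-(n - 2) * eps ** 2 / 2)  # Levy-type bound for S^{n-1}
    print(n, frac, bound)
\end{verbatim}
The empirical fraction drops rapidly with the dimension, as the theorem predicts.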
We will prove the theorem via the Maxwell-Boltzmann distribution law~\cite{shioya2014metricmeasuregeometry}.
\begin{defn}
\label{defn:Gaussian_measure}
Gaussian measure:
We denote the Gaussian measure on $\mathbb{R}^k$ as $\gamma^k$.
$$
d\gamma^k(x)\coloneqq\frac{1}{(2\pi)^{k/2}}\exp\left(-\frac{1}{2}\|x\|^2\right)dx
$$
Here $x\in \mathbb{R}^k$, $\|x\|^2=\sum_{i=1}^k x_i^2$ is the squared Euclidean norm, and $dx$ is the Lebesgue measure on $\mathbb{R}^k$.
\end{defn}
Intuitively, the Gaussian measure $\gamma^k$ is the distribution of a standard Gaussian random vector on $\mathbb{R}^k$, with mean $0$ and identity covariance.
It is connected to high-dimensional spheres by what is sometimes called the projective limit theorem~\cite{romanvershyni}:
if $X\sim \operatorname{Unif}(S^n(\sqrt{n}))$, then for any fixed unit vector $x$ we have $\langle X,x\rangle\to N(0,1)$ in distribution as $n\to \infty$.
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{maxwell.png}
\caption{Maxwell-Boltzmann distribution law, image from \cite{romanvershyni}}
\label{fig:Maxwell-Boltzmann_distribution_law}
\end{figure}
\begin{lemma}
\label{lemma:Maxwell-Boltzmann_distribution_law}
Maxwell-Boltzmann distribution law:
Let $\pi_{n,k}:S^n(\sqrt{n})\to \mathbb{R}^k$ denote the projection onto the first $k$ coordinates. For any natural number $k$,
\[
\frac{d(\pi_{n,k})_*\sigma^n(x)}{dx}\to \frac{d\gamma^k(x)}{dx}\quad\text{as } n\to\infty,
\]
where $(\pi_{n,k})_*\sigma^n$ is the push-forward measure of $\sigma^n$ by $\pi_{n,k}$.
In other words,
\[
(\pi_{n,k})_*\sigma^n\to \gamma^k\text{ weakly as }n\to \infty
\]
\end{lemma}
\begin{proof}
We denote the $k$-dimensional volume measure by $\operatorname{vol}_k$.
Observe that for $x\in \mathbb{R}^k$, the fiber $\pi_{n,k}^{-1}(x)$ is isometric to $S^{n-k}(\sqrt{n-\|x\|^2})$; that is, it is a sphere of radius $\sqrt{n-\|x\|^2}$ (by the definition of $\pi_{n,k}$).
So,
\[
\frac{d(\pi_{n,k})_*\sigma^n(x)}{dx}=\frac{\operatorname{vol}_{n-k}(\pi_{n,k}^{-1}(x))}{\operatorname{vol}_n(S^n(\sqrt{n}))}
=\frac{(n-\|x\|^2)^{\frac{n-k}{2}}}{\int_{\|y\|\leq \sqrt{n}}(n-\|y\|^2)^{\frac{n-k}{2}}dy}.
\]
Note that $\lim_{n\to \infty}(1-\frac{a}{n})^n=e^{-a}$ for any fixed $a$, so
\[
(n-\|x\|^2)^{\frac{n-k}{2}}=n^{\frac{n-k}{2}}\left(1-\frac{\|x\|^2}{n}\right)^{\frac{n-k}{2}}\sim n^{\frac{n-k}{2}}\exp\left(-\frac{\|x\|^2}{2}\right)\quad\text{as } n\to\infty.
\]
The factor $n^{\frac{n-k}{2}}$ cancels between the numerator and the denominator, so
\[
\begin{aligned}
\frac{(n-\|x\|^2)^{\frac{n-k}{2}}}{\int_{\|y\|\leq \sqrt{n}}(n-\|y\|^2)^{\frac{n-k}{2}}dy}&\to\frac{e^{-\frac{\|x\|^2}{2}}}{\int_{\mathbb{R}^k}e^{-\frac{\|y\|^2}{2}}dy}\\
&=\frac{1}{(2\pi)^{\frac{k}{2}}}e^{-\frac{\|x\|^2}{2}}\\
&=\frac{d\gamma^k(x)}{dx}
\end{aligned}
\]
\end{proof}
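The lemma can also be checked empirically. This Python sketch (illustrative only; it uses the standard chi-square representation of a sphere coordinate to avoid storing huge samples) projects uniform samples on $S^n(\sqrt{n})$ onto the first coordinate and compares the result with $N(0,1)$.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(1)

def projected_samples(n, n_samples):
    # First coordinate of a uniform point on S^n(sqrt(n)) in R^{n+1}:
    # sqrt(n) * z / sqrt(z^2 + w), z ~ N(0,1), w ~ chi^2_n.
    z = rng.normal(size=n_samples)
    w = rng.chisquare(n, size=n_samples)
    return np.sqrt(n) * z / np.sqrt(z ** 2 + w)

for n in (3, 10, 100, 1000):
    p = projected_samples(n, 200_000)   # samples from (pi_{n,1})_* sigma^n
    # Mean, variance, and P(|X| <= 1) should approach 0, 1, and ~0.683.
    print(n, p.mean().round(3), p.var().round(3),
          np.mean(np.abs(p) <= 1).round(3))
\end{verbatim}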
Now we can prove Levy's concentration theorem; the proof is from~\cite{shioya2014metricmeasuregeometry}.
\begin{proof}
Let $f_n:S^n(\sqrt{n})\to \mathbb{R}$, $n=1,2,\ldots$, be 1-Lipschitz functions.
Let $\sigma_\infty$ be a weak limit of the push-forward measures $(f_{n_i})_*\sigma^{n_i}$ (Borel probability measures on $\mathbb{R}$) along a subsequence $\{n_i\}$. Let $x$ and $x'$ be two given real numbers with $\gamma^1(-\infty,x]=\sigma_\infty(-\infty,x']$, and suppose $\sigma_\infty\{x'\}=0$.
We want to show that, for all non-negative real numbers $\epsilon_1$ and $\epsilon_2$,
$$
\sigma_\infty[x'-\epsilon_1,x'+\epsilon_2]\geq \gamma^1[x-\epsilon_1,x+\epsilon_2]
$$
Consider the two sets $\Omega_+\coloneqq \{f_{n_i}\geq x'\}$ and $\Omega_-\coloneqq \{f_{n_i}\leq x\}$. Note that $\Omega_+\cup \Omega_-=S^{n_i}(\sqrt{n_i})$.
It is sufficient to show that
$$
U_{\epsilon_1}(\Omega_+)\cap U_{\epsilon_2}(\Omega_-)\subset \{x'-\epsilon_1\leq f_{n_i}\leq x'+\epsilon_2\}.
$$
By the 1-Lipschitz continuity of $f_{n_i}$, for every $\zeta\in U_{\epsilon_1}(\Omega_+)$ there is a point $\xi\in \Omega_+$ with $d(\zeta,\xi)\leq \epsilon_1$, hence $f_{n_i}(\zeta)\geq f_{n_i}(\xi)-\epsilon_1\geq x'-\epsilon_1$. So $U_{\epsilon_1}(\Omega_+)\subset \{f_{n_i}\geq x'-\epsilon_1\}$. By the same argument, $U_{\epsilon_2}(\Omega_-)\subset \{f_{n_i}\leq x+\epsilon_2\}$.
So the push-forward measure $(f_{n_i})_*\sigma^{n_i}$ of the interval $[x'-\epsilon_1,x'+\epsilon_2]$ satisfies
\[
\begin{aligned}
(f_{n_i})_*\sigma^{n_i}[x'-\epsilon_1,x'+\epsilon_2]&=\sigma^{n_i}(x'-\epsilon_1\leq f_{n_i}\leq x'+\epsilon_2)\\
&\geq \sigma^{n_i}(U_{\epsilon_1}(\Omega_+)\cap U_{\epsilon_2}(\Omega_-))\\
&\geq\sigma^{n_i}(U_{\epsilon_1}(\Omega_+))+\sigma^{n_i}(U_{\epsilon_2}(\Omega_-))-1
\end{aligned}
\]
where the last step is inclusion-exclusion together with $\sigma^{n_i}(U_{\epsilon_1}(\Omega_+)\cup U_{\epsilon_2}(\Omega_-))\leq 1$.
\]
By Lemma~\ref{lemma:isoperimetric_inequality_on_sphere}, we have
\[
\sigma^{n_i}(U_{\epsilon_1}(\Omega_+))\geq \sigma^{n_i}(U_{\epsilon_1}(B_{\Omega_+}))\quad \text{and} \quad \sigma^{n_i}(U_{\epsilon_2}(\Omega_-))\geq \sigma^{n_i}(U_{\epsilon_2}(B_{\Omega_-}))
\]
By Lemma~\ref{lemma:Maxwell-Boltzmann_distribution_law}, the caps $B_{\Omega_+}$ and $B_{\Omega_-}$ correspond in the limit to half-lines of the Gaussian measure $\gamma^1$ of the same measure, so
\[
\sigma^{n_i}(U_{\epsilon_1}(B_{\Omega_+}))+\sigma^{n_i}(U_{\epsilon_2}(B_{\Omega_-}))\to \gamma^1[x-\epsilon_1,\infty)+\gamma^1(-\infty,x+\epsilon_2]
\]
Therefore,
$$
\begin{aligned}
\sigma_\infty[x'-\epsilon_1,x'+\epsilon_2]&\geq \liminf_{i\to \infty}(f_{n_i})_*\sigma^{n_i}[x'-\epsilon_1,x'+\epsilon_2]\\
&\geq \gamma^1[x-\epsilon_1,\infty)+\gamma^1(-\infty,x+\epsilon_2]-1\\
&=\gamma^1[x-\epsilon_1,x+\epsilon_2]
\end{aligned}
$$
\end{proof}
The full proof of Levy's concentration theorem requires a more careful treatment of the cases where $\sigma_\infty\neq \delta_{\pm\infty}$, but I did not have enough time to do so. This section may be completed next semester.
\section{The application of the concentration of measure phenomenon in non-commutative probability theory}
In quantum communication, we can transmit classical bits by sending quantum states. However, by the indistinguishability of quantum states (Proposition~\ref{prop:indistinguishability}), we cannot send an unbounded number of classical bits over a single qubit: there is an upper bound on the zero-error classical communication rate over a quantum channel.
\begin{theorem}
\label{theorem:Holevo_bound}
Holevo bound:
The maximal amount of classical information that can be transmitted by a quantum system with $d$ levels is $\log_2(d)$ bits; for a register of qubits, this is simply the number of qubits.
\end{theorem}
The proof of the Holevo bound can be found in~\cite{Nielsen_Chuang_2010}. At the current state of the project this theorem is not heavily used, so we do not give an annotated proof here.
\subsection{Quantum communication}
To surpass the Holevo bound, we need to use the entanglement of quantum states.
\begin{defn}
\label{defn:Bell_state}
Bell state:
The Bell states are the following four states:
\[
|\Phi^+\rangle=\frac{1}{\sqrt{2}}(|00\rangle+|11\rangle),\quad |\Phi^-\rangle=\frac{1}{\sqrt{2}}(|00\rangle-|11\rangle)
\]
\[
|\Psi^+\rangle=\frac{1}{\sqrt{2}}(|01\rangle+|10\rangle),\quad |\Psi^-\rangle=\frac{1}{\sqrt{2}}(|01\rangle-|10\rangle)
\]
These four states form an orthonormal basis of the 2-qubit Hilbert space $\mathbb{C}^2\otimes\mathbb{C}^2$.
\end{defn}
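A short NumPy sketch (illustrative; the dictionary keys are mine) constructs the four Bell states as vectors in $\mathbb{C}^4$ and verifies that they form an orthonormal basis.
\begin{verbatim}
import numpy as np

e = np.eye(4)                    # computational basis |00>, |01>, |10>, |11>
s = 1 / np.sqrt(2)
bell = {
    "Phi+": s * (e[0] + e[3]),   # (|00> + |11>) / sqrt(2)
    "Phi-": s * (e[0] - e[3]),   # (|00> - |11>) / sqrt(2)
    "Psi+": s * (e[1] + e[2]),   # (|01> + |10>) / sqrt(2)
    "Psi-": s * (e[1] - e[2]),   # (|01> - |10>) / sqrt(2)
}
B = np.array(list(bell.values()))
print(np.allclose(B @ B.conj().T, np.eye(4)))   # True: orthonormal basis
\end{verbatim}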
\subsection{Superdense coding and entanglement}
A description of superdense coding can be found in~\cite{gupta2015functionalanalysisquantuminformation} and~\cite{Hayden}.
Suppose $A$ and $B$ share a Bell state (or another maximally entangled state) $|\Phi^+\rangle=\frac{1}{\sqrt{2}}(|00\rangle+|11\rangle)$, where $A$ holds the first qubit and $B$ holds the second.
$A$ wishes to send 2 \textbf{classical bits} to $B$.
$A$ applies one of the four Pauli unitaries (the quantum gates $I$, $X$, $Y$, $Z$) to her half of the entangled pair, and then sends that single qubit to $B$.
This operation maps the shared entangled state to one of the four orthogonal Bell states.
$B$ performs a joint measurement in the Bell basis on the combined state of the received qubit and the qubit he holds.
$B$ decodes the outcome and obtains the 2 classical bits sent by $A$.
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{superdense_coding.png}
\caption{Superdense coding, image from \cite{Hayden}}
\label{fig:superdense_coding}
\end{figure}
Note that superdense coding sends 2 classical bits of information by transmitting 1 qubit, aided by 1 pre-shared entangled qubit. \textbf{The role of the entangled qubit} is to let $B$ distinguish the four possible orthogonal states of the two-qubit system, which the single transmitted qubit could not encode on its own.
Additionally, no information can be gained by measuring a pair of entangled qubits alone. To send information from $A$ to $B$, a qubit must be physically sent from $A$ to $B$; in particular, information cannot be sent faster than the speed of light.
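The protocol itself is easy to simulate. The following Python sketch (a minimal illustration; the basis ordering and variable names are my own choices, not from the cited sources) encodes two classical bits by a Pauli acting on $A$'s half of $|\Phi^+\rangle$ and decodes them by a Bell-basis measurement.
\begin{verbatim}
import numpy as np

I2 = np.eye(2, dtype=complex)
X = np.array([[0, 1], [1, 0]], dtype=complex)
Z = np.array([[1, 0], [0, -1]], dtype=complex)

phi_plus = np.array([1, 0, 0, 1], dtype=complex) / np.sqrt(2)

# Encoding: A applies I, X, Z, or XZ (= -iY) to her qubit, the first factor.
encodings = {"00": I2, "01": X, "10": Z, "11": X @ Z}

# Decoding: B measures in the Bell basis (rows: Phi+, Psi+, Phi-, Psi-).
bell_basis = np.array([[1, 0, 0, 1], [0, 1, 1, 0],
                       [1, 0, 0, -1], [0, 1, -1, 0]],
                      dtype=complex) / np.sqrt(2)

for bits, U in encodings.items():
    state = np.kron(U, I2) @ phi_plus
    probs = np.abs(bell_basis.conj() @ state) ** 2
    print(bits, "->", np.argmax(probs))  # each message gives a distinct outcome
\end{verbatim}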
\subsection{Hayden's concentration of measure phenomenon}
The concentration of measure phenomenon enters superdense coding through random sampling of entangled states~\cite{Hayden}.
The relevant theorem connects the following mathematical structures:
\begin{figure}[h]
\centering
\begin{tikzpicture}[node distance=30mm, thick,
main/.style={draw, draw=white},
towards/.style={->},
towards_imp/.style={->,red},
mutual/.style={<->}
]
% define nodes
\node[main] (cp) {$\mathbb{C}P^{d_A d_B-1}$};
\node[main] (pa) [left of=cp] {$\mathcal{P}(A\otimes B)$};
\node[main] (sa) [below of=pa] {$S_A$};
\node[main] (rng) [right of=sa] {$[0,\infty)\subset \mathbb{R}$};
% draw edges
\draw[mutual] (cp) -- (pa);
\draw[towards] (pa) -- node[left] {$\operatorname{Tr}_B$} (sa);
\draw[towards_imp] (pa) -- node[above right] {$f$} (rng);
\draw[towards] (sa) -- node[above] {$H(\psi_A)$} (rng);
\end{tikzpicture}
\caption{Mathematical structure for Hayden's concentration of measure phenomenon}
\label{fig:Hayden_concentration_of_measure_phenomenon}
\end{figure}
\begin{itemize}
\item The red arrow represents the concentration of measure effect, applied to the map $f(\psi)=H(\operatorname{Tr}_B(\psi))$.
\item $S_A$ denotes the set of mixed states on $A$.
\end{itemize}
To prove the concentration of measure phenomenon, we need to analyze the elements of Figure~\ref{fig:Hayden_concentration_of_measure_phenomenon}.
First, we need to define what a random state in a bipartite system is. In fact, for pure states there is a unique unitarily invariant uniform distribution, induced by the Haar measure.
$U(n)$ is the group of all $n\times n$ \textbf{unitary matrices} over $\mathbb{C}$,
$$
U(n)=\{A\in \mathbb{C}^{n\times n}: A^*A=AA^*=I_n\}
$$
The uniqueness of such a measure comes from the lemma below~\cite{Elizabeth_book}:
\begin{lemma}
Let $(U(n), \| \cdot \|, \mu)$ be a metric measure space, where $\| \cdot \|$ is the Hilbert-Schmidt norm and $\mu$ is a Borel probability measure.
The Haar measure on $U(n)$ is the unique probability measure that is invariant under the action of $U(n)$ on itself.
That is, for every $A\in U(n)$ and every Borel set $S\subseteq U(n)$, $\mu(A\cdot S)=\mu(S\cdot A)=\mu(S)$.
\end{lemma}
The existence and uniqueness of the Haar measure is a theorem in compact Lie group theory; we will not prove it for this research topic.
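For computations, Haar-distributed unitaries can be sampled with the standard QR trick: take a matrix of i.i.d. complex Gaussians, QR-factorize it, and fix the phases of the diagonal of $R$. A minimal Python sketch (my own, not from~\cite{Elizabeth_book}):
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(2)

def haar_unitary(n):
    # Ginibre matrix of i.i.d. standard complex Gaussians.
    z = (rng.normal(size=(n, n)) + 1j * rng.normal(size=(n, n))) / np.sqrt(2)
    q, r = np.linalg.qr(z)
    # Rescale columns by the phases of diag(R) so the law is exactly Haar.
    d = np.diag(r)
    return q * (d / np.abs(d))

U = haar_unitary(4)
print(np.allclose(U.conj().T @ U, np.eye(4)))   # True: U is unitary
\end{verbatim}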
A random pure state $\varphi$ is any random variable distributed according to the unitarily invariant probability measure on the pure states $\mathcal{P}(A)$ of the system $A$, denoted by $\varphi\in_R\mathcal{P}(A)$.
For pure states, it is straightforward to obtain this unitarily invariant probability measure from the Haar measure, since the space of pure states is (up to a global phase) the sphere $S^n$ for some $n$. For mixed states, the situation is more involved, and we use the partial trace to define rank-$s$ random states.
\begin{defn}
Rank-$s$ random state:
For a system $A$ and an integer $s\geq 1$, consider the distribution on the mixed states $\mathcal{S}(A)$ of $A$ induced by the partial trace over the second factor from the uniform distribution on pure states of $A\otimes\mathbb{C}^s$. Any random variable $\rho$ distributed in this way is called a rank-$s$ random state, denoted $\rho\in_R \mathcal{S}_s(A)$. In particular, $\mathcal{P}(A)=\mathcal{S}_1(A)$.
\end{defn}
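Concretely, a rank-$s$ random state can be sampled directly from the definition (a sketch; the helper name is mine): a normalized complex Gaussian vector is uniform on the unit sphere of $A\otimes\mathbb{C}^s$, and if that vector is reshaped into a $d_A\times s$ matrix $M$, the partial trace over the second factor is $MM^\dagger$.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(3)

def rank_s_random_state(d_a, s):
    # Uniform pure state on A (x) C^s, stored as a d_a x s matrix.
    m = rng.normal(size=(d_a, s)) + 1j * rng.normal(size=(d_a, s))
    m /= np.linalg.norm(m)
    # Partial trace over the second factor: rho = M M^dagger.
    return m @ m.conj().T

rho = rank_s_random_state(4, 2)
print(np.trace(rho).real)               # 1.0: a valid density matrix
print(np.linalg.matrix_rank(rho))       # 2 = s (almost surely)
\end{verbatim}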
Due to time constraints of the project, the following lemma is demonstrated but not investigated thoroughly in this research:
\begin{lemma}
\label{pages_lemma}
Page's lemma for the expected entropy of entanglement:
Choose a random pure state $\sigma=|\psi\rangle\langle\psi|$ on $A\otimes B$ with $d_B\geq d_A$.
The expected value of the entropy of entanglement is known and satisfies a concentration inequality known as Page's formula~\cite{Pages_conjecture,Pages_conjecture_simple_proof}; see also \cite[Eq.~(15.72)]{Bengtsson_Życzkowski_2017}. The detailed proof is not fully explored in this project and is intended to be done next semester.
\[
\mathbb{E}[H(\psi_A)] \geq \log_2(d_A)-\frac{1}{2\ln(2)}\frac{d_A}{d_B}
\]
\end{lemma}
It provides a lower bound for the expected entropy of entanglement, which is nearly maximal when $d_B\gg d_A$. Numerically, we obtain the following result (see Figure~\ref{fig:entropy_vs_dim}):
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{entropy_vs_dim.png}
\caption{Entropy vs dimension}
\label{fig:entropy_vs_dim}
\end{figure}
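A Monte Carlo sketch along these lines (my own illustration, reusing the sampling idea above) estimates $\mathbb{E}[H(\psi_A)]$ and compares it with Page's lower bound; such an experiment could produce a figure like Figure~\ref{fig:entropy_vs_dim}.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(4)

def entanglement_entropy(d_a, d_b):
    # H(psi_A) in bits for a uniform random pure state on A (x) B.
    m = rng.normal(size=(d_a, d_b)) + 1j * rng.normal(size=(d_a, d_b))
    m /= np.linalg.norm(m)
    p = np.linalg.svd(m, compute_uv=False) ** 2  # eigenvalues of rho_A
    p = p[p > 1e-15]
    return -np.sum(p * np.log2(p))

d_a, d_b, trials = 8, 32, 2000
est = np.mean([entanglement_entropy(d_a, d_b) for _ in range(trials)])
page = np.log2(d_a) - d_a / (2 * np.log(2) * d_b)
print(est, page)   # the estimate should lie at or above the Page bound
\end{verbatim}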
We then have a bound on the Lipschitz constant $\eta$ of the map $\varphi\mapsto H(\varphi_A)$:
\begin{lemma}
The Lipschitz constant $\eta$ of $\varphi\mapsto H(\varphi_A)$ is upper bounded by $\sqrt{8}\log_2(d_A)$ for $d_A\geq 3$.
\end{lemma}
From Levy's lemma, if we define $\beta=\frac{1}{\ln(2)}\frac{d_A}{d_B}$, then for all $\alpha>0$ we have
\[
\operatorname{Pr}[H(\psi_A) < \log_2(d_A)-\alpha-\beta] \leq \exp\left(-\frac{1}{8\pi^2\ln(2)}\frac{(d_Ad_B-1)\alpha^2}{(\log_2(d_A))^2}\right)
\]
where $d_B\geq d_A\geq 3$~\cite{Hayden_2006}.
Numerically, we observe the following result:
as the dimension of the Hilbert space increases, the chance of sampling an almost maximally entangled state increases (see Figure~\ref{fig:entropy_vs_dA}).
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{entropy_vs_dA.png}
\caption{Entropy vs $d_A$}
\label{fig:entropy_vs_dA}
\end{figure}
% When compiled standalone, print this chapter's references at the end.
\ifSubfilesClassLoaded{
\printbibliography[title={References for Chapter 1}]
}
\end{document}