% HonorThesis/chapters/chap1.tex

% chapters/chap1.tex
\documentclass[../main.tex]{subfiles}

% If this chapter is compiled *by itself*, we must load only its own .bib
% and print its bibliography at the end of the chapter.
\ifSubfilesClassLoaded{
  \addbibresource{../main.bib}
}

\usepackage{amsmath, amsfonts, amsthm}
\usepackage{fancyhdr,parskip}
\usepackage{fullpage}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% add special notation supports
\usepackage[mathscr]{euscript}
\usepackage{mathtools}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% add image package and directory
\usepackage{graphicx}
\usepackage{tikz}
\graphicspath{{../images/}}


\begin{document}


\chapter{Concentration of Measure And Quantum Entanglement}


First, we build the mathematical model describing the behavior of quantum systems, and explain why it makes sense to physicists and is meaningful to the general public.

\section{Motivation}

We begin with a motivation for bringing non-commutative probability theory into the study of quantum mechanics. This section is mainly based on the book~\cite{kummer1998elements}.

\subsection{Light polarization and the violation of Bell's inequality}

Light that passes through a polarizer is polarized in a certain direction. If we fix the first filter and rotate the second filter, we observe that the intensity of the transmitted light changes.

The light intensity decreases with $\alpha$ (the angle between the two filters). The light should vanish when $\alpha=\pi/2$.

However, for a system of 3 polarizing filters $F_1,F_2,F_3$, having directions $\alpha_1,\alpha_2,\alpha_3$, if we put them on the optical bench in pairs, then we will have three random variables $P_1,P_2,P_3$.

\begin{figure}[h]
	\centering
	\includegraphics[width=0.7\textwidth]{Filter_figure.png}
	\caption{The light polarization experiment, image from \cite{kummer1998elements}}
	\label{fig:Filter_figure}
\end{figure}

\begin{theorem}
	\label{theorem:Bell's_3_variable_inequality}
	Bell's 3 variable inequality:

	For any three $\{0,1\}$-valued random variables $P_1,P_2,P_3$ on a classical probability space, we have

	$$
	\operatorname{Prob}(P_1=1,P_3=0)\leq \operatorname{Prob}(P_1=1,P_2=0)+\operatorname{Prob}(P_2=1,P_3=0)
	$$
\end{theorem}

\begin{proof}
	By the law of total probability, the event $\{P_1=1,P_3=0\}$ splits into two cases according to whether $P_2=0$ or $P_2=1$. Dropping one of the conditions in each case can only increase the probability, which gives

    $$
    \begin{aligned}
    \operatorname{Prob}(P_1=1,P_3=0)&=\operatorname{Prob}(P_1=1,P_2=0,P_3=0)\\
    &+\operatorname{Prob}(P_1=1,P_2=1,P_3=0)\\
    &\leq\operatorname{Prob}(P_1=1,P_2=0)+\operatorname{Prob}(P_2=1,P_3=0)
    \end{aligned}
    $$
\end{proof}

However, according to our experimental measurement, for any pair of polarizers $F_i,F_j$, by the complement rule, we have
$$
\begin{aligned}
\operatorname{Prob}(P_i=1,P_j=0)&=\operatorname{Prob}(P_i=1)-\operatorname{Prob}(P_i=1,P_j=1)\\
&=\frac{1}{2}-\frac{1}{2}\cos^2(\alpha_i-\alpha_j)\\
&=\frac{1}{2}\sin^2(\alpha_i-\alpha_j)
\end{aligned}
$$

This leads to a contradiction if we apply the inequality to the experimental data, since the inequality would then require

$$
\frac{1}{2}\sin^2(\alpha_1-\alpha_3)\leq\frac{1}{2}\sin^2(\alpha_1-\alpha_2)+\frac{1}{2}\sin^2(\alpha_2-\alpha_3)
$$

For example, if $\alpha_1=0,\alpha_2=\frac{\pi}{6},\alpha_3=\frac{\pi}{3}$, then the inequality would read

$$
\begin{aligned}
\frac{1}{2}\sin^2\left(-\frac{\pi}{3}\right)&\leq\frac{1}{2}\sin^2\left(-\frac{\pi}{6}\right)+\frac{1}{2}\sin^2\left(\frac{\pi}{6}-\frac{\pi}{3}\right)\\
\frac{3}{8}&\leq\frac{1}{8}+\frac{1}{8}\\
\frac{3}{8}&\leq\frac{1}{4},
\end{aligned}
$$
which is false.

Refined versions of this experiment (e.g., Aspect's experiment and the calcium entangled-photon experiment) have also been conducted, and the inequality is still violated.

\subsection{The true model of light polarization}

The full description of the light polarization is given below:

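State of polarization of a photon: $\psi=\alpha|0\rangle+\beta|1\rangle$ with $\alpha,\beta\in\mathbb{C}$ and $|\alpha|^2+|\beta|^2=1$, where $|0\rangle$ and $|1\rangle$ are the two orthogonal polarization states in $\mathbb{C}^2$. (The coefficient $\alpha$ here is unrelated to the filter angle $\alpha$ used below.)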

Polarization filter (generalized 0,1 valued random variable): orthogonal projection $P_\alpha$ on $\mathbb{C}^2$ corresponding to the direction $\alpha$ (operator satisfies $P_\alpha^*=P_\alpha=P_\alpha^2$).

The matrix representation of $P_\alpha$ is given by

$$
P_\alpha=\begin{pmatrix}
\cos^2(\alpha) & \cos(\alpha)\sin(\alpha)\\
\cos(\alpha)\sin(\alpha) & \sin^2(\alpha)
\end{pmatrix}
$$

Probability of a photon passing through the filter $P_\alpha$ is given by $\langle P_\alpha\psi,\psi\rangle$; this is $\cos^2(\alpha)$ if we set $\psi=|0\rangle$.

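Since the projections $P_{\alpha_1},P_{\alpha_2},P_{\alpha_3}$ do not commute in general, the events ``the photon passes filter $i$'' cannot be modeled as random variables on a common classical probability space, and it is impossible to discuss $\operatorname{Prob}(P_1=1,P_3=0)$ in the classical setting.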

We now show how the experimentally observed probability
$$
\frac{1}{2}\sin^2(\alpha_i-\alpha_j)
$$
arises from the operator model.

Assume the incoming light is \emph{unpolarized}. It is therefore described by
the density matrix
$$
\rho=\frac{1}{2} I .
$$

Let $P_{\alpha_i}$ and $P_{\alpha_j}$ be the orthogonal projections corresponding
to the two polarization filters with angles $\alpha_i$ and $\alpha_j$.

The probability that a photon passes the first filter $P_{\alpha_i}$ is given by the Born rule:

$$
\operatorname{Prob}(P_i=1)
=\operatorname{tr}(\rho P_{\alpha_i})
=\frac{1}{2} \operatorname{tr}(P_{\alpha_i})
=\frac{1}{2}
$$

If the photon passes the first filter, the post-measurement state is given by the L\"uders rule:

$$
\rho \longmapsto
\rho_i
=\frac{P_{\alpha_i}\rho P_{\alpha_i}}{\operatorname{tr}(\rho P_{\alpha_i})}
= P_{\alpha_i}.
$$

The probability that the photon then passes the second filter is

$$
\operatorname{Prob}(P_j=1 \mid P_i=1)
=\operatorname{tr}(P_{\alpha_i} P_{\alpha_j})
=\cos^2(\alpha_i-\alpha_j).
$$

Hence, the probability that the photon passes $P_{\alpha_i}$ and is then blocked by $P_{\alpha_j}$ is

$$
\begin{aligned}
\operatorname{Prob}(P_i=1, P_j=0)
&= \operatorname{Prob}(P_i=1)
   - \operatorname{Prob}(P_i=1, P_j=1) \\
&= \frac12 - \frac12 \cos^2(\alpha_i-\alpha_j) \\
&= \frac12 \sin^2(\alpha_i-\alpha_j).
\end{aligned}
$$

This agrees with the experimentally observed transmission probabilities, but it should be emphasized that this quantity corresponds to a \emph{sequential measurement} rather than a joint probability in the classical sense.

\section{Concentration of measure phenomenon}

\begin{defn}
	$\eta$-Lipschitz function

    Let $(X,\operatorname{dist}_X)$ and $(Y,\operatorname{dist}_Y)$ be two metric spaces. A function $f:X\to Y$ is said to be $\eta$-Lipschitz if there exists a constant $L\in \mathbb{R}$ such that
    \[
    \operatorname{dist}_Y(f(x),f(y))\leq L\operatorname{dist}_X(x,y)
    \]
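    for all $x,y\in X$. We write $\eta=\|f\|_{\operatorname{Lip}}$ for the infimum of all constants $L\geq 0$ satisfying this inequality (the Lipschitz constant of $f$), so that the inequality holds with $L=\eta$.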
\end{defn}

Roughly speaking, this means that the function $f$ does not stretch the distance between any pair of points in $X$ by more than a factor of $L$.

This is a stronger condition than continuity: every Lipschitz function is continuous, but not every continuous function is Lipschitz.

\begin{lemma}
	\label{lemma:isoperimetric_inequality_on_sphere}
	Isoperimetric inequality on the sphere:

    Let $\sigma^n(A)$ denote the normalized area of $A$ on the $n$-dimensional sphere $S^n$. That is, $\sigma^n(A)\coloneqq\frac{\operatorname{Area}(A)}{\operatorname{Area}(S^n)}$.

    Let $\epsilon>0$. Then among all subsets $A\subset S^n$ with a prescribed normalized area $\sigma^n(A)$, the spherical caps minimize the volume of the $\epsilon$-neighborhood of $A$.

    Suppose $\sigma^n(\cdot)$ is the normalized volume measure on the sphere $S^n(1)$, then for any closed subset $\Omega\subset S^n(1)$, we take a metric ball $B_\Omega$ of $S^n(1)$ with $\sigma^n(B_\Omega)=\sigma^n(\Omega)$. Then we have

    \[
    \sigma^n(U_r(\Omega))\geq \sigma^n(U_r(B_\Omega))
    \]

    where $U_r(A)=\{x\in S^n(1):d(x,A)< r\}$ is the open $r$-neighborhood of $A$.
\end{lemma}

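Intuitively, the lemma says that, among all subsets of the sphere with a given measure, the spherical caps are the ones whose neighborhoods grow the slowest.

In other words, a spherical cap has the smallest possible $\epsilon$-neighborhood, so any lower bound on the measure of the neighborhood of a cap also holds for arbitrary sets of the same measure.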

To prove the lemma, we need to have a good understanding of the Riemannian geometry of the sphere. For now, let's just take the lemma for granted.

\subsection{Levy's concentration theorem}

\begin{theorem}
	\label{theorem:Levy's_concentration_theorem}
	Levy's concentration theorem:

    An arbitrary 1-Lipschitz function $f:S^n\to \mathbb{R}$ concentrates near a single value $a_0\in \mathbb{R}$ as strongly as the distance function does.

    That is,
    \[
    \mu\{x\in S^n: |f(x)-a_0|\geq\epsilon\} < \kappa_n(\epsilon)\leq 2\exp\left(-\frac{(n-1)\epsilon^2}{2}\right)
    \]
    where
    \[
    \kappa_n(\epsilon)=\frac{\int_\epsilon^{\frac{\pi}{2}}\cos^{n-1}(t)dt}{\int_0^{\frac{\pi}{2}}\cos^{n-1}(t)dt}
    \]
    Here $\mu$ is the normalized volume measure on $S^n$, and $a_0$ is the \textbf{Levy mean} of the function $f$, that is, the level set $f^{-1}(a_0)\subset S^n$ divides the sphere into equal halves, characterized by the following inequalities:
    \[
    \mu(f^{-1}(-\infty,a_0])\geq \frac{1}{2} \text{ and } \mu(f^{-1}[a_0,\infty))\geq \frac{1}{2}
    \]
\end{theorem}

We will prove the theorem via the Maxwell--Boltzmann distribution law~\cite{shioya2014metricmeasuregeometry}.

\begin{defn}
	\label{defn:Gaussian_measure}
	Gaussian measure:

    We denote the Gaussian measure on $\mathbb{R}^k$ as $\gamma^k$.

    $$
    d\gamma^k(x)\coloneqq\frac{1}{\sqrt{2\pi}^k}\exp(-\frac{1}{2}\|x\|^2)dx
    $$

    Here $x\in \mathbb{R}^k$, $\|x\|^2=\sum_{i=1}^k x_i^2$ is the squared Euclidean norm, and $dx$ is the Lebesgue measure on $\mathbb{R}^k$.

\end{defn}

In other words, the Gaussian measure $\gamma^k$ is the standard normal distribution on $\mathbb{R}^k$: it has mean zero, and its coordinates are independent, each with standard deviation $1$.

The Maxwell--Boltzmann distribution law stated below is also known as the projective limit theorem~\cite{romanvershyni}.

If $X\sim \operatorname{Unif}(S^n(\sqrt{n}))$, then for any fixed unit vector $x$ we have $\langle X,x\rangle\to N(0,1)$ in distribution as $n\to \infty$.

\begin{figure}[h]
    \centering
    \includegraphics[width=0.8\textwidth]{../images/maxwell.png}
    \caption{Maxwell-Boltzmann distribution law, image from \cite{romanvershyni}}
    \label{fig:Maxwell-Boltzmann_distribution_law}
\end{figure}

\begin{lemma}
	\label{lemma:Maxwell-Boltzmann_distribution_law}
    Maxwell-Boltzmann distribution law:

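    For any natural number $k$, let $\pi_{n,k}\colon S^n(\sqrt{n})\subset\mathbb{R}^{n+1}\to\mathbb{R}^k$ be the orthogonal projection onto the first $k$ coordinates, and let $\sigma^n$ be the normalized volume measure on $S^n(\sqrt{n})$. Then, as $n\to\infty$,
    \[
    \frac{d(\pi_{n,k})_*\sigma^n(x)}{dx}\to \frac{d\gamma^k(x)}{dx}
    \]
    for every $x\in\mathbb{R}^k$, where $(\pi_{n,k})_*\sigma^n$ is the push-forward measure of $\sigma^n$ by $\pi_{n,k}$.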

    In other words,
    \[
    (\pi_{n,k})_*\sigma^n\to \gamma^k\text{ weakly as }n\to \infty
    \]
\end{lemma}

\begin{proof}
    We denote the $m$-dimensional volume measure by $\operatorname{vol}_m$.

    Observe that $\pi_{n,k}^{-1}(x),x\in \mathbb{R}^k$ is isometric to $S^{n-k}(\sqrt{n-\|x\|^2})$, that is, for any $x\in \mathbb{R}^k$, $\pi_{n,k}^{-1}(x)$ is a sphere with radius $\sqrt{n-\|x\|^2}$ (by the definition of $\pi_{n,k}$).

    So,
    \[
    \begin{aligned}
    \frac{d(\pi_{n,k})_*\sigma^n(x)}{dx}&=\frac{\operatorname{vol}_{n-k}(\pi_{n,k}^{-1}(x))}{\operatorname{vol}_n(S^n(\sqrt{n}))}\\
    &=\frac{(n-\|x\|^2)^{\frac{n-k}{2}}}{\int_{\|x\|\leq \sqrt{n}}(n-\|x\|^2)^{\frac{n-k}{2}}dx}
    \end{aligned}
    \]
    and it remains to compute the limit of the right-hand side as $n\to \infty$.

    Note that $\lim_{n\to \infty}(1-\frac{a}{n})^n=e^{-a}$ for any $a\geq 0$.

    Hence $(n-\|x\|^2)^{\frac{n-k}{2}}=n^{\frac{n-k}{2}}\left(1-\frac{\|x\|^2}{n}\right)^{\frac{n-k}{2}}$, where $\left(1-\frac{\|x\|^2}{n}\right)^{\frac{n-k}{2}}\to\exp\left(-\frac{\|x\|^2}{2}\right)$ as $n\to\infty$.

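    The common factor $n^{\frac{n-k}{2}}$ cancels between the numerator and the denominator, so, as $n\to\infty$ (using the dominated convergence theorem for the integral in the denominator),
    \[
    \begin{aligned}
    \frac{(n-\|x\|^2)^{\frac{n-k}{2}}}{\int_{\|x\|\leq \sqrt{n}}(n-\|x\|^2)^{\frac{n-k}{2}}dx}&\to\frac{e^{-\frac{\|x\|^2}{2}}}{\int_{x\in \mathbb{R}^k}e^{-\frac{\|x\|^2}{2}}dx}\\
    &=\frac{1}{(2\pi)^{\frac{k}{2}}}e^{-\frac{\|x\|^2}{2}}\\
    &=\frac{d\gamma^k(x)}{dx}
    \end{aligned}
    \]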
\end{proof}

Now we can prove Levy's concentration theorem; the proof is from~\cite{shioya2014metricmeasuregeometry}.

\begin{proof}
    Let $f_n:S^n(\sqrt{n})\to \mathbb{R}$, $n=1,2,\ldots$, be 1-Lipschitz functions.

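    Passing to a subsequence $\{n_i\}$, suppose that the push-forward measures $\sigma_i\coloneqq(f_{n_i})_*\sigma^{n_i}$, a sequence of Borel probability measures on $\mathbb{R}$, converge (in the vague topology) to a limit measure $\sigma_\infty$, and write $\overline{\sigma}_\infty$ for its extension to $[-\infty,\infty]$. Let $x$ and $x'$ be two given real numbers with $\gamma^1(-\infty,x]=\overline{\sigma}_\infty[-\infty,x']$, and suppose $\sigma_\infty\{x'\}=0$.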

    We want to show that, for all non-negative real numbers $\epsilon_1$ and $\epsilon_2$,

    $$
    \sigma_\infty[x'-\epsilon_1,x'+\epsilon_2]\geq \gamma^1[x-\epsilon_1,x+\epsilon_2]
    $$

    Consider the two sets $\Omega_+\coloneqq \{f_{n_i}\geq x'\}$ and $\Omega_-\coloneqq \{f_{n_i}\leq x'\}$. Note that $\Omega_+\cup \Omega_-=S^{n_i}(\sqrt{n_i})$.

    It is sufficient to show that,

    $$
    U_{\epsilon_1}(\Omega_+)\cap U_{\epsilon_2}(\Omega_-)\subset \{x'-\epsilon_1\leq f_{n_i}\leq x'+\epsilon_2\}
    $$

    By the 1-Lipschitz continuity of $f_{n_i}$, for every $\zeta\in U_{\epsilon_1}(\Omega_+)$ there is a point $\xi\in \Omega_+$ such that $d(\zeta,\xi)\leq \epsilon_1$, and hence $f_{n_i}(\zeta)\geq f_{n_i}(\xi)-d(\zeta,\xi)\geq x'-\epsilon_1$. So $U_{\epsilon_1}(\Omega_+)\subset \{f_{n_i}\geq x'-\epsilon_1\}$. By the same argument, we have $U_{\epsilon_2}(\Omega_-)\subset \{f_{n_i}\leq x'+\epsilon_2\}$.

    So the push-forward measure $(f_{n_i})_*\sigma^{n_i}$ of the interval $[x'-\epsilon_1,x'+\epsilon_2]$ satisfies

    \[
    \begin{aligned}
    (f_{n_i})_*\sigma^{n_i}[x'-\epsilon_1,x'+\epsilon_2]&=\sigma^{n_i}(x'-\epsilon_1\leq f_{n_i}\leq x'+\epsilon_2)\\
    &\geq \sigma^{n_i}(U_{\epsilon_1}(\Omega_+)\cap U_{\epsilon_2}(\Omega_-))\\
    &=\sigma^{n_i}(U_{\epsilon_1}(\Omega_+))+\sigma^{n_i}(U_{\epsilon_2}(\Omega_-))-1,
    \end{aligned}
    \]
    where the last equality uses the fact that $\Omega_+\cup\Omega_-=S^{n_i}(\sqrt{n_i})$, so the two neighborhoods together cover the sphere.

    By the lemma~\ref{lemma:isoperimetric_inequality_on_sphere}, we have

    \[
    \sigma^{n_i}(U_{\epsilon_1}(\Omega_+))\geq \sigma^{n_i}(U_{\epsilon_1}(B_{\Omega_+}))\quad \text{and} \quad \sigma^{n_i}(U_{\epsilon_2}(\Omega_-))\geq \sigma^{n_i}(U_{\epsilon_2}(B_{\Omega_-}))
    \]

    By the lemma~\ref{lemma:Maxwell-Boltzmann_distribution_law}, we have

    \[
    \sigma^{n_i}(U_{\epsilon_1}(B_{\Omega_+}))+\sigma^{n_i}(U_{\epsilon_2}(B_{\Omega_-}))\to \gamma^1[x-\epsilon_1,\infty)+\gamma^1(-\infty,x+\epsilon_2]
    \]

    Therefore,

    $$
    \begin{aligned}
    \sigma_\infty[x'-\epsilon_1,x'+\epsilon_2]&\geq \liminf_{i\to \infty}(f_{n_i})_*\sigma^{n_i}[x'-\epsilon_1,x'+\epsilon_2]\\
    &\geq \gamma^1[x-\epsilon_1,\infty)+\gamma^1(-\infty,x+\epsilon_2]-1\\
    &=\gamma^1[x-\epsilon_1,x+\epsilon_2]
    \end{aligned}
    $$

\end{proof}

The full proof of Levy's concentration theorem requires more work for the cases where $\overline{\sigma}_\infty\neq \delta_{\pm\infty}$, which could not be completed in the time available. This section may be completed next semester.

\section{The application of the concentration of measure phenomenon in non-commutative probability theory}

In quantum communication, we can pass classical bits by sending quantum states. However, by the indistinguishability (Proposition~\ref{prop:indistinguishability}) of quantum states, we cannot send an infinite number of classical bits over a single qubit. There exists a bound for zero-error classical communication rate over a quantum channel.

\begin{theorem}
	\label{theorem:Holevo_bound}
	Holevo bound:

	The maximal amount of classical information that can be transmitted by a quantum system is given by the Holevo bound: a quantum system with $d$ levels can transmit at most $\log_2(d)$ bits of classical information. For a system of $n$ qubits, $d=2^n$, so the bound is exactly the number of qubits $n$.
\end{theorem}

The proof of the Holevo bound can be found in~\cite{Nielsen_Chuang_2010}. In the current state of the project, this theorem is not heavily used, so we do not give an annotated proof here.

\subsection{Quantum communication}

To surpass the Holevo bound, we need to use the entanglement of quantum states.

\begin{defn}
	\label{defn:Bell_state}
	Bell state:

        The Bell states are the following four states:

        \[
        |\Phi^+\rangle=\frac{1}{\sqrt{2}}(|00\rangle+|11\rangle),\quad |\Phi^-\rangle=\frac{1}{\sqrt{2}}(|00\rangle-|11\rangle)
        \]
        \[
        |\Psi^+\rangle=\frac{1}{\sqrt{2}}(|01\rangle+|10\rangle),\quad |\Psi^-\rangle=\frac{1}{\sqrt{2}}(|01\rangle-|10\rangle)
        \]
        These form an orthonormal basis of the 2-qubit Hilbert space $\mathbb{C}^2\otimes\mathbb{C}^2$.
\end{defn}


\subsection{Superdense coding and entanglement}

The description of the superdense coding can be found in~\cite{gupta2015functionalanalysisquantuminformation} and~\cite{Hayden}.

Suppose $A$ and $B$ share a Bell state (or other maximally entangled state) $|\Phi^+\rangle=\frac{1}{\sqrt{2}}(|00\rangle+|11\rangle)$, where $A$ holds the first part and $B$ holds the second part.

$A$ wishes to send 2 \textbf{classical bits} to $B$.

$A$ applies one of the four Pauli unitaries ($I$, $X$, $Y$, $Z$), chosen according to the two bits she wants to send, to her half of the entangled pair. Then $A$ sends her qubit to $B$.

This operation transforms the shared state into one of the four mutually orthogonal Bell states.

$B$ performs a measurement in the Bell basis on the two qubits he now holds: the qubit received from $A$ and his own half of the entangled pair.

$B$ decodes the result and obtains the 2 classical bits sent by $A$.

\begin{figure}[h]
	\centering
	\includegraphics[width=0.8\textwidth]{superdense_coding.png}
	\caption{Superdense coding, image from \cite{Hayden}}
	\label{fig:superdense_coding}
\end{figure}

Note that superdense coding is a way to send 2 classical bits of information by sending 1 qubit, with the help of 1 pre-shared entangled pair. \textbf{The role of the entanglement} is to make the 4 possible states of the two-qubit system mutually orthogonal, so that $B$ can perfectly distinguish them, even though $A$ acts on only one of the two qubits.

Additionally, no information can be transmitted merely by measuring one's own half of an entangled pair. To send information from $A$ to $B$, we need to physically send the qubits from $A$ to $B$. That means we cannot send information faster than the speed of light.

% TODO: FILL the description of the superdense coding here.

\subsection{Hayden's concentration of measure phenomenon}

The concentration of measure phenomenon can be applied to superdense coding by randomly sampling the entangled qubits~\cite{Hayden}.

It is a theorem connecting the following mathematical structures:

\begin{figure}[h]
    \centering
    \begin{tikzpicture}[node distance=30mm, thick,
        main/.style={draw, draw=white},
        towards/.style={->},
        towards_imp/.style={->,red},
        mutual/.style={<->}
        ]
        % define nodes
        \node[main] (cp) {$\mathbb{C}P^{d_A d_B-1}$};
        \node[main] (pa) [left of=cp] {$\mathcal{P}(A\otimes B)$};
        \node[main] (sa) [below of=pa] {$S_A$};
        \node[main] (rng) [right of=sa] {$[0,\infty)\subset \mathbb{R}$};

        % draw edges
        \draw[mutual] (cp) -- (pa);
        \draw[towards] (pa) -- node[left] {$\operatorname{Tr}_B$} (sa);
        \draw[towards_imp] (pa) -- node[above right] {$f$} (rng);
        \draw[towards] (sa) -- node[above] {$H(\psi_A)$} (rng);
    \end{tikzpicture}
    \caption{Mathematical structure for Hayden's concentration of measure phenomenon}
    \label{fig:Hayden_concentration_of_measure_phenomenon}
\end{figure}

\begin{itemize}
    \item The red arrow is the concentration of measure effect. $f=H(\operatorname{Tr}_B(\psi))$.
    \item $S_A$ denotes the mixed states on $A$.
\end{itemize}

To prove the concentration of measure phenomenon, we need to analyze the following elements involved in figure~\ref{fig:Hayden_concentration_of_measure_phenomenon}:


The existence and uniqueness of the Haar measure is a theorem in the theory of compact Lie groups. For this research topic, we will not prove it.

Due to the time constraints of the project, the following lemma is stated but not investigated thoroughly in this research:


\begin{lemma}
    \label{pages_lemma}

    Page's lemma for expected entropy of mixed states

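    Choose a random pure state $\sigma=|\psi\rangle\langle\psi|$ from $A\otimes B$, where $d_A\leq d_B$ are the dimensions of $A$ and $B$, and let $\psi_A=\operatorname{Tr}_B(\sigma)$ be its reduced state on $A$.

    The expected value of the entropy of entanglement $H(\psi_A)$ is known explicitly: it is given by Page's formula~\cite{Pages_conjecture,Pages_conjecture_simple_proof,Bengtsson_Zyczkowski_2017}[15.72] and satisfies the following lower bound.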

    $$
    \mathbb{E}[H(\psi_A)]=\frac{1}{\ln(2)}\left(\sum_{j=d_B+1}^{d_Ad_B}\frac{1}{j}-\frac{d_A-1}{2d_B}\right) \geq \log_2(d_A)-\frac{1}{2\ln(2)}\frac{d_A}{d_B}
    $$

\end{lemma}

It basically provides a lower bound for the expected entropy of entanglement. Experimentally, we can have the following result (see Figure~\ref{fig:entropy_vs_dim}):

\begin{figure}[h]
	\centering
	\includegraphics[width=0.8\textwidth]{entropy_vs_dim.png}
	\caption{Entropy vs dimension}
	\label{fig:entropy_vs_dim}
\end{figure}

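Next, we bound the Lipschitz constant $\eta$ of the map $S(\varphi_A)\colon \mathcal{P}(A\otimes B)\to \mathbb{R}$, where $S(\varphi_A)$ is the entropy of the reduced state $\varphi_A$ (written $H(\psi_A)$ above).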

\begin{lemma}
    The Lipschitz constant $\eta$ of $S(\varphi_A)$ is upper bounded by $\sqrt{8}\log_2(d_A)$ for $d_A\geq 3$.
\end{lemma}

\begin{proof}
    The proof uses the Lagrange multiplier method to find the maximum of the gradient of $S(\varphi_A)$.
    %
    % TODO: use the Lagrange multiplier method to find the maximum of the gradient of $S(\varphi_A)$.
    %
\end{proof}

From Levy's lemma, if we define $\beta=\frac{1}{\ln(2)}\frac{d_A}{d_B}$, then we have

$$
\operatorname{Pr}[H(\psi_A) < \log_2(d_A)-\alpha-\beta] \leq \exp\left(-\frac{1}{8\pi^2\ln(2)}\frac{(d_Ad_B-1)\alpha^2}{(\log_2(d_A))^2}\right)
$$

where $d_B\geq d_A\geq 3$~\cite{Hayden_2006}.

Experimentally, we can have the following result:

As the dimension of the Hilbert space increases, the chance of getting an almost maximally entangled state increases (see Figure~\ref{fig:entropy_vs_dA}).

\begin{figure}[h]
	\centering
	\includegraphics[width=0.8\textwidth]{entropy_vs_dA.png}
	\caption{Entropy vs $d_A$}
	\label{fig:entropy_vs_dA}
\end{figure}

% When compiled standalone, print this chapter's references at the end.
\ifSubfilesClassLoaded{
  \printbibliography[title={References for Chapter 1}]
}

\end{document}