diff --git a/latex/chapters/chap0.pdf b/latex/chapters/chap0.pdf
index d980ecc..c058eb6 100644
Binary files a/latex/chapters/chap0.pdf and b/latex/chapters/chap0.pdf differ
diff --git a/latex/chapters/chap0.tex b/latex/chapters/chap0.tex
index 8bbe8c5..00b97b8 100644
--- a/latex/chapters/chap0.tex
+++ b/latex/chapters/chap0.tex
@@ -309,7 +309,6 @@ $$
 \operatorname{Tr}_{\mathscr{B}}(T)=\sum_{i=1}^n a_i \operatorname{Tr}(B_i) A_i
 $$
 
-
 \end{defn}
 Or we can define the map $L_v: \mathscr{A}\to \mathscr{A}\otimes \mathscr{B}$ by
 
@@ -509,7 +508,7 @@ Recall from classical probability theory, we call the initial probability distri
 
     Given a non-commutative probability space $(\mathscr{B}(\mathscr{H}),\mathscr{P})$,
 
-    A state is a unit vector $\bra{\psi}$ in the Hilbert space $\mathscr{H}$, such that $\bra{\psi}\ket{\psi}=1$. 
+    A state is a unit vector $\ket{\psi}$ in the Hilbert space $\mathscr{H}$, such that $\bra{\psi}\ket{\psi}=1$. 
     
     Every state uniquely defines a map $\rho:\mathscr{P}\to[0,1]$, $\rho(P)=\bra{\psi}P\ket{\psi}$ (commonly named as density operator) such that:
     \begin{itemize}
@@ -518,7 +517,7 @@ Recall from classical probability theory, we call the initial probability distri
     \end{itemize}
 \end{defn}
 
-Note that the pure states are the density operators that can be represented by a unit vector $\bra{\psi}$ in the Hilbert space $\mathscr{H}$, whereas mixed states are the density operators that cannot be represented by a unit vector in the Hilbert space $\mathscr{H}$.
+Note that the pure states are the density operators that can be represented by a unit vector $\ket{\psi}$ in the Hilbert space $\mathscr{H}$, whereas mixed states are the density operators that cannot be represented by a unit vector in the Hilbert space $\mathscr{H}$.
 
 If $(|\psi_1\rangle,|\psi_2\rangle,\cdots,|\psi_n\rangle)$ is an orthonormal basis of $\mathscr{H}$ consisting of eigenvectors of $\rho$, for the eigenvalues $p_1,p_2,\cdots,p_n$, then $p_j\geq 0$ and $\sum_{j=1}^n p_j=1$.
 
diff --git a/latex/chapters/chap2.pdf b/latex/chapters/chap2.pdf
index 7bb4a17..b5ffef0 100644
Binary files a/latex/chapters/chap2.pdf and b/latex/chapters/chap2.pdf differ
diff --git a/presentation/ZheyuanWu_HonorThesis_Presentation.pdf b/presentation/ZheyuanWu_HonorThesis_Presentation.pdf
index d8b1fb8..f11149e 100644
Binary files a/presentation/ZheyuanWu_HonorThesis_Presentation.pdf and b/presentation/ZheyuanWu_HonorThesis_Presentation.pdf differ
diff --git a/presentation/ZheyuanWu_HonorThesis_Presentation.tex b/presentation/ZheyuanWu_HonorThesis_Presentation.tex
index fac8934..281b47c 100644
--- a/presentation/ZheyuanWu_HonorThesis_Presentation.tex
+++ b/presentation/ZheyuanWu_HonorThesis_Presentation.tex
@@ -16,6 +16,7 @@
 \usepackage{tabularx}
 \usepackage{colortbl}
 \usepackage{tikz}
+\usepackage{braket}
 
 \DeclareMathOperator{\sen}{sen}
 \DeclareMathOperator{\tg}{tg}
@@ -67,243 +68,305 @@
   \hypersetup{linkcolor=black}
   \tableofcontents
 \end{frame}
-\section{Motivation}
+\section{Formulation of Quantum Entanglement}
+\begin{frame}{Why am I here?}
 
-\begin{frame}{Light polarization and non-commutative probability}
-        \begin{figure}
-            \includegraphics[width=0.6\textwidth]{../latex/images/Filter_figure.png}
-        \end{figure}
+  \centering
+  \Large\itshape
+  ``I think I can safely say that nobody understands quantum mechanics.''
+
+  \vspace{1em}
+  \normalsize --- Richard Feynman
+\end{frame}
+
+\begin{frame}{Non-commutative probability space}
+    We begin our discussion on a general type of probability space.
+
+    \begin{block}{Non-commutative probability space}
+    \label{defn:non-commutative_probability_space}
+
+    A non-commutative probability space is a pair $(\mathscr{B}(\mathscr{H}),\mathscr{P})$, where $\mathscr{B}(\mathscr{H})$ is the set of all \textbf{bounded} linear operators on $\mathscr{H}$.
+
+    $\mathscr{P}$ is the set of all orthogonal projections on $\mathscr{B}(\mathscr{H})$.
+
+    The set $\mathscr{P}=\{P\in\mathscr{B}(\mathscr{H}):P^*=P=P^2\}$ is the set of all orthogonal projections on $\mathscr{B}(\mathscr{H})$.
+    \end{block}
+    
+\begin{table}[H]
+    \centering
+    \renewcommand{\arraystretch}{1}
+    \label{tab:analog_of_classical_probability_theory_and_non_commutative_probability_theory}
+    {\tiny
+        \begin{tabular}{|p{0.45\linewidth}|p{0.45\linewidth}|}
+            \hline
+            \textbf{Classical probability}                                                                                                                      & \textbf{Non-commutative probability}                                                                                                                      \\
+            \hline
+            Sample space $\Omega$, cardinality $\vert\Omega\vert=n$, example: $\Omega=\{0,1\}$                                                                  & Complex Hilbert space $\mathscr{H}$, dimension $\dim\mathscr{H}=n$, example: $\mathscr{H}=\mathbb{C}^2$                                                   \\
+            \hline
+            Common algebra of $\mathbb{C}$-valued functions                                                                                                     & Algebra of bounded operators $\mathscr{B}(\mathscr{H})$                                                                                                   \\
+            \hline
+            Events: indicator functions of sets                                                                                                                 & Projections: space of orthogonal projections $\mathscr{P}\subseteq\mathscr{B}(\mathscr{H})$                                                               \\
+            \hline
+            functions $f$ such that $f^2=f=\overline{f}$                                                                                                        & orthogonal projections $P$ such that $P^*=P=P^2$                                                                                                          \\
+            \hline
+            $\mathbb{I}_{f^{-1}(\{\lambda\})}$ is the indicator function of the set $f^{-1}(\{\lambda\})$                                                       & $P(\lambda)$ is the orthogonal projection to eigenspace                                                                                                   \\
+            \hline
+            $f=\sum_{\lambda\in \operatorname{Range}(f)}\lambda \mathbb{I}_{f^{-1}(\{\lambda\})}$                                                               & $A=\sum_{\lambda\in \operatorname{sp}(A)}\lambda P(\lambda)$                                                                                              \\
+            \hline
+            Probability measure $\mu$ on $\Omega$                                                                                                               & Density operator $\rho$ on $\mathscr{H}$                                                                                                                  \\
+            \hline
+    \end{tabular}
+    }
+    \end{table}
+\end{frame}
+
+\begin{frame}{Quantum states}
+    Given a non-commutative probability space $(\mathscr{B}(\mathscr{H}),\mathscr{P})$,
+
+    \begin{block}{Definition of (Quantum) State}
+        
+    A state is a unit vector $\ket{\psi}$ in the Hilbert space $\mathscr{H}$, that is, $\langle\psi|\psi\rangle=1$.
+    
+    Every state uniquely defines a map $\rho:\mathscr{P}\to[0,1]$, $\rho(P)=\bra{\psi}P\ket{\psi}$ (commonly named as density operator) such that:
     \begin{itemize}
-        \item Light passing through a polarizer becomes polarized in the direction of that filter.
-        \item If two filters are placed with relative angle $\alpha$, the transmitted intensity decreases as $\alpha$ increases.
-        \item In particular, the transmitted intensity vanishes when $\alpha=\pi/2$.
+        \item $\rho(O)=0$, where $O$ is the zero projection, and $\rho(I)=1$, where $I$ is the identity projection.
+        \item If $P_1,P_2,\ldots,P_n$ are pairwise disjoint orthogonal projections, then $\rho(P_1 + P_2 + \cdots + P_n) = \sum_{i=1}^n \rho(P_i)$.
     \end{itemize}
+    \end{block}
+
+    Here $\psi$ is just a label for the vector. $\ket{\cdot}$ is called the ket (column vector), while its counterpart $\bra{\psi}$ is called the bra and denotes the vector dual to $\ket{\psi}$ (a row vector, i.e., the linear functional associated with $\ket{\psi}$).
+
 \end{frame}
 
-\begin{frame}{Polarization experiment}
+\begin{frame}{Quantum measurements}
 
-    \vspace{0.5em}
-    Now consider three filters $F_1,F_2,F_3$ with directions
-    $$
-    \alpha_1,\alpha_2,\alpha_3.
-    $$
-    Testing them pairwise suggests introducing three $0$--$1$ random variables
-    $$
-    P_1,P_2,P_3,
-    $$
-    where $P_i=1$ means that the photon passes filter $F_i$.
+\begin{block}{Definition of Quantum Measurement}
 
-    \vspace{0.5em}
-    If these were classical random variables on one probability space, they would satisfy a Bell-type inequality.
+    A measurement (observation) of a system prepared in a given state produces an outcome $x$, where $x$ is a physical event belonging to the set $X$ of all possible outcomes. For each $x$, we associate a measurement operator $M_x$ on $\mathscr{H}$.
+
+    Given the initial state (pure state, unit vector) $u$, the probability of measurement outcome $x$ is given by:
+    $$
+        p(x)=\|M_xu\|^2
+    $$
+    Note that to make sense of this definition, the collection of measurement operators $\{M_x\}$ must satisfy the completeness requirement:
+    $$
+        1=\sum_{x\in X} p(x)=\sum_{x\in X}\|M_xu\|^2=\sum_{x\in X}\langle M_xu,M_xu\rangle=\langle u,(\sum_{x\in X}M_x^*M_x)u\rangle
+    $$
+    So $\sum_{x\in X}M_x^*M_x=I$ (Law of total probability).
+
+\end{block}
 \end{frame}
 
-\begin{frame}{A classical Bell-type inequality}
-    \begin{block}{Bell-type inequality}
-    For any classical random variables $P_1,P_2,P_3\in\{0,1\}$,
+\begin{frame}{Backgrounds: Motivation of Tensor product}
+
+Recall that the product space of two vector spaces $V$ and $W$, denoted $V\times W$, is the set of all ordered pairs $(\ket{v},\ket{w})$ where $\ket{v}\in V$ and $\ket{w}\in W$.
+
+The space has dimension $\dim V+\dim W$.
+
+We want to define a vector space that carries a notion of multiplication of two vectors from different vector spaces.
+
+That is
+
+$$
+    (\ket{v_1}+\ket{v_2})\otimes \ket{w}=(\ket{v_1}\otimes \ket{w})+(\ket{v_2}\otimes \ket{w})
+$$
+$$
+    \ket{v}\otimes (\ket{w_1}+\ket{w_2})=(\ket{v}\otimes \ket{w_1})+(\ket{v}\otimes \ket{w_2})
+$$
+
+and enables scalar multiplication by
+
+$$
+    \lambda (\ket{v}\otimes \ket{w})=(\lambda \ket{v})\otimes \ket{w}=\ket{v}\otimes (\lambda \ket{w})
+$$
+
+And we wish to build a way to associate the basis of $V$ and $W$ with the basis of $V\otimes W$. That makes the tensor product a vector space with dimension $\dim V\times \dim W$.
+
+\end{frame}
+
+\begin{frame}{Backgrounds: Tensor product of vectors}
+
+    \begin{block}{Definition of Bilinear functional}
+    A bilinear functional is a function $\beta:V\times W\to \mathbb{F}$ such that $\ket{v}\mapsto \beta(\ket{v},\ket{w})$ is a linear functional for all $\ket{w}\in W$ and $\ket{w}\mapsto \beta(\ket{v},\ket{w})$ is a linear functional for all $\ket{v}\in V$.
+
+    \end{block}
+
+    The vector space of all bilinear functionals is denoted by $\mathcal{B}(V, W)$.
+    \begin{block}{Definition of Tensor product of vectors}
+        Let $V, W$ be two vector spaces.
+
+        Let $V'$ and $W'$ be the dual spaces of $V$ and $W$, respectively, that is $V'=\{\psi:V\to \mathbb{F}\}$ and $W'=\{\phi:W\to \mathbb{F}\}$, $\psi, \phi$ are linear functionals.
+
+        The \textbf{tensor product of vectors} $v\in V$ and $w\in W$ is the bilinear functional $v\otimes w$ on $V'\times W'$ defined, for all $(\psi,\phi)\in V'\times W'$, by
+
+        $$
+            (v\otimes w)(\psi,\phi)=\psi(v)\phi(w)
+        $$
+    \end{block}
+\end{frame}
+
+\begin{frame}{Backgrounds: Tensor product of vector spaces}
+    \begin{block}{Definition of Tensor product of vector spaces}
+    The tensor product of two vector spaces $V$ and $W$ is the vector space $\mathcal{B}(V',W')$
+
+    Notice that a basis of this vector space is built from bases of $V$ and $W$: if $\{e_i\}$ is a basis of $V$ and $\{f_j\}$ is a basis of $W$, then $\{e_i\otimes f_j\}$ is a basis of $\mathcal{B}(V', W')$.
+
+    Since $\{e_i\}$ and $\{f_j\}$ are bases of $V$ and $W$, respectively, we can always find dual sets of linear functionals $\{\phi_i\}\subset V'$ and $\{\psi_j\}\subset W'$ such that $\phi_i(e_j)=\delta_{ij}$ and $\psi_j(f_i)=\delta_{ij}$. (Here $\delta_{ij}=1$ if $i=j$ and $0$ otherwise.)
+
     $$
-    \operatorname{Prob}(P_1=1,P_3=0)
-    \leq
-    \operatorname{Prob}(P_1=1,P_2=0)
-    +
-    \operatorname{Prob}(P_2=1,P_3=0).
+        V\otimes W=\left\{\sum_{i=1}^n \sum_{j=1}^m a_{ij} \phi_i(v)\psi_j(w): \phi_i\in V', \psi_j\in W'\right\}
     $$
     \end{block}
 
-    \vspace{0.5em}
-    \begin{proof}
-    The event $\{P_1=1,P_3=0\}$ splits into two disjoint cases according to whether $P_2=0$ or $P_2=1$:
-    $$
-    \{P_1=1,P_3=0\}
-    =
-    \{P_1=1,P_2=0,P_3=0\}
-    \sqcup
-    \{P_1=1,P_2=1,P_3=0\}.
-    $$
-    Therefore,
-    $$
-    \begin{aligned}
-    \operatorname{Prob}(P_1=1,P_3=0)
-    &=
-    \operatorname{Prob}(P_1=1,P_2=0,P_3=0) \\
-    &\quad+
-    \operatorname{Prob}(P_1=1,P_2=1,P_3=0) \\
-    &\leq
-    \operatorname{Prob}(P_1=1,P_2=0)
-    +
-    \operatorname{Prob}(P_2=1,P_3=0).
-    \end{aligned}
-    $$
-    \end{proof}
+Note that $\sum_{i=1}^n \sum_{j=1}^m a_{ij} \phi_i(v)\psi_j(w)$ is a bilinear functional that maps $V'\times W'$ to $\mathbb{F}$.
 \end{frame}
 
-\begin{frame}{Experimental law}
-    For unpolarized incoming light, the \textbf{observed transition law} for a pair of filters is
-    $$
-    \operatorname{Prob}(P_i=1,P_j=0)
-    =
-    \operatorname{Prob}(P_i=1)-\operatorname{Prob}(P_i=1,P_j=1).
-    $$
-
-    Using the polarization law,
-    $$
-    \operatorname{Prob}(P_i=1)=\frac12,
-    \qquad
-    \operatorname{Prob}(P_i=1,P_j=1)=\frac12\cos^2(\alpha_i-\alpha_j),
-    $$
-    hence
-    $$
-    \operatorname{Prob}(P_i=1,P_j=0)
-    =
-    \frac12-\frac12\cos^2(\alpha_i-\alpha_j)
-    =
-    \frac12\sin^2(\alpha_i-\alpha_j).
-    $$
-
-    \vspace{0.5em}
-    So the experimentally observed probabilities depend only on the angle difference $\alpha_i-\alpha_j$.
-\end{frame}
-
-\begin{frame}{Violation of the classical inequality}
-    Substituting the experimental law into the classical inequality gives
-    $$
-    \frac12\sin^2(\alpha_1-\alpha_3)
-    \leq
-    \frac12\sin^2(\alpha_1-\alpha_2)
-    +
-    \frac12\sin^2(\alpha_2-\alpha_3).
-    $$
-
-    Choose
-    $$
-    \alpha_1=0,\qquad
-    \alpha_2=\frac{\pi}{6},\qquad
-    \alpha_3=\frac{\pi}{3}.
-    $$
-
-    Then
-    $$
-    \begin{aligned}
-    \frac12\sin^2\!\left(-\frac{\pi}{3}\right)
-    &\leq
-    \frac12\sin^2\!\left(-\frac{\pi}{6}\right)
-    +
-    \frac12\sin^2\!\left(-\frac{\pi}{6}\right) \\
-    \frac38 &\leq \frac18+\frac18 \\
-    \frac38 &\leq \frac14,
-    \end{aligned}
-    $$
-    which is false.
-
-    \vspace{0.5em}
-    Therefore the pairwise polarization data cannot come from one classical probability model with random variables $P_1,P_2,P_3$.
-\end{frame}
-
-\begin{frame}{The quantum model of polarization}
-    The correct model uses a Hilbert space rather than classical events.
-
-    \begin{itemize}
-        \item A pure polarization state is a vector
-        $$
-        \psi=\alpha|0\rangle+\beta|1\rangle \in \mathbb{C}^2.
-        $$
-        \item A filter at angle $\alpha$ is represented by the orthogonal projection
-        $$
-        P_\alpha=
-        \begin{pmatrix}
-        \cos^2\alpha & \cos\alpha\sin\alpha \\
-        \cos\alpha\sin\alpha & \sin^2\alpha
-        \end{pmatrix}.
-        $$
-        \item For a pure state $\psi$, the probability of passing the filter is
-        $$
-        \langle P_\alpha\psi,\psi\rangle.
-        $$
-    \end{itemize}
-
-    \vspace{0.4em}
-    The key point is that sequential measurements are described by \emph{ordered products} of projections, and these need not commute.
-\end{frame}
-
-\begin{frame}{Recovering the observed law from the operator model}
-    Assume the incoming light is unpolarized, so its state is the density matrix
-    $$
-    \rho=\frac12 I.
-    $$
-
-    The probability of passing the first filter $P_{\alpha_i}$ is
-    $$
-    \operatorname{Prob}(P_i=1)
-    =
-    \operatorname{tr}(\rho P_{\alpha_i})
-    =
-    \frac12\operatorname{tr}(P_{\alpha_i})
-    =
-    \frac12.
-    $$
-
-    If the photon passes the first filter, the post-measurement state is
-    $$
-    \rho_i
-    =
-    \frac{P_{\alpha_i}\rho P_{\alpha_i}}{\operatorname{tr}(\rho P_{\alpha_i})}
-    =
-    P_{\alpha_i}.
-    $$
-
-
-    $$
-    P_\alpha=
-    \begin{pmatrix}
-    \cos^2\alpha & \cos\alpha\sin\alpha \\
-    \cos\alpha\sin\alpha & \sin^2\alpha
-    \end{pmatrix}.
-    $$
-
-
-    Therefore
-    $$
-    \operatorname{Prob}(P_j=1\mid P_i=1)
-    =
-    \operatorname{tr}(\rho_i P_{\alpha_j})
-    =
-    \operatorname{tr}(P_{\alpha_i}P_{\alpha_j})
-    =
-    \cos^2(\alpha_i-\alpha_j).
-    $$
-
-\end{frame}
-\begin{frame}{Recovering the observed law from the operator model (cont.)}
+\begin{frame}{Backgrounds: Trace}
     
+\label{defn:trace}
 
-    $$
-    \begin{aligned}
-    \operatorname{Prob}(P_i=1,P_j=0)
-    &=
-    \operatorname{Prob}(P_i=1)
-    \bigl(1-\operatorname{Prob}(P_j=1\mid P_i=1)\bigr) \\
-    &=
-    \frac12\bigl(1-\cos^2(\alpha_i-\alpha_j)\bigr) \\
-    &=
-    \frac12\sin^2(\alpha_i-\alpha_j).
-    \end{aligned}
-    $$
+\begin{block}{Trace}
+Let $T$ be a linear operator on $\mathscr{H}$, let $(e_1,e_2,\cdots,e_n)$ be an orthonormal basis of $\mathscr{H}$, and let $(\epsilon_1,\epsilon_2,\cdots,\epsilon_n)$ be the corresponding dual basis of $\mathscr{H}^*$, so that $\epsilon_i(e_j)=\delta_{ij}$. Then the trace of $T$ is defined by
 
-    This matches the experiment exactly.
+$$
+\operatorname{Tr}(T)=\sum_{i=1}^n \epsilon_i(T(e_i))=\sum_{i=1}^n \langle e_i,T(e_i)\rangle
+$$
+
+\end{block}
+
+This is equivalent to the sum of the diagonal elements of $T$.
+
+\vspace{1em}
+Q: How do we generalize the trace to a subsystem of a larger, entangled quantum system $A\otimes B$?
 \end{frame}
 
-\begin{frame}{Conclusion}
-    \begin{itemize}
-        \item The classical model predicts a Bell-type inequality for three $0$--$1$ random variables.
-        \item The polarization experiment violates that inequality.
-        \item The resolution is that the quantities measured are \emph{sequential probabilities}, not joint probabilities of classical random variables.
-        \item In quantum probability, events are modeled by projections on a Hilbert space, and measurement order matters.
-    \end{itemize}
+\begin{frame}{Backgrounds: Partial trace}
 
-    \vspace{0.6em}
-    This is one of the basic motivations for passing from classical probability to non-commutative probability.
+\begin{block}{Definition of Partial trace}
+
+Let $T$ be a linear operator on $\mathscr{H}=\mathscr{A}\otimes \mathscr{B}$, where $\mathscr{A}$ and $\mathscr{B}$ are finite-dimensional Hilbert spaces.
+
+An operator $T$ on $\mathscr{H}=\mathscr{A}\otimes \mathscr{B}$ can be written as 
+
+$$
+T=\sum_{i=1}^n a_i A_i\otimes B_i
+$$
+
+where $A_i$ is a linear operator on $\mathscr{A}$ and $B_i$ is a linear operator on $\mathscr{B}$.
+
+The $\mathscr{B}$-partial trace of $T$ (where $\operatorname{Tr}_{\mathscr{B}}:\mathcal{L}(\mathscr{A}\otimes \mathscr{B})\to \mathcal{L}(\mathscr{A})$) is the linear operator on $\mathscr{A}$ defined by
+
+$$
+\operatorname{Tr}_{\mathscr{B}}(T)=\sum_{i=1}^n a_i \operatorname{Tr}(B_i) A_i
+$$
+
+\end{block}
+
+\end{frame}
+
+\begin{frame}{Information theory in classical systems}
+
+In probability theory, an important measure of uncertainty is entropy.
+
+It characterizes the information content of a random variable.
+
+\begin{block}{Shannon entropy}
+Given a classical probability vector $p=(p_1,\dots,p_n)$ with $\sum_i p_i=1$,
+$$
+H(p)=-\sum_{i=1}^n p_i \log_2 p_i.
+$$
+This measures uncertainty of a \emph{chosen measurement outcome}.
+\end{block}
+
+\end{frame}
+
+\begin{frame}{Information theory in quantum systems}
+
+
+\begin{block}{von Neumann entropy}
+For a density matrix $\rho$,
+$$
+S(\rho)=-\operatorname{Tr}(\rho\log_2\rho).
+$$
+This measures the intrinsic mixedness of the quantum state and is basis-independent.
+\end{block}
+
+\begin{block}{Entanglement entropy}
+For a bipartite pure state $|\Psi\rangle\in \mathcal{H}_A\otimes\mathcal{H}_B$, define the reduced state $\rho_A=\operatorname{Tr}_B\bigl(|\Psi\rangle\langle\Psi|\bigr).$ Its entanglement entropy is
+$$
+E(|\Psi\rangle)=S(\rho_A).
+$$
+Thus entanglement entropy is the von Neumann entropy of a subsystem, and it measures how entangled the bipartite pure state is.
+\end{block}
+
+\end{frame}
+
+\begin{frame}{Conclusion of Non-commutative probability space}
+
+    \begin{table}[H]
+    \centering
+    {\tiny
+        \begin{tabular}{|p{0.45\linewidth}|p{0.45\linewidth}|}
+            \hline
+            \textbf{Classical probability}                                                                                                                      & \textbf{Non-commutative probability}                                                                                                                      \\
+            \hline
+            Sample space $\Omega$, cardinality $\vert\Omega\vert=n$, example: $\Omega=\{0,1\}$                                                                  & Complex Hilbert space $\mathscr{H}$, dimension $\dim\mathscr{H}=n$, example: $\mathscr{H}=\mathbb{C}^2$                                                   \\
+            \hline
+            Common algebra of $\mathbb{C}$-valued functions                                                                                                     & Algebra of bounded operators $\mathscr{B}(\mathscr{H})$                                                                                                   \\
+            \hline
+            $f\mapsto \bar{f}$ complex conjugation                                                                                                              & $P\mapsto P^*$ adjoint                                                                                                                                    \\
+            \hline
+            Events: indicator functions of sets                                                                                                                 & Projections: space of orthogonal projections $\mathscr{P}\subseteq\mathscr{B}(\mathscr{H})$                                                               \\
+            \hline
+            functions $f$ such that $f^2=f=\overline{f}$                                                                                                        & orthogonal projections $P$ such that $P^*=P=P^2$                                                                                                          \\
+            \hline
+            $\mathbb{R}$-valued functions $f=\overline{f}$                                                                                                      & self-adjoint operators $A=A^*$                                                                                                                            \\
+            \hline
+            $\mathbb{I}_{f^{-1}(\{\lambda\})}$ is the indicator function of the set $f^{-1}(\{\lambda\})$                                                       & $P(\lambda)$ is the orthogonal projection to eigenspace                                                                                                   \\
+            \hline
+            $f=\sum_{\lambda\in \operatorname{Range}(f)}\lambda \mathbb{I}_{f^{-1}(\{\lambda\})}$                                                               & $A=\sum_{\lambda\in \operatorname{sp}(A)}\lambda P(\lambda)$                                                                                              \\
+            \hline
+            Probability measure $\mu$ on $\Omega$                                                                                                               & Density operator $\rho$ on $\mathscr{H}$                                                                                                                  \\
+            \hline
+            Delta measure $\delta_\omega$                                                                                                                       & Pure state $\rho=\vert\psi\rangle\langle\psi\vert$                                                                                                        \\
+            \hline
+            $\mu$ is non-negative measure and $\sum_{i=1}^n\mu(\{i\})=1$                                                                                        & $\rho$ is positive semi-definite and $\operatorname{Tr}(\rho)=1$                                                                                          \\
+            \hline
+            Expected value of random variable $f$ is $\mathbb{E}_{\mu}(f)=\sum_{i=1}^n f(i)\mu(\{i\})$                                                          & Expected value of operator $A$ is $\mathbb{E}_\rho(A)=\operatorname{Tr}(\rho A)$                                                                          \\
+            \hline
+            Variance of random variable $f$ is $\operatorname{Var}_\mu(f)=\sum_{i=1}^n (f(i)-\mathbb{E}_\mu(f))^2\mu(\{i\})$                                    & Variance of operator $A$ is $\operatorname{Var}_\rho(A)=\operatorname{Tr}(\rho A^2)-\operatorname{Tr}(\rho A)^2$                                          \\
+            \hline
+            Covariance of random variables $f$ and $g$ is $\operatorname{Cov}_\mu(f,g)=\sum_{i=1}^n (f(i)-\mathbb{E}_\mu(f))(g(i)-\mathbb{E}_\mu(g))\mu(\{i\})$ & Covariance of operators $A$ and $B$ is $\operatorname{Cov}_\rho(A,B)=\operatorname{Tr}(\rho A\circ B)-\operatorname{Tr}(\rho A)\operatorname{Tr}(\rho B)$ \\
+            \hline
+            Composite system is given by Cartesian product of the sample spaces $\Omega_1\times\Omega_2$                                                        & Composite system is given by tensor product of the Hilbert spaces $\mathscr{H}_1\otimes\mathscr{H}_2$                                                     \\
+            \hline
+            Product measure $\mu_1\times\mu_2$ on $\Omega_1\times\Omega_2$                                                                                      & Tensor product state $\rho_1\otimes\rho_2$ on $\mathscr{H}_1\otimes\mathscr{H}_2$                                                                         \\
+            \hline
+            Marginal distribution $\pi_*v$                                                                                                                      & Partial trace $\operatorname{Tr}_2(\rho)$                                                                                                                 \\
+            \hline
+        \end{tabular}
+    }
+    \vspace{0.5cm}
+\end{table}
+\end{frame}
+
+
+\begin{frame}{So what?}
+    
+\begin{block}{Lemma: That's all we need.}
+ All quantum operations can be constructed by composing four kinds of transformations: 
+
+  \begin{enumerate}
+    \item Unitary operations. $U(\cdot)$ for any quantum state. $A^* A=AA^*=I$, $A$ is the matrix of $U$. (It is possible to apply a non-unitary operation for an open quantum system, but usually leads to non-recoverable loss of information)
+    \item Extend the system. Given a quantum state $\rho\in\mathcal{H}^N$, we can extend it to a larger quantum system by ``entangling'' it with some new state $\sigma\in \mathcal{H}^K$ and get $\rho'=\rho\otimes\sigma\in \mathcal{H}^N\otimes \mathcal{H}^K$.
+    \item Partial trace. Given a quantum state $\rho\in\mathcal{H}^N$ and some reference state $\sigma\in\mathcal {H}^K$, we can trace out some subsystems and get a new state $\rho'\in\mathcal{H}^{N-K}$.
+    \item Selective measurement. Given a quantum state, we measure it and get a classical bit.
+  \end{enumerate}
+\end{block}
 \end{frame}
 
 \section{Concentration on Spheres and quantum states}
+
 \begin{frame}{Quantum states: pure vs.\ mixed}
     \begin{itemize}
         \item A finite-dimensional quantum system is modeled by a complex Hilbert space (a complete inner product space)
@@ -315,8 +378,8 @@
         \psi \in \mathcal H, \qquad \|\psi\|=1.
         $$
         \item A \textbf{mixed state} is represented by a density matrix
-        $$
-        \rho \geq 0, \qquad \operatorname{tr}(\rho)=1.
+        $$    
+        \rho=\sum_{j=1}^n p_j|\psi_j\rangle\langle\psi_j|
         $$
         \item Pure states describe maximal information; mixed states describe probabilistic mixtures or partial information.
     \end{itemize}
@@ -327,7 +390,7 @@
     \end{block}
 \end{frame}
 
-\begin{frame}{Why pure states are not vectors}
+\begin{frame}{Pure states live in the complex projective space}
     \begin{itemize}
         \item Two nonzero vectors that differ by a nonzero complex scalar represent the same physical state:
         $$
@@ -338,16 +401,16 @@
     \end{itemize}
 
     \vspace{0.4em}
-    Hence the space of pure states is
+    Hence the space of pure states (denoted by $\mathcal{P}(\mathcal H)$) is
     $$
-    \mathbb P(\mathcal H)
+    \mathcal{P}(\mathcal H)
     =
     (\mathcal H \setminus \{0\})/\mathbb C^\times.
     $$
 
     After choosing a basis $\mathcal H \cong \mathbb C^{n+1}$, this becomes
     $$
-    \mathbb P(\mathcal H) \cong \mathbb C P^n.
+    \mathcal{P}(\mathcal H) \cong \mathbb C P^n.
     $$
 \end{frame}
 
@@ -375,83 +438,86 @@
     is the \textbf{Hopf fibration}.
 \end{frame}
 
-\begin{frame}{How the metric descends to $\mathbb C P^n$}
-    \begin{itemize}
-        \item The sphere $S^{2n+1}$ inherits the round metric from the Euclidean metric on
-        $$
-        \mathbb C^{n+1} \cong \mathbb R^{2n+2}.
-        $$
-        \item The fibers of the Hopf map are circles
-        $$
-        p^{-1}([z]) = \{e^{i\theta}z : \theta \in \mathbb R\}.
-        $$
-        \item Tangent vectors split into:
-        \begin{itemize}
-            \item \textbf{vertical directions}: tangent to the $S^1$-fiber,
-            \item \textbf{horizontal directions}: orthogonal complement to the fiber.
-        \end{itemize}
-        \item The differential $dp$ identifies horizontal vectors on the sphere with tangent vectors on $\mathbb C P^n$.
-    \end{itemize}
+\begin{frame}{The induced Riemannian metric: Fubini--Study metric}
 
-    \vspace{0.4em}
-    This allows the round metric on $S^{2n+1}$ to define a metric on $\mathbb C P^n$.
+    
+\begin{block}{Definition of Riemannian metric}
+
+  Let $M$ be a smooth manifold. A \textit{\textbf{Riemannian metric}} on $M$ is a smooth covariant $2$-tensor field $g\in \mathcal{T}^2(M)$ such that for each $p\in M$, $g_p$ is an inner product on $T_pM$ (the vector space of tangent vectors to $M$ at $p$).
+
+  In particular, $g_p(v,v)\geq 0$ for each $p\in M$ and each $v\in T_pM$, with equality if and only if $v=0$.
+
+\end{block}
+\begin{itemize}
+    \item 
+    The geometric picture is
+    $$
+    S^{2n+1}
+    \xrightarrow{\text{Hopf fibration}}
+    \mathbb C P^n,
+    \qquad
+    \text{round metric}
+    \rightsquigarrow
+    \text{Fubini--Study metric}.
+    $$
+    
+    
+    \begin{columns}[T]
+        \column{0.5\textwidth}
+        The sphere $S^{2n+1}\subset \mathbb C^{n+1}$ has the \textbf{round metric}
+    $$
+    g_{\mathrm{round}}=\sum_{j=0}^n (dx_j^2+dy_j^2)\big|_{S^{2n+1}},
+    $$
+    induced from the Euclidean metric on $\mathbb R^{2n+2}$.
+
+    \column{0.5\textwidth} In homogeneous coordinates $[z]\in\mathbb C P^n$, the \textbf{Fubini--Study metric} is
+    $$
+    g_{FS}
+    =
+    \frac{\langle dz,dz\rangle \langle z,z\rangle-|\langle z,dz\rangle|^2}{\langle z,z\rangle^2}.
+    $$
+    \end{columns}
+
+\end{itemize}
 \end{frame}
 
-\begin{frame}{The induced metric: Fubini--Study metric}
-    \begin{itemize}
-        \item The metric on $\mathbb C P^n$ obtained from the Hopf quotient is the
-        \textbf{Fubini--Study metric}.
-        \item So the geometric picture is:
-        $$
-        S^{2n+1}
-        \xrightarrow{\text{Hopf fibration}}
-        \mathbb C P^n
-        $$
-        $$
-        \text{round metric}
-        \rightsquigarrow
-        \text{Fubini--Study metric}.
-        $$
-        \item The normalized Riemannian volume measure induced by this metric gives the natural probability measure on pure states.
-    \end{itemize}
+\begin{frame}{So what?}
 
-    \vspace{0.5em}
-    \begin{block}{Proof roadmap}
-    To prove this carefully, one usually shows:
-    \begin{enumerate}
-        \item $p:S^{2n+1}\to \mathbb C P^n$ is a smooth surjective submersion,
-        \item the vertical space is the tangent space to the $S^1$-orbit,
-        \item horizontal lifts are well defined,
-        \item the quotient metric is exactly the Fubini--Study metric.
-    \end{enumerate}
-    \end{block}
+    With everything we have here, we are ready to answer the question:
+
+    \vspace{2em}
+
+    \begin{center}
+        \textbf{How is a random bipartite pure state $\psi\in\mathcal{P}(A\otimes B)$ distributed on the complex projective space? And how entangled is it, as measured by $H(\psi_A)$?}
+    \end{center}
+    
 \end{frame}
 
+\section{Volume Distribution in High Dimensional Spaces}
+
 \begin{frame}{Maxwell-Boltzmann Distribution Law}
-    \begin{columns}[T]
-        \column{0.58\textwidth}
-        Consider the orthogonal projection
+        \begin{figure}[H]
+            \includegraphics[width=0.7\textwidth]{../latex/images/maxwell.png}
+        \end{figure}
+        For $0\leq k< n$, consider the orthogonal projection
         $$
         \pi_{n,k}:S^n(\sqrt{n})\to \mathbb{R}^k.
         $$
-        Its push-forward measure converges to the standard Gaussian:
+        Its push-forward measure converges to the standard Gaussian measure as the dimension $n\to \infty$:
         $$
         (\pi_{n,k})_*\sigma^n\to \gamma^k.
         $$
 
-        \vspace{0.5em}
-        This explains why Gaussian behavior emerges from high-dimensional spheres and supports the proof strategy for Levy concentration.
-
-        \column{0.42\textwidth}
-        \begin{figure}
-            \includegraphics[width=\textwidth]{../latex/images/maxwell.png}
-        \end{figure}
-    \end{columns}
+        Another familiar name when $k=1$ is the central limit theorem.
 \end{frame}
 
 \begin{frame}{Levy Concentration}
-    \begin{block}{Levy's theorem}
-        If $f:S^n\to \mathbb{R}$ is $1$-Lipschitz, then there exists a median $a_0$ such that
+
+    \begin{block}{Definition of Lipschitz function}
+         A function $f:X\to Y$, where $(X,d_X)$ and $(Y,d_Y)$ are metric spaces, is $L$-Lipschitz for a constant $L\geq 0$ if $d_Y(f(x),f(y))\leq L\,d_X(x,y)$ for all $x,y\in X$.
+    \end{block}
+    \begin{block}{Levy's lemma}
+        If $f:S^n\to \mathbb{R}$ is $1$-Lipschitz, then there exists a number $a_0$ such that for every $\epsilon>0$,
         $$
         \mu\{x\in S^n:|f(x)-a_0|\geq \epsilon\}
         \leq
@@ -461,33 +527,12 @@
 
     \begin{itemize}
         \item In high dimension, most Lipschitz observables are almost constant.
-        \item This is the geometric mechanism behind generic entanglement.
+        \item Here $a_0$ plays the role of a ``median'' of $f$ on $S^n$: at least half of the measure of $S^n$ has $f\leq a_0$, and at least half has $f\geq a_0$.
     \end{itemize}
 \end{frame}
 
-
 \section{Main Result}
 
-\begin{frame}{Generic Entanglement Theorem}
-    \begin{block}{Hayden--Leung--Winter}
-        Let $\psi\in \mathcal{P}(A\otimes B)$ be Haar-random and define
-        $$
-        \beta=\frac{1}{\ln(2)}\frac{d_A}{d_B}.
-        $$
-        For $d_B\geq d_A\geq 3$,
-        $$
-        \operatorname{Pr}[H(\psi_A)<\log_2(d_A)-\alpha-\beta]
-        \leq
-        \exp\left(
-        -\frac{1}{8\pi^2\ln(2)}
-        \frac{(d_Ad_B-1)\alpha^2}{(\log_2 d_A)^2}
-        \right).
-        $$
-    \end{block}
-
-    With overwhelming probability, a random pure state is almost maximally entangled.
-\end{frame}
-
 \begin{frame}{How the Entropy Observable Fits In}
     \begin{figure}
         \centering
@@ -510,8 +555,8 @@
     \end{figure}
 
     \begin{itemize}
+        \item Recall that $\mathcal{P}(A\otimes B)$ is the set of pure states on $A\otimes B$, $\operatorname{Tr}_B$ is the partial trace over $B$, $\mathcal{S}(A)$ is the set of mixed states on $A$, $H$ is the Shannon entropy function, and $H(\psi_A)$ is the entanglement entropy.
         \item The red arrow is the observable to which concentration is applied.
-        \item The projective description is natural because global phase does not change the physical state.
     \end{itemize}
 \end{frame}
 
@@ -536,11 +581,40 @@
     Levy concentration plus these two estimates produces the exponential entropy tail bound.
 \end{frame}
 
-\section{Geometry of State Space}
+
+\begin{frame}{Generic Entanglement Theorem}
+    \begin{block}{Hayden--Leung--Winter}
+        Let $\psi\in \mathcal{P}(A\otimes B)$ be a random pure state on $A\otimes B$ and define
+        $$
+        \beta=\frac{1}{\ln(2)}\frac{d_A}{d_B}.
+        $$
+        For $d_B\geq d_A\geq 3$ and any $\alpha\geq 0$ of our choice,
+        $$
+        \operatorname{Pr}[H(\psi_A)<\log_2(d_A)-\alpha-\beta]
+        \leq
+        \exp\left(
+        -\frac{1}{8\pi^2\ln(2)}
+        \frac{(d_Ad_B-1)\alpha^2}{(\log_2 d_A)^2}
+        \right).
+        $$
+    \end{block}
+
+    As $d_B\to \infty$, with overwhelming probability $1-\exp\left(
+        -\frac{1}{8\pi^2\ln(2)}
+        \frac{(d_Ad_B-1)\alpha^2}{(\log_2 d_A)^2}
+        \right)=1-\Theta(e^{-c d_B})$, a random pure state is almost maximally entangled, with entropy about $\log_2(d_A)-\frac{1}{2\ln(2)}\frac{d_A}{d_B}=\log_2(d_A)-\Theta\left(\frac{1}{d_B}\right)$.
+\end{frame}
+
+\begin{frame}{A natural question from the observables}
+    
+    \textbf{How does the Hayden--Leung--Winter theorem generalize to the behavior of Lipschitz functions $S^n\to \mathbb{R}$ and Lipschitz functions $\mathbb{C}P^n\to \mathbb{R}$ as $n\to \infty$?}
+\end{frame}
+
+\section{Metric-Measure space after Gromov}
 
 \begin{frame}{Observable diameter: the inner definition}
     \begin{block}{Partial diameter on $\mathbb{R}$}
-        Let $\nu$ be a Borel probability measure on $\mathbb{R}$ and let $\alpha \in (0,1]$.
+        Let $\nu$ (nu) be a Borel probability measure on $\mathbb{R}$ and let $\alpha \in (0,1]$.
         The \textbf{partial diameter} of $\nu$ at mass level $\alpha$ is
         $$
         \diameter(\nu;\alpha):=
@@ -586,9 +660,8 @@
 \end{frame}
 
 \begin{frame}{A Geometric Consequence}
-    In this thesis, entropy functions are used as concrete observables to estimate observable diameter, and the Hopf fibration helps transfer information between $S^{2n+1}$ and $\mathbb{C}P^n$.
     \vspace{0.4em}
-    \begin{block}{Projective-space estimate}
+    \begin{block}{Projective-space estimate from Gromov}
         For $0<\kappa<1$,
         $$
         \obdiam(\mathbb{C}P^n(1);-\kappa)\leq O(\sqrt{n}).
@@ -600,6 +673,30 @@
         \item Then use the Hopf map $S^{2n+1}(1)\to \mathbb{C}P^n$.
         \item This gives a geometric explanation for why many projective-space observables concentrate.
     \end{itemize}
+
+
+    Hayden's work suggests that if the entropy function is a ``good'' proxy for the observable diameter, the difference in \textbf{the order of growth} between $\obdiam(\mathbb{C}P^n(1);-\kappa)$ and $\obdiam(S^{2n+1}(1);-\kappa)$ should be smaller than $O(\sqrt{n})$.
+\end{frame}
+
+\begin{frame}{A conjecture}
+    \begin{block}{Wu's conjecture}
+
+        For $0<\kappa<1$,
+        $$
+        \obdiam(\mathbb{C}P^n(1);-\kappa)= O(\sqrt{n}).
+        $$
+    
+    \end{block}
+
+    Additional work needs to be done to verify this conjecture.
+\begin{itemize}
+    \item The entropy function is not globally Lipschitz, so we need to bound the deficit of the entropy function.
+
+    \item Normalize by the Lipschitz constant of $f$ to obtain a weak lower bound for the observable diameter with the algebraic varieties on $\mathbb{C}P^n(1)$.
+
+    \item Continue to study and interpret the overall concentration mechanism geometrically through the positive Ricci curvature of the Fubini--Study metric and Lévy--Gromov type inequalities.
+\end{itemize}
+    
 \end{frame}
 
 \section{Numerical Section}
@@ -612,8 +709,7 @@
         \item Compare concentration across:
         \begin{itemize}
             \item real spheres,
-            \item complex projective spaces,
-            \item symmetric states via Majorana stellar representation.
+            \item complex projective spaces.
         \end{itemize}
     \end{itemize}
 \end{frame}
@@ -639,28 +735,65 @@
     As dimension increases, the entropy distribution concentrates near the maximal value.
 \end{frame}
 
+\begin{frame}{Results for concentration of random states in lower dimensional spaces}
+    \begin{columns}[T]
+        \column{0.5\textwidth}
+        \begin{figure}
+            \includegraphics[width=\textwidth]{../results/exp-20260311-154003/hist_sphere_16.png}
+        \end{figure}
+        \centering
+        Entropy distribution for $S^{15}$
+
+        \column{0.5\textwidth}
+        \begin{figure}
+            \includegraphics[width=\textwidth]{../results/exp-20260311-154003/hist_cp_16x16.png}
+        \end{figure}
+        \centering
+        Entropy distribution for pure states on $\mathbb{C}^{16}\otimes\mathbb{C}^{16}$
+    \end{columns}
+\end{frame}
+
+
+\begin{frame}{Results for concentration of random states in higher dimensional spaces}
+    \begin{columns}[T]
+        \column{0.5\textwidth}
+        \begin{figure}
+            \includegraphics[width=\textwidth]{../results/exp-20260311-154003/hist_sphere_256.png}
+        \end{figure}
+        \centering
+        Entropy distribution for $S^{255}$
+
+        \column{0.5\textwidth}
+        \begin{figure}
+            \includegraphics[width=\textwidth]{../results/exp-20260311-154003/hist_cp_256x256.png}
+        \end{figure}
+        \centering
+        Entropy distribution for pure states on $\mathbb{C}^{256}\otimes\mathbb{C}^{256}$
+    \end{columns}
+\end{frame}
+
 \section{Conclusion}
 
 \begin{frame}{Conclusion and Outlook}
     \begin{itemize}
-        \item Concentration of measure explains generic high entanglement in large bipartite systems.
         \item Complex projective space provides the natural geometric setting for pure quantum states.
+        \item Concentration of measure explains generic high entanglement in large bipartite systems.
         \item Observable diameter gives a way to phrase concentration geometrically.
         \item Ongoing directions:
         \begin{itemize}
-            \item sharper estimates for $\mathbb{C}P^n$,
-            \item deeper use of Fubini--Study geometry,
-            \item Majorana stellar representation for symmetric states.
+            \item sharper estimates for $\mathbb{C}P^n$
+            \item deeper use of Fubini--Study geometry
+            \item recursive learning on new theorems and mathematical tools
         \end{itemize}
     \end{itemize}
 \end{frame}
 
-\section{References}
-\begin{frame}[allowframebreaks]{References}
-    \nocite{*}
-    \bibliographystyle{apalike}
-    \bibliography{references}
-\end{frame}
+% \section{References}
+% \begin{frame}[allowframebreaks]{References}
+%     \nocite{*}
+%     \bibliographystyle{apalike}
+%     \bibliography{references}
+% \end{frame}
 
 \begin{frame}
 \begin{center}