diff --git a/chapters/chap0.pdf b/chapters/chap0.pdf
index ff94b6e..e34671f 100644
Binary files a/chapters/chap0.pdf and b/chapters/chap0.pdf differ
diff --git a/chapters/chap0.tex b/chapters/chap0.tex
index 3d4cd07..f948e87 100644
--- a/chapters/chap0.tex
+++ b/chapters/chap0.tex
@@ -175,6 +175,94 @@ Note that $\sum_{i=1}^n \sum_{j=1}^m a_{ij} \phi_i(v)\psi_j(w)$ is a bilinear fu
 
 This enables basis-free construction of vector spaces with proper multiplication and scalar multiplication.
 
+\begin{examples}[Examples of tensor product for vectors]
+
+    Let $V = \mathbb{C}^2, W = \mathbb{C}^3$, choose bases $\{\ket{0}, \ket{1}\} \subset V, \{\ket{0}, \ket{1}, \ket{2}\} \subset W$.
+
+    $$
+    v=\begin{pmatrix}
+            v_1 \\
+            v_2
+        \end{pmatrix}=v_1\ket{0}+v_2\ket{1}\in V,w=\begin{pmatrix}
+            w_1 \\
+            w_2 \\
+            w_3
+        \end{pmatrix}=w_1\ket{0}+w_2\ket{1}+w_3\ket{2}\in W.
+    $$
+
+    Then the tensor product $v\otimes w$ is given by
+
+    $$
+        v\otimes w=\begin{pmatrix}
+            v_1 w_1 &v_1 w_2 &v_1 w_3 \\
+            v_2 w_1 &v_2 w_2 &v_2 w_3
+        \end{pmatrix}\in \mathbb{C}^{2\times 3}\cong\mathbb{C}^6.
+    $$
+\end{examples}
+
+\begin{examples}[Examples of tensor product for vector spaces]
+
+Let $V = \mathbb{C}^2, W = \mathbb{C}^3$, choose bases $\{\ket{0}, \ket{1}\} \subset V, \{\ket{0}, \ket{1}, \ket{2}\} \subset W.$
+
+Then a basis of the tensor product is
+$$
+\{
+\ket{00}, \ket{01}, \ket{02},
+\ket{10}, \ket{11}, \ket{12}
+\},
+$$
+where $\ket{ij} := \ket{i}\otimes\ket{j}$.
+
+An example element of $V \otimes W$ is
+$$
+\ket{\psi}
+=
+2\,\ket{0}\otimes\ket{1}
++
+(1+i)\,\ket{1}\otimes\ket{0}
+-
+i\,\ket{1}\otimes\ket{2}.
+$$
+
+With respect to the ordered basis
+$$
+(\ket{00}, \ket{01}, \ket{02}, \ket{10}, \ket{11}, \ket{12}),
+$$
+this tensor corresponds to the coordinate vector
+$$
+\ket{\psi}
+\;\longleftrightarrow\;
+\begin{pmatrix}
+0\\
+2\\
+0\\
+1+i\\
+0\\
+-i
+\end{pmatrix}
+\in \mathbb{C}^6.
+$$
+
+Using the canonical identification
+$$
+\mathbb{C}^2 \otimes \mathbb{C}^3 \cong \mathbb{C}^{2\times 3},
+$$
+where
+$$
+\ket{i}\otimes\ket{j} \longmapsto E_{ij},
+$$
+the same tensor is represented by the matrix
+$$
+\ket{\psi}
+\;\longleftrightarrow\;
+\begin{pmatrix}
+0 & 2 & 0\\
+1+i & 0 & -i
+\end{pmatrix}.
+$$
+
+\end{examples}
+
 \begin{defn}
     \label{defn:inner_product_on_tensor_product}
 
@@ -186,6 +274,75 @@ This enables basis-free construction of vector spaces with proper multiplication
 \end{defn}
 
 In practice, we ignore the subscript of the vector space and just write $\langle v\otimes w, u\otimes x\rangle=\langle v,u\rangle\langle w,x\rangle$.
+\paragraph{Partial trace}
+
+\begin{defn}
+
+\label{defn:trace}
+
+Let $T$ be a linear operator on $\mathscr{H}$, let $(e_1,e_2,\cdots,e_n)$ be an orthonormal basis of $\mathscr{H}$, and let $(\epsilon_1,\epsilon_2,\cdots,\epsilon_n)$ be the corresponding dual basis of $\mathscr{H}^*$, i.e.\ $\epsilon_i(e_j)=\delta_{ij}$. Then the trace of $T$ is defined by
+
+$$
+\operatorname{Tr}(T)=\sum_{i=1}^n \epsilon_i(T(e_i))=\sum_{i=1}^n \langle e_i,T(e_i)\rangle
+$$
+
+\end{defn}
+
+This is equivalent to the sum of the diagonal entries of the matrix of $T$ with respect to the basis $(e_1,e_2,\cdots,e_n)$.
+
+\begin{defn}
+    \label{defn:partial_trace}
+
+Let $T$ be a linear operator on $\mathscr{H}=\mathscr{A}\otimes \mathscr{B}$, where $\mathscr{A}$ and $\mathscr{B}$ are finite-dimensional Hilbert spaces.
+
+An operator $T$ on $\mathscr{H}=\mathscr{A}\otimes \mathscr{B}$ can be written as 
+
+$$
+T=\sum_{i=1}^n a_i A_i\otimes B_i
+$$
+
+where $A_i$ is a linear operator on $\mathscr{A}$ and $B_i$ is a linear operator on $\mathscr{B}$.
+
+The $\mathscr{B}$-partial trace is the linear map $\operatorname{Tr}_{\mathscr{B}}:\mathcal{L}(\mathscr{A}\otimes \mathscr{B})\to \mathcal{L}(\mathscr{A})$ that sends $T$ to the linear operator on $\mathscr{A}$ defined by
+
+$$
+\operatorname{Tr}_{\mathscr{B}}(T)=\sum_{i=1}^n a_i \operatorname{Tr}(B_i) A_i
+$$
+
+
+\end{defn}
+Alternatively, for each $v\in\mathscr{B}$ we can define the map $L_v: \mathscr{A}\to \mathscr{A}\otimes \mathscr{B}$ by
+
+$$
+L_v(u)=u\otimes v
+$$
+
+Note that $\langle u,L_v^*(u'\otimes v')\rangle=\langle L_v(u),u'\otimes v'\rangle=\langle u\otimes v,u'\otimes v'\rangle=\langle u,u'\rangle \langle v,v'\rangle$.
+
+Therefore, $L_v^*\sum_{j} u_j\otimes v_j=\sum_{j} \langle v,v_j\rangle u_j$.
+
+Then the partial trace of $T$ can also be defined as follows.
+
+Let $\{v_j\}$ be an orthonormal basis of $\mathscr{B}$. Then
+
+$$
+\operatorname{Tr}_{\mathscr{B}}(T)=\sum_{j} L^*_{v_j}\,T\,L_{v_j}
+$$
+
+
+\begin{defn}
+    \label{defn:partial_trace_with_respect_to_state}
+Let $T$ be a linear operator on $\mathscr{H}=\mathscr{A}\otimes \mathscr{B}$, where $\mathscr{A}$ and $\mathscr{B}$ are finite-dimensional Hilbert spaces.
+
+Let $\rho$ be a state on $\mathscr{B}$ with orthonormal eigenbasis $\{v_j\}$ and corresponding eigenvalues $\{\lambda_j\}$.
+
+The partial trace of $T$ with respect to $\rho$ is the linear operator on $\mathscr{A}$ defined by
+
+$$
+\operatorname{Tr}_{\rho}(T)=\sum_{j} \lambda_j L^*_{v_j}\,T\,L_{v_j}
+$$
+\end{defn}
+
 
 This introduces a new model in mathematics explaining quantum mechanics: the non-commutative probability theory.
 
@@ -333,21 +490,25 @@ Recall from classical probability theory, we call the initial probability distri
     \label{defn:state}
     Non-commutative probability state:
 
-    A state on $(\mathscr{B}(\mathscr{H}),\mathscr{P})$ is a map $\rho:\mathscr{P}\to[0,1]$, (commonly named as density operator) such that:
+    Given a non-commutative probability space $(\mathscr{B}(\mathscr{H}),\mathscr{P})$,
+
+    A state is a unit vector $\ket{\psi}$ in the Hilbert space $\mathscr{H}$, i.e.\ $\langle\psi|\psi\rangle=1$.
+    
+    Every state uniquely defines a map $\rho:\mathscr{P}\to[0,1]$, $\rho(P)=\bra{\psi}P\ket{\psi}$ (commonly named as density operator) such that:
     \begin{itemize}
         \item $\rho(O)=0$, where $O$ is the zero projection, and $\rho(I)=1$, where $I$ is the identity projection.
         \item If $P_1,P_2,\ldots,P_n$ are pairwise disjoint orthogonal projections, then $\rho(P_1 + P_2 + \cdots + P_n) = \sum_{i=1}^n \rho(P_i)$.
     \end{itemize}
 \end{defn}
 
-An example of a density operator can be given as follows:
+Note that the pure states are the density operators that can be represented by a unit vector $\ket{\psi}$ in the Hilbert space $\mathscr{H}$, whereas mixed states are the density operators that cannot be represented by a unit vector in the Hilbert space $\mathscr{H}$.
 
 If $(|\psi_1\rangle,|\psi_2\rangle,\cdots,|\psi_n\rangle)$ is an orthonormal basis of $\mathscr{H}$ consisting of eigenvectors of $\rho$, for the eigenvalues $p_1,p_2,\cdots,p_n$, then $p_j\geq 0$ and $\sum_{j=1}^n p_j=1$.
 
 We can write $\rho$ as
-\[
+$$
     \rho=\sum_{j=1}^n p_j|\psi_j\rangle\langle\psi_j|
-\]
+$$
 (Under basis $|\psi_j\rangle$, it is a diagonal matrix with $p_j$ on the diagonal.)
 
 % Then we need to introduce a theorem that ensures that every state on the space of all orthogonal projections on $\mathscr{H}$ can be represented by a density operator.
@@ -423,26 +584,6 @@ When operators commute, we recover classical probability measures.
 
 \end{defn}
 
-\begin{prop}
-    \label{prop:indistinguishability}
-    Proposition of indistinguishability:
-
-    Suppose that we have two systems $u_1,u_2\in \mathscr{H}_1$, the two states are distinguishable if and only if they are orthogonal.
-\end{prop}
-
-\begin{proof}
-    Ways to distinguish the two states:
-    \begin{enumerate}
-        \item Set $X=\{0,1,2\}$ and $M_i=|u_i\rangle\langle u_i|$, $M_0=I-M_1-M_2$
-        \item Then $\{M_0,M_1,M_2\}$ is a complete collection of measurement operators on $\mathscr{H}$.
-        \item Suppose the prepared state is $u_1$, then $p(1)=\|M_1u_1\|^2=\|u_1\|^2=1$, $p(2)=\|M_2u_1\|^2=0$, $p(0)=\|M_0u_1\|^2=0$.
-    \end{enumerate}
-
-    If they are not orthogonal, then there is no choice of measurement operators to perfectly distinguish the two states.
-
-\end{proof}
-
-Intuitively, if the two states are not orthogonal, then for any measurement (projection) there exists non-zero probability of getting the same outcome for both states.
 
 Here is Table~\ref{tab:analog_of_classical_probability_theory_and_non_commutative_probability_theory} summarizing the analog of classical probability theory and non-commutative (\textit{quantum}) probability theory~\cite{Feres}:
 
@@ -495,6 +636,88 @@ Here is Table~\ref{tab:analog_of_classical_probability_theory_and_non_commutativ
     \vspace{0.5cm}
 \end{table}
 
+\subsection{Quantum physics and terminologies}
+
+In this section, we will introduce some terminologies and theorems used in quantum physics that are relevant to our study. Assuming no prior knowledge of quantum physics, we will provide brief definitions and explanations for each term.
+
+One might ask what the fundamental difference between a quantum system and a classical system is, and why we cannot directly apply the theorems for classical computers to a quantum computer. It turns out that designing quantum error-correcting codes is hard because of the following definitions and features of quantum computing.
+
+\begin{defn}
+ All quantum operations can be constructed by composing four kinds of transformations: (adapted from Chapter 10 of \cite{Bengtsson_Zyczkowski_2017})
+
+  \begin{enumerate}
+    \item Unitary operations. $U(\cdot)$ for any quantum state. It is possible to apply a non-unitary operation for an open quantum system, but that is usually not the focus for quantum computing and usually leads to non-recoverable loss of information that we wish to obtain.
+    \item Extend the system. Given a quantum state $\rho\in\mathcal{H}^N$, we can extend it to a larger quantum system by ``entangling'' it (for this report, you do not need to worry about how quantum entanglement works) with some new state $\sigma\in \mathcal{H}^K$ (the space where the new state dwells is usually called the ancilla system) and get $\rho'=\rho\otimes\sigma\in \mathcal{H}^N\otimes \mathcal{H}^K$.
+    \item Partial trace. Given a quantum state $\rho\in\mathcal{H}^N$ and some reference state $\sigma\in\mathcal {H}^K$, we can trace out some subsystems and get a new state $\rho'\in\mathcal{H}^{N-K}$.
+    \item Selective measurement. Given a quantum state, we measure it and get a classical bit; unlike the classical case, the measurement is a probabilistic operation. (More specifically, this is some projection to a reference state corresponding to a classical bit output. For this report, you don't need to worry about how such a result is obtained and how the reference state is constructed.)
+  \end{enumerate}
+\end{defn}
+
+
+$U(n)$ is the group of all $n\times n$ \textbf{unitary matrices} over $\mathbb{C}$, 
+
+$$
+U(n)=\{A\in \mathbb{C}^{n\times n}: A^*A=AA^*=I_n\}
+$$
+
+The uniqueness of the unitarily invariant (Haar) probability measure on $U(n)$ comes from the lemma below~\cite{Elizabeth_book}.
+
+\begin{lemma}
+    \label{lemma:haar_measure}
+
+    Let $(U(n), \| \cdot \|, \mu)$ be a metric measure space where $\| \cdot \|$ is the Hilbert-Schmidt norm and $\mu$ is the measure function.
+
+    The Haar measure on $U(n)$ is the unique probability measure that is invariant under the action of $U(n)$ on itself.
+    
+    That is, for every measurable set $S\subseteq U(n)$ and every $A\in U(n)$, $\mu(A\cdot S)=\mu(S\cdot A)=\mu(S)$.
+    
+    The Haar measure is the unique probability measure that is invariant under the action of $U(n)$ on itself.
+\end{lemma}
+
+
+\begin{defn}
+    \label{defn:pure_state}
+    Random pure state:
+
+A random pure state $\varphi$ is any random variable distributed according to the unitarily invariant probability measure on the pure states $\mathcal{P}(A)$ of the system $A$, denoted by $\varphi\in_R\mathcal{P}(A)$.
+\end{defn}
+
+For the space of pure states, we can easily use the Haar measure as the unitarily invariant probability measure, since the space of pure states is $S^n$ for some $n$. However, the case of mixed states is more complicated, and we need to use the partial trace to define the rank-$s$ random states.
+
+\begin{defn}
+    \label{defn:rank_s_random_state}
+    Rank-$s$ random state.
+
+    For a system $A$ and an integer $s\geq 1$, consider the distribution on the mixed states $\mathcal{S}(A)$ of $A$ induced by the partial trace over the second factor from the uniform distribution on pure states of $A\otimes\mathbb{C}^s$. Any random variable $\rho$ distributed as such will be called a rank-$s$ random state, denoted by $\rho\in_R \mathcal{S}_s(A)$. In particular, $\mathcal{P}(A)=\mathcal{S}_1(A)$.
+\end{defn}
+
+
+\begin{prop}
+    \label{prop:indistinguishability}
+    Proposition of indistinguishability:
+
+    Suppose that we have two states $u_1,u_2\in \mathscr{H}_1$. The two states are perfectly distinguishable if and only if they are orthogonal.
+\end{prop}
+
+\begin{proof}
+    Ways to distinguish the two states:
+    \begin{enumerate}
+        \item Set $X=\{0,1,2\}$ and $M_i=|u_i\rangle\langle u_i|$, $M_0=I-M_1-M_2$
+        \item Then $\{M_0,M_1,M_2\}$ is a complete collection of measurement operators on $\mathscr{H}$.
+        \item Suppose the prepared state is $u_1$, then $p(1)=\|M_1u_1\|^2=\|u_1\|^2=1$, $p(2)=\|M_2u_1\|^2=0$, $p(0)=\|M_0u_1\|^2=0$.
+    \end{enumerate}
+
+    If they are not orthogonal, then there is no choice of measurement operators to perfectly distinguish the two states.
+
+\end{proof}
+
+Intuitively, if the two states are not orthogonal, then for any measurement (projection) there exists non-zero probability of getting the same outcome for both states.
+
+\subsection{Random quantum states}
+
+First, we need to define what is a random state in a bipartite system.
+
+
 % When compiled standalone, print this chapter's references at the end.
 \ifSubfilesClassLoaded{
     \printbibliography[title={References}]
diff --git a/chapters/chap1.pdf b/chapters/chap1.pdf
index 40113e8..c9f509e 100644
Binary files a/chapters/chap1.pdf and b/chapters/chap1.pdf differ
diff --git a/chapters/chap1.tex b/chapters/chap1.tex
index 4602edc..c6bc7b7 100644
--- a/chapters/chap1.tex
+++ b/chapters/chap1.tex
@@ -468,41 +468,9 @@ It is a theorem connecting the following mathematical structure:
 
 To prove the concentration of measure phenomenon, we need to analyze the following elements involved in figure~\ref{fig:Hayden_concentration_of_measure_phenomenon}:
 
-First, we need to define what is a random state in a bipartite system. In fact, for pure states, there is a unique uniform distribution under Haar measure that is unitarily invariant.
-
-$U(n)$ is the group of all $n\times n$ \textbf{unitary matrices} over $\mathbb{C}$, 
-
-$$
-U(n)=\{A\in \mathbb{C}^{n\times n}: A^*A=AA^*=I_n\}
-$$
-
-The uniqueness of such measurement came from the lemma below~\cite{Elizabeth_book}
-
-\begin{lemma}
-
-    Let $(U(n), \| \cdot \|, \mu)$ be a metric measure space where $\| \cdot \|$ is the Hilbert-Schmidt norm and $\mu$ is the measure function.
-
-    The Haar measure on $U(n)$ is the unique probability measure that is invariant under the action of $U(n)$ on itself.
-    
-    That is, fixing $B\in U(n)$, $\forall A\in U(n)$, $\mu(A\cdot B)=\mu(B\cdot A)=\mu(B)$.
-    
-    The Haar measure is the unique probability measure that is invariant under the action of $U(n)$ on itself.
-
-\end{lemma}
-
     
 The existence and uniqueness of the Haar measure is a theorem in compact lie group theory. For this research topic, we will not prove it.
 
-A random pure state $\varphi$ is any random variable distributed according to the unitarily invariant probability measure on the pure states $\mathcal{P}(A)$ of the system $A$, denoted by $\varphi\in_R\mathcal{P}(A)$.
-
-It is trivial that for the space of pure state, we can easily apply the Haar measure as the unitarily invariant probability measure since the space of pure state is $S^n$ for some $n$. However, for the case of mixed states, that is a bit complicated and we need to use partial tracing to defined the rank-$s$ random states.
-
-\begin{defn}
-    Rank-$s$ random state.
-
-    For a system $A$ and an integer $s\geq 1$, consider the distribution onn the mixed states $\mathcal{S}(A)$ of A induced by the partial trace over the second factor form the uniform distribution on pure states of $A\otimes\mathbb{C}^s$. Any random variable $\rho$ distributed as such will be called a rank-$s$ random states; denoted as $\rho\in_R \mathcal{S}_s(A)$. And $\mathcal{P}(A)=\mathcal{S}_1(A)$.
-\end{defn}
-
 Due to time constrains of the projects, the following lemma is demonstrated but not investigated thoroughly through the research:
 
 
@@ -513,11 +481,11 @@ Due to time constrains of the projects, the following lemma is demonstrated but
 
     Choose a random pure state $\sigma=|\psi\rangle\langle\psi|$ from $A'\otimes A$.
 
-    The expected value of the entropy of entanglement is known and satisfies a concentration inequality known as Page's formula~\cite{Pages_conjecture,Pages_conjecture_simple_proof,Bengtsson_Życzkowski_2017}[15.72]. The detailed proof is not fully explored in this project and is intended to be done in the next semester.
+    The expected value of the entropy of entanglement is known exactly; it is given by Page's formula~\cite{Pages_conjecture,Pages_conjecture_simple_proof}, \cite[(15.72)]{Bengtsson_Zyczkowski_2017}, which admits a simple lower bound:
 
-    \[
-    \mathbb{E}[H(\psi_A)] \geq \log_2(d_A)-\frac{1}{2\ln(2)}\frac{d_A}{d_B}
-    \]
+    $$
+    \mathbb{E}[H(\psi_A)]=\frac{1}{\ln(2)}\left(\sum_{j=d_B+1}^{d_Ad_B}\frac{1}{j}-\frac{d_A-1}{2d_B}\right) \geq \log_2(d_A)-\frac{1}{2\ln(2)}\frac{d_A}{d_B}
+    $$
 
 \end{lemma}
 
@@ -530,18 +498,27 @@ It basically provides a lower bound for the expected entropy of entanglement. Ex
 	\label{fig:entropy_vs_dim}
 \end{figure}
 
-Then we have bound for Lipschitz constant $\eta$ of the map $H(\varphi_A)$
+Then we have a bound on the Lipschitz constant $\eta$ of the map $S(\varphi_A): \mathcal{P}(A\otimes B)\to \mathbb{R}$:
 
 \begin{lemma}
     The Lipschitz constant $\eta$ of $S(\varphi_A)$ is upper bounded by $\sqrt{8}\log_2(d_A)$ for $d_A\geq 3$.
 \end{lemma}
 
+\begin{proof}
+    The proof uses the Lagrange multiplier method to find the maximum of the gradient of $S(\varphi_A)$.
+    %
+    % TODO: use the Lagrange multiplier method to find the maximum of the gradient of $S(\varphi_A)$.
+    %
+\end{proof}
+
 From Levy's lemma, we have
 
 If we define $\beta=\frac{1}{\ln(2)}\frac{d_A}{d_B}$, then we have
-\[
+
+$$
 \operatorname{Pr}[H(\psi_A) < \log_2(d_A)-\alpha-\beta] \leq \exp\left(-\frac{1}{8\pi^2\ln(2)}\frac{(d_Ad_B-1)\alpha^2}{(\log_2(d_A))^2}\right)
-\]
+$$
+
 where $d_B\geq d_A\geq 3$~\cite{Hayden_2006}.
 
 Experimentally, we can have the following result:
diff --git a/main.bib b/main.bib
index 2a5d572..dcefac4 100644
--- a/main.bib
+++ b/main.bib
@@ -118,9 +118,9 @@
   publisher = {Princeton University Press}
 }
 
-@book{Bengtsson_Życzkowski_2017,
+@book{Bengtsson_Zyczkowski_2017,
   title     = {Geometry of Quantum States: An Introduction to Quantum Entanglement},
-  author    = {Bengtsson, Ingemar and Życzkowski, Karol},
+  author    = {Bengtsson, Ingemar and {\.Z}yczkowski, Karol},
   year      = {2017},
   publisher = {Cambridge University Press}
 }
diff --git a/main.pdf b/main.pdf
index e9e574c..e3d48ed 100644
Binary files a/main.pdf and b/main.pdf differ
diff --git a/main.tex b/main.tex
index 00576e5..76cf03d 100644
--- a/main.tex
+++ b/main.tex
@@ -39,7 +39,7 @@
 \usepackage{xcolor}
 
 % A dedicated "Examples" block (optional convenience wrapper)
-\newtcolorbox{examples}{%
+\newtcolorbox{examples}[1][Example]{%
   enhanced,
   breakable,
   colback=white,
@@ -50,7 +50,7 @@
   arc=1.5mm,
   left=1.2mm,right=1.2mm,top=1.0mm,bottom=1.0mm,
   fonttitle=\bfseries,
-  title=Examples
+  title=#1
 }