From 0b10672bc86d7d4bfae56770ae940d4646fe574e Mon Sep 17 00:00:00 2001
From: Trance-0 <60459821+Trance-0@users.noreply.github.com>
Date: Tue, 14 Oct 2025 12:44:28 -0500
Subject: [PATCH] updates
---
content/CSE5313/CSE5313_L13.md | 333 +++++++++++++++++++++++++++++++++
content/CSE5313/_meta.js | 1 +
2 files changed, 334 insertions(+)
create mode 100644 content/CSE5313/CSE5313_L13.md
diff --git a/content/CSE5313/CSE5313_L13.md b/content/CSE5313/CSE5313_L13.md
new file mode 100644
index 0000000..3e452ba
--- /dev/null
+++ b/content/CSE5313/CSE5313_L13.md
@@ -0,0 +1,333 @@
+# CSE5313 Coding and information theory for data science (Lecture 13)
+
+## Recap from last lecture
+
+Local recoverable codes:
+
+An $[n, k]_q$ code is called $r$-locally recoverable if
+
+- every codeword symbol $y_j$ has a recovering set $R_j \subseteq [n] \setminus j$ ($[n]=\{1,2,\ldots,n\}$),
+- such that $y_j$ is computable from $y_i$ for all $i \in R_j$.
+- $|R_j| \leq r$ for every $j \in n$.
+
+### Bounds for locally recoverable codes
+
+**Bound 1**: $\frac{k}{n}\leq \frac{r}{r+1}$.
+
+**Bound 2**: $d\leq n-k-\lceil\frac{k}{r}\rceil +2$. ($r=k$ gives singleton bound)
+
+#### Turan's Lemma
+
+Let $G$ be a graph with $n$ vertices. Then there exists an induced directed acyclic subgraph (DAG) of $G$ on at least $\frac{n}{1+\operatorname{avg}_i(d^{out}_i)}$ nodes, where $d^{out}_i$ is the out-degree of vertex $i$.
+
+Proof using probabilistic method.
+
+
+
+Proof via the probabilistic method
+
+> Useful for showing the existence of a large acyclic subgraph, but not for finding it.
+
+> [!TIP]
+>
+> Show that $\mathbb{E}[X]\geq something$, and therefore there exists $U_\pi$ with $|U_\pi|\geq something$, using pigeonhole principle.
+
+For a permutation $\pi$ of $[n]$, define $U_\pi = \{\pi(i): i \in [n]\}$.
+
+Let $i\in U_\pi$ if each of the $d_i^{out}$ outgoing edges from $i$ connect to a node $j$ with $\pi(j)>\pi(i)$.
+
+In other words, we select a subset of nodes $U_\pi$ such that each node in $U_\pi$ has an outgoing edge to a node in $U_\pi$ with a larger index. All edges going to right.
+
+This graph is clearly acyclic.
+
+Choose $\pi$ at random and Let $X=|U_\pi|$ be a random variable.
+
+Let $X_i$ be the indicator random variable for $i\in U_\pi$.
+
+So $X=\sum_{i=1}^{n} X_i$.
+
+Using linearity of expectation, we have
+
+$$
+E[X]=\sum_{i=1}^{n} E[X_i]
+$$
+
+$E[X_i]$ is the probability that $\pi$ places $i$ before any of its out-neighbors.
+
+For each node, there are $(d_i^{out}+1)!$ ways to place the node and its out-neighbors.
+
+For each node, there are $d_i^{out}!$ ways to place the out-neighbors.
+
+So, $E[X_i]=\frac{d_i^{out}!}{(d_i^{out}+1)!}=\frac{1}{d_i^{out}+1}$.
+
+Since $X=\sum_{i=1}^{n} X_i$, we have
+
+> [!TIP]
+>
+> Recall Arithmetic mean ($\frac{1}{n}\sum_{i=1}^{n} x_i$) is greater than or equal to harmonic mean ($\frac{n}{\sum_{i=1}^{n} \frac{1}{x_i}}$).
+
+$$
+\begin{aligned}
+E[X]&=\sum_{i=1}^{n} E[X_i]\\
+&=\sum_{i=1}^{n} \frac{1}{d_i^{out}+1}\\
+&=\frac{1}{n}\sum_{i=1}^{n} \frac{1}{d_i^{out}+1}\\
+&\geq \frac{n}{\frac{1}{n}(1+\sum_{i=1}^{n} d_i^{out})}\\
+&=\frac{n}{1+\operatorname{avg}_i(d_i^{out})}\\
+\end{aligned}
+$$
+
+
+
+## Locally recoverable codes
+
+### Bound 1
+
+$$
+\frac{k}{n}\leq \frac{r}{r+1}
+$$
+
+
+
+Proof via Turan's Lemma
+
+Let $G$ be a directed graph such that $i\to j$ is an edge if $j\in R_i$. ($j$ required to repair $i$)
+
+$d_i^{out}\leq r$ for every $i$, so $\operatorname{avg}_i(d_i^{out})\leq r$.
+
+By Turan's Lemma, there exists an induced directed acyclic subgraph (DAG) $G_U$ on $G$ with $U$ nodes such that $|U|\geq \frac{n}{1+\operatorname{avg}_i(d_i^{out})}\geq \frac{n}{1+r}$.
+
+Since $G_U$ is a DAG, it is acyclic. So there exists a vertex $u_1\in U$ with no outgoing edges **inside** $G_U$. (leaf node in $G_U$)
+
+Note that if we remove $u_1$ from $G_U$, the remaining graph is still a DAG.
+
+Let $G_{U_1}$ be the induced graph on $U_1=U\setminus \{u_1\}$.
+
+We can find a vertex $u_2\in U_1$ with no outgoing edges **inside** $G_{U_1}$.
+
+We can continue this process until we have all the vertices in $U$ removed.
+
+So all symbols in $U$ are redundant.
+
+Note the number of redundant symbols $=n-k\geq \frac{n}{1+r}$.
+
+So $k\leq \frac{rn}{r+1}$.
+
+So $\frac{k}{n}\leq \frac{r}{r+1}$.
+
+
+
+### Bound 2
+
+$$
+d\leq n-k-\lceil\frac{k}{r}\rceil +2
+$$
+
+#### Definition for code reduction
+
+For $C=[n,k]_q$ code, let $I\subseteq [n]$ be a set of indices, e.g., $I=\{1,3,5,6,7,8\}$.
+
+Let $C_I$ be the code obtained by deleting the symbols that is not in $I$. (only keep the symbols with indices in $I$)
+
+Then $C_I$ is a code of length $|I|$, $C_I$ is generated by $G_I\in \mathbb{F}^{k\times |I|}_q$ (only keep the rows of $G$ with indices in $I$).
+
+$|C_I|=q^{\operatorname{rank}(G_I)}$.
+
+Note $G_I$ not necessarily of rank $k$.
+
+#### Reduction lemma
+
+Let $\mathcal{C}$ be an $[n, k]_q$ code. Then $d=n-\max\{|I|:C_I
+
+Proof for reduction lemma
+
+We proceed from two inequalities.
+
+First $d\leq n-\max\{|I|:C_I
+
+We prove bound 2 using the same subgraph from proof of bound 1.
+
+
+
+Proof for bound 2
+
+Let $G$ be a directed graph such that $i\to j$ is an edge if $j\in R_i$. ($j$ required to repair $i$)
+
+$d_i^{out}\leq r$ for every $i$, so $\operatorname{avg}_i(d_i^{out})\leq r$.
+
+By Turan's Lemma, there exists an induced directed acyclic subgraph (DAG) $G_U$ on $G$ with $U$ nodes such that $|U|\geq \frac{n}{1+\operatorname{avg}_i(d_i^{out})}\geq \frac{n}{1+r}$.
+
+Let $U'\subset U$ with size $\lfloor\frac{k-1}{r}\rfloor$.
+
+$U'$ exists since $\frac{n}{r+1}>\frac{k}{r}$ by bound 1, and $\frac{k}{r}>\lfloor\frac{k-1}{r}\rfloor$.
+
+So $G_{U'}$ is also acyclic.
+
+Let $N\subseteq [n]\setminus U'$ be the set of neighbors of $U'$ in $G_U$.
+
+$|N|\leq r|U'|\leq k-1$.
+
+Complete $n$ to be of the size $k-1$ by adding elements not in $U'$.
+
+$|C_N|\leq q^{k-1}$
+
+Also $|N\cup U'|=k-1+\lfloor\frac{k-1}{r}\rfloor$
+
+Note that all nodes in $G_U$ can be recovered from nodes in $N$.
+
+So $|C_{N\cup U'}|=|C_N|\leq q^{k-1}$.
+
+Therefore, $\max\{|I|:C_I
+
+### Construction of locally recoverable codes
+
+Recall Reed-Solomon code:
+
+$\{f(a_1),f(a_2),\ldots,f(a_n)|f(x)\in \mathbb{F}_q^{k-1}[x]\}$.
+
+- $\dim=k$
+- $d=n-k+1$
+- Need $n\geq q$.
+- To encode $a=a_1,a_2,\ldots,a_n$, we need to evaluate $f_a(x)=\sum_{i=0}^{k-1}f_a(a_i)x^i$.
+
+Assume $r+1|n$ and $r|k$.
+
+Choose $A=\{\alpha_1,\alpha_2,\ldots,\alpha_n\}$
+
+Partition $A$ into $\frac{n}{r+1}$ subsets of size $r+1$.
+
+#### Definition for good polynomial
+
+A polynomial $g$ over $\mathbb{F}_q$ is called good if
+
+- $\deg(g)=r+1$
+- $g$ is constant for every $A_i$., i.e., $\forall x,y\in A_i, g(x)=g(y)$.
+
+To encode $a=(a_0,a_1,\ldots,a_n)$, we consider $a\in \mathbb{F}_q^{r\times (k/r)}$
+
+Let $f_a(x)=\sum_{i=0}^{r-1} f_{a,i}(x)\cdot x^i$
+
+$f_{a,i}(x)=\sum_{j=0}^{k/r-1} a_{i,j}\cdot g(x)^j$, where $g$ is a good polynomial.
+
+
+Proof for valid codeword
+
+In other words,
+
+$$
+\mathcal{C}=\{(f_a(\alpha_i))_{i=1}^{n}|a\in \mathbb{F}_q^k\}
+$$
+
+For data $a\in \mathbb{F}_q^k$, the $i$th server stores $f_a(\alpha_i)$.
+
+Note $\mathcal{C}$ is a linear code. and $\dim \mathcal{C}=k$.
+
+Let $\alpha\in \{\alpha_1,\alpha_2,\ldots,\alpha_n\}$.
+
+Suppose $\alpha\in A_1$.
+
+Locality is ensured by recovering failed server $\alpha$ by $f_a(\alpha_i)$ from $\{f_a(\beta):\beta\in A_1\setminus \{\alpha\}\}$.
+
+Note $g$ is constant on $A_1$.
+
+Denote the constant by $f_i(A_1)$.
+
+Let $\delta_1(x)=\sum_{i=0}^{r-1} f_{a,i}(A_1)x^i$.
+
+$\delta_1(x)=f_a(x)$ for all $x\in A_1$.
+
+#### Constructing good polynomial
+
+Let $(\mathbb{F}_q^*,\cdot)$ be the multiplicative group of $\mathbb{F}_q$.
+
+For a subgroup $H$ of this group, a multiplicative coset is $Ha=\{ha|h\in H,a\in \mathbb{F}_q^*\}$.
+
+The family of all cosets of $H:\{Ha|a\in \mathbb{F}_q^*\}$.
+
+This is a partition of $\mathbb{F}_q^*$.
+
+#### Definition of annihilator
+
+The annihilator of $H$ is $g(x)=\prod_{h\in H}(x-h)$.
+
+Note that $g(h)=0$ for all $h\in H$.
+
+#### Annihilator is a good polynomial
+
+Fix $H$, consider the partition $\{Ha|a\in \mathbb{F}_q^*\}$ and let $g(x)=\prod_{a\in \mathbb{F}_q^*}(x-a)$. Then $g$ is a good polynomial.
+
+
+Proof
+
+Need to show if $x,y\in Ha$, then $g(x)=g(y)$.
+
+Since $x\in Ha$, there exists $h',h''\in H$ such that $x=h'a$ and $y=h''a$.
+
+So $g(x)=g(h'a)=\prod_{h\in H}(h'a-h)=(h')^{|H|}\prod_{h\in H}(a-h/h')$.
+
+By Fermat's Little Theorem, $(h')^{|H|}\equiv 1$ for every $h'\in H$.
+
+So $\{h/h'|h\in H\}=H$ for every $h'\in H$.
+
+So $g(x)=(h')^{|H|}\prod_{h\in H}(a-h/h')=\prod_{h\in H}(a-h/h')=g(a)$.
+
+Similarly, $g(y)=g(h''a)=\prod_{h\in H}(h''a-h)=(h'')^{|H|}\prod_{h\in H}(a-h/h'')=g(a)$.
+
+
diff --git a/content/CSE5313/_meta.js b/content/CSE5313/_meta.js
index 8388c92..c7a5b5b 100644
--- a/content/CSE5313/_meta.js
+++ b/content/CSE5313/_meta.js
@@ -15,4 +15,5 @@ export default {
CSE5313_L10: "CSE5313 Coding and information theory for data science (Recitation 10)",
CSE5313_L11: "CSE5313 Coding and information theory for data science (Recitation 11)",
CSE5313_L12: "CSE5313 Coding and information theory for data science (Lecture 12)",
+ CSE5313_L13: "CSE5313 Coding and information theory for data science (Lecture 13)",
}
\ No newline at end of file