From d99e9fcfddf75643b8c3368c174175095f2ff14c Mon Sep 17 00:00:00 2001
From: Zheyuan Wu <60459821+Trance-0@users.noreply.github.com>
Date: Sat, 8 Feb 2025 15:31:49 -0600
Subject: [PATCH] update

---
 pages/CSE559A/CSE559A_L8.md    |  87 +++++++++++++++++++++++
 pages/CSE559A/_meta.js         |   1 +
 pages/Math4121/Math4121_L11.md | 111 ++++++++++++++++++++++++++++-
 pages/Math416/Math416_L7.md    |  16 +++--
 pages/Math416/Math416_L8.md    | 125 +++++++++++++++++++++++++++++++++
 pages/Math416/_meta.js         |   1 +
 6 files changed, 333 insertions(+), 8 deletions(-)
 create mode 100644 pages/CSE559A/CSE559A_L8.md
 create mode 100644 pages/Math416/Math416_L8.md

diff --git a/pages/CSE559A/CSE559A_L8.md b/pages/CSE559A/CSE559A_L8.md
new file mode 100644
index 0000000..93b26f7
--- /dev/null
+++ b/pages/CSE559A/CSE559A_L8.md
@@ -0,0 +1,87 @@
+# Lecture 8
+
+Paper review sharing.
+
+## Recap: Three ways to think about linear classifiers
+
+Geometric view: Hyperplanes in the feature space
+
+Algebraic view: Linear functions of the features
+
+Visual view: One template per class
+
+## Continue on linear classification models
+
+Two-layer networks can be seen as combinations of templates.
+
+Interpretability is lost as depth increases.
+
+A two-layer network is a **universal approximator** (we can approximate any continuous function to arbitrary accuracy), but the hidden layer may need to be huge.
+
+[Multi-layer networks demo](https://playground.tensorflow.org)
+
+### Supervised learning outline
+
+1. Collect training data
+2. Specify model (select hyper-parameters)
+3. Train model
+
+#### Hyper-parameter selection
+
+- Number of layers, number of units per layer, learning rate, etc.
+- Type of non-linearity, regularization, etc.
+- Type of loss function, etc.
+- SGD settings: batch size, number of epochs, etc.
+
+#### Hyper-parameter search
+
+Use a validation set to evaluate the performance of the model.
+
+Never peek at the test set.
+
+Use the training set to do K-fold cross-validation.
+
+### Backpropagation
+
+#### Computation graphs
+
+SGD update for each parameter:
+
+$$
+w_k\gets w_k-\eta\frac{\partial e}{\partial w_k}
+$$
+
+$e$ is the error function and $\eta$ is the learning rate.
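+
+A minimal NumPy sketch of this update for a single linear unit with squared error (the name `sgd_step`, the step size, and the toy data are my own illustrative assumptions, not from the lecture); the gradient it uses is derived in the next subsection:
+
+```python
+import numpy as np
+
+def sgd_step(w, x, y, eta=0.1):
+    """One SGD step for h = w^T x with squared error e = (h - y)^2."""
+    h = w @ x                 # forward pass: scalar prediction
+    grad = 2.0 * (h - y) * x  # de/dw by the chain rule (next subsection)
+    return w - eta * grad     # w <- w - eta * de/dw
+
+w = np.zeros(3)
+x, y = np.array([1.0, 2.0, 0.5]), 4.0
+for _ in range(50):
+    w = sgd_step(w, x, y)
+print(w @ x)  # approaches the target y = 4.0
+```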
+
+#### Using the chain rule
+
+Suppose $k=1$, so $e=l(f_1(x,w_1),y)$.
+
+Example: $e=(f_1(x,w_1)-y)^2$
+
+So $h_1=f_1(x,w_1)=w_1^Tx$ and $e=l(h_1,y)=(y-h_1)^2$.
+
+$$
+\frac{\partial e}{\partial w_1}=\frac{\partial e}{\partial h_1}\frac{\partial h_1}{\partial w_1}
+$$
+
+$$
+\frac{\partial e}{\partial h_1}=2(h_1-y)
+$$
+
+$$
+\frac{\partial h_1}{\partial w_1}=x
+$$
+
+$$
+\frac{\partial e}{\partial w_1}=2(h_1-y)x
+$$
+
+#### General backpropagation algorithm
+
diff --git a/pages/CSE559A/_meta.js b/pages/CSE559A/_meta.js
index 90cbcf9..ed50190 100644
--- a/pages/CSE559A/_meta.js
+++ b/pages/CSE559A/_meta.js
@@ -10,4 +10,5 @@ export default {
   CSE559A_L5: "Computer Vision (Lecture 5)",
   CSE559A_L6: "Computer Vision (Lecture 6)",
   CSE559A_L7: "Computer Vision (Lecture 7)",
+  CSE559A_L8: "Computer Vision (Lecture 8)",
 }
diff --git a/pages/Math4121/Math4121_L11.md b/pages/Math4121/Math4121_L11.md
index 9cdf10f..d5ae08f 100644
--- a/pages/Math4121/Math4121_L11.md
+++ b/pages/Math4121/Math4121_L11.md
@@ -1 +1,110 @@
-# Lecture 11
\ No newline at end of file
+# Lecture 11
+
+## Recap
+
+The unit step function:
+
+$$
+I(x)=\begin{cases}
+0 & x\leq 0 \\
+1 & x>0
+\end{cases}
+$$
+
+## Continue on Chapter 6
+
+### The step function
+
+#### Theorem 6.16
+
+If $c_n\geq 0$ for all $n$, $\sum c_n$ converges, $\{s_n\}$ is a sequence of distinct points of $(a,b)$, $f$ is continuous on $[a,b]$, and $\alpha(x)=\sum_{n=1}^{\infty}c_nI(x-s_n)$, then $\int_a^bf \ d\alpha=\sum_{n=1}^{\infty}c_nf(s_n)$.
+
+Proof:
+
+For each $x$, $I(x-s_n)\leq 1$, so $\sum_{n=1}^{\infty}c_nI(x-s_n)\leq \sum_{n=1}^{\infty}c_n$ converges (by the comparison test), and $\alpha$ is well defined.
+
+Let $\epsilon>0$. We can find $N$ such that $\sum_{n=N+1}^{\infty}c_n<\epsilon$. (Recall that the series $\sum_{n=1}^{\infty}c_n$ converges if and only if $\lim_{N\to\infty}\sum_{n=1}^{N}c_n$ exists, so the tails tend to $0$.)
+
+Set $\alpha_1(x)=\sum_{n=1}^{N}c_nI(x-s_n)$ and $\alpha_2(x)=\sum_{n=N+1}^{\infty}c_nI(x-s_n)$.
+
+Using the linearity of the integral in the integrator, we have
+
+$$
+\int_a^b f\ d\alpha_1= \sum_{n=1}^{N}c_n\int_a^b f\ d(I(x-s_n))= \sum_{n=1}^{N}c_nf(s_n)
+$$
+
+On the other hand, with $M=\sup|f|$, since $\alpha_2$ is increasing with $\alpha_2(b)-\alpha_2(a)\leq\sum_{n=N+1}^{\infty}c_n$,
+
+$$
+\left|\int_a^b f\ d\alpha_2\right|\leq \int_a^b |f|\ d\alpha_2\leq M(\alpha_2(b)-\alpha_2(a))\leq M\sum_{n=N+1}^{\infty}c_n<M\epsilon
+$$
+
+So,
+
+$$
+\begin{aligned}
+\left|\int_a^b f\ d\alpha-\sum_{n=1}^{\infty}c_nf(s_n)\right|&= \left|\int_a^b f\ d\alpha_2-\sum_{n=N+1}^{\infty}c_nf(s_n)\right|\\
+&\leq \left|\int_a^b f\ d\alpha_2\right|+\sum_{n=N+1}^{\infty}c_n|f(s_n)|\\
+&<M\epsilon+M\epsilon=2M\epsilon
+\end{aligned}
+$$
+
+Since $\epsilon$ is arbitrary, we have $\int_a^b f\ d\alpha=\sum_{n=1}^{\infty}c_nf(s_n)$.
+
+### Integration and differentiation
+
+#### Theorem 6.20 Fundamental theorem of calculus
+
+Let $f\in \mathscr{R}$ on $[a,b]$ and define $F(x)=\int_a^x f(t)\ dt$. Then $F$ is continuous on $[a,b]$; moreover, if $f$ is continuous at $x_0\in [a,b]$, then $F$ is differentiable at $x_0$ and $F'(x_0)=f(x_0)$.
+
+Proof:
+
+Since $f\in \mathscr{R}$, $f$ is bounded; say $|f(t)|\leq M$ on $[a,b]$. For $a\leq x<y\leq b$,
+
+$$
+|F(y)-F(x)|=\left|\int_x^y f(t)\ dt\right|\leq M(y-x),
+$$
+
+so $F$ is (uniformly) continuous on $[a,b]$.
+
+Now suppose $f$ is continuous at $x_0$ and let $\epsilon>0$. Then we can find $\delta>0$ such that $|f(t)-f(x_0)|<\epsilon$ whenever $t\in[a,b]$ and $|t-x_0|<\delta$. For such $t\neq x_0$,
+
+$$
+\left|\frac{F(t)-F(x_0)}{t-x_0}-f(x_0)\right|=\left|\frac{1}{t-x_0}\int_{x_0}^{t}\big(f(u)-f(x_0)\big)\ du\right|<\epsilon,
+$$
+
+so $F'(x_0)=f(x_0)$.
+
+EOP
+
+#### Theorem 6.21 Fundamental theorem of calculus, second form
+
+If $f\in \mathscr{R}$ on $[a,b]$ and there is a differentiable function $F$ on $[a,b]$ with $F'=f$, then $\int_a^b f(x)\ dx=F(b)-F(a)$.
+
+Proof:
+
+Let $P=\{x_0,x_1,\cdots,x_n\}$ be any partition of $[a,b]$.
+
+By the mean value theorem, on each subinterval $[x_{i-1},x_i]$ there exists $t_i\in (x_{i-1},x_i)$ such that
+
+$$
+F(x_i)-F(x_{i-1})=F'(t_i)(x_i-x_{i-1})=f(t_i)\Delta x_i
+$$
+
+Since $m_i\leq f(t_i)\leq M_i$ on each subinterval, and the sum telescopes, notice that $\sum_{i=1}^n f(t_i)\Delta x_i=F(b)-F(a)$.
+
+So, for every partition $P$,
+
+$$
+L(P,f)\leq F(b)-F(a)\leq U(P,f)
+$$
+
+Since $f\in \mathscr{R}$, $\sup_P L(P,f)=\inf_P U(P,f)=\int_a^b f(x)\ dx$, so $\int_a^b f(x)\ dx=F(b)-F(a)$.
+
+EOP
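+
+As a quick sanity check (my own example, not from the lecture): take $f(x)=x^2$ on $[0,1]$ with antiderivative $F(x)=\frac{x^3}{3}$. Theorem 6.21 gives
+
+$$
+\int_0^1 x^2\ dx=F(1)-F(0)=\frac{1}{3}.
+$$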
diff --git a/pages/Math416/Math416_L7.md b/pages/Math416/Math416_L7.md
index 8043d83..066d035 100644
--- a/pages/Math416/Math416_L7.md
+++ b/pages/Math416/Math416_L7.md
@@ -130,21 +130,23 @@
 We know $c_n r^ne^{in\theta}\to 0$ as $n\to\infty$.

 So there exists $M\geq|c_n r^ne^{in\theta}|$ for all $n\in\mathbb{N}$.

-So $\forall \zeta\in\overline{B(0,r)}$, $|c_n\zeta^n|\leq |c_n|e^n\leq M(\frac{e}{r})^n$.
+So $\forall \zeta\in\overline{B(0,r)}$, $|c_n\zeta^n|\leq |c_n| |\zeta|^n \leq M \left(\frac{|\zeta|}{r}\right)^n$.

 So $\sum_{n=0}^{\infty}|c_n\zeta^n|$ converges absolutely.

 So the series converges absolutely and uniformly on $\overline{B(0,r)}$.

-_Some steps are omitted._
+If $|\zeta| > r$, then $|c_n \zeta^n|$ does not tend to zero, and the series diverges.

 EOP

-There are few cases for the convergence of the power series.
+#### Possible Cases for the Convergence of Power Series

-Let $E=\{\zeta\in\mathbb{C}: \sum_{n=0}^{\infty}c_n(\zeta-\zeta_0)^n\text{ converges}\}$.
+1. **Convergence Only at $\zeta = \zeta_0$**:
+   - **Proof**: If the power series $\sum_{n=0}^{\infty} c_n (\zeta - \zeta_0)^n$ converges only at $\zeta = \zeta_0$, the radius of convergence is $R = 0$. This occurs when the terms $c_n (\zeta - \zeta_0)^n$ fail to tend to zero for every $\zeta \neq \zeta_0$ (e.g. $c_n = n!$), so the series diverges for all $\zeta \neq \zeta_0$.

-1. It cloud only converge at $\zeta=0$.
-2. It could converge everywhere.
+2. **Convergence Everywhere**:
+   - **Proof**: If the power series converges for all $\zeta \in \mathbb{C}$, the radius of convergence is $R = \infty$. This requires $c_n (\zeta - \zeta_0)^n \to 0$ for every $\zeta$, which happens when the coefficients $c_n$ decrease rapidly enough, as in the exponential series $\sum_{n=0}^{\infty} \frac{(\zeta - \zeta_0)^n}{n!}$.

-Continue next time.
+3. **Convergence Within a Finite Radius**:
+   - **Proof**: For a power series with a finite radius of convergence $R > 0$, the series converges absolutely for $|\zeta - \zeta_0| < R$ (uniformly on compact subsets) and diverges for $|\zeta - \zeta_0| > R$. On the boundary $|\zeta - \zeta_0| = R$, the series may converge or diverge depending on the specific series.
diff --git a/pages/Math416/Math416_L8.md b/pages/Math416/Math416_L8.md
new file mode 100644
index 0000000..18ccaba
--- /dev/null
+++ b/pages/Math416/Math416_L8.md
@@ -0,0 +1,125 @@
+# Lecture 8
+
+## Review
+
+### Sequences of Functions
+
+Let $f_n: G \to \mathbb{C}$ be a sequence of functions.
+
+#### Pointwise Convergence
+
+Definition:
+
+For each $\zeta\in G$: $\forall \epsilon > 0$, $\exists N \in \mathbb{N}$ such that $\forall n \geq N$, $|f_n(\zeta) - f(\zeta)| < \epsilon$. (Here $N$ may depend on both $\epsilon$ and $\zeta$.)
+
+#### Uniform Convergence
+
+Definition:
+
+$\forall \epsilon > 0$, $\exists N \in \mathbb{N}$ such that $\forall n \geq N$ and $\forall \zeta\in G$, $|f_n(\zeta) - f(\zeta)| < \epsilon$. (Here $N$ depends only on $\epsilon$, not on $\zeta$.)
+
+#### Locally Uniform Convergence
+
+Definition:
+
+$\forall \zeta\in G$, $\exists r>0$ with $B(\zeta,r)\subset G$ such that $f_n \to f$ uniformly on $B(\zeta,r)$.
+
+#### Uniform Convergence on Compact Sets
+
+Definition: $\forall C\subset G$ compact, $\forall \epsilon > 0$, $\exists N \in \mathbb{N}$ such that $\forall n \geq N$, $\forall \zeta\in C$, $|f_n(\zeta) - f(\zeta)| < \epsilon$.
+
+#### Power Series
+
+Definition:
+
+$$
+\sum_{n=0}^{\infty} c_n (\zeta - \zeta_0)^n
+$$
+
+$\zeta_0$ is the center of the power series.
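+
+A concrete check of these notions (my own example, not from the lecture): for the geometric series $\sum_{n=0}^{\infty}\zeta^n$ centered at $\zeta_0=0$, the partial-sum error on $\overline{B(0,r)}$ with $r<1$ satisfies
+
+$$
+\sup_{|\zeta|\leq r}\left|\frac{1}{1-\zeta}-\sum_{n=0}^{N}\zeta^n\right|=\sup_{|\zeta|\leq r}\left|\frac{\zeta^{N+1}}{1-\zeta}\right|\leq \frac{r^{N+1}}{1-r}\to 0,
+$$
+
+so the convergence is uniform on every closed disk $\overline{B(0,r)}$ with $r<1$ (hence locally uniform on $B(0,1)$), but only pointwise on all of $B(0,1)$, since the supremum of the error over $|\zeta|<1$ is infinite for every $N$.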
+
+#### Theorem on Power Series
+
+If a power series centered at $\zeta_0$ converges at some point $\zeta_1$, then it converges absolutely at every point of any closed disk $\overline{B(\zeta_0,r)}$ with $r<|\zeta_1-\zeta_0|$, i.e. strictly inside the disk of convergence.
+
+## Continue on Power Series
+
+### Limits of Power Series
+
+#### Theorem 5.12
+
+Cauchy-Hadamard Theorem:
+
+The radius of convergence $R$ of the power series $\sum_{n=0}^{\infty} a_n (\zeta - \zeta_0)^n$ is given by
+
+$$
+\frac{1}{R} = \limsup_{n\to\infty} |a_n|^{1/n}
+$$
+
+(with the conventions $\frac{1}{0}=\infty$ and $\frac{1}{\infty}=0$).
+
+Proof:
+
+First, recall the limit superior. Suppose $(b_n)^{\infty}_{n=0}$ is a bounded sequence of real numbers; $\lim_{n\to\infty} b_n$ may or may not exist (e.g. it does not exist for $b_n=(-1)^n(1-\frac{1}{n})$). Set
+
+$$
+s_n = \sup_{k\geq n} b_k
+$$
+
+$(s_n)$ is a decreasing sequence, and by completeness of $\mathbb{R}$, every bounded monotone sequence has a limit in $\mathbb{R}$.
+
+So $s_n$ converges to some limit $s\in\mathbb{R}$, and we define $\limsup_{n\to\infty} b_n=s$.
+
+The same construction with the infimum in place of the supremum defines $\liminf_{n\to\infty} b_n$.
+
+Forward direction:
+
+We want to show that the radius of convergence of $\sum_{n=0}^{\infty} a_n (\zeta - \zeta_0)^n$ is at least $\frac{1}{\limsup_{n\to\infty} |a_n|^{1/n}}$.
+
+Assume $\limsup_{n\to\infty} |a_n|^{1/n}$ is finite, and let $\rho>\limsup_{n\to\infty} |a_n|^{1/n}$. Then $\exists N \in \mathbb{N}$ such that $\forall n \geq N$, $|a_n|^{1/n}\leq \rho$, i.e. $|a_n|\leq\rho^n$.
+
+So for $|\zeta-\zeta_0|<\frac{1}{\rho}$, $|a_n(\zeta-\zeta_0)^n|\leq(\rho|\zeta-\zeta_0|)^n$ for $n\geq N$, and since $\sum_{n=0}^{\infty} r^n=\frac{1}{1-r}$ converges for $0\leq r<1$, the series converges absolutely by comparison.
+
+So $R\geq\frac{1}{\rho}$ for every such $\rho$, and letting $\rho\downarrow\limsup_{n\to\infty} |a_n|^{1/n}$ gives $R\geq\frac{1}{\limsup_{n\to\infty} |a_n|^{1/n}}$.
+
+Backward direction:
+
+Suppose $|\zeta-\zeta_0|>\frac{1}{\limsup_{n\to\infty} |a_n|^{1/n}}$. Then we can pick $\rho$ such that $\frac{1}{|\zeta-\zeta_0|}<\rho<\limsup_{n\to\infty} |a_n|^{1/n}$.
+
+Since $\rho<\limsup_{n\to\infty} |a_n|^{1/n}$, there exist infinitely many indices $n_j$ such that $|a_{n_j}|^{1/n_j}>\rho$.
+
+So $|a_{n_j}(\zeta-\zeta_0)^{n_j}|>\rho^{n_j}|\zeta-\zeta_0|^{n_j}=(\rho|\zeta-\zeta_0|)^{n_j}>1$.
+
+The individual terms do not go to $0$, so $\sum_{n=0}^{\infty} a_n(\zeta-\zeta_0)^n$ does not converge at $\zeta$. Hence $R\leq\frac{1}{\limsup_{n\to\infty} |a_n|^{1/n}}$, and combining both directions gives $\frac{1}{R}=\limsup_{n\to\infty} |a_n|^{1/n}$.
+
+EOP
+
+_What if $|\zeta-\zeta_0|=R$?_
+
+For $\sum_{n=0}^{\infty} \zeta^n$, the radius of convergence is $1$.
+
+It diverges at every point of the circle of convergence, since the terms do not tend to $0$ there.
+
+For $\sum_{n=0}^{\infty} \frac{1}{(n+1)^2}\zeta^n$, the radius of convergence is $1$.
+
+This converges everywhere on the circle of convergence (by comparison with $\sum \frac{1}{(n+1)^2}$).
+
+For $\sum_{n=0}^{\infty} \frac{1}{n+1}\zeta^n$, the radius of convergence is $1$.
+
+This diverges at $\zeta=1$ (harmonic series) and converges at $\zeta=-1$ (alternating harmonic series).
+
+#### Theorem 5.15
+
+Suppose $\sum_{n=0}^{\infty} a_n (\zeta - \zeta_0)^n$ has a positive radius of convergence $R$. Define $f(\zeta)=\sum_{n=0}^{\infty} a_n (\zeta - \zeta_0)^n$; then $f$ is holomorphic on $B(\zeta_0,R)$ and $f'(\zeta)=\sum_{n=1}^{\infty} n a_n (\zeta - \zeta_0)^{n-1}=\sum_{k=0}^{\infty} (k+1)a_{k+1} (\zeta - \zeta_0)^k$.
+
+Proof:
+
+/*TRACK LOST*/
diff --git a/pages/Math416/_meta.js b/pages/Math416/_meta.js
index 01d8a8c..bf54eb0 100644
--- a/pages/Math416/_meta.js
+++ b/pages/Math416/_meta.js
@@ -10,4 +10,5 @@ export default {
   Math416_L5: "Complex Variables (Lecture 5)",
   Math416_L6: "Complex Variables (Lecture 6)",
   Math416_L7: "Complex Variables (Lecture 7)",
+  Math416_L8: "Complex Variables (Lecture 8)",
 }