From 29a5945f05acd6b70015f6de20d402f2cad42d68 Mon Sep 17 00:00:00 2001
From: Zheyuan Wu <60459821+Trance-0@users.noreply.github.com>
Date: Sat, 11 Oct 2025 12:25:24 -0500
Subject: [PATCH] fix typos

---
 content/CSE510/CSE510_L10.md | 13 +++++++++----
 content/CSE510/CSE510_L12.md |  2 +-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/content/CSE510/CSE510_L10.md b/content/CSE510/CSE510_L10.md
index 9d982f2..34722d0 100644
--- a/content/CSE510/CSE510_L10.md
+++ b/content/CSE510/CSE510_L10.md
@@ -29,13 +29,18 @@ Scale of rewards and Q-values is unknown
 
 ### Deadly Triad in Reinforcement Learning
 
-Off-policy learning (learning the expected reward changes of policy change instead of the optimal policy)
-Function approximation (usually with supervised learning)
+Off-policy learning
 
-$Q(s,a)\gets f_\theta(s,a)$
+- (learning the expected reward changes of policy change instead of the optimal policy)
 
-Bootstrapping (self-reference)
+Function approximation
+- (usually with supervised learning)
+- $Q(s,a)\gets f_\theta(s,a)$
+
+Bootstrapping
+
+- (self-reference, update new function from itself)
 
 - $Q(s,a)\gets r(s,a)+\gamma \max_{a'\in A} Q(s',a')$
 
 ### Stable Solutions for DQN
diff --git a/content/CSE510/CSE510_L12.md b/content/CSE510/CSE510_L12.md
index 8ad0405..19c76c5 100644
--- a/content/CSE510/CSE510_L12.md
+++ b/content/CSE510/CSE510_L12.md
@@ -81,7 +81,7 @@ Reducing variance using a baseline
 
 $$
 \begin{aligned}
-\mathbb{E}_{\pi_\theta}\left[\nabla_\theta\log \pi_\theta(s,a)B(s)]&=\sum_{s\in S}d^{\pi_\theta}(s)\sum_{a\in A}\nabla_{\theta}\pi_\theta(s,a)B(s)\\
+\mathbb{E}_{\pi_\theta}\left[\nabla_\theta\log \pi_\theta(s,a)B(s)\right]&=\sum_{s\in S}d^{\pi_\theta}(s)\sum_{a\in A}\nabla_{\theta}\pi_\theta(s,a)B(s)\\
 &=\sum_{s\in S}d^{\pi_\theta}B(s)\nabla_\theta\sum_{a\in A}\pi_\theta(s,a)\\
 &=0
 \end{aligned}
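
Appended for context, not part of the patch: the Deadly Triad bullets above combine function approximation ($Q(s,a)\gets f_\theta(s,a)$) with a bootstrapped target ($Q(s,a)\gets r(s,a)+\gamma \max_{a'\in A} Q(s',a')$). Below is a minimal sketch of how those two pieces fit together in a semi-gradient Q-learning step, assuming a hypothetical linear feature map `phi` and weight vector `theta` (the names `phi`, `theta`, and `semi_gradient_step` are illustrative, not from the lecture notes).

```python
import numpy as np

# Illustrative sketch of two Deadly Triad ingredients named in the notes:
# a function approximator Q_theta(s, a) = phi(s, a) . theta, and the
# bootstrapped target Q(s, a) <- r(s, a) + gamma * max_a' Q(s', a').

def q_value(theta, phi, s, a):
    # Function approximation: Q(s, a) is read from f_theta, not from a table.
    return phi(s, a) @ theta

def td_target(theta, phi, r, s_next, actions, gamma=0.99):
    # Bootstrapping (self-reference): the target is built from the current estimate.
    return r + gamma * max(q_value(theta, phi, s_next, a) for a in actions)

def semi_gradient_step(theta, phi, s, a, r, s_next, actions, alpha=0.1, gamma=0.99):
    # Semi-gradient Q-learning: move Q_theta(s, a) toward the bootstrapped target.
    td_error = td_target(theta, phi, r, s_next, actions, gamma) - q_value(theta, phi, s, a)
    return theta + alpha * td_error * phi(s, a)

# Toy usage: one-hot features over 2 states x 2 actions.
phi = lambda s, a: np.eye(4)[2 * s + a]
theta = np.zeros(4)
theta = semi_gradient_step(theta, phi, s=0, a=1, r=1.0, s_next=1, actions=[0, 1])
```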