fix typos

This commit is contained in:
Zheyuan Wu
2025-10-11 12:25:24 -05:00
parent 15a7be1dad
commit 29a5945f05
2 changed files with 10 additions and 5 deletions

View File

@@ -81,7 +81,7 @@ Reducing variance using a baseline
$$
\begin{aligned}
\mathbb{E}_{\pi_\theta}\left[\nabla_\theta\log \pi_\theta(s,a)B(s)]&=\sum_{s\in S}d^{\pi_\theta}(s)\sum_{a\in A}\nabla_{\theta}\pi_\theta(s,a)B(s)\\
\mathbb{E}_{\pi_\theta}\left[\nabla_\theta\log \pi_\theta(s,a)B(s)\right]&=\sum_{s\in S}d^{\pi_\theta}(s)\sum_{a\in A}\nabla_{\theta}\pi_\theta(s,a)B(s)\\
&=\sum_{s\in S}d^{\pi_\theta}(s)B(s)\nabla_\theta\sum_{a\in A}\pi_\theta(s,a)\\
&=0
\end{aligned}