diff --git a/content/CSE510/CSE510_L26.md b/content/CSE510/CSE510_L26.md new file mode 100644 index 0000000..77c4190 --- /dev/null +++ b/content/CSE510/CSE510_L26.md @@ -0,0 +1,38 @@ +# CSE510 Deep Reinforcement Learning (Lecture 26) + +## Continue on Real-World Practical Challenges for RL + +### Factored multi-agent RL + +- Sample efficiency -> Shared Learning +- Complexity -> High-Order Factorization +- Partial Observability -> Communication Learning +- Sparse reward -> Coordinated Exploration + +#### Parameter Sharing vs. Diversity + +- Parameter Sharing is critical for deep MARL methods +- However, agents tend to acquire homogeneous behaviors +- Diversity is essential for exploration and practical tasks + +[link to paper: Google Football](https://arxiv.org/pdf/1907.11180) + +Schematics of Our Approach: Celebrating Diversity in Shared MARL (CDS) + +- In representation, CDS allows MARL to adaptively decide +when to share learning +- Encouraging Diversity in Optimization + +In optimization, maximizing an information-theoretic objective to achieve identity-aware diversity + +$$ +\begin{aligned} +I^\pi(\tau_T;id)&=H(\tau_T)-H(\tau_T|id)=\mathbb{E}_{id,\tau_T\sim \pi}\left[\log \frac{p(\tau_T|id)}{p(\tau_T)}\right]\\ +&= \mathbb{E}_{id,\tau}\left[ \log \frac{p(o_0|id)}{p(o_0)}+\sum_{t=0}^{T-1}\log\frac{p(a_t|\tau_t,id)}{p(a_t|\tau_t)}+\log \frac{p(o_{t+1}|\tau_t,a_t,id)}{p(o_{t+1}|\tau_t,a_t)}\right] +\end{aligned} +$$ + +Here: $\sum_{t=0}^{T-1}\log\frac{p(a_t|\tau_t,id)}{p(a_t|\tau_t)}$ represents the action diversity. + +$\log \frac{p(o_{t+1}|\tau_t,a_t,id)}{p(o_{t+1}|\tau_t,a_t)}$ represents the observation diversity. 
+ diff --git a/content/CSE510/_meta.js b/content/CSE510/_meta.js index 557e618..13aed33 100644 --- a/content/CSE510/_meta.js +++ b/content/CSE510/_meta.js @@ -28,4 +28,5 @@ export default { CSE510_L23: "CSE510 Deep Reinforcement Learning (Lecture 23)", CSE510_L24: "CSE510 Deep Reinforcement Learning (Lecture 24)", CSE510_L25: "CSE510 Deep Reinforcement Learning (Lecture 25)", + CSE510_L26: "CSE510 Deep Reinforcement Learning (Lecture 26)", } \ No newline at end of file diff --git a/distribute/prebuild.sh b/distribute/prebuild.sh index 54a6d7e..ec49385 100644 --- a/distribute/prebuild.sh +++ b/distribute/prebuild.sh @@ -24,6 +24,12 @@ for dir in ./content/*/; do # strip the leading path and trailing slash base_dir="$(basename "${dir%/}")" + # explicitly ignore Swap + if [ "$base_dir" = "Swap" ]; then + echo "Ignoring Swap dir: $dir" + continue + fi + # check if base_dir is in KEEP_PAGES (space-separated list) case " $KEEP_PAGES " in *" $base_dir "*) @@ -40,6 +46,12 @@ done for dir in ./public/*/; do base_dir="$(basename "${dir%/}")" + # explicitly ignore Swap + if [ "$base_dir" = "Swap" ]; then + echo "Ignoring Swap dir: $dir" + continue + fi + case " $KEEP_PAGES " in *" $base_dir "*) echo "Keeping public dir: $dir"