updates

2025-10-14 20:34:47 -05:00
parent 250f763f1f
commit e74aac95e3
517 changed files with 1418 additions and 16701 deletions
--- a/result.aux
+++ b/result.aux
@@ -1,5 +1,15 @@
 \relax 
-\newlabel{1}{{{1}}{1}{}{}{}}
-\newlabel{2}{{{2}}{1}{}{}{}}
-\newlabel{3}{{{3}}{1}{}{}{}}
-\gdef \@abspage@last{4}
+\providecommand\hyper@newdestlabel[2]{}
+\providecommand\HyField@AuxAddToFields[1]{}
+\providecommand\HyField@AuxAddToCoFields[2]{}
+\newlabel{1}{{{1}}{2}{}{AMS.1}{}}
+\newlabel{2}{{{2}}{2}{}{AMS.2}{}}
+\newlabel{3}{{{3}}{2}{}{AMS.3}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces DQN. Nothing to say but what expected from training.}}{4}{figure.1}\protected@file@percent }
+\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Double DQN. I found there is interesting camel like bump for q-value when training with Double DQN. It is less stable than the vanilla DQN.}}{4}{figure.2}\protected@file@percent }
+\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Dueling DQN. Using Advantage network creates comparable results as the DQN.}}{4}{figure.3}\protected@file@percent }
+\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Prioritized Experience Replay. Using this alone makes the training process less stable and loss is significantly higher than the previous methods.}}{5}{figure.4}\protected@file@percent }
+\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces N-Step Experience Replay. So far the most stable method of training, especially when the replay buffer size is large. However, when the replay buffer size is too small, typically $\le 70$, the training process may not converge to optimal performance.}}{5}{figure.5}\protected@file@percent }
+\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces NStep + PER. Combining the two methods counter the unstable loss function for training in PER.}}{6}{figure.6}\protected@file@percent }
+\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Noisy DQN. Experiment for sigma = 0.017 gets comparable result with normal DQN. Stability issue persist when sigma is too large.}}{6}{figure.7}\protected@file@percent }
+\gdef \@abspage@last{7}