\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\newlabel{1}{{{1}}{2}{}{AMS.1}{}}
\newlabel{2}{{{2}}{2}{}{AMS.2}{}}
\newlabel{3}{{{3}}{2}{}{AMS.3}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces DQN. The results are as expected from training, with nothing notable to report.}}{4}{figure.1}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Double DQN. There is an interesting camel-like bump in the Q-value curve when training with Double DQN. Training is less stable than with the vanilla DQN.}}{4}{figure.2}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Dueling DQN. Using an advantage network produces results comparable to the DQN.}}{4}{figure.3}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Prioritized Experience Replay. Using this alone makes training less stable, and the loss is significantly higher than with the previous methods.}}{5}{figure.4}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces N-Step Experience Replay. So far the most stable training method, especially when the replay buffer is large. However, when the replay buffer is too small, typically $\le 70$, training may not converge to optimal performance.}}{5}{figure.5}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces N-Step + PER. Combining the two methods counters the unstable loss observed when training with PER alone.}}{6}{figure.6}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Noisy DQN. The experiment with $\sigma = 0.017$ achieves results comparable to the standard DQN. Stability issues persist when $\sigma$ is too large.}}{6}{figure.7}\protected@file@percent }
\gdef \@abspage@last{7}