diff --git a/hw3/bash/2-4-experiments.sh b/hw3/bash/2-4-experiments.sh index 74dff15..c4ecef6 100644 --- a/hw3/bash/2-4-experiments.sh +++ b/hw3/bash/2-4-experiments.sh @@ -7,4 +7,11 @@ python run.py --env_name HalfCheetah-v4 -n 100 -b 5000 -rtg -na --use_baseline - python run.py --env_name HalfCheetah-v4 -n 100 -b 5000 -rtg -na --use_baseline --baseline_gradient_steps 3 --exp_name halfcheetah_na_rtg_baseline_bgs3 python run.py --env_name HalfCheetah-v4 -n 100 -b 5000 -rtg -na --use_baseline --baseline_learning_rate 0.001 --exp_name halfcheetah_na_rtg_baseline_blr1e-3 # Berkely parameters -python run.py --env_name HalfCheetah-v4 -n 100 -b 5000 -rtg --use_baseline --baseline_gradient_steps 5 --baseline_learning_rate 0.01 --exp_name halfcheetah_na_rtg_baseline_bgs5_blr1e-2 \ No newline at end of file +python run.py --env_name HalfCheetah-v4 -n 100 -b 5000 -rtg --use_baseline --baseline_gradient_steps 5 --baseline_learning_rate 0.01 --exp_name halfcheetah_na_rtg_baseline_bgs5_blr1e-2 +# try +python run.py --env_name HalfCheetah-v4 -n 100 -b 5000 -rtg --use_baseline --baseline_gradient_steps 10 --exp_name halfcheetah_na_rtg_baseline_bgs10 +python run.py --env_name HalfCheetah-v4 -n 100 -b 5000 -rtg --use_baseline --baseline_gradient_steps 10 --baseline_learning_rate 0.0001 --exp_name halfcheetah_na_rtg_baseline_bgs10_blr1e-4 +python run.py --env_name HalfCheetah-v4 -n 100 -b 5000 -rtg --use_baseline --baseline_gradient_steps 8 --baseline_learning_rate 0.001 --exp_name halfcheetah_na_rtg_baseline_bgs10_blr1e-3 +# Extended na +python run.py --env_name HalfCheetah-v4 -n 300 -b 5000 -na --use_baseline --exp_name halfcheetah_na_baseline_n300 +python run.py --env_name HalfCheetah-v4 -n 300 -b 10000 -na --use_baseline --exp_name halfcheetah_na_lb_baseline diff --git a/hw3/data/p24/pg_halfcheetah_na_baseline_n300_HalfCheetah-v4_02-11-2025_13-46-17/events.out.tfevents.1762112777._0x8007000D 
b/hw3/data/p24/pg_halfcheetah_na_baseline_n300_HalfCheetah-v4_02-11-2025_13-46-17/events.out.tfevents.1762112777._0x8007000D new file mode 100644 index 0000000..550f188 Binary files /dev/null and b/hw3/data/p24/pg_halfcheetah_na_baseline_n300_HalfCheetah-v4_02-11-2025_13-46-17/events.out.tfevents.1762112777._0x8007000D differ diff --git a/hw3/data/p24/pg_halfcheetah_na_lb_baseline_HalfCheetah-v4_02-11-2025_14-46-22/events.out.tfevents.1762116382._0x8007000D b/hw3/data/p24/pg_halfcheetah_na_lb_baseline_HalfCheetah-v4_02-11-2025_14-46-22/events.out.tfevents.1762116382._0x8007000D new file mode 100644 index 0000000..38c255e Binary files /dev/null and b/hw3/data/p24/pg_halfcheetah_na_lb_baseline_HalfCheetah-v4_02-11-2025_14-46-22/events.out.tfevents.1762116382._0x8007000D differ diff --git a/hw3/data/p24/pg_halfcheetah_na_rtg_baseline_bgs10_HalfCheetah-v4_02-11-2025_13-07-55/events.out.tfevents.1762110475._0x8007000D b/hw3/data/p24/pg_halfcheetah_na_rtg_baseline_bgs10_HalfCheetah-v4_02-11-2025_13-07-55/events.out.tfevents.1762110475._0x8007000D new file mode 100644 index 0000000..844a4fe Binary files /dev/null and b/hw3/data/p24/pg_halfcheetah_na_rtg_baseline_bgs10_HalfCheetah-v4_02-11-2025_13-07-55/events.out.tfevents.1762110475._0x8007000D differ diff --git a/hw3/data/p24/pg_halfcheetah_na_rtg_baseline_bgs10_blr1e-3_HalfCheetah-v4_02-11-2025_13-34-30/events.out.tfevents.1762112070._0x8007000D b/hw3/data/p24/pg_halfcheetah_na_rtg_baseline_bgs10_blr1e-3_HalfCheetah-v4_02-11-2025_13-34-30/events.out.tfevents.1762112070._0x8007000D new file mode 100644 index 0000000..d88acc6 Binary files /dev/null and b/hw3/data/p24/pg_halfcheetah_na_rtg_baseline_bgs10_blr1e-3_HalfCheetah-v4_02-11-2025_13-34-30/events.out.tfevents.1762112070._0x8007000D differ diff --git a/hw3/data/p24/pg_halfcheetah_na_rtg_baseline_bgs10_blr1e-4_HalfCheetah-v4_02-11-2025_13-21-02/events.out.tfevents.1762111262._0x8007000D 
b/hw3/data/p24/pg_halfcheetah_na_rtg_baseline_bgs10_blr1e-4_HalfCheetah-v4_02-11-2025_13-21-02/events.out.tfevents.1762111262._0x8007000D new file mode 100644 index 0000000..05e4406 Binary files /dev/null and b/hw3/data/p24/pg_halfcheetah_na_rtg_baseline_bgs10_blr1e-4_HalfCheetah-v4_02-11-2025_13-21-02/events.out.tfevents.1762111262._0x8007000D differ diff --git a/hw3_20251102.zip b/hw3_20251102.zip new file mode 100644 index 0000000..b010369 Binary files /dev/null and b/hw3_20251102.zip differ diff --git a/images/p24.png b/images/p24.png new file mode 100644 index 0000000..e8085fe Binary files /dev/null and b/images/p24.png differ diff --git a/result.aux b/result.aux index cf5298d..392683b 100644 --- a/result.aux +++ b/result.aux @@ -9,4 +9,5 @@ \@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Learning Curve for Baseline Loss for Batch Size of 5000 with Decreased Baseline Gradient Steps and/or Baseline Learning Rate}}{5}{figure.5}\protected@file@percent } \@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Learning Curve for Average Return for Batch Size of 5000 with Decreased Baseline Gradient Steps and/or Baseline Learning Rate}}{5}{figure.6}\protected@file@percent } \@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Learning Curve for Average Return for Batch Size of 5000 with Command Line Argument -na}}{6}{figure.7}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Learning Curve for Average Return for HalfCheetah with Berkely Parameters}}{7}{figure.8}\protected@file@percent } \gdef \@abspage@last{7} diff --git a/result.fdb_latexmk b/result.fdb_latexmk index 9bbe7cd..38403a2 100644 --- a/result.fdb_latexmk +++ b/result.fdb_latexmk @@ -1,5 +1,5 @@ # Fdb version 4 -["pdflatex"] 1762109729.51876 "d:/Documents/Nextcloud/Documents/Project WUSTL/Academic/2025_Fall/CSE5100/Homeworks/hw3/result.tex" "result.pdf" "result" 1762109732.7984 0 +["pdflatex"] 
1762135450.31487 "d:/Documents/Nextcloud/Documents/Project WUSTL/Academic/2025_Fall/CSE5100/Homeworks/hw3/result.tex" "result.pdf" "result" 1762135454.02355 0 "c:/texlive/2023/texmf-dist/fonts/enc/dvips/cm-super/cm-super-ts1.enc" 1708989547 2900 1537cc8184ad1792082cd229ecc269f4 "" "c:/texlive/2023/texmf-dist/fonts/map/fontname/texfonts.map" 1708990624 3524 cb3e574dea2d1052e39280babc910dc8 "" "c:/texlive/2023/texmf-dist/fonts/tfm/jknappen/ec/tcrm1095.tfm" 1708990172 1536 02c06700a42be0f5a28664c7273f82e7 "" @@ -90,7 +90,7 @@ "c:/texlive/2023/texmf-var/fonts/map/pdftex/updmap/pdftex.map" 1708994999 5518052 de2a91c664d75f3971de4662dc6b5a65 "" "c:/texlive/2023/texmf-var/web2c/pdftex/pdflatex.fmt" 1708995327 8220658 fb4d14532342a0ef5245dd396c4a1bd1 "" "c:/texlive/2023/texmf.cnf" 1708994944 713 e69b156964470283e0530f5060668171 "" - "d:/Documents/Nextcloud/Documents/Project WUSTL/Academic/2025_Fall/CSE5100/Homeworks/hw3/result.tex" 1762109729 9327 325d504eb4600f89970ba23df50c6614 "" + "d:/Documents/Nextcloud/Documents/Project WUSTL/Academic/2025_Fall/CSE5100/Homeworks/hw3/result.tex" 1762135449 9599 077510b604c1d10442793fc223b5d22e "" "images/p1311.png" 1762031876 149514 6293646f1f05d492f35159770b12eb5a "" "images/p1312.png" 1762031893 133077 d3cf887e55ce7659912ab10bb8452d8b "" "images/p231.png" 1762108686 96865 fd505e278e838359b318c393b3861b46 "" @@ -98,9 +98,10 @@ "images/p2331.png" 1762109093 245738 c016dd30b4e309d33364e417efa7104b "" "images/p2332.png" 1762109428 230947 a0b7230271c0d200011df9c3a746f979 "" "images/p234.png" 1762109220 123051 f5b2e6d5ec4925af33980bea13bd95d4 "" - "result.aux" 1762109732 1465 e94525fee934452f444bc84f4e518ee6 "pdflatex" - "result.out" 1762109730 0 d41d8cd98f00b204e9800998ecf8427e "pdflatex" - "result.tex" 1762109729 9327 325d504eb4600f89970ba23df50c6614 "" + "images/p24.png" 1762135386 347948 28766f8a888b6b71f1a0c7560f7174a8 "" + "result.aux" 1762135453 1649 388ea5c36186412711babf1dcfd9cf57 "pdflatex" + "result.out" 1762135451 0 
d41d8cd98f00b204e9800998ecf8427e "pdflatex" + "result.tex" 1762135449 9599 077510b604c1d10442793fc223b5d22e "" (generated) "result.aux" "result.log" diff --git a/result.fls b/result.fls index 3d1117d..f66bf49 100644 --- a/result.fls +++ b/result.fls @@ -205,6 +205,11 @@ INPUT ./images/p234.png INPUT ./images/p234.png INPUT ./images/p234.png INPUT ./images/p234.png +INPUT ./images/p24.png +INPUT ./images/p24.png +INPUT ./images/p24.png +INPUT ./images/p24.png +INPUT ./images/p24.png INPUT result.aux INPUT ./result.out INPUT ./result.out diff --git a/result.log b/result.log index 5961c6e..0f623d7 100644 --- a/result.log +++ b/result.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2024.2.26) 2 NOV 2025 12:55 +This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2024.2.26) 2 NOV 2025 20:04 entering extended mode restricted \write18 enabled. file:line:error style messages enabled. @@ -390,7 +390,13 @@ File: images/p234.png Graphic file (type png) Package pdftex.def Info: images/p234.png used on input line 209. (pdftex.def) Requested size: 375.80544pt x 195.68184pt. - [6 <./images/p234.png>] [7] (./result.aux) + [6 <./images/p234.png>] + +File: images/p24.png Graphic file (type png) + +Package pdftex.def Info: images/p24.png used on input line 219. +(pdftex.def) Requested size: 375.80544pt x 195.68184pt. + [7 <./images/p24.png>] (./result.aux) *********** LaTeX2e <2023-11-01> patch level 1 L3 programming layer <2024-02-20> @@ -399,18 +405,18 @@ Package rerunfilecheck Info: File `result.out' has not changed. (rerunfilecheck) Checksum: D41D8CD98F00B204E9800998ECF8427E;0. 
) Here is how much of TeX's memory you used: - 10687 strings out of 474137 - 163778 string characters out of 5748517 + 10695 strings out of 474137 + 163932 string characters out of 5748517 1932190 words of memory out of 5000000 - 32871 multiletter control sequences out of 15000+600000 + 32878 multiletter control sequences out of 15000+600000 564028 words of font info for 60 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 69i,12n,79p,633b,567s stack positions out of 10000i,1000n,20000p,200000b,200000s -Output written on result.pdf (7 pages, 1185257 bytes). +Output written on result.pdf (7 pages, 1475932 bytes). PDF statistics: - 86 PDF objects out of 1000 (max. 8388607) - 54 compressed objects within 1 object stream - 15 named destinations out of 1000 (max. 500000) - 36 words of extra memory for PDF output out of 10000 (max. 10000000) + 89 PDF objects out of 1000 (max. 8388607) + 55 compressed objects within 1 object stream + 16 named destinations out of 1000 (max. 500000) + 41 words of extra memory for PDF output out of 10000 (max. 
10000000) diff --git a/result.pdf b/result.pdf index 7457fc8..f5dcb5a 100644 Binary files a/result.pdf and b/result.pdf differ diff --git a/result.synctex.gz b/result.synctex.gz index 24c72e6..132ec6f 100644 Binary files a/result.synctex.gz and b/result.synctex.gz differ diff --git a/result.tex b/result.tex index cc05610..34bbe0e 100644 --- a/result.tex +++ b/result.tex @@ -214,11 +214,13 @@ What is not used: \newpage \item [2.4] Bonus (20pt) - % \begin{figure}[H] - % \centering - % \includegraphics[width=0.8\textwidth]{images/p241.png} - % \caption{Learning Curve for Average Return for HalfCheetah with Berkely Parameters} - % \end{figure} + \begin{figure}[H] + \centering + \includegraphics[width=0.8\textwidth]{images/p24.png} + \caption{Learning Curve for Average Return for HalfCheetah with Berkeley Parameters} + \end{figure} + + Unfortunately, the experiments with the Berkeley parameters did not converge to the maximum reward of 300. We tested different batch sizes and learning rates, but the results were still unsatisfactory. Even with more training iterations, the performance did not improve beyond that of random movement. \end{enumerate}