partial update for section 1

2025-11-01 16:28:48 -05:00
parent 0f109ac389
commit ac986ec69a
40 changed files with 1439 additions and 3 deletions
--- a/result.tex
+++ b/result.tex
@@ -0,0 +1,168 @@
+\documentclass[11pt]{article}
+\usepackage{amsmath, amsfonts, amsthm, amssymb} % AMS math, fonts, theorem support and symbols
+\usepackage{fancyhdr,parskip} % fancyhdr: \fancyhead and the fancy page styles
+\usepackage{fullpage}
+\usepackage{mathrsfs} % \mathscr
+\usepackage{mathtools} % \DeclarePairedDelimiterX
+\usepackage{float} % [H] figure placement
+\usepackage{graphicx} % \includegraphics (required by the figures in the document body)
+\usepackage{hyperref} % \url; kept as the last package loaded
+
+%%
+%% Stuff above here is packages that will be used to compile your document.
+%% If you've used unusual LaTeX features, you may have to install extra packages by adding them to this list.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+\setlength{\headheight}{15.2pt} % height of the box that holds the running header
+\setlength{\headsep}{20pt} % gap between the running header and the body text
+\pagestyle{fancy} % fancyhdr page style (replaces the deprecated `fancyplain'; no plain-style pages are used here)
+
+%%
+%% Stuff above here is layout and formatting.  If you've never used LaTeX before, you probably don't need to change any of it.
+%% Later, you can learn how it all works and adjust it to your liking, or write your own formatting code.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%
+% Theorem-like environments. The optional [theorem] argument makes lemma, corollary, prop and defn share the counter of `theorem'.
+\newtheorem{theorem}{Theorem}
+\newtheorem{lemma}[theorem]{Lemma}
+\newtheorem{corollary}[theorem]{Corollary}
+\newtheorem{prop}[theorem]{Proposition}
+\newtheorem{defn}[theorem]{Definition}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% This section contains some useful macros that will save you time typing.
+%%
+
+% \ds abbreviates \displaystyle, which (among other effects) makes fractions in a block of math come out bigger.
+\newcommand*{\ds}{\displaystyle}
+
+% Integral helpers: \is inserts a fixed 2pt horizontal gap, and \dx places that gap in front of the differential dx.
+\newcommand*{\is}{\hspace{2pt}}
+\newcommand*{\dx}{\is dx}
+
+% One-letter shortcuts: blackboard-bold Z, Q, R, C, F and calligraphic T, B.
+\newcommand*{\Z}{\mathbb{Z}}
+\newcommand*{\Q}{\mathbb{Q}}
+\newcommand*{\R}{\mathbb{R}}
+\newcommand*{\C}{\mathbb{C}}
+\newcommand*{\F}{\mathbb{F}}
+\newcommand*{\T}{\mathcal{T}}
+\newcommand*{\B}{\mathcal{B}}
+
+% Typeset \emptyset with the \varnothing glyph.
+\renewcommand*{\emptyset}{\varnothing}
+
+% customized commands for future assignments
+\newcommand*{\imply}{\Rightarrow}
+\renewcommand*{\P}{\mathscr{P}} % deliberately replaces the kernel's \P (pilcrow sign)
+\renewcommand*{\L}{\mathscr{L}} % deliberately replaces the kernel's \L (L with stroke)
+\newcommand*{\M}{\mathscr{M}} % \newcommand errors out if \M ever becomes defined elsewhere
+\DeclarePairedDelimiterX{\inp}[2]{\langle}{\rangle}{#1, #2} % \inp{x}{y}: x, y between angle brackets
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% This is the header.  It will appear on every page, and it's a good place to put your name, the assignment title, and stuff like that.
+%% I usually leave the center header blank to avoid clutter.
+%%
+
+\fancyhead[L]{\textbf{CSE5100 Homework 3}} % left field: assignment title
+\fancyhead[C]{} % center field: left blank
+\fancyhead[R]{Zheyuan Wu} % right field: author name
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+\begin{document}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Actual math starts here!
+
+
+% Use an enumerated list to write up problems.  First we begin a list.
+
+\textbf{Use Of GenAI}
+
+This homework was completed with the help of the Windsurf VS Code extension (\url{https://windsurf.com/}).
+
+What is used:
+
+\begin{itemize}
+    \item The autofill feature, to generate syntactically correct \LaTeX{} code for the homework under human supervision (each press of the Tab key filled in no more than 100 characters, and at most $20\%$ of the predicted text was adopted).
+    \item Use AI to debug the latex code and find unclosed parentheses or other syntax errors.
+    \item Use AI to autofill the parts that follow the same structure as the previous parts (example: case-by-case proofs).
+    \item Use AI to auto correct misspelled words or latex commands.
+\end{itemize}
+
+What is not used:
+
+\begin{itemize}
+    \item Directly use AI to generate the solutions in the \LaTeX{} document.
+    \item Use AI to ask for hints or solutions for the problems.
+    \item Select part of the document and ask AI to fill the parts missing.
+\end{itemize}
+
+\newpage
+
+\begin{enumerate}
+    \item [1.3] Deliveries
+    \begin{enumerate}
+        \item [1.3.1]
+        Create two graphs: 
+        \begin{itemize}
+            \item In the first graph, compare the learning curves (average return vs. number of environment steps) for the experiments running with batch size of 1000. (The small batch
+            experiments.) (15 pts)
+
+            \begin{figure}[H]
+                \centering
+                \includegraphics[width=0.8\textwidth]{images/p1311.png}
+                \caption{Learning Curves for Batch Size of 1000}
+            \end{figure}
+            \item In the second graph, compare the learning curves for the experiments running with batch size of 4000. (The large batch experiments.) (15 pts)
+
+            \begin{figure}[H]
+                \centering
+                \includegraphics[width=0.8\textwidth]{images/p1312.png}
+                \caption{Learning Curves for Batch Size of 4000}
+            \end{figure}
+            Note that the x-axis should be number of environment steps, not number of policy gradient iterations.
+        \end{itemize}
+
+        \item [1.3.2]
+        Answer the following questions briefly:
+
+        Provide the exact command line configurations you used to run your experiments, including any parameters changed from their defaults.
+        
+        The best configuration in both the small and large batch size cases should converge to a maximum score of 500.
+        \begin{itemize}
+            \item Which value estimator has better performance without advantage normalization: the trajectory-centric one, or the one using reward-to-go? Why? (10 pts)
+            
+            The reward-to-go one has better performance without advantage normalization.
+
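+            The reward-to-go estimator has more fine-grained control over the learning process: it uses only the rewards collected from the current timestep onward to estimate the Q-value for the current state-action pair, so rewards earned before the action (which the action could not have influenced) no longer add noise to the gradient estimate.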
+
+            \item Did advantage normalization help? (10 pts)
+            
+            Yes, advantage normalization helps.
+
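+            The advantage normalization helps the learning process by rescaling the advantages in each batch to zero mean and unit variance, which keeps the scale of the gradient updates (the effective step size) stable across batches and prevents the policy from overfitting to a few trajectories with unusually large returns.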
+
+            \item Did the batch size make an impact? (10 pts)
+
+            Yes, the batch size makes an impact.
+
+            The larger batch size allows the agent to learn from more data in each update, which can help the agent to converge to a better policy, especially when the normalization and reward-to-go are used.
+        \end{itemize}
+    \end{enumerate}
+
+\end{enumerate}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Actual math ends here.  Don't put any content below the \end{document} line.
+%%
+
+\end{document}