done?

2025-10-12 00:55:07 -05:00
commit 250f763f1f
467 changed files with 19784 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
 # python files
 __pycache__
--- a/hw2/DRL_Homework_2.pdf
+++ b/hw2/DRL_Homework_2.pdf
--- a/hw2/README.md
+++ b/hw2/README.md
@@ -0,0 +1,24 @@
 # Installation
 Since we are using PyTorch for hw2, we recommend using conda to manage the environment. Please refer to the [miniconda](https://docs.conda.io/en/latest/miniconda.html) homepage for a compact conda installation.
 You have two options for creating the environment of hw2
 * For Mac users or a CPU-only installation, please remove the `pytorch-cuda` term in either case.
 * To create a new conda environment, simply run `conda env create -f environment.yml`
 * If you want to install the packages within the environment you created for hw1, please follow the steps below:
  ```bash
  conda activate <hw1-env-name>
  # we are using PyTorch 2.0!
  # remove the pytorch-cuda=11.7 term if you are a mac user or want a cpu-only installation
  conda install pytorch==2.0.0 pytorch-cuda=11.7 -c pytorch -c nvidia
  pip install gymnasium[classic_control]==0.27.1
  pip install matplotlib==3.7.1
  # for hyperparameter management
  pip install hydra-core==1.3.2
  # for video recording
  pip install moviepy==1.0.3
  ```
 That's it! If you encounter any trouble creating the environment, please let us know :-)
--- a/hw2/agent.py
+++ b/hw2/agent.py
@@ -0,0 +1,127 @@
 import os
 import torch
 import torch.optim as optim
 from copy import deepcopy
 from model import QNetwork, DuelingQNetwork
 from gymnasium.wrappers import TimeLimit
 class DQNAgent:
    """
    DQN agent with optional Double-DQN targets and an optional dueling network.

    The agent owns an online network (``q_net``), a target network that starts as a
    deep copy of it, and an AdamW optimizer over the online network's parameters.
    """
    def __init__(self, state_size, action_size, cfg, device='cuda'):
        """
        Build the networks and optimizer from ``cfg``.

        ``cfg`` must provide: use_double, use_dueling, target_update_interval,
        hidden_size, activation, lr, tau, gamma and nstep.
        """
        self.device = device
        self.use_double = cfg.use_double
        self.use_dueling = cfg.use_dueling
        self.target_update_interval = cfg.target_update_interval
        q_model = DuelingQNetwork if self.use_dueling else QNetwork
        self.q_net = q_model(state_size, action_size, cfg.hidden_size, cfg.activation).to(self.device)
        self.target_net = deepcopy(self.q_net).to(self.device)
        self.optimizer = optim.AdamW(self.q_net.parameters(), lr=cfg.lr)
        self.tau = cfg.tau
        # update the gamma we use in the Bellman equation for n-step DQN
        self.gamma = cfg.gamma ** cfg.nstep
    def soft_update(self, target, source):
        """
        Soft update the target network using the source network:
        target <- (1 - tau) * target + tau * source
        """
        for target_param, source_param in zip(target.parameters(), source.parameters()):
            target_param.data.copy_((1 - self.tau) * target_param.data + self.tau * source_param.data)
    @torch.no_grad()
    def get_action(self, state):
        """
        Get the greedy action for the current state according to the online Q network.

        Returns the argmax over the last (action) dimension as a numpy value, so a single
        un-batched state yields a 0-d array.
        """
        # cast to float32 so float64 observations do not clash with the network weights
        state_tensor = torch.as_tensor(state, dtype=torch.float32, device=self.device)
        greedy_idx = torch.argmax(self.q_net(state_tensor), dim=-1)
        return greedy_idx.cpu().numpy()
    @torch.no_grad()
    def get_Q_target(self, state, action, reward, done, next_state) -> torch.Tensor:
        """
        Get the target Q value according to the Bellman equation:
        reward + (1 - done) * gamma * Q_next

        With ``use_double`` the next action is chosen by the online network and evaluated
        by the target network (Double DQN); otherwise the target network's maximum is used.
        ``state`` and ``action`` are accepted for interface symmetry but are not used.
        """
        reward = reward.to(self.device)
        next_state = next_state.to(self.device)
        not_done = 1 - done.to(self.device).to(reward.dtype)
        if self.use_double:
            # decouple action selection (online net) from action evaluation (target net)
            next_action = self.q_net(next_state).argmax(dim=1, keepdim=True)
            next_q = self.target_net(next_state).gather(1, next_action).squeeze(1)
        else:
            next_q = self.target_net(next_state).max(dim=1).values
        return reward + not_done * self.gamma * next_q
    def get_Q(self, state, action, use_double_net=False) -> torch.Tensor:
        """
        Get the Q value of the given state/action pairs.

        ``use_double_net=True`` reads the values from the target network instead of the
        online network. ``action`` may have shape (batch, 1) or (batch,).
        """
        net = self.target_net if use_double_net else self.q_net
        q_values = net(state.to(self.device))
        # torch.gather documents its index as a LongTensor, so cast to int64
        action_idx = action.to(self.device).reshape(-1, 1).long()
        return q_values.gather(1, action_idx).squeeze(1)
    def update(self, batch, step, weights=None):
        """
        Run one gradient step on a batch and periodically soft-update the target network.

        Args:
            batch: tuple (state, action, reward, next_state, done) of tensors.
            step: current training step; the target network is updated when
                ``step`` is a multiple of ``target_update_interval``.
            weights: optional per-sample loss weights, shape (batch,) or (batch, 1).
        Returns:
            (loss value, detached per-sample absolute TD error, mean Q value)
        """
        state, action, reward, next_state, done = batch
        Q_target = self.get_Q_target(state, action, reward, done, next_state)
        Q = self.get_Q(state, action)
        if weights is None:
            weights = torch.ones_like(Q)
        else:
            # force a per-sample shape; a (batch, 1) tensor would otherwise broadcast
            # against Q of shape (batch,) into a (batch, batch) matrix
            weights = torch.as_tensor(weights, dtype=Q.dtype, device=Q.device).reshape(Q.shape)
        td_error = torch.abs(Q - Q_target).detach()
        loss = torch.mean((Q - Q_target) ** 2 * weights)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        if step % self.target_update_interval == 0:
            with torch.no_grad():
                self.soft_update(self.target_net, self.q_net)
        return loss.item(), td_error, Q.mean().item()
    def save(self, name):
        """Save the online network's state dict to ``models/<name>``."""
        os.makedirs('models', exist_ok=True)
        torch.save(self.q_net.state_dict(), os.path.join('models', name))
    def load(self, name='best.pt'):
        """Load the online network from ``models/<name>`` and sync the target network to it."""
        # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine
        state_dict = torch.load(os.path.join('models', name), map_location=self.device)
        self.q_net.load_state_dict(state_dict)
        # keep the target network consistent with the freshly loaded weights
        self.target_net.load_state_dict(self.q_net.state_dict())
    def __repr__(self) -> str:
        use_double = 'Double' if self.use_double else ''
        use_dueling = 'Dueling' if self.use_dueling else ''
        prefix = 'Normal' if not self.use_double and not self.use_dueling else ''
        return use_double + use_dueling + prefix + 'QNetwork'
--- a/hw2/buffer.py
+++ b/hw2/buffer.py
@@ -0,0 +1,202 @@
 import torch
 import numpy as np
 from collections import deque
 def get_buffer(cfg, **args):
    """
    Build the replay buffer selected by ``cfg``.

    ``cfg.use_per`` picks the prioritized variants and ``cfg.nstep`` picks between the
    one-step and n-step variants; ``args`` are forwarded to the buffer constructor.
    """
    assert type(cfg.nstep) is int and cfg.nstep > 0, 'nstep must be a positive integer'
    if cfg.use_per:
        if cfg.nstep == 1:
            return PrioritizedReplayBuffer(cfg.capacity, cfg.per_eps, cfg.per_alpha, cfg.per_beta, **args)
        return PrioritizedNStepReplayBuffer(
            cfg.capacity, cfg.per_eps, cfg.per_alpha, cfg.per_beta, cfg.nstep, cfg.gamma, **args
        )
    if cfg.nstep == 1:
        return ReplayBuffer(cfg.capacity, **args)
    return NStepReplayBuffer(cfg.capacity, cfg.nstep, cfg.gamma, **args)
 class ReplayBuffer:
    """
    Fixed-capacity ring buffer of transitions with uniform sampling.

    Storage tensors are allocated without a device argument (i.e. on the default device);
    sampled batches are moved to ``device`` before being returned.
    """
    def __init__(self, capacity, state_size, device):
        self.device = device
        self.state = torch.empty(capacity, state_size, dtype=torch.float)
        self.action = torch.empty(capacity, 1, dtype=torch.float)
        self.reward = torch.empty(capacity, dtype=torch.float)
        self.next_state = torch.empty(capacity, state_size, dtype=torch.float)
        self.done = torch.empty(capacity, dtype=torch.int)
        self.idx = 0  # next slot to write
        self.size = 0  # number of valid transitions stored
        self.capacity = capacity
    def __repr__(self) -> str:
        return 'NormalReplayBuffer'
    def add(self, transition):
        """
        Store one ``(state, action, reward, next_state, done)`` transition, overwriting the
        oldest entry once the buffer is full.
        """
        state, action, reward, next_state, done = transition
        slot = self.idx
        # write straight into the storage tensors; creating the values on self.device
        # first would only add a device round trip per transition
        self.state[slot] = torch.as_tensor(state, dtype=torch.float)
        self.action[slot] = torch.as_tensor(action, dtype=torch.float)
        self.reward[slot] = torch.as_tensor(reward, dtype=torch.float)
        self.next_state[slot] = torch.as_tensor(next_state, dtype=torch.float)
        self.done[slot] = torch.as_tensor(done, dtype=torch.int)
        self.idx = (self.idx + 1) % self.capacity
        self.size = min(self.size + 1, self.capacity)
    def sample(self, batch_size):
        """
        Sample ``batch_size`` distinct transitions uniformly at random.

        Returns a tuple ``(state, action, reward, next_state, done)`` of tensors on
        ``self.device``. Raises ValueError (from numpy) if ``batch_size > self.size``.
        """
        # sample batch_size data from the buffer without replacement
        sample_idxs = np.random.choice(self.size, batch_size, replace=False)
        # build the index tensor once and reuse it for every field
        index = torch.as_tensor(sample_idxs, dtype=torch.long)
        batch = tuple(
            field[index].to(self.device)
            for field in (self.state, self.action, self.reward, self.next_state, self.done)
        )
        return batch
 class NStepReplayBuffer(ReplayBuffer):
    """
    Replay buffer that stores n-step transitions.

    Incoming one-step transitions are kept in a sliding window of length ``n_step``; once
    the window is full, the oldest entry is stored with its discounted n-step return.
    """
    def __init__(self, capacity, n_step, gamma, state_size, device):
        super().__init__(capacity, state_size, device=device)
        self.n_step = n_step
        self.n_step_buffer = deque([], maxlen=n_step)
        self.gamma = gamma
    def __repr__(self) -> str:
        return f'{self.n_step}StepReplayBuffer'
    def n_step_handler(self):
        """
        Get n-step state, action, reward and done for the oldest transition in the window,
        discarding rewards after the first transition with done=True.

        The returned ``done`` is the flag of the transition where accumulation stopped, so
        a terminal reached anywhere inside the window marks the n-step transition terminal.
        """
        state, action, _, _ = self.n_step_buffer[0]
        reward, done, discount = 0.0, 0, 1.0
        for _, _, step_reward, step_done in self.n_step_buffer:
            reward += discount * step_reward
            discount *= self.gamma
            if step_done:
                # episode ended here: later entries belong to the next episode
                done = step_done
                break
        return state, action, reward, done
    def add(self, transition):
        """
        Push a one-step transition into the window and, once the window holds ``n_step``
        entries, store the resulting n-step transition in the underlying buffer.
        """
        state, action, reward, next_state, done = transition
        self.n_step_buffer.append((state, action, reward, done))
        if len(self.n_step_buffer) < self.n_step:
            return
        state, action, reward, done = self.n_step_handler()
        # next_state is the successor of the newest transition in the window
        super().add((state, action, reward, next_state, done))
 class PrioritizedReplayBuffer(ReplayBuffer):
    """
    Replay buffer with proportional prioritized sampling.

    ``self.weights`` holds one priority per slot; new transitions receive the current
    maximum priority, and priorities are refreshed from TD errors via ``update_priorities``.
    """
    def __init__(self, capacity, eps, alpha, beta, state_size, device):
        self.weights = np.zeros(capacity, dtype=np.float32) # stores the priority of every slot
        self.eps = eps  # minimal priority for stability
        self.alpha = alpha  # determines how much prioritization is used, α = 0 corresponding to the uniform case
        self.beta = beta  # determines the amount of importance-sampling correction, b = 1 fully compensate for the non-uniform probabilities
        self.max_priority = eps  # priority for new samples, init as eps
        super().__init__(capacity, state_size, device=device)
    def add(self, transition):
        """
        Add a new experience to memory, and set its priority to the max_priority.
        """
        # remember the slot before the parent advances self.idx, otherwise the
        # priority would be written to the following (different) slot
        slot = self.idx
        super().add(transition)
        self.weights[slot] = self.max_priority
    def sample(self, batch_size):
        """
        Sample a batch of experiences from the buffer with priority, and calculates the weights used for the correction of bias used in the Q-learning update
        Returns:
            batch: a batch of experiences as in the normal replay buffer
            weights: torch.Tensor (batch_size, ), importance sampling weights for each sample
            sample_idxs: numpy.ndarray (batch_size, ), the indexes of the sample in the buffer
        """
        # only the filled part of the buffer carries valid priorities
        priorities = torch.from_numpy(self.weights[:self.size])
        # sample with replacement so a batch can be drawn even from a small buffer
        sample_idxs_tensor = torch.multinomial(priorities, batch_size, replacement=True)
        sample_idxs = sample_idxs_tensor.numpy()
        batch = tuple(
            field[sample_idxs_tensor].to(self.device)
            for field in (self.state, self.action, self.reward, self.next_state, self.done)
        )
        # importance-sampling weights w_i = (N * P(i)) ** -beta, scaled by the batch
        # maximum so that weights only ever scale the update downwards
        probs = priorities / priorities.sum()
        weights = (self.size * probs[sample_idxs_tensor]) ** (-self.beta)
        weights = (weights / weights.max()).to(self.device)
        return batch, weights, sample_idxs
    def update_priorities(self, data_idxs, priorities: np.ndarray):
        """Set the priorities at ``data_idxs`` to ``(priorities + eps) ** alpha`` and refresh the maximum."""
        priorities = (priorities + self.eps) ** self.alpha
        self.weights[data_idxs] = priorities
        # numpy reduction instead of Python's max(), which iterates element by element
        self.max_priority = self.weights.max()
    def __repr__(self) -> str:
        return 'PrioritizedReplayBuffer'
 # Avoid Diamond Inheritance
 class PrioritizedNStepReplayBuffer(PrioritizedReplayBuffer):
    """
    Prioritized replay buffer that stores n-step transitions.

    Combines the sliding n-step window with prioritized sampling by subclassing only the
    prioritized buffer and re-implementing the n-step logic here.
    """
    def __init__(self, capacity, eps, alpha, beta, n_step, gamma, state_size, device):
        super().__init__(capacity, eps, alpha, beta, state_size, device)
        self.n_step = n_step
        self.n_step_buffer = deque([], maxlen=n_step)
        self.gamma = gamma
    def __repr__(self) -> str:
        return f'Prioritized{self.n_step}StepReplayBuffer'
    def add(self, transition):
        """
        Push a one-step transition into the window and, once the window holds ``n_step``
        entries, store the resulting n-step transition in the prioritized buffer.
        """
        state, action, reward, next_state, done = transition
        self.n_step_buffer.append((state, action, reward, done))
        if len(self.n_step_buffer) < self.n_step:
            return
        state, action, reward, done = self.n_step_handler()
        # next_state is the successor of the newest transition in the window
        super().add((state, action, reward, next_state, done))
    def n_step_handler(self):
        """
        Get n-step state, action, reward and done for the oldest transition in the window,
        discarding rewards after the first transition with done=True.

        The returned ``done`` is the flag of the transition where accumulation stopped, so
        a terminal reached anywhere inside the window marks the n-step transition terminal.
        """
        state, action, _, _ = self.n_step_buffer[0]
        reward, done, discount = 0.0, 0, 1.0
        for _, _, step_reward, step_done in self.n_step_buffer:
            reward += discount * step_reward
            discount *= self.gamma
            if step_done:
                # episode ended here: later entries belong to the next episode
                done = step_done
                break
        return state, action, reward, done
--- a/hw2/cfgs/config.yaml
+++ b/hw2/cfgs/config.yaml
@@ -0,0 +1,45 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50_000
  batch_size: 128 
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12_500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation: 
    _target_: torch.nn.ELU
    # you can define other parameters of the __init__ function (if any) for the object here
  use_dueling: False
  use_double: False
 buffer:
  capacity: 50_000
  use_per: False
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
 hydra:
  job:
    chdir: true
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
--- a/hw2/core.py
+++ b/hw2/core.py
@@ -0,0 +1,128 @@
 from copy import deepcopy
 import random
 import logging
 import numpy as np
 from buffer import ReplayBuffer, PrioritizedReplayBuffer
 import matplotlib.pyplot as plt
 from utils import moving_average, merge_videos, get_epsilon
 from gymnasium.wrappers import RecordVideo, RecordEpisodeStatistics
 logger = logging.getLogger(__name__)
 def visualize(step, title, train_steps, train_returns, eval_steps, eval_returns, losses, q_values):
    """
    Plot training/eval returns, TD losses and Q values side by side and save the
    figure to ``results.png`` in the current working directory.

    Args:
        step: current training step, shown in the first subplot's title.
        title: figure-level title.
        train_steps, train_returns: step index and return of each finished training episode.
        eval_steps, eval_returns: step index and mean return of each evaluation.
        losses, q_values: per-update loss and mean Q value histories.
    """
    train_window, loss_window, q_window = 10, 100, 100
    plt.figure(figsize=(20, 6))
    # plot train and eval returns
    plt.subplot(1, 3, 1)
    recent_returns = train_returns[-train_window:]
    # avoid numpy's empty-mean warning before the first episode has finished
    score = np.mean(recent_returns) if len(recent_returns) > 0 else float('nan')
    plt.title('frame %s. score: %s' % (step, score))
    smoothed_returns = moving_average(train_returns, train_window)
    # align the x values with however many smoothed points came back; a fixed
    # train_steps[train_window - 1:] slice mismatches when only a few episodes exist
    plt.plot(train_steps[len(train_steps) - len(smoothed_returns):], smoothed_returns, label='train')
    if len(eval_steps) > 0:
        plt.plot(eval_steps, eval_returns, label='eval')
    plt.legend()
    plt.xlabel('step')
    # plot td losses
    plt.subplot(1, 3, 2)
    plt.title('loss')
    plt.plot(moving_average(losses, loss_window))
    plt.xlabel('step')
    plt.subplot(1, 3, 3)
    # plot q values
    plt.title('q_values')
    plt.plot(moving_average(q_values, q_window))
    plt.xlabel('step')
    plt.suptitle(title, fontsize=16)
    plt.savefig('results.png')
    plt.close()
 def eval(env, agent, episodes, seed):
    """
    Roll out the agent's ``get_action`` policy for ``episodes`` episodes.

    Episode ``i`` is reset with seed ``i + seed``. The episode return is read from
    ``info['episode']['r']`` of the final step. Returns (mean, std) of the returns.
    """
    episode_returns = []
    for episode in range(episodes):
        observation, _ = env.reset(seed=episode + seed)
        finished = False
        while not finished:
            observation, _, terminated, truncated, info = env.step(agent.get_action(observation))
            finished = terminated or truncated
        episode_returns.append(info['episode']['r'].item())
    return np.mean(episode_returns), np.std(episode_returns)
 def train(cfg, env, agent, buffer, seed):
    """
    Run the epsilon-greedy training loop for ``cfg.timesteps`` environment steps.

    Each step acts in ``env``, stores the transition in ``buffer`` and, once enough steps
    have elapsed, performs one agent update. The agent is evaluated every
    ``cfg.eval_interval`` steps (the best one is saved as 'best_model.pt') and the curves
    are plotted every ``cfg.plot_interval`` steps. After training, the final and best
    models are evaluated with video recording and the videos are merged.

    Returns the mean evaluation return of the last recorded evaluation, which is run
    after loading 'best_model.pt'.
    """
    # wrap env to record episode returns
    env = RecordEpisodeStatistics(env)
    # separate copy so evaluation episodes do not share state with the training env
    eval_env = deepcopy(env)
    losses, Qs = [], []
    episode_rewards, train_steps = [], []
    eval_rewards, eval_steps = [], []
    best_reward = -np.inf
    done, truncated = False, False
    state, _ = env.reset(seed=seed)
    for step in range(1, cfg.timesteps + 1):
        if done or truncated:
            state, _ = env.reset()
            done, truncated = False, False
            # store episode reward
            # (info is the one returned by the final step of the episode that just ended)
            episode_rewards.append(info['episode']['r'].item())
            train_steps.append(step - 1)
        eps = get_epsilon(step - 1, cfg.eps_min, cfg.eps_max, cfg.eps_steps)
        # epsilon-greedy: random action with probability eps, greedy action otherwise
        if random.random() < eps:
            action = env.action_space.sample()
        else:
            action = agent.get_action(state)
        next_state, reward, done, truncated, info = env.step(action)
        # only `done` is stored; truncation is not passed on to the buffer
        buffer.add((state, action, reward, next_state, int(done)))
        state = next_state
        # start updating only after batch_size + nstep environment steps
        if step > cfg.batch_size + cfg.nstep:
            # sample and do one step update
            if isinstance(buffer, PrioritizedReplayBuffer):
                # sample with priorities and update the priorities with td_error
                batch, weights, tree_idxs = buffer.sample(cfg.batch_size)
                loss, td_error, Q = agent.update(batch, step, weights=weights)
                buffer.update_priorities(tree_idxs, td_error.cpu().numpy())
            elif isinstance(buffer, ReplayBuffer):
                batch = buffer.sample(cfg.batch_size)
                loss, _, Q = agent.update(batch, step)
            else:
                raise RuntimeError("Unknown Buffer")
            Qs.append(Q)
            losses.append(loss)
        if step % cfg.eval_interval == 0:
            eval_mean, eval_std = eval(eval_env, agent=agent, episodes=cfg.eval_episodes, seed=seed)
            # NOTE(review): this resets the training env although evaluation ran on eval_env,
            # so the in-progress training episode is dropped — confirm this is intended
            state, _ = env.reset()
            eval_steps.append(step - 1)
            eval_rewards.append(eval_mean)
            logger.info(f"Step: {step}, Eval mean: {eval_mean}, Eval std: {eval_std}")
            if eval_mean > best_reward:
                best_reward = eval_mean
                agent.save('best_model.pt')
        if step % cfg.plot_interval == 0:
            visualize(step, f'{agent} with {buffer}', train_steps, episode_rewards, eval_steps, eval_rewards, losses, Qs)
    agent.save('final_model.pt')
    visualize(step, f'{agent} with {buffer}', train_steps, episode_rewards, eval_steps, eval_rewards, losses, Qs)
    # record every second evaluation episode of the final model
    env = RecordVideo(eval_env, 'final_videos', name_prefix='eval', episode_trigger=lambda x: x % 2 == 0 and x < cfg.eval_episodes)
    eval_mean, eval_std = eval(env, agent=agent, episodes=cfg.eval_episodes, seed=seed)
    agent.load('best_model.pt')  # use best model for visualization
    # NOTE(review): the first RecordVideo wrapper is replaced here without being closed — confirm
    env = RecordVideo(eval_env, 'best_videos', name_prefix='eval', episode_trigger=lambda x: x % 2 == 0 and x < cfg.eval_episodes)
    eval_mean, eval_std = eval(env, agent=agent, episodes=cfg.eval_episodes, seed=seed)
    env.close()
    logger.info(f"Final Eval mean: {eval_mean}, Eval std: {eval_std}")
    merge_videos('final_videos')
    merge_videos('best_videos')
    return eval_mean
--- a/hw2/environment.yml
+++ b/hw2/environment.yml
@@ -0,0 +1,15 @@
 name: drl_hw2
 channels:
  - pytorch
  - nvidia
  - defaults
 dependencies:
  - python=3.10
  - pytorch=2.0.0
  - pytorch-cuda=11.7 # Comment this line if you are a mac user or want a cpu-only installation
  - pip=23.0.1
  - pip:
    - gymnasium[classic-control]==0.27.1
    - hydra-core==1.3.2
    - matplotlib==3.7.1
    - moviepy==1.0.3
--- a/hw2/gallery/A
+++ b/hw2/gallery/A
--- a/hw2/gallery/All-In-One.png
+++ b/hw2/gallery/All-In-One.png
--- a/hw2/gallery/DQN.png
+++ b/hw2/gallery/DQN.png
--- a/hw2/gallery/Double
+++ b/hw2/gallery/Double
--- a/hw2/gallery/Dueling
+++ b/hw2/gallery/Dueling
--- a/hw2/gallery/NStep
+++ b/hw2/gallery/NStep
--- a/hw2/gallery/NStep.png
+++ b/hw2/gallery/NStep.png
--- a/hw2/gallery/PER.png
+++ b/hw2/gallery/PER.png
--- a/hw2/main.py
+++ b/hw2/main.py
@@ -0,0 +1,30 @@
 import hydra
 import utils
 import torch
 import logging
 from agent import DQNAgent
 from core import train
 from buffer import get_buffer
 import gymnasium as gym
 logger = logging.getLogger(__name__)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@hydra.main(config_path="cfgs", config_name="config", version_base="1.3")
def main(cfg):
    """
    Hydra entry point: build the environment, replay buffer and agent from ``cfg``,
    run training and log the resulting evaluation mean.
    """
    env = gym.make(cfg.env_name, render_mode="rgb_array")
    utils.set_seed_everywhere(env, cfg.seed)

    # network input/output sizes are derived from the environment's spaces
    sizes = {
        "state_size": utils.get_space_shape(env.observation_space),
        "action_size": utils.get_space_shape(env.action_space),
    }

    buffer = get_buffer(cfg.buffer, state_size=sizes["state_size"], device=device)
    agent = DQNAgent(cfg=cfg.agent, device=device, **sizes)

    logger.info(f"Training for {cfg.train.timesteps} timesteps with {agent} and {buffer}")
    eval_mean = train(cfg.train, env, agent, buffer, seed=cfg.seed)
    logger.info(f"Finish training with eval mean: {eval_mean}")


if __name__ == "__main__":
    main()
--- a/hw2/model.py
+++ b/hw2/model.py
@@ -0,0 +1,53 @@
 from hydra.utils import instantiate
 import torch
 import torch.nn as nn
 class QNetwork(nn.Module):
    """
    Three-layer MLP mapping a state to one Q value per action.

    ``activation`` is a config object passed to hydra's ``instantiate`` to create each
    non-linearity.
    """
    def __init__(self, state_size, action_size, hidden_size, activation):
        super().__init__()
        # layers are created in forward order: Linear -> act -> Linear -> act -> Linear
        layers = [
            nn.Linear(state_size, hidden_size),
            instantiate(activation),
            nn.Linear(hidden_size, hidden_size),
            instantiate(activation),
            nn.Linear(hidden_size, action_size),
        ]
        self.q_head = nn.Sequential(*layers)
    def forward(self, state):
        """Return the Q values produced by ``q_head`` for ``state``."""
        return self.q_head(state)
 class DuelingQNetwork(nn.Module):
    """
    Dueling Q network: a shared feature layer followed by separate state-value and
    advantage heads whose outputs are combined into Q values.
    """
    def __init__(self, state_size, action_size, hidden_size, activation):
        super(DuelingQNetwork, self).__init__()
        self.feature_layer = nn.Sequential(
            nn.Linear(state_size, hidden_size),
            instantiate(activation),
        )
        self.value_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            instantiate(activation),
            nn.Linear(hidden_size, 1)
        )
        self.advantage_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            instantiate(activation),
            nn.Linear(hidden_size, action_size)
        )
    def forward(self, state):
        """
        Get the Q values of the current state using the dueling network:
        Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a')
        """
        # run the shared trunk once and feed both heads from it
        features = self.feature_layer(state)
        value = self.value_head(features)
        advantage = self.advantage_head(features)
        # mean-subtracted aggregation from https://arxiv.org/pdf/1511.06581; the plain sum
        # V + A cannot tell V and A apart (adding a constant to one and subtracting
        # it from the other leaves Q unchanged)
        Qs = value + advantage - advantage.mean(dim=-1, keepdim=True)
        return Qs
--- a/hw2/utils.py
+++ b/hw2/utils.py
@@ -0,0 +1,65 @@
 import os
 import glob
 import torch
 import shutil
 import random
 import numpy as np
 import gymnasium as gym
 from gymnasium.spaces import Discrete, Box
 from moviepy.editor import VideoFileClip, concatenate_videoclips
 def moving_average(a, n):
    """
    Return an array of the moving average of a with window size n

    The result has ``len(a) - n + 1`` entries. When ``a`` has fewer than ``n`` entries no
    full window exists and the values are returned unsmoothed (as a float array).
    Raises ValueError if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError(f"window size must be at least 1, got {n}")
    values = np.asarray(a, dtype=float)
    # exactly n entries still form one full window, so only shorter inputs fall through
    if len(values) < n:
        return values
    ret = np.cumsum(values)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n
 def get_epsilon(step, eps_min, eps_max, eps_steps):
    """
    Return the linearly descending epsilon of the current step for the epsilon-greedy policy. After eps_steps, epsilon will keep at eps_min
    """
    ############################
    # YOUR IMPLEMENTATION HERE #
    return max(eps_min, eps_max - (step / eps_steps) * (eps_max - eps_min))
    ############################
 def merge_videos(video_dir):
    """
    Merge videos in the video_dir into a single video

    The ``*.mp4`` files are ordered by the integer after the last "-" in their path,
    concatenated into ``<video_dir>.mp4``, and the directory is then removed. If the
    directory contains no ``*.mp4`` files nothing is written or removed.
    """
    videos = glob.glob(os.path.join(video_dir, "*.mp4"))
    if not videos:
        # nothing was recorded; there is nothing to concatenate
        return
    videos = sorted(videos, key=lambda x: int(x.split("-")[-1].split(".")[0]))
    clips = []
    try:
        for video in videos:
            clips.append(VideoFileClip(video))
        merged = concatenate_videoclips(clips)
        try:
            merged.write_videofile(f"{video_dir}.mp4")
        finally:
            merged.close()
    finally:
        # release the readers before deleting the directory; files that are still
        # open cannot be removed on Windows
        for clip in clips:
            clip.close()
    shutil.rmtree(video_dir)
 def set_seed_everywhere(env: gym.Env, seed=0):
    """
    Seed the env's action space, Python's ``random``, NumPy and PyTorch with ``seed``,
    and export it as PYTHONHASHSEED.
    """
    seeders = (
        env.action_space.seed,
        random.seed,
        np.random.seed,
        torch.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
 def get_space_shape(space):
    """
    Return the size of a gym space: ``n`` for Discrete, the single dimension for a
    1-D Box, or the full shape tuple for a higher-rank Box.

    Raises ValueError for any other space type.
    """
    if isinstance(space, Discrete):
        return space.n
    if not isinstance(space, Box):
        raise ValueError(f"Space not supported: {space}")
    dims = space.shape
    return dims[0] if len(dims) == 1 else dims
--- a/result.aux
+++ b/result.aux
@@ -0,0 +1,5 @@
 \relax 
 \newlabel{1}{{{1}}{1}{}{}{}}
 \newlabel{2}{{{2}}{1}{}{}{}}
 \newlabel{3}{{{3}}{1}{}{}{}}
 \gdef \@abspage@last{4}
--- a/result.fdb_latexmk
+++ b/result.fdb_latexmk
@@ -0,0 +1,80 @@
 # Fdb version 4
 ["pdflatex"] 1760230164.51698 "d:/Documents/Nextcloud/Documents/Project WUSTL/Academic/2025_Fall/CSE5100/Homeworks/hw2/result.tex" "result.pdf" "result" 1760230165.61579 0
  "c:/texlive/2023/texmf-dist/fonts/map/fontname/texfonts.map" 1708990624 3524 cb3e574dea2d1052e39280babc910dc8 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/cmextra/cmex7.tfm" 1708988591 1004 54797486969f23fa377b128694d548df ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/cmextra/cmex8.tfm" 1708988591 988 bdf658c3bfc2d96d3c8b02cfc1c94c20 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/symbols/msam10.tfm" 1708988591 916 f87d7c45f9c908e672703b83b72241a3 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/symbols/msam7.tfm" 1708988591 928 2dc8d444221b7a635bb58038579b861a ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/symbols/msbm10.tfm" 1708988591 908 2921f8a10601f252058503cc6570e581 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/symbols/msbm7.tfm" 1708988591 940 228d6584342e91276bf566bcf9716b83 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmbx10.tfm" 1708989536 1328 c834bbb027764024c09d3d2bf908b5f0 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmex10.tfm" 1708989536 992 662f679a0b3d2d53c1b94050fdaa3f50 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmmi10.tfm" 1708989536 1528 abec98dbc43e172678c11b3b9031252a ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmmi6.tfm" 1708989536 1512 f21f83efb36853c0b70002322c1ab3ad ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmmi8.tfm" 1708989536 1520 eccf95517727cb11801f4f1aee3a21b4 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmr10.tfm" 1708989536 1296 45809c5a464d5f32c8f98ba97c1bb47f ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmr6.tfm" 1708989536 1300 b62933e007d01cfd073f79b963c01526 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmr8.tfm" 1708989536 1292 21c1c5bfeaebccffdb478fd231a0997d ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmsy10.tfm" 1708989536 1124 6c73e740cf17375f03eec0ee63599741 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmsy6.tfm" 1708989536 1116 933a60c408fc0a863a92debe84b2d294 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmsy8.tfm" 1708989536 1120 8b7d695260f3cff42e636090a8002094 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmti10.tfm" 1708989536 1480 aa8e34af0eb6a2941b776984cf1dfdc4 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/rsfs/rsfs10.tfm" 1708993366 688 37338d6ab346c2f1466b29e195316aa4 ""
  "c:/texlive/2023/texmf-dist/fonts/tfm/public/rsfs/rsfs5.tfm" 1708993366 684 3a51bd4fd9600428d5264cf25f04bb9a ""
  "c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx10.pfb" 1708988591 34811 78b52f49e893bcba91bd7581cdc144c0 ""
  "c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb" 1708988591 30251 6afa5cb1d0204815a708a080681d4674 ""
  "c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb" 1708988591 36299 5f9df58c2139e7edcf37c8fca4bd384d ""
  "c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi8.pfb" 1708988591 35469 dcf3a5f2fc1862f5952e3ee5eb1d98c4 ""
  "c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr10.pfb" 1708988591 35752 024fb6c41858982481f6968b5fc26508 ""
  "c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr8.pfb" 1708988591 32726 39f0f9e62e84beb801509898a605dbd5 ""
  "c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb" 1708988591 32569 5e5ddc8df908dea60932f3c484a54c0d ""
  "c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy6.pfb" 1708988591 32587 65067f817f408bc71a7312f3d9828a9b ""
  "c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy8.pfb" 1708988591 32626 5abc8bb2f28aa647d4c70f8ea38cc0d3 ""
  "c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmti10.pfb" 1708988591 37944 359e864bd06cde3b1cf57bb20757fb06 ""
  "c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/symbols/msbm10.pfb" 1708988591 34694 870c211f62cb72718a00e353f14f254d ""
  "c:/texlive/2023/texmf-dist/tex/context/base/mkii/supp-pdf.mkii" 1708992232 71627 94eb9990bed73c364d7f53f960cc8c5b ""
  "c:/texlive/2023/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty" 1708991801 17865 1a9bd36b4f98178fa551aca822290953 ""
  "c:/texlive/2023/texmf-dist/tex/latex/amscls/amsthm.sty" 1708988587 12594 0d51ac3a545aaaa555021326ff22a6cc ""
  "c:/texlive/2023/texmf-dist/tex/latex/amsfonts/amsfonts.sty" 1708988591 5949 3f3fd50a8cc94c3d4cbf4fc66cd3df1c ""
  "c:/texlive/2023/texmf-dist/tex/latex/amsfonts/amssymb.sty" 1708988591 13829 94730e64147574077f8ecfea9bb69af4 ""
  "c:/texlive/2023/texmf-dist/tex/latex/amsfonts/umsa.fd" 1708988592 961 6518c6525a34feb5e8250ffa91731cff ""
  "c:/texlive/2023/texmf-dist/tex/latex/amsfonts/umsb.fd" 1708988592 961 d02606146ba5601b5645f987c92e6193 ""
  "c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsbsy.sty" 1708988596 2222 499d61426192c39efd8f410ee1a52b9c ""
  "c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsgen.sty" 1708988596 4173 82ac04dfb1256038fad068287fbb4fe6 ""
  "c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsmath.sty" 1708988596 88371 d84032c0f422c3d1e282266c01bef237 ""
  "c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsopn.sty" 1708988596 4474 b811654f4bf125f11506d13d13647efb ""
  "c:/texlive/2023/texmf-dist/tex/latex/amsmath/amstext.sty" 1708988596 2444 0d0c1ee65478277e8015d65b86983da2 ""
  "c:/texlive/2023/texmf-dist/tex/latex/base/article.cls" 1708991500 20144 147463a6a579f4597269ef9565205cfe ""
  "c:/texlive/2023/texmf-dist/tex/latex/base/size11.clo" 1708991500 8464 59874a3b0776c73e2a138b025d8473dd ""
  "c:/texlive/2023/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty" 1708990302 13886 d1306dcf79a944f6988e688c1785f9ce ""
  "c:/texlive/2023/texmf-dist/tex/latex/etoolbox/etoolbox.sty" 1708990361 46845 3b58f70c6e861a13d927bff09d35ecbc ""
  "c:/texlive/2023/texmf-dist/tex/latex/fancyhdr/fancyhdr.sty" 1708990446 18450 88279bf67c81e69f8e3f1c1bad1a26c5 ""
  "c:/texlive/2023/texmf-dist/tex/latex/graphics-cfg/graphics.cfg" 1708990878 1224 978390e9c2234eab29404bc21b268d1e ""
  "c:/texlive/2023/texmf-dist/tex/latex/graphics-def/pdftex.def" 1708990879 19448 1e988b341dda20961a6b931bcde55519 ""
  "c:/texlive/2023/texmf-dist/tex/latex/graphics/graphics.sty" 1708990876 18387 8f900a490197ebaf93c02ae9476d4b09 ""
  "c:/texlive/2023/texmf-dist/tex/latex/graphics/graphicx.sty" 1708990876 8010 a8d949cbdbc5c983593827c9eec252e1 ""
  "c:/texlive/2023/texmf-dist/tex/latex/graphics/keyval.sty" 1708990876 2671 7e67d78d9b88c845599a85b2d41f2e39 ""
  "c:/texlive/2023/texmf-dist/tex/latex/graphics/trig.sty" 1708990876 4023 293ea1c16429fc0c4cf605f4da1791a9 ""
  "c:/texlive/2023/texmf-dist/tex/latex/jknapltx/mathrsfs.sty" 1708991316 300 12fa6f636b617656f2810ee82cb05015 ""
  "c:/texlive/2023/texmf-dist/tex/latex/jknapltx/ursfs.fd" 1708991316 548 cc4e3557704bfed27c7002773fad6c90 ""
  "c:/texlive/2023/texmf-dist/tex/latex/kvoptions/kvoptions.sty" 1708991458 22555 6d8e155cfef6d82c3d5c742fea7c992e ""
  "c:/texlive/2023/texmf-dist/tex/latex/kvsetkeys/kvsetkeys.sty" 1708991460 13815 760b0c02f691ea230f5359c4e1de23a7 ""
  "c:/texlive/2023/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def" 1708991467 30006 3d512c0edd558928ddea1690180ef77e ""
  "c:/texlive/2023/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg" 1708991573 678 4792914a8f45be57bb98413425e4c7af ""
  "c:/texlive/2023/texmf-dist/tex/latex/mathtools/mathtools.sty" 1708991994 62269 5c1837a5bc5db4c0d255eedc225ca44b ""
  "c:/texlive/2023/texmf-dist/tex/latex/mathtools/mhsetup.sty" 1708991994 5582 a43dedf8e5ec418356f1e9dfe5d29fc3 ""
  "c:/texlive/2023/texmf-dist/tex/latex/parskip/parskip.sty" 1708992628 4288 94714aa7f535440f33181fec52a31963 ""
  "c:/texlive/2023/texmf-dist/tex/latex/preprint/fullpage.sty" 1708992914 2789 05b418f78b224ec872f5b11081138605 ""
  "c:/texlive/2023/texmf-dist/tex/latex/tools/calc.sty" 1708994243 10214 547fd4d29642cb7c80bf54b49d447f01 ""
  "c:/texlive/2023/texmf-dist/web2c/texmf.cnf" 1708988443 41009 84b61f42d16d06bedb915f57aa2374cf ""
  "c:/texlive/2023/texmf-var/fonts/map/pdftex/updmap/pdftex.map" 1708994999 5518052 de2a91c664d75f3971de4662dc6b5a65 ""
  "c:/texlive/2023/texmf-var/web2c/pdftex/pdflatex.fmt" 1708995327 8220658 fb4d14532342a0ef5245dd396c4a1bd1 ""
  "c:/texlive/2023/texmf.cnf" 1708994944 713 e69b156964470283e0530f5060668171 ""
  "d:/Documents/Nextcloud/Documents/Project WUSTL/Academic/2025_Fall/CSE5100/Homeworks/hw2/result.tex" 1760230163 6257 29867be9781c52dc4faad49bb0cba6fa ""
  "result.aux" 1760230165 119 495be67432001ea8f4f9fa642ad39ad3 "pdflatex"
  "result.tex" 1760230163 6257 29867be9781c52dc4faad49bb0cba6fa ""
  (generated)
  "result.aux"
  "result.log"
  "result.pdf"
  (rewritten before read)
--- a/result.fls
+++ b/result.fls
@@ -0,0 +1,124 @@
 PWD d:/Documents/Nextcloud/Documents/Project WUSTL/Academic/2025_Fall/CSE5100/Homeworks/hw2
 INPUT c:/texlive/2023/texmf.cnf
 INPUT c:/texlive/2023/texmf-dist/web2c/texmf.cnf
 INPUT c:/texlive/2023/texmf-var/web2c/pdftex/pdflatex.fmt
 INPUT d:/Documents/Nextcloud/Documents/Project WUSTL/Academic/2025_Fall/CSE5100/Homeworks/hw2/result.tex
 OUTPUT result.log
 INPUT c:/texlive/2023/texmf-dist/tex/latex/base/article.cls
 INPUT c:/texlive/2023/texmf-dist/tex/latex/base/article.cls
 INPUT c:/texlive/2023/texmf-dist/tex/latex/base/size11.clo
 INPUT c:/texlive/2023/texmf-dist/tex/latex/base/size11.clo
 INPUT c:/texlive/2023/texmf-dist/tex/latex/base/size11.clo
 INPUT c:/texlive/2023/texmf-dist/fonts/map/fontname/texfonts.map
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmr10.tfm
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsmath.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsmath.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsopn.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsmath/amstext.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsmath/amstext.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsgen.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsgen.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsbsy.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsbsy.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsopn.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsfonts/amsfonts.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsfonts/amsfonts.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amscls/amsthm.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amscls/amsthm.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsfonts/amssymb.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsfonts/amssymb.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/fancyhdr/fancyhdr.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/fancyhdr/fancyhdr.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/parskip/parskip.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/parskip/parskip.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/kvoptions/kvoptions.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/kvoptions/kvoptions.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics/keyval.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics/keyval.sty
 INPUT c:/texlive/2023/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty
 INPUT c:/texlive/2023/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/kvsetkeys/kvsetkeys.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/kvsetkeys/kvsetkeys.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/etoolbox/etoolbox.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/etoolbox/etoolbox.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/preprint/fullpage.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/preprint/fullpage.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/jknapltx/mathrsfs.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/jknapltx/mathrsfs.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/mathtools/mathtools.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/mathtools/mathtools.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/tools/calc.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/tools/calc.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/mathtools/mhsetup.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/mathtools/mhsetup.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def
 INPUT c:/texlive/2023/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def
 INPUT ./result.aux
 INPUT ./result.aux
 INPUT result.aux
 OUTPUT result.aux
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics/graphicx.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics/graphicx.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics/graphics.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics/graphics.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics/trig.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics/trig.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics-def/pdftex.def
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics-def/pdftex.def
 INPUT c:/texlive/2023/texmf-dist/tex/latex/graphics-def/pdftex.def
 INPUT c:/texlive/2023/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
 INPUT c:/texlive/2023/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
 INPUT c:/texlive/2023/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
 INPUT c:/texlive/2023/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
 INPUT c:/texlive/2023/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
 INPUT c:/texlive/2023/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
 INPUT c:/texlive/2023/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmbx10.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmr8.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmr6.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmmi10.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmmi8.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmmi6.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmsy10.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmsy8.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmsy6.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmex10.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/cmextra/cmex8.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/cmextra/cmex7.tfm
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsfonts/umsa.fd
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsfonts/umsa.fd
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsfonts/umsa.fd
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/symbols/msam10.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/symbols/msam10.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/symbols/msam7.tfm
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsfonts/umsb.fd
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsfonts/umsb.fd
 INPUT c:/texlive/2023/texmf-dist/tex/latex/amsfonts/umsb.fd
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/symbols/msbm10.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/symbols/msbm10.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/amsfonts/symbols/msbm7.tfm
 INPUT c:/texlive/2023/texmf-dist/tex/latex/jknapltx/ursfs.fd
 INPUT c:/texlive/2023/texmf-dist/tex/latex/jknapltx/ursfs.fd
 INPUT c:/texlive/2023/texmf-dist/tex/latex/jknapltx/ursfs.fd
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/rsfs/rsfs10.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/rsfs/rsfs10.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/rsfs/rsfs5.tfm
 INPUT c:/texlive/2023/texmf-dist/fonts/tfm/public/cm/cmti10.tfm
 OUTPUT result.pdf
 INPUT c:/texlive/2023/texmf-var/fonts/map/pdftex/updmap/pdftex.map
 INPUT result.aux
 INPUT c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx10.pfb
 INPUT c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb
 INPUT c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb
 INPUT c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi8.pfb
 INPUT c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr10.pfb
 INPUT c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr8.pfb
 INPUT c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb
 INPUT c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy6.pfb
 INPUT c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy8.pfb
 INPUT c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmti10.pfb
 INPUT c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/symbols/msbm10.pfb
--- a/result.log
+++ b/result.log
@@ -0,0 +1,260 @@
 This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2024.2.26)  11 OCT 2025 19:49
 entering extended mode
 restricted \write18 enabled.
 file:line:error style messages enabled.
 %&-line parsing enabled.
 **"d:/Documents/Nextcloud/Documents/Project WUSTL/Academic/2025_Fall/CSE5100/Homeworks/hw2/result.tex"
 (d:/Documents/Nextcloud/Documents/Project WUSTL/Academic/2025_Fall/CSE5100/Homeworks/hw2/result.tex
 LaTeX2e <2023-11-01> patch level 1
 L3 programming layer <2024-02-20>
 (c:/texlive/2023/texmf-dist/tex/latex/base/article.cls
 Document Class: article 2023/05/17 v1.4n Standard LaTeX document class
 (c:/texlive/2023/texmf-dist/tex/latex/base/size11.clo
 File: size11.clo 2023/05/17 v1.4n Standard LaTeX file (size option)
 )
 \c@part=\count188
 \c@section=\count189
 \c@subsection=\count190
 \c@subsubsection=\count191
 \c@paragraph=\count192
 \c@subparagraph=\count193
 \c@figure=\count194
 \c@table=\count195
 \abovecaptionskip=\skip48
 \belowcaptionskip=\skip49
 \bibindent=\dimen140
 ) (c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsmath.sty
 Package: amsmath 2023/05/13 v2.17o AMS math features
 \@mathmargin=\skip50
 For additional information on amsmath, use the `?' option.
 (c:/texlive/2023/texmf-dist/tex/latex/amsmath/amstext.sty
 Package: amstext 2021/08/26 v2.01 AMS text
 (c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsgen.sty
 File: amsgen.sty 1999/11/30 v2.0 generic functions
 \@emptytoks=\toks17
 \ex@=\dimen141
 )) (c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsbsy.sty
 Package: amsbsy 1999/11/29 v1.2d Bold Symbols
 \pmbraise@=\dimen142
 ) (c:/texlive/2023/texmf-dist/tex/latex/amsmath/amsopn.sty
 Package: amsopn 2022/04/08 v2.04 operator names
 )
 \inf@bad=\count196
 LaTeX Info: Redefining \frac on input line 234.
 \uproot@=\count197
 \leftroot@=\count198
 LaTeX Info: Redefining \overline on input line 399.
 LaTeX Info: Redefining \colon on input line 410.
 \classnum@=\count199
 \DOTSCASE@=\count266
 LaTeX Info: Redefining \ldots on input line 496.
 LaTeX Info: Redefining \dots on input line 499.
 LaTeX Info: Redefining \cdots on input line 620.
 \Mathstrutbox@=\box51
 \strutbox@=\box52
 LaTeX Info: Redefining \big on input line 722.
 LaTeX Info: Redefining \Big on input line 723.
 LaTeX Info: Redefining \bigg on input line 724.
 LaTeX Info: Redefining \Bigg on input line 725.
 \big@size=\dimen143
 LaTeX Font Info:    Redeclaring font encoding OML on input line 743.
 LaTeX Font Info:    Redeclaring font encoding OMS on input line 744.
 \macc@depth=\count267
 LaTeX Info: Redefining \bmod on input line 905.
 LaTeX Info: Redefining \pmod on input line 910.
 LaTeX Info: Redefining \smash on input line 940.
 LaTeX Info: Redefining \relbar on input line 970.
 LaTeX Info: Redefining \Relbar on input line 971.
 \c@MaxMatrixCols=\count268
 \dotsspace@=\muskip16
 \c@parentequation=\count269
 \dspbrk@lvl=\count270
 \tag@help=\toks18
 \row@=\count271
 \column@=\count272
 \maxfields@=\count273
 \andhelp@=\toks19
 \eqnshift@=\dimen144
 \alignsep@=\dimen145
 \tagshift@=\dimen146
 \tagwidth@=\dimen147
 \totwidth@=\dimen148
 \lineht@=\dimen149
 \@envbody=\toks20
 \multlinegap=\skip51
 \multlinetaggap=\skip52
 \mathdisplay@stack=\toks21
 LaTeX Info: Redefining \[ on input line 2953.
 LaTeX Info: Redefining \] on input line 2954.
 ) (c:/texlive/2023/texmf-dist/tex/latex/amsfonts/amsfonts.sty
 Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
 \symAMSa=\mathgroup4
 \symAMSb=\mathgroup5
 LaTeX Font Info:    Redeclaring math symbol \hbar on input line 98.
 LaTeX Font Info:    Overwriting math alphabet `\mathfrak' in version `bold'
 (Font)                  U/euf/m/n --> U/euf/b/n on input line 106.
 ) (c:/texlive/2023/texmf-dist/tex/latex/amscls/amsthm.sty
 Package: amsthm 2020/05/29 v2.20.6
 \thm@style=\toks22
 \thm@bodyfont=\toks23
 \thm@headfont=\toks24
 \thm@notefont=\toks25
 \thm@headpunct=\toks26
 \thm@preskip=\skip53
 \thm@postskip=\skip54
 \thm@headsep=\skip55
 \dth@everypar=\toks27
 ) (c:/texlive/2023/texmf-dist/tex/latex/amsfonts/amssymb.sty
 Package: amssymb 2013/01/14 v3.01 AMS font symbols
 ) (c:/texlive/2023/texmf-dist/tex/latex/fancyhdr/fancyhdr.sty
 Package: fancyhdr 2022/11/09 v4.1 Extensive control of page headers and footers
 \f@nch@headwidth=\skip56
 \f@nch@O@elh=\skip57
 \f@nch@O@erh=\skip58
 \f@nch@O@olh=\skip59
 \f@nch@O@orh=\skip60
 \f@nch@O@elf=\skip61
 \f@nch@O@erf=\skip62
 \f@nch@O@olf=\skip63
 \f@nch@O@orf=\skip64
 ) (c:/texlive/2023/texmf-dist/tex/latex/parskip/parskip.sty
 Package: parskip 2021-03-14 v2.0h non-zero parskip adjustments
 (c:/texlive/2023/texmf-dist/tex/latex/kvoptions/kvoptions.sty
 Package: kvoptions 2022-06-15 v3.15 Key value format for package options (HO)
 (c:/texlive/2023/texmf-dist/tex/latex/graphics/keyval.sty
 Package: keyval 2022/05/29 v1.15 key=value parser (DPC)
 \KV@toks@=\toks28
 ) (c:/texlive/2023/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty
 Package: ltxcmds 2023-12-04 v1.26 LaTeX kernel commands for general use (HO)
 ) (c:/texlive/2023/texmf-dist/tex/latex/kvsetkeys/kvsetkeys.sty
 Package: kvsetkeys 2022-10-05 v1.19 Key value parser (HO)
 )) (c:/texlive/2023/texmf-dist/tex/latex/etoolbox/etoolbox.sty
 Package: etoolbox 2020/10/05 v2.5k e-TeX tools for LaTeX (JAW)
 \etb@tempcnta=\count274
 )) (c:/texlive/2023/texmf-dist/tex/latex/preprint/fullpage.sty
 Package: fullpage 1999/02/23 1.1 (PWD)
 \FP@margin=\skip65
 ) (c:/texlive/2023/texmf-dist/tex/latex/jknapltx/mathrsfs.sty
 Package: mathrsfs 1996/01/01 Math RSFS package v1.0 (jk)
 \symrsfs=\mathgroup6
 ) (c:/texlive/2023/texmf-dist/tex/latex/mathtools/mathtools.sty
 Package: mathtools 2022/06/29 v1.29 mathematical typesetting tools
 (c:/texlive/2023/texmf-dist/tex/latex/tools/calc.sty
 Package: calc 2023/07/08 v4.3 Infix arithmetic (KKT,FJ)
 \calc@Acount=\count275
 \calc@Bcount=\count276
 \calc@Adimen=\dimen150
 \calc@Bdimen=\dimen151
 \calc@Askip=\skip66
 \calc@Bskip=\skip67
 LaTeX Info: Redefining \setlength on input line 80.
 LaTeX Info: Redefining \addtolength on input line 81.
 \calc@Ccount=\count277
 \calc@Cskip=\skip68
 ) (c:/texlive/2023/texmf-dist/tex/latex/mathtools/mhsetup.sty
 Package: mhsetup 2021/03/18 v1.4 programming setup (MH)
 )
 \g_MT_multlinerow_int=\count278
 \l_MT_multwidth_dim=\dimen152
 \origjot=\skip69
 \l_MT_shortvdotswithinadjustabove_dim=\dimen153
 \l_MT_shortvdotswithinadjustbelow_dim=\dimen154
 \l_MT_above_intertext_sep=\dimen155
 \l_MT_below_intertext_sep=\dimen156
 \l_MT_above_shortintertext_sep=\dimen157
 \l_MT_below_shortintertext_sep=\dimen158
 \xmathstrut@box=\box53
 \xmathstrut@dim=\dimen159
 )
 \c@theorem=\count279
 (c:/texlive/2023/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def
 File: l3backend-pdftex.def 2024-02-20 L3 backend support: PDF output (pdfTeX)
 \l__color_backend_stack_int=\count280
 \l__pdf_internal_box=\box54
 ) (./result.aux)
 \openout1 = `result.aux'.
 LaTeX Font Info:    Checking defaults for OML/cmm/m/it on input line 77.
 LaTeX Font Info:    ... okay on input line 77.
 LaTeX Font Info:    Checking defaults for OMS/cmsy/m/n on input line 77.
 LaTeX Font Info:    ... okay on input line 77.
 LaTeX Font Info:    Checking defaults for OT1/cmr/m/n on input line 77.
 LaTeX Font Info:    ... okay on input line 77.
 LaTeX Font Info:    Checking defaults for T1/cmr/m/n on input line 77.
 LaTeX Font Info:    ... okay on input line 77.
 LaTeX Font Info:    Checking defaults for TS1/cmr/m/n on input line 77.
 LaTeX Font Info:    ... okay on input line 77.
 LaTeX Font Info:    Checking defaults for OMX/cmex/m/n on input line 77.
 LaTeX Font Info:    ... okay on input line 77.
 LaTeX Font Info:    Checking defaults for U/cmr/m/n on input line 77.
 LaTeX Font Info:    ... okay on input line 77.
 (c:/texlive/2023/texmf-dist/tex/latex/graphics/graphicx.sty
 Package: graphicx 2021/09/16 v1.2d Enhanced LaTeX Graphics (DPC,SPQR)
 (c:/texlive/2023/texmf-dist/tex/latex/graphics/graphics.sty
 Package: graphics 2022/03/10 v1.4e Standard LaTeX Graphics (DPC,SPQR)
 (c:/texlive/2023/texmf-dist/tex/latex/graphics/trig.sty
 Package: trig 2021/08/11 v1.11 sin cos tan (DPC)
 ) (c:/texlive/2023/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
 File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration
 )
 Package graphics Info: Driver file: pdftex.def on input line 107.
 (c:/texlive/2023/texmf-dist/tex/latex/graphics-def/pdftex.def
 File: pdftex.def 2022/09/22 v1.2b Graphics/color driver for pdftex
 (c:/texlive/2023/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
 [Loading MPS to PDF converter (version 2006.09.02).]
 \scratchcounter=\count281
 \scratchdimen=\dimen160
 \scratchbox=\box55
 \nofMPsegments=\count282
 \nofMParguments=\count283
 \everyMPshowfont=\toks29
 \MPscratchCnt=\count284
 \MPscratchDim=\dimen161
 \MPnumerator=\count285
 \makeMPintoPDFobject=\count286
 \everyMPtoPDFconversion=\toks30
 ))) (c:/texlive/2023/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
 Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf
 Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 485.
 (c:/texlive/2023/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
 File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Live
 ))
 \Gin@req@height=\dimen162
 \Gin@req@width=\dimen163
 )
 LaTeX Font Info:    Trying to load font information for U+msa on input line 88.
 (c:/texlive/2023/texmf-dist/tex/latex/amsfonts/umsa.fd
 File: umsa.fd 2013/01/14 v3.01 AMS symbols A
 )
 LaTeX Font Info:    Trying to load font information for U+msb on input line 88.
 (c:/texlive/2023/texmf-dist/tex/latex/amsfonts/umsb.fd
 File: umsb.fd 2013/01/14 v3.01 AMS symbols B
 )
 LaTeX Font Info:    Trying to load font information for U+rsfs on input line 88.
 (c:/texlive/2023/texmf-dist/tex/latex/jknapltx/ursfs.fd
 File: ursfs.fd 1998/03/24 rsfs font definition file (jk)
 ) [1{c:/texlive/2023/texmf-var/fonts/map/pdftex/updmap/pdftex.map}
 ] [2] [3] [4] (./result.aux)
 ***********
 LaTeX2e <2023-11-01> patch level 1
 L3 programming layer <2024-02-20>
 ***********
 ) 
 Here is how much of TeX's memory you used:
 4245 strings out of 474137
 64387 string characters out of 5748517
 1938190 words of memory out of 5000000
 26537 multiletter control sequences out of 15000+600000
 563865 words of font info for 59 fonts, out of 8000000 for 9000
 1141 hyphenation exceptions out of 8191
 65i,11n,72p,713b,463s stack positions out of 10000i,1000n,20000p,200000b,200000s
 <c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx10.pfb><c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb><c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb><c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi8.pfb><c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr10.pfb><c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr8.pfb><c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb><c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy6.pfb><c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy8.pfb><c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmti10.pfb><c:/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/symbols/msbm10.pfb>
 Output written on result.pdf (4 pages, 122012 bytes).
 PDF statistics:
 72 PDF objects out of 1000 (max. 8388607)
 43 compressed objects within 1 object stream
 0 named destinations out of 1000 (max. 500000)
 1 words of extra memory for PDF output out of 10000 (max. 10000000)
--- a/result.pdf
+++ b/result.pdf
--- a/result.synctex.gz
+++ b/result.synctex.gz
--- a/result.tex
+++ b/result.tex
@@ -0,0 +1,156 @@
 \documentclass[11pt]{article}
 \usepackage{amsmath, amsfonts, amsthm}
 \usepackage{amssymb}
 \usepackage{fancyhdr,parskip}
 \usepackage{fullpage}
 \usepackage{mathrsfs}
 \usepackage{mathtools}
 %%
 %% Stuff above here is packages that will be used to compile your document.
 %% If you've used unusual LaTeX features, you may have to install extra packages by adding them to this list.
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \setlength{\headheight}{15.2pt}
 \setlength{\headsep}{20pt}
 \pagestyle{fancyplain}
 %%
 %% Stuff above here is layout and formatting.  If you've never used LaTeX before, you probably don't need to change any of it.
 %% Later, you can learn how it all works and adjust it to your liking, or write your own formatting code.
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%
 % These commands create theorem-like environments.
 \newtheorem{theorem}{Theorem}
 \newtheorem{lemma}[theorem]{Lemma}
 \newtheorem{corollary}[theorem]{Corollary}
 \newtheorem{prop}[theorem]{Proposition}
 \newtheorem{defn}[theorem]{Definition}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %% This section contains some useful macros that will save you time typing.
 %%
 % Using \displaystyle (or \ds) in a block of math has a number of effects, but most notably, it makes your fractions come out bigger.
 \newcommand{\ds}{\displaystyle}
 % These lines are for displaying integrals; typing \dx will make the dx at the end of the integral look better.
 \newcommand{\is}{\hspace{2pt}}
 \newcommand{\dx}{\is dx}
 % These commands produce the fancy Z (for the integers) and other letters conveniently.
 \newcommand{\Z}{\mathbb{Z}}
 \newcommand{\Q}{\mathbb{Q}}
 \newcommand{\R}{\mathbb{R}}
 \newcommand{\C}{\mathbb{C}}
 \newcommand{\F}{\mathbb{F}}
 \newcommand{\T}{\mathcal{T}}
 \newcommand{\B}{\mathcal{B}}
 % for fancy empty set char
 \renewcommand{\emptyset}{\varnothing}
 % customized commands for future assignments
 \newcommand{\imply}{\Rightarrow}
 \def\P{\mathscr{P}}
 \def\L{\mathscr{L}}
 \def\M{\mathscr{M}}
 \DeclarePairedDelimiterX{\inp}[2]{\langle}{\rangle}{#1, #2}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %% This is the header.  It will appear on every page, and it's a good place to put your name, the assignment title, and stuff like that.
 %% I usually leave the center header blank to avoid clutter.
 %%
 \fancyhead[L]{\textbf{CSE5100 Homework 2}}
 \fancyhead[C]{\empty}
 \fancyhead[R]{Zheyuan Wu}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{document}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %% Actual math starts here!
 % Use an enumerated list to write up problems.  First we begin a list.
 \begin{enumerate}
 \item[1.] \textbf{Answer questions in Section 3} Due to the state space complexity of some visual input environments, we may represent Q-functions using a class of parameterized function approximators $\mathcal{Q}=\{Q_w\mid w\in \R^p\}$, where $p$ is the number of parameters. Remember that in the \textit{tabular setting} given a 4-tuple of sampled experience $(s,a,r,s')$, the vanilla Q-learning update is
 \[
 Q(s,a)\coloneqq Q(s,a)+\alpha\left(r+\gamma\max_{a'\in A} Q(s',a')-Q(s,a)\right),\tag{1}\label{1}
 \]
 where $\alpha\in \R$ is the learning rate. In the \textit{function approximation setting}, the update is similar:
 \[
 w\coloneqq w+\alpha\left(r+\gamma\max_{a'\in A} Q_w(s',a')-Q_w(s,a)\right)\nabla_w Q_w(s,a).\tag{2}\label{2}
 \]
 Q-learning can be seen as a pseudo stochastic gradient descent step on
 \[
 \ell(w)=\mathbb{E}_{s,a,r,s'}\left(r+\gamma \max_{a'\in A} Q_w(s',a')-Q_w(s,a)\right)^2,\tag{3}\label{3}
 \]
 where the dependency of $\max_{a'\in A} Q_w(s',a')$ on $w$ is ignored, i.e., it is treated as a fixed target.
 \begin{enumerate}
    \item [1.] [\textbf{10pt}] Show that the update \ref{1} and update \ref{2} are the same when the functions in $\mathcal{Q}$ are of the form $Q_w(s,a)=w^T\phi(s,a)$, with $w\in \R^{|S||A|}$ and $\phi:S\times A\to \R^{|S||A|}$, where the feature function $\phi$ is of the form $\phi(s,a)_{s',a'}=\mathbb{I}[s'=s,a'=a]$, where $\mathbb{I}$ denotes the indicator function, which evaluates to $1$ if the condition is true and to $0$ otherwise. Note that the coordinates in the vector space $\R^{|S||A|}$ can be seen as being indexed by pairs $(s',a')$, where $s'\in S$, $a'\in A$.
    \begin{proof}
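        When the functions in $\mathcal{Q}$ are of the form $Q_w(s,a)=w^T\phi(s,a)$, with $w\in \R^{|S||A|}$ and $\phi:S\times A\to \R^{|S||A|}$, then $Q_w$ is linear in $w$, so $\nabla_w Q_w(s,a)=\phi(s,a)$. Since $\phi(s,a)$ is the indicator vector whose only nonzero coordinate is the one indexed by $(s,a)$, we have $Q_w(s,a)=w^T\phi(s,a)=w_{s,a}$, and the function approximation update changes only the coordinate $w_{s,a}$, leaving every other coordinate of $w$ untouched. On that coordinate the two updates coincide: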
        \[
        \begin{aligned}
              Q(s,a)&= Q(s,a)+\alpha\left(r+\gamma\max_{a'\in A} Q(s',a')-Q(s,a)\right)\\
              w^T\phi(s,a)&= w^T\phi(s,a)+\alpha\left(r+\gamma\max_{a'\in A} Q(s',a')-Q(s,a)\right)\\
              w&= w+\alpha\left(r+\gamma\max_{a'\in A} Q(s',a')-Q(s,a)\right)\nabla_w Q_w(s,a)
        \end{aligned}
        \]
    \end{proof}
    \item [2.] [\textbf{10pt}] What is the deadly triad in reinforcement learning? What are the main challenges of using deep learning for function approximation with Q-learning? How does the Deep Q-Learning method overcome these challenges?
    The deadly triad in reinforcement learning consists of
    \begin{enumerate}
        \item Bootstrapping
        \item Function approximation
        \item Off-policy
    \end{enumerate}
    The Deep Q-Learning method overcomes the instability that arises when the deadly triad interacts with the statistical estimation issues induced by bootstrapping. It does so by bootstrapping on a separate network and by reducing the overestimation bias. (Double Q-learning is used to reduce the overestimation bias.)
    \item [3.] [\textbf{10pt}] Explain how double Q-learning helps with the maximization bias in Q-learning.
    Double Q-learning decouples action selection from action evaluation by assigning them to separate networks.
 \end{enumerate}
 \newpage
 \item [2.] \textbf{The auto-generated results figure} along with a brief description of what the figures show.
 \newpage
 \item [3.] \textbf{Any other findings}
 \end{enumerate}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %% Actual math ends here.  Don't put any content below the \end{document} line.
 %%
 \end{document}
--- a/runs/2025-10-11/20-24-05_/.hydra/config.yaml
+++ b/runs/2025-10-11/20-24-05_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/20-24-05_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/20-24-05_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\20-24-05_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/20-24-05_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/20-24-05_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/20-24-05_/main.log
+++ b/runs/2025-10-11/20-24-05_/main.log
@@ -0,0 +1,7 @@
 [2025-10-11 20:24:05,493][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:173: UserWarning: 
 NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation.
 The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_61 sm_70 sm_75 sm_80 sm_86 compute_37.
 If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
  warnings.warn(incompatible_device_warn.format(device_name, capability, " ".join(arch_list), device_name))
--- a/runs/2025-10-11/20-31-16_/.hydra/config.yaml
+++ b/runs/2025-10-11/20-31-16_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/20-31-16_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/20-31-16_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\20-31-16_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/20-31-16_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/20-31-16_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/20-31-16_/main.log
+++ b/runs/2025-10-11/20-31-16_/main.log
@@ -0,0 +1,7 @@
 [2025-10-11 20:31:16,113][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:173: UserWarning: 
 NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation.
 The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_61 sm_70 sm_75 sm_80 sm_86 compute_37.
 If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
  warnings.warn(incompatible_device_warn.format(device_name, capability, " ".join(arch_list), device_name))
--- a/runs/2025-10-11/20-35-30_/.hydra/config.yaml
+++ b/runs/2025-10-11/20-35-30_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/20-35-30_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/20-35-30_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\20-35-30_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/20-35-30_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/20-35-30_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/20-35-30_/main.log
+++ b/runs/2025-10-11/20-35-30_/main.log
@@ -0,0 +1,7 @@
 [2025-10-11 20:35:30,859][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:173: UserWarning: 
 NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation.
 The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_61 sm_70 sm_75 sm_80 sm_86 compute_37.
 If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
  warnings.warn(incompatible_device_warn.format(device_name, capability, " ".join(arch_list), device_name))
--- a/runs/2025-10-11/20-39-45_/.hydra/config.yaml
+++ b/runs/2025-10-11/20-39-45_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/20-39-45_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/20-39-45_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\20-39-45_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/20-39-45_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/20-39-45_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/20-39-45_/main.log
+++ b/runs/2025-10-11/20-39-45_/main.log
@@ -0,0 +1,22 @@
 [2025-10-11 20:39:45,474][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:283: UserWarning: 
    Found GPU0 NVIDIA GeForce RTX 5090 which is of cuda capability 12.0.
    Minimum and Maximum cuda capability supported by this version of PyTorch is
    (6.1) - (9.0)
  warnings.warn(
 [2025-10-11 20:39:45,474][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:304: UserWarning: 
    Please install PyTorch with a following CUDA
    configurations:  12.8 12.9 following instructions at
    https://pytorch.org/get-started/locally/
  warnings.warn(matched_cuda_warn.format(matched_arches))
 [2025-10-11 20:39:45,476][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:326: UserWarning: 
 NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation.
 The current PyTorch install supports CUDA capabilities sm_61 sm_70 sm_75 sm_80 sm_86 sm_90.
 If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
  warnings.warn(
 [2025-10-11 20:39:47,115][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/20-41-09_/.hydra/config.yaml
+++ b/runs/2025-10-11/20-41-09_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/20-41-09_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/20-41-09_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\20-41-09_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/20-41-09_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/20-41-09_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/20-41-09_/main.log
+++ b/runs/2025-10-11/20-41-09_/main.log
@@ -0,0 +1,22 @@
 [2025-10-11 20:41:09,978][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:283: UserWarning: 
    Found GPU0 NVIDIA GeForce RTX 5090 which is of cuda capability 12.0.
    Minimum and Maximum cuda capability supported by this version of PyTorch is
    (6.1) - (9.0)
  warnings.warn(
 [2025-10-11 20:41:09,979][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:304: UserWarning: 
    Please install PyTorch with a following CUDA
    configurations:  12.8 12.9 following instructions at
    https://pytorch.org/get-started/locally/
  warnings.warn(matched_cuda_warn.format(matched_arches))
 [2025-10-11 20:41:09,979][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:326: UserWarning: 
 NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation.
 The current PyTorch install supports CUDA capabilities sm_61 sm_70 sm_75 sm_80 sm_86 sm_90.
 If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
  warnings.warn(
 [2025-10-11 20:41:11,670][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/20-42-26_/.hydra/config.yaml
+++ b/runs/2025-10-11/20-42-26_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/20-42-26_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/20-42-26_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\20-42-26_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/20-42-26_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/20-42-26_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/20-42-26_/main.log
+++ b/runs/2025-10-11/20-42-26_/main.log
@@ -0,0 +1,22 @@
 [2025-10-11 20:42:26,843][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:283: UserWarning: 
    Found GPU0 NVIDIA GeForce RTX 5090 which is of cuda capability 12.0.
    Minimum and Maximum cuda capability supported by this version of PyTorch is
    (6.1) - (9.0)
  warnings.warn(
 [2025-10-11 20:42:26,844][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:304: UserWarning: 
    Please install PyTorch with a following CUDA
    configurations:  12.8 12.9 following instructions at
    https://pytorch.org/get-started/locally/
  warnings.warn(matched_cuda_warn.format(matched_arches))
 [2025-10-11 20:42:26,846][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:326: UserWarning: 
 NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation.
 The current PyTorch install supports CUDA capabilities sm_61 sm_70 sm_75 sm_80 sm_86 sm_90.
 If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
  warnings.warn(
 [2025-10-11 20:42:28,580][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/20-45-12_/.hydra/config.yaml
+++ b/runs/2025-10-11/20-45-12_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/20-45-12_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/20-45-12_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\20-45-12_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/20-45-12_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/20-45-12_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/20-45-12_/main.log
+++ b/runs/2025-10-11/20-45-12_/main.log
@@ -0,0 +1,22 @@
 [2025-10-11 20:45:12,694][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:283: UserWarning: 
    Found GPU0 NVIDIA GeForce RTX 5090 which is of cuda capability 12.0.
    Minimum and Maximum cuda capability supported by this version of PyTorch is
    (6.1) - (9.0)
  warnings.warn(
 [2025-10-11 20:45:12,694][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:304: UserWarning: 
    Please install PyTorch with a following CUDA
    configurations:  12.8 12.9 following instructions at
    https://pytorch.org/get-started/locally/
  warnings.warn(matched_cuda_warn.format(matched_arches))
 [2025-10-11 20:45:12,696][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:326: UserWarning: 
 NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation.
 The current PyTorch install supports CUDA capabilities sm_61 sm_70 sm_75 sm_80 sm_86 sm_90.
 If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
  warnings.warn(
 [2025-10-11 20:45:14,422][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/20-45-23_/.hydra/config.yaml
+++ b/runs/2025-10-11/20-45-23_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/20-45-23_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/20-45-23_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\20-45-23_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/20-45-23_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/20-45-23_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/20-45-23_/main.log
+++ b/runs/2025-10-11/20-45-23_/main.log
@@ -0,0 +1,22 @@
 [2025-10-11 20:45:23,927][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:283: UserWarning: 
    Found GPU0 NVIDIA GeForce RTX 5090 which is of cuda capability 12.0.
    Minimum and Maximum cuda capability supported by this version of PyTorch is
    (6.1) - (9.0)
  warnings.warn(
 [2025-10-11 20:45:23,928][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:304: UserWarning: 
    Please install PyTorch with a following CUDA
    configurations:  12.8 12.9 following instructions at
    https://pytorch.org/get-started/locally/
  warnings.warn(matched_cuda_warn.format(matched_arches))
 [2025-10-11 20:45:23,930][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:326: UserWarning: 
 NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation.
 The current PyTorch install supports CUDA capabilities sm_61 sm_70 sm_75 sm_80 sm_86 sm_90.
 If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
  warnings.warn(
 [2025-10-11 20:45:25,714][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/20-55-32_/.hydra/config.yaml
+++ b/runs/2025-10-11/20-55-32_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/20-55-32_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/20-55-32_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\20-55-32_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/20-55-32_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/20-55-32_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/20-55-32_/main.log
+++ b/runs/2025-10-11/20-55-32_/main.log
@@ -0,0 +1,22 @@
 [2025-10-11 20:55:32,238][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:283: UserWarning: 
    Found GPU0 NVIDIA GeForce RTX 5090 which is of cuda capability 12.0.
    Minimum and Maximum cuda capability supported by this version of PyTorch is
    (6.1) - (9.0)
  warnings.warn(
 [2025-10-11 20:55:32,238][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:304: UserWarning: 
    Please install PyTorch with a following CUDA
    configurations:  12.8 12.9 following instructions at
    https://pytorch.org/get-started/locally/
  warnings.warn(matched_cuda_warn.format(matched_arches))
 [2025-10-11 20:55:32,240][py.warnings][WARNING] - C:\Users\wuzhe\anaconda3\envs\drl_hw2\lib\site-packages\torch\cuda\__init__.py:326: UserWarning: 
 NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation.
 The current PyTorch install supports CUDA capabilities sm_61 sm_70 sm_75 sm_80 sm_86 sm_90.
 If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
  warnings.warn(
 [2025-10-11 20:55:33,876][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/20-59-59_/.hydra/config.yaml
+++ b/runs/2025-10-11/20-59-59_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/20-59-59_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/20-59-59_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\20-59-59_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/20-59-59_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/20-59-59_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/20-59-59_/main.log
+++ b/runs/2025-10-11/20-59-59_/main.log
@@ -0,0 +1 @@
 [2025-10-11 21:00:01,190][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/21-00-50_/.hydra/config.yaml
+++ b/runs/2025-10-11/21-00-50_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/21-00-50_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/21-00-50_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\21-00-50_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/21-00-50_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/21-00-50_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/21-00-50_/main.log
+++ b/runs/2025-10-11/21-00-50_/main.log
@@ -0,0 +1 @@
 [2025-10-11 21:00:52,388][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/21-03-24_/.hydra/config.yaml
+++ b/runs/2025-10-11/21-03-24_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/21-03-24_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/21-03-24_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\21-03-24_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/21-03-24_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/21-03-24_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/21-03-24_/main.log
+++ b/runs/2025-10-11/21-03-24_/main.log
@@ -0,0 +1 @@
 [2025-10-11 21:03:26,154][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/21-03-35_/.hydra/config.yaml
+++ b/runs/2025-10-11/21-03-35_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/21-03-35_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/21-03-35_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\21-03-35_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/21-03-35_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/21-03-35_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/21-03-35_/main.log
+++ b/runs/2025-10-11/21-03-35_/main.log
@@ -0,0 +1 @@
 [2025-10-11 21:03:36,838][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/21-05-11_/.hydra/config.yaml
+++ b/runs/2025-10-11/21-05-11_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/21-05-11_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/21-05-11_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\21-05-11_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/21-05-11_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/21-05-11_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/21-05-11_/main.log
+++ b/runs/2025-10-11/21-05-11_/main.log
@@ -0,0 +1 @@
 [2025-10-11 21:05:12,880][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/21-07-21_/.hydra/config.yaml
+++ b/runs/2025-10-11/21-07-21_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/21-07-21_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/21-07-21_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\21-07-21_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/21-07-21_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/21-07-21_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/21-07-21_/main.log
+++ b/runs/2025-10-11/21-07-21_/main.log
@@ -0,0 +1 @@
 [2025-10-11 21:07:22,911][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/21-07-44_/.hydra/config.yaml
+++ b/runs/2025-10-11/21-07-44_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/21-07-44_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/21-07-44_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\21-07-44_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/21-07-44_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/21-07-44_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/21-07-44_/main.log
+++ b/runs/2025-10-11/21-07-44_/main.log
@@ -0,0 +1 @@
 [2025-10-11 21:07:45,823][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/21-08-54_/.hydra/config.yaml
+++ b/runs/2025-10-11/21-08-54_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/21-08-54_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/21-08-54_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\21-08-54_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/21-08-54_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/21-08-54_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/21-08-54_/main.log
+++ b/runs/2025-10-11/21-08-54_/main.log
@@ -0,0 +1 @@
 [2025-10-11 21:08:56,669][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/21-09-36_/.hydra/config.yaml
+++ b/runs/2025-10-11/21-09-36_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/21-09-36_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/21-09-36_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\21-09-36_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/runs/2025-10-11/21-09-36_/.hydra/overrides.yaml
+++ b/runs/2025-10-11/21-09-36_/.hydra/overrides.yaml
@@ -0,0 +1 @@
 []
--- a/runs/2025-10-11/21-09-36_/main.log
+++ b/runs/2025-10-11/21-09-36_/main.log
@@ -0,0 +1 @@
 [2025-10-11 21:09:38,404][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer
--- a/runs/2025-10-11/21-10-00_/.hydra/config.yaml
+++ b/runs/2025-10-11/21-10-00_/.hydra/config.yaml
@@ -0,0 +1,33 @@
 seed: 42
 env_name: CartPole-v1
 train:
  nstep: ${buffer.nstep}
  timesteps: 50000
  batch_size: 128
  test_every: 2500
  eps_max: 1
  eps_min: 0.05
  eps_steps: 12500
  start_steps: 0
  plot_interval: 2000
  eval_interval: 2000
  eval_episodes: 10
 agent:
  gamma: 0.99
  lr: 0.002
  tau: 0.1
  nstep: ${buffer.nstep}
  target_update_interval: 3
  hidden_size: 64
  activation:
    _target_: torch.nn.ELU
  use_dueling: false
  use_double: false
 buffer:
  capacity: 50000
  use_per: false
  nstep: 1
  gamma: ${agent.gamma}
  per_alpha: 0.7
  per_beta: 0.4
  per_eps: 0.01
--- a/runs/2025-10-11/21-10-00_/.hydra/hydra.yaml
+++ b/runs/2025-10-11/21-10-00_/.hydra/hydra.yaml
@@ -0,0 +1,154 @@
 hydra:
  run:
    dir: ./runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./sweeps/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname}
    subdir: ${hydra.job.num}
  launcher:
    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
  sweeper:
    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
    max_batch_size: null
    params: null
  help:
    app_name: ${hydra.job.name}
    header: '${hydra.help.app_name} is powered by Hydra.
      '
    footer: 'Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
      '
    template: '${hydra.help.header}
      == Configuration groups ==
      Compose your configuration from those groups (group=option)
      $APP_CONFIG_GROUPS
      == Config ==
      Override anything in the config (foo.bar=value)
      $CONFIG
      ${hydra.help.footer}
      '
  hydra_help:
    template: 'Hydra (${hydra.runtime.version})
      See https://hydra.cc for more info.
      == Flags ==
      $FLAGS_HELP
      == Configuration groups ==
      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
      to command line)
      $HYDRA_CONFIG_GROUPS
      Use ''--cfg hydra'' to Show the Hydra config.
      '
    hydra_help: ???
  hydra_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][HYDRA] %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
    root:
      level: INFO
      handlers:
      - console
    loggers:
      logging_example:
        level: DEBUG
    disable_existing_loggers: false
  job_logging:
    version: 1
    formatters:
      simple:
        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
    handlers:
      console:
        class: logging.StreamHandler
        formatter: simple
        stream: ext://sys.stdout
      file:
        class: logging.FileHandler
        formatter: simple
        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
    root:
      level: INFO
      handlers:
      - console
      - file
    disable_existing_loggers: false
  env: {}
  mode: RUN
  searchpath: []
  callbacks: {}
  output_subdir: .hydra
  overrides:
    hydra:
    - hydra.mode=RUN
    task: []
  job:
    name: main
    chdir: true
    override_dirname: ''
    id: ???
    num: ???
    config_name: config
    env_set: {}
    env_copy: []
    config:
      override_dirname:
        kv_sep: '='
        item_sep: ','
        exclude_keys: []
  runtime:
    version: 1.3.2
    version_base: '1.3'
    cwd: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100
    config_sources:
    - path: hydra.conf
      schema: pkg
      provider: hydra
    - path: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\Homeworks\hw2\hw2\cfgs
      schema: file
      provider: main
    - path: ''
      schema: structured
      provider: schema
    output_dir: D:\Documents\Nextcloud\Documents\Project WUSTL\Academic\2025_Fall\CSE5100\runs\2025-10-11\21-10-00_
    choices:
      hydra/env: default
      hydra/callbacks: null
      hydra/job_logging: default
      hydra/hydra_logging: default
      hydra/hydra_help: default
      hydra/help: default
      hydra/sweeper: basic
      hydra/launcher: basic
      hydra/output: default
  verbose: false
--- a/Show More
+++ b/Show More
		`@@ -0,0 +1 @@`
							`[2025-10-11 21:00:01,190][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer`
		`@@ -0,0 +1 @@`
							`[2025-10-11 21:00:52,388][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer`
		`@@ -0,0 +1 @@`
							`[2025-10-11 21:03:26,154][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer`
		`@@ -0,0 +1 @@`
							`[2025-10-11 21:03:36,838][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer`
		`@@ -0,0 +1 @@`
							`[2025-10-11 21:05:12,880][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer`
		`@@ -0,0 +1 @@`
							`[2025-10-11 21:07:22,911][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer`
		`@@ -0,0 +1 @@`
							`[2025-10-11 21:07:45,823][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer`
		`@@ -0,0 +1 @@`
							`[2025-10-11 21:08:56,669][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer`
		`@@ -0,0 +1 @@`
							`[2025-10-11 21:09:38,404][__main__][INFO] - Training for 50000 timesteps with NormalQNetwork and NormalReplayBuffer`