diff --git a/.gitignore b/.gitignore
index 5560dac..f8ef896 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
 # Python
-# __pycache__/
-# *.pyc
-# *.pyo
-# *.pyd
-# *.pyw
-# *.pyz
\ No newline at end of file
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.pyw
+*.pyz
\ No newline at end of file
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..a7a34f3
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,21 @@
+name: DRL
+channels:
+  - pytorch
+  - nvidia
+  - defaults
+dependencies:
+  - python=3.10
+  - pip=23.0.1
+  - pip:
+    # Each entry below becomes one line of a pip requirements file, so index
+    # options get their own entry. Change cu126 to match your own CUDA version.
+    - --extra-index-url https://download.pytorch.org/whl/cu126
+    - gymnasium[classic-control]==0.27.1
+    - hydra-core==1.3.2
+    - matplotlib==3.7.1
+    - moviepy==1.0.3
+    - torch
+    - torchvision
+    - opencv-python
+    - tensorboardX==2.6.4
+    - tensorboard==2.20.0
\ No newline at end of file
diff --git a/hw3/bash/1-2-experiments.sh b/hw3/bash/1-2-experiments.sh
new file mode 100644
index 0000000..391f19c
--- /dev/null
+++ b/hw3/bash/1-2-experiments.sh
@@ -0,0 +1,10 @@
+# CartPole-v1 sweep: for each batch size, run the plain configuration and then
+# every combination of the -rtg and -na flags (8 runs in total).
+for spec in 1000:cartpole 4000:cartpole_lb; do
+    batch=${spec%%:*}   # text before the colon: value passed to -b
+    tag=${spec#*:}      # text after the colon: experiment-name prefix
+    python run.py --env_name CartPole-v1 -n 200 -b "$batch" --exp_name "$tag"
+    python run.py --env_name CartPole-v1 -n 200 -b "$batch" -rtg --exp_name "${tag}_rtg"
+    python run.py --env_name CartPole-v1 -n 200 -b "$batch" -na --exp_name "${tag}_na"
+    python run.py --env_name CartPole-v1 -n 200 -b "$batch" -rtg -na --exp_name "${tag}_rtg_na"
+done
\ No newline at end of file
diff --git a/hw3/bash/read-results.sh b/hw3/bash/read-results.sh
new file mode 100644
index 0000000..4761d09
--- /dev/null
+++ b/hw3/bash/read-results.sh
@@ -0,0 +1,3 @@
+# Launch TensorBoard on a log directory. Pass the directory as the first
+# argument; with no argument the 15-00-04 CartPole run below is opened.
+tensorboard --logdir "${1:-data/pg_cartpole_CartPole-v1_25-10-2025_15-00-04}"
\ No newline at end of file
diff --git a/hw3/data/pg_cartpole_CartPole-v1_25-10-2025_15-00-04/events.out.tfevents.1761422404.soragoto-MSI b/hw3/data/pg_cartpole_CartPole-v1_25-10-2025_15-00-04/events.out.tfevents.1761422404.soragoto-MSI
new file mode 100644
index 0000000..34491cb
Binary files /dev/null and b/hw3/data/pg_cartpole_CartPole-v1_25-10-2025_15-00-04/events.out.tfevents.1761422404.soragoto-MSI differ
diff --git a/hw3/data/pg_cartpole_CartPole-v1_25-10-2025_15-03-41/events.out.tfevents.1761422621.soragoto-MSI b/hw3/data/pg_cartpole_CartPole-v1_25-10-2025_15-03-41/events.out.tfevents.1761422621.soragoto-MSI
new file mode 100644
index 0000000..70355a7
Binary files /dev/null and b/hw3/data/pg_cartpole_CartPole-v1_25-10-2025_15-03-41/events.out.tfevents.1761422621.soragoto-MSI differ
diff --git a/hw3/data/pg_cartpole_CartPole-v1_25-10-2025_15-16-37/events.out.tfevents.1761423397.soragoto-MSI b/hw3/data/pg_cartpole_CartPole-v1_25-10-2025_15-16-37/events.out.tfevents.1761423397.soragoto-MSI
new file mode 100644
index 0000000..983d9ce
Binary files /dev/null and b/hw3/data/pg_cartpole_CartPole-v1_25-10-2025_15-16-37/events.out.tfevents.1761423397.soragoto-MSI differ
diff --git a/hw3/data/pg_cartpole_CartPole-v1_25-10-2025_15-59-08/events.out.tfevents.1761425948.soragoto-MSI b/hw3/data/pg_cartpole_CartPole-v1_25-10-2025_15-59-08/events.out.tfevents.1761425948.soragoto-MSI
new file mode 100644
index 0000000..01c2a5b
Binary files /dev/null and b/hw3/data/pg_cartpole_CartPole-v1_25-10-2025_15-59-08/events.out.tfevents.1761425948.soragoto-MSI differ
diff --git a/hw3/data/pg_cartpole_lb_CartPole-v1_25-10-2025_15-23-16/events.out.tfevents.1761423796.soragoto-MSI b/hw3/data/pg_cartpole_lb_CartPole-v1_25-10-2025_15-23-16/events.out.tfevents.1761423796.soragoto-MSI
new file mode 100644
index 0000000..2853311
Binary files /dev/null and b/hw3/data/pg_cartpole_lb_CartPole-v1_25-10-2025_15-23-16/events.out.tfevents.1761423796.soragoto-MSI differ
diff --git a/hw3/data/pg_cartpole_lb_na_CartPole-v1_25-10-2025_15-33-29/events.out.tfevents.1761424409.soragoto-MSI b/hw3/data/pg_cartpole_lb_na_CartPole-v1_25-10-2025_15-33-29/events.out.tfevents.1761424409.soragoto-MSI
new file mode 100644
index 0000000..c3bd2ac
Binary files /dev/null and b/hw3/data/pg_cartpole_lb_na_CartPole-v1_25-10-2025_15-33-29/events.out.tfevents.1761424409.soragoto-MSI differ
diff --git a/hw3/data/pg_cartpole_lb_rtg_CartPole-v1_25-10-2025_15-28-23/events.out.tfevents.1761424103.soragoto-MSI b/hw3/data/pg_cartpole_lb_rtg_CartPole-v1_25-10-2025_15-28-23/events.out.tfevents.1761424103.soragoto-MSI
new file mode 100644
index 0000000..48515fb
Binary files /dev/null and b/hw3/data/pg_cartpole_lb_rtg_CartPole-v1_25-10-2025_15-28-23/events.out.tfevents.1761424103.soragoto-MSI differ
diff --git a/hw3/data/pg_cartpole_lb_rtg_na_CartPole-v1_25-10-2025_15-38-36/events.out.tfevents.1761424716.soragoto-MSI b/hw3/data/pg_cartpole_lb_rtg_na_CartPole-v1_25-10-2025_15-38-36/events.out.tfevents.1761424716.soragoto-MSI
new file mode 100644
index 0000000..90686c5
Binary files /dev/null and b/hw3/data/pg_cartpole_lb_rtg_na_CartPole-v1_25-10-2025_15-38-36/events.out.tfevents.1761424716.soragoto-MSI differ
diff --git a/hw3/data/pg_cartpole_na_CartPole-v1_25-10-2025_15-19-54/events.out.tfevents.1761423594.soragoto-MSI b/hw3/data/pg_cartpole_na_CartPole-v1_25-10-2025_15-19-54/events.out.tfevents.1761423594.soragoto-MSI
new file mode 100644
index 0000000..d9a59f8
Binary files /dev/null and b/hw3/data/pg_cartpole_na_CartPole-v1_25-10-2025_15-19-54/events.out.tfevents.1761423594.soragoto-MSI differ
diff --git a/hw3/data/pg_cartpole_rtg_CartPole-v1_25-10-2025_15-18-16/events.out.tfevents.1761423496.soragoto-MSI b/hw3/data/pg_cartpole_rtg_CartPole-v1_25-10-2025_15-18-16/events.out.tfevents.1761423496.soragoto-MSI
new file mode 100644
index 0000000..0c44f7c
Binary files /dev/null and b/hw3/data/pg_cartpole_rtg_CartPole-v1_25-10-2025_15-18-16/events.out.tfevents.1761423496.soragoto-MSI differ
diff --git a/hw3/data/pg_cartpole_rtg_na_CartPole-v1_25-10-2025_15-21-34/events.out.tfevents.1761423694.soragoto-MSI b/hw3/data/pg_cartpole_rtg_na_CartPole-v1_25-10-2025_15-21-34/events.out.tfevents.1761423694.soragoto-MSI
new file mode 100644
index 0000000..ca4d218
Binary files /dev/null and b/hw3/data/pg_cartpole_rtg_na_CartPole-v1_25-10-2025_15-21-34/events.out.tfevents.1761423694.soragoto-MSI differ
diff --git a/hw3/src/pg_agent.py b/hw3/src/pg_agent.py
index 48ca1bf..44b6a99 100644
--- a/hw3/src/pg_agent.py
+++ b/hw3/src/pg_agent.py
@@ -103,7 +103,7 @@ class PGAgent(nn.Module):
             q_values = None
             ############################
             # YOUR IMPLEMENTATION HERE #
-
+            q_values = [self._discounted_return(reward) for reward in rewards]
             ############################
 
         else:
@@ -114,7 +114,7 @@ class PGAgent(nn.Module):
 
             ############################
             # YOUR IMPLEMENTATION HERE #
-
+            q_values = [self._discounted_reward_to_go(reward) for reward in rewards]
             ############################
 
         return q_values
@@ -148,7 +148,10 @@ class PGAgent(nn.Module):
             advantages = None
             ############################
             # YOUR IMPLEMENTATION HERE #
-
+            # NOTE(review): this normalizes `rewards`; confirm the Q-value estimates were not intended.
+            source = np.asarray(rewards)
+            # Epsilon keeps the division finite when every entry is equal (std == 0).
+            advantages = (source - np.mean(source)) / (np.std(source) + 1e-8)
             ############################
 
         return advantages
@@ -166,9 +169,9 @@ class PGAgent(nn.Module):
 
         ############################
         # YOUR IMPLEMENTATION HERE #
-    
+        q_value=sum(self.gamma ** i * reward for i, reward in enumerate(rewards))
+        return [q_value] * len(rewards)
         ############################
-        pass
 
 
     def _discounted_reward_to_go(self, rewards: Sequence[float]) -> Sequence[float]:
@@ -181,6 +184,12 @@ class PGAgent(nn.Module):
 
         ############################
         # YOUR IMPLEMENTATION HERE #
-
+        q_values = []
+        current_sum = 0
+        for t in range (len(rewards)-1,-1,-1):
+            current_sum *= self.gamma
+            current_sum += rewards[t]
+            q_values.append(current_sum)
+        q_values.reverse()
+        return q_values
         ############################
-        pass
diff --git a/hw3/src/policies.py b/hw3/src/policies.py
index 20331b0..e6b262a 100644
--- a/hw3/src/policies.py
+++ b/hw3/src/policies.py
@@ -63,7 +63,9 @@ class MLPPolicy(nn.Module):
 
         ############################
         # YOUR IMPLEMENTATION HERE #
-
+        obs = ptu.from_numpy(obs)
+        action_tensor = self.forward(obs)
+        action = ptu.to_numpy(action_tensor.sample())
         ############################
 
         return action
@@ -80,7 +82,8 @@ class MLPPolicy(nn.Module):
 
             ############################
             # YOUR IMPLEMENTATION HERE #
-
+            logits_prob = self.logits_net(obs)
+            action = distributions.Categorical(logits=logits_prob)
             ############################
 
         else:
@@ -116,7 +119,21 @@ class MLPPolicyPG(MLPPolicy):
 
         ############################
         # YOUR IMPLEMENTATION HERE #
+        
+        dist = self.forward(obs)
 
+        if self.discrete:
+            log_p = dist.log_prob(actions.long())
+        else:
+            log_p = dist.log_prob(actions)
+
+        loss = -torch.mean(advantages * log_p)
+
+        # update gradients
+        self.optimizer.zero_grad()
+        loss.backward()
+        self.optimizer.step()
+        
         ############################
 
         return {
diff --git a/hw3/src/utils.py b/hw3/src/utils.py
index 281d6eb..44429b6 100644
--- a/hw3/src/utils.py
+++ b/hw3/src/utils.py
@@ -33,7 +33,10 @@ def sample_trajectory(
 
         ############################
         # YOUR IMPLEMENTATION HERE #
+        ac = policy.get_action(ob)
 
+        next_ob, rew, terminated, truncated, _ = env.step(ac)
+        rollout_done = terminated or truncated
         ############################