partial update for section 1

2025-11-01 16:28:48 -05:00
parent 0f109ac389
commit ac986ec69a
40 changed files with 1439 additions and 3 deletions
--- a/hw3/src/pg_agent.py
+++ b/hw3/src/pg_agent.py
@@ -148,10 +148,11 @@ class PGAgent(nn.Module):
            advantages = None
            ############################
            # YOUR IMPLEMENTATION HERE #
-            source = rewards.copy()
+            source = q_values.copy()
            mean = np.mean(source)
            std = np.std(source)
-            advantages = (source - mean)/std
+            # epsilon keeps the division finite when std is 0 (all q_values equal)
+            advantages = (source - mean)/(std+1e-8)
            ############################

        return advantages