partial update for section 1

This commit is contained in:
Trance-0
2025-11-01 16:28:48 -05:00
parent 0f109ac389
commit ac986ec69a
40 changed files with 1439 additions and 3 deletions

View File

@@ -148,10 +148,11 @@ class PGAgent(nn.Module):
advantages = None
############################
# YOUR IMPLEMENTATION HERE #
-source = rewards.copy()
+source = q_values.copy()
mean = np.mean(source)
std = np.std(source)
-advantages = (source - mean)/std
+# avoid division by zero
+advantages = (source - mean)/(std+1e-8)
############################
return advantages