partial update for section 1
This commit is contained in:
@@ -148,10 +148,11 @@ class PGAgent(nn.Module):
|
||||
advantages = None
|
||||
############################
|
||||
# YOUR IMPLEMENTATION HERE #
|
||||
source = rewards.copy()
|
||||
source = q_values.copy()
|
||||
mean = np.mean(source)
|
||||
std = np.std(source)
|
||||
advantages = (source - mean)/std
|
||||
# avoid division by zero
|
||||
advantages = (source - mean)/(std+1e-8)
|
||||
############################
|
||||
|
||||
return advantages
|
||||
|
||||
Reference in New Issue
Block a user