partial update for section 1

This commit is contained in:
Trance-0
2025-11-01 16:28:48 -05:00
parent 0f109ac389
commit ac986ec69a
40 changed files with 1439 additions and 3 deletions

View File

@@ -41,7 +41,7 @@ class ValueCritic(nn.Module):
############################
# YOUR IMPLEMENTATION HERE #
values=self.network(obs)
############################
return values
@@ -55,7 +55,12 @@ class ValueCritic(nn.Module):
loss = None
############################
# YOUR IMPLEMENTATION HERE #
values = self.forward(obs)
loss = F.mse_loss(values, q_values)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
############################
return {

View File

@@ -148,10 +148,11 @@ class PGAgent(nn.Module):
advantages = None
############################
# YOUR IMPLEMENTATION HERE #
-source = rewards.copy()
+source = q_values.copy()
mean = np.mean(source)
std = np.std(source)
-advantages = (source - mean)/std
+# avoid division by zero
+advantages = (source - mean)/(std+1e-8)
############################
return advantages