done done done part A

Zheyuan Wu
2025-10-25 16:01:19 -05:00
parent 892131cfd4
commit 0f109ac389
18 changed files with 70 additions and 15 deletions


@@ -103,7 +103,7 @@ class PGAgent(nn.Module):
q_values = None
############################
# YOUR IMPLEMENTATION HERE #
# Full-trajectory discounted return, used as the Q-value estimate at every timestep.
q_values = [self._discounted_return(traj_rewards) for traj_rewards in rewards]
############################
else:
@@ -114,7 +114,7 @@ class PGAgent(nn.Module):
############################
# YOUR IMPLEMENTATION HERE #
# Reward-to-go: discounted return starting from each timestep.
q_values = [self._discounted_reward_to_go(traj_rewards) for traj_rewards in rewards]
############################
return q_values
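
As a quick standalone sanity check of the two estimators selected above (a sketch, not part of the assignment code; gamma = 0.5 and the three-step reward list are arbitrary choices):

gamma = 0.5
rewards = [1.0, 1.0, 1.0]

# Full-trajectory discounted return, copied to every timestep.
total = sum(gamma ** i * r for i, r in enumerate(rewards))
discounted_return = [total] * len(rewards)   # [1.75, 1.75, 1.75]

# Reward-to-go: discounted sum starting at each timestep.
reward_to_go = []
running = 0.0
for r in reversed(rewards):
    running = r + gamma * running
    reward_to_go.append(running)
reward_to_go.reverse()                       # [1.75, 1.5, 1.0]

Reward-to-go drops the rewards that came before each action, which is the usual variance-reduction argument for preferring it.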
@@ -148,7 +148,10 @@ class PGAgent(nn.Module):
advantages = None
############################
# YOUR IMPLEMENTATION HERE #
# Standardize to zero mean and unit variance; the small epsilon guards
# against division by zero when all values are identical.
source = np.asarray(rewards)
mean = np.mean(source)
std = np.std(source)
advantages = (source - mean) / (std + 1e-8)
############################
return advantages
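
A minimal standalone check of the normalization step (the 1e-8 epsilon mirrors the guard above; the sample values are arbitrary):

import numpy as np

q_values = np.array([1.0, 2.0, 3.0, 4.0])
advantages = (q_values - np.mean(q_values)) / (np.std(q_values) + 1e-8)
print(advantages.mean(), advantages.std())   # approximately 0.0 and 1.0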
@@ -166,9 +169,9 @@ class PGAgent(nn.Module):
############################
# YOUR IMPLEMENTATION HERE #
# Single discounted return over the whole trajectory, repeated for every timestep.
q_value = sum(self.gamma ** i * reward for i, reward in enumerate(rewards))
return [q_value] * len(rewards)
############################
pass
def _discounted_reward_to_go(self, rewards: Sequence[float]) -> Sequence[float]:
@@ -181,6 +184,12 @@ class PGAgent(nn.Module):
############################
# YOUR IMPLEMENTATION HERE #
# Accumulate discounted sums from the last timestep backwards:
# q_values[t] = rewards[t] + gamma * q_values[t + 1].
q_values = []
current_sum = 0
for t in range(len(rewards) - 1, -1, -1):
    current_sum *= self.gamma
    current_sum += rewards[t]
    q_values.append(current_sum)
q_values.reverse()
return q_values
############################
pass
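
The backward loop above is the O(n) form of the reward-to-go definition, Q_t = sum over t' >= t of gamma^(t' - t) * r_{t'}. A standalone sketch checking that the recurrence matches the direct definition (gamma and the reward list are arbitrary):

gamma = 0.9
rewards = [2.0, -1.0, 0.5, 3.0]

# Direct O(n^2) definition.
direct = [
    sum(gamma ** (tp - t) * rewards[tp] for tp in range(t, len(rewards)))
    for t in range(len(rewards))
]

# Backward O(n) recurrence, as in _discounted_reward_to_go above.
running, backward = 0.0, []
for r in reversed(rewards):
    running = r + gamma * running
    backward.append(running)
backward.reverse()

assert all(abs(a - b) < 1e-9 for a, b in zip(direct, backward))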