commit e74aac95e3 (parent 250f763f1f)
Author: Zheyuan Wu
Date:   2025-10-14 20:34:47 -05:00

517 changed files with 1418 additions and 16701 deletions

@@ -85,14 +85,13 @@ class NStepReplayBuffer(ReplayBuffer):
         """Get n-step state, action, reward and done for the transition, discard those rewards after done=True"""
         ############################
         # YOUR IMPLEMENTATION HERE #
-        state, action, reward, done = self.n_step_buffer[0]
+        state, action, reward, done = self.n_step_buffer.popleft()
         # compute n-step discounted reward
-        gamma = self.gamma
-        for i in range(1, len(self.n_step_buffer)):
-            if done:
+        for i in range(self.n_step - 1):
+            reward += self.gamma**(i+1) * self.n_step_buffer[i][2]
+            # ignore done steps
+            if self.n_step_buffer[i][3]:
                 break
-            reward += gamma * self.n_step_buffer[i][2]
-            gamma *= self.gamma
         ############################
         return state, action, reward, done
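
For reference, the new loop builds the truncated n-step return R = r_0 + gamma*r_1 + ... + gamma^k*r_k, stopping after the first terminal transition. Below is a minimal standalone sketch of that computation, assuming each buffered entry is a (state, action, reward, done) tuple as in the diff; the function name n_step_return and the explicit length guard are illustrative additions, not code from the repository.

from collections import deque

def n_step_return(n_step_buffer: deque, gamma: float, n_step: int):
    """Collapse the oldest transition plus up to n_step-1 successors into one
    n-step transition; rewards past a terminal step are not accumulated."""
    state, action, reward, done = n_step_buffer.popleft()
    for i in range(n_step - 1):
        if i >= len(n_step_buffer):              # fewer than n_step transitions buffered
            break
        reward += gamma ** (i + 1) * n_step_buffer[i][2]   # add discounted reward of step i+1
        if n_step_buffer[i][3]:                  # that step ended the episode: stop here
            break
    return state, action, reward, done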
@@ -192,11 +191,12 @@ class PrioritizedNStepReplayBuffer(PrioritizedReplayBuffer):
         # YOUR IMPLEMENTATION HERE #
-        state, action, reward, done = self.n_step_buffer[0]
-        # compute n-step discounted reward
-        gamma = self.gamma
-        for i in range(1, len(self.n_step_buffer)):
-            if done:
+        state, action, reward, done = self.n_step_buffer.popleft()
+        # compute n-step discounted reward
+        for i in range(self.n_step - 1):
+            reward += self.gamma**(i+1) * self.n_step_buffer[i][2]
+            # ignore done steps
+            if self.n_step_buffer[i][3]:
                 break
-            reward += gamma * self.n_step_buffer[i][2]
-            gamma *= self.gamma
         ############################
         return state, action, reward, done
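
The change to PrioritizedNStepReplayBuffer is the same computation. A toy check of the truncation behaviour, reusing the n_step_return sketch above with arbitrarily chosen illustrative values:

buf = deque([
    ("s0", 0, 1.0, False),
    ("s1", 0, 1.0, False),
    ("s2", 0, 1.0, True),   # episode terminates here
    ("s3", 0, 1.0, False),  # next episode; its reward must not leak in
])
state, action, reward, done = n_step_return(buf, gamma=0.9, n_step=4)
print(reward)   # 1 + 0.9*1 + 0.9**2*1 ≈ 2.71; accumulation stops at the terminal step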