updates
This commit is contained in:
@@ -85,14 +85,13 @@ class NStepReplayBuffer(ReplayBuffer):
|
||||
"""Get n-step state, action, reward and done for the transition, discard those rewards after done=True"""
|
||||
############################
|
||||
# YOUR IMPLEMENTATION HERE #
|
||||
state, action, reward, done = self.n_step_buffer[0]
|
||||
state, action, reward, done = self.n_step_buffer.popleft()
|
||||
# compute n-step discounted reward
|
||||
gamma = self.gamma
|
||||
for i in range(1, len(self.n_step_buffer)):
|
||||
if done:
|
||||
for i in range(self.n_step - 1):
|
||||
reward += self.gamma**(i+1) * self.n_step_buffer[i][2]
|
||||
# ignore done steps
|
||||
if self.n_step_buffer[i][3]:
|
||||
break
|
||||
reward += gamma * self.n_step_buffer[i][2]
|
||||
gamma *= self.gamma
|
||||
############################
|
||||
return state, action, reward, done
|
||||
|
||||
@@ -192,11 +191,12 @@ class PrioritizedNStepReplayBuffer(PrioritizedReplayBuffer):
|
||||
# YOUR IMPLEMENTATION HERE #
|
||||
state, action, reward, done = self.n_step_buffer[0]
|
||||
# compute n-step discounted reward
|
||||
gamma = self.gamma
|
||||
for i in range(1, len(self.n_step_buffer)):
|
||||
if done:
|
||||
state, action, reward, done = self.n_step_buffer.popleft()
|
||||
# compute n-step discounted reward
|
||||
for i in range(self.n_step - 1):
|
||||
reward += self.gamma**(i+1) * self.n_step_buffer[i][2]
|
||||
# ignore done steps
|
||||
if self.n_step_buffer[i][3]:
|
||||
break
|
||||
reward += gamma * self.n_step_buffer[i][2]
|
||||
gamma *= self.gamma
|
||||
############################
|
||||
return state, action, reward, done
|
||||
Reference in New Issue
Block a user