partial update for section 1
This commit is contained in:
BIN
hw3/data/a1/q1/events.out.tfevents.1761423397.soragoto-MSI
Normal file
BIN
hw3/data/a1/q1/events.out.tfevents.1761423397.soragoto-MSI
Normal file
Binary file not shown.
BIN
hw3/data/a1/q1/events.out.tfevents.1761423496.soragoto-MSI
Normal file
BIN
hw3/data/a1/q1/events.out.tfevents.1761423496.soragoto-MSI
Normal file
Binary file not shown.
BIN
hw3/data/a1/q1/events.out.tfevents.1761423594.soragoto-MSI
Normal file
BIN
hw3/data/a1/q1/events.out.tfevents.1761423594.soragoto-MSI
Normal file
Binary file not shown.
BIN
hw3/data/a1/q1/events.out.tfevents.1761423694.soragoto-MSI
Normal file
BIN
hw3/data/a1/q1/events.out.tfevents.1761423694.soragoto-MSI
Normal file
Binary file not shown.
BIN
hw3/data/a1/q1/events.out.tfevents.1761423796.soragoto-MSI
Normal file
BIN
hw3/data/a1/q1/events.out.tfevents.1761423796.soragoto-MSI
Normal file
Binary file not shown.
BIN
hw3/data/a1/q1/events.out.tfevents.1761424103.soragoto-MSI
Normal file
BIN
hw3/data/a1/q1/events.out.tfevents.1761424103.soragoto-MSI
Normal file
Binary file not shown.
BIN
hw3/data/a1/q1/events.out.tfevents.1761424409.soragoto-MSI
Normal file
BIN
hw3/data/a1/q1/events.out.tfevents.1761424409.soragoto-MSI
Normal file
Binary file not shown.
BIN
hw3/data/a1/q1/events.out.tfevents.1761424716.soragoto-MSI
Normal file
BIN
hw3/data/a1/q1/events.out.tfevents.1761424716.soragoto-MSI
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -41,7 +41,7 @@ class ValueCritic(nn.Module):
|
||||
|
||||
############################
|
||||
# YOUR IMPLEMENTATION HERE #
|
||||
|
||||
values=self.network(obs)
|
||||
############################
|
||||
|
||||
return values
|
||||
@@ -55,7 +55,12 @@ class ValueCritic(nn.Module):
|
||||
loss = None
|
||||
############################
|
||||
# YOUR IMPLEMENTATION HERE #
|
||||
values = self.forward(obs)
|
||||
loss = F.mse_loss(values, q_values)
|
||||
|
||||
self.optimizer.zero_grad()
|
||||
loss.backward()
|
||||
self.optimizer.step()
|
||||
############################
|
||||
|
||||
return {
|
||||
|
||||
@@ -148,10 +148,11 @@ class PGAgent(nn.Module):
|
||||
advantages = None
|
||||
############################
|
||||
# YOUR IMPLEMENTATION HERE #
|
||||
source = rewards.copy()
|
||||
source = q_values.copy()
|
||||
mean = np.mean(source)
|
||||
std = np.std(source)
|
||||
advantages = (source - mean)/std
|
||||
# avoid division by zero
|
||||
advantages = (source - mean)/(std+1e-8)
|
||||
############################
|
||||
|
||||
return advantages
|
||||
|
||||
Reference in New Issue
Block a user