from hydra.utils import instantiate
import torch
import torch.nn as nn

# additional imports for extra credit
import math
import torch.nn.functional as F

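# NOTE: `activation` is assumed to be a Hydra config node with a `_target_`
# field, e.g. {"_target_": "torch.nn.ReLU"}, so that each call to
# instantiate(activation) builds a fresh activation module for its slot in
# the Sequential stacks below.
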

class QNetwork(nn.Module):
    """A plain MLP that maps a state vector to one Q-value per action."""

    def __init__(self, state_size, action_size, hidden_size, activation):
        super(QNetwork, self).__init__()
        self.q_head = nn.Sequential(
            nn.Linear(state_size, hidden_size),
            instantiate(activation),
            nn.Linear(hidden_size, hidden_size),
            instantiate(activation),
            nn.Linear(hidden_size, action_size)
        )

    def forward(self, state):
        Qs = self.q_head(state)
        return Qs


class DuelingQNetwork(nn.Module):
    """Dueling architecture: a shared feature layer feeding separate
    state-value and advantage heads (Wang et al., https://arxiv.org/pdf/1511.06581)."""

    def __init__(self, state_size, action_size, hidden_size, activation):
        super(DuelingQNetwork, self).__init__()
        self.feature_layer = nn.Sequential(
            nn.Linear(state_size, hidden_size),
            instantiate(activation),
        )
        self.value_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            instantiate(activation),
            nn.Linear(hidden_size, 1)
        )
        self.advantage_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            instantiate(activation),
            nn.Linear(hidden_size, action_size)
        )

    def forward(self, state):
        """
        Get the Q value of the current state and action using the dueling network
        """
        ############################
        # YOUR IMPLEMENTATION HERE #

        # using equation (7) on https://arxiv.org/pdf/1511.06581,
        # Q(s, a) = V(s) + A(s, a); the shared features are computed once
        # and reused by both heads
        features = self.feature_layer(state)
        Qs = self.value_head(features) + self.advantage_head(features)
        ############################
        return Qs

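# For reference, equation (9) of the same paper subtracts the mean advantage
# so that V and A are identifiable; a minimal sketch of that alternative
# forward pass would be:
#
#   features = self.feature_layer(state)
#   advantage = self.advantage_head(features)
#   Qs = self.value_head(features) + advantage - advantage.mean(dim=-1, keepdim=True)
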
# Extra credit: implementing Noisy DQN
class NoisyLinear(nn.Linear):
    """Linear layer with learnable Gaussian noise on the weights and bias."""

    # code reference from:
    # (1) https://github.com/PacktPublishing/Deep-Reinforcement-Learning-Hands-On/blob/baa9d013596ea8ea8ed6826b9de6679d98b897ca/Chapter07/lib/dqn_model.py#L9
    # (2) https://github.com/thomashirtz/noisy-networks/blob/main/noisynetworks.py

    def __init__(self, in_features, out_features, sigma_init=0.5, bias=True):
        super().__init__(in_features, out_features, bias=bias)
        # assume the noise is Gaussian; the sigmas are learnable parameters,
        # while the epsilon buffers hold the (non-learnable) noise samples
        self.sigma_weight = nn.Parameter(torch.full((out_features, in_features), sigma_init))
        self.register_buffer('epsilon_weight', torch.zeros(out_features, in_features))
        if bias:
            self.sigma_bias = nn.Parameter(torch.full((out_features,), sigma_init))
            self.register_buffer('epsilon_bias', torch.zeros(out_features))

        self.reset_parameters()

    def reset_parameters(self):
        """
        Reset the weights and bias of the noisy linear layer to a uniform
        distribution over [-sqrt(3 / in_features), sqrt(3 / in_features)]
        """
        std = math.sqrt(3 / self.in_features)
        self.weight.data.uniform_(-std, std)
        if self.bias is not None:
            self.bias.data.uniform_(-std, std)

    def forward(self, input):
        """
        Forward pass of the noisy linear layer: resample the noise buffers,
        then add the sigma-scaled Gaussian noise to the weight and bias
        """
        self.epsilon_weight.normal_()
        weight = self.weight + self.sigma_weight * self.epsilon_weight.data
        bias = self.bias
        if bias is not None:
            self.epsilon_bias.normal_()
            bias = bias + self.sigma_bias * self.epsilon_bias.data
        return F.linear(input, weight, bias)

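# Note: this is the "independent Gaussian noise" variant, with one epsilon
# per weight entry. Fortunato et al. (2017) also describe a factorised
# variant that builds the weight noise from two small vectors via
# f(x) = sign(x) * sqrt(|x|), cutting the number of noise samples from
# in_features * out_features down to in_features + out_features.
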
class NoisyQNetwork(nn.Module):
    """QNetwork with NoisyLinear layers in place of nn.Linear, so exploration
    comes from the learned parameter noise rather than epsilon-greedy."""

    def __init__(self, state_size, action_size, hidden_size, activation, sigma_init=0.5):
        super(NoisyQNetwork, self).__init__()
        self.q_head = nn.Sequential(
            NoisyLinear(state_size, hidden_size, sigma_init=sigma_init),
            instantiate(activation),
            NoisyLinear(hidden_size, hidden_size, sigma_init=sigma_init),
            instantiate(activation),
            NoisyLinear(hidden_size, action_size, sigma_init=sigma_init)
        )

    def forward(self, state):
        Qs = self.q_head(state)
        return Qs
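

# Minimal smoke test, a sketch under assumed CartPole-like sizes (4-dim state,
# 2 actions); the plain dict stands in for the Hydra activation config.
if __name__ == "__main__":
    cfg = {"_target_": "torch.nn.ReLU"}
    state = torch.zeros(1, 4)
    for net in (
        QNetwork(4, 2, hidden_size=64, activation=cfg),
        DuelingQNetwork(4, 2, hidden_size=64, activation=cfg),
        NoisyQNetwork(4, 2, hidden_size=64, activation=cfg),
    ):
        # each network should emit one Q-value per action
        assert net(state).shape == (1, 2)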