import torch
import torch.nn as nn
import torch.nn.functional as F
from fastNLP.modules.encoder.lstm import LSTM
class Highway(nn.Module):
    """Highway network layer: a gated mix of a transformed signal and the raw input.

    Computes ``t * relu(fc2(x)) + (1 - t) * x`` where the transform gate
    ``t = sigmoid(fc1(x))`` decides, per feature, how much of the transformed
    signal passes through versus how much of the input is carried unchanged.
    """

    def __init__(self, input_size):
        """
        :param input_size: feature dimension of the input; both internal
            linear layers map input_size -> input_size so the carry path
            stays shape-compatible with the transform path.
        """
        super(Highway, self).__init__()
        self.fc1 = nn.Linear(input_size, input_size, bias=True)  # transform gate
        self.fc2 = nn.Linear(input_size, input_size, bias=True)  # nonlinear transform

    def forward(self, x):
        """Apply the highway layer; output has the same shape as ``x``.

        ``torch.sigmoid`` replaces the deprecated ``F.sigmoid`` (removed in
        newer PyTorch releases); the computation is identical.
        """
        t = torch.sigmoid(self.fc1(x))
        return torch.mul(t, F.relu(self.fc2(x))) + torch.mul(1 - t, x)
class CharLM(nn.Module):
    """CNN + highway network + LSTM character-level language model.

    # Input:
        4D tensor with shape [batch_size, in_channel, height, width]
    # Output:
        2D Tensor with shape [batch_size, vocab_size]
    # Arguments:
        char_emb_dim: the size of each character embedding
        word_emb_dim: the size of each word embedding (also the LSTM hidden size)
        vocab_size: num of unique words
        num_char: num of characters
    """

    def __init__(self, char_emb_dim, word_emb_dim,
                 vocab_size, num_char):
        super(CharLM, self).__init__()
        self.char_emb_dim = char_emb_dim
        self.word_emb_dim = word_emb_dim
        self.vocab_size = vocab_size

        # char embedding layer
        self.char_embed = nn.Embedding(num_char, char_emb_dim)

        # Convolutions of filters with different widths over the character
        # dimension.  NOTE: this must be an nn.ModuleList, not a plain Python
        # list — a plain list hides the conv parameters from .parameters(),
        # .to(device), state_dict(), etc., so they would never be trained
        # or saved.
        # list of tuples: (the number of filters, width)
        self.filter_num_width = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5), (150, 6)]
        self.convolutions = nn.ModuleList([
            nn.Conv2d(
                1,            # in_channel
                out_channel,  # out_channel
                kernel_size=(char_emb_dim, filter_width),  # (height, width)
                bias=True,
            )
            for out_channel, filter_width in self.filter_num_width
        ])

        # total number of filters == feature size fed to the highway layers
        self.highway_input_dim = sum(num for num, _ in self.filter_num_width)

        self.batch_norm = nn.BatchNorm1d(self.highway_input_dim, affine=False)

        # highway net
        self.highway1 = Highway(self.highway_input_dim)
        self.highway2 = Highway(self.highway_input_dim)

        # LSTM
        self.lstm_num_layers = 2
        self.lstm = LSTM(self.highway_input_dim, hidden_size=self.word_emb_dim,
                         num_layers=self.lstm_num_layers, dropout=0.5)

        # output layer
        self.dropout = nn.Dropout(p=0.5)
        self.linear = nn.Linear(self.word_emb_dim, self.vocab_size)

    def forward(self, x):
        """Score every word position against the vocabulary.

        :param x: tensor of character ids, shape [num_seq, seq_len, max_word_len+2]
        :return: tensor of shape [num_seq*seq_len, vocab_size]
        """
        lstm_batch_size = x.size()[0]
        lstm_seq_len = x.size()[1]

        # flatten sequences so every word is convolved independently
        x = x.contiguous().view(-1, x.size()[2])
        # [num_seq*seq_len, max_word_len+2]
        x = self.char_embed(x)
        # [num_seq*seq_len, max_word_len+2, char_emb_dim]
        x = torch.transpose(x.view(x.size()[0], 1, x.size()[1], -1), 2, 3)
        # [num_seq*seq_len, 1, char_emb_dim, max_word_len+2]
        # (char_emb_dim becomes the conv "height", matching the kernel size)
        x = self.conv_layers(x)
        # [num_seq*seq_len, total_num_filters]
        x = self.batch_norm(x)
        # [num_seq*seq_len, total_num_filters]
        x = self.highway1(x)
        x = self.highway2(x)
        # [num_seq*seq_len, total_num_filters]
        x = x.contiguous().view(lstm_batch_size, lstm_seq_len, -1)
        # [num_seq, seq_len, total_num_filters]
        x = self.lstm(x)
        # NOTE(review): original comment claims [seq_len, num_seq, hidden_size]
        # here; the exact layout depends on fastNLP's LSTM wrapper — confirm
        # against its docs.  The subsequent flatten only needs the total
        # element count to be num_seq*seq_len*hidden_size.
        x = self.dropout(x)
        x = x.contiguous().view(lstm_batch_size * lstm_seq_len, -1)
        # [num_seq*seq_len, hidden_size]
        x = self.linear(x)
        # [num_seq*seq_len, vocab_size]
        return x

    def conv_layers(self, x):
        """Run every conv filter over ``x``, max-pool over time, concatenate.

        :param x: [batch_size, 1, char_emb_dim, max_word_len+2]
        :return: [batch_size, total_num_filters]
        """
        chosen_list = list()
        for conv in self.convolutions:
            # torch.tanh replaces the deprecated F.tanh
            feature_map = torch.tanh(conv(x))
            # (batch_size, out_channel, 1, max_word_len+2 - width + 1)
            chosen = torch.max(feature_map, 3)[0]
            # (batch_size, out_channel, 1)
            # squeeze(2), not squeeze(): a bare squeeze() would also drop
            # the batch dimension whenever batch_size == 1.
            chosen = chosen.squeeze(2)
            # (batch_size, out_channel)
            chosen_list.append(chosen)
        # (batch_size, total_num_filters)
        return torch.cat(chosen_list, 1)