# encoder.py
import logging

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

logger = logging.getLogger(__name__)
def reverse_padded_sequence(inputs, lengths, batch_first=False):
    """Reverses sequences according to their lengths.

    Inputs should have size ``T x B x *`` if ``batch_first`` is False, or
    ``B x T x *`` if True. T is the length of the longest sequence (or larger),
    B is the batch size, and * is any number of dimensions (including 0).

    Arguments:
        inputs (Variable): padded batch of variable length sequences.
        lengths (list[int]): list of sequence lengths.
        batch_first (bool, optional): if True, inputs should be B x T x *.

    Returns:
        A Variable with the same size as inputs, but with each sequence
        reversed according to its length.
    """
    if batch_first:
        inputs = inputs.transpose(0, 1)
    max_length, batch_size = inputs.size(0), inputs.size(1)
    if len(lengths) != batch_size:
        raise ValueError('inputs is incompatible with lengths.')
    # For each sequence, index the first `length` steps in reverse order and
    # leave the padding positions where they are.
    ind = [list(reversed(range(0, length))) + list(range(length, max_length))
           for length in lengths]
    ind = Variable(torch.LongTensor(ind).transpose(0, 1))
    for dim in range(2, inputs.dim()):
        ind = ind.unsqueeze(dim)
    ind = ind.expand_as(inputs)
    if inputs.is_cuda:
        ind = ind.cuda(inputs.get_device())
    reversed_inputs = torch.gather(inputs, 0, ind)
    if batch_first:
        reversed_inputs = reversed_inputs.transpose(0, 1)
    return reversed_inputs
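
# Illustrative usage sketch (not part of the original module): a tiny padded
# batch run through reverse_padded_sequence. The tensor values, lengths, and
# the helper name _demo_reverse_padded_sequence are made up for demonstration.
def _demo_reverse_padded_sequence():
    # Batch of 2 sequences padded to length 3, feature dim 1 (T x B x 1).
    seqs = Variable(torch.FloatTensor([[[1.0], [4.0]],
                                       [[2.0], [5.0]],
                                       [[3.0], [0.0]]]))
    lengths = [3, 2]
    rev = reverse_padded_sequence(seqs, lengths)
    # The first sequence becomes [3, 2, 1]; the second becomes [5, 4] with its
    # padding position left in place.
    return rev
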
def DisSentPool(pool_type, sent_len, sent_output):
    """Pools LSTM outputs of size L x B x H over the time dimension ("mean" or "max")."""
    if pool_type == "mean":
        sent_len = Variable(torch.FloatTensor(sent_len)).unsqueeze(1)
        emb = torch.sum(sent_output, 0).squeeze(0)
        emb = emb / sent_len.expand_as(emb)
    elif pool_type == "max":
        emb = torch.max(sent_output, 0)[0]
    else:
        raise ValueError("unknown pool_type: {}".format(pool_type))
    return emb
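
# Illustrative usage sketch (not part of the original module): pooling a padded
# batch of LSTM outputs with DisSentPool. Shapes follow the L x B x H convention
# used in BLSTMEncoder.forward; the numbers and the helper name are made up.
def _demo_dissent_pool():
    sent_output = torch.randn(5, 2, 8)  # L=5 time steps, B=2 sentences, H=8 dims
    sent_len = [5, 3]                   # true (unpadded) length of each sentence
    mean_emb = DisSentPool("mean", sent_len, sent_output)  # B x H
    max_emb = DisSentPool("max", sent_len, sent_output)    # B x H
    return mean_emb, max_emb
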
class BLSTMEncoder(nn.Module):
    def __init__(self, word_emb_dim, hidden_size, device):
        super(BLSTMEncoder, self).__init__()
        self.word_emb_dim = word_emb_dim
        self.enc_lstm_dim = hidden_size
        self.tied_weights = False
        self.biflag = None
        self.device = device
        bidirectional = not self.tied_weights
        logger.info("tied weights = {}, using bidirectional cell: {}".format(
            self.tied_weights, bidirectional))
        self.enc_lstm = nn.LSTM(self.word_emb_dim, self.enc_lstm_dim, 1,
                                bidirectional=bidirectional, dropout=0.1)
        self.emb_drop = nn.Dropout(0.1)

    def is_cuda(self):
        # either all weights are on cpu or they are on gpu
        return 'cuda' in str(self.device)
        # return 'cuda' in str(type(self.enc_lstm.bias_hh_l0.data))

    def forward(self, sent_tuple):
        # sent_len: [max_len, ..., min_len] (bsize)
        # sent: Variable(seqlen x bsize x worddim)
        sent, sent_len = sent_tuple

        # Sort by length (keep idx)
        sent_len, idx_sort = np.sort(sent_len)[::-1], np.argsort(-sent_len)
        idx_unsort = np.argsort(idx_sort)

        # uncomment if the input is np array
        # idx_sort = torch.from_numpy(idx_sort).cuda() if self.is_cuda() \
        #     else torch.from_numpy(idx_sort)
        idx_sort = torch.from_numpy(idx_sort).to(self.device)
        sent = sent.index_select(1, idx_sort)

        # apply input dropout
        # sent = self.emb_drop(sent)

        # Handling padding in Recurrent Networks.
        # Copy the lengths into a fresh tensor: the [::-1] slice above leaves a
        # negative-stride NumPy array, which torch cannot consume directly.
        len_tmp = torch.tensor([i for i in sent_len]).to(self.device)
        sent_packed = nn.utils.rnn.pack_padded_sequence(sent, len_tmp)
        # print("sent packed ", sent_packed)
        sent_output = self.enc_lstm(sent_packed)[0]  # seqlen x batch x 2*nhid
        sent_output = nn.utils.rnn.pad_packed_sequence(sent_output)[0]  # L x B x H

        if self.tied_weights:
            # we also compute the reverse direction with the same (tied) LSTM
            sent_rev = reverse_padded_sequence(sent, sent_len)
            sent_rev_packed = nn.utils.rnn.pack_padded_sequence(sent_rev, sent_len)
            rev_sent_output = self.enc_lstm(sent_rev_packed)[0]
            rev_sent_output = nn.utils.rnn.pad_packed_sequence(rev_sent_output)[0]
            back_sent_output = reverse_padded_sequence(rev_sent_output, sent_len)
            sent_output = sent_output + back_sent_output

        # Un-sort by length, uncomment if the input is np array
        # idx_unsort = torch.from_numpy(idx_unsort).cuda() if self.is_cuda() \
        #     else torch.from_numpy(idx_unsort)
        idx_unsort = torch.from_numpy(idx_unsort).to(self.device)
        sent_output = sent_output.index_select(1, Variable(idx_unsort))  # L x B x H

        # Pooling used in Allen Nie's work.
        # Commented out for the GCN layer; see DisSentPool above.
        """
        if self.pool_type == "mean":
            sent_len = Variable(torch.FloatTensor(sent_len)).unsqueeze(1).cuda()
            emb = torch.sum(sent_output, 0).squeeze(0)
            emb = emb / sent_len.expand_as(emb)
        elif self.pool_type == "max":
            emb = torch.max(sent_output, 0)[0]
        return emb
        """
        return sent_output
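
# Minimal end-to-end sketch (not part of the original module), assuming 300-d
# word embeddings, a 512-d hidden size, and a CPU device; the random batch
# below only demonstrates the (sent, sent_len) input format that forward()
# expects: sent of size seqlen x bsize x worddim, sent_len a NumPy array.
if __name__ == "__main__":
    device = torch.device("cpu")
    encoder = BLSTMEncoder(word_emb_dim=300, hidden_size=512, device=device)
    bsize, max_len = 4, 7
    sent = torch.randn(max_len, bsize, 300)
    sent_len = np.array([7, 5, 5, 3])
    output = encoder((sent, sent_len))  # seqlen x bsize x 2*hidden_size
    print("encoder output size:", output.size())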