
Commit

Add gitignore and clean imports
frostming committed Jun 20, 2018
1 parent a42dcd9 commit c9e182e
Showing 12 changed files with 45 additions and 68 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
*.py[cod]
__pycache__
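
The two patterns cover Python's bytecode artifacts: *.py[cod] is a character class matching the .pyc, .pyo, and .pyd extensions, and __pycache__ ignores the Python 3 bytecode cache directory. A quick check of the glob, as a standalone sketch rather than part of the commit:

# Illustration only: confirm what the *.py[cod] pattern matches.
import fnmatch

candidates = ["main.py", "main.pyc", "main.pyo", "model.pyd", "__pycache__"]
print([name for name in candidates if fnmatch.fnmatch(name, "*.py[cod]")])
# ['main.pyc', 'main.pyo', 'model.pyd']  -- plain .py sources stay tracked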
11 changes: 5 additions & 6 deletions main.py
@@ -9,12 +9,10 @@
import sys
import argparse
import random
import copy
import torch
import gc
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from utils.metric import get_ner_fmeasure
@@ -24,7 +22,7 @@
try:
import cPickle as pickle
except ModuleNotFoundError:
import pickle as pickle
import pickle


seed_num = 42
@@ -160,6 +158,7 @@ def evaluate(data, model, name, nbest=None):
instances = data.raw_Ids
else:
print("Error: wrong evaluate name,", name)
exit(1)
right_token = 0
whole_token = 0
nbest_pred_results = []
@@ -294,7 +293,7 @@ def train(data):
optimizer = optim.Adam(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
else:
print("Optimizer illegal: %s"%(data.optimizer))
exit(0)
exit(1)
best_dev = -10
# data.HP_iteration = 1
## start training
@@ -341,7 +340,7 @@ def train(data):
print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token))
if sample_loss > 1e8 or str(sample_loss) == "nan":
print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
exit(0)
exit(1)
sys.stdout.flush()
sample_loss = 0
loss.backward()
@@ -357,7 +356,7 @@ def train(data):
print("totalloss:", total_loss)
if total_loss > 1e8 or str(total_loss) == "nan":
print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
exit(0)
exit(1)
# continue
speed, acc, p, r, f, _,_ = evaluate(data, model, "dev")
dev_finish = time.time()
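Beyond trimming unused imports, the main.py hunks switch the Python 3 fallback to a plain import pickle and turn the error paths from exit(0) into exit(1), so a wrong evaluation name, an illegal optimizer, or a loss explosion now reports a nonzero status. A minimal sketch of the same pattern, for context only (sys.exit stands in for the script's builtin exit; this is not the commit's code):

import sys

# Python 2 ships the faster C pickler as cPickle; Python 3's pickle already
# uses the C implementation, so the plain module is all that is needed there.
try:
    import cPickle as pickle
except ImportError:  # also covers Python 3's ModuleNotFoundError, a subclass
    import pickle

def abort(message):
    # A nonzero exit status lets shell scripts and CI detect the failure.
    print(message)
    sys.exit(1)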
11 changes: 4 additions & 7 deletions model/charbigru.py
@@ -5,9 +5,7 @@
# @Last Modified time: 2018-04-26 13:22:51
from __future__ import print_function
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import numpy as np

@@ -42,10 +40,10 @@ def random_embedding(self, vocab_size, embedding_dim):

def get_last_hiddens(self, input, seq_lengths):
"""
input:
input:
input: Variable(batch_size, word_length)
seq_lengths: numpy array (batch_size, 1)
output:
output:
Variable(batch_size, char_hidden_dim)
Note it only accepts ordered (length) variable, length size is recorded in seq_lengths
"""
@@ -59,10 +57,10 @@ def get_last_hiddens(self, input, seq_lengths):

def get_all_hiddens(self, input, seq_lengths):
"""
input:
input:
input: Variable(batch_size, word_length)
seq_lengths: numpy array (batch_size, 1)
output:
output:
Variable(batch_size, word_length, char_hidden_dim)
Note it only accepts ordered (length) variable, length size is recorded in seq_lengths
"""
@@ -77,4 +75,3 @@ def get_all_hiddens(self, input, seq_lengths):

def forward(self, input, seq_lengths):
return self.get_all_hiddens(input, seq_lengths)
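
The charbigru.py hunks are import and whitespace cleanup; the module itself packs length-sorted character sequences, runs them through a bidirectional GRU, and pads the result back out. A self-contained sketch of that pattern with assumed sizes (not the repository's class):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# Hypothetical sizes for illustration.
char_vocab, emb_dim, char_hidden_dim = 50, 30, 50
embed = nn.Embedding(char_vocab, emb_dim)
gru = nn.GRU(emb_dim, char_hidden_dim // 2, batch_first=True, bidirectional=True)

# Three character sequences, already sorted by decreasing length.
chars = torch.randint(1, char_vocab, (3, 7))   # (batch_size, word_length)
lengths = [7, 5, 2]

packed = pack_padded_sequence(embed(chars), lengths, batch_first=True)
out, h_n = gru(packed)
all_hiddens, _ = pad_packed_sequence(out, batch_first=True)
print(all_hiddens.shape)  # torch.Size([3, 7, 50]) -> (batch, word_length, char_hidden_dim)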

11 changes: 4 additions & 7 deletions model/charbilstm.py
@@ -5,9 +5,7 @@
# @Last Modified time: 2018-04-26 13:22:34
from __future__ import print_function
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import numpy as np

@@ -42,10 +40,10 @@ def random_embedding(self, vocab_size, embedding_dim):

def get_last_hiddens(self, input, seq_lengths):
"""
input:
input:
input: Variable(batch_size, word_length)
seq_lengths: numpy array (batch_size, 1)
output:
output:
Variable(batch_size, char_hidden_dim)
Note it only accepts ordered (length) variable, length size is recorded in seq_lengths
"""
@@ -59,10 +57,10 @@ def get_last_hiddens(self, input, seq_lengths):

def get_all_hiddens(self, input, seq_lengths):
"""
input:
input:
input: Variable(batch_size, word_length)
seq_lengths: numpy array (batch_size, 1)
output:
output:
Variable(batch_size, word_length, char_hidden_dim)
Note it only accepts ordered (length) variable, length size is recorded in seq_lengths
"""
@@ -77,4 +75,3 @@ def get_all_hiddens(self, input, seq_lengths):

def forward(self, input, seq_lengths):
return self.get_all_hiddens(input, seq_lengths)
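
charbilstm.py receives the same cleanup. Its get_last_hiddens returns one vector per word by joining the final forward and backward LSTM states; a rough sketch of that idea under assumed shapes (not the repository's implementation):

import torch
import torch.nn as nn

batch_size, word_length, emb_dim, char_hidden_dim = 4, 6, 30, 50
lstm = nn.LSTM(emb_dim, char_hidden_dim // 2, batch_first=True, bidirectional=True)

char_embs = torch.randn(batch_size, word_length, emb_dim)
_, (h_n, _) = lstm(char_embs)
# h_n: (num_directions, batch_size, char_hidden_dim // 2); join both directions.
last_hiddens = torch.cat([h_n[0], h_n[1]], dim=1)
print(last_hiddens.shape)  # torch.Size([4, 50]) -> (batch_size, char_hidden_dim)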

10 changes: 4 additions & 6 deletions model/charcnn.py
@@ -5,7 +5,6 @@
# @Last Modified time: 2018-04-26 13:21:40
from __future__ import print_function
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
@@ -39,10 +38,10 @@ def random_embedding(self, vocab_size, embedding_dim):

def get_last_hiddens(self, input, seq_lengths):
"""
input:
input:
input: Variable(batch_size, word_length)
seq_lengths: numpy array (batch_size, 1)
output:
output:
Variable(batch_size, char_hidden_dim)
Note it only accepts ordered (length) variable, length size is recorded in seq_lengths
"""
@@ -55,10 +54,10 @@ def get_last_hiddens(self, input, seq_lengths):

def get_all_hiddens(self, input, seq_lengths):
"""
input:
input:
input: Variable(batch_size, word_length)
seq_lengths: numpy array (batch_size, 1)
output:
output:
Variable(batch_size, word_length, char_hidden_dim)
Note it only accepts ordered (length) variable, length size is recorded in seq_lengths
"""
@@ -72,4 +71,3 @@ def get_all_hiddens(self, input, seq_lengths):

def forward(self, input, seq_lengths):
return self.get_all_hiddens(input, seq_lengths)
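
charcnn.py loses only an unused import. The CNN variant produces a fixed-size word representation by max-pooling the convolution output over the character dimension; a small sketch with assumed sizes (not the commit's code):

import torch
import torch.nn as nn
import torch.nn.functional as F

batch_size, word_length, emb_dim, char_hidden_dim = 4, 6, 30, 50
conv = nn.Conv1d(emb_dim, char_hidden_dim, kernel_size=3, padding=1)

char_embs = torch.randn(batch_size, word_length, emb_dim)
# Conv1d expects (batch, channels, length), so move the embedding dim to axis 1.
conv_out = conv(char_embs.transpose(2, 1))      # (batch, char_hidden_dim, word_length)
last_hiddens = F.max_pool1d(conv_out, word_length).squeeze(2)
print(last_hiddens.shape)  # torch.Size([4, 50]) -> (batch_size, char_hidden_dim)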

33 changes: 16 additions & 17 deletions model/crf.py
@@ -8,7 +8,6 @@
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
START_TAG = -2
STOP_TAG = -1

@@ -80,22 +79,22 @@ def _calculate_PZ(self, feats, mask):
# previous to_target is current from_target
# partition: previous results log(exp(from_target)), #(batch_size * from_target)
# cur_values: bat_size * from_target * to_target

cur_values = cur_values + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
cur_partition = log_sum_exp(cur_values, tag_size)
# print cur_partition.data

# (bat_size * from_target * to_target) -> (bat_size * to_target)
# partition = utils.switch(partition, cur_partition, mask[idx].view(bat_size, 1).expand(bat_size, self.tagset_size)).view(bat_size, -1)
mask_idx = mask[idx, :].view(batch_size, 1).expand(batch_size, tag_size)

## effective updated partition part, only keep the partition value of mask value = 1
masked_cur_partition = cur_partition.masked_select(mask_idx)
## let mask_idx broadcastable, to disable warning
mask_idx = mask_idx.contiguous().view(batch_size, tag_size, 1)

## replace the partition where the maskvalue=1, other partition value keeps the same
partition.masked_scatter_(mask_idx, masked_cur_partition)
partition.masked_scatter_(mask_idx, masked_cur_partition)
# until the last state, add transition score for all partition (and do log_sum_exp) then select the value in STOP_TAG
cur_values = self.transitions.view(1,tag_size, tag_size).expand(batch_size, tag_size, tag_size) + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
cur_partition = log_sum_exp(cur_values, tag_size)
@@ -157,7 +156,7 @@ def _viterbi_decode(self, feats, mask):
partition_history.append(partition)
## cur_bp: (batch_size, tag_size) max source score position in current tag
## set padded label as 0, which will be filtered in post processing
cur_bp.masked_fill_(mask[idx].view(batch_size, 1).expand(batch_size, tag_size), 0)
cur_bp.masked_fill_(mask[idx].view(batch_size, 1).expand(batch_size, tag_size), 0)
back_points.append(cur_bp)
# exit(0)
### add score to final STOP_TAG
@@ -173,7 +172,7 @@ def _viterbi_decode(self, feats, mask):
pad_zero = pad_zero.cuda()
back_points.append(pad_zero)
back_points = torch.cat(back_points).view(seq_len, batch_size, tag_size)

## select end ids in STOP_TAG
pointer = last_bp[:, STOP_TAG]
insert_last = pointer.contiguous().view(batch_size,1,1).expand(batch_size,1, tag_size)
@@ -202,7 +201,7 @@ def _viterbi_decode(self, feats, mask):
def forward(self, feats):
path_score, best_path = self._viterbi_decode(feats)
return path_score, best_path


def _score_sentence(self, scores, mask, tags):
"""
@@ -217,7 +216,7 @@ def _score_sentence(self, scores, mask, tags):
batch_size = scores.size(1)
seq_len = scores.size(0)
tag_size = scores.size(2)
## convert tag value into a new format, recorded label bigram information to index
## convert tag value into a new format, recorded label bigram information to index
new_tags = autograd.Variable(torch.LongTensor(batch_size, seq_len))
if self.gpu:
new_tags = new_tags.cuda()
@@ -245,7 +244,7 @@ def _score_sentence(self, scores, mask, tags):
tg_energy = torch.gather(scores.view(seq_len, batch_size, -1), 2, new_tags).view(seq_len, batch_size) # seq_len * bat_size
## mask transpose to (seq_len, batch_size)
tg_energy = tg_energy.masked_select(mask.transpose(1,0))

# ## calculate the score from START_TAG to first label
# start_transition = self.transitions[START_TAG,:].view(1, tag_size).expand(batch_size, tag_size)
# start_energy = torch.gather(start_transition, 1, tags[0,:])
@@ -325,7 +324,7 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
cur_bp = cur_bp*nbest
partition = partition.transpose(2,1)
cur_bp = cur_bp.transpose(2,1)

# print partition
# exit(0)
#partition: (batch_size * to_target * nbest)
@@ -334,7 +333,7 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
## cur_bp: (batch_size,nbest, tag_size) topn source score position in current tag
## set padded label as 0, which will be filtered in post processing
## mask[idx] ? mask[idx-1]
cur_bp.masked_fill_(mask[idx].view(batch_size, 1, 1).expand(batch_size, tag_size, nbest), 0)
cur_bp.masked_fill_(mask[idx].view(batch_size, 1, 1).expand(batch_size, tag_size, nbest), 0)
# print cur_bp[0]
back_points.append(cur_bp)
### add score to final STOP_TAG
@@ -348,13 +347,13 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
end_partition, end_bp = torch.topk(last_values, nbest, 1)
## end_partition: (batch, nbest, tag_size)
end_bp = end_bp.transpose(2,1)
# end_bp: (batch, tag_size, nbest)
# end_bp: (batch, tag_size, nbest)
pad_zero = autograd.Variable(torch.zeros(batch_size, tag_size, nbest)).long()
if self.gpu:
pad_zero = pad_zero.cuda()
back_points.append(pad_zero)
back_points = torch.cat(back_points).view(seq_len, batch_size, tag_size, nbest)

## select end ids in STOP_TAG
pointer = end_bp[:, STOP_TAG, :] ## (batch_size, nbest)
insert_last = pointer.contiguous().view(batch_size, 1, 1, nbest).expand(batch_size, 1, tag_size, nbest)
@@ -385,7 +384,7 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
decode_idx = autograd.Variable(torch.LongTensor(seq_len, batch_size, nbest))
if self.gpu:
decode_idx = decode_idx.cuda()
decode_idx[-1] = pointer.data/nbest
decode_idx[-1] = pointer.data/nbest
# print "pointer-1:",pointer[2]
# exit(0)
# use old mask, let 0 means has token
@@ -397,7 +396,7 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
decode_idx[idx] = new_pointer.data/nbest
# # use new pointer to remember the last end nbest ids for non longest
pointer = new_pointer + pointer.contiguous().view(batch_size,nbest)*mask[idx].view(batch_size,1).expand(batch_size, nbest).long()

# exit(0)
path_score = None
decode_idx = decode_idx.transpose(1,0)
@@ -408,7 +407,7 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
# print decode_idx[:,0,:]
# exit(0)

### calculate probability for each sequence
### calculate probability for each sequence
scores = end_partition[:, :, STOP_TAG]
## scores: [batch_size, nbest]
max_scores,_ = torch.max(scores, 1)
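The crf.py hunks drop an unused import and trailing whitespace; the surrounding _calculate_PZ code relies on a numerically stable log-sum-exp over the tag dimension when it folds transition and emission scores into the partition. A standalone sketch of that operation (not the repository's helper):

import torch

def log_sum_exp_over_tags(vec):
    # vec: (batch_size, from_target, to_target) -> (batch_size, to_target).
    # Subtracting the per-column max keeps exp() from overflowing.
    max_score, _ = torch.max(vec, dim=1, keepdim=True)
    return max_score.squeeze(1) + torch.log(torch.sum(torch.exp(vec - max_score), dim=1))

scores = torch.randn(2, 5, 5) * 30   # magnitudes like these would overflow a naive exp-sum
print(log_sum_exp_over_tags(scores).shape)  # torch.Size([2, 5])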
7 changes: 2 additions & 5 deletions model/seqmodel.py
@@ -7,10 +7,8 @@
from __future__ import print_function
from __future__ import absolute_import
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from .wordsequence import WordSequence
from .crf import CRF

@@ -19,7 +17,7 @@ def __init__(self, data):
super(SeqModel, self).__init__()
self.use_crf = data.use_crf
print("build network...")
print("use_char: ", data.use_char)
print("use_char: ", data.use_char)
if data.use_char:
print("char feature extractor: ", data.char_feature_extractor)
print("word feature extractor: ", data.word_feature_extractor)
@@ -30,7 +28,7 @@ def __init__(self, data):
## add two more label for downlayer lstm, use original label size for CRF
label_size = data.label_alphabet_size
data.label_alphabet_size += 2
self.word_hidden = WordSequence(data)
self.word_hidden = WordSequence(data)
if self.use_crf:
self.crf = CRF(label_size, self.gpu)

@@ -83,4 +81,3 @@ def decode_nbest(self, word_inputs, feature_inputs, word_seq_lengths, char_input
scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
return scores, tag_seq
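In seqmodel.py, aside from the import cleanup, the constructor keeps the original label count for the CRF while the word-level layer is built with two extra label slots, which the CRF reserves for its START and STOP transitions (indexed -2 and -1 in crf.py). A toy illustration of that bookkeeping with a hypothetical label set (not the repository's data class):

# Hypothetical label inventory, for illustration only.
labels = ["O", "B-PER", "I-PER", "B-LOC", "I-LOC"]

crf_label_size = len(labels)                  # what CRF(label_size, gpu) receives
word_layer_output_size = crf_label_size + 2   # downstream layer also scores START/STOP slots
START_TAG, STOP_TAG = -2, -1                  # as defined in crf.py
print(crf_label_size, word_layer_output_size) # 5 7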


8 changes: 2 additions & 6 deletions model/wordrep.py
@@ -6,11 +6,8 @@
from __future__ import print_function
from __future__ import absolute_import
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from .charbilstm import CharBiLSTM
from .charbigru import CharBiGRU
from .charcnn import CharCNN
@@ -47,7 +44,7 @@ def __init__(self, data):
self.word_embedding.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
else:
self.word_embedding.weight.data.copy_(torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))

self.feature_num = data.feature_num
self.feature_embedding_dims = data.feature_emb_dims
self.feature_embeddings = nn.ModuleList()
@@ -86,7 +83,7 @@ def forward(self, word_inputs,feature_inputs, word_seq_lengths, char_inputs, cha
char_inputs: (batch_size*sent_len, word_length)
char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
char_seq_recover: variable which records the char order information, used to recover char order
output:
output:
Variable(batch_size, sent_len, hidden_dim)
"""
batch_size = word_inputs.size(0)
@@ -113,4 +110,3 @@ def forward(self, word_inputs,feature_inputs, word_seq_lengths, char_inputs, cha
word_embs = torch.cat(word_list, 2)
word_represent = self.drop(word_embs)
return word_represent
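
wordrep.py's forward concatenates word embeddings, any feature embeddings, and the character representation along the last dimension, then applies dropout. A compact sketch of that concatenation with assumed dimensions (not the repository's module):

import torch
import torch.nn as nn

batch_size, sent_len = 2, 9
word_emb_dim, char_hidden_dim, feature_emb_dim = 100, 50, 20

word_embs = torch.randn(batch_size, sent_len, word_emb_dim)
char_rep = torch.randn(batch_size, sent_len, char_hidden_dim)
feature_embs = torch.randn(batch_size, sent_len, feature_emb_dim)

drop = nn.Dropout(0.5)
word_represent = drop(torch.cat([word_embs, char_rep, feature_embs], dim=2))
print(word_represent.shape)  # torch.Size([2, 9, 170]) -> (batch_size, sent_len, hidden_dim)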
