
Commit

Add gitignore and clean imports
frostming committed Jun 20, 2018
1 parent a42dcd9 commit c9e182e
Showing 12 changed files with 45 additions and 68 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
*.py[cod]
__pycache__
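
The two patterns cover Python's bytecode artifacts: *.py[cod] is a character class matching the .pyc, .pyo, and .pyd extensions, and __pycache__ ignores the Python 3 bytecode cache directory. A quick check of the glob, as a standalone sketch rather than part of the commit:

# Illustration only: confirm what the *.py[cod] pattern matches.
import fnmatch

candidates = ["main.py", "main.pyc", "main.pyo", "model.pyd", "__pycache__"]
print([name for name in candidates if fnmatch.fnmatch(name, "*.py[cod]")])
# ['main.pyc', 'main.pyo', 'model.pyd']  -- plain .py sources stay tracked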
11 changes: 5 additions & 6 deletions main.py
@@ -9,12 +9,10 @@
import sys
import argparse
import random
import copy
import torch
import gc
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from utils.metric import get_ner_fmeasure
@@ -24,7 +22,7 @@
try:
import cPickle as pickle
except ModuleNotFoundError:
import pickle as pickle
import pickle


seed_num = 42
@@ -160,6 +158,7 @@ def evaluate(data, model, name, nbest=None):
instances = data.raw_Ids
else:
print("Error: wrong evaluate name,", name)
exit(1)
right_token = 0
whole_token = 0
nbest_pred_results = []
@@ -294,7 +293,7 @@ def train(data):
optimizer = optim.Adam(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
else:
print("Optimizer illegal: %s"%(data.optimizer))
exit(0)
exit(1)
best_dev = -10
# data.HP_iteration = 1
## start training
@@ -341,7 +340,7 @@ def train(data):
print(" Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token))
if sample_loss > 1e8 or str(sample_loss) == "nan":
print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
exit(0)
exit(1)
sys.stdout.flush()
sample_loss = 0
loss.backward()
@@ -357,7 +356,7 @@ def train(data):
print("totalloss:", total_loss)
if total_loss > 1e8 or str(total_loss) == "nan":
print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
exit(0)
exit(1)
# continue
speed, acc, p, r, f, _,_ = evaluate(data, model, "dev")
dev_finish = time.time()
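Beyond trimming unused imports, the main.py hunks switch the Python 3 fallback to a plain import pickle and turn the error paths from exit(0) into exit(1), so a wrong evaluation name, an illegal optimizer, or a loss explosion now reports a nonzero status. A minimal sketch of the same pattern, for context only (sys.exit stands in for the script's builtin exit; this is not the commit's code):

import sys

# Python 2 ships the faster C pickler as cPickle; Python 3's pickle already
# uses the C implementation, so the plain module is all that is needed there.
try:
    import cPickle as pickle
except ImportError:  # also covers Python 3's ModuleNotFoundError, a subclass
    import pickle

def abort(message):
    # A nonzero exit status lets shell scripts and CI detect the failure.
    print(message)
    sys.exit(1)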
11 changes: 4 additions & 7 deletions model/charbigru.py
@@ -5,9 +5,7 @@
# @Last Modified time: 2018-04-26 13:22:51
from __future__ import print_function
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import numpy as np

@@ -42,10 +40,10 @@ def random_embedding(self, vocab_size, embedding_dim):

def get_last_hiddens(self, input, seq_lengths):
"""
input:
input:
input: Variable(batch_size, word_length)
seq_lengths: numpy array (batch_size, 1)
output:
output:
Variable(batch_size, char_hidden_dim)
Note it only accepts ordered (length) variable, length size is recorded in seq_lengths
"""
@@ -59,10 +57,10 @@ def get_last_hiddens(self, input, seq_lengths):

def get_all_hiddens(self, input, seq_lengths):
"""
input:
input:
input: Variable(batch_size, word_length)
seq_lengths: numpy array (batch_size, 1)
output:
output:
Variable(batch_size, word_length, char_hidden_dim)
Note it only accepts ordered (length) variable, length size is recorded in seq_lengths
"""
@@ -77,4 +75,3 @@ def get_all_hiddens(self, input, seq_lengths):

def forward(self, input, seq_lengths):
return self.get_all_hiddens(input, seq_lengths)
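
The charbigru.py hunks are import and whitespace cleanup; the module itself packs length-sorted character sequences, runs them through a bidirectional GRU, and pads the result back out. A self-contained sketch of that pattern with assumed sizes (not the repository's class):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# Hypothetical sizes for illustration.
char_vocab, emb_dim, char_hidden_dim = 50, 30, 50
embed = nn.Embedding(char_vocab, emb_dim)
gru = nn.GRU(emb_dim, char_hidden_dim // 2, batch_first=True, bidirectional=True)

# Three character sequences, already sorted by decreasing length.
chars = torch.randint(1, char_vocab, (3, 7))   # (batch_size, word_length)
lengths = [7, 5, 2]

packed = pack_padded_sequence(embed(chars), lengths, batch_first=True)
out, h_n = gru(packed)
all_hiddens, _ = pad_packed_sequence(out, batch_first=True)
print(all_hiddens.shape)  # torch.Size([3, 7, 50]) -> (batch, word_length, char_hidden_dim)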

11 changes: 4 additions & 7 deletions model/charbilstm.py
@@ -5,9 +5,7 @@
# @Last Modified time: 2018-04-26 13:22:34
from __future__ import print_function
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import numpy as np

@@ -42,10 +40,10 @@ def random_embedding(self, vocab_size, embedding_dim):

def get_last_hiddens(self, input, seq_lengths):
"""
input:
input:
input: Variable(batch_size, word_length)
seq_lengths: numpy array (batch_size, 1)
output:
output:
Variable(batch_size, char_hidden_dim)
Note it only accepts ordered (length) variable, length size is recorded in seq_lengths
"""
@@ -59,10 +57,10 @@ def get_last_hiddens(self, input, seq_lengths):

def get_all_hiddens(self, input, seq_lengths):
"""
input:
input:
input: Variable(batch_size, word_length)
seq_lengths: numpy array (batch_size, 1)
output:
output:
Variable(batch_size, word_length, char_hidden_dim)
Note it only accepts ordered (length) variable, length size is recorded in seq_lengths
"""
@@ -77,4 +75,3 @@ def get_all_hiddens(self, input, seq_lengths):

def forward(self, input, seq_lengths):
return self.get_all_hiddens(input, seq_lengths)
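
charbilstm.py receives the same cleanup. Its get_last_hiddens returns one vector per word by joining the final forward and backward LSTM states; a rough sketch of that idea under assumed shapes (not the repository's implementation):

import torch
import torch.nn as nn

batch_size, word_length, emb_dim, char_hidden_dim = 4, 6, 30, 50
lstm = nn.LSTM(emb_dim, char_hidden_dim // 2, batch_first=True, bidirectional=True)

char_embs = torch.randn(batch_size, word_length, emb_dim)
_, (h_n, _) = lstm(char_embs)
# h_n: (num_directions, batch_size, char_hidden_dim // 2); join both directions.
last_hiddens = torch.cat([h_n[0], h_n[1]], dim=1)
print(last_hiddens.shape)  # torch.Size([4, 50]) -> (batch_size, char_hidden_dim)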

10 changes: 4 additions & 6 deletions model/charcnn.py
@@ -5,7 +5,6 @@
# @Last Modified time: 2018-04-26 13:21:40
from __future__ import print_function
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
@@ -39,10 +38,10 @@ def random_embedding(self, vocab_size, embedding_dim):

def get_last_hiddens(self, input, seq_lengths):
"""
input:
input:
input: Variable(batch_size, word_length)
seq_lengths: numpy array (batch_size, 1)
output:
output:
Variable(batch_size, char_hidden_dim)
Note it only accepts ordered (length) variable, length size is recorded in seq_lengths
"""
@@ -55,10 +54,10 @@ def get_last_hiddens(self, input, seq_lengths):

def get_all_hiddens(self, input, seq_lengths):
"""
input:
input:
input: Variable(batch_size, word_length)
seq_lengths: numpy array (batch_size, 1)
output:
output:
Variable(batch_size, word_length, char_hidden_dim)
Note it only accepts ordered (length) variable, length size is recorded in seq_lengths
"""
@@ -72,4 +71,3 @@ def get_all_hiddens(self, input, seq_lengths):

def forward(self, input, seq_lengths):
return self.get_all_hiddens(input, seq_lengths)
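
charcnn.py loses only an unused import. The CNN variant produces a fixed-size word representation by max-pooling the convolution output over the character dimension; a small sketch with assumed sizes (not the commit's code):

import torch
import torch.nn as nn
import torch.nn.functional as F

batch_size, word_length, emb_dim, char_hidden_dim = 4, 6, 30, 50
conv = nn.Conv1d(emb_dim, char_hidden_dim, kernel_size=3, padding=1)

char_embs = torch.randn(batch_size, word_length, emb_dim)
# Conv1d expects (batch, channels, length), so move the embedding dim to axis 1.
conv_out = conv(char_embs.transpose(2, 1))      # (batch, char_hidden_dim, word_length)
last_hiddens = F.max_pool1d(conv_out, word_length).squeeze(2)
print(last_hiddens.shape)  # torch.Size([4, 50]) -> (batch_size, char_hidden_dim)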

33 changes: 16 additions & 17 deletions model/crf.py
@@ -8,7 +8,6 @@
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
START_TAG = -2
STOP_TAG = -1

@@ -80,22 +79,22 @@ def _calculate_PZ(self, feats, mask):
# previous to_target is current from_target
# partition: previous results log(exp(from_target)), #(batch_size * from_target)
# cur_values: bat_size * from_target * to_target

cur_values = cur_values + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
cur_partition = log_sum_exp(cur_values, tag_size)
# print cur_partition.data

# (bat_size * from_target * to_target) -> (bat_size * to_target)
# partition = utils.switch(partition, cur_partition, mask[idx].view(bat_size, 1).expand(bat_size, self.tagset_size)).view(bat_size, -1)
mask_idx = mask[idx, :].view(batch_size, 1).expand(batch_size, tag_size)

## effective updated partition part, only keep the partition value of mask value = 1
masked_cur_partition = cur_partition.masked_select(mask_idx)
## let mask_idx broadcastable, to disable warning
mask_idx = mask_idx.contiguous().view(batch_size, tag_size, 1)

## replace the partition where the maskvalue=1, other partition value keeps the same
partition.masked_scatter_(mask_idx, masked_cur_partition)
partition.masked_scatter_(mask_idx, masked_cur_partition)
# until the last state, add transition score for all partition (and do log_sum_exp) then select the value in STOP_TAG
cur_values = self.transitions.view(1,tag_size, tag_size).expand(batch_size, tag_size, tag_size) + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
cur_partition = log_sum_exp(cur_values, tag_size)
@@ -157,7 +156,7 @@ def _viterbi_decode(self, feats, mask):
partition_history.append(partition)
## cur_bp: (batch_size, tag_size) max source score position in current tag
## set padded label as 0, which will be filtered in post processing
cur_bp.masked_fill_(mask[idx].view(batch_size, 1).expand(batch_size, tag_size), 0)
cur_bp.masked_fill_(mask[idx].view(batch_size, 1).expand(batch_size, tag_size), 0)
back_points.append(cur_bp)
# exit(0)
### add score to final STOP_TAG
@@ -173,7 +172,7 @@ def _viterbi_decode(self, feats, mask):
pad_zero = pad_zero.cuda()
back_points.append(pad_zero)
back_points = torch.cat(back_points).view(seq_len, batch_size, tag_size)

## select end ids in STOP_TAG
pointer = last_bp[:, STOP_TAG]
insert_last = pointer.contiguous().view(batch_size,1,1).expand(batch_size,1, tag_size)
@@ -202,7 +201,7 @@ def _viterbi_decode(self, feats, mask):
def forward(self, feats):
path_score, best_path = self._viterbi_decode(feats)
return path_score, best_path


def _score_sentence(self, scores, mask, tags):
"""
@@ -217,7 +216,7 @@ def _score_sentence(self, scores, mask, tags):
batch_size = scores.size(1)
seq_len = scores.size(0)
tag_size = scores.size(2)
## convert tag value into a new format, recorded label bigram information to index
## convert tag value into a new format, recorded label bigram information to index
new_tags = autograd.Variable(torch.LongTensor(batch_size, seq_len))
if self.gpu:
new_tags = new_tags.cuda()
@@ -245,7 +244,7 @@ def _score_sentence(self, scores, mask, tags):
tg_energy = torch.gather(scores.view(seq_len, batch_size, -1), 2, new_tags).view(seq_len, batch_size) # seq_len * bat_size
## mask transpose to (seq_len, batch_size)
tg_energy = tg_energy.masked_select(mask.transpose(1,0))

# ## calculate the score from START_TAG to first label
# start_transition = self.transitions[START_TAG,:].view(1, tag_size).expand(batch_size, tag_size)
# start_energy = torch.gather(start_transition, 1, tags[0,:])
@@ -325,7 +324,7 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
cur_bp = cur_bp*nbest
partition = partition.transpose(2,1)
cur_bp = cur_bp.transpose(2,1)

# print partition
# exit(0)
#partition: (batch_size * to_target * nbest)
@@ -334,7 +333,7 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
## cur_bp: (batch_size,nbest, tag_size) topn source score position in current tag
## set padded label as 0, which will be filtered in post processing
## mask[idx] ? mask[idx-1]
cur_bp.masked_fill_(mask[idx].view(batch_size, 1, 1).expand(batch_size, tag_size, nbest), 0)
cur_bp.masked_fill_(mask[idx].view(batch_size, 1, 1).expand(batch_size, tag_size, nbest), 0)
# print cur_bp[0]
back_points.append(cur_bp)
### add score to final STOP_TAG
@@ -348,13 +347,13 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
end_partition, end_bp = torch.topk(last_values, nbest, 1)
## end_partition: (batch, nbest, tag_size)
end_bp = end_bp.transpose(2,1)
# end_bp: (batch, tag_size, nbest)
# end_bp: (batch, tag_size, nbest)
pad_zero = autograd.Variable(torch.zeros(batch_size, tag_size, nbest)).long()
if self.gpu:
pad_zero = pad_zero.cuda()
back_points.append(pad_zero)
back_points = torch.cat(back_points).view(seq_len, batch_size, tag_size, nbest)

## select end ids in STOP_TAG
pointer = end_bp[:, STOP_TAG, :] ## (batch_size, nbest)
insert_last = pointer.contiguous().view(batch_size, 1, 1, nbest).expand(batch_size, 1, tag_size, nbest)
@@ -385,7 +384,7 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
decode_idx = autograd.Variable(torch.LongTensor(seq_len, batch_size, nbest))
if self.gpu:
decode_idx = decode_idx.cuda()
decode_idx[-1] = pointer.data/nbest
decode_idx[-1] = pointer.data/nbest
# print "pointer-1:",pointer[2]
# exit(0)
# use old mask, let 0 means has token
@@ -397,7 +396,7 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
decode_idx[idx] = new_pointer.data/nbest
# # use new pointer to remember the last end nbest ids for non longest
pointer = new_pointer + pointer.contiguous().view(batch_size,nbest)*mask[idx].view(batch_size,1).expand(batch_size, nbest).long()

# exit(0)
path_score = None
decode_idx = decode_idx.transpose(1,0)
@@ -408,7 +407,7 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
# print decode_idx[:,0,:]
# exit(0)

### calculate probability for each sequence
### calculate probability for each sequence
scores = end_partition[:, :, STOP_TAG]
## scores: [batch_size, nbest]
max_scores,_ = torch.max(scores, 1)
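The crf.py hunks drop an unused import and trailing whitespace; the surrounding _calculate_PZ code relies on a numerically stable log-sum-exp over the tag dimension when it folds transition and emission scores into the partition. A standalone sketch of that operation (not the repository's helper):

import torch

def log_sum_exp_over_tags(vec):
    # vec: (batch_size, from_target, to_target) -> (batch_size, to_target).
    # Subtracting the per-column max keeps exp() from overflowing.
    max_score, _ = torch.max(vec, dim=1, keepdim=True)
    return max_score.squeeze(1) + torch.log(torch.sum(torch.exp(vec - max_score), dim=1))

scores = torch.randn(2, 5, 5) * 30   # magnitudes like these would overflow a naive exp-sum
print(log_sum_exp_over_tags(scores).shape)  # torch.Size([2, 5])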
7 changes: 2 additions & 5 deletions model/seqmodel.py
@@ -7,10 +7,8 @@
from __future__ import print_function
from __future__ import absolute_import
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from .wordsequence import WordSequence
from .crf import CRF

@@ -19,7 +17,7 @@ def __init__(self, data):
super(SeqModel, self).__init__()
self.use_crf = data.use_crf
print("build network...")
print("use_char: ", data.use_char)
print("use_char: ", data.use_char)
if data.use_char:
print("char feature extractor: ", data.char_feature_extractor)
print("word feature extractor: ", data.word_feature_extractor)
@@ -30,7 +28,7 @@ def __init__(self, data):
## add two more label for downlayer lstm, use original label size for CRF
label_size = data.label_alphabet_size
data.label_alphabet_size += 2
self.word_hidden = WordSequence(data)
self.word_hidden = WordSequence(data)
if self.use_crf:
self.crf = CRF(label_size, self.gpu)

@@ -83,4 +81,3 @@ def decode_nbest(self, word_inputs, feature_inputs, word_seq_lengths, char_input
scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
return scores, tag_seq
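In seqmodel.py, aside from the import cleanup, the constructor keeps the original label count for the CRF while the word-level layer is built with two extra label slots, which the CRF reserves for its START and STOP transitions (indexed -2 and -1 in crf.py). A toy illustration of that bookkeeping with a hypothetical label set (not the repository's data class):

# Hypothetical label inventory, for illustration only.
labels = ["O", "B-PER", "I-PER", "B-LOC", "I-LOC"]

crf_label_size = len(labels)                  # what CRF(label_size, gpu) receives
word_layer_output_size = crf_label_size + 2   # downstream layer also scores START/STOP slots
START_TAG, STOP_TAG = -2, -1                  # as defined in crf.py
print(crf_label_size, word_layer_output_size) # 5 7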


8 changes: 2 additions & 6 deletions model/wordrep.py
@@ -6,11 +6,8 @@
from __future__ import print_function
from __future__ import absolute_import
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from .charbilstm import CharBiLSTM
from .charbigru import CharBiGRU
from .charcnn import CharCNN
@@ -47,7 +44,7 @@ def __init__(self, data):
self.word_embedding.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
else:
self.word_embedding.weight.data.copy_(torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))

self.feature_num = data.feature_num
self.feature_embedding_dims = data.feature_emb_dims
self.feature_embeddings = nn.ModuleList()
@@ -86,7 +83,7 @@ def forward(self, word_inputs,feature_inputs, word_seq_lengths, char_inputs, cha
char_inputs: (batch_size*sent_len, word_length)
char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
char_seq_recover: variable which records the char order information, used to recover char order
output:
output:
Variable(batch_size, sent_len, hidden_dim)
"""
batch_size = word_inputs.size(0)
@@ -113,4 +110,3 @@ def forward(self, word_inputs,feature_inputs, word_seq_lengths, char_inputs, cha
word_embs = torch.cat(word_list, 2)
word_represent = self.drop(word_embs)
return word_represent
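
wordrep.py's forward concatenates word embeddings, any feature embeddings, and the character representation along the last dimension, then applies dropout. A compact sketch of that concatenation with assumed dimensions (not the repository's module):

import torch
import torch.nn as nn

batch_size, sent_len = 2, 9
word_emb_dim, char_hidden_dim, feature_emb_dim = 100, 50, 20

word_embs = torch.randn(batch_size, sent_len, word_emb_dim)
char_rep = torch.randn(batch_size, sent_len, char_hidden_dim)
feature_embs = torch.randn(batch_size, sent_len, feature_emb_dim)

drop = nn.Dropout(0.5)
word_represent = drop(torch.cat([word_embs, char_rep, feature_embs], dim=2))
print(word_represent.shape)  # torch.Size([2, 9, 170]) -> (batch_size, sent_len, hidden_dim)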
