Commit

python 3

Lazy Programmer committed Dec 29, 2017
1 parent ce6aba1 commit 244d1b6
Showing 14 changed files with 434 additions and 202 deletions.
100 changes: 53 additions & 47 deletions nlp_class2/glove.py
@@ -1,6 +1,12 @@
# Course URL:
# https://deeplearningcourses.com/c/natural-language-processing-with-deep-learning-in-python
# https://udemy.com/natural-language-processing-with-deep-learning-in-python
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future


import os
import json
import numpy as np
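For reference, the `future`-based header added to each file is what lets one source run identically on Python 2 and 3. A minimal sketch of the effect, assuming the `future` package is installed (`pip install future`):

from __future__ import print_function, division
from builtins import range  # backported py3 range; needs `future` on py2

print(3 / 2)    # 1.5 under Python 2 and 3 alike (true division)
print(3 // 2)   # 1 in both (floor division)
print(sum(i for i in range(5)))  # range is lazy, like the old xrange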
@@ -41,14 +47,14 @@ def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100,
if not os.path.exists(cc_matrix):
X = np.zeros((V, V))
N = len(sentences)
print "number of sentences to process:", N
print("number of sentences to process:", N)
it = 0
for sentence in sentences:
it += 1
if it % 10000 == 0:
print "processed", it, "/", N
print("processed", it, "/", N)
n = len(sentence)
for i in xrange(n):
for i in range(n):
# i is not the word index!!!
# j is not the word index!!!
# i just points to which element of the sequence (sentence) we're looking at
@@ -72,14 +78,14 @@ def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100,
X[1,wi] += points

# left side
for j in xrange(start, i):
for j in range(start, i):
wj = sentence[j]
points = 1.0 / (i - j) # this is +ve
X[wi,wj] += points
X[wj,wi] += points

# right side
for j in xrange(i + 1, end):
for j in range(i + 1, end):
wj = sentence[j]
points = 1.0 / (j - i) # this is +ve
X[wi,wj] += points
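To see what the window loops accumulate, here is a toy sketch (made-up indices, not the repo's code). For brevity it scans only the left context and relies on the symmetric update to credit both directions of each pair:

import numpy as np

V = 4                         # made-up tiny vocabulary
sentence = [0, 2, 1, 3, 2]    # word indices for one sentence
context_sz = 2
X = np.zeros((V, V))

n = len(sentence)
for i in range(n):
    wi = sentence[i]
    # scan only the left context; the symmetric update below credits
    # both (wi, wj) and (wj, wi), so every pair is still counted
    for j in range(max(0, i - context_sz), i):
        wj = sentence[j]
        X[wi, wj] += 1.0 / (i - j)   # nearer neighbors weigh more
        X[wj, wi] += 1.0 / (i - j)

print(X)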
@@ -90,21 +96,21 @@ def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100,
else:
X = np.load(cc_matrix)

print "max in X:", X.max()
print("max in X:", X.max())

# weighting
fX = np.zeros((V, V))
fX[X < xmax] = (X[X < xmax] / float(xmax)) ** alpha
fX[X >= xmax] = 1

print "max in f(X):", fX.max()
print("max in f(X):", fX.max())

# target
logX = np.log(X + 1)

print "max in log(X):", logX.max()
print("max in log(X):", logX.max())

print "time to build co-occurrence matrix:", (datetime.now() - t0)
print("time to build co-occurrence matrix:", (datetime.now() - t0))

# initialize weights
W = np.random.randn(V, D) / np.sqrt(V + D)
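The `fX` block above is the standard GloVe weighting, f(x) = (x/xmax)^alpha capped at 1. A small sketch of its shape; alpha = 0.75 is the paper's default and an assumption here, since the signature is truncated in this view:

import numpy as np

def weight(x, xmax=100, alpha=0.75):
    # f(x) = (x / xmax)^alpha below the cap, 1 above it
    return np.where(x < xmax, (x / float(xmax)) ** alpha, 1.0)

print(weight(np.array([0., 1., 10., 100., 1000.])))
# -> [0.      0.0316  0.1778  1.      1.    ]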
@@ -160,11 +166,11 @@ def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100,

costs = []
sentence_indexes = range(len(sentences))
for epoch in xrange(epochs):
for epoch in range(epochs):
delta = W.dot(U.T) + b.reshape(V, 1) + c.reshape(1, V) + mu - logX
cost = ( fX * delta * delta ).sum()
costs.append(cost)
print "epoch:", epoch, "cost:", cost
print("epoch:", epoch, "cost:", cost)

if gd:
# gradient descent method
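Each epoch forms the dense V x V residual `delta` and the weighted squared-error cost. A self-contained sketch of that objective with random stand-ins (here `mu` is assumed to be the mean of log X, which matches how it enters the residual):

import numpy as np

np.random.seed(0)
V, D = 6, 3                      # tiny stand-in sizes
W = np.random.randn(V, D) / np.sqrt(V + D)   # input word vectors
U = np.random.randn(V, D) / np.sqrt(V + D)   # context word vectors
b = np.zeros(V)                  # input biases
c = np.zeros(V)                  # context biases
logX = np.random.random((V, V))  # stand-in for log(X + 1)
fX = np.random.random((V, V))    # stand-in for the f(X) weights
mu = logX.mean()                 # assumed: global mean of the log-counts

delta = W.dot(U.T) + b.reshape(V, 1) + c.reshape(1, V) + mu - logX
cost = (fX * delta * delta).sum()
print("cost:", cost)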
@@ -183,32 +189,32 @@ def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100,
else:
# update W
oldW = W.copy()
for i in xrange(V):
# for j in xrange(V):
for i in range(V):
# for j in range(V):
# W[i] -= learning_rate*fX[i,j]*(W[i].dot(U[j]) + b[i] + c[j] + mu - logX[i,j])*U[j]
W[i] -= learning_rate*(fX[i,:]*delta[i,:]).dot(U)
W -= learning_rate*reg*W
# print "updated W"

# update b
for i in xrange(V):
# for j in xrange(V):
for i in range(V):
# for j in range(V):
# b[i] -= learning_rate*fX[i,j]*(W[i].dot(U[j]) + b[i] + c[j] + mu - logX[i,j])
b[i] -= learning_rate*fX[i,:].dot(delta[i,:])
b -= learning_rate*reg*b
# print "updated b"

# update U
for j in xrange(V):
# for i in xrange(V):
for j in range(V):
# for i in range(V):
# U[j] -= learning_rate*fX[i,j]*(W[i].dot(U[j]) + b[i] + c[j] + mu - logX[i,j])*W[i]
U[j] -= learning_rate*(fX[:,j]*delta[:,j]).dot(oldW)
U -= learning_rate*reg*U
# print "updated U"

# update c
for j in xrange(V):
# for i in xrange(V):
for j in range(V):
# for i in range(V):
# c[j] -= learning_rate*fX[i,j]*(W[i].dot(U[j]) + b[i] + c[j] + mu - logX[i,j])
c[j] -= learning_rate*fX[:,j].dot(delta[:,j])
c -= learning_rate*reg*c
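The row-by-row loops above collapse into whole-matrix updates. A hedged vectorized rewrite of one gradient step with the same stand-in setup as the previous sketch (the regularization is applied jointly here, while the loops apply it after the data term, a negligible difference for small steps):

import numpy as np

np.random.seed(0)
V, D = 6, 3
learning_rate, reg = 3e-4, 0.1
W, U = np.random.randn(V, D), np.random.randn(V, D)
b, c = np.zeros(V), np.zeros(V)
fX, logX = np.random.random((V, V)), np.random.random((V, V))
mu = logX.mean()

delta = W.dot(U.T) + b.reshape(V, 1) + c.reshape(1, V) + mu - logX
oldW = W.copy()                                    # U's update uses the pre-step W
W -= learning_rate * ((fX * delta).dot(U)      + reg * W)
b -= learning_rate * ((fX * delta).sum(axis=1) + reg * b)
U -= learning_rate * ((fX * delta).T.dot(oldW) + reg * U)
c -= learning_rate * ((fX * delta).sum(axis=0) + reg * c)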
@@ -220,8 +226,8 @@ def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100,
# update W
# fast way
# t0 = datetime.now()
for i in xrange(V):
# matrix = reg*np.eye(D) + np.sum((fX[i,j]*np.outer(U[j], U[j]) for j in xrange(V)), axis=0)
for i in range(V):
# matrix = reg*np.eye(D) + np.sum((fX[i,j]*np.outer(U[j], U[j]) for j in range(V)), axis=0)
matrix = reg*np.eye(D) + (fX[i,:]*U.T).dot(U)
# assert(np.abs(matrix - matrix2).sum() < 1e-5)
vector = (fX[i,:]*(logX[i,:] - b[i] - c - mu)).dot(U)
@@ -230,10 +236,10 @@ def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100,

# slow way
# t0 = datetime.now()
# for i in xrange(V):
# for i in range(V):
# matrix2 = reg*np.eye(D)
# vector2 = 0
# for j in xrange(V):
# for j in range(V):
# matrix2 += fX[i,j]*np.outer(U[j], U[j])
# vector2 += fX[i,j]*(logX[i,j] - b[i] - c[j])*U[j]
# print "slow way took:", (datetime.now() - t0)
@@ -244,34 +250,34 @@ def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100,
# print "updated W"

# update b
for i in xrange(V):
for i in range(V):
denominator = fX[i,:].sum()
# assert(denominator > 0)
numerator = fX[i,:].dot(logX[i,:] - W[i].dot(U.T) - c - mu)
# for j in xrange(V):
# for j in range(V):
# numerator += fX[i,j]*(logX[i,j] - W[i].dot(U[j]) - c[j])
b[i] = numerator / denominator / (1 + reg)
# print "updated b"

# update U
for j in xrange(V):
# matrix = reg*np.eye(D) + np.sum((fX[i,j]*np.outer(W[i], W[i]) for i in xrange(V)), axis=0)
for j in range(V):
# matrix = reg*np.eye(D) + np.sum((fX[i,j]*np.outer(W[i], W[i]) for i in range(V)), axis=0)
matrix = reg*np.eye(D) + (fX[:,j]*W.T).dot(W)
# assert(np.abs(matrix - matrix2).sum() < 1e-8)
vector = (fX[:,j]*(logX[:,j] - b - c[j] - mu)).dot(W)
# matrix = reg*np.eye(D)
# vector = 0
# for i in xrange(V):
# for i in range(V):
# matrix += fX[i,j]*np.outer(W[i], W[i])
# vector += fX[i,j]*(logX[i,j] - b[i] - c[j])*W[i]
U[j] = np.linalg.solve(matrix, vector)
# print "updated U"

# update c
for j in xrange(V):
for j in range(V):
denominator = fX[:,j].sum()
numerator = fX[:,j].dot(logX[:,j] - W.dot(U[j]) - b - mu)
# for i in xrange(V):
# for i in range(V):
# numerator += fX[i,j]*(logX[i,j] - W[i].dot(U[j]) - b[i])
c[j] = numerator / denominator / (1 + reg)
# print "updated c"
@@ -319,28 +325,28 @@ def main(we_file, w2i_file, use_brown=True, n_files=50):

V = len(word2idx)
model = Glove(100, V, 10)
# model.fit(sentences, cc_matrix=cc_matrix, epochs=20) # ALS
model.fit(
sentences,
cc_matrix=cc_matrix,
learning_rate=3e-4,
reg=0.1,
epochs=10,
gd=True,
use_theano=False,
use_tensorflow=True,
)
model.fit(sentences, cc_matrix=cc_matrix, epochs=20) # ALS
# model.fit(
# sentences,
# cc_matrix=cc_matrix,
# learning_rate=3e-4,
# reg=0.1,
# epochs=10,
# gd=True,
# use_theano=False,
# use_tensorflow=True,
# )
model.save(we_file)


if __name__ == '__main__':
# we = 'glove_model_50.npz'
# w2i = 'glove_word2idx_50.json'
we = 'glove_model_brown.npz'
w2i = 'glove_word2idx_brown.json'
main(we, w2i, use_brown=True)
we = 'glove_model_50.npz'
w2i = 'glove_word2idx_50.json'
# we = 'glove_model_brown.npz'
# w2i = 'glove_word2idx_brown.json'
main(we, w2i, use_brown=False)
for concat in (True, False):
print "** concat:", concat
print("** concat:", concat)
find_analogies('king', 'man', 'woman', concat, we, w2i)
find_analogies('france', 'paris', 'london', concat, we, w2i)
find_analogies('france', 'paris', 'rome', concat, we, w2i)
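find_analogies itself isn't shown in this diff. A hypothetical stand-in for the usual vector-arithmetic lookup it performs (names and behavior assumed, not the repo's implementation; the repo's version also takes a concat flag controlling how W and U are combined):

import numpy as np

def find_closest(We, word2idx, a, b, c):
    # returns the word nearest to vec(a) - vec(b) + vec(c) by cosine similarity
    idx2word = {i: w for w, i in word2idx.items()}
    v = We[word2idx[a]] - We[word2idx[b]] + We[word2idx[c]]
    sims = We.dot(v) / (np.linalg.norm(We, axis=1) * np.linalg.norm(v) + 1e-10)
    for i in sims.argsort()[::-1]:          # best match first
        if idx2word[i] not in (a, b, c):    # skip the query words themselves
            return idx2word[i]

# e.g. find_closest(We, word2idx, 'king', 'man', 'woman') -> ideally 'queen'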
26 changes: 16 additions & 10 deletions nlp_class2/ner_baseline.py
@@ -5,6 +5,12 @@
# data from https://github.com/aritter/twitter_nlp/blob/master/data/annotated/ner.txt
# data2 from http://schwa.org/projects/resources/wiki/Wikiner#WikiGold

from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future


import numpy as np
from sklearn.utils import shuffle
from pos_baseline import LogisticRegression
@@ -43,14 +49,14 @@ def get_data(split_sequences=False):
Xtrain = currentX
Ytrain = currentY

print "number of samples:", len(Xtrain)
print("number of samples:", len(Xtrain))
Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
Ntest = int(0.3*len(Xtrain))
Xtest = Xtrain[:Ntest]
Ytest = Ytrain[:Ntest]
Xtrain = Xtrain[Ntest:]
Ytrain = Ytrain[Ntest:]
print "number of classes:", len(tag2idx)
print("number of classes:", len(tag2idx))
return Xtrain, Ytrain, Xtest, Ytest, word2idx, tag2idx


@@ -88,32 +94,32 @@ def get_data(split_sequences=False):
# Xtrain = np.concatenate(Xtrain)
# Ytrain = np.concatenate(Ytrain)

# print "number of samples:", len(Xtrain)
# print("number of samples:", len(Xtrain))
# Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
# Ntest = int(0.3*len(Xtrain))
# Xtest = Xtrain[:Ntest]
# Ytest = Ytrain[:Ntest]
# Xtrain = Xtrain[Ntest:]
# Ytrain = Ytrain[Ntest:]
# print "number of classes:", len(tag2idx)
# print("number of classes:", len(tag2idx))
# return Xtrain, Ytrain, Xtest, Ytest, word2idx, tag2idx


def main():
Xtrain, Ytrain, Xtest, Ytest, word2idx, tag2idx = get_data()

V = len(word2idx)
print "vocabulary size:", V
print("vocabulary size:", V)
K = len(tag2idx)

# train and score
model = LogisticRegression()
model.fit(Xtrain, Ytrain, V=V, K=K, epochs=5)
print "training complete"
print "train score:", model.score(Xtrain, Ytrain)
print "train f1 score:", model.f1_score(Xtrain, Ytrain)
print "test score:", model.score(Xtest, Ytest)
print "test f1 score:", model.f1_score(Xtest, Ytest)
print("training complete")
print("train score:", model.score(Xtrain, Ytrain))
print("train f1 score:", model.f1_score(Xtrain, Ytrain))
print("test score:", model.score(Xtest, Ytest))
print("test f1 score:", model.f1_score(Xtest, Ytest))

if __name__ == '__main__':
main()
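The baseline's pos_baseline.LogisticRegression isn't shown here; conceptually it predicts each token's tag from the word identity alone. A conceptual stand-in using sklearn to make that concrete (the repo's class is a custom implementation, not sklearn's):

import numpy as np
from sklearn.linear_model import LogisticRegression

V, K = 1000, 5                               # made-up vocab and tag-set sizes
words = np.random.randint(0, V, size=200)    # token stream as word indices
tags = np.random.randint(0, K, size=200)     # aligned tag indices

onehot = np.zeros((len(words), V))
onehot[np.arange(len(words)), words] = 1     # one-hot encode the word identity

clf = LogisticRegression(max_iter=1000).fit(onehot, tags)
print("train accuracy:", clf.score(onehot, tags))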
12 changes: 10 additions & 2 deletions nlp_class2/ner_rnn.py
@@ -1,6 +1,12 @@
# Course URL:
# https://deeplearningcourses.com/c/natural-language-processing-with-deep-learning-in-python
# https://udemy.com/natural-language-processing-with-deep-learning-in-python
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future


from ner_baseline import get_data
from pos_rnn import RNN

@@ -10,8 +16,10 @@ def main():
K = len(tag2idx)
rnn = RNN(10, [10], V, K)
rnn.fit(Xtrain, Ytrain, epochs=70)
print "train f1 score:", rnn.f1_score(Xtrain, Ytrain)
print "test f1 score:", rnn.f1_score(Xtest, Ytest)
print("train score:", rnn.score(Xtrain, Ytrain))
print("test score:", rnn.score(Xtest, Ytest))
print("train f1 score:", rnn.f1_score(Xtrain, Ytrain))
print("test f1 score:", rnn.f1_score(Xtest, Ytest))


if __name__ == '__main__':
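The commit adds plain accuracy (score) alongside F1, and both are worth having: with NER tags dominated by the O class, accuracy can look strong while entity-level F1 is poor. A small illustration using sklearn (the repo's own f1_score method may be computed differently):

from sklearn.metrics import accuracy_score, f1_score

y_true = ["O", "O", "O", "O", "O", "O", "O", "O", "PER", "LOC"]
y_pred = ["O"] * 10            # a degenerate tagger that predicts only O

print(accuracy_score(y_true, y_pred))             # 0.8, looks decent
print(f1_score(y_true, y_pred, average="macro"))  # ~0.30, reveals the failure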