
Commit d0b0778

Update to latest master changes
ramon-astudillo committed Jul 16, 2017
1 parent 9ad7d3a commit d0b0778
Showing 4 changed files with 66 additions and 51 deletions.
112 changes: 65 additions & 47 deletions lxmls/deep_learning/sgd.py
@@ -6,6 +6,48 @@
 import theano.tensor as T
 
 
+def get_numpy_batch_update(model, lrate, theano=False):
+    '''
+    Returns a batch update function with the update rule written in numpy
+    (slow).
+    '''
+
+    # Note that the model and lrate variables will be available inside
+    # batch_up. This happens because a function has access to the variables
+    # in the scope where it was defined (where this comment is). This is
+    # known as a closure.
+
+    if theano:
+
+        def batch_up(batch_x, batch_y):
+            '''
+            Update for theano.shared variables
+            '''
+            # Get gradients for each layer and this batch
+            nabla_params = model.grads(batch_x, batch_y)
+            # Update each parameter with the SGD rule
+            for m in np.arange(len(model.params)):
+                # Parameters as theano shared variables
+                model.params[m].set_value(model.params[m].get_value()
+                                          - lrate*np.array(nabla_params[m]))
+
+    else:
+
+        def batch_up(batch_x, batch_y):
+            '''
+            Update for numpy arrays
+            '''
+            # Get gradients for each layer and this batch
+            nabla_params = model.grads(batch_x, batch_y)
+            # Update each parameter with the SGD rule
+            for m in np.arange(len(model.params)):
+                # Parameters as numpy arrays
+                model.params[m] -= lrate * nabla_params[m]
+
+    return batch_up
+
+
 def class_acc(hat_y, y_ref):
     """
     Computes percent accuracy and log probability given estimated and reference
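Note: the closure comment above is the crux of this refactor. A minimal standalone sketch of the same pattern (toy numbers, not repo code):

def make_update(lrate):
    def update(param, grad):
        # lrate is read from the enclosing scope when update runs: a closure
        return param - lrate * grad
    return update

step = make_update(0.1)
print(step(1.0, 0.5))  # 0.95

get_numpy_batch_update works the same way: model and lrate are baked into the returned batch_up, so the training loop no longer needs to pass them around.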
@@ -20,14 +62,12 @@ class indices


 def sanity_checks(batch_up, n_batch, bsize, lrate, train_set):
 
     if batch_up:
 
-        if not n_batch:
+        if not n_batch or not bsize or not train_set:
             raise ValueError("If you use compiled batch update you need to "
-                             "specify n_batch")
-        if bsize or lrate or train_set:
-            raise ValueError("If you use compiled batch update you can not"
-                             "specify bsize, lrate and train_set")
+                             "specify n_batch, bsize and train_set")
     else:
 
         if not bsize or not lrate or not train_set:
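Note: with the consolidated check, supplying a compiled batch_up while omitting bsize or train_set now fails fast. A hypothetical illustration with dummy arguments (assuming the module is importable as lxmls.deep_learning.sgd):

from lxmls.deep_learning.sgd import sanity_checks

try:
    # compiled update given, but bsize and train_set missing
    sanity_checks(batch_up=lambda x, y: None, n_batch=10, bsize=None,
                  lrate=0.1, train_set=None)
except ValueError as e:
    print(e)  # If you use compiled batch update you need to specify ...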
@@ -37,20 +77,21 @@ def sanity_checks(batch_up, n_batch, bsize, lrate, train_set):

 def SGD_train(model, n_iter, bsize=None, lrate=None, train_set=None,
               batch_up=None, n_batch=None, devel_set=None, model_dbg=None):
 
     # SANITY CHECKS:
     sanity_checks(batch_up, n_batch, bsize, lrate, train_set)
 
-    if not batch_up:
-        train_x, train_y = train_set
+    train_x, train_y = train_set
 
-        # Number of mini batches
-        n_batch = train_x.shape[1]//bsize + 1
-
-        # Check for Theano vars
+    # If no batch update is provided, use the numpy one
+    if not batch_up:
         if getattr(model, "_forward", None):
-            shared_vars = True
+            batch_up = get_numpy_batch_update(model, lrate, theano=True)
         else:
-            shared_vars = False
+            batch_up = get_numpy_batch_update(model, lrate, theano=False)
+
+    # Number of mini batches
+    n_batch = int(np.ceil(float(train_x.shape[1])/bsize))
 
     # For each iteration run backpropagation in a batch of examples. For
     # each batch, sum up all gradients and update each weight with the
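Note: the new batch count also fixes an off-by-one in the old // bsize + 1 formula, which produced an empty trailing batch whenever bsize divided the number of examples exactly. A quick check with toy sizes:

import numpy as np

bsize = 25
for n_examples in [100, 101]:
    old = n_examples // bsize + 1
    new = int(np.ceil(float(n_examples) / bsize))
    print("%d examples: old=%d, new=%d" % (n_examples, old, new))
# 100 examples: old=5, new=4  (old formula adds an empty batch)
# 101 examples: old=5, new=5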
@@ -59,38 +100,19 @@ def SGD_train(model, n_iter, bsize=None, lrate=None, train_set=None,
-    prev_p_train = None
     for i in np.arange(n_iter):
-        # This will hold the posterior of train data for each epoch
-        p_train = 0
         init_time = time.clock()
         for j in np.arange(n_batch):
 
-            if batch_up:
-                # Compiled batch update
-                p_train += -batch_up(j)
-
-            else:
-
-                # Manual batch update
-
-                # Mini batch
-                batch_x = train_x[:, j*bsize:(j+1)*bsize]
-                batch_y = train_y[j*bsize:(j+1)*bsize]
-
-                # Get gradients for each layer and this batch
-                nabla_params = model.grads(batch_x, batch_y)
-
-                # Update each parameter with SGD rule
-                for m in np.arange(len(model.params)):
-                    if shared_vars:
-                        # Parameters as theano shared variables
-                        model.params[m].set_value(model.params[m].get_value() - lrate*np.array(nabla_params[m]))
-                    else:
-                        # Parameters as numpy array
-                        model.params[m] -= lrate * nabla_params[m]
-
+            # Mini batch
+            batch_x = train_x[:, j*bsize:(j+1)*bsize]
+            batch_y = train_y[j*bsize:(j+1)*bsize]
+            # Update parameters
+            batch_up(batch_x, batch_y)
             # INFO
-            sys.stdout.write("\rBatch %d/%d (%d%%) " %
-                             (j+1, n_batch, (j+1)*100.0/n_batch))
+            print "\rBatch %d/%d (%d%%) " % \
+                (j+1, n_batch, (j+1)*100.0/n_batch),
             sys.stdout.flush()
 
         # Record time
         batch_time = time.clock() - init_time
 
         # Check probability of devel set
@@ -101,11 +123,7 @@ def SGD_train(model, n_iter, bsize=None, lrate=None, train_set=None,
             else:
                 delta_p_devel = 0
             prev_p_devel = p_devel
-        if prev_p_train:
-            delta_p_train = p_train - prev_p_train
-        else:
-            delta_p_train = 0
-        prev_p_train = p_train
 
         validation_time = time.clock() - init_time - batch_time
         sys.stdout.write(" Epoch %2d/%2d in %2.2f seg\n" % (i+1, n_iter, batch_time))
         if devel_set:
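Note: after this refactor every configuration reduces to one call shape: build batch_up once, then feed it mini-batches. A minimal sketch of that contract against a stand-in model (ToyModel and its shapes are hypothetical, not from the repo; assumes the module is importable as lxmls.deep_learning.sgd):

import numpy as np
from lxmls.deep_learning.sgd import get_numpy_batch_update

class ToyModel(object):
    # Bare-bones stand-in exposing the interface sgd.py relies on:
    # a .params list and a .grads(batch_x, batch_y) method
    def __init__(self):
        self.params = [np.zeros((2, 2)), np.zeros(2)]

    def grads(self, batch_x, batch_y):
        # constant dummy gradients with shapes matching params
        return [np.ones((2, 2)), np.ones(2)]

model = ToyModel()
batch_up = get_numpy_batch_update(model, lrate=0.1, theano=False)
batch_up(None, None)    # one SGD step; ToyModel ignores its inputs
print(model.params[1])  # [-0.1 -0.1]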
1 change: 0 additions & 1 deletion lxmls/labs/day5.py
@@ -44,7 +44,6 @@
 print "\n Exercise 5.1"
 print "######################"
 
-# TODO: Find another corpus (SemEval?) and cleaner parameter extraction
 import numpy as np
 import lxmls.readers.sentiment_reader as srs

1 change: 0 additions & 1 deletion lxmls/readers/pos_corpus.py
@@ -70,7 +70,6 @@ def compacify(train_seq, test_seq, dev_seq, theano=False):
     # These must be the same
     # tmap = {v: k for k, v in train_seq.x_dict.items()}
     # tmap2 = {v: k for k, v in train_seq2.x_dict.items()}
-    # import pdb;pdb.set_trace()
     # [tmap[i] for i in train_seq[0].x]
     # [tmap2[i] for i in train_seq2[0].x]

3 changes: 1 addition & 2 deletions lxmls/readers/simple_data_set.py
@@ -7,8 +7,7 @@ class SimpleDataSet:
""" A simple two dimensional dataset for visualization purposes. The date set contains points from two gaussians with mean u_i and std_i"""

def __init__(self, nr_examples=100, g1=[[-5, -5], 1], g2=[[5, 5], 1], balance=0.5, split=[0.8, 0, 0.2]):
# FIXME: cast to int
nr_positive = nr_examples * balance # number of examples of "positive" class
nr_positive = int(nr_examples * balance) # number of examples of "positive" class
nr_negative = nr_examples - nr_positive # number of examples of "negative" class
self.mean1 = g1[0] # mean of positive class
self.mean2 = g2[0] # mean of negative class
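Note: the cast resolves the removed FIXME: with the default balance of 0.5, nr_examples * balance is a float, which cannot serve as an example count or array index. A two-line check with the default values:

nr_examples, balance = 100, 0.5
print(nr_examples * balance)       # 50.0, a float
print(int(nr_examples * balance))  # 50, usable as a count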
