Skip to content

Commit

Permalink
Merge pull request LxMLS#106 from JoaoLages/develop
Browse files Browse the repository at this point in the history
Fix solution and description of exercises 2.10 and 2.11
  • Loading branch information
ramon-astudillo committed Jun 8, 2018
2 parents d5e8d23 + 1a63c62 commit f06306a
Showing 1 changed file with 20 additions and 31 deletions.
51 changes: 20 additions & 31 deletions labs/notebooks/sequence_models/Exercises_2.1_to_2.11.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Excercice 2.10\n",
"# Exercise 2.10\n",
"Implement the method to update the counts given the state and transition posteriors\n",
"\n",
"```\n",
Expand All @@ -548,37 +548,26 @@
"source": [
"### Solution\n",
"````\n",
"def train_EM(self, dataset, smoothing=0, num_epochs=10, evaluate=True):\n",
" self.initialize_random()\n",
"\n",
" if evaluate:\n",
" acc = self.evaluate_EM(dataset)\n",
" print \"Initial accuracy: %f\"%(acc)\n",
"def update_counts(self, sequence, state_posteriors, transition_posteriors):\n",
" \"\"\" Used in the E-step in EM.\"\"\"\n",
"\n",
" for t in xrange(1, num_epochs):\n",
" #E-Step\n",
" total_log_likelihood = 0.0\n",
" self.clear_counts(smoothing)\n",
" for sequence in dataset.seq_list:\n",
" # Compute scores given the observation sequence.\n",
" initial_scores, transition_scores, final_scores, emission_scores = \\\n",
" self.compute_scores(sequence)\n",
" num_states = self.get_num_states() # Number of states.\n",
" length = len(sequence.x) # Length of the sequence.\n",
"\n",
" state_posteriors, transition_posteriors, log_likelihood = \\\n",
" self.compute_posteriors(initial_scores,\n",
" transition_scores,\n",
" final_scores,\n",
" emission_scores)\n",
" self.update_counts(sequence, state_posteriors, transition_posteriors)\n",
" total_log_likelihood += log_likelihood\n",
" # Take care of initial probs\n",
" for y in range(num_states):\n",
" self.initial_counts[y] += state_posteriors[0, y]\n",
" for pos in range(length):\n",
" x = sequence.x[pos]\n",
" for y in range(num_states):\n",
" self.emission_counts[x, y] += state_posteriors[pos, y]\n",
" if pos > 0:\n",
" for y_prev in range(num_states):\n",
" self.transition_counts[y, y_prev] += transition_posteriors[pos-1, y, y_prev]\n",
"\n",
" print \"Iter: %i Log Likelihood: %f\"%(t, total_log_likelihood)\n",
" #M-Step\n",
" self.compute_parameters()\n",
" if evaluate:\n",
" ### Evaluate accuracy at this iteration\n",
" acc = self.evaluate_EM(dataset)\n",
" print \"Iter: %i Accuracy: %f\"%(t,acc) \n",
" # Final position\n",
" for y in range(num_states):\n",
" self.final_counts[y] += state_posteriors[length-1, y]\n",
"````"
]
},
Expand All @@ -587,7 +576,7 @@
"metadata": {},
"source": [
"# Exercise 2.11\n",
"Implement the method to update the counts given the state and transition posteriors."
"Run 20 epochs of the EM algorithm for part of speech induction:"
]
},
{
Expand Down Expand Up @@ -633,7 +622,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.5.2"
}
},
"nbformat": 4,
Expand Down

0 comments on commit f06306a

Please sign in to comment.