Skip to content

Commit

Permalink
Merge pull request LxMLS#106 from JoaoLages/develop
Browse files Browse the repository at this point in the history
Fix solution and description of exercises 2.10 and 2.11
  • Loading branch information
ramon-astudillo committed Jun 8, 2018
2 parents d5e8d23 + 1a63c62 commit f06306a
Showing 1 changed file with 20 additions and 31 deletions.
51 changes: 20 additions & 31 deletions labs/notebooks/sequence_models/Exercises_2.1_to_2.11.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Excercice 2.10\n",
"# Exercise 2.10\n",
"Implement the method to update the counts given the state and transition posteriors\n",
"\n",
"```\n",
Expand All @@ -548,37 +548,26 @@
"source": [
"### Solution\n",
"````\n",
"def train_EM(self, dataset, smoothing=0, num_epochs=10, evaluate=True):\n",
" self.initialize_random()\n",
"\n",
" if evaluate:\n",
" acc = self.evaluate_EM(dataset)\n",
" print \"Initial accuracy: %f\"%(acc)\n",
"def update_counts(self, sequence, state_posteriors, transition_posteriors):\n",
" \"\"\" Used in the E-step in EM.\"\"\"\n",
"\n",
" for t in xrange(1, num_epochs):\n",
" #E-Step\n",
" total_log_likelihood = 0.0\n",
" self.clear_counts(smoothing)\n",
" for sequence in dataset.seq_list:\n",
" # Compute scores given the observation sequence.\n",
" initial_scores, transition_scores, final_scores, emission_scores = \\\n",
" self.compute_scores(sequence)\n",
" num_states = self.get_num_states() # Number of states.\n",
" length = len(sequence.x) # Length of the sequence.\n",
"\n",
" state_posteriors, transition_posteriors, log_likelihood = \\\n",
" self.compute_posteriors(initial_scores,\n",
" transition_scores,\n",
" final_scores,\n",
" emission_scores)\n",
" self.update_counts(sequence, state_posteriors, transition_posteriors)\n",
" total_log_likelihood += log_likelihood\n",
" # Take care of initial probs\n",
" for y in range(num_states):\n",
" self.initial_counts[y] += state_posteriors[0, y]\n",
" for pos in range(length):\n",
" x = sequence.x[pos]\n",
" for y in range(num_states):\n",
" self.emission_counts[x, y] += state_posteriors[pos, y]\n",
" if pos > 0:\n",
" for y_prev in range(num_states):\n",
" self.transition_counts[y, y_prev] += transition_posteriors[pos-1, y, y_prev]\n",
"\n",
" print \"Iter: %i Log Likelihood: %f\"%(t, total_log_likelihood)\n",
" #M-Step\n",
" self.compute_parameters()\n",
" if evaluate:\n",
" ### Evaluate accuracy at this iteration\n",
" acc = self.evaluate_EM(dataset)\n",
" print \"Iter: %i Accuracy: %f\"%(t,acc) \n",
" # Final position\n",
" for y in range(num_states):\n",
" self.final_counts[y] += state_posteriors[length-1, y]\n",
"````"
]
},
Expand All @@ -587,7 +576,7 @@
"metadata": {},
"source": [
"# Exercise 2.11\n",
"Implement the method to update the counts given the state and transition posteriors."
"Run 20 epochs of the EM algorithm for part of speech induction:"
]
},
{
Expand Down Expand Up @@ -633,7 +622,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.5.2"
}
},
"nbformat": 4,
Expand Down

0 comments on commit f06306a

Please sign in to comment.