Merge pull request PRML#8 from sth4nth/master

version update
tonysy · Jan 26, 2016 · 4e93847 · 4e93847
2 parents 69352c7 + 6dd77b1
commit 4e93847
Show file tree

Hide file tree

Showing 130 changed files with 2,208 additions and 1,152 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+reference/*
diff --git a/README.md b/README.md
@@ -4,6 +4,12 @@ Pattern Recognition and Machine Learning
 This package contains the matlab implementation of the algorithms described in the book:
 Pattern Recognition and Machine Learning by C. Bishop (http://research.microsoft.com/en-us/um/people/cmbishop/prml/)
 
+The goals of the code are as follows:
+1) Clean. The code is kept as clean as possible: there is very little defensive guarding code to distract the reader, so the core of each algorithm is easy to spot.
+2) Efficient. MATLAB vectorization tricks are used as much as possible to make the functions fast; many functions are even comparable with C implementations. Usually, the functions in this package are orders of magnitude faster than the MATLAB built-in functions that provide the same functionality (such as kmeans). If anyone can find a MATLAB implementation that is faster than my code, I am happy to do further optimization.
+3) Robust. Many numerical stability techniques are applied to avoid the numerical underflow and overflow that often happen when dealing with high-dimensional data.
+4) Easy to learn. The code is heavily commented, and the reference formulas in the PRML book are indicated for the corresponding code lines.
+5) Practical. The package is designed not only for users to learn the algorithms in the book, but also to facilitate ML research. Many functions in this package are already among the top downloads on the MATLAB File Exchange and are widely used.
 
 License
 -------

diff --git a/TODO.txt b/TODO.txt
@@ -1,9 +1,9 @@
 TODO: 
-derive simpler bound for vb and improve vb functions chapter 10
-fix llh for rvm cd
-viterbi normalize update
-
-Other:
-Add plot function
-Add inference function for classification, mixture models
-Add unit test
+chapter13: demo for time series models
+chapter12: prediction functions for ppca
+chapter05: MLP
+chapter10: prediction functions for VB
+chapter08: BP, EP
+chapter07: update beta for sequential rvm
+chapter10: compute bound terms inside each factor
+chapter04: plot multiclass data boundary
diff --git a/chapter01/condEntropy.m b/chapter01/condEntropy.m
@@ -1,5 +1,6 @@
 function z = condEntropy (x, y)
 % Compute conditional entropy H(x|y) of two discrete variables x and y.
+% x, y: two vectors of integers of the same length
 % Written by Mo Chen ([email protected]).
 assert(numel(x) == numel(y));
 n = numel(x);

diff --git a/chapter01/demo.m b/chapter01/demo.m
@@ -0,0 +1,40 @@
+% Done
+% demo for information theory toolbox: compute entropy measures of two random integer vectors and check the identities relating them
+clear;
+k = 10;  % variable range: each sample is an integer in 1..k
+n = 100;  % number of samples drawn for each variable
+
+x = ceil(k*rand(1,n));
+y = ceil(k*rand(1,n));
+
+% x = randi(k,1,n);  % need statistics toolbox
+% y = randi(k,1,n);
+
+%% entropy H(x), H(y)
+Hx = entropy(x);
+Hy = entropy(y);
+%% joint entropy H(x,y)
+Hxy = jointEntropy(x,y);
+%% conditional entropy H(x|y)
+Hx_y = condEntropy(x,y);
+%% mutual information I(x,y)
+Ixy = mutInfo(x,y);
+%% relative entropy (KL divergence) KL(p(x)||p(y))
+Dxy = relatEntropy(x,y);
+%% normalized mutual information I_n(x,y)
+nIxy = nmi(x,y);
+%% normalized variation information I_v(x,y)
+vIxy = nvi(x,y);
+%% check H(x|y) = H(x,y)-H(y) (no trailing semicolon, so the isequalf result is displayed; isequalf is defined elsewhere - presumably a tolerance-based comparison)
+isequalf(Hx_y,Hxy-Hy)
+%% check I(x,y) = H(x)-H(x|y)
+isequalf(Ixy,Hx-Hx_y)
+%% check I(x,y) = H(x)+H(y)-H(x,y)
+isequalf(Ixy,Hx+Hy-Hxy)
+%% check I_n(x,y) = I(x,y)/sqrt(H(x)*H(y))
+isequalf(nIxy,Ixy/sqrt(Hx*Hy))
+%% check I_v(x,y) = (1-I(x,y)/H(x,y))
+isequalf(vIxy,1-Ixy/Hxy)
+
+
+
diff --git a/chapter01/entropy.m b/chapter01/entropy.m
@@ -1,5 +1,6 @@
 function z = entropy(x)
 % Compute entropy H(x) of a discrete variable x.
+% x: a vector of integers
 % Written by Mo Chen ([email protected]).
 n = numel(x);
 x = reshape(x,1,n);

diff --git a/chapter01/jointEntropy.m b/chapter01/jointEntropy.m
@@ -1,5 +1,6 @@
 function z = jointEntropy(x, y)
 % Compute joint entropy H(x,y) of two discrete variables x and y.
+% x, y: two vectors of integers of the same length
 % Written by Mo Chen ([email protected]).    
 assert(numel(x) == numel(y));
 n = numel(x);

diff --git a/chapter01/mutInfo.m b/chapter01/mutInfo.m
@@ -1,5 +1,6 @@
 function z = mutInfo(x, y)
 % Compute mutual information I(x,y) of two discrete variables x and y.
+% x, y: two vectors of integers of the same length
 % Written by Mo Chen ([email protected]).
 assert(numel(x) == numel(y));
 n = numel(x);

diff --git a/chapter01/nmi.m b/chapter01/nmi.m
@@ -1,7 +1,7 @@
 function z = nmi(x, y)
-% Compute nomalized mutual information I(x,y)/sqrt(H(x)*H(y)).
-% Author
-%       Michael Chen ([email protected])
+% Compute normalized mutual information I(x,y)/sqrt(H(x)*H(y)).
+% x, y: two vectors of integers of the same length
+% Written by Mo Chen ([email protected]).
 assert(numel(x) == numel(y));
 n = numel(x);
 x = reshape(x,1,n);

diff --git a/chapter01/nvi.m b/chapter01/nvi.m
@@ -1,6 +1,7 @@
 function z = nvi(x, y)
-% Compute nomalized variation information (1-I(x,y)/H(x,y)).
-% Written by Michael Chen ([email protected]).
+% Compute normalized variation information (1-I(x,y)/H(x,y)).
+% x, y: two vectors of integers of the same length
+% Written by Mo Chen ([email protected]).
 assert(numel(x) == numel(y));
 n = numel(x);
 x = reshape(x,1,n);

diff --git a/chapter01/relatEntropy.m b/chapter01/relatEntropy.m
@@ -1,5 +1,6 @@
 function z = relatEntropy (x, y)
 % Compute relative entropy (a.k.a KL divergence) KL(p(x)||p(y)) of two discrete variables x and y.
+% x, y: two vectors of integers of the same length
 % Written by Mo Chen ([email protected]).    
 assert(numel(x) == numel(y));
 n = numel(x);

diff --git a/chapter02/logGauss.m b/chapter02/logGauss.m
@@ -32,5 +32,5 @@
     c = d*(log(2*pi)+2*log(sigma));          % normalization constant
     y = -0.5*bsxfun(@plus,q,c);
 else
-    error('Parameters mismatched.');
+    error('Parameters are mismatched.');
 end
diff --git a/chapter02/logSt.m b/chapter02/logSt.m
@@ -1,10 +1,10 @@
 function y = logSt(X, mu, sigma, v)
-% Compute log pdf of a student-t distribution.
+% Compute log pdf of a Student's t distribution.
 % Written by mo Chen ([email protected]).
 [d,k] = size(mu);
 
 if size(sigma,1)==d && size(sigma,2)==d && k==1
-    [R,p]= cholcov(sigma,0);
+    [R,p]= chol(sigma);
     if p ~= 0
         error('ERROR: sigma is not SPD.');
     end
@@ -29,5 +29,5 @@
     c = gammaln((v+d)/2)-gammaln(v/2)-d*log(pi*v.*sigma)/2;
     y = bsxfun(@plus,o,c);
 else
-    error('Parameters mismatched.');
+    error('Parameters are mismatched.');
 end
diff --git a/chapter03/demo.m b/chapter03/demo.m
@@ -1,42 +1,18 @@
 % Done
 % demo for chapter 03
 clear; close all;
-n = 100;
-beta = 1e-1;
-X = rand(1,n);
-w = randn;
-b = randn;
-t = w'*X+b+beta*randn(1,n);
-x = linspace(min(X)-1,max(X)+1,n);   % test data
+d = 1;                 % first argument to linRnd - presumably the input dimension (linRnd not shown here)
+n = 200;               % second argument to linRnd - presumably the number of samples
+[x,t] = linRnd(d,n);   % generate inputs x and responses t for the demo
 %%
-model = regress(X, t);
-y = linInfer(x, model);
-figure;
-hold on;
-plot(X,t,'o');
-plot(x,y,'r-');
-hold off
-pause
+% model = linReg(x,t);
+% linPlot(model,x,t);
 %%
-[model,llh] = regressEbEm(X,t);
-[y, sigma] = linInfer(x,model,t);
-figure;
-hold on;
-plotBand(x,y,2*sigma);
-plot(X,t,'o');
-plot(x,y,'r-');
-hold off
-figure
-plot(llh);
-pause
+% [model,llh] = linRegEm(x,t);
+% plot(llh);
+% linPlot(model,x,t);
 %%
-[model,llh] = regressEbFp(X,t);
-[y, sigma] = linInfer(x,model,t);
-figure;
-hold on;
-plotBand(x,y,2*sigma);
-plot(X,t,'o');
-plot(x,y,'r-');
-hold off
-figure
-plot(llh);
+[model,llh] = linRegFp(x,t);         % fit with linRegFp (defined elsewhere); llh is its second output
+[y, sigma] = linPred(model,x,t);     % prediction and predictive std at the inputs x (not used further in this script)
+plot(llh);                           % plot llh - presumably the log-likelihood trace of the fit
+linPlot(model,x,t);                  % linPlot opens its own figure for the fit and the data
diff --git a/chapter03/linInfer.m b/chapter03/linInfer.m
diff --git a/chapter03/linPlot.m b/chapter03/linPlot.m
@@ -0,0 +1,16 @@
+function linPlot(model, X, t)
+% Plot the fitted linear function, a shaded band of +/- one predictive std around it, and the data
+% model: model structure accepted by linPred; X: 1xn data
+% t: 1xn response
+% Written by Mo Chen ([email protected]).
+color = [255,228,225]/255; % pink RGB triple scaled to [0,1], used to fill the band
+% [x,idx] = sort(x);
+x = linspace(min(X),max(X));   % evenly spaced grid spanning the range of the data
+[y,s] = linPred(model,x);      % prediction y and predictive std s on the grid
+figure;
+hold on;
+fill([x,fliplr(x)],[y+s,fliplr(y-s)],color);   % closed polygon: upper edge y+s left to right, then lower edge y-s right to left
+plot(X,t,'o');
+plot(x,y,'r-');
+hold off
+
diff --git a/chapter03/linPred.m b/chapter03/linPred.m
@@ -0,0 +1,23 @@
+function [y, sigma, p] = linPred(model, X, t)
+% Compute linear model response y = w'*X+w0, its predictive std sigma, and the likelihood p of t
+%   model: trained model structure (reads fields w, w0, and for sigma also beta, U, xbar)
+%   X: d x n testing data
+%   t (optional): 1 x n testing response; p is only assigned when t is given and three outputs are requested
+% Written by Mo Chen ([email protected]).
+w = model.w;
+w0 = model.w0;
+y = w'*X+w0;
+%% probability prediction (skipped unless more than one output is requested)
+if nargout > 1
+    beta = model.beta;
+    U = model.U;        % 3.54 (linReg stores the Cholesky factor of its regularized X*X' here)
+    Xo = bsxfun(@minus,X,model.xbar);   % center the test inputs with the stored mean
+    XU = U'\Xo;
+    sigma = sqrt((1+dot(XU,XU,1))/beta);   % 3.59: per-column predictive standard deviation
+end
+
+if nargin == 3 && nargout == 3
+    p = exp(logGauss(t,y,sigma));   % logGauss is defined elsewhere in the package
+%     p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma));
+end
+
diff --git a/chapter03/linReg.m b/chapter03/linReg.m
@@ -1,26 +1,33 @@
 function model = linReg(X, t, lambda)
-% Fit linear regression model t=w'x+w0
-% X: d x n data
-% t: 1 x n response
+% Fit linear regression model y=w'x+w0 (regularized when lambda is nonzero)
+%   X: d x n data; t: 1 x n response
+%   lambda (optional): value added to the diagonal of X*X' after centering (default 0)
 % Written by Mo Chen ([email protected]).
 if nargin < 3
     lambda = 0;
 end
 d = size(X,1);
+idx = (1:d)';
+dg = sub2ind([d,d],idx,idx);   % linear indices of the diagonal of a d x d matrix
+
 xbar = mean(X,2);
 tbar = mean(t,2);
-
 X = bsxfun(@minus,X,xbar);
 t = bsxfun(@minus,t,tbar);
 
-S = X*X';
-idx = (1:d)';
-dg = sub2ind([d,d],idx,idx);
-S(dg) = S(dg)+lambda;
-% w = S\(X*t');
-U = chol(S);
+XX = X*X';
+XX(dg) = XX(dg)+lambda;     % 3.54 XX=inv(S)/beta
+% w = XX\(X*t');
+U = chol(XX);               % Cholesky factor of XX, used for the two triangular solves below
 w = U\(U'\(X*t'));  % 3.15 & 3.28
 w0 = tbar-dot(w,xbar);  % 3.19
 
 model.w = w;
 model.w0 = w0;
+model.xbar = xbar;          % input mean, read by linPred to center test data
+%% for probability prediction
+beta = 1/mean((t-w'*X).^2); % 3.21
+% alpha = lambda*beta;           % lambda=a/b P.153 3.55
+% model.alpha = alpha;
+model.beta = beta;          % inverse of the mean squared residual, read by linPred
+model.U = U;                % Cholesky factor, read by linPred for the predictive std
diff --git a/chapter03/linRegEbEm.m b/chapter03/linRegEbEm.m