merge pull request

tonysy · Dec 1, 2015 · 69352c7 · 69352c7
2 parents f8d27d3 + 4856f16
commit 69352c7
Show file tree

Hide file tree

Showing 86 changed files with 1,263 additions and 1,405 deletions.
diff --git a/TODO.txt b/TODO.txt
@@ -0,0 +1,9 @@
+TODO: 
+derive simpler bound for vb and improve vb functions chapter 10
+fix llh for rvm cd
+viterbi normalize update
+
+Other:
+Add plot function
+Add inference function for classification, mixture models
+Add unit test
diff --git a/chapter01/condEntropy.m b/chapter01/condEntropy.m
@@ -1,6 +1,6 @@
 function z = condEntropy (x, y)
 % Compute conditional entropy H(x|y) of two discrete variables x and y.
-% Written by Mo Chen (mochen80@gmail.com).
+% Written by Mo Chen (sth4nth@gmail.com).
 assert(numel(x) == numel(y));
 n = numel(x);
 x = reshape(x,1,n);
@@ -22,4 +22,4 @@
 
 % conditional entropy H(x|y)
 z = Hxy-Hy;
-
+z = max(0,z);
diff --git a/chapter01/entropy.m b/chapter01/entropy.m
@@ -1,8 +1,9 @@
 function z = entropy(x)
 % Compute entropy H(x) of a discrete variable x.
-% Written by Mo Chen (mochen80@gmail.com).
+% Written by Mo Chen (sth4nth@gmail.com).
 n = numel(x);
 x = reshape(x,1,n);
 [u,~,label] = unique(x);                              % label(i) indexes the unique value of x(i)
 p = full(mean(sparse(1:n,label,1,n,numel(u),n),1));   % relative frequency of each unique value
 z = -dot(p,log2(p+eps));                              % entropy in bits; eps keeps log2 finite
+z = max(0,z);                                          % clamp the result to be non-negative
diff --git a/chapter01/jointEntropy.m b/chapter01/jointEntropy.m
@@ -1,6 +1,6 @@
 function z = jointEntropy(x, y)
 % Compute joint entropy H(x,y) of two discrete variables x and y.
-% Written by Mo Chen (mochen80@gmail.com).    
+% Written by Mo Chen (sth4nth@gmail.com).    
 assert(numel(x) == numel(y));
 n = numel(x);
 x = reshape(x,1,n);
@@ -15,3 +15,4 @@
 p = nonzeros(sparse(idx,x,1,n,k,n)'*sparse(idx,y,1,n,k,n)/n); %joint distribution of x and y
 
 z = -dot(p,log2(p));
+z = max(0,z);
diff --git a/chapter01/mutInfo.m b/chapter01/mutInfo.m
@@ -1,6 +1,6 @@
 function z = mutInfo(x, y)
 % Compute mutual information I(x,y) of two discrete variables x and y.
-% Written by Mo Chen (mochen80@gmail.com).
+% Written by Mo Chen (sth4nth@gmail.com).
 assert(numel(x) == numel(y));
 n = numel(x);
 x = reshape(x,1,n);
@@ -25,3 +25,4 @@
 Hy = -dot(Py,log2(Py));
 % mutual information
 z = Hx+Hy-Hxy;
+z = max(0,z);
diff --git a/chapter01/nmi.m b/chapter01/nmi.m
@@ -18,7 +18,7 @@
 Pxy = nonzeros(Mx'*My/n); %joint distribution of x and y
 Hxy = -dot(Pxy,log2(Pxy));
 
-
+
 % hack to eliminate the 0*log(0) issue
 Px = nonzeros(mean(Mx,1));
 Py = nonzeros(mean(My,1));
@@ -31,5 +31,6 @@
 MI = Hx + Hy - Hxy;
 
 % normalized mutual information
-z = max(0,sqrt((MI/Hx)*(MI/Hy))) ; % hacking to avoid NaN
-
+z = sqrt((MI/Hx)*(MI/Hy));
+z = max(0,z);
+
diff --git a/chapter01/nvi.m b/chapter01/nvi.m
@@ -26,3 +26,4 @@
 
 % nvi
 z = 2-(Hx+Hy)/Hxy;
+z = max(0,z);
diff --git a/chapter01/relatEntropy.m b/chapter01/relatEntropy.m
@@ -1,6 +1,6 @@
 function z = relatEntropy (x, y)
 % Compute relative entropy (a.k.a KL divergence) KL(p(x)||p(y)) of two discrete variables x and y.
-% Written by Mo Chen (mochen80@gmail.com).    
+% Written by Mo Chen (sth4nth@gmail.com).    
 assert(numel(x) == numel(y));
 n = numel(x);
 x = reshape(x,1,n);
@@ -18,4 +18,4 @@
 Py = nonzeros(mean(My,1));
 
 z = -dot(Px,log2(Py)-log2(Px));
-
+z = max(0,z);
diff --git a/chapter02/demo.m b/chapter02/demo.m
diff --git a/chapter02/pdfDirichletLn.m → chapter02/logDirichlet.m b/chapter02/pdfDirichletLn.m → chapter02/logDirichlet.m
@@ -1,13 +1,13 @@
-function y = pdfDirichletLn(X, a)
-% Compute log pdf of a Dirichlet distribution.
-%   X: d x n data matrix satifying (sum(X,1)==ones(1,n) && X>=0)
-%   a: d x k parameters
-%   y: k x n probability density
-% Written by Mo Chen (mochen80@gmail.com).
-X = bsxfun(@times,X,1./sum(X,1));
-if size(a,1) == 1
-    a = repmat(a,size(X,1),1);
-end
-c = gammaln(sum(a,1))-sum(gammaln(a),1);
-g = (a-1)'*log(X);
-y = bsxfun(@plus,g,c');
+function y = logDirichlet(X, a)
+% Compute log pdf of a Dirichlet distribution.
+%   X: d x n data matrix satisfying (sum(X,1)==ones(1,n) && X>=0)
+%   a: d x k parameters
+%   y: k x n probability density
+% Written by Mo Chen (sth4nth@gmail.com).
+X = bsxfun(@times,X,1./sum(X,1));          % rescale every column of X to sum to one
+if size(a,1) == 1                          % a given as a single row: replicate it over the d rows
+    a = repmat(a,size(X,1),1);
+end
+c = gammaln(sum(a,1))-sum(gammaln(a),1);   % log normalization constant, one per column of a
+g = (a-1)'*log(X);                         % k x n data-dependent term
+y = bsxfun(@plus,g,c');
diff --git a/chapter13/pdfGaussLn.m → chapter02/logGauss.m b/chapter13/pdfGaussLn.m → chapter02/logGauss.m
@@ -1,36 +1,36 @@
-function y = pdfGaussLn(X, mu, sigma)
-% Compute log pdf of a Gaussian distribution.
-% Written by Mo Chen (mochen80@gmail.com).
-
-[d,n] = size(X);
-k = size(mu,2);
-if n == k && size(sigma,1) == 1           
-    X = bsxfun(@times,X-mu,1./sigma);
-    q = dot(X,X,1);  % M distance
-    c = d*log(2*pi)+2*log(sigma);          % normalization constant
-    y = -0.5*(c+q);
-elseif size(sigma,1)==d && size(sigma,2)==d && k==1   % one mu and one dxd sigma
-    X = bsxfun(@minus,X,mu);
-    [R,p]= chol(sigma);
-    if p ~= 0
-        error('ERROR: sigma is not PD.');
-    end
-    Q = R'\X;
-    q = dot(Q,Q,1);  % quadratic term (M distance)
-    c = d*log(2*pi)+2*sum(log(diag(R)));   % normalization constant
-    y = -0.5*(c+q);
-elseif size(sigma,1)==d && size(sigma,2)==k % k mu and k diagonal sigma
-    lambda = 1./sigma;
-    ml = mu.*lambda;
-    q = bsxfun(@plus,X'.^2*lambda-2*X'*ml,dot(mu,ml,1)); % M distance
-    c = d*log(2*pi)+2*sum(log(sigma),1); % normalization constant
-    y = -0.5*bsxfun(@plus,q,c);
-elseif size(sigma,1)==1 && (size(sigma,2)==k || size(sigma,2)==1) % k mu and (k or one) scalar sigma
-    X2 = repmat(dot(X,X,1)',1,k);
-    D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1));
-    q = bsxfun(@times,D,1./sigma);  % M distance
-    c = d*(log(2*pi)+2*log(sigma));          % normalization constant
-    y = -0.5*bsxfun(@plus,q,c);
-else
-    error('Parameters mismatched.');
-end
+function y = logGauss(X, mu, sigma)
+% Compute log pdf of a Gaussian distribution.
+% Written by Mo Chen (sth4nth@gmail.com).
+
+[d,n] = size(X);
+k = size(mu,2);
+if n == k && size(sigma,1) == 1           % as many mu columns as X columns, sigma has a single row
+    X = bsxfun(@times,X-mu,1./sigma);
+    q = dot(X,X,1);  % squared distance after scaling by 1./sigma
+    c = d*log(2*pi)+2*log(sigma);          % normalization constant; NOTE(review): log(sigma) is not scaled by d as in the last branch - confirm
+    y = -0.5*(c+q);
+elseif size(sigma,1)==d && size(sigma,2)==d && k==1   % one mu and one dxd sigma
+    X = bsxfun(@minus,X,mu);
+    [R,p]= chol(sigma);
+    if p ~= 0
+        error('ERROR: sigma is not PD.');
+    end
+    Q = R'\X;
+    q = dot(Q,Q,1);  % quadratic term (M distance)
+    c = d*log(2*pi)+2*sum(log(diag(R)));   % normalization constant (2*sum(log(diag(R))) is the log-determinant of sigma)
+    y = -0.5*(c+q);
+elseif size(sigma,1)==d && size(sigma,2)==k % k mu and k diagonal sigma
+    lambda = 1./sigma;
+    ml = mu.*lambda;
+    q = bsxfun(@plus,X'.^2*lambda-2*X'*ml,dot(mu,ml,1)); % M distance
+    c = d*log(2*pi)+2*sum(log(sigma),1); % normalization constant; NOTE(review): log(sigma) is doubled here but not in the matching logSt branch - confirm
+    y = -0.5*bsxfun(@plus,q,c);
+elseif size(sigma,1)==1 && (size(sigma,2)==k || size(sigma,2)==1) % k mu and (k or one) scalar sigma
+    X2 = repmat(dot(X,X,1)',1,k);
+    D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1));   % n x k squared Euclidean distances
+    q = bsxfun(@times,D,1./sigma);  % M distance
+    c = d*(log(2*pi)+2*log(sigma));          % normalization constant; NOTE(review): q divides by sigma once while log(sigma) is doubled - confirm
+    y = -0.5*bsxfun(@plus,q,c);
+else
+    error('Parameters mismatched.');
+end
diff --git a/chapter02/logKde.m b/chapter02/logKde.m
@@ -0,0 +1,5 @@
+function z = logKde (X, Y, sigma)
+% Compute log pdf of kernel density estimator.
+% Written by Mo Chen (sth4nth@gmail.com).
+D = bsxfun(@plus,full(dot(X,X,1)),full(dot(Y,Y,1))')-full(2*(Y'*X)); % squared distances: rows follow columns of Y, columns follow columns of X
+z = logSumExp(D/(-2*sigma^2),1)-0.5*log(2*pi)-log(sigma*size(Y,2)); % NOTE(review): normalizer does not depend on size(X,1) - confirm this is meant for 1-D data
diff --git a/chapter02/pdfMnLn.m → chapter02/logMn.m b/chapter02/pdfMnLn.m → chapter02/logMn.m
@@ -1,11 +1,11 @@
-function z = pdfMnLn (x, p)
-% Compute log pdf of a multinomial distribution.
-% Written by Mo Chen (mochen80@gmail.com).    
-    if numel(x) ~= numel(p)
-        n = numel(x);
-        x = reshape(x,1,n);
-        [u,~,label] = unique(x);
-        x = full(sum(sparse(label,1:n,1,n,numel(u),n),2));
-    end
-    z = gammaln(sum(x)+1)-sum(gammaln(x+1))+dot(x,log(p));
-endfunction
+function z = logMn (x, p)
+% Compute log pdf of a multinomial distribution.
+% Written by Mo Chen (sth4nth@gmail.com).    
+    if numel(x) ~= numel(p)                    % x holds raw observations: convert them to per-value counts
+        n = numel(x);
+        x = reshape(x,1,n);
+        [u,~,label] = unique(x);
+        x = full(sum(sparse(label,1:n,1,numel(u),n,n),2));  % numel(u)-by-n indicator matrix, summed over samples
+    end
+    z = gammaln(sum(x)+1)-sum(gammaln(x+1))+dot(x,log(p));  % log multinomial coefficient + sum of x.*log(p)
+end
diff --git a/chapter02/pdfMvGammaLn.m → chapter02/logMvGamma.m b/chapter02/pdfMvGammaLn.m → chapter02/logMvGamma.m
@@ -1,10 +1,10 @@
-function y = pdfMvGammaLn(x,d)
-% Compute logarithm multivariate Gamma function.
-% Gamma_p(x) = pi^(p(p-1)/4) prod_(j=1)^p Gamma(x+(1-j)/2)
-% log Gamma_p(x) = p(p-1)/4 log pi + sum_(j=1)^p log Gamma(x+(1-j)/2)
-% Written by Michael Chen ([email protected]).
-s = size(x);
-x = reshape(x,1,prod(s));
-x = bsxfun(@plus,repmat(x,d,1),(1-(1:d)')/2);
-y = d*(d-1)/4*log(pi)+sum(gammaln(x),1);
+function y = logMvGamma(x,d)
+% Compute logarithm multivariate Gamma function.
+% Gamma_p(x) = pi^(p(p-1)/4) prod_(j=1)^p Gamma(x+(1-j)/2)
+% log Gamma_p(x) = p(p-1)/4 log pi + sum_(j=1)^p log Gamma(x+(1-j)/2)
+% Written by Michael Chen (sth4nth@gmail.com).
+s = size(x);                                    % remember the input shape
+x = reshape(x,1,prod(s));
+x = bsxfun(@plus,repmat(x,d,1),(1-(1:d)')/2);   % row j holds x+(1-j)/2
+y = d*(d-1)/4*log(pi)+sum(gammaln(x),1);
 y = reshape(y,s);
diff --git a/chapter02/pdfStLn.m → chapter02/logSt.m b/chapter02/pdfStLn.m → chapter02/logSt.m
@@ -1,33 +1,33 @@
-function y = pdfStLn(X, mu, sigma, v)
-% Compute log pdf of a student-t distribution.
-% Written by mo Chen (mochen80@gmail.com).
-[d,k] = size(mu);
-
-if size(sigma,1)==d && size(sigma,2)==d && k==1
-    [R,p]= cholcov(sigma,0);
-    if p ~= 0
-        error('ERROR: sigma is not SPD.');
-    end
-    X = bsxfun(@minus,X,mu);
-    Q = R'\X;
-    q = dot(Q,Q,1);  % quadratic term (M distance)
-    o = -log(1+q/v)*((v+d)/2);
-    c = gammaln((v+d)/2)-gammaln(v/2)-(d*log(v*pi)+2*sum(log(diag(R))))/2;
-    y = c+o;
-elseif size(sigma,1)==d && size(sigma,2)==k
-    lambda = 1./sigma;
-    ml = mu.*lambda;
-    q = bsxfun(@plus,X'.^2*lambda-2*X'*ml,dot(mu,ml,1)); % M distance
-    o = bsxfun(@times,log(1+bsxfun(@times,q,1./v)),-(v+d)/2);
-    c = gammaln((v+d)/2)-gammaln(v/2)-(d*log(pi*v)+sum(log(sigma),1))/2;
-    y = bsxfun(@plus,o,c);
-elseif size(sigma,1)==1 && size(sigma,2)==k
-    X2 = repmat(dot(X,X,1)',1,k);
-    D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1));
-    q = bsxfun(@times,D,1./sigma);  % M distance
-    o = bsxfun(@times,log(1+bsxfun(@times,q,1./v)),-(v+d)/2);
-    c = gammaln((v+d)/2)-gammaln(v/2)-d*log(pi*v.*sigma)/2;
-    y = bsxfun(@plus,o,c);
-else
-    error('Parameters mismatched.');
-end
+function y = logSt(X, mu, sigma, v)
+% Compute log pdf of a student-t distribution.
+% Written by Mo Chen (sth4nth@gmail.com).
+[d,k] = size(mu);
+
+if size(sigma,1)==d && size(sigma,2)==d && k==1        % one mu and one dxd sigma
+    [R,p]= cholcov(sigma,0);
+    if p ~= 0
+        error('ERROR: sigma is not SPD.');
+    end
+    X = bsxfun(@minus,X,mu);
+    Q = R'\X;
+    q = dot(Q,Q,1);  % quadratic term (M distance)
+    o = -log(1+q/v)*((v+d)/2);                          % data-dependent term
+    c = gammaln((v+d)/2)-gammaln(v/2)-(d*log(v*pi)+2*sum(log(diag(R))))/2;   % log normalization constant
+    y = c+o;
+elseif size(sigma,1)==d && size(sigma,2)==k            % k mu and k diagonal sigma
+    lambda = 1./sigma;
+    ml = mu.*lambda;
+    q = bsxfun(@plus,X'.^2*lambda-2*X'*ml,dot(mu,ml,1)); % M distance
+    o = bsxfun(@times,log(1+bsxfun(@times,q,1./v)),-(v+d)/2);
+    c = gammaln((v+d)/2)-gammaln(v/2)-(d*log(pi*v)+sum(log(sigma),1))/2;
+    y = bsxfun(@plus,o,c);
+elseif size(sigma,1)==1 && size(sigma,2)==k            % k mu and k scalar sigma
+    X2 = repmat(dot(X,X,1)',1,k);
+    D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1));          % squared Euclidean distances to each mu
+    q = bsxfun(@times,D,1./sigma);  % M distance
+    o = bsxfun(@times,log(1+bsxfun(@times,q,1./v)),-(v+d)/2);
+    c = gammaln((v+d)/2)-gammaln(v/2)-d*log(pi*v.*sigma)/2;
+    y = bsxfun(@plus,o,c);
+else
+    error('Parameters mismatched.');
+end
diff --git a/chapter02/pdflogVmfLn.m → chapter02/logVmf.m b/chapter02/pdflogVmfLn.m → chapter02/logVmf.m
@@ -1,7 +1,7 @@
-function y = pdflogVmfLn(X, mu, kappa)
-% Compute log pdf of a von Mises-Fisher distribution.
-% Written by Mo Chen (mochen80@gmail.com).
-d = size(X,1);
-c = (d/2-1)*log(kappa)-(d/2)*log(2*pi)-logbesseli(d/2-1,kappa);
-q = bsxfun(@times,mu,kappa)'*X;
-y = bsxfun(@plus,q,c');
+function y = logVmf(X, mu, kappa)
+% Compute log pdf of a von Mises-Fisher distribution.
+% Written by Mo Chen (sth4nth@gmail.com).
+d = size(X,1);                                                    % data dimension (rows of X)
+c = (d/2-1)*log(kappa)-(d/2)*log(2*pi)-logbesseli(d/2-1,kappa);   % log normalization constant (logbesseli is an external helper)
+q = bsxfun(@times,mu,kappa)'*X;                                   % (mu scaled by kappa)' * X
+y = bsxfun(@plus,q,c');
diff --git a/chapter02/pdfWishartLn.m → chapter02/logWishart.m b/chapter02/pdfWishartLn.m → chapter02/logWishart.m
@@ -1,6 +1,6 @@
-function y = pdfWishartLn(Sigma, v, W)
-% Compute log pdf of a Wishart distribution.
-% Written by Mo Chen (mochen80@gmail.com).
-d = length(Sigma);
-B = -0.5*v*logdet(W)-0.5*v*d*log(2)-logmvgamma(0.5*v,d);
+function y = logWishart(Sigma, v, W)
+% Compute log pdf of a Wishart distribution.
+% Written by Mo Chen (sth4nth@gmail.com).
+d = length(Sigma);
+B = -0.5*v*logdet(W)-0.5*v*d*log(2)-logMvGamma(0.5*v,d);   % log normalizer; helper is spelled logMvGamma (names are case-sensitive)
 y = B+0.5*(v-d-1)*logdet(Sigma)-0.5*trace(W\Sigma);
diff --git a/chapter02/pdfKdeLn.m b/chapter02/pdfKdeLn.m
diff --git a/chapter03/demo.m b/chapter03/demo.m
@@ -7,7 +7,6 @@
 w = randn;
 b = randn;
 t = w'*X+b+beta*randn(1,n);
-
 x = linspace(min(X)-1,max(X)+1,n);   % test data
 %%
 model = regress(X, t);
@@ -17,6 +16,7 @@
 plot(X,t,'o');
 plot(x,y,'r-');
 hold off
+pause
 %%
 [model,llh] = regressEbEm(X,t);
 [y, sigma] = linInfer(x,model,t);
@@ -28,6 +28,7 @@
 hold off
 figure
 plot(llh);
+pause
 %%
 [model,llh] = regressEbFp(X,t);
 [y, sigma] = linInfer(x,model,t);