Poisson Mixture Model

We assume that the observations are distributed according to the following model:

\[\begin{eqnarray*} Y_{i} | Z_i = k &\sim& \mathcal{P}(\mu_k)\\ P(Z_i = k) &=& \pi_k\\ \end{eqnarray*}\]

We sample observations under that model:

## Simulation parameters
seed <- 291
set.seed(seed)
K <- 3                       # number of mixture components
pi_prop <- c(1/6, 1/2, 1/3)  # mixing proportions (sum to 1)
mutrue <- c(10, 20, 30)      # true Poisson means, one per component

## Simulation of n observations from the mixture
n <- 100
Ztrue <- t(rmultinom(n, 1, prob = pi_prop))  # n x K one-hot latent labels
Zsim <- Ztrue %*% (1:K)                      # labels recoded 1..K (n x 1)
Y <- rpois(n, Ztrue %*% mutrue)              # Y_i | Z_i = k ~ Poisson(mu_k)
hist(Y)

Prior distribution

We consider the prior distributions:

\[\begin{eqnarray*} \mu_k &\sim& \Gamma(a_k,b_k)\\ (\pi_1, \dots, \pi_K) &\sim& \mathcal{D}ir(e_1, \dots, e_K) \end{eqnarray*}\]

## Prior on mu_k ~ Gamma(a, b): moment-matched so that the prior mean is
## mean(Y) and the prior standard deviation is sprior
## (for a Gamma(a, b): E[mu] = a/b, Var[mu] = a/b^2).
muprior <- mean(Y)       # prior expectation of mu_k
sprior <- 30             # prior standard deviation of mu_k
b <- muprior / sprior^2  # rate
a <- b * muprior         # shape
## Prior on (pi_1, ..., pi_K) ~ Dirichlet(1, ..., 1): uniform on the simplex
e <- rep(1, K)
par(mfrow = c(1, 1))
curve(dgamma(x, a, b), 0, 100,
      main = 'Prior on mu', ylab = 'prior density', xlab = 'mu')

Tasks Are you OK with these prior distributions on \(\mu\) and \(\pi\)?

Variational Bayes Posterior

We apply the VB we design in the course.

# Load the course's VB-EM implementation and fit it to the simulated sample.
source('BayesPoissonMixture_functions.R')
res_VB <- VBEM_PoissonMixtures(Y, a, b, e)
res_VB$iter  # presumably the number of VB iterations run -- see the function
## [1] 110

Tasks Have a look at the code. Do you recognize the two steps? Is it fast? How is it initialized?

Results

We plot the posterior density

# Conjugate quantities computed from the true labels (unavailable in
# practice): per-component counts N_k and per-component sums S_k of Y.
Ntrue <- colSums(Ztrue)
Strue <- as.vector(Y %*% Ztrue)
par(mfrow = c(2, K))
# Top row: posterior of mu_k.
#   red    = VB posterior Gamma(aVB_k, bVB_k)
#   orange = exact posterior given the true labels, Gamma(a + S_k, b + N_k)
#   green  = prior Gamma(a, b)
#   purple = true value mu_k
for (j in 1:K) {
  curve(dgamma(x, res_VB$aVB[j], res_VB$bVB[j]), min(Y), max(Y),
        ylab = 'dens', col = 'red', main = paste0('mu', j))
  curve(dgamma(x, a + Strue[j], b + Ntrue[j]),
        ylab = 'dens', col = 'orange', add = TRUE)
  curve(dgamma(x, a, b), add = TRUE, col = 'green')
  abline(v = mutrue[j], col = 'purple')
}

# Bottom row: marginal of pi_k (a Dirichlet marginal is a Beta), same colours.
for (j in 1:K) {
  curve(dbeta(x, res_VB$eVB[j], sum(res_VB$eVB) - res_VB$eVB[j]), 0, 1,
        ylab = 'dens', col = 'red')
  curve(dbeta(x, e[j], sum(e) - e[j]), add = TRUE, col = 'green')
  curve(dbeta(x, e[j] + Ntrue[j], sum(e) + n - e[j] - Ntrue[j]),
        col = 'orange', add = TRUE)
  abline(v = pi_prop[j], col = 'purple')
}

Tasks

  • What do you think about the posteriors?
  • What are we plotting with curve(dbeta(x, e[k]+Ntrue[k], sum(e)+n-e[k]-Ntrue[k]))?

Clustering

# MAP classification: assign each observation to the component with the
# largest variational posterior probability tau_ik.
Zhat <- apply(res_VB$tau, 1, which.max)
# Confusion matrix between estimated and simulated labels.
Tab <- as.matrix(table(Zhat, Zsim))
Tab
##     Zsim
## Zhat  1  2  3
##    1 17  1  0
##    2  3 26  0
##    3  0 13 40
  • What is Zhat? What do you think of our clustering?

Gibbs sampling

We propose to sample the posterior with a Gibbs sampler.

Tasks Have a look at the function Gibbs_PoissonMixtures. Do you recognize your steps?

## Number of Gibbs iterations (the sampler prints its progress every 1000).
M <- 20000
res_Gibbs <- Gibbs_PoissonMixtures(Y, a, b, e, M)
## [1] "iter 1000"
## [1] "iter 2000"
## [1] "iter 3000"
## [1] "iter 4000"
## [1] "iter 5000"
## [1] "iter 6000"
## [1] "iter 7000"
## [1] "iter 8000"
## [1] "iter 9000"
## [1] "iter 10000"
## [1] "iter 11000"
## [1] "iter 12000"
## [1] "iter 13000"
## [1] "iter 14000"
## [1] "iter 15000"
## [1] "iter 16000"
## [1] "iter 17000"
## [1] "iter 18000"
## [1] "iter 19000"
## [1] "iter 20000"
## Discard the first half of the chain as burn-in and keep every thin-th
## draw. The original hard-coded M/2 and 5 in seq() while leaving burnin
## and thin unused; the values are identical, but now the variables are
## actually used.
burnin <- M/2
thin <- 5
ind <- seq(burnin, M, by = thin)
mu_post <- res_Gibbs$mat_mu[ind, ]
pi_post <- res_Gibbs$mat_pi[ind, ]

Trajectories

par(mfrow = c(2, K))
# Trace plots of the retained draws; purple lines mark the true values.
# NOTE(review): the mu panels draw all K true means (abline(h = mutrue)),
# presumably so that label switching is visible -- confirm this is intended.
for (j in 1:K) {
  plot(mu_post[, j], type = 'l')
  abline(h = mutrue, col = 'purple', lwd = 2)
}
for (j in 1:K) {
  plot(pi_post[, j], type = 'l')
  abline(h = pi_prop[j], col = 'purple', lwd = 2)
}

Tasks

  • What do you think of the trajectories? What is happening?
  • Have a look at the post densities. Comment.

Post densities

par(mfrow = c(2, K))
# Top row: posterior of mu_k -- VB posterior (red), prior (green), Gibbs
# kernel density estimate (magenta); purple lines mark all K true means
# (NOTE(review): all of mutrue is drawn here, not just mutrue[k] --
# presumably deliberate because of label switching; confirm).
for (j in 1:K) {
  xr <- range(Y)
  curve(dgamma(x, res_VB$aVB[j], res_VB$bVB[j]), xr[1], xr[2],
        ylab = 'dens', col = 'red')
  curve(dgamma(x, a, b), add = TRUE, col = 'green')
  lines(density(mu_post[, j]), col = 'magenta')
  abline(v = mutrue, col = 'purple')
}
# Bottom row: marginal of pi_k (Beta), same colour code.
for (j in 1:K) {
  curve(dbeta(x, res_VB$eVB[j], sum(res_VB$eVB) - res_VB$eVB[j]), 0, 1,
        ylab = 'dens', col = 'red')
  curve(dbeta(x, e[j], sum(e) - e[j]), add = TRUE, col = 'green')
  abline(v = pi_prop, col = 'purple')
  lines(density(pi_post[, j]), col = 'magenta')
}

Re-run the algorithm with the option labelswitching = TRUE. What are we doing?

## Re-run the sampler with labelswitching = TRUE -- see
## Gibbs_PoissonMixtures for what this option does (presumably a
## relabelling step to handle label switching; confirm in the source).
M <- 20000
res_Gibbs <- Gibbs_PoissonMixtures(Y, a, b, e, M, labelswitching = TRUE)
## [1] "iter 1000"
## [1] "iter 2000"
## [1] "iter 3000"
## [1] "iter 4000"
## [1] "iter 5000"
## [1] "iter 6000"
## [1] "iter 7000"
## [1] "iter 8000"
## [1] "iter 9000"
## [1] "iter 10000"
## [1] "iter 11000"
## [1] "iter 12000"
## [1] "iter 13000"
## [1] "iter 14000"
## [1] "iter 15000"
## [1] "iter 16000"
## [1] "iter 17000"
## [1] "iter 18000"
## [1] "iter 19000"
## [1] "iter 20000"
## Same post-processing as before: burn-in of M/2 and thinning by 5.
## The original defined burnin and thin but hard-coded the values again
## inside seq(); the variables are now used (identical result).
burnin <- M/2
thin <- 5
ind <- seq(burnin, M, by = thin)
mu_post <- res_Gibbs$mat_mu[ind, ]
pi_post <- res_Gibbs$mat_pi[ind, ]

Trajectories

par(mfrow = c(2, K))
# Trace plots after the labelswitching = TRUE run; purple = true values.
# NOTE(review): as before, the mu panels draw all K true means.
for (j in 1:K) {
  plot(mu_post[, j], type = 'l')
  abline(h = mutrue, col = 'purple', lwd = 2)
}
for (j in 1:K) {
  plot(pi_post[, j], type = 'l')
  abline(h = pi_prop[j], col = 'purple', lwd = 2)
}

Comparison of Gibbs and VB

############################# Posterior distributions
# Compare VB and Gibbs posteriors after relabelling.
par(mfrow = c(2, K))
# mu_k: VB posterior (red), prior (green), Gibbs KDE (magenta),
# true mean mu_k (purple).
for (j in 1:K) {
  xr <- range(Y)
  curve(dgamma(x, res_VB$aVB[j], res_VB$bVB[j]), xr[1], xr[2],
        ylab = 'dens', col = 'red')
  curve(dgamma(x, a, b), add = TRUE, col = 'green')
  lines(density(mu_post[, j]), col = 'magenta')
  abline(v = mutrue[j], col = 'purple')
}
# pi_k: Beta marginals, same colour code, true proportion in purple.
for (j in 1:K) {
  curve(dbeta(x, res_VB$eVB[j], sum(res_VB$eVB) - res_VB$eVB[j]), 0, 1,
        ylab = 'dens', col = 'red')
  curve(dbeta(x, e[j], sum(e) - e[j]), add = TRUE, col = 'green')
  abline(v = pi_prop[j], col = 'purple')
  lines(density(pi_post[, j]), col = 'magenta')
}

Tasks Compare the various distributions (VB/MCMC). What do you think?