Bayesian zero-inflated Poisson in Python using Stan

From: Bayesian Models for Astrophysical Data, Cambridge Univ. Press

You are kindly asked to include the complete citation if you use this material in a publication.

Code 7.2 Bayesian zero-inflated Poisson in Python using Stan

====================================================

import numpy as np
import pystan
import statsmodels.api as sm

from rpy2.robjects import r, FloatVector
from scipy.stats import uniform, norm

def zipoisson(N, lambda_par, psi):
    """Draw samples from a zero-inflated Poisson distribution.

    Sampling is delegated to the ``rzipois`` function of the R package
    VGAM, called through rpy2.

    Parameters
    ----------
    N : int
        Number of draws; passed to ``rzipois`` as its first argument.
    lambda_par : sequence of float
        Poisson mean(s); converted to an R ``FloatVector``.
    psi : sequence of float
        Probability of a structural zero; converted to an R
        ``FloatVector`` and passed as ``pstr0``.

    Returns
    -------
    numpy.ndarray
        The values returned by R, each converted with ``int``.
    """
    # VGAM has to be attached in the embedded R session before the
    # sampler can be looked up by name.
    r('library(VGAM)')
    zipoissonR = r['rzipois']

    res = zipoissonR(N, FloatVector(lambda_par),
                     pstr0=FloatVector(psi))

    return np.array([int(item) for item in res])

# Data
np.random.seed(141)                # seed NumPy/SciPy draws (used for x1)
# zipoisson() samples through R, whose random number generator is
# independent of NumPy's; seed it as well so that zipy can be replicated.
r('set.seed(141)')
nobs = 5000                        # number of obs in model

x1 = uniform.rvs(size=nobs)

xb = 1 + 2.0 * x1                  # linear predictor fed to exp() below
xc = 2 - 5.0 * x1                  # linear predictor fed to the logistic below

exb = np.exp(xb)                   # passed to zipoisson as lambda_par
exc = 1.0 / (1.0 + np.exp(-xc))    # passed to zipoisson as psi

zipy = zipoisson(nobs, exb, exc)   # create y as adjusted

# x1 is one-dimensional, so no transpose is needed before prepending the
# constant column.
X = sm.add_constant(x1)

# Data dictionary handed to Stan; both model components share the same
# design matrix.
mydata = {
    'N': nobs,                     # sample size
    'Xb': X,                       # predictors
    'Xc': X,
    'Y': zipy,                     # response variable
    'Kb': X.shape[1],              # number of coefficients
    'Kc': X.shape[1],
}

# Fit
# Stan program for the zero-inflated Poisson model, kept as a string and
# passed to PyStan below.
#   data:   N observations, design matrices Xb (N x Kb) and Xc (N x Kc),
#           integer response Y.
#   mu:     exp(Xc * beta), used as the Poisson mean.
#   Pi:     inv_logit(Xb[i] * gamma), used as the Bernoulli probability of
#           the zero-inflation component.
#   model:  per-observation log-likelihood LL. When Y[i] == 0 the two ways
#           of producing a zero are combined with log_sum_exp; otherwise
#           only the Poisson path contributes. No prior statements are
#           declared for beta or gamma.
stan_code = """
data{
int N;
int Kb;
int Kc;
matrix[N, Kb] Xb;
matrix[N, Kc] Xc;
int Y[N];
}
parameters{
vector[Kc] beta;
vector[Kb] gamma;

}
transformed parameters{
vector[N] mu;
vector[N] Pi;

mu = exp(Xc * beta);
for (i in 1:N) Pi[i] = inv_logit(Xb[i] * gamma);
}
model{
real LL[N];

for (i in 1:N) {
if (Y[i] == 0) {
LL[i] = log_sum_exp(bernoulli_lpmf(1|Pi[i]),
bernoulli_lpmf(0|Pi[i]) +
poisson_lpmf(Y[i]|mu[i]));
} else {
LL[i] = bernoulli_lpmf(0|Pi[i]) +
poisson_lpmf(Y[i]|mu[i]);
}
}

target += LL;
}
"""

# Run mcmc
# 3 chains of 5000 iterations each, of which 4000 are warmup, leaving
# 1000 post-warmup draws per chain.
# NOTE(review): n_jobs=3 presumably runs the chains in parallel - confirm
# against the installed PyStan version.
fit = pystan.stan(model_code=stan_code, data=mydata, iter=5000, chains=3,
warmup=4000, n_jobs=3)

# Output
nlines = 9                         # number of lines in screen output

# Only the first nlines lines of the printed fit are shown: in the example
# output these are the summary header and the beta/gamma rows. Later lines
# of the summary are not printed.
output = str(fit).split('\n')
for item in output[:nlines]:
    print(item)

====================================================

GET SOURCE

Output on screen:

Inference for Stan model: anon_model_49dc4b15f67427c0728b1e06ab4a4a1e.
3 chains, each with iter=5000; warmup=4000; thin=1;
post-warmup draws per chain=1000, total post-warmup draws=3000.

mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
beta[0] 1.0 6.2e-4 0.02 0.96 0.99 1.0 1.02 1.04 1097.0 1.0
beta[1] 1.99 8.0e-4 0.03 1.94 1.98 1.99 2.01 2.05 1097.0 1.0
gamma[0] 1.98 2.2e-3 0.07 1.84 1.93 1.98 2.03 2.12 1126.0 1.0
gamma[1] -5.06 4.2e-3 0.14 -5.34 -5.15 -5.06 -4.96 -4.78 1188.0 1.0

HSI

HSI