Negative binomial: globular clusters x host mag in Python using Stan

From: Bayesian Models for Astrophysical Data, Cambridge Univ. Press

You are kindly asked to include the complete citation if you use this material in a publication.

Code 10.17 Negative binomial model in Python using Stan, for modeling the relationship between globular cluster population and host galaxy visual magnitude

================================================================================

import numpy as np
import pandas as pd
import pystan
import statsmodels.api as sm

# Data
path_to_data = 'https://raw.githubusercontent.com/astrobayes/BMAD/master/data/Section_10p7/GCs.csv'

# Read the catalogue and keep it as a column-name -> column mapping
data_frame = dict(pd.read_csv(path_to_data))

# prepare data for Stan
data = {}
# design matrix: constant (intercept) column added to the visual magnitude
data['X'] = sm.add_constant(np.array(data_frame['MV_T']))
# response: globular cluster counts
data['Y'] = np.array(data_frame['N_GC'])
# Take N (rows) and K (columns) from the design matrix itself rather than
# hard-coding K, so they always agree with the `matrix[N,K] X` declaration
# in the Stan program.
data['N'], data['K'] = data['X'].shape

# Fit
# Stan program for a negative binomial regression with a log link.
# Comments inside the program use `//`: the `#` comment form is deprecated
# in Stan and rejected by newer compilers.
stan_code = """
data{
    int<lower=0> N;                // number of data points
    int<lower=1> K;                // number of linear predictor coefficients
    matrix[N,K] X;                 // design matrix (galaxy visual magnitude)
    int Y[N];                      // size of globular cluster population
}
parameters{
    vector[K] beta;                // linear predictor coefficients
    real<lower=0> theta;           // dispersion parameter: var = mu + mu^2 / theta
}
model{
    vector[N] mu;                  // mean: exp of the linear predictor

    mu = exp(X * beta);

    theta ~ gamma(0.001, 0.001);

    // likelihood
    Y ~ neg_binomial_2(mu, theta);
}
generated quantities{
    real dispersion;               // Pearson dispersion statistic
    vector[N] expY;                // mean
    vector[N] varY;                // variance
    vector[N] PRes;                // squared Pearson residuals
    vector[N] mu2;

    mu2 = exp(X * beta);
    expY = mu2;

    for (i in 1:N){
        varY[i] = mu2[i] + pow(mu2[i], 2) / theta;
        PRes[i] = pow((Y[i] - expY[i]) / sqrt(varY[i]),2);
    }

    // residual degrees of freedom: N minus the K coefficients and theta
    dispersion = sum(PRes) / (N - (K + 1));
}
"""

# Run mcmc
# Sampler settings: 3 chains of 10000 iterations each, the first 5000 of
# which are warmup, no thinning, one job per chain.
sampler_settings = {
    'iter': 10000,
    'chains': 3,
    'warmup': 5000,
    'thin': 1,
    'n_jobs': 3,
}
fit = pystan.stan(model_code=stan_code, data=data, **sampler_settings)

# Output
nlines = 9                     # number of lines in screen output

# The fit's string form is its text summary; split it into lines so that
# only the head of the summary is shown.
output = str(fit).split('\n')

# Print the first `nlines` lines of the summary
# (the loop body must be indented to be valid Python).
for item in output[:nlines]:
    print(item)

================================================================================

GET SOURCE

Output on screen:

Inference for Stan model: anon_model_723b570e1a19f3dc30e5da8afbc7bc52.
3 chains, each with iter=10000; warmup=5000; thin=1;
post-warmup draws per chain=5000, total post-warmup draws=15000.

             mean se_mean    sd    2.5%     25%     50%     75%   97.5%  n_eff  Rhat
beta[0]    -11.73  5.4e-3  0.33  -12.36  -11.95  -11.73  -11.51  -11.08   3645   1.0
beta[1]     -0.88  2.7e-4  0.02   -0.91   -0.89   -0.88   -0.87   -0.85   3661   1.0
theta         1.1  9.6e-4  0.07    0.97    1.05     1.1    1.15    1.25   5573   1.0
dispersion   1.93  2.2e-3   0.2    1.55    1.78    1.92    2.06    2.36   8955   1.0

HSI

HSI