top of page

From: Bayesian Models for Astrophysical Data, Cambridge Univ. Press

(c) 2017,  Joseph M. Hilbe, Rafael S. de Souza and Emille E. O. Ishida  

 

You are kindly asked to include the complete citation if you use this material in a publication.

​

​

Code 5.19 Logistic model in Python using Stan

==============================================

import numpy as np
import pystan
import statsmodels.api as sm

from scipy.stats import uniform, bernoulli

​

# Data
# Simulate a binary response from a logistic model with one binary and one
# continuous predictor. The order of the random draws (x1, x2, by) is kept
# fixed so that the seed reproduces the same sample.
np.random.seed(13979)                    # set seed to replicate example
nobs = 5000                              # number of obs in model

x1 = bernoulli.rvs(0.6, size=nobs)       # binary predictor, P(x1 = 1) = 0.6
x2 = uniform.rvs(size=nobs)              # continuous predictor on [0, 1]

# Coefficients used to generate the data (intercept, x1 slope, x2 slope).
beta0 = 2.0
beta1 = 0.75
beta2 = -5.

xb = beta0 + beta1 * x1 + beta2 * x2     # linear predictor
exb = 1.0 / (1 + np.exp(-xb))            # inverse logit: success probability

by = bernoulli.rvs(exb, size=nobs)       # simulated 0/1 response

​

# Bundle the inputs for Stan. The keys are the variable names declared in
# the `data` block of the Stan program (Y, N, X, K, logN).
mydata = {}
mydata['Y'] = by                                           # response vector
mydata['N'] = nobs                                         # number of rows
mydata['X'] = sm.add_constant(np.column_stack((x1, x2)))   # intercept + x1 + x2
# Take K from the design matrix itself so it cannot drift out of sync with
# the number of columns actually passed to Stan.
mydata['K'] = mydata['X'].shape[1]
mydata['logN'] = np.log(nobs)                              # used for BIC


# Fit
# Stan program for a Bernoulli model with a logit link.
#   data:                 N rows, K columns, response Y, design matrix X, log(N)
#   parameters:           coefficient vector beta
#   transformed params:   linear predictor eta = X * beta
#   generated quantities: per-observation log-likelihood LLi, its sum LogL,
#                         and AIC / BIC computed from LogL for every draw
#
# NOTE: LLi is evaluated at the observed response Y[i]. It was previously
# evaluated at the constant 1 (every observation scored as a success), so
# LogL, AIC and BIC values obtained with that version are not comparable
# with the ones this program produces.
stan_code = """
data{
    int<lower=0> N;
    int<lower=0> K;
    int Y[N];
    matrix[N,K] X;
    real logN;
}
parameters{
    vector[K] beta;
}
transformed parameters{
    vector[N] eta;

    eta = X * beta;
}
model{

    Y ~ bernoulli_logit(eta);
}
generated quantities{
    real LLi[N];
    real AIC;
    real BIC;
    real LogL;
    vector[N] etanew;
    real<lower=0, upper=1.0> pnew[N];

    etanew = X * beta;

    for (i in 1:N){ 
        pnew[i] = inv_logit(etanew[i]);
        LLi[i] = bernoulli_lpmf(Y[i] | pnew[i]);
    }

    LogL = sum(LLi);
    AIC = -2 * LogL + 2 * K; 
    BIC = -2 * LogL + logN * K; 
}
"""

# Compile the Stan program and sample: 3 chains of 10000 iterations each,
# the first 5000 of every chain being warm-up, with n_jobs=1.
fit = pystan.stan(
    model_code=stan_code,
    data=mydata,
    chains=3,
    iter=10000,
    warmup=5000,
    n_jobs=1,
)

​

# Output
# Print only the informative rows of the fit summary. This relies on the
# layout of str(fit): indices 0-7 hold the preamble, the column header and
# the three beta rows; the nobs rows of eta and the nobs rows of LLi follow,
# which puts AIC, BIC and LogL at indices 2*nobs + 8 .. 2*nobs + 10.
output = str(fit).split('\n')

first_stat = 2 * nobs + 8
lines = list(range(8))
lines.extend(first_stat + k for k in range(3))

for i in lines:
    print(output[i])

 

==============================================

Output on screen:

​

Inference for Stan model: anon_model_0d7c02b16b493c2889fb0fbb3160bfc7.
3 chains, each with iter=10000; warmup=5000; thin=1; 
post-warmup draws per chain=5000, total post-warmup draws=15000.

​

                       mean      se_mean             sd        2.5%         25%           50%        75%     97.5%         n_eff   Rhat
beta[0]             1.92          1.0e-3          0.08        1.77          1.87            1.92         1.98       2.08       6192.0     1.0
beta[1]             0.75          7.9e-4          0.07        0.62           0.71           0.75           0.8        0.89      7319.0     1.0
beta[2]            -4.89          1.8e-3          0.14       -5.16         -4.98          -4.89          -4.8       -4.63     6189.0     1.0
AIC              9647.1             1.72      210.86     9239.1      9505.1       9642.1      9789.5      1.0e4      15000     1.0
BIC              9666.7             1.72      210.86     9258.7      9524.6       9661.7      9809.0      1.0e4      15000     1.0
LogL          -4820.0             0.86       105.43       -5030        -4891        -4818       -4749      -4616      15000    1.0

 

bottom of page