HSI
From: Bayesian Models for Astrophysical Data, Cambridge Univ. Press
(c) 2017, Joseph M. Hilbe, Rafael S. de Souza and Emille E. O. Ishida
You are kindly asked to include the complete citation if you use this material in a publication.
​
​
Code 10.6 Multivariate Gaussian mixed model in Python, using Stan, for assessing the relationship between luminosity, period, and color in early-type contact binaries
===================================================================================
import numpy as np
import pandas as pd
import pystan
import statsmodels.api as sm
​
# Location of the period-luminosity-color catalogue (CSV)
path_to_data = 'https://raw.githubusercontent.com/astrobayes/BMAD/master/data/Section_10p3/PLC.csv'

# Load the table and keep it as a plain {column name: column} mapping
data_frame = dict(pd.read_csv(path_to_data))

# Assemble the dictionary handed to Stan
data = {
    'x1': np.array(data_frame['logP']),     # predictor 1: logP column
    'x2': np.array(data_frame['V_I']),      # predictor 2: V_I column
    'y': np.array(data_frame['M_V']),       # response: M_V column
}
data['nobs'] = len(data['x1'])

# Binary indicator: 1 where the system type equals the type of the first
# row of the table, 0 for every other type.
data['type'] = np.where(data_frame['type'] == data_frame['type'][0], 1, 0)

data['M'] = 3                   # coefficients per population in the Stan model
# NOTE(review): K is the number of populations (two, given the binary
# indicator above); writing it as M - 1 only matches because M == 3.
data['K'] = data['M'] - 1
​
# Fit
# Stan program: each observation y[i] is Gaussian around a linear predictor in
# x1 and x2.  Observations whose `type` equals that of the first observation
# use column 2 of `beta` and sigma[2]; all others use column 1 and sigma[1].
# Every coefficient in `beta` shares the same normal(mu0, sigma0) prior.
# sigma0 is a scale parameter with a gamma prior, so it is declared with
# <lower=0> to keep the sampler inside its support.
stan_code = """
data{
    int<lower=0> nobs;                 // number of data points
    int<lower=1> M;                    // number of linear predictor coefficients
    int<lower=1> K;                    // number of distinct populations
    vector[nobs] x1;                   // obs log period
    vector[nobs] x2;                   // obs color V-I
    vector[nobs] y;                    // obs luminosity
    int type[nobs];                    // system type (near/genuine contact)
}
parameters{
    matrix[M,K] beta;                  // linear predictor coefficients
    real<lower=0> sigma[K];            // scatter around linear predictor
    real mu0;                          // common prior mean of the coefficients
    real<lower=0> sigma0;              // common prior scale of the coefficients
}
model{
    vector[nobs] mu;                   // linear predictor

    for (i in 1:nobs) {
        if (type[i] == type[1])
            mu[i] = beta[1,2] + beta[2,2] * x1[i] + beta[3,2] * x2[i];
        else mu[i] = beta[1,1] + beta[2,1] * x1[i] + beta[3,1] * x2[i];
    }

    // priors and likelihood
    mu0 ~ normal(0, 100);
    sigma0 ~ gamma(0.001, 0.001);

    for (i in 1:K) {
        sigma[i] ~ gamma(0.001, 0.001);
        for (j in 1:M) beta[j,i] ~ normal(mu0,sigma0);
    }

    for (i in 1:nobs){
        if (type[i] == type[1]) y[i] ~ normal(mu[i], sigma[2]);
        else y[i] ~ normal(mu[i], sigma[1]);
    }
}
"""
​
# Run mcmc
# Three chains of 5000 iterations each, 2500 of them warmup, no thinning,
# with n_jobs=3.
fit = pystan.stan(
    model_code=stan_code,
    data=data,
    iter=5000,
    warmup=2500,
    chains=3,
    thin=1,
    n_jobs=3
)

# Output
nlines = 13                     # number of lines in screen output

# Print only the first `nlines` lines of the textual fit summary.
output = str(fit).split('\n')
for line in output[:nlines]:
    print(line)
===================================================================================
Output on screen:
​
Inference for Stan model: anon_model_fa17b80801723fc7926798026c342239.
3 chains, each with iter=5000; warmup=2500; thin=1;
post-warmup draws per chain=2500, total post-warmup draws=7500.
​
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
beta[0,0] -1.01 3.9e-3 0.26 -1.53 -1.17 -1.01 -0.83 -0.5 4541 1.0
beta[1,0] -3.31 0.01 0.94 -5.19 -3.92 -3.3 -2.69 -1.45 6476 1.0
beta[2,0] 7.26 0.02 1.23 4.85 6.45 7.28 8.06 9.66 5041 1.0
beta[0,1] -0.41 2.2e-3 0.15 -0.72 -0.51 -0.41 -0.32 -0.11 4684 1.0
beta[1,1] -3.19 7.2e-3 0.57 -4.31 -3.57 -3.19 -2.79 -2.09 6266 1.0
beta[2,1] 8.48 0.01 0.79 6.89 7.97 8.48 8.99 10.06 4834 1.0
sigma[0] 0.62 1.2e-3 0.09 0.47 0.55 0.61 0.67 0.82 5800 1.0
sigma[1] 0.42 6.6e-4 0.05 0.34 0.39 0.42 0.46 0.54 6344 1.0