Modeling the period-luminosity-color (PLC) relation for early-type contact binaries in Python using Stan

From: Bayesian Models for Astrophysical Data, Cambridge Univ. Press

You are kindly asked to include the complete citation if you use this material in a publication.

Code 10.6 Multivariate Gaussian mixed model in Python, using Stan, for assessing the relationship between luminosity, period, and color in early-type contact binaries

===================================================================================

import numpy as np
import pandas as pd
import pystan
import statsmodels.api as sm

# Data: remote location of the PLC catalogue (CSV)
path_to_data = 'https://raw.githubusercontent.com/astrobayes/BMAD/master/data/Section_10p3/PLC.csv'

# read the CSV and keep it as a plain {column name: column} mapping
data_frame = dict(pd.read_csv(path_to_data))

# pull out the columns the Stan model consumes
log_period = np.array(data_frame['logP'])
system_type = data_frame['type']
reference_type = system_type[0]        # type label of the first system listed

# binary flag: 1 when a system has the same type as the first one, else 0
type_flag = np.array([int(label == reference_type) for label in system_type])

n_coefficients = 3                     # matches `M` in the Stan data block

# prepare data for Stan (keys must match the names in the Stan data block)
data = {
    'x1': log_period,                              # obs log period
    'x2': np.array(data_frame['V_I']),             # obs color V-I
    'y': np.array(data_frame['M_V']),              # obs luminosity
    'nobs': len(log_period),                       # number of data points
    'type': type_flag,                             # system type flag
    'M': n_coefficients,                           # linear predictor coefficients
    # number of populations; evaluates to 2, which is what the Stan model
    # expects since it indexes populations 1 and 2 explicitly
    'K': n_coefficients - 1,
}

# Stan model: one linear predictor (intercept, log period, color) per
# population, with a common normal(mu0, sigma0) prior on every coefficient.
# Comments inside the program use `//`, since `#` comments are deprecated in Stan.
stan_code = """
data{
    int<lower=0> nobs;                 // number of data points
    int<lower=1> M;                    // number of linear predictor coefficients
    int<lower=1> K;                    // number of distinct populations
    vector[nobs] x1;                   // obs log period
    vector[nobs] x2;                   // obs color V-I
    vector[nobs] y;                    // obs luminosity
    int type[nobs];                    // system type (near/genuine contact)
}
parameters{
    matrix[M,K] beta;                  // linear predictor coefficients
    real<lower=0> sigma[K];            // scatter around linear predictor
    real mu0;                          // common mean of the coefficient prior
    real<lower=0> sigma0;              // common scale of the coefficient prior;
                                       // must be positive (gamma prior, normal scale)
}
model{
    vector[nobs] mu;                   // linear predictor

    // systems with the same type as the first observation use column 2
    // of beta, all remaining systems use column 1
    for (i in 1:nobs) {
        if (type[i] == type[1])
            mu[i] = beta[1,2] + beta[2,2] * x1[i] + beta[3,2] * x2[i];
        else mu[i] = beta[1,1] + beta[2,1] * x1[i] + beta[3,1] * x2[i];
    }

    // priors and likelihood
    mu0 ~ normal(0, 100);
    sigma0 ~ gamma(0.001, 0.001);

    for (i in 1:K) {
        sigma[i] ~ gamma(0.001, 0.001);
        for (j in 1:M) beta[j,i] ~ normal(mu0,sigma0);
    }

    // each population has its own scatter, indexed like beta above
    for (i in 1:nobs){
        if (type[i] == type[1]) y[i] ~ normal(mu[i], sigma[2]);
        else y[i] ~ normal(mu[i], sigma[1]);
    }
}
"""

# Run mcmc: compile the Stan program above and sample from it.
# 3 chains of 5000 iterations each, the first 2500 of which are warmup,
# keeping every draw (thin=1); n_jobs is passed through to pystan unchanged.
fit = pystan.stan(
    model_code=stan_code,
    data=data,
    chains=3,
    iter=5000,
    warmup=2500,
    thin=1,
    n_jobs=3,
)

# Output: print only the top of the fit summary
nlines = 13                            # number of lines in screen output

# the textual summary of the fit, one list entry per line
output = str(fit).split('\n')

# the loop body must be indented, otherwise Python raises IndentationError
for item in output[:nlines]:
    print(item)

===================================================================================

GET SOURCE

Output on screen:

Inference for Stan model: anon_model_fa17b80801723fc7926798026c342239.
3 chains, each with iter=5000; warmup=2500; thin=1;
post-warmup draws per chain=2500, total post-warmup draws=7500.

             mean se_mean      sd    2.5%     25%     50%     75%   97.5%   n_eff    Rhat
beta[0,0]   -1.01  3.9e-3    0.26   -1.53   -1.17   -1.01   -0.83    -0.5    4541     1.0
beta[1,0]   -3.31    0.01    0.94   -5.19   -3.92    -3.3   -2.69   -1.45    6476     1.0
beta[2,0]    7.26    0.02    1.23    4.85    6.45    7.28    8.06    9.66    5041     1.0
beta[0,1]   -0.41  2.2e-3    0.15   -0.72   -0.51   -0.41   -0.32   -0.11    4684     1.0
beta[1,1]   -3.19  7.2e-3    0.57   -4.31   -3.57   -3.19   -2.79   -2.09    6266     1.0
beta[2,1]    8.48    0.01    0.79    6.89    7.97    8.48    8.99   10.06    4834     1.0
sigma[0]     0.62  1.2e-3    0.09    0.47    0.55    0.61    0.67    0.82    5800     1.0
sigma[1]     0.42  6.6e-4    0.05    0.34    0.39    0.42    0.46    0.54    6344     1.0

HSI

HSI