# read in data ####
load('horizon_regressions_output.Rdata')

# previous data processing ####
# load('horizon_preprocessed_data.Rdata')
# rm(quest_data)
# rm(trial_data)
# rm(trial_data_pre)
# trial_data_pre=read.csv(paste0(data_path,'horizon_trial_data_2021-04-13.csv'),
#                         stringsAsFactors=F,header=T)
# load(paste0(data_path,'scripts/preprocessing/excluded_subjects.Rdata'))
# horizon_exclude_IDs=unique(c(flag_horizon_IDs,flag_questionnaire_consistency,
#                              demo_flag_IDs,dass_flag_IDs,oasis_flag_IDs,flag_grid_skips))
# trial_data=trial_data_pre[!(trial_data_pre$ID %in% horizon_exclude_IDs),]
# trial_data$valence=ifelse(trial_data$outcome_valenced>0,1,0)
# trial_data$valence=factor(trial_data$valence,labels=c('loss','gain'))
# trial_data$valence=relevel(trial_data$valence,ref="gain")
# trial_data$correct=ifelse(trial_data$valence=='gain',
#                           ifelse(trial_data$Rvalue>trial_data$Lvalue,
#                                  ifelse(trial_data$Rchosen==1,1,0),
#                                  ifelse(trial_data$Rchosen==0,1,0)),
#                           ifelse(trial_data$Rvalue>trial_data$Lvalue,
#                                  ifelse(trial_data$Rchosen==0,1,0),
#                                  ifelse(trial_data$Rchosen==1,1,0)))
# trial_data$free_trial=ifelse(trial_data$trial_block<5,NA,trial_data$trial_block-4)
# trial_data$times_Rchosen=NA
# trial_data$uncR=-1*trial_data$uncertainty # coded 1/-1 if R/L chosen more; should be -1/1
# for (t in 1:dim(trial_data)[1]) {
#   if (!is.na(trial_data$free_trial[t]) & trial_data$free_trial[t]==1) {
#     trial_data$times_Rchosen[t]=trial_data$Rchosen[t]
#   } else if (!is.na(trial_data$free_trial[t]) & trial_data$free_trial[t]>1) {
#     trial_data$times_Rchosen[t]=trial_data$times_Rchosen[t-1]+
#       trial_data$Rchosen[t]
#   }
# }
# trial_data$diff_Rchosen=ifelse(is.na(trial_data$free_trial),NA,
#                                2*trial_data$times_Rchosen-trial_data$free_trial) # times R - (total - times R)
# trial_data$mean_diff=trial_data$Rvalue-trial_data$Lvalue
# trial_data$mean_diff_valenced=ifelse(trial_data$valence=='gain',trial_data$mean_diff,
#                                      -1*trial_data$mean_diff)
# trial_data$horizon=as.factor(trial_data$horizon)
# quest_data=read.csv(file=paste0(data_path,'merged_questionnaire_data_2021-04-13.csv'))
# trial_quest_data=merge(trial_data,quest_data,by.x='ID',by.y='subject')
# trial_quest_data$med_anx=as.factor(ifelse(trial_quest_data$dass_anx<14,0,1))
# trial_quest_data_ff=trial_quest_data[trial_quest_data$free_trial==1,]
# save.image('horizon_preprocessed_data.Rdata')

# plot data ####
library(ggplot2)
trial_quest_data_ff_avg=aggregate(Rchosen~horizon*valence*uncR,
                                  data=trial_quest_data_ff,FUN=mean)
ggplot(trial_quest_data_ff_avg,aes(y=Rchosen,x=as.factor(uncR),
                                   color=interaction(valence,horizon)))+
  geom_point(size=20,shape='_')+
  labs(x='Relative Uncertainty of Right-sided Option',
       y='Proportion Right-sided Option Chosen')+
  theme_classic()+geom_hline(yintercept=0.5)

# frequentist analyses ####
library(lme4)
lm3=glmer(Rchosen~horizon*valence*(uncR+scale(mean_diff))+
            (horizon*valence*(uncR+scale(mean_diff))|ID),
          data=trial_quest_data_ff,family='binomial',
          control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=100000)))
summary(lm3)

# bayesian analyses ####
library(brms)
library(bayesplot)
library(rstan)
library(matrixStats)

#stan doesn't like NA in data
trial_quest_data_ff=trial_quest_data_ff[!is.na(trial_quest_data_ff$Rchosen),]

#start with intercept-only model
#get default priors: a bit wide but should work
get_prior(data=trial_quest_data_ff,family=bernoulli,
          Rchosen~1+(1|ID))

#run intercept-only model
intercept_only=brm(data=trial_quest_data_ff,family=bernoulli,
                   Rchosen~1+(1|ID),
                   iter=3000,warmup=500,chains=3,cores=3,seed=123)
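#optional check (a small addition, not strictly needed): prior_summary() shows
#the priors the fitted model actually used - the defaults listed by get_prior()
#unless overridden
prior_summary(intercept_only)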
#diagnostics:
check_hmc_diagnostics(intercept_only$fit)
traceplot(intercept_only$fit,pars=c('b_Intercept','sd_1'))

#plot model parameters: quick and dirty way
fit_io=extract(intercept_only$fit)
plot(density(fit_io$b_Intercept),xlim=c(-0.1,0.35))
abline(v=0)
plot(density(fit_io$sd_1))
plot(seq(1,190,by=1),colMedians(fit_io$r_1_1),pch=19,col='blue',
     ylab='Intercept Value',xlab='Participant Number')
abline(h=median(fit_io$b_Intercept),lwd=3)

#get raw stan code
io_code=make_stancode(data=trial_quest_data_ff,family=bernoulli,
                      Rchosen~1+(1|ID),save_model='intercept_only.stan')

#another way of running this - helpful for understanding steps/debugging
# io_code=make_stancode(data=trial_quest_data_ff,family=bernoulli,
#                       Rchosen~1+(1|ID))
# io_data=make_standata(data=trial_quest_data_ff,family=bernoulli,
#                       Rchosen~1+(1|ID))
# io_model=stan_model(model_code=io_code)
# io_fit=sampling(io_model,data=io_data,
#                 iter=3000,warmup=500,chains=3,cores=1,seed=123)

#prior sensitivity check - narrow prior for intercept
intercept_only_narrow=brm(data=trial_quest_data_ff,family=bernoulli,
                          Rchosen~1+(1|ID),
                          prior=c(prior(student_t(3,0,1),class=Intercept)),
                          iter=3000,warmup=500,chains=3,cores=3,seed=123)
fit_io_narrow=extract(intercept_only_narrow$fit)

#compare output with different priors to see if it changes things
plot(density(fit_io_narrow$b_Intercept),xlim=c(-0.1,0.35),ylim=c(0,16))
abline(v=0)
par(new=TRUE)
plot(density(fit_io$b_Intercept),xlim=c(-0.1,0.35),col='red',ylim=c(0,16))
plot(density(fit_io_narrow$sd_1),xlim=c(0.15,0.5),ylim=c(0,16))
par(new=TRUE)
plot(density(fit_io$sd_1),col='red',xlim=c(0.15,0.5),ylim=c(0,16))
plot(seq(1,190,by=1),colMedians(fit_io_narrow$r_1_1),pch=19,col='blue',
     ylab='Intercept Value',xlab='Participant Number')
abline(h=median(fit_io_narrow$b_Intercept),lwd=3)
points(seq(1,190,by=1),colMedians(fit_io$r_1_1),pch=19,col=rgb(1,0,0,0.3))

#build up model - abbreviated version
#first, test whether allowing effect of uncertainty to vary by person improves fit
uncertainty_only=brm(data=trial_quest_data_ff,family=bernoulli,
                     Rchosen~uncR+(1|ID),
                     iter=3000,warmup=500,chains=3,cores=3,seed=123)
print(summary(uncertainty_only$fit,pars=c('b_Intercept','b'))$summary)
uncertainty_ind=brm(data=trial_quest_data_ff,family=bernoulli,
                    Rchosen~uncR+(1+uncR|ID),
                    iter=3000,warmup=500,chains=3,cores=3,seed=123)
print(summary(uncertainty_ind$fit,pars=c('b_Intercept','b'))$summary)

#example of comparing fit: loo_compare sorts so best fitting model is on top
uncertainty_only=add_criterion(uncertainty_only,"waic")
uncertainty_ind=add_criterion(uncertainty_ind,"waic")
loo_compare(uncertainty_only,uncertainty_ind,criterion="waic")
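#optional: PSIS-LOO is often preferred over WAIC because its Pareto-k
#diagnostics flag observations where the estimate is unreliable; a minimal
#sketch using the same brms helpers - the conclusion should usually match
#the WAIC comparison above
uncertainty_only=add_criterion(uncertainty_only,"loo")
uncertainty_ind=add_criterion(uncertainty_ind,"loo")
loo_compare(uncertainty_only,uncertainty_ind,criterion="loo")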
#better to start with a simple model and build up in case debugging is needed;
#but, for this example, skip to the full model
full_code=make_stancode(data=trial_quest_data_ff,family=bernoulli,
                        Rchosen~1+horizon*valence*(uncR+scale(mean_diff))+
                          (1+horizon*valence*(uncR+scale(mean_diff))|ID),
                        save_model='full_model.stan')
full_model=brm(data=trial_quest_data_ff,family=bernoulli,
               Rchosen~1+horizon*valence*(uncR+scale(mean_diff))+
                 (1+horizon*valence*(uncR+scale(mean_diff))|ID),
               iter=3000,warmup=500,chains=3,cores=3,seed=123)
check_hmc_diagnostics(full_model$fit)
print(summary(full_model$fit,pars=c('b_Intercept','b'))$summary)

#fancier plotting with bayesplot::mcmc_intervals
mcmc_intervals(full_model,pars=c('b_Intercept','b_horizon10','b_valenceloss',
                                 'b_uncR','b_scalemean_diff','b_horizon10:valenceloss',
                                 'b_horizon10:uncR','b_horizon10:scalemean_diff',
                                 'b_valenceloss:uncR','b_valenceloss:scalemean_diff',
                                 'b_horizon10:valenceloss:uncR',
                                 'b_horizon10:valenceloss:scalemean_diff'),
               prob_outer=0.95)
mcmc_intervals(full_model,pars=c('b_uncR','b_horizon10:uncR',
                                 'b_valenceloss:uncR','b_horizon10:valenceloss:uncR'),
               prob_outer=0.95)

#how strong is the evidence? calculate the proportion of samples in one direction
#total # samples = # chains * (total iterations - warmup iterations) = 3*(3000-500) = 7500
fit_full=extract(full_model$fit,pars=c('b_Intercept','b_horizon10','b_valenceloss',
                                       'b_uncR','b_scalemean_diff','b_horizon10:valenceloss',
                                       'b_horizon10:uncR','b_horizon10:scalemean_diff',
                                       'b_valenceloss:uncR','b_valenceloss:scalemean_diff',
                                       'b_horizon10:valenceloss:uncR',
                                       'b_horizon10:valenceloss:scalemean_diff'))
sum(fit_full$`b_horizon10:uncR`>0)/7500 #100%
sum(fit_full$b_uncR>0)/7500 #98%

#look at correlations among effects
fit_full_corr=extract(full_model$fit,pars='cor_1')$cor_1
hist(rowMedians(fit_full_corr),main='median correlation among participant-level predictors')
fit_full_popcorr=vcov(full_model,correlation=TRUE)
hist(fit_full_popcorr,main='correlation among population-level predictors')

#predict responses from fitted model & compare to empirical data plotted above
pp_full=posterior_predict(full_model)
pp_full_median=colMedians(pp_full)
trial_quest_data_ff_pred=trial_quest_data_ff
trial_quest_data_ff_pred$Rchosen=pp_full_median
trial_quest_data_ff_pred$predict=1
trial_quest_data_ff$predict=0
trial_quest_data_ff_comb=rbind(trial_quest_data_ff,trial_quest_data_ff_pred)
trial_quest_data_ff_comb$predict=as.factor(trial_quest_data_ff_comb$predict)
levels(trial_quest_data_ff_comb$predict)=c("empirical","predicted")
trial_quest_data_ff_avgpred=aggregate(Rchosen~horizon*valence*uncR*predict,
                                      data=trial_quest_data_ff_comb,FUN=mean)
ggplot(trial_quest_data_ff_avgpred,aes(y=Rchosen,x=as.factor(uncR),
                                       color=interaction(valence,horizon)))+
  facet_wrap(~predict)+
  geom_point(size=20,shape='_')+
  labs(x='Relative Uncertainty of Right-sided Option',
       y='Proportion Right-sided Option Chosen')+
  theme_classic()+geom_hline(yintercept=0.5)

#extra: add a person-level (questionnaire) predictor, dass_anx
full_anx_code=make_stancode(data=trial_quest_data_ff,family=bernoulli,
                            Rchosen~1+scale(dass_anx)*horizon*valence*(uncR+scale(mean_diff))+
                              (1+horizon*valence*(uncR+scale(mean_diff))|ID),
                            save_model='full_anx_model.stan')
full_anx_model=brm(data=trial_quest_data_ff,family=bernoulli,
                   Rchosen~1+scale(dass_anx)*horizon*valence*(uncR+scale(mean_diff))+
                     (1+horizon*valence*(uncR+scale(mean_diff))|ID),
                   iter=3000,warmup=500,chains=3,cores=3,seed=123)
check_hmc_diagnostics(full_anx_model$fit)
print(summary(full_anx_model$fit,pars=c('b_Intercept','b'))$summary)
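#same directional-evidence check as above, here for the anxiety x horizon x
#valence x uncertainty interaction; a small sketch - taking mean() over the
#extracted draws avoids hard-coding the total number of posterior samples
fit_anx=extract(full_anx_model$fit,
                pars='b_scaledass_anx:horizon10:valenceloss:uncR')
mean(fit_anx$`b_scaledass_anx:horizon10:valenceloss:uncR`>0)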
mcmc_intervals(full_anx_model,pars=c('b_uncR','b_horizon10:uncR',
                                     'b_valenceloss:uncR','b_horizon10:valenceloss:uncR',
                                     'b_scaledass_anx:uncR','b_scaledass_anx:horizon10:uncR',
                                     'b_scaledass_anx:valenceloss:uncR',
                                     'b_scaledass_anx:horizon10:valenceloss:uncR'),
               prob_outer=0.95)
save.image('horizon_regressions_output.Rdata')