Example for simulating and running the iCARH model

library(iCARH)

## Loading required package: rstan

## Loading required package: StanHeaders

## 
## rstan version 2.32.6 (Stan version 2.32.2)

## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)

## Loading required package: MASS

## Loading required package: ggplot2

## Loading required package: glue

library(abind)

Tp=4L # timepoints
N=10L # number of samples
J=14L # number of metabolites
K=2L # number of bacteria species
P=8L  # number of pathways

set.seed(12473)

For real data Build pathway matrices using iCARH.getPathwaysMat. Elements in KEGG id list may contain multiple KEGG ids per metabolite. If KEGG id unknown use : “Unk[number]”.

keggid = list("Unk1", "C03299","Unk2","Unk3",
 c("C08363", "C00712")  # allowing multiple ids per metabolite
 )
 pathways = iCARH.getPathwaysMat(keggid, "rno")

To simulate data use iCARH.simulate. Use path.names to manually choose pathways or simply specify the expected proportion of metabolites per pathway via path.probs.

# Example of manually picked pathways
# path.names = c("path:map00564","path:map00590","path:map00061","path:map00591",
#               "path:map00592","path:map00600","path:map01040","path:map00563")
# Specify expected proportion of metabolites per pathway
path.probs = 0.8
data.sim = iCARH.simulate(Tp, N, J, P, K, path.probs = 0.8, Zgroupeff=c(0,4),
                          beta.val=c(1,-1,0.5, -0.5))

XX = data.sim$XX
Y = data.sim$Y
Z = data.sim$Z
pathways = data.sim$pathways
XX[2,2,2] = NA #missing value example

Check inaccuracies between covariance and design matrices

pathways.bin = lapply(pathways, function(x) { y=1/(x+1); diag(y)=0; y})
adjmat = rowSums(abind::abind(pathways.bin, along = 3), dims=2)
cor.thresh = 0.7
# check number of metabolites in same pathway but not correlated
for(i in 1:Tp) print(sum(abs(cor(XX[i,,])[which(adjmat>0)])<cor.thresh))

## [1] 50
## [1] NA
## [1] 44
## [1] 66

Run iCARH model.

rstan::rstan_options(auto_write = TRUE)
options(mc.cores = 2)
# For testing, does not converge
fit.sim = iCARH.model(XX, Y, Z, groups=rep(c(0,1), each=5), pathways = pathways, control = list(max_treedepth=8),
                     iter = 4, chains = 1)

## 
## SAMPLING FOR MODEL 'iCARH' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.001261 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 12.61 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: WARNING: No variance estimation is
## Chain 1:          performed for num_warmup < 20
## Chain 1: 
## Chain 1: Iteration: 1 / 4 [ 25%]  (Warmup)
## Chain 1: Iteration: 2 / 4 [ 50%]  (Warmup)
## Chain 1: Iteration: 3 / 4 [ 75%]  (Sampling)
## Chain 1: Iteration: 4 / 4 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.006 seconds (Warm-up)
## Chain 1:                0.004 seconds (Sampling)
## Chain 1:                0.01 seconds (Total)
## Chain 1: 
## [1] "Stan warning: simpleWarning: There were 2 divergent transitions after warmup. See\nhttps://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup\nto find out why this is a problem and how to eliminate them.\n"

# Not run
# fit.sim = iCARH.model(XX, Y, Z, pathways, control = list(adapt_delta = 0.99, max_treedepth=10),
#                      iter = 2000, chains = 2, pars=c("YY","Xmis","Ymis"), include=F)

Check convergence

if(!is.null(fit.sim$icarh)){
  rhats = iCARH.checkRhats(fit.sim)
}

Processing results. Bacteria effects.

if(!is.null(fit.sim$icarh)){
  gplot = iCARH.plotBeta(fit.sim)
  gplot
}

Treatments effects

if(!is.null(fit.sim$icarh)){
  gplot = iCARH.plotTreatmentEffect(fit.sim)
  gplot
}

Pathway analysis

if(!is.null(fit.sim$icarh)){
  gplot = iCARH.plotPathwayPerturbation(fit.sim, path.names=names(data.sim$pathways))
  gplot
}

Normality assumptions

if(!is.null(fit.sim$icarh)){
  par(mfrow=c(1,2))
  iCARH.checkNormality(fit.sim)
}

WAIC

if(!is.null(fit.sim$icarh)){
  waic = iCARH.waic(fit.sim)
  waic
}

Posterior predictive checks MAD : mean absolute deviation between covariance matrices

if(!is.null(fit.sim$icarh)){
  mad = iCARH.mad(fit.sim)
  quantile(mad)
}

Get missing data

if(!is.null(fit.sim$icarh)){
  imp = iCARH.getDataImputation(fit.sim)
}