JOFC on CxP data

YP, MT, JV, CEP, …

Sat Jun 8 13:36:27 2013

Goal

The long term goal is to discover structure that is meaningful in terms of treatment outcomes.

Data

  1. C-data (time_series_cc200_20130402): C is for connectome, which just refers, in this case, to the fMRI data. The 197 regions are obtained from the 'all voxel' data via parsing voxels into regions. We already know the function relating the 197 to the all voxels case, because we wrote it. It is in the form of \( (region=197)\times (time=194)\times (subj=42) \). Unfortunately, the time-series across subjects are not aligned relative to one another.

  2. P-data (NEO2012.csv): P is short for phenotype, in this case, it is the 5 factor model (\( (subj=42)\times (factor=5) \)), which are the 5 factors: ENOAC (sometimes ordered differently and called OCEAN).

Supplements

  1. NEO2012.csv has 42 subjects and the following columns: ParticipantID,Sex,Age,E,N,O,A,C the last 5 are the “5 factors”

  2. NEO2012_linkage.csv has 42 subjects and 2 columns: the first is the ParticipantID, which is listed in the other file, the second is the file name. so it maps participant id to file, which we need.

  3. ScaledScales.xls: this is the file that had the original data that i extracted NEO2012.csv from. you shouldn't need it, but it is the raw data, so i didn't want to discard it.

C Data

dname <- "http://www.cis.jhu.edu/~parky/CGP/Data/time_series_cc200_20130402/"
urlfiles <- readHTMLTable(dname, skip.rows = 1:2)[[1]]$V2
urlfiles <- urlfiles[!is.na(urlfiles)]
files <- paste0(dname, urlfiles)
(n <- length(files))  # 42

require(flexmix)
coh <- ts <- spect <- spect_norm <- NULL
for (i in 1:length(files)) {
    ts[[i]] <- readMat(files[i])[[1]]  # 194 x 197
    spect[[i]] <- spectrum(t(ts[[i]]))$spec[6:40, ]  # 100 (freq) x 194 => (bandpass) 35 x 194
    spect_norm[[i]] <- scale(spect[[i]], center = FALSE, scale = colSums(spect[[i]]))  # normalization
    coh[[i]] <- KLdiv(spect_norm[[i]])  # Delta: 194 x 194
}

plot of chunk dissmatata

run spectrum (FFT) on ts for each region

plot of chunk unnamed-chunk-2 plot of chunk unnamed-chunk-2

bandpass and normalize it

plot of chunk unnamed-chunk-3 plot of chunk unnamed-chunk-3

calc KLdiv among regions

plot of chunk unnamed-chunk-4

calc normalized Hellinger distance among subjects

dissmat <- matrix(0, n, n)
for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
        dissmat[i, j] <- helldist(coh[[i]], coh[[j]])  # hellinger distance
    }
}
dissmat <- dissmat + t(dissmat)
diag(dissmat) <- 0

distC <- normalize(dissmat)
dim(distC)
plotmat(distC, "subject", "subject", "Hellinger")
hist(distC)
## [1] 42 42

plot of chunk plotC plot of chunk plotC

embed and cluster it

n <- nrow(distC)
mdsC <- cmdscale(distC, n - 1)
elbowC <- getElbows(mdsC, 3)
mcC <- Mclust(mdsC[, 1:elbowC[2]])
plot(mcC, "BIC")

plot of chunk clustC plot of chunk clustC

P Data

require(gdata)
fname <- "http://www.cis.jhu.edu/~parky/CGP/Data/NEO2012.csv"
neo <- read.csv(fname)

pdat <- neo[, 4:8]
distP <- as.matrix(dist(pdat))
plotmat(as.matrix(pdat), "subject", "factor")
distP <- normalize(distP)
dim(distP)

plotmat(distP, "subject", "subject")
hist(distP)

mdsP <- cmdscale(distP, n - 1)
elbowP <- getElbows(mdsP, 3)
mcP <- Mclust(mdsP[, 1:elbowP[3]])
plot(mcP, "BIC")

plot of chunk distPdata

plot of chunk plotP plot of chunk plotP plot of chunk plotP plot of chunk plotP

JOFC

Use distC = distP

distC <- distP
# distC <- jitter(distP,100) plotmat(distC,main='distC =
# jitter(distP,100)') plotmat(distP,main='distP')
# plotmat(abs(distC-distP),main='abs(distC-distP)')

## form omnibus matrix
meanD <- pmax(distC, distP)  #(distC+distP)/2
omni <- rbind(cbind(distC, meanD), cbind(t(meanD), distP))
dim(omni)
## [1] 84 84
## embed
n2 <- n * 2
mds <- cmdscale(omni, n2 - 1)
# w <- 0.01 W1 <- matrix(w,n,n) W2 <- matrix(0,n,n) diag(W2) <- 1-w W <-
# rbind(cbind(W1,W2),cbind(t(W2),W1)) mds <-
# smacofSym(omni,n2-1,weightmat=W)$conf

plot of chunk JOFC-omni-w

elbow <- getElbows(mds, 3)

plot of chunk JOFC-mds

knee <- elbow[1]
dat <- mds[, 1:knee]

plot of chunk JOFC-scatter

permutate P

nummc <- 1000
dist0 <- matrix(0, nummc + 1, n)
set.seed(12345)
for (mc in 1:(nummc + 1)) {
    if (mc == 1) {
        idx <- 1:n
    } else {
        idx <- sample(n)
    }
    distP0 <- distP[idx, idx]

    meanD0 <- pmax(distC, distP0)  #distC * distP0 #(distC+distP0)/2
    omni0 <- rbind(cbind(distC, meanD0), cbind(t(meanD0), distP0))

    dat0 <- cmdscale(omni0, knee)
    dist0[mc, ] <- rowSums((dat0[1:n, ] - dat0[(n + 1):n2, ])^2)
}
dist0 <- apply(dist0, 1, mean)

hist(dist0)
abline(v = dist0[1], col = 2, lty = 2, lwd = 2)

plot of chunk null


sum(dist0 > dist0[1])/nummc
## [1] 1