JOFC on CxP data

YP, MT, JV, CEP, …

Fri Aug 30 11:13:37 2013

Goal

The long term goal is to discover structure that is meaningful in terms of treatment outcomes.

Data

C-data (time_series_cc200_20130402): C is for connectome, which just refers, in this case, to the fMRI data. The 197 regions are obtained from the 'all voxel' data via parsing voxels into regions. We already know the function relating the 197 to the all voxels case, because we wrote it. It is in the form of \((region=197)\times (time=194)\times (subj=42)\). Unfortunately, the time-series across subjects are not aligned relative to one another.
P-data (NEO2012.csv): P is short for phenotype, in this case, it is the 5 factor model (\((subj=42)\times (factor=5)\)), which are the 5 factors: ENOAC (sometimes ordered differently and called OCEAN).

Supplements

NEO2012.csv has 42 subjects and the following columns: ParticipantID,Sex,Age,E,N,O,A,C the last 5 are the “5 factors”
NEO2012_linkage.csv has 42 subjects and 2 columns: the first is the ParticipantID, which is listed in the other file, the second is the file name. so it maps participant id to file, which we need.
ScaledScales.xls: this is the file that had the original data that i extracted NEO2012.csv from. you shouldn't need it, but it is the raw data, so i didn't want to discard it.

## Note: no visible global function definition for 'getWindowsHandle' 
## Note: no visible global function definition for 'getWindowsHandle' 
## Note: no visible global function definition for 'getWindowsHandle'

C Data

dname <- "http://www.cis.jhu.edu/~parky/CGP/Data/time_series_cc200_20130402/"
urlfiles <- readHTMLTable(dname, skip.rows = 1:2)[[1]]$V2
urlfiles <- urlfiles[!is.na(urlfiles)]
files <- paste0(dname, urlfiles)
(n <- length(files))  # 42

require(flexmix)
coh <- ts <- spect <- spect_norm <- NULL
for (i in 1:length(files)) {
    ts[[i]] <- readMat(files[i])[[1]]  # 194 x 197
    spect[[i]] <- spectrum(t(ts[[i]]))$spec[6:40, ]  # 100 (freq) x 194 => (bandpass) 35 x 194
    spect_norm[[i]] <- scale(spect[[i]], center = FALSE, scale = colSums(spect[[i]]))  # normalization
    coh[[i]] <- KLdiv(spect_norm[[i]])  # Delta: 194 x 194
}

# Note: no visible binding for global variable 'fields' 
# Note: no visible binding for global variable 'RColorBrewer' 
# Note: no visible global function definition for 'brewer.pal' 
# Note: no visible global function definition for 'image.plot'

plot of chunk dissmatata

run spectrum (FFT) on ts for each region

plot of chunk unnamed-chunk-2

bandpass and normalize it

plot of chunk unnamed-chunk-3

calc KLdiv among regions

plot of chunk unnamed-chunk-4

calc normalized Hellinger distance among subjects

dissmat <- matrix(0, n, n)
for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
        dissmat[i, j] <- helldist(coh[[i]], coh[[j]])  # hellinger distance
    }
}
dissmat <- dissmat + t(dissmat)
diag(dissmat) <- 0

distC <- normalize(dissmat)

# [1] 42 42

plot of chunk plotC

embed and cluster it

n <- nrow(distC)
mdsC <- cmdscale(distC, n - 1)
elbowC <- getElbows(mdsC, 3)

plot of chunk unnamed-chunk-6

mcC <- Mclust(mdsC[, 1:elbowC[2]])
plot(mcC, "BIC")

plot of chunk unnamed-chunk-6

P Data

require(gdata)
fname <- "http://www.cis.jhu.edu/~parky/CGP/Data/NEO2012.csv"
neo <- read.csv(fname)

pdat <- neo[, 4:8]
distP <- as.matrix(dist(pdat))
plotmat(as.matrix(pdat), "subject", "factor")
distP <- normalize(distP)
dim(distP)

plotmat(distP, "subject", "subject")
hist(distP)

mdsP <- cmdscale(distP, n - 1)
elbowP <- getElbows(mdsP, 3)
mcP <- Mclust(mdsP[, 1:elbowP[3]])
plot(mcP, "BIC")

plot of chunk distPdata

plot of chunk plotP

JOFC

# distC <- distP set.seed(123) distC <- jitter(distP,17000)
# plotmat(distC,main='distC = jitter(distP,100)')
# plotmat(distP,main='distP')
# plotmat(abs(distC-distP),main='abs(distC-distP)')

## form omnibus matrix
meanD <- (distC + distP)/2
omni <- rbind(cbind(distC, meanD), cbind(t(meanD), distP))
dim(omni)

# [1] 84 84

## embed
n2 <- n * 2
mds <- cmdscale(omni, n2 - 1)

plot of chunk JOFC-omni-w

elbow <- getElbows(mds, 3)

plot of chunk JOFC-mds

knee <- elbow[1]
dat <- mds[, 1:knee]

plot of chunk JOFC-scatter

Validation

the null (ie, truly matched subjects) and alternative holdout pdf estimates of the oos distances!

loop through the data, embedding but holding out two matched pairs. call these two matched pairs (i1,i2) and (j1,j2).
- oos embed (i1,i2) under the null – that is, assuming they are a matched pair – and record d(i1,i2) in the embedded space.
- oos embed (j1,j2) under the null – that is, assuming they are a matched pair – and record d(j1,j2) in the embedded space.
- oos embed (i1,j2) under the null – that is, assuming they are a matched pair – and record d(i1,j2) in the embedded space.
- oos embed (j1,i2) under the null – that is, assuming they are a matched pair – and record d(j1,i2) in the embedded space.
calc null pdf estimates of the first two oos distances. calc alternative pdf estimates of the last two oos distances.
plot these two kernel estimates together on the same plot – null in blue, alt in red.

set.seed(12345)
nmc <- 1000
out <- foreach(mc = 1:nmc, .combine = "rbind") %dopar% {
    ij1 <- sample(1:n, 2, replace = FALSE)  # i1, j1
    ij2 <- ij1 + n  # i2, j2
    ij <- c(ij1, ij2)

    ## 'joint' oos
    Y <- fast.oos2(omni[-ij, -ij], knee, omni[ij, -ij])
    DY <- as.matrix(dist(Y$X.oos))
    D0 <- c(DY[1, 3], DY[2, 4])  # d(i1,i2), d(j1,j2)
    DA <- c(DY[1, 4], DY[2, 3])  # d(i1,j2), d(j1,i2)
    c(D0, DA)
}

plot of chunk plotpdf

plot of chunk plotpwr

Smacof OOS Experiment 1

n2 <- n * 2

## omni
meanD <- matrix(1, n, n) - diag(1, n, n)
omni1 <- rbind(cbind(distC, meanD), cbind(t(meanD), distP))
plotmat(omni1, main = expression(omni[1]))

plot of chunk W1


## smacof W
Wa <- matrix(1, n, n)
Wb <- matrix(0, n, n)
diag(Wb) <- 1
W1 <- rbind(cbind(Wa, Wb), cbind(t(Wb), Wa))  # 84 x 84
plotmat(W1, main = expression(W[1]))

plot of chunk W1


## smacof embedding of all 2*n points
knee <- 2
smacof <- smacofSym(omni1, knee, weightmat = W1)$conf
lab <- factor(rep(c("C", "P"), each = n))
plot(smacof[, 1], smacof[, 2], col = as.numeric(lab), pch = as.numeric(lab), 
    main = "C vs P via smacof embedding")
segments(smacof[1:n, 1], smacof[1:n, 2], smacof[(n + 1):n2, 1], smacof[(n + 
    1):n2, 2], col = "orange")
legend("topright", c("C", "P"), col = 1:2, pch = 1:2)

plot of chunk W1

K <- 1
Pjhat <- matrix(0, n, knee)
for (i in 1:n) {
    j <- i + n  # matching P
    ij <- c(i, j)  ## (Ci,Pj) pair

    ## in sample without (Ci,Pj) pair
    Dij <- omni1[-ij, -ij]
    Wij <- W1[-ij, -ij]
    Ci <- smacof[i, ]
    Xij <- smacof[-ij, ]

    X <- smacofSym(Dij, knee, weightmat = Wij, init = Xij)$conf  # 82 x 5
    Chat <- X[1:(n - 1), ]
    Phat <- X[-c(1:(n - 1)), ]

    ## oos Ci only
    Di <- omni1[-j, -j]
    Wi <- matrix(0, nrow(omni1), ncol = ncol(omni1))
    Wi[i, 1:n] <- 1
    Wi[1:n, i] <- 1
    Wi <- Wi[-j, -j]
    within <- rep(1, n2)
    within[ij] <- 0
    Cihat[i, ] <- oosIM(Di, X, W = Wi, init = Ci, isWithin = within[-j])

    ## estimate Pj
    dChat <- outer(rowSums(Cihat^2), rowSums(Chat^2), "+") - (2 * Cihat %*% 
        t(Chat))
    dChat <- sqrt((dChat + abs(dChat))/2)
    ind <- order(dChat)  # index of k-NN from Cihat to Chat
    Pjhat[i, ] <- colMeans(Phat[ind[1:K], , drop = FALSE])
}

plot of chunk load1

JOFC on CxP data

YP, MT, JV, CEP, …

Goal

Data

Supplements

C Data

run spectrum (FFT) on ts for each region

bandpass and normalize it

calc KLdiv among regions

calc normalized Hellinger distance among subjects

embed and cluster it

P Data

JOFC

Validation

Smacof OOS Experiment 1

html

Smacof OOS Experiment 2