CCC on Mitya's Data

YP & VL & DS & MT & NL & CEP & …
Department of Applied Mathematics and Statistics
Johns Hopkins University
Fri Mar 6 08:18:19 2015

Cleaning the data

# Alternative source (left disabled): read the GraphML file over HTTP
# instead of using the local binary.
# fname <- url("http://www.cis.jhu.edu/~parky/Data/drosophila_retina_1.graphml")
# g <- read.graph(fname, format = "graphml")

# Restore the saved objects from the local binary file; the next statement
# relies on that file providing an object named `g`.
fname <- "drosophila_retina.Rbin"
load(fname)

# Bring `g` up to the graph data format of the installed igraph version.
g <- upgrade_graph(g)

summary(g)

# IGRAPH DN-- 1781 33641 -- 
# + attr: source (g/c), info (g/c), name (v/c), x (v/n), y (v/n), z
# | (v/n), M (v/n), pre.x (e/n), post.x (e/n), pre.y (e/n), post.y
# | (e/n), pre.z (e/n), post.z (e/n), Proofreading.Details (e/c)

plot of chunk plot-orig-g-deg

Fig 1.

# simplify(): make g unweighted & hollow (no multi-edges, no self-loops);
# as.undirected(): make g undirected (symmetric).
g <- as.undirected(simplify(g))

# IGRAPH UN-- 1781 8911 -- 
# + attr: source (g/c), info (g/c), name (v/c), x (v/n), y (v/n), z
# | (v/n), M (v/n)

plot of chunk reorder

Fig 2.

## remove the argmax(deg): drop the single highest-degree vertex
maxdv <- which.max(degree(g))
g <- delete.vertices(g, maxdv)

## find the largest connected component (lcc)
cc <- clusters(g)
# Tabulate component sizes. `membership` is spelled out in full: the
# abbreviated `cc$memb` only resolved through `$` partial matching.
table(cc$membership)

# 
#    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
# 1748    1    1    1    1    1    1    1    3    2    1    1    1    1    1 
#   16   17   18   19   20   21   22   23   24   25   26 
#    2    2    2    1    1    1    1    2    1    1    1

# Keep only the vertices belonging to the largest connected component.
# `membership` is written out in full instead of relying on `$` partial
# matching of the abbreviated `cc$memb`.
g <- induced.subgraph(g, which(cc$membership == which.max(cc$csize)))
A <- g[] # adjacency matrix of the reduced graph
summary(g)

# IGRAPH UN-- 1748 7977 -- 
# + attr: source (g/c), info (g/c), name (v/c), x (v/n), y (v/n), z
# | (v/n), M (v/n)

plot of chunk plot-clean-A

Fig 3.

plot of chunk plot-clean-g

Fig 4.

Inference

Given the Fly Data Graph \(G\),
\(skmeans \circ ase(G) \rightarrow \hat{H}_r, r=1,\ldots,\hat{R}\),
\(\hat{R} = 8\); subgraph orders = 184, 214, 228, 365, 232, 148, 138, 239.
plot of chunk skmeans-A

Fig 5.

plot of chunk skmeans-g

Fig 6.

Fig 7.

Fig 8.

    # Compare the vertex attribute "M" stored on g with the labels in membp
    # (membp is defined outside this excerpt).
    M <- get.vertex.attribute(graph = g, name = "M")
    adjustedRandIndex(M, membp)

# [1] 0.2040246

#                M1       M2       M3       M4       M5       M6       M7 
#  166.000  299.000  409.000  549.000  589.000  669.000  729.000  879.000 
#       M8       M9      M10 
# 1039.000 1249.000 1378.111

plot of chunk layer2

Fig 9.

Take the largest connected component of each of the 8 \(\hat{H}_r\); their orders are 173, 193, 217, 363, 224, 114, 126, 198.

Fig 10.

Fig 11.

Now, compute kernel-based distance for motif detection:

# Re-embed g using the membp labels, then build the matrix S from the result
# (reembed, computeS and image2 are helpers defined outside this excerpt).
aa <- reembed(g, 5, membp)
S <- computeS(aa, 0.2)

# Label the rows and columns of S with 1..nc, where nc comes from pamkout.
colnames(S) <- 1:pamkout$nc
rownames(S) <- 1:pamkout$nc

image2(S, text.cex = 0.8, round = 0, srt = 0)

plot of chunk kernel

# Cluster using S as a dissimilarity (diss = TRUE), trying cluster counts
# from 1 up to nc - 1, then print the resulting assignment.
bb <- pamk(S, krange = 1:(pamkout$nc - 1), diss = TRUE)
bb[["pamobject"]][["clustering"]]

# 1 2 3 4 5 6 7 8 
# 1 1 1 2 1 1 1 2

Fig 12.

plot of chunk asw-plot

Fig 13.

Fig 14.