## Cleaning the data

# Original remote source of the graph (kept for reference, not executed):
#fname <- url("http://www.cis.jhu.edu/~parky/Data/drosophila_retina_1.graphml")
#g <- read.graph(fname, format="graphml")

# Restore the objects stored in the local binary file; the rest of the script
# expects this to bring a graph named `g` into the workspace.
fname <- "drosophila_retina.Rbin"
load(file = fname)
# Bring `g` up to the internal graph format of the installed igraph version.
g <- upgrade_graph(g)
summary(g)
# IGRAPH DN-- 1781 33641 -- 
# + attr: source (g/c), info (g/c), name (v/c), x (v/n), y (v/n), z
# | (v/n), M (v/n), pre.x (e/n), post.x (e/n), pre.y (e/n), post.y
# | (e/n), pre.z (e/n), post.z (e/n), Proofreading.Details (e/c)
Figure 1: original adjacency matrix

# Collapse multi-edges and drop self-loops (unweighted, hollow adjacency),
# then discard edge directions so the adjacency matrix becomes symmetric.
g <- as.undirected(simplify(g))
# Graph summary recorded at this point:
# IGRAPH UN-- 1781 8911 -- 
# + attr: source (g/c), info (g/c), name (v/c), x (v/n), y (v/n), z
# | (v/n), M (v/n)
Figure 2: adjacency matrix reordered by z location

## remove the argmax(deg): drop the single highest-degree vertex
## (which.max returns the first one if several vertices tie)
maxdv <- which.max(degree(g))
g <- delete_vertices(g, maxdv)

## find the largest connected component (lcc)
cc <- components(g)
# Use the full element name: `cc$memb` only worked through `$` partial
# matching of `membership`.
table(cc$membership)
# 
#    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
# 1748    1    1    1    1    1    1    1    3    2    1    1    1    1    1 
#   16   17   18   19   20   21   22   23   24   25   26 
#    2    2    2    1    1    1    1    2    1    1    1
# Keep only the vertices whose component id is that of the largest component.
g <- induced_subgraph(g, which(cc$membership == which.max(cc$csize)))
# Adjacency matrix of the cleaned graph, used by the plotting code later on.
A <- g[]
summary(g)
# IGRAPH UN-- 1748 7977 -- 
# + attr: source (g/c), info (g/c), name (v/c), x (v/n), y (v/n), z
# | (v/n), M (v/n)
Figure 3: adjacency matrix after clean up

Figure 4: a graph layout

0.1 Inference

Given the Fly Data Graph \(G\),
\(\mathrm{vlclust} \circ \mathrm{ase}(G) \rightarrow \hat{H}_r,\ r=1,\ldots,\hat{R}\),
with \(\hat{R} = 8\); the subgraph orders are 179, 190, 268, 163, 392, 143, 174, 239.

# dhat: presumably the embedding dimension behind sXhat -- TODO confirm.
# Rmax caps the number of clusters tried at 1.5 * dhat, rounded down.
dhat <- 13
Rmax <- floor(1.5 * dhat)

# Scan candidate cluster counts 2..Rmax; the result is used to choose Rhat.
cl.out <- vlclustpar(
  sXhat,
  krange = 2:Rmax,
  verbose = FALSE,
  clusinfo = TRUE
)
Rhat <- dim_select(cl.out$asw)
Rhat
# [1] 8

# Cluster again with the selected Rhat and a fixed seed.
cl.out <- vlclust(
  sXhat,
  R = Rhat,
  seed = 1234,
  verbose = FALSE,
  clusinfo = TRUE
)
membp <- cl.out$memb
#cl.out$clusinfo

# One colour per cluster, passed to the adjacency plot as `lcol`.
mycol2 <- rainbow(Rhat)
plotmemb(
  A, membp,
  main = "", drawborder = TRUE,
  lwd = .5, lcol = mycol2, lwdb = 2
)
Figure 5: adjacency matrix organized by clustering

Figure 6: the graph colored by clustering

Take the largest connected component of each of the 8 \(\hat{H}_r\), and compute a kernel-based distance for motif detection:

# Fix the RNG state before the steps below (original note: "145 for 12").
set.seed(12345) # 145 for 12
aa <- reembed(g, 5, membp)
S <- computeS(aa, 0.2)
# Label both dimensions of S with the cluster indices 1..Rhat.
colnames(S) <- 1:Rhat
rownames(S) <- 1:Rhat
#image2(S,text.cex=0.8,round=0,srt=0)
# S is handed to pamk as a dissimilarity matrix (diss = TRUE); the number of
# groups is searched over 1..(Rhat - 1).
bb <- pamk(
  S,
  krange = 1:(Rhat-1),
  diss = TRUE
)
bb$pamobject$clustering
# 1 2 3 4 5 6 7 8 
# 1 2 1 2 2 1 1 2
Figure 8: average silhouette plot

Figure 9: heatmap of Shat