#! /usr/bin/Rscript
###
### Exploritory analysis of core vertices between the high school
### friendship and facebook networks. For one of the shared vertices
### in the facebook network, v, we randomly select 3 seeds from a 2-hop
### neighborhood around v so as to form the
### seed set S and then obtain the corresponding seed set S' in the
### survey network. We then match N_2(S) and N_2(S').
### We then add some excess vertices (10 from each Facebook unshared and
### Survey unshared, then all vertices) and using the same seed sets
### again match the 2-hop neighborhoods around the seed sets and
### evaluate the performance for these scenarios.
###
### Heather Gaddy Patsolic
### 2017 <[email protected]>
### S.D.G
#
require(raster)
require(igraph)
require(clue)
require(foreach)
require(ggplot2)
require(VN)
data('HSgraphs')
## User: If false, loads rdata file. If true, runs full script.
run <- FALSE
if(run){
#require(doMC); nCores <- 4
require(doMPI)
# Register Cores for parallelism
if("package:doMPI" %in% search()){
cl <- startMPIcluster()
registerDoMPI(cl)} else {
registerDoMC(nCores)
}
PERMUTE <- TRUE
set.seed(1234)
adj2 <- get.adjacency(HSfrgraphcore)
if(PERMUTE){
perm2 <- sample(vcount(HSfrgraphcore),vcount(HSfrgraphcore),replace=FALSE)
adj2 <- adj2[perm2,perm2]
#map <- cbind(1:vcount(HSfrgraphcore),perm2)
map2 <- cbind(perm2,1:vcount(HSfrgraphcore))
sp <- sort(perm2,index.return=TRUE)
map <- map2[sp$ix,]
colnames(map) <- c("core1id","core2id")
g.frscrambled <- graph_from_adjacency_matrix(adj2,mode="max")
}else{
map <- cbind(1:vcount(HSfrgraphcore),1:vcount(HSfrgraphcore))
colnames(map) <- c("core1id","core2id")
g.frscrambled <- HSfrgraphcore
}
hhop <- 2
ell <- 2
g <- 0.1
R <- 50
iter <- 20
voi <- 27
fbvoi <- map[voi,1]
frvoi <- map[voi,2]
Nhtmp <- unlist(ego(HSfbgraphcore,hhop,nodes=fbvoi,mindist=1)) # mindist=0: close, 1: open
cfblabelseeds <- map[Nhtmp,1]
cfrlabelseeds <- map[Nhtmp,2]
nbdx <- length(Nhtmp)
NhtmpR <- map[Nhtmp,2]
#NhtmpR <- unlist(ego(HSfrgraphcore,hhop,nodes=voi,mindist=1)) # mindist=0: close, 1: open
fbseedid <- map[Nhtmp,1]
frseedid <- map[NhtmpR,2]
len2 <- 5 #100 #50
comb <- list(combn(nbdx,2),combn(nbdx,3))
ss <- 1234
set.seed(ss)
seedsidx <- lapply(comb,function(x){sort(sample(ncol(x),100,replace=FALSE))})
nums <- 3
seedsidx <- seedsidx[[(nums-1)]]
lens <- length(seedsidx)
jvec <- c(5,10,15)
jvl <- length(jvec)
maxj <- 20
junkfb <- setdiff(V(HSfbgraphfull),coremap[,1])
junkfr <- setdiff(V(HSfriendsgraphfull),coremap[,2])
cjfb <- c(coremap[,1],junkfb)
cjfr <- c(coremap[,2],junkfr)
Afb <- as.matrix(get.adjacency(HSfbgraphfull))
Afr1 <- as.matrix(get.adjacency(HSfriendsgraphfull))
Afb <- Afb[cjfb,cjfb]
Afr2 <- Afr1[cjfr,cjfr]
Afr <- Afr2[c(perm2,setdiff(1:nrow(Afr2),perm2)),
c(perm2,setdiff(1:nrow(Afr2),perm2))]
cjfbS <- graph_from_adjacency_matrix(Afb,mode="undirected")
cjfrS <- graph_from_adjacency_matrix(Afr,mode="undirected")
#SET27j <- foreach(j = 1:maxj)%:%
SET27j <- foreach(ji = 1:jvl)%:%
foreach(le = 1:len2,.combine="rbind")%dopar%{
j <- jvec[ji]
set.seed(537*j+18400*le)
jfb <- sort(sample((length(coremap[,1])+1):length(cjfb),j))
jfr <- sort(sample((length(coremap[,2])+1):length(cjfr),j))
afb <- Afb[c(1:length(V(HSfbgraphcore)),jfb),
c(1:length(V(HSfbgraphcore)),jfb)]
afr <- Afr[c(1:length(V(HSfrgraphcore)),jfr),
c(1:length(V(HSfrgraphcore)),jfr)]
HScjfb <- graph_from_adjacency_matrix(afb,mode="undirected")
HScjfr <- graph_from_adjacency_matrix(afr,mode="undirected")
normrank <- rep(NA,length(seedsidx))
for(si in 1:length(seedsidx)){
sid <- seedsidx[si]
Sfb <- cfblabelseeds[comb[[2]][,sid]]
Sfr <- cfrlabelseeds[comb[[2]][,sid]]
frtmp <- sum(Sfr>frvoi)+frvoi
#Sfb <- which(cjfb %in% cfblabelseeds[comb[[2]][,sid]])
#nSfb <- setdiff(c(1:(82+j)),c(Sfb,fbvoi))
#afbS <- afb[c(Sfb,fbvoi,nSfb),c(Sfb,fbvoi,nSfb)]
#Sfr <- which(cjfr %in% cfrlabelseeds[comb[[2]][,sid]])
#frvoi <- which(cjfr==corefr[voi])
#nSfr <- setdiff(c(1:(82+j)),c(Sfr,frvoi))
#afrS <- afr[c(Sfr,frvoi,nSfr),c(Sfr,frvoi,nSfr)]
#cjfbS <- graph_from_adjacency_matrix(afbS,mode="undirected")
#cjfrS <- graph_from_adjacency_matrix(afrS,mode="undirected")
L <- localnbd(fbvoi,cbind(Sfb,Sfr),HScjfb,HScjfr,hhop,ell,R,g)
if(L$case=="possible"){
if(frtmp %in% L$labelsGxp){
#if(length(L$Cxp)==1){
# rankvoi <- 0
#}else{
Ps <- L$P
Ps <- Ps[1:length(L$labelsGx),1:length(L$labelsGxp)]
rankPs <- pass2ranksuplus(Ps)
rankvoi <- rankPs[(nums+1),
which(L$labelsGxp==(frtmp))]
## note, this assumes all seeds used which
## will be the case in our rigged example ---
## labeling identification is different if not
### all seeds used in vnsgm algorithm
rankvoi <- (rankvoi - 1)/(length(L$Cxp)-1)
}else{
rankvoi <- NA
}
}else{
rankvoi <- NA
}
normrank[si] <- rankvoi
}
nafreenr <- normrank[which(!is.na(normrank))]
mnr <- max(nafreenr)
punifmax <- function(x){
pumax <- punif(x,min=0,max=mnr)
pumax
}
pvalks <- ks.test(normrank,punifmax,alternative="greater")
numna <- sum(is.na(normrank))
list(pvalks=pvalks$p.value,normrank=normrank,numna=numna)
}
save.image("psubv27unshared_h2l2.RData")
}else{
load("psubv27unshared_h2l2.RData")
}
########################### 3 plots 0j, 10j, allj
load("../pvoi27_seedinc/psxinc_ss1234h12ell123.RData")
load("pv27fullgraphs_h2l2.RData")
require(ggplot2)
require(reshape2)
jadd <- 10
core <- Reduce(c,SET27[[2]][,1])
j5 <- Reduce(c,SET27j[[which(jvec==5)]][,2]$result.1)
j10 <- Reduce(c,SET27j[[which(jvec==jadd)]][,2]$result.1)
j15 <- Reduce(c,SET27j[[which(jvec==15)]][,2]$result.1)
jall <- Reduce(c,SET27ja[[3]]$normrank)
full <- cbind(core,j10,jall)
dfl <- data.frame(full)
colnames(dfl) <- c("0","10","all")
dfp1 <- melt(dfl)
## No id variables; using all as measure variables
colnames(dfp1) <- c("m","normrank")
full2 <- cbind(core,j5,j10,j15,jall)
dfl2 <- data.frame(full2)
colnames(dfl2) <- c("0","5","10","15","all")
dfp2 <- melt(dfl2)
## No id variables; using all as measure variables
colnames(dfp2) <- c("m","normrank")
##### Lattice plot
pjlat <-
ggplot(data=dfp1, aes(x=normrank, y = ..count.., colour=m,fill=m)) +
geom_histogram(binwidth=.05) + facet_wrap(~ m,nrow=1) +
theme(text=element_text(size=20)) +
#theme(axis.text.x = element_text(angle=90, hjust=-10)) +
xlab(expression(paste(tau,"(x')"))) +
ggtitle("HS with VOI=27 -- Inc m")
pjlat2 <-
ggplot(data=dfp2, aes(x=normrank, y = ..count.., colour=m,fill=m)) +
geom_histogram(binwidth=.05) + facet_wrap(~ m,nrow=1) +
theme(text=element_text(size=20)) +
xlab(expression(paste(tau,"(x')"))) +
ggtitle("HS with VOI=27 -- Inc m")
#pdf('./pHSundirJinc_voi27lattice_example10_h2l2.pdf',height=4.3,width=11)
print(pjlat)
## Warning: Removed 31 rows containing non-finite values (stat_bin).
#pdf('./HSundirJinc_voi27lattice_example10_h2l2.pdf',height=4,width=6)
#print(pjlat)
#dev.off()
#pdf('./pHSundirJinc_voi27lattice_example51015_h2l2.pdf',height=5,width=17)
#print(pjlat2)
#dev.off()
#if(run){
# if(exists("cl")){
# closeCluster(cl)
# mpi.quit()
# }
#}
# Time:
## Working status:
### Comments:
####Soli Deo Gloria