#! /usr/bin/Rscript
###
### Exploritory analysis of effects of VN-SGM on recovering the true
### match to a particular voi known to be in Facebook, to be found in
### the survey network, using only the shared vertices, using various
### seed-set sizes.
###
### Heather Gaddy Patsolic
### 2017 <[email protected]>
### S.D.G
#
require(raster)
require(igraph)
require(clue)
require(foreach)
require(ggplot2)
require(reshape2)
require(VN)
data('HSgraphs')
## User: If false, loads rdata file. If true, runs full script.
PERMUTE <- TRUE
run <- FALSE
if(run){
#require(doMC); nCores <- 48
require(doMPI)
# Register Cores for parallelism
if("package:doMPI" %in% search()){
cl <- startMPIcluster()
registerDoMPI(cl)} else {
registerDoMC(nCores)
}
set.seed(1234)
adj2 <- get.adjacency(HSfrgraphcore)
if(PERMUTE){
perm2 <- sample(vcount(HSfrgraphcore),vcount(HSfrgraphcore),replace=FALSE)
adj2 <- adj2[perm2,perm2]
#map <- cbind(1:vcount(HSfrgraphcore),perm2)
map2 <- cbind(perm2,1:vcount(HSfrgraphcore))
sp <- sort(perm2,index.return=TRUE)
map <- map2[sp$ix,]
colnames(map) <- c("core1id","core2id")
g.frscrambled <- graph_from_adjacency_matrix(adj2,mode="max")
}else{
map <- cbind(1:vcount(HSfrgraphcore),1:vcount(HSfrgraphcore))
colnames(map) <- c("core1id","core2id")
g.frscrambled <- HSfrgraphcore
}
hhop <- 2 #1
ell <- 2 # 1
g <- 0.1
R <- 50
iter <- 20
voi <- 27
fb.voi <- map[voi,1]
fr.voi <- map[voi,2]
Nhtmp <- unlist(ego(HSfbgraphcore,hhop,nodes=fb.voi,mindist=1)) # mindist=0: close, 1: open
nbdx <- length(Nhtmp)
NhtmpR <- unlist(ego(g.frscrambled,hhop,nodes=fr.voi,mindist=1)) # mindist=0: close, 1: open
Nhtmp1 <- unlist(ego(HSfbgraphcore,(hhop+1),nodes=fb.voi,mindist=1)) # mindist=0: close, 1: open
nbdx1 <- length(Nhtmp1)
NhtmpR1 <- unlist(ego(g.frscrambled,(hhop+1),nodes=fr.voi,mindist=1)) # mindist=0: close, 1: open
len2 <- 100
comb1 <- list(combn(Nhtmp,2),combn(Nhtmp,3),combn(Nhtmp,4),combn(Nhtmp,5),
combn(Nhtmp,6))# ,combn(Nhtmp,7),combn(Nhtmp,8),combn(Nhtmp,9))
#comb1 <- list(combn(Nhtmp1,2),combn(Nhtmp1,3),combn(Nhtmp1,4),combn(Nhtmp1,5),
#combn(Nhtmp1,6)) # ,combn(Nhtmp1,7),combn(Nhtmp1,8),combn(Nhtmp1,9))
#ss <- 1234 #56789 #31415926 #1222234
#set.seed(ss)
#seedsidx <- lapply(comb,function(x){sort(sample(ncol(x),len2,replace=FALSE))})
ss <- 1234 #56789 #31415926 #1222234
set.seed(ss)
seedsidx <- lapply(comb1,function(x){sort(sample(ncol(x),len2,replace=FALSE))})
maxs <- 9
maxs1 <- 6
SET27 <- foreach(s = 2:maxs)%:%
foreach(le = 1:len2,.combine="rbind")%dopar%{
set.seed(537*s+18400*le)
#sid <- seedsidx[[(s-1)]][le]
#seeds <- comb[[(s-1)]][,sid]
#S <- map[seeds,]
if(s <= maxs1){
sid <- seedsidx[[(s-1)]][le]
seeds1 <- comb1[[(s-1)]][,sid]
S <- map[seeds1,]
}else{
seeds1 <- sample(Nhtmp,s,replace=FALSE)
S <- map[seeds1,]
}
frtmp <- sum(S[,2]>fr.voi)+fr.voi
L1 <- localnbd(fb.voi,S,HSfbgraphcore,g.frscrambled,hhop,ell,R,g)
L2 <- localnbd(fb.voi,S,HSfbgraphcore,g.frscrambled,hhop,(ell+1),R,g)
set.seed(537*s+18400*le)
L11 <- localnbd(fb.voi,S,HSfbgraphcore,g.frscrambled,(hhop+1),(ell+1),R,g)
L21 <- localnbd(fb.voi,S,HSfbgraphcore,g.frscrambled,(hhop+1),(ell+2),R,g)
L <- L1
if(L$case=="possible"){
if(frtmp %in% L$labelsGxp){
#if(length(L$Cxp)==1){
# rankvoi <- 0
#}else{
Ps <- L$P
Ps <- Ps[1:length(L$labelsGx),1:length(L$labelsGxp)]
rankPs <- pass2ranksuplus(Ps)
rankvoi <- rankPs[(nrow(S)+1), which(L$labelsGxp==(frtmp))]
## note, this assumes all seeds used which
## will be the case in our rigged example ---
## labeling identification is different if not
### all seeds used in vnsgm algorithm
rankvoi <- (rankvoi - 1)/(length(L$Cxp)-1)
}else{
rankvoi <- NA
}
}else{
rankvoi <- NA
}
if(L2$case=="possible"){
if(frtmp %in% L2$labelsGxp){
#if(length(L2$Cxp)==1){
# rankvoi2 <- 0
#}else{
Ps <- L2$P
Ps <- Ps[1:length(L2$labelsGx),1:length(L2$labelsGxp)]
rankPs <- pass2ranksuplus(Ps)
rankvoi2 <- rankPs[(nrow(S)+1),
which(L2$labelsGxp==(frtmp))]
rankvoi2 <- (rankvoi2 - 1)/(length(L2$Cxp)-1)
}else{
rankvoi2 <- NA
}
}else{
rankvoi2 <- NA
}
if(L11$case=="possible"){
if(frtmp %in% L11$labelsGxp){
#if(length(L11$Cxp)==1){
# rankvoi11 <- 0
#}else{
Ps <- L11$P
Ps <- Ps[1:length(L11$labelsGx),1:length(L11$labelsGxp)]
rankPs <- pass2ranksuplus(Ps)
rankvoi11 <- rankPs[(nrow(S)+1),
which(L11$labelsGxp==(frtmp))]
rankvoi11 <- (rankvoi11 - 1)/(length(L11$Cxp)-1)
}else{
rankvoi11 <- NA
}
}else{
rankvoi11 <- NA
}
if(L21$case=="possible"){
if(frtmp %in% L21$labelsGxp){
#if(length(L21$Cxp)==1){
# rankvoi21 <- 0
#}else{
Ps <- L21$P
Ps <- Ps[1:length(L21$labelsGx),1:length(L21$labelsGxp)]
rankPs <- pass2ranksuplus(Ps)
rankvoi21 <- rankPs[(nrow(S)+1),
which(L21$labelsGxp==(frtmp))]
rankvoi21 <- (rankvoi21 - 1)/(length(L21$Cxp)-1)
}else{
rankvoi21 <- NA
}
}else{
rankvoi21 <- NA
}
list(normrank1=rankvoi,numcand1=length(L$Cxp),
normrank2=rankvoi2,numcand2=length(L2$Cxp),
normrank11=rankvoi11,numcand11=length(L11$Cxp),
normrank21=rankvoi21,numcand21=length(L21$Cxp))
}
countnr10 <- Reduce(c,lapply(SET27,function(x){
length(which(rbind(
as.numeric(as.character(x[,1])))==0))}))
countnr1g.5 <- Reduce(c,lapply(SET27,function(x){
length(which(rbind(
as.numeric(as.character(x[,1]))>=0.5)))}))
count1imp <- Reduce(c,lapply(SET27,function(x){sum(is.na(x[,1]))}))
countnr20 <- Reduce(c,lapply(SET27,function(x){
length(which(rbind(
as.numeric(as.character(x[,3])))==0))}))
countnr2g.5 <- Reduce(c,lapply(SET27,function(x){
length(which(rbind(
as.numeric(as.character(x[,3]))>=0.5)))}))
count2imp <- Reduce(c,lapply(SET27,function(x){sum(is.na(x[,3]))}))
countnr10h2 <- Reduce(c,lapply(SET27,function(x){
length(which(rbind(
as.numeric(as.character(x[,5])))==0))}))
countnr1g.5h2 <- Reduce(c,lapply(SET27,function(x){
length(which(rbind(
as.numeric(as.character(x[,5]))>=0.5)))}))
count1imph2 <- Reduce(c,lapply(SET27,function(x){sum(is.na(x[,5]))}))
countnr20h2 <- Reduce(c,lapply(SET27,function(x){
length(which(rbind(
as.numeric(as.character(x[,7])))==0))}))
countnr2g.5h2 <- Reduce(c,lapply(SET27,function(x){
length(which(rbind(
as.numeric(as.character(x[,7]))>=0.5)))}))
count2imph2 <- Reduce(c,lapply(SET27,function(x){sum(is.na(x[,7]))}))
if(PERMUTE){
save.image("psxinc_ss1234h12ell123.RData")
}else{
save.image("idsxinc_ss1234h12ell123.RData")
}
}else{
if(PERMUTE){
load("psxinc_ss1234h12ell123.RData")
}else{
load("idsxinc_ss1234h12ell123.RData")
}
}
##################################################################
#Plots to include s=1 seed
require(ggplot2)
require(reshape2)
if(PERMUTE){
load("../psummaryplot/psummary_h12ell123.RData")
}else{
load("../psummaryplot/idsummary_h12ell123.RData")
}
### Generate all plots
#h1l1 <- 7
#h1l2 <- 8
#h2l2 <- 10
#h2l3 <- 12
nrankxpBad_h1l2 <- length(which(as.numeric(as.character(
SET[[27]]$nrvois_h1l2))>=0.5))
nrankxpBad_h2l2 <- length(which(as.numeric(as.character(
SET[[27]]$nrvois_h2l2))>=0.5))
nrankxpBad_h2l3 <- length(which(as.numeric(as.character(
SET[[27]]$nrvois_h2l3))>=0.5))
### >> First case with h1 ell1
#countnr <- c(SET[[27]]$nrankxp0,countnr10)
#countnrg.5 <- c(SET[[27]]$nrankxpBad,countnr1g.5)
#countimp <- c(SET[[27]]$imp2,count1imp)
#nbdx12 <- c(SET[[27]]$potseeds,rep(100,8))
### >> Case with h1 ell2
#countnr <- c(length(which(as.vector(unlist(SET[[27]][h1l2]))==0)),countnr10)
#countnrg.5 <- c(nrankxpBad_h1l2,countnr1g.5)
#countimp <- unlist(c(SET[[27]][h1l2+1],count2imp))
#nbdx12 <- c(SET[[27]]$potseeds,rep(100,8))
### >> Case with h2 ell2
countnr <- c(length(which(as.vector(unlist(SET[[27]]$nrvois_h2l2))==0)),countnr10)
countnrg.5 <- c(nrankxpBad_h2l2,countnr1g.5)
countimp <- unlist(c(sum(is.na(SET[[27]]$nrvois_h2l2)),count1imp))
nbdx12 <- c(47,rep(100,8)) # |N_2(x)|=47
### >> Case with h2 ell3
#countnr <- c(length(which(as.vector(unlist(SET[[27]][h2l3]))==0)),countnr20h2)
#countnrg.5 <- c(nrankxpBad_h2l3,countnr2g.5h2)
#countimp <- unlist(c(SET[[27]][h2l3+1],count2imph2))
#nbdx12 <- c(47,rep(100,8)) # |N_2(x)|=47
countbw <- nbdx12 - countnr - countnrg.5 - countimp
dfsum1 <- data.frame(s = c(1:9),"zero"=countnr,
"between"=countbw,"chance"=countnrg.5, "na"=countimp)
ds1 <- melt(dfsum1,id="s")
colnames(ds1) <- c("s","group","count")
ds1$groupf <- factor(ds1$group,levels=c("na","chance","between","zero"))
#levels(ds1$group) <- c("0","between","chance","NA")
cpPalpurple<- rev(c('#a6dba0','#008837','#c2a5cf','#7b3294'))
pAll <-
ggplot(ds1, aes(x=s, y=count, group=groupf, fill=groupf)) +
scale_x_continuous(breaks=c(1:9)) +
geom_bar(stat = 'identity') +
theme(text=element_text(size=20)) +
scale_fill_manual(name =expression(paste(tau,"(x')")), values = cpPalpurple) +
ylab("count") +
xlab(expression(s[x])) +
#xlab("sx") +
labs(title="VN-SGM on HS data using VOI=27")
#print(pAll)
if(PERMUTE){
main <- 'pvoi27sinc_ell2ss1234_h2ell2.pdf'
}else{
main <- 'idvoi27sinc_ell2ss1234_h2ell2.pdf'
}
#pdf(main)
print(pAll)
#dev.off()
##### Lattice plot
v271s22 <- c(SET[[27]]$nrvois_h2l2,rep(NA,(100-length(SET[[27]]$nrvois_h2l2))))
#v271s23 <- c(as.vector(unlist(SET[[27]][h1l2])),rep(NA,(100-SET[[27]]$potseeds)))
#v271s33 <- c(SET[[27]]$nrvois_h2l2,rep(NA,(100-47)))
#v271s34 <- c(as.vector(unlist(SET[[27]][h2l3])),rep(NA,(100-47)))
sh22 <- 1
sh23 <- 3
sh33 <- 5
sh34 <- 7
lat <- as.matrix(t(Reduce(rbind,lapply(SET27,function(x){Reduce(c,x[,sh22])}))))
lat <- cbind(v271s22,lat)
dfl <- data.frame(lat)
colnames(dfl) <- c(1:9) #paste0("s=", 1:maxs) #add = sign 20160727
d2 <- melt(dfl)
## No id variables; using all as measure variables
colnames(d2) <- c("s","value")
p27 <-
ggplot(data=d2, aes(x=value, y =..count.., colour=s, fill=s)) +
geom_histogram(binwidth=.05) + facet_wrap(~ s) +
theme(text=element_text(size=20)) +
xlab(expression(paste(tau,"(x')"))) +
ggtitle("Normalized Comparison of Seeds for HS Data")
#print(p27)
if(PERMUTE){
mainlat <- 'pvoi27sincLattice_ell2ss1234_h2ell2.pdf'
}else{
mainlat <- 'idvoi27sincLattice_ell2ss1234_h2ell2.pdf'
}
#pdf(mainlat,width=8,height=8)
print(p27)
## Warning: Removed 129 rows containing non-finite values (stat_bin).
#dev.off()
## Saved without pdfs. just generate one plot at a time from the data.
#save.image("HSundircoreVOI27sxinc_ss1234h12ell123.RData")
#if(run){
# if(exists("cl")){
# closeCluster(cl)
# mpi.quit()
# }
#}
# Time:
## Working status:
### Comments:
####Soli Deo Gloria