#! /usr/bin/Rscript
###
### Exploritory analysis of effects of VN-SGM on recovering the true
### match to a particular voi known to be in Facebook, to be found in
### the survey network, using only the shared vertices, using various
### seed-set sizes.
###
### Heather Gaddy Patsolic 
### 2017 <[email protected]>
### S.D.G 
#


require(raster)
require(igraph)
require(clue)
require(foreach)
require(ggplot2)
require(reshape2)
require(VN)
data('HSgraphs')


## User: If false, loads rdata file. If true, runs full script.
PERMUTE <- TRUE
run <- FALSE


if(run){
    #require(doMC); nCores <- 48
    require(doMPI)
    
    # Register Cores for parallelism 
    if("package:doMPI" %in% search()){
        cl <- startMPIcluster()
        registerDoMPI(cl)} else {
        registerDoMC(nCores)
        }
    

    set.seed(1234)
    adj2 <- get.adjacency(HSfrgraphcore)
    if(PERMUTE){
        perm2 <- sample(vcount(HSfrgraphcore),vcount(HSfrgraphcore),replace=FALSE)
        adj2 <- adj2[perm2,perm2]
        #map <- cbind(1:vcount(HSfrgraphcore),perm2)
        map2 <- cbind(perm2,1:vcount(HSfrgraphcore))
        sp <- sort(perm2,index.return=TRUE)
        map <- map2[sp$ix,]
        colnames(map) <- c("core1id","core2id")
        g.frscrambled <- graph_from_adjacency_matrix(adj2,mode="max")
    }else{
        map <- cbind(1:vcount(HSfrgraphcore),1:vcount(HSfrgraphcore))
        colnames(map) <- c("core1id","core2id")
        g.frscrambled <- HSfrgraphcore
    }
    
    hhop <- 2 #1
    ell <- 2 # 1
    g <- 0.1
    R <- 50
    iter <- 20

    voi <- 27
    fb.voi <- map[voi,1]
    fr.voi <- map[voi,2]

    Nhtmp <- unlist(ego(HSfbgraphcore,hhop,nodes=fb.voi,mindist=1)) # mindist=0: close, 1: open
    nbdx <- length(Nhtmp)
    NhtmpR <- unlist(ego(g.frscrambled,hhop,nodes=fr.voi,mindist=1)) # mindist=0: close, 1: open
    
    Nhtmp1 <- unlist(ego(HSfbgraphcore,(hhop+1),nodes=fb.voi,mindist=1)) # mindist=0: close, 1: open
    nbdx1 <- length(Nhtmp1)
    NhtmpR1 <- unlist(ego(g.frscrambled,(hhop+1),nodes=fr.voi,mindist=1)) # mindist=0: close, 1: open
    
    len2 <- 100
    
    comb1 <- list(combn(Nhtmp,2),combn(Nhtmp,3),combn(Nhtmp,4),combn(Nhtmp,5),
    combn(Nhtmp,6))# ,combn(Nhtmp,7),combn(Nhtmp,8),combn(Nhtmp,9))
    #comb1 <- list(combn(Nhtmp1,2),combn(Nhtmp1,3),combn(Nhtmp1,4),combn(Nhtmp1,5),
    #combn(Nhtmp1,6)) # ,combn(Nhtmp1,7),combn(Nhtmp1,8),combn(Nhtmp1,9))
    
    #ss <- 1234 #56789 #31415926  #1222234
    #set.seed(ss)
    #seedsidx <- lapply(comb,function(x){sort(sample(ncol(x),len2,replace=FALSE))})
    ss <- 1234 #56789 #31415926  #1222234
    set.seed(ss)
    seedsidx <- lapply(comb1,function(x){sort(sample(ncol(x),len2,replace=FALSE))})
    
    maxs <- 9
    maxs1 <- 6
    
    
    SET27 <- foreach(s = 2:maxs)%:%
           foreach(le = 1:len2,.combine="rbind")%dopar%{
              set.seed(537*s+18400*le)
    
                    #sid <- seedsidx[[(s-1)]][le]
                    #seeds <- comb[[(s-1)]][,sid]
                    #S <- map[seeds,]
                    
                    if(s <= maxs1){
                        sid <- seedsidx[[(s-1)]][le]
                        seeds1 <- comb1[[(s-1)]][,sid]
                        S <- map[seeds1,]
                    }else{
                        seeds1 <- sample(Nhtmp,s,replace=FALSE)
                        S <- map[seeds1,]
                    }
                    frtmp <- sum(S[,2]>fr.voi)+fr.voi
                
                L1 <- localnbd(fb.voi,S,HSfbgraphcore,g.frscrambled,hhop,ell,R,g)
                L2 <- localnbd(fb.voi,S,HSfbgraphcore,g.frscrambled,hhop,(ell+1),R,g)
    
                set.seed(537*s+18400*le)
    
                L11 <- localnbd(fb.voi,S,HSfbgraphcore,g.frscrambled,(hhop+1),(ell+1),R,g)
                L21 <- localnbd(fb.voi,S,HSfbgraphcore,g.frscrambled,(hhop+1),(ell+2),R,g)
                L <- L1
                if(L$case=="possible"){
                    if(frtmp %in% L$labelsGxp){
                    #if(length(L$Cxp)==1){
                    #    rankvoi <- 0
                    #}else{
                        Ps <- L$P
                        Ps <- Ps[1:length(L$labelsGx),1:length(L$labelsGxp)]
                        rankPs <- pass2ranksuplus(Ps) 
                        rankvoi <- rankPs[(nrow(S)+1), which(L$labelsGxp==(frtmp))] 
                        ## note, this assumes all seeds used which 
                        ## will be the case in our rigged example --- 
                        ## labeling identification is different if not 
                        ### all seeds used in vnsgm algorithm
                        rankvoi <- (rankvoi - 1)/(length(L$Cxp)-1)
                    }else{
                        rankvoi <- NA
                    }
                }else{
                    rankvoi <- NA
                }
            
                if(L2$case=="possible"){
                    if(frtmp %in% L2$labelsGxp){
                    #if(length(L2$Cxp)==1){
                    #    rankvoi2 <- 0
                    #}else{
                        Ps <- L2$P
                        Ps <- Ps[1:length(L2$labelsGx),1:length(L2$labelsGxp)]
                        rankPs <- pass2ranksuplus(Ps) 
                        rankvoi2 <- rankPs[(nrow(S)+1),     
                                          which(L2$labelsGxp==(frtmp))] 
                        rankvoi2 <- (rankvoi2 - 1)/(length(L2$Cxp)-1)
                    }else{
                        rankvoi2 <- NA
                    }
                }else{
                    rankvoi2 <- NA
                }
    
                if(L11$case=="possible"){
                    if(frtmp %in% L11$labelsGxp){
                    #if(length(L11$Cxp)==1){
                    #    rankvoi11 <- 0
                    #}else{
                        Ps <- L11$P
                        Ps <- Ps[1:length(L11$labelsGx),1:length(L11$labelsGxp)]
                        rankPs <- pass2ranksuplus(Ps) 
                        rankvoi11 <- rankPs[(nrow(S)+1),     
                                          which(L11$labelsGxp==(frtmp))] 
                        rankvoi11 <- (rankvoi11 - 1)/(length(L11$Cxp)-1)
                    }else{
                        rankvoi11 <- NA
                    }
                }else{
                    rankvoi11 <- NA
                }
            
                if(L21$case=="possible"){
                    if(frtmp %in% L21$labelsGxp){
                    #if(length(L21$Cxp)==1){
                    #    rankvoi21 <- 0
                    #}else{
                        Ps <- L21$P
                        Ps <- Ps[1:length(L21$labelsGx),1:length(L21$labelsGxp)]
                        rankPs <- pass2ranksuplus(Ps) 
                        rankvoi21 <- rankPs[(nrow(S)+1),     
                                          which(L21$labelsGxp==(frtmp))] 
                        rankvoi21 <- (rankvoi21 - 1)/(length(L21$Cxp)-1)
                    }else{
                        rankvoi21 <- NA
                    }
                }else{
                    rankvoi21 <- NA
                }
    
    
    
              list(normrank1=rankvoi,numcand1=length(L$Cxp),
                   normrank2=rankvoi2,numcand2=length(L2$Cxp),
                   normrank11=rankvoi11,numcand11=length(L11$Cxp),
                   normrank21=rankvoi21,numcand21=length(L21$Cxp))
    }

    countnr10 <- Reduce(c,lapply(SET27,function(x){
                         length(which(rbind(
                            as.numeric(as.character(x[,1])))==0))}))
    countnr1g.5 <- Reduce(c,lapply(SET27,function(x){
                         length(which(rbind(
                            as.numeric(as.character(x[,1]))>=0.5)))}))
    count1imp <- Reduce(c,lapply(SET27,function(x){sum(is.na(x[,1]))}))
    countnr20 <- Reduce(c,lapply(SET27,function(x){
                         length(which(rbind(
                            as.numeric(as.character(x[,3])))==0))}))
    countnr2g.5 <- Reduce(c,lapply(SET27,function(x){
                         length(which(rbind(
                            as.numeric(as.character(x[,3]))>=0.5)))}))
    count2imp <- Reduce(c,lapply(SET27,function(x){sum(is.na(x[,3]))}))
    
    countnr10h2 <- Reduce(c,lapply(SET27,function(x){
                         length(which(rbind(
                            as.numeric(as.character(x[,5])))==0))}))
    countnr1g.5h2 <- Reduce(c,lapply(SET27,function(x){
                         length(which(rbind(
                            as.numeric(as.character(x[,5]))>=0.5)))}))
    count1imph2 <- Reduce(c,lapply(SET27,function(x){sum(is.na(x[,5]))}))
    countnr20h2 <- Reduce(c,lapply(SET27,function(x){
                         length(which(rbind(
                            as.numeric(as.character(x[,7])))==0))}))
    countnr2g.5h2 <- Reduce(c,lapply(SET27,function(x){
                         length(which(rbind(
                            as.numeric(as.character(x[,7]))>=0.5)))}))
    count2imph2 <- Reduce(c,lapply(SET27,function(x){sum(is.na(x[,7]))}))

    if(PERMUTE){
        save.image("psxinc_ss1234h12ell123.RData")
    }else{
        save.image("idsxinc_ss1234h12ell123.RData")
    }
}else{
    if(PERMUTE){
        load("psxinc_ss1234h12ell123.RData")
    }else{
        load("idsxinc_ss1234h12ell123.RData")
    }
}

##################################################################
#Plots to include s=1 seed
require(ggplot2)
require(reshape2)
if(PERMUTE){
    load("../psummaryplot/psummary_h12ell123.RData")
}else{
    load("../psummaryplot/idsummary_h12ell123.RData")
}


### Generate all plots


#h1l1 <- 7
#h1l2 <- 8
#h2l2 <- 10
#h2l3 <- 12

nrankxpBad_h1l2 <- length(which(as.numeric(as.character(
                            SET[[27]]$nrvois_h1l2))>=0.5))
nrankxpBad_h2l2 <- length(which(as.numeric(as.character(
                            SET[[27]]$nrvois_h2l2))>=0.5))
nrankxpBad_h2l3 <- length(which(as.numeric(as.character(
                            SET[[27]]$nrvois_h2l3))>=0.5))
### >> First case with h1 ell1
#countnr <- c(SET[[27]]$nrankxp0,countnr10)
#countnrg.5 <- c(SET[[27]]$nrankxpBad,countnr1g.5)
#countimp <- c(SET[[27]]$imp2,count1imp)
#nbdx12 <- c(SET[[27]]$potseeds,rep(100,8))

### >> Case with h1 ell2
#countnr <- c(length(which(as.vector(unlist(SET[[27]][h1l2]))==0)),countnr10)
#countnrg.5 <- c(nrankxpBad_h1l2,countnr1g.5)
#countimp <- unlist(c(SET[[27]][h1l2+1],count2imp))
#nbdx12 <- c(SET[[27]]$potseeds,rep(100,8))

### >> Case with h2 ell2
countnr <- c(length(which(as.vector(unlist(SET[[27]]$nrvois_h2l2))==0)),countnr10)
countnrg.5 <- c(nrankxpBad_h2l2,countnr1g.5)
countimp <- unlist(c(sum(is.na(SET[[27]]$nrvois_h2l2)),count1imp))
nbdx12 <- c(47,rep(100,8)) # |N_2(x)|=47

### >> Case with h2 ell3
#countnr <- c(length(which(as.vector(unlist(SET[[27]][h2l3]))==0)),countnr20h2)
#countnrg.5 <- c(nrankxpBad_h2l3,countnr2g.5h2)
#countimp <- unlist(c(SET[[27]][h2l3+1],count2imph2))
#nbdx12 <- c(47,rep(100,8)) # |N_2(x)|=47


countbw <- nbdx12 - countnr - countnrg.5 - countimp

dfsum1 <- data.frame(s = c(1:9),"zero"=countnr,
                  "between"=countbw,"chance"=countnrg.5, "na"=countimp)
ds1 <- melt(dfsum1,id="s")
colnames(ds1) <- c("s","group","count")
ds1$groupf <- factor(ds1$group,levels=c("na","chance","between","zero"))
#levels(ds1$group) <- c("0","between","chance","NA")

cpPalpurple<- rev(c('#a6dba0','#008837','#c2a5cf','#7b3294'))

pAll <- 
    ggplot(ds1, aes(x=s, y=count, group=groupf, fill=groupf)) +
       scale_x_continuous(breaks=c(1:9)) +
       geom_bar(stat = 'identity') + 
       theme(text=element_text(size=20)) +
       scale_fill_manual(name  =expression(paste(tau,"(x')")), values = cpPalpurple) +
       ylab("count") + 
       xlab(expression(s[x])) + 
       #xlab("sx") + 
       labs(title="VN-SGM on HS data using VOI=27")
#print(pAll)

if(PERMUTE){
    main <- 'pvoi27sinc_ell2ss1234_h2ell2.pdf'
}else{
    main <- 'idvoi27sinc_ell2ss1234_h2ell2.pdf'
}

#pdf(main)
print(pAll)

#dev.off()




##### Lattice plot

v271s22 <- c(SET[[27]]$nrvois_h2l2,rep(NA,(100-length(SET[[27]]$nrvois_h2l2))))
#v271s23 <- c(as.vector(unlist(SET[[27]][h1l2])),rep(NA,(100-SET[[27]]$potseeds)))
#v271s33 <- c(SET[[27]]$nrvois_h2l2,rep(NA,(100-47)))
#v271s34 <- c(as.vector(unlist(SET[[27]][h2l3])),rep(NA,(100-47)))

sh22 <- 1
sh23 <- 3
sh33 <- 5
sh34 <- 7

lat <- as.matrix(t(Reduce(rbind,lapply(SET27,function(x){Reduce(c,x[,sh22])}))))
lat <- cbind(v271s22,lat)
dfl <- data.frame(lat)
colnames(dfl) <- c(1:9) #paste0("s=", 1:maxs) #add = sign 20160727
d2 <- melt(dfl)
## No id variables; using all as measure variables
colnames(d2) <- c("s","value")

p27 <- 
  ggplot(data=d2, aes(x=value, y =..count.., colour=s, fill=s)) + 
  geom_histogram(binwidth=.05)  + facet_wrap(~ s) +
  theme(text=element_text(size=20)) + 
  xlab(expression(paste(tau,"(x')"))) + 
  ggtitle("Normalized Comparison of Seeds for HS Data")
#print(p27)

if(PERMUTE){
    mainlat <- 'pvoi27sincLattice_ell2ss1234_h2ell2.pdf'
}else{
    mainlat <- 'idvoi27sincLattice_ell2ss1234_h2ell2.pdf'
}

#pdf(mainlat,width=8,height=8)
print(p27)
## Warning: Removed 129 rows containing non-finite values (stat_bin).

#dev.off()

## Saved without pdfs. just generate one plot at a time from the data. 
#save.image("HSundircoreVOI27sxinc_ss1234h12ell123.RData")




#if(run){
#    if(exists("cl")){
#            closeCluster(cl)
#       mpi.quit()
#       }
#}


#   Time:
##  Working status:
### Comments:
####Soli Deo Gloria