#! /usr/bin/Rscript
###
### Exploritory analysis of core vertices between the high school
### friendship and facebook networks. For one of the shared vertices 
### in the facebook network, v, we randomly select 3 seeds from a 2-hop
### neighborhood around v so as to form the
### seed set S and then obtain the corresponding seed set S' in the
### survey network. We then match N_2(S) and N_2(S').
### We then add some excess vertices (10 from each Facebook unshared and
### Survey unshared, then all vertices) and using the same seed sets
### again match the 2-hop neighborhoods around the seed sets and
### evaluate the performance for these scenarios.
###
### Heather Gaddy Patsolic 
### 2017 <[email protected]>
### S.D.G 
#


require(raster)
require(igraph)
require(clue)
require(foreach)
require(ggplot2)
require(VN)
data('HSgraphs')


## User: If false, loads rdata file. If true, runs full script.
run <- FALSE


if(run){
    #require(doMC); nCores <- 4
    require(doMPI)
    
    # Register Cores for parallelism 
    if("package:doMPI" %in% search()){
        cl <- startMPIcluster()
        registerDoMPI(cl)} else {
        registerDoMC(nCores)
        }
    
    PERMUTE <- TRUE

    set.seed(1234)
    adj2 <- get.adjacency(HSfrgraphcore)
    if(PERMUTE){
        perm2 <- sample(vcount(HSfrgraphcore),vcount(HSfrgraphcore),replace=FALSE)
        adj2 <- adj2[perm2,perm2]
        #map <- cbind(1:vcount(HSfrgraphcore),perm2)
        map2 <- cbind(perm2,1:vcount(HSfrgraphcore))
        sp <- sort(perm2,index.return=TRUE)
        map <- map2[sp$ix,]
        colnames(map) <- c("core1id","core2id")
        g.frscrambled <- graph_from_adjacency_matrix(adj2,mode="max")
    }else{
        map <- cbind(1:vcount(HSfrgraphcore),1:vcount(HSfrgraphcore))
        colnames(map) <- c("core1id","core2id")
        g.frscrambled <- HSfrgraphcore
    }
    
    hhop <- 2
    ell <- 2
    g <- 0.1
    R <- 50
    iter <- 20
    voi <- 27

    fbvoi <- map[voi,1]
    frvoi <- map[voi,2]
    Nhtmp <- unlist(ego(HSfbgraphcore,hhop,nodes=fbvoi,mindist=1)) # mindist=0: close, 1: open
    cfblabelseeds <- map[Nhtmp,1]
    cfrlabelseeds <- map[Nhtmp,2]
    nbdx <- length(Nhtmp)
    NhtmpR <- map[Nhtmp,2]
    #NhtmpR <- unlist(ego(HSfrgraphcore,hhop,nodes=voi,mindist=1)) # mindist=0: close, 1: open
    
    fbseedid <- map[Nhtmp,1]
    frseedid <- map[NhtmpR,2]

    len2 <- 5 #100 #50
    
    comb <- list(combn(nbdx,2),combn(nbdx,3)) 
    
    ss <- 1234 
    set.seed(ss)
    seedsidx <- lapply(comb,function(x){sort(sample(ncol(x),100,replace=FALSE))})
    
    nums <- 3
    seedsidx <- seedsidx[[(nums-1)]]
    lens <- length(seedsidx)
    
    jvec <- c(5,10,15)
    jvl <- length(jvec)
    maxj <- 20
    
    junkfb <- setdiff(V(HSfbgraphfull),coremap[,1])
    junkfr <- setdiff(V(HSfriendsgraphfull),coremap[,2])
    cjfb <- c(coremap[,1],junkfb)
    cjfr <- c(coremap[,2],junkfr)

    Afb <- as.matrix(get.adjacency(HSfbgraphfull))
    Afr1 <- as.matrix(get.adjacency(HSfriendsgraphfull))
    Afb <- Afb[cjfb,cjfb]
    Afr2 <- Afr1[cjfr,cjfr]
    Afr <- Afr2[c(perm2,setdiff(1:nrow(Afr2),perm2)),
               c(perm2,setdiff(1:nrow(Afr2),perm2))]
    
    cjfbS <- graph_from_adjacency_matrix(Afb,mode="undirected")
    cjfrS <- graph_from_adjacency_matrix(Afr,mode="undirected")
    
    #SET27j <- foreach(j = 1:maxj)%:%
    SET27j <- foreach(ji = 1:jvl)%:%
           foreach(le = 1:len2,.combine="rbind")%dopar%{
               j <- jvec[ji]

              set.seed(537*j+18400*le)
              
              jfb <- sort(sample((length(coremap[,1])+1):length(cjfb),j))
              jfr <- sort(sample((length(coremap[,2])+1):length(cjfr),j))

              afb <- Afb[c(1:length(V(HSfbgraphcore)),jfb), 
                         c(1:length(V(HSfbgraphcore)),jfb)]
              afr <- Afr[c(1:length(V(HSfrgraphcore)),jfr), 
                         c(1:length(V(HSfrgraphcore)),jfr)]
              HScjfb <- graph_from_adjacency_matrix(afb,mode="undirected")
              HScjfr <- graph_from_adjacency_matrix(afr,mode="undirected")
              
              normrank <- rep(NA,length(seedsidx))
              for(si in 1:length(seedsidx)){
                  sid <- seedsidx[si]
                  Sfb <- cfblabelseeds[comb[[2]][,sid]]
                  Sfr <- cfrlabelseeds[comb[[2]][,sid]]
                  frtmp <- sum(Sfr>frvoi)+frvoi

                  #Sfb <- which(cjfb %in% cfblabelseeds[comb[[2]][,sid]])

                  #nSfb <- setdiff(c(1:(82+j)),c(Sfb,fbvoi))
                  #afbS <- afb[c(Sfb,fbvoi,nSfb),c(Sfb,fbvoi,nSfb)]
                  #Sfr <- which(cjfr %in% cfrlabelseeds[comb[[2]][,sid]])
                  #frvoi <- which(cjfr==corefr[voi])
                  #nSfr <- setdiff(c(1:(82+j)),c(Sfr,frvoi))
                  #afrS <- afr[c(Sfr,frvoi,nSfr),c(Sfr,frvoi,nSfr)]
    
                  #cjfbS <- graph_from_adjacency_matrix(afbS,mode="undirected")
                  #cjfrS <- graph_from_adjacency_matrix(afrS,mode="undirected")
    
    
                  L <- localnbd(fbvoi,cbind(Sfb,Sfr),HScjfb,HScjfr,hhop,ell,R,g)
                  if(L$case=="possible"){
                      if(frtmp %in% L$labelsGxp){
                      #if(length(L$Cxp)==1){
                      #    rankvoi <- 0
                      #}else{
                          Ps <- L$P
                          Ps <- Ps[1:length(L$labelsGx),1:length(L$labelsGxp)]
                          rankPs <- pass2ranksuplus(Ps) 
                          rankvoi <- rankPs[(nums+1), 
                                            which(L$labelsGxp==(frtmp))] 
                          ## note, this assumes all seeds used which 
                          ## will be the case in our rigged example --- 
                          ## labeling identification is different if not 
                          ### all seeds used in vnsgm algorithm
                          rankvoi <- (rankvoi - 1)/(length(L$Cxp)-1)
                      }else{
                          rankvoi <- NA
                      }
                  }else{
                      rankvoi <- NA
                  }
                  normrank[si] <- rankvoi
              }
    
    
              nafreenr <- normrank[which(!is.na(normrank))]
              mnr <- max(nafreenr)
              punifmax <- function(x){
                  pumax <- punif(x,min=0,max=mnr)
                  pumax 
              }
              
              pvalks <- ks.test(normrank,punifmax,alternative="greater")
              numna <- sum(is.na(normrank))
            
              list(pvalks=pvalks$p.value,normrank=normrank,numna=numna) 
    }

    save.image("psubv27unshared_h2l2.RData")
}else{
    load("psubv27unshared_h2l2.RData")
}


########################### 3 plots 0j, 10j, allj
load("../pvoi27_seedinc/psxinc_ss1234h12ell123.RData") 
load("pv27fullgraphs_h2l2.RData")
require(ggplot2)
require(reshape2)

jadd <- 10
core <- Reduce(c,SET27[[2]][,1])
j5  <- Reduce(c,SET27j[[which(jvec==5)]][,2]$result.1)
j10  <- Reduce(c,SET27j[[which(jvec==jadd)]][,2]$result.1)
j15  <- Reduce(c,SET27j[[which(jvec==15)]][,2]$result.1)
jall <- Reduce(c,SET27ja[[3]]$normrank)
full <- cbind(core,j10,jall)
dfl <- data.frame(full)
colnames(dfl) <- c("0","10","all")
dfp1 <- melt(dfl)
## No id variables; using all as measure variables
colnames(dfp1) <- c("m","normrank")

full2 <- cbind(core,j5,j10,j15,jall)
dfl2 <- data.frame(full2)
colnames(dfl2) <- c("0","5","10","15","all")
dfp2 <- melt(dfl2)
## No id variables; using all as measure variables
colnames(dfp2) <- c("m","normrank")


##### Lattice plot
pjlat <- 
  ggplot(data=dfp1, aes(x=normrank, y = ..count.., colour=m,fill=m)) + 
  geom_histogram(binwidth=.05)  + facet_wrap(~ m,nrow=1) +
  theme(text=element_text(size=20)) + 
  #theme(axis.text.x = element_text(angle=90, hjust=-10)) + 
  xlab(expression(paste(tau,"(x')"))) + 
  ggtitle("HS with VOI=27 -- Inc m")

pjlat2 <- 
  ggplot(data=dfp2, aes(x=normrank, y = ..count.., colour=m,fill=m)) + 
  geom_histogram(binwidth=.05)  + facet_wrap(~ m,nrow=1) +
  theme(text=element_text(size=20)) + 
  xlab(expression(paste(tau,"(x')"))) + 
  ggtitle("HS with VOI=27 -- Inc m")


#pdf('./pHSundirJinc_voi27lattice_example10_h2l2.pdf',height=4.3,width=11)

print(pjlat)
## Warning: Removed 31 rows containing non-finite values (stat_bin).

#pdf('./HSundirJinc_voi27lattice_example10_h2l2.pdf',height=4,width=6)
#print(pjlat)
#dev.off()

#pdf('./pHSundirJinc_voi27lattice_example51015_h2l2.pdf',height=5,width=17)
#print(pjlat2)
#dev.off()


#if(run){
#    if(exists("cl")){
#            closeCluster(cl)
#       mpi.quit()
#       }
#}


#   Time:
##  Working status:
### Comments:
####Soli Deo Gloria