simulation1.R

source("ase.R")

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

## Package 'mclust' version 5.4.5
## Type 'citation("mclust")' for citing this R package in publications.

## 
## Attaching package: 'mvtnorm'

## The following object is masked from 'package:mclust':
## 
##     dmvnorm

source("emase.R")
source("clt.R")

## 
## Attaching package: 'reshape'

## The following object is masked from 'package:Matrix':
## 
##     expand

## 
## Attaching package: 'plyr'

## The following objects are masked from 'package:reshape':
## 
##     rename, round_any

## 
## Attaching package: 'ggplot2'

## The following object is masked _by_ '.GlobalEnv':
## 
##     vars

source("toolFun.R")

requiredPkg <- c("XML", "tidyverse", "parallel", "ellipse", "RColorBrewer")
newPkg <- requiredPkg[!(requiredPkg %in% installed.packages()[, "Package"])]
if (length(newPkg)) install.packages(newPkg, dependencies = TRUE)
sapply(requiredPkg, require, character.only = TRUE)

## Loading required package: XML

## Loading required package: tidyverse

## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──

## ✓ tibble  3.0.0     ✓ purrr   0.3.3
## ✓ tidyr   1.0.2     ✓ dplyr   0.8.5
## ✓ readr   1.3.1     ✓ stringr 1.4.0
## ✓ tibble  3.0.0     ✓ forcats 0.4.0

## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::arrange()       masks plyr::arrange()
## x dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
## x purrr::compact()       masks plyr::compact()
## x purrr::compose()       masks igraph::compose()
## x dplyr::count()         masks plyr::count()
## x tidyr::crossing()      masks igraph::crossing()
## x tidyr::expand()        masks reshape::expand(), Matrix::expand()
## x dplyr::failwith()      masks plyr::failwith()
## x dplyr::filter()        masks stats::filter()
## x dplyr::groups()        masks igraph::groups()
## x dplyr::id()            masks plyr::id()
## x dplyr::lag()           masks stats::lag()
## x purrr::map()           masks mclust::map()
## x dplyr::mutate()        masks plyr::mutate()
## x tidyr::pack()          masks Matrix::pack()
## x dplyr::rename()        masks plyr::rename(), reshape::rename()
## x dplyr::select()        masks MASS::select()
## x purrr::simplify()      masks igraph::simplify()
## x dplyr::summarise()     masks plyr::summarise()
## x dplyr::summarize()     masks plyr::summarize()
## x tidyr::unpack()        masks Matrix::unpack()
## x .GlobalEnv::vars()     masks dplyr::vars(), ggplot2::vars()

## Loading required package: parallel

## Loading required package: ellipse

## 
## Attaching package: 'ellipse'

## The following object is masked from 'package:graphics':
## 
##     pairs

## Loading required package: RColorBrewer

##          XML    tidyverse     parallel      ellipse RColorBrewer 
##         TRUE         TRUE         TRUE         TRUE         TRUE

# drive_auth_config(active = FALSE)
# folder_url <- "https://drive.google.com/open?id=1SD2RLcgww_rvr53l0KDQQBe38RncvULN"
# folder <- drive_get(as_id(folder_url))
# files <- drive_ls(folder)

folder_url <- "http://www.cis.jhu.edu/~parky/dhatKhat/Results/dhatkhat/"
docs <- XML::htmlParse(folder_url)
links <- XML::xpathSApply(docs, "//a/@href")
dfiles <- links[grepl(glob2rx("*.RData"), links)]
files <- paste0(folder_url, dfiles)
#print(load(url(files[x])))

# call "figure1(K=2)" for Fig 1 (a),
# call "figure1(K=3)" for Fig 1 (b)
figure1 <- function(n = 1000, K = 2){
    set.seed(1240)

    if (K == 2) {
        ## inf2.pdf
        rho <- c(0.5,0.5)
        B <- matrix(c(0.2,0.1,0.1,0.25), nrow = 2, ncol = 2)
    } else {
        ## inf3.pdf
        rho <- c(0.4,0.4,0.2)
        B <- rbind(c(0.2,0.1,0.08),c(0.1,0.25,0.05), c(0.08,0.05,0.4))
    }
    #    K <- length(rho)
    nB <- n*rho
    tau <- rep(1:K, times=nB)

    #    A <- rg.SBM(n, B, rho)
    g <- sample_sbm(n, B, nB, directed=FALSE)
    #    tau <- A$tau
    x <- eigen(B)
    xx <- x$vectors %*% diag(sqrt(x$values))
    X <- xx[tau,]

    #    Xhat <- stfp(A$adjacency, dim=K)
    Xhat <- ase(g, max.d=K, scale.by.values=TRUE)
    W <- procrustes(Xhat, X)
    residue <- sqrt(n)*(Xhat%*%W - X)
    print(norm(Xhat%*%W - X,"F")^2)

    sigs <- block.var(xx, rho)
    print(sigs)

    df <- data.frame(x = (Xhat%*%W)[,1], y = (Xhat%*%W)[,2],
                     block = as.character(tau),
                     tau = tau)

    df_ell <- data.frame()
    for(g in unique(df$tau)){
        sig <- sigs[[g]]
        df_ell <- rbind(df_ell, cbind(as.data.frame(
            with(df[df$tau==g,],
                 ellipse(sig[1,2]/sqrt(sig[1,1]*sig[2,2]),
                         scale=c(sqrt(sig[1,1])/sqrt(n),sqrt(sig[2,2])/sqrt(n)),
                         centre=c(xx[g,1],xx[g,2]))),group=g)))

    }

    df_ell <- cbind(df_ell, block=as.character(rep(1:K, each=nrow(df_ell)/K)))
    df.xx <- as.data.frame(xx[,1:2]); names(df.xx) <- c("x","y")
    df.xx <- cbind(df.xx, block=as.character(c(1:K)))

    p <- ggplot(data=df, aes(x=x, y=y, color=block)) +
        geom_point(size=2, alpha=0.5) +
        stat_ellipse(type="norm", size=1, linetype = "dashed", aes(color=block)) +
        geom_point(data=df.xx, aes(x=x,y=y), color="black", size=3) +
        geom_path(data=df_ell, aes(x=x, y=y, group=block), size=1, linetype = 2, colour=1) +
        labs(x = "Dimension 1", y = "Dimension 2") +
        #    geom_path(data = df_ell[-c(1:(nrow(df_ell)/K)),], aes(x = x, y = y), size = 1, linetype = 2, colour = 1) +
        theme(panel.background = element_rect(fill = 'white', colour = 'black')) +
        #  theme_light() +
        theme(legend.position = "none") +
        theme(axis.title=element_text(size=15)) +
        theme(axis.text.x=element_text(size=15)) +
        #  theme(axis.ticks = element_line(size = 5)) +
        theme(axis.text.y=element_text(size=15)) +
        #  theme(strip.text=element_text(size=rel(1.2))) +
        theme(legend.text = element_text(colour="black", size = 15, face = "plain"))
    print(p)
    ggsave(paste0("inf",K,".pdf"), width=5, height=5)
#    ggsave(paste0("Fig1-n1000-K",K,".pdf"), width=5, height=5)
}

figure23data <- function()
{
    N <- 100
    seed <- 0
    ns <- c(200,500,1000,2000,4000,8000,16000)
    B2 <- rbind(c(0.2,0.1),c(0.1,0.25))
    B3 <- rbind(c(0.2,0.1,0.08),c(0.1,0.25,0.05),
                c(0.08,0.05,0.4))
    pi2 <- c(0.5,0.5)
    pi3 <- c(0.4,0.4,0.2)
    mus2 <- vector('list',length(ns))
    mus3 <- vector('list',length(ns))
    names(mus2) <- ns
    names(mus3) <- ns
    vars2 <- vector('list',length(ns))
    vars3 <- vector('list',length(ns))
    names(vars2) <- ns
    names(vars3) <- ns
    varsS2 <- vector('list',length(ns))
    varsS3 <- vector('list',length(ns))
    names(varsS2) <- ns
    names(varsS3) <- ns
    V <- vector('list',length(ns))
    names(V) <- ns
    VS <- vector('list',length(ns))
    names(VS) <- ns
    for(i in 1:length(ns)){
        n <- ns[i]
        cat(n,"\n")
        ng2 <- n*pi2
        ng3 <- n*pi3
        cls2 <- rep(1:2,times=ng2)
        cls3 <- rep(1:3,times=ng3)
        mus2[[i]] <- vector('list',2)
        mus2[[i]][[1]] <- matrix(NA,nrow=N,ncol=80)
        mus2[[i]][[2]] <- matrix(NA,nrow=N,ncol=80)
        mus3[[i]] <- vector('list',3)
        mus3[[i]][[1]] <- matrix(NA,nrow=N,ncol=80)
        mus3[[i]][[2]] <- matrix(NA,nrow=N,ncol=80)
        mus3[[i]][[3]] <- matrix(NA,nrow=N,ncol=80)
        vars2[[i]] <- vector('list',2)
        vars2[[i]][[1]] <- matrix(NA,nrow=N,ncol=80)
        vars2[[i]][[2]] <- matrix(NA,nrow=N,ncol=80)
        V[[i]] <- matrix(NA,nrow=N,ncol=80)
        VS[[i]] <- matrix(NA,nrow=N,ncol=80)
        vars3[[i]] <- vector('list',3)
        vars3[[i]][[1]] <- matrix(NA,nrow=N,ncol=80)
        vars3[[i]][[2]] <- matrix(NA,nrow=N,ncol=80)
        vars3[[i]][[3]] <- matrix(NA,nrow=N,ncol=80)
        varsS2[[i]] <- vector('list',2)
        varsS2[[i]][[1]] <- matrix(NA,nrow=N,ncol=80)
        varsS2[[i]][[2]] <- matrix(NA,nrow=N,ncol=80)
        varsS3[[i]] <- vector('list',3)
        varsS3[[i]][[1]] <- matrix(NA,nrow=N,ncol=80)
        varsS3[[i]][[2]] <- matrix(NA,nrow=N,ncol=80)
        varsS3[[i]][[3]] <- matrix(NA,nrow=N,ncol=80)
        for(j in 1:N){
            set.seed(seed+i*N*2+j)
            g2 <- sample_sbm(n,B2,ng2,directed=FALSE)
            emb2 <- ase(g2,verbose=FALSE,max.d=80,scale.by.values=FALSE)
            embS2 <- ase(g2,verbose=FALSE,max.d=80,scale.by.values=TRUE)
            mus2[[i]][[1]][j,] <- apply(emb2[which(cls2==1),],2,mean)
            mus2[[i]][[2]][j,] <- apply(emb2[which(cls2==2),],2,mean)
            vars2[[i]][[1]][j,] <- apply(emb2[which(cls2==1),],2,var)
            vars2[[i]][[2]][j,] <- apply(emb2[which(cls2==2),],2,var)
            varsS2[[i]][[1]][j,] <- apply(embS2[which(cls2==1),],2,var)
            varsS2[[i]][[2]][j,] <- apply(embS2[which(cls2==2),],2,var)
            V[[i]][j,] <- apply(emb2,2,var)
            VS[[i]][j,] <- apply(embS2,2,var)
            g3 <- sample_sbm(n,B3,ng3,directed=FALSE)
            emb3 <- ase(g3,verbose=FALSE,max.d=80,scale.by.values=FALSE)
            embS3 <- ase(g3,verbose=FALSE,max.d=80,scale.by.values=TRUE)
            mus3[[i]][[1]][j,] <- apply(emb3[which(cls3==1),],2,mean)
            mus3[[i]][[2]][j,] <- apply(emb3[which(cls3==2),],2,mean)
            mus3[[i]][[3]][j,] <- apply(emb3[which(cls3==3),],2,mean)
            vars3[[i]][[1]][j,] <- apply(emb3[which(cls3==1),],2,var)
            vars3[[i]][[2]][j,] <- apply(emb3[which(cls3==2),],2,var)
            vars3[[i]][[3]][j,] <- apply(emb3[which(cls3==3),],2,var)
            varsS3[[i]][[1]][j,] <- apply(embS3[which(cls3==1),],2,var)
            varsS3[[i]][[2]][j,] <- apply(embS3[which(cls3==2),],2,var)
            varsS3[[i]][[3]][j,] <- apply(embS3[which(cls3==3),],2,var)
        }
        save(i,n,mus2,mus3,vars2,vars3,V,varsS2,varsS3,VS,N,ns,seed,
             B2,B3,pi2,pi3,
             file=paste0('fig2_3',n,'.RData'))
    }

    save(mus2,mus3,vars2,vars3,V,varsS2,varsS3,VS,N,ns,seed,
         B2,B3,pi2,pi3,
         file='fig2_3.RData')
}

# Fig 2
figure2 <- function(max.d=80)
{
#     files1 <- files %>% filter(grepl(glob2rx("fig2_3*"), name))
#     file <- drive_download(files1, overwrite = TRUE, verbose = FALSE)
#     load(file$local_path)
# #    load('fig2_3.RData')
    load(url(files[grepl(glob2rx("fig2_3*"),dfiles)]))

    max.d <- min(max.d,ncol(mus2[[1]][[1]]))

    m <- par('mar')
    par(mar=c(m[1:2],1,1))
    par(mfrow=c(2,2))
    boxplot(mus2[[1]][[1]][,-(1:2)],names=3:max.d,axes=FALSE,xlab='Dimension',ylab=expression(mu[1]), cex.lab=1.5)
    axis(2)
    axis(1,at=c(3,(1:(max.d/10))*10)-2,label=c(3,(1:(max.d/10))*10))
    title("n=200")
    box()
    boxplot(mus2[[4]][[1]][,-(1:2)],names=3:max.d,axes=FALSE,xlab='Dimension',ylab=expression(mu[1]), cex.lab=1.5)
    axis(2)
    axis(1,at=c(3,(1:(max.d/10))*10)-2,label=c(3,(1:(max.d/10))*10))
    title("n=2000")
    box()

    boxplot(mus2[[1]][[2]][,-(1:2)],names=3:max.d,axes=FALSE,xlab='Dimension',ylab=expression(mu[2]), cex.lab=1.5)
    axis(2)
    axis(1,at=c(3,(1:(max.d/10))*10)-2,label=c(3,(1:(max.d/10))*10))
    title("n=200")
    box()
    boxplot(mus2[[4]][[2]][,-(1:2)],names=3:max.d,axes=FALSE,xlab='Dimension',ylab=expression(mu[2]), cex.lab=1.5)
    axis(2)
    axis(1,at=c(3,(1:(max.d/10))*10)-2,label=c(3,(1:(max.d/10))*10))
    title("n=2000")
    box()
#    dev.print(device=pdf,file='fig1means.pdf', width=8, height=8) # Fig 2 (a)
    par(mar=m)

    m <- par('mar')
    par(mfrow=c(2,1))
    par(mar=c(m[1:2],0.5,1))
    M1 <- lapply(mus2,function(x) (c(x[[1]][,-(1:2)])))
    M2 <- lapply(mus2,function(x) (c(x[[2]][,-(1:2)])))
    boxplot(M1,names=ns,notch=TRUE,xlab="n",ylab=expression(mu[1]), cex.lab=1.5)
    boxplot(M2,names=ns,notch=TRUE,xlab="n",ylab=expression(mu[2]), cex.lab=1.5)
    par(mar=m)
#    dev.print(device=pdf,file='musall.pdf', width=8, height=8) # Fig 2 (b)
    par(mfrow=c(1,1))

}

figure3 <- function(max.d=80,useS=FALSE)
{
    # files1 <- files %>% filter(grepl(glob2rx("fig2_3*"), name))
    # file <- drive_download(files1, overwrite = TRUE, verbose = FALSE)
    # load(file$local_path)
    load(url(files[grepl(glob2rx("fig2_3*"),dfiles)]))

    max.d <- min(max.d,ncol(mus2[[1]][[1]]))
    # if(useS){
    #     vars2 <- varsS2
    #     vars3 <- varsS3
    #     V <- VS
    # }

    par(mfrow=c(2,1))
    m <- par('mar')
    par(mar=c(m[1],m[2]+1,0.75,m[4]))
    ylim <- range(unlist(vars2[[3]]))
    boxplot(vars2[[3]][[1]][,-(1:2)],names=3:max.d,axes=FALSE,
            ylim=ylim,
            xlab='Dimension',ylab=expression(sigma^2), cex.lab=1.5)
    axis(2)
    axis(1,at=c(3,(1:(max.d/10))*10)-2,label=c(3,(1:(max.d/10))*10))
    title("n=1000")
    box()
    boxplot(vars2[[3]][[2]][,-(1:2)],add=TRUE,names=3:max.d,axes=FALSE, cex.lab=1.5)

    ylim <- range(unlist(vars2[[7]]))
    boxplot(vars2[[7]][[1]][,-(1:2)],names=3:max.d,axes=FALSE,
            ylim=ylim,
            xlab='Dimension',ylab=expression(sigma^2), cex.lab=1.5)
    axis(2)
    axis(1,at=c(3,(1:(max.d/10))*10)-2,label=c(3,(1:(max.d/10))*10))
    title("n=16000")
    box()
    boxplot(vars2[[7]][[2]][,-(1:2)],add=TRUE,names=3:max.d,axes=FALSE, cex.lab=1.5)
#    dev.print(device=pdf,file='fig2sigmas16000.pdf') # Fig 3 (a)
    par(mar=m)

    par(mfrow=c(1,1))
    yl <- 1.35
    cols <- 1:length(vars2)
    for(i in 1:length(vars2)){
        yl <- range(yl,ns[i]*apply(vars2[[i]][[1]][,-(1:2)],2,median))
        yl <- range(yl,ns[i]*apply(vars2[[i]][[2]][,-(1:2)],2,median))
    }
    plot(3:80,ylim=yl,xlab="Dimension",
         ylab=expression(n*sigma^2),type='n',
         xlim=c(3,82), cex.lab=1.5)
    for(i in 1:length(vars2)){
        data <- ns[i]*apply(vars2[[i]][[1]][,-(1:2)],2,median)
        lines(3:80,data,lty=1,col=cols[i],lwd=2)
        data <- ns[i]*apply(vars2[[i]][[2]][,-(1:2)],2,median)
        lines(3:80,data,lty=2,col=cols[i],lwd=2)
    }
#    legend(0,yl[2]+.025,legend=paste0("n=",ns),ncol=4,col=cols,lty=1,lwd=1)
#    legend(70,1.05,legend=c("B1","B2"),lty=1:2,col=1)
#    dev.print(device=pdf,file='allsigmas.pdf') # Fig 3 (b)
}

figure4 <- function(n=200)
{
    set.seed(1240)

    rho <- c(0.5,0.5)
    B <- matrix(c(0.2,0.1,0.1,0.25), nrow = 2, ncol = 2)
    nB <- n*rho

    mc <- 1
    dmax <- 20

    S <- matrix(0,dmax,dmax)
    for (i in 1:mc) {
        g <- sample_sbm(n, B, nB, directed=FALSE, loops=FALSE)
        Xhat <- eigASE(A=g[,,sparse=F], dim=dmax, scaling = TRUE)
        S <- S + cov(Xhat$X[1:(n/2),])
    }

    Sbar <- S / nmc

    mycol <- colorRampPalette(brewer.pal(8, "RdBu"))(25)
    if (n==200) {
        mylab <- seq(-0.04, 0.04, length.out=5) #seq(-signif(max(Sbar),digits=1), signif(max(Sbar),digits=1), length.out=5)
    } else {
        mylab <- seq(-round(max(Sbar),digits=2), round(max(Sbar),digits=2), length.out=5)
    }
    pdf("asecov1.pdf")
    levelplot(Sbar, pretty=TRUE,
              col.regions=rev(mycol),
              at=seq(-max(Sbar), max(Sbar), length.out=10),
              colorkey=list(labels=list(at=mylab, cex=1.5)),
              xlab=list("Dimension", cex=1.5), ylab=list("Dimension", cex=1.5))
    dev.off()
}

figure5Data <- function()
{
    N <- 100
    seed <- 0
    ns <- c(200,500,1000,2000,4000)
    B2 <- rbind(c(0.2,0.1),c(0.1,0.25))
    B3 <- rbind(c(0.2,0.1,0.08),c(0.1,0.25,0.05),
                c(0.08,0.05,0.4))
    pi2 <- c(0.5,0.5)
    pi3 <- c(0.4,0.4,0.2)
    vars2 <- vector('list',length(ns))
    vars3 <- vector('list',length(ns))
    names(vars2) <- ns
    names(vars3) <- ns
    for(i in 1:length(ns)){
        n <- ns[i]
        cat(n,"\n")
        ng2 <- n*pi2
        ng3 <- n*pi3
        cls2 <- rep(1:2,times=ng2)
        cls3 <- rep(1:3,times=ng3)
        vars2[[i]] <- vector('list',2)
        vars2[[i]][[1]] <- vector('list',N)
        vars2[[i]][[2]] <- vector('list',N)
        vars3[[i]] <- vector('list',3)
        vars3[[i]][[1]] <- vector('list',N)
        vars3[[i]][[2]] <- vector('list',N)
        vars3[[i]][[3]] <- vector('list',N)
        for(j in 1:N){
            set.seed(seed+i*N*2+j)
            g2 <- sample_sbm(n,B2,ng2,directed=FALSE)
            emb2 <- ase(g2,verbose=FALSE,max.d=20,scale.by.values=FALSE)
            vars2[[i]][[1]][[j]] <- var(emb2[which(cls2==1),])
            vars2[[i]][[2]][[j]] <- var(emb2[which(cls2==2),])
            g3 <- sample_sbm(n,B3,ng3,directed=FALSE)
            emb3 <- ase(g3,verbose=FALSE,max.d=20,scale.by.values=FALSE)
            vars3[[i]][[1]][[j]] <- var(emb3[which(cls3==1),])
            vars3[[i]][[2]][[j]] <- var(emb3[which(cls3==2),])
            vars3[[i]][[3]][[j]] <- var(emb3[which(cls3==3),])
        }
    }

    save(vars2,vars3,N,ns,seed,
         B2,B3,pi2,pi3,
         file='fig5.RData')
}

## Fig 5
figure5 <- function()
{
    # files1 <- files %>% filter(grepl(glob2rx("fig5*"), name))
    # file <- drive_download(files1, overwrite = TRUE, verbose = FALSE)
    # load(file$local_path)
#    load('fig5.RData')

    load(url(files[grepl(glob2rx("fig5*"),dfiles)]))

    data <- vector('list',2*length(ns))
    names(data) <- c(rbind(ns,rep("",length(ns))))
    k <- 1
    for(i in 1:length(ns)){
        data[[k]] <- unlist(lapply(vars2[[i]][[1]],function(y) {
            z <- y[-(1:2),-(1:2)]
            c(z[upper.tri(z)])
        }))
        data[[k+1]] <- unlist(lapply(vars2[[i]][[2]],function(y) {
            z <- y[-(1:2),-(1:2)]
            c(z[upper.tri(z)])
        }))
        k <- k+2
    }
    m <- par('mar')
    par(mar=c(m[1],m[1],m[2:3]))
    boxplot(data,col=c('white','gray'),xlab="n",ylab=expression(Sigma[ij]),
            cex.lab=1.5)
    abline(h=0,lty=2)
#    dev.print(device=pdf,file='offcov.pdf') # Fig 5(a)
    a <- boxplot(data,col=c('white','gray'),outline=FALSE,notch=TRUE)
    boxplot(data,col=c('white','gray'),outline=FALSE,notch=TRUE,
            ylim=c(min(a$stats[2,]),max(a$stats[4,])),
            xlab="n",ylab=expression(Sigma[ij]),cex.lab=1.5)
    abline(h=0,lty=2)
#    dev.print(device=pdf,file='offcov2.pdf') # Fig 5(b)
    par(mar=m)
}

experiment6 <- function(N=100,midp=0.2,maxp=0.5,overlap=.1,n=500,
                        ng=rep(1/2,2),seed=43523,
                        p1="exp4_all.pdf",
                        p2="exp42.pdf")
{
    m <- 2
    means <- matrix(0,nrow=N,ncol=4)
    offdiag <- rep(0,N)
    ondiag <- matrix(0,nrow=N,ncol=4)
    for(i in 1:N){
        set.seed(i+seed)
        P <- matrix(0,nrow=m,ncol=m)
        P[lower.tri(P)] <- runif(choose(m,2),0,midp)
        P[upper.tri(P)] <- t(P[lower.tri(P)])
        diag(P) <- runif(m,midp-overlap,maxp)
        NG <- n*ng
        while(sum(NG)>n) ng[which.max(ng)] <- ng[which.max(ng)] - 1
        while(sum(NG)<n) ng[which.min(ng)] <- ng[which.min(ng)] + 1
        g <- sample_sbm(n,P,NG,directed=FALSE)
        cls <- rep(1:m,times=NG)
        emb <- ase(g,max.d=m+2)
        means[i,] <- apply(emb,2,mean)
        v <- var(emb[,3:4])
        offdiag[i] <- v[1,2]
        ondiag[i,] <- diag(var(emb))
    }
    boxplot(cbind(means,offdiag,ondiag,ondiag[,3]-ondiag[,4]),
            notch=TRUE,
            #las=2,
            names=c(expression(mu[1]),
                    expression(mu[2]),
                    expression(mu[3]),
                    expression(mu[4]),
                    expression(Sigma[34]),
                    expression(Sigma[11]),
                    expression(Sigma[22]),
                    expression(Sigma[33]),
                    expression(Sigma[44]),
                    expression(Sigma[33]-Sigma[44])))
    abline(h=0,lty=2)
    dev.print(device=pdf,file=p1)
    par(mfrow=c(2,1))
    boxplot(means,
            notch=TRUE,
            #las=2,
            ylab="Means",
            names=c(expression(mu[1]),
                    expression(mu[2]),
                    expression(mu[3]),
                    expression(mu[4])))
    abline(h=0,lty=2)
    abline(v=2.5,lty=2)
    boxplot(cbind(offdiag,ondiag,ondiag[,3]-ondiag[,4]),
            ylab="Variances",
            names=c(expression(Sigma[34]),
                    expression(Sigma[11]),
                    expression(Sigma[22]),
                    expression(Sigma[33]),
                    expression(Sigma[44]),
                    expression(Sigma[33]-Sigma[44])))
    abline(h=0,lty=2)
    dev.print(device=pdf,file=p2)
    par(mfrow=c(1,1))
    list(means=means,off=offdiag,on=ondiag,midp=midp,maxp=maxp,overlap=overlap,n=n,
         ng=ng,seed=seed,p1=p1,p2=p2)
}

## Fig 6
figure6.old <- function()
{
    out2 <- experiment6(N=10000,midp=0.1,maxp=0.5,overlap=-.1,n=500,
                        ng=rep(1/2,2),seed=4353,
                        p1="exp4_all_2.pdf",
                        p2="exp42_2.pdf") # Fig 6
}

figure6 <- function(N=10000,out,
                 maxq=0.1,
                 maxp=0.2,
                 n=5000,
                 d=20,
                 ng=rep(1/2,2),
                 outlines=1,
                 seed=43523,
                 savefile=paste0("RData/exp44_",N,"_",n,".RData"),
                 pictfile=NULL)
{
    if(missing(out)){
        m <- 2
        means1 <- matrix(0,nrow=N,ncol=d)
        means2 <- matrix(0,nrow=N,ncol=d)
        ondiag1 <- matrix(0,nrow=N,ncol=d)
        ondiag2 <- matrix(0,nrow=N,ncol=d)
        bigoff1 <- vector('list',N)
        bigoff2 <- vector('list',N)
        simoff1 <- vector('list',N)
        simoff2 <- vector('list',N)
        dm12 <- rep(0,N)
        dmgt <- rep(0,N)
        off1_12 <- rep(0,N)
        off1_23 <- rep(0,N)
        off2_12 <- rep(0,N)
        off2_23 <- rep(0,N)
        for(i in 1:N){
            cat("i = ", i, "\n")
            set.seed(i+seed)
            P <- matrix(0,nrow=m,ncol=m)
            P[lower.tri(P)] <- runif(choose(m,2),0,maxq)
            P[upper.tri(P)] <- t(P[lower.tri(P)])
            diag(P) <- sort(runif(m,max(P),maxp),decreasing=TRUE)
            NG <- n*ng
            g <- sample_sbm(n,P,NG,directed=FALSE)
            cls <- rep(1:m,times=NG)
            emb <- ase(g,max.d=d)
            means1[i,] <- apply(emb[cls==1,],2,mean)
            means2[i,] <- apply(emb[cls==2,],2,mean)
            dm12[i] <- proxy::dist(means1[i,1:2,drop=FALSE],means2[i,1:2,drop=FALSE])
            dmgt[i] <- proxy::dist(means1[i,-(1:2),drop=FALSE],means2[i,-(1:2),drop=FALSE])
            v1 <- var(emb[cls==1,])
            v2 <- var(emb[cls==2,])
            ondiag1[i,] <- diag(v1)
            ondiag2[i,] <- diag(v2)
            bigoff1[[i]] <- v1[upper.tri(v1[-(1:2),-(1:2)])]
            bigoff2[[i]] <- v2[upper.tri(v2[-(1:2),-(1:2)])]
            vvv <- diag(v1)[-(1:2)]
            s1 <- rmvnorm(N,mean=means1[i,-(1:2)],sigma=diag(vvv))
            sv1 <- var(s1)
            simoff1[[i]] <- sv1[upper.tri(sv1[-(1:2),-(1:2)])]

            vvv <- diag(v2)[-(1:2)]
            s2 <- rmvnorm(N,mean=means2[i,-(1:2)],sigma=diag(vvv))
            sv2 <- var(s2)
            simoff2[[i]] <- sv2[upper.tri(sv2[-(1:2),-(1:2)])]

            off1_12[i] <- v1[1,2]
            off2_12[i] <- v2[1,2]
            off1_23[i] <- v1[3,2]
            off2_23[i] <- v2[3,2]
        }
        bv1 <- unlist(bigoff1)
        bv2 <- unlist(bigoff2)
        sv1 <- unlist(simoff1)
        sv2 <- unlist(simoff2)
        out <- list(m=m,N=N,
                    maxq=maxq,
                    maxp=maxp,
                    n=n,
                    d=d,
                    ng=ng,
                    seed=seed,
                    means1=means1,
                    means2=means2,
                    ondiag1=ondiag1,
                    ondiag2=ondiag2,
                    bigoff1=bigoff1,
                    bigoff2=bigoff2,
                    simoff1=simoff1,
                    simoff2=simoff2,
                    dm12=dm12,
                    dmgt=dmgt,
                    off1_12=off1_12,
                    off1_23=off1_23,
                    off2_12=off2_12,
                    off2_23=off2_23,
                    bv1=bv1,bv2=bv2,
                    sv1=sv1,sv2=sv2)
        save(out,file=savefile)
    } else {
        load(url(paste0("http://www.cis.jhu.edu/~parky/dhatKhat/Results/",out)))
    }
    with(out,{
        # if(is.null(pictfile))
        #     pictfile <- paste0("figures/exp44_",N,"_",n,".pdf")
        layout(rbind(c(1,1,1,1,2,2,2,2),
                     c(1,1,1,1,2,2,2,2),
                     c(3,3,3,3,4,4,4,4),
                     c(3,3,3,3,4,4,4,4)))
        margin <- par('mar')
        tcl <- par('tcl')
        par(mar=c(4.1,4.1,3.1,1.5))

        boxplot(list(means1[,1],means1[,2],means2[,1],means2[,2]),
                main="Informative Means",
                notch=TRUE,
                outline=1 %in% outlines,
                names=c(expression(mu[1]^1),
                        expression(mu[2]^1),
                        expression(mu[1]^2),
                        expression(mu[2]^2)),
                cex.names=1.5,
                ylab="Mean",
                xlab="Dimension")
        abline(h=0,lty=2)

        boxplot(list(c(means1[,-(1:2)]),c(means2[,-(1:2)])),
                main="Redundant Means",
                notch=TRUE,
                outline=2 %in% outlines,
                names=c(expression(mu[d>3]^1),expression(mu[d>3]^2)),
                cex.names=1.5,
                ylab="Mean",
                xlab="Dimension")
        abline(h=0.0,lty=2)

        boxplot(list(ondiag1[,1],ondiag2[,1],
                     ondiag1[,2],ondiag2[,2]),
                notch=TRUE,
                outline=3 %in% outlines,
                cex.names=1.5,
                ylab="Informative Variance",
                main="Variance",
                names=c(
                    expression(sigma[1]^1),
                    expression(sigma[1]^2),
                    expression(sigma[2]^1),
                    expression(sigma[2]^2)))
        abline(h=0,lty=2)

        par(tcl=-0.025)
        boxplot(list(off1_12,off2_12,off1_23,off2_23,bv1,sv1,bv2,sv2),
                notch=TRUE,
                outline=4 %in% outlines,
                main="Covariance",
                cex.names=1.5,
                col=c(rep('white',5),'gray','white','gray'),
                ylab="Covariance",
                names=c(
                    expression(Sigma[12]^1),
                    expression(Sigma[12]^2),
                    expression(Sigma[23]^1),
                    expression(Sigma[23]^2),
                    expression(Sigma[i!=j]^1),
                    expression(Sigma[i!=j]^1*s),
                    expression(Sigma[i!=j]^2),
                    expression(Sigma[i!=j]^2*s)))
        abline(h=0,lty=2)
        abline(v=2.5,lty=2)
        par(tcl=tcl)

        # dev.print(device=pdf,file=pictfile)
        par(mfrow=c(1,1))
        par(mar=margin)
        return(invisible(out))
    })
}

experiment9 <- function(p=0.115,P,
                        n=500,
                        N=100,
                        seed=0,
                        pg=c(0.5,0.5),
                        adj.diag=FALSE)
{
    if(missing(P)){
        P <- rbind(c(0.2,p),c(p,0.1))
    }
    ng <- pg*n

    k <- nrow(P)
    outS <- vector('list',N)
    for(i in 1:N){
        set.seed(i+seed)
        g <- sample_sbm(n,P,ng,directed=FALSE)

        outS[[i]] <- aseCluster(g,verbose=FALSE,max.k=6,max.d=6,
                                scale.by.values=TRUE,short.circuit=TRUE,adjust.diag=adj.diag)
        emb <- ase(g,max.d=6,scale.by.values=TRUE)
        memb1 <- predict(outS[[i]]$best.model,emb)$classification
        memb2 <- predict(outS[[i]]$reduced.model,
                         emb[,1:outS[[i]]$d])$classification
        outS[[i]]$ari <- adjustedRandIndex(rep(1:k,times=ng),memb1)
        outS[[i]]$ariR <- adjustedRandIndex(rep(1:k,times=ng),memb2)
    }
    outS
}

experiment9.2 <- function(p=0.115,P,
                          n=500,
                          N=100,
                          seed=0,
                          pg=c(0.5,0.5),
                          adj.diag=FALSE)
{
    if(missing(P)){
        P <- rbind(c(0.2,p),c(p,0.1))
    }
    ng <- pg*n

    k <- nrow(P)
    outES <- vector('list',N)
    for(i in 1:N){
        set.seed(i+seed)
        g <- sample_sbm(n,P,ng,directed=FALSE)

        outES[[i]] <- try(aseClusterEM(g,verbose=FALSE,max.k=6,max.d=6,
                                       scale.by.values=TRUE,
                                       bail=TRUE,adjust.diag=adj.diag))
        if(!inherits(outES[[i]],'try-error')){
            emb <- ase(g,max.d=6,scale.by.values=TRUE)
            memb1 <- clusterMix(emb,outES[[i]]$best.model,d=6)
            memb2 <- clusterMix(emb[,1:outES[[i]]$best.model$d],
                                outES[[i]]$reduced.model,
                                d=outES[[i]]$best.model$d)
            outES[[i]]$ari <- adjustedRandIndex(rep(1:k,times=ng),memb1)
            outES[[i]]$ariR <- adjustedRandIndex(rep(1:k,times=ng),memb2)
        } else {
            outES[[i]] <- list(ari=NA,ariR=NA,bic=NA,BIC=NA,G=NA,d=NA)
        }
    }
    outES
}

tab1Fig7 <- function()
{
    tab1 <- matrix(c(12,77,11,0,0,0), 3, 2);
    rownames(tab1) <- c(2,3,4); colnames(tab1) <- c(2,3)
    df1 <- reshape2::melt(tab1, varnames = c("dhat","Khat"), value.name = "count")
    df1 <- df1 %>% mutate(n="n =  200") %>% dplyr::select(n, everything())

    tab2 <- matrix(c(18,78,3,1,0,0), 3, 2);
    rownames(tab2) <- c(2,3,4); colnames(tab2) <- c(2,3)
    df2 <- reshape2::melt(tab2, varnames = c("dhat","Khat"), value.name = "count")
    df2 <- df2 %>% mutate(n="n =  500") %>% dplyr::select(n, everything())

    tab3 <- matrix(c(23,74,3,0,0,0), 3, 2);
    rownames(tab3) <- c(2,3,4); colnames(tab3) <- c(2,3)
    df3 <- reshape2::melt(tab3, varnames = c("dhat","Khat"), value.name = "count")
    df3 <- df3 %>% mutate(n="n = 1000") %>% dplyr::select(n, everything())

    tab4 <- matrix(c(35,63,2,0,0,0), 3, 2);
    rownames(tab4) <- c(2,3,4); colnames(tab4) <- c(2,3)
    df4 <- reshape2::melt(tab4, varnames = c("dhat","Khat"), value.name = "count")
    df4 <- df4 %>% mutate(n="n = 2000") %>% dplyr::select(n, everything())

    df.B2 <- rbind(df1,df2,df3,df4) %>% mutate(B="K = d = 2") %>% dplyr::select(B, everything())

    ## B = 3
    tab1 <- matrix(c(0,1,9,0,52,37,0,1,0), 3, 3)
    rownames(tab1) <- c(2,3,4); colnames(tab1) <- c(2,3,4)
    df1 <- reshape2::melt(tab1, varnames = c("dhat","Khat"), value.name = "count")
    df1 <- df1 %>% mutate(n="n =  200") %>% dplyr::select(n,everything())

    tab2 <- matrix(c(0,0,0,0,50,50,0,0,0), 3, 3);
    rownames(tab2) <- c(2,3,4); colnames(tab2) <- c(2,3,4)
    df2 <- reshape2::melt(tab2, varnames = c("dhat","Khat"), value.name = "count")
    df2 <- df2 %>% mutate(n="n =  500") %>% dplyr::select(n,everything())

    tab3 <- matrix(c(0,0,0,0,76,24,0,0,0), 3, 3);
    rownames(tab3) <- c(2,3,4); colnames(tab3) <- c(2,3,4)
    df3 <- reshape2::melt(tab3, varnames = c("dhat","Khat"), value.name = "count")
    df3 <- df3 %>% mutate(n="n = 1000") %>% dplyr::select(n,everything())

    tab4 <- matrix(c(0,0,0,0,77,23,0,0,0), 3, 3);
    rownames(tab4) <- c(2,3,4); colnames(tab4) <- c(2,3,4)
    df4 <- reshape2::melt(tab4, varnames = c("dhat","Khat"), value.name = "count")
    df4 <- df4 %>% mutate(n="n = 2000") %>% dplyr::select(n,everything())

    df.B3 <- rbind(df1,df2,df3,df4) %>% mutate(B="K = d = 3") %>% dplyr::select(B,everything())
    df.B23 <- rbind(df.B2, df.B3)

    # require(xtable)
    # tab.B2 <- xtable(df.B2)
    # align(tab.B2) <- rep("c", ncol(tab.B2)+1)
    # digits(tab.B2) <- 0
    # print(tab.B2, include.rownames = FALSE, booktabs = TRUE)
    #
    # tab.B3 <- xtable(df.B3)
    # align(tab.B3) <- rep("c", ncol(tab.B3)+1)
    # digits(tab.B3) <- 0
    # print(tab.B3, include.rownames = FALSE, booktabs = TRUE)

    df.B23$B <- factor(df.B23$B)
    df.B23$n <- factor(df.B23$n)
    df.B23$dhat <- factor(df.B23$dhat)
    df.B23$Khat <- factor(df.B23$Khat)
    #    df.B23$count <- factor(df.B23$count)
    p <- df.B23 %>% mutate(col=ifelse(count>30, "white", "black")) %>%
        ggplot(aes(x=Khat, y=fct_rev(dhat))) + #coord_equal() +
        facet_grid(n~B, scales = 'free') + #coord_equal() +
        geom_tile(aes(fill=count)) +
        geom_text(aes(label=sprintf("%d",round(count,0)), color=col), size=6) +
        scale_color_manual(values=c('white'='white', 'black'='black'), guide="none") +
        scale_fill_gradientn(colors=gray(255:0/255), limit=c(0,max(unlist(df.B23)))) +
        #        scale_fill_gradientn(colors=gray(255:0/255), limit=c(0,length(unique(df.B23$count)))) +
        xlab(expression(hat("K"))) + ylab(expression(hat("d"))) +
        #        theme(axis.title.x=element_blank(), axis.title.y=element_blank(), panel.background = element_rect(fill = "white", colour = "grey50")) +
        theme(panel.background = element_rect(fill = "white", colour = "grey50")) +
        theme(strip.text.x = element_text(size = 15)) +#, colour = "orange", angle = 90)) +
        theme(strip.text.y = element_text(size = 15)) +#, colour = "orange", angle = 90)) +
        theme(axis.text.x=element_text(size=15)) +
        theme(axis.text.y=element_text(size=15)) +
        theme(legend.position="none")
    print(p)
    ggsave("tab1-ggplot.pdf", p, width=8, height=8)
}

# Fig 9
figure9 <- function(n=500,N=100,ps=seq(0.005,0.200,by=0.005))
{
#    if(!dir.exists(odir)) dir.create(odir,recursive=TRUE)
    ofile <- paste0("exp_N",N,"_n",n,".RData")

    # if (!file.exists(ofile)) {
    #     out <- mclapply(ps,function(p){
    #         experiment9(n=n,p=p,N=N)
    #     },mc.cores=30)
    #     outE <- mclapply(ps,function(p){
    #         experiment9.2(n=n,p=p,N=N)
    #     },mc.cores=30)
    #     save(out,outE,ps,n,file=ofile)
    # } else {
    #     load(ofile)
    # }

    # files1 <- files %>% filter(grepl(glob2rx(ofile), name))
    # file <- drive_download(files1, overwrite = TRUE, verbose = FALSE)
    # load(file$local_path)

    load(url(files[grepl(glob2rx(ofile),dfiles)]))

    a1 <- lapply(out,function(x) unlist(lapply(x,'[[','ari')))
    cols <- rep('white',length(ps))
    inds <- which(ps>=0.09 & ps<=0.115)
    cols[inds] <- 'gray'
    x1 <- boxplot(a1,names=ps,ylab="Adjusted Rand Index",xlab="p",
                  ylim=0:1,col=cols)
#    dev.print(device=pdf,file=paste0("Picts/aris",n,".pdf")) # Fig 9
}

figure1(K=2)

## [1] 3.027658
## [[1]]
##            [,1]       [,2]
## [1,]  0.7102663 -0.3395499
## [2,] -0.3395499  2.1897337
## 
## [[2]]
##           [,1]      [,2]
## [1,] 0.9179145 0.4729445
## [2,] 0.4729445 2.0820855

figure1(K=3)

## [1] 5.085154
## [[1]]
##             [,1]        [,2]       [,3]
## [1,]  0.76197689 -0.09842259 -0.1786604
## [2,] -0.09842259  1.10046525 -0.3918768
## [3,] -0.17866042 -0.39187682  2.9185102
## 
## [[2]]
##           [,1]      [,2]      [,3]
## [1,] 0.5843389 0.1778677 0.2339345
## [2,] 0.1778677 1.3238561 0.5585122
## [3,] 0.2339345 0.5585122 2.5773492
## 
## [[3]]
##            [,1]       [,2]       [,3]
## [1,]  1.8964226 -1.3437217  0.4113969
## [2,] -1.3437217  1.6507491 -0.5526459
## [3,]  0.4113969 -0.5526459  1.5331004

figure2()

figure3()

figure5()

figure6(out="exp44_10000_5000.RData")

tab1Fig7()

figure9()

simulation1.R

parky

2020-05-15