simulation2.R

source("loadData.R")

gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}

requiredPkg <- c("XML", "tidyverse")
newPkg <- requiredPkg[!(requiredPkg %in% installed.packages()[, "Package"])]
if (length(newPkg)) install.packages(newPkg, dependencies = TRUE)
sapply(requiredPkg, require, character.only = TRUE)

## Loading required package: XML

## Loading required package: tidyverse

## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──

## ✓ ggplot2 3.3.0     ✓ purrr   0.3.3
## ✓ tibble  3.0.0     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0

## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

##       XML tidyverse 
##      TRUE      TRUE

findex <- c("09", "095", "1", "105", "11", "115")
meanAriB2 <- rep(0,6)
meanAriB2s <- rep(0,6)
meanAriSVT1 <- rep(0,6)
meanAriSVT2 <- rep(0,6)
meanAriSVT3 <- rep(0,6)
meanDB2s <- rep(0,6)
meanDSVT1 <- rep(0,6)
meanDSVT2 <- rep(0,6)
meanDSVT3 <- rep(0,6)

# drive_auth_config(active = FALSE)
# folder_url <- "https://drive.google.com/open?id=1PXyy6-z_O-ICu79kjZB-W9hkr-vgN6kh"
# folder <- drive_get(as_id(folder_url))
# files <- drive_ls(folder)

folder_url <- "http://www.cis.jhu.edu/~parky/dhatKhat/Results/result_simulation/"
docs <- XML::htmlParse(folder_url)
links <- XML::xpathSApply(docs, "//a/@href")
dfiles <- links[grepl(glob2rx("*.RData"), links)]
files <- paste0(folder_url, dfiles)
#print(load(url(files[x])))


##############################################
# plot figure 8
##############################################

#files1 <- files %>% filter(grepl(glob2rx("eig_BF1_n500_p0095*"), name))
files1 <- files[grepl(glob2rx("eig_BF1_n500_p0095*"),dfiles)]
result <- loadData(files1)
# hist(result$ariBlock2sList -result$ariSvt1List, breaks=seq(-0.05, 0.2, 0.02), prob=T,col=1,density=10,angle=135,
#      xlim = c(-0.05,0.2), ylim = c(0, 40), main = "", xlab = "Diffence of ARI", lty = 1)
# lines(stats::density(result$ariBlock2sList -result$ariSvt1List,adj=2),col=1,lwd=2)
# hist(result$ariBlock2sList -result$ariSvt2List, breaks=seq(-0.05, 0.2, 0.02), prob=T,col=2,density=10,angle=90,
#      add = TRUE, lty = 1)
# lines(stats::density(result$ariBlock2sList -result$ariSvt2List,adj=10),col=2,lwd=2)
# hist(result$ariBlock2sList -result$ariSvt3List, breaks=seq(-0.05, 0.2, 0.02), prob=T,col=4,density=10,angle=45,
#      add = TRUE, lty = 1)
# lines(stats::density(result$ariBlock2sList -result$ariSvt3List,adj=2),col=4,lwd=2)
# legend("topleft", col = c(1,2,4), legend = c("MCG-ZG1", "MCG-ZG2", "MCG-ZG3"),
#        lwd = c(1.5, 1.5, 1.5))

df.a <- data.frame(p="p = 0.095",
                   ZG1=result$ariBlock2sList - result$ariSvt1List,
                   ZG2=result$ariBlock2sList - result$ariSvt2List,
                   ZG3=result$ariBlock2sList - result$ariSvt3List)
df.a <- df.a %>% gather(`ZG1`,`ZG2`,`ZG3`, key="method", value=diff)
# df.a %>% ggplot(aes(x=method,y=diff,color=method,fill=method)) +
#   geom_boxplot(alpha=0.5,notch = TRUE) +
#   theme(legend.position="none") +
#   ylab("Difference of ARI")

df8.a <- data.frame(p="p = 0.095",
                    SMS = result$ariBlock2sList,
                    ZG1 = result$ariSvt1List,
                    ZG2 = result$ariSvt2List,
                    ZG3 = result$ariSvt3List) #%>%
#        gather(key = "group", value = "ari", -p)


##############################################
#files2 <- files %>% filter(grepl(glob2rx("eig_BF1_n500_p0115*"), name))
files2 <- files[grepl(glob2rx("eig_BF1_n500_p0115*"),dfiles)]
result <- loadData(files2)
# hist(result$ariBlock2sList -result$ariSvt1List, breaks=seq(-0.09, 0.19, 0.02), prob=T,col=1,density=10,angle=135,
#      xlim = c(-0.1,0.2), ylim = c(0, 20), main = "", xlab = "Diffence of ARI", lty = 1)
# lines(stats::density(result$ariBlock2sList -result$ariSvt1List,adj=2),col=1,lwd=2)
# hist(result$ariBlock2sList -result$ariSvt2List, breaks=seq(-0.09, 0.19, 0.02), prob=T,col=2,density=10,angle=90,
#      add = TRUE, lty = 1)
# lines(stats::density(result$ariBlock2sList -result$ariSvt2List,adj=2),col=2,lwd=2)
# hist(result$ariBlock2sList -result$ariSvt3List, breaks=seq(-0.09, 0.19, 0.02), prob=T,col=4,density=10,angle=45,
#      add = TRUE, lty = 1)
# lines(stats::density(result$ariBlock2sList -result$ariSvt3List,adj=2),col=4,lwd=2)
# legend("topleft", col = c(1,2,4), legend = c("MCG-ZG1", "MCG-ZG2", "MCG-ZG3"),
#        lwd = c(1.5, 1.5, 1.5))

df.b <- data.frame(p="p = 0.115",
                   ZG1=result$ariBlock2sList - result$ariSvt1List,
                   ZG2=result$ariBlock2sList - result$ariSvt2List,
                   ZG3=result$ariBlock2sList - result$ariSvt3List)
df.b <- df.b %>% gather(`ZG1`,`ZG2`,`ZG3`, key="method", value=diff)
# df.b %>% ggplot(aes(x=method,y=diff,color=method,fill=method)) +
#   geom_boxplot(alpha=0.5,notch = TRUE) +
#   theme(legend.position="none") +
#   ylab("Difference of ARI")
df8.b <- data.frame(p="p = 0.115",
                    SMS = result$ariBlock2sList,
                    ZG1 = result$ariSvt1List,
                    ZG2 = result$ariSvt2List,
                    ZG3 = result$ariSvt3List) #%>%
#        gather(key = "group", value = "ari", -p)

df.ab <- rbind(df.a, df.b)
#df.ab$p <- factor(df.ab$p)
df.ab %>% ggplot(aes(x=method,y=diff,color=method,fill=method)) +
  facet_grid(~p) +
  geom_boxplot(alpha=0.5,notch = TRUE) +
#  geom_violin(alpha=0.5) +
  theme(legend.position="none") +
  ylab("ARI(SMS) - ARI(ZG)") +
  theme(axis.title=element_text(size=15)) +
  theme(axis.text.x=element_text(size=15)) +
  #  theme(axis.ticks = element_line(size = 5)) +
  theme(axis.text.y=element_text(size=15)) +
  theme(strip.text=element_text(size=rel(1.2))) +
  theme(legend.text = element_text(colour="black", size = 15, face = "plain"))

## notch went outside hinges. Try setting notch=FALSE.

ggsave("Fig8-alt.pdf", width=9, height=5)

## notch went outside hinges. Try setting notch=FALSE.

## paired t.test
t.test(as.numeric(df8.a$SMS), as.numeric(df8.a$ZG1), paired=TRUE, alternative = "gre")

## 
##  Paired t-test
## 
## data:  as.numeric(df8.a$SMS) and as.numeric(df8.a$ZG1)
## t = 8.4519, df = 99, p-value = 1.279e-13
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.01516626        Inf
## sample estimates:
## mean of the differences 
##              0.01887414

t.test(as.numeric(df8.a$SMS), as.numeric(df8.a$ZG2), paired=TRUE, alternative = "gre")

## 
##  Paired t-test
## 
## data:  as.numeric(df8.a$SMS) and as.numeric(df8.a$ZG2)
## t = 2.5223, df = 99, p-value = 0.006626
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.001154085         Inf
## sample estimates:
## mean of the differences 
##               0.0033773

t.test(as.numeric(df8.a$SMS), as.numeric(df8.a$ZG3), paired=TRUE, alternative = "gre")

## 
##  Paired t-test
## 
## data:  as.numeric(df8.a$SMS) and as.numeric(df8.a$ZG3)
## t = 4.7441, df = 99, p-value = 3.525e-06
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.007973673         Inf
## sample estimates:
## mean of the differences 
##              0.01226704

t.test(as.numeric(df8.b$SMS), as.numeric(df8.b$ZG1), paired=TRUE, alternative = "gre")

## 
##  Paired t-test
## 
## data:  as.numeric(df8.b$SMS) and as.numeric(df8.b$ZG1)
## t = -0.35261, df = 99, p-value = 0.6374
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -0.001172448          Inf
## sample estimates:
## mean of the differences 
##           -0.0002053726

t.test(as.numeric(df8.b$SMS), as.numeric(df8.b$ZG2), paired=TRUE, alternative = "gre")

## 
##  Paired t-test
## 
## data:  as.numeric(df8.b$SMS) and as.numeric(df8.b$ZG2)
## t = 8.3648, df = 99, p-value = 1.972e-13
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.02053509        Inf
## sample estimates:
## mean of the differences 
##              0.02562072

t.test(as.numeric(df8.b$SMS), as.numeric(df8.b$ZG3), paired=TRUE, alternative = "gre")

## 
##  Paired t-test
## 
## data:  as.numeric(df8.b$SMS) and as.numeric(df8.b$ZG3)
## t = 10.764, df = 99, p-value < 2.2e-16
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.03231208        Inf
## sample estimates:
## mean of the differences 
##              0.03820553

##############################################
# plot figure 10
##############################################

for (i in 1:6){
  filename = paste("eig_BF1_n500_p0", findex[i], "_mc*", sep="")
#  filei <- files %>% filter(grepl(glob2rx(filename), name))
  filei <- files[grepl(glob2rx(filename),dfiles)]
  result <- loadData(filei)

  meanAriB2[i] <- mean(result$ariBlock2List)
  meanAriB2s[i] <- mean(result$ariBlock2sList)
  meanAriSVT1[i] <- mean(result$ariSvt1List)
  meanAriSVT2[i] <- mean(result$ariSvt2List)
  meanAriSVT3[i] <- mean(result$ariSvt3List)

  meanDB2s[i] <- mean(result$dhatBlock2List)
  meanDSVT1[i] <- mean(result$dhatSvt1List)
  meanDSVT2[i] <- mean(result$dhatSvt2List)
  meanDSVT3[i] <- mean(result$dhatSvt3List)
}

x <- c(0.09, 0.095,0.1,0.105,0.11,0.115)
y <- rep(0,100)
# the results of Louvain and Walktrap are obtained separately, calculated from `LouvainAndRw.R`
meanAriLouvain <- c(0.094, 0.052, 0.033, 0.018, 0.013, 0.008)
meanAriWalktrap <- c(0.787, 0.633, 0.381, 0.203, 0.115, 0.070)
meanAriIRM <- c(0.132, 0.128, 0.125, 0.121, 0.116, 0.111)

mycol <- gg_color_hue(8)

xrange <- c(0.09,0.115)
yrange <- c(0,1)

plot(xrange, yrange, type = "n", xlab = "p", ylab = "ARI", col.axis = "black")
lines(x, meanAriSVT1, type = "l", lwd = 1.5, lty = 1, col = mycol[1]) # ZG1
lines(x, meanAriSVT2, type = "l", lwd = 1.5, lty = 1, col = mycol[2]) # ZG2
lines(x, meanAriSVT3, type = "l", lwd = 1.5, lty = 1, col = mycol[3]) # ZG3
lines(x, meanAriB2, type = "l", lwd = 1.5, lty = 1, col = mycol[4])   # SMS
lines(x, meanAriB2s, type = "l", lwd = 1.5, lty = 1, col = mycol[5])  # SMS_Reduced
lines(x, meanAriLouvain, type = "l", lwd = 1.5, lty = 1, col = mycol[6])
lines(x, meanAriWalktrap, type = "l", lwd = 1.5, lty = 1, col = mycol[7])
lines(x, meanAriIRM, type = "l", lwd = 1.5, lty = 1, col = mycol[8])
legend(0.108, 0.6, col = mycol,
       legend = c("ZG1", "ZG2", "ZG3", "SMS", "SMS_Reduced", "Louvain", "Walktrap", "IRM"),
       lwd = c(1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5), cex = 0.5, y.intersp=1.5)
box()

dev.print(pdf, "simulation_all_methods.pdf", width=5, height=4)

## quartz_off_screen 
##                 2

##############################################
# plot figure 11(a)
##############################################

op <- par(mar = c(5,5,4,2) + 0.1) ## default is c(5,4,4,2) + 0.1

xrange <- c(0.09,0.115)
yrange <- c(0.8,1)
plot(xrange, yrange, type = "n", xlab = "p", ylab = "mean(ARI)", col.axis = "black")
x <- c(0.09, 0.095,0.1,0.105,0.11,0.115)
y <- rep(0,100)

lines(x, meanAriSVT1, type = "l", lwd = 3, lty = 1, col = mycol[1])
lines(x, meanAriSVT2, type = "l", lwd = 3, lty = 1, col = mycol[2])
lines(x, meanAriSVT3, type = "l", lwd = 3, lty = 1, col = mycol[3])
lines(x, meanAriB2s, type = "l", lwd = 4.5, lty = 1, col = mycol[5])

legend("topright", col = mycol[c(1:3,5)],#c(1,4,2,6),
       legend = c("ZG1", "ZG2", "ZG3", "SMS_Reduced"),
       lwd = c(1.5, 1.5, 1.5, 1.5), cex = 0.7)

dev.print(pdf, "simulation3_arimean.pdf")

## quartz_off_screen 
##                 2

##############################################
# plot figure 11(b)
##############################################

xrange <- c(0.09,0.115)
yrange <- c(-1.5,4)
plot(xrange, yrange, type = "n", xlab = "p", ylab = expression(paste("mean(", hat(d), " - ", d, ")")), col.axis = "black")
x <- c(0.09, 0.095,0.1,0.105,0.11,0.115)
y <- rep(0,100)

lines(x, meanDSVT1-2, type = "l", lwd = 3, lty = 1, col = mycol[1])
lines(x, meanDSVT2-2, type = "l", lwd = 3, lty = 1, col = mycol[2])
lines(x, meanDSVT3-2, type = "l", lwd = 3, lty = 1, col = mycol[3])
lines(x, meanDB2s-2, type = "l", lwd = 4.5, lty = 1, col = mycol[5])
lines(x, rep(0,6), type = "l", lwd = 1.5, lty = 2, col = 1)

# legend("topright", col = mycol[c(1:3,5)],#c(1,4,2,6),
#        legend = c("ZG1", "ZG2", "ZG3", "SMS_Reduced"),
#        lwd = c(1.5, 1.5, 1.5, 1.5), cex = 0.7)
dev.print(pdf, "simulation3_Dmean.pdf")

## quartz_off_screen 
##                 2

par(op)

simulation2.R

parky

2020-05-15