#' --- #' title: Experiment with Wikipedia Data #' output: #' html_document: #' toc: true #' css: ~/RFolder/pandoc.css #' highlight: pygments #' --- load("Wikidata.RData") # list of 4 objects, are already dissimilarity matrices D<-list() for(i in 1:4){D[[i]]<-as.matrix(Wikidata[[i]])/sqrt(sum(as.matrix(Wikidata[[i]])^2))} # embed the dissimilarities t<-proc.time();smds<-fJOFC(10,D,10,verbose=FALSE);runtime1<-proc.time()-t library('ape') # hclust the points set.seed(12345) X<-smds$conf HC<-hclust(dist(X),method="ward.D") # compute the dendrogram merge height CD<-cophenetic(HC) CD<-as.matrix(CD) DMH<-rep(0,1382) for(i in 1:1382){ MHs<-c(CD[i,i+1382],CD[i,i+1382*2],CD[i,i+1382*3],CD[i+1382,i+1382*2],CD[i+1382,i+1382*3],CD[i+1382*2,i+1382*3]) DMH[i]<-max(MHs) } # figure 5, left panel hist(DMH,col='red',main=paste("Dendrogram Merge Height for the \n 1382 points"),labels = TRUE, ylim=c(0,1400),xlab=paste("Merge Height")) # figure 5, right panel DMH2<-DMH[DMH<50] hist(DMH2,col='red',main=paste("Dendrogram Merge Height for the \n 1226 points with DMH<50"),labels = TRUE, ylim=c(0,1100),xlab=paste("Merge Height")) # figure 6 h=(0:30)/10 ari<-rep(0,31) for(i in 1:31){L<-cutree(HC,h=h[i]); ari[i]<-adjustedRandIndex(L,rep(1:1382,4))} ari<-cbind(h,ari) plot(ari,type="o",col='red',main=paste("ARI of HClust versus ground truth"),ylab=paste("ARI"),xlab=paste("Cut Height")) # figure 7 # note that coloring the branches was manually done d <- cut(as.dendrogram(HC), h=25) par(cex=0.2, mar=c(5, 8, 4, 1)) plot(d$lower[[25]],yaxes=NULL) par(cex=1) title(xlab="", ylab="", main="") ## run JOFC for time conparison # make the weight matrix # n=1382 # w=10 # W1 <- matrix(1,n,n) - diag(n) # W2 <- diag(w,n) # W <- W2 # for(i in 1:3){W<-cbind(W,W2)} # WW<-W # for(i in 1:3){W<-rbind(W,WW)} # W<-W-diag(w,1382*4) # W <- W+as.matrix(bdiag(rep(list(W1),4))) # # # make the omnibus matrix # d<-list() # for(i in 1:4){ # d[[i]]<-D[[i]] # for(l in 1:3){d[[i]]<-cbind(d[[i]],D[[i]])} # } # O<-Reduce(rbind,d) # O<-O/2+t(O)/2 # # t<-proc.time();smds2 <- smacofSym(O,10,weightmat=W,verbose=FALSE);runtime2<-proc.time()-t