# # OOS-embedding with weighted raw-stress # # This code uses some of the same input/output data massaging structures as the Smacof R package # De Leeuw, Jan, and Patrick Mair. "Multidimensional scaling using majorization: SMACOF in R." Department of Statistics, UCLA (2011). # Smacof package available at https://CRAN.R-project.org/package=smacof oosfJOFC <- function(D, X, w, verbose = TRUE, itmax = 1000, eps = 1e-6) { ## Input: ## D : list of m dissimilarities each of size n+1 (n in sample, 1 oos) ## X : list of m within-sample embeddings, each pf dimension n x d ## w : weighting in fJOFC for oos commensurability ## N= n*m where n is the number of (in-sample) objects n <- nrow(X[[1]]) m <-length(X) N <-n*m d <- ncol(X[[1]]) ## NN is the total number of objects * m NN<- N+m nn <- n+1 ## Normalize D, # make the delta's into diss objects, same subroutine used in SmacofSym from the Smacof package diss<-D for(i in 1:m){ if ((is.matrix(diss[[i]])) || (is.data.frame(diss[[i]]))) { dx<-as.vector(diss[[i]][lower.tri(diss[[i]])]) diss[[i]] <- structure(dx, Size = n+1, call = quote(as.dist.default(m=b)), class = "dist", Diag = FALSE, Upper = FALSE) attr(diss[[i]], "Labels") <- c((nn*(i-1)+1):(nn*i)) } } wgthsd<-as.dist(matrix(1,nn,nn)-diag(nn)) dhat<-list() for(i in 1:m){ dhat[[i]] <- nDiss(diss[[i]], wgthsd)*sqrt(choose(NN,2)/(m*choose(nn,2))) #normalize each dissimilarity to nvert(nvert-1)/2 } del<-list() for(i in 1:m){dd<-as.matrix(dhat[[i]]) del[[i]]<-dd[1:n,n+1]} ## randomly initialize y the oos embedding y<-list() for(i in 1:m){ y[[i]] <- t(matrix(mvrnorm(1, c(rep(0,d)), diag(apply(X[[i]],2,var),d,d)))) } ## compute baseline stress oosdist <- list() for(i in 1:m){ oosdist[[i]] <- rect.dist.sqrt(X[[i]],y[[i]]) } stress <-sum( Reduce("+",sapply( mapply("-",oosdist,del,SIMPLIFY = FALSE) , function(x) x^2,simplify=FALSE ) ) ) for(i in 2:m){for(j in 1:(i-1)){ stress<-stress+w*vec_sq(y[[i]]-y[[j]])}} stress<-stress/(n*m+choose(m,2)) itel<-0 #iteration number if (verbose) cat("Iteration: ",formatC(itel,width=3, format="d"), " Stress (normalized):", formatC(c(stress),digits=8,width=12,format="f"),"\n") itel<- 1 # iterative majorization repeat{ sold<-stress xi<-list() for(i in 1:m){ xi[[i]]<- t( -del[[i]]/oosdist[[i]]+c(rep(1,n)) )%*%X[[i]] } phi<-list() for(i in 1:m){ phi[[i]]<- sum( del[[i]]/oosdist[[i]]) } z<-list() for(i in 1:m){z[[i]]<-1/(n+m*w)*xi[[i]]+w/(n*(n+m*w))*Reduce("+",xi) z[[i]]<-z[[i]]+(phi[[i]])/(n+m*w)*y[[i]]+w/(n*(n+m*w))*Reduce("+",mapply("*",phi,y,SIMPLIFY = FALSE)) } # recompute stress oosdist <- list() for(i in 1:m){ oosdist[[i]] <- rect.dist.sqrt(X[[i]],z[[i]]) } stress <-sum( Reduce("+",sapply( mapply("-",oosdist,del,SIMPLIFY = FALSE) , function(x) x^2,simplify=FALSE ) ) ) for(i in 2:m){for(j in 1:(i-1)){ stress<-stress+w*vec_sq(z[[i]]-z[[j]])}} stress<-stress/(n*m+choose(m,2)) #print out intermediate stress if (verbose) cat("Iteration: ",formatC(itel,width=3, format="d"), " Stress (normalized):", formatC(c(stress),digits=8,width=12,format="f"), " Difference: ", formatC(sold-stress,digits=8,width=12,format="f"),"\n") if (((sold-stress)