+ Show code- Hide code
library(OpasnetUtils)
ReplicaX <- function(x,vartype,k=5,osigma=0.5)
{
y <- replica_discrete(x,vartype,k=5)
y_continuous <- replica_continuous(x[,vartype=="continuous"],osigma=osigma)
y[,vartype=="continuous"] <- y_continuous
return(y)
}
replica_discrete <- function(x,vartype,k=5)
{
y <- x
n <- nrow(x)
distmat <- rowwise_similarity(x,x,vartype=vartype)
neighbors <- t(apply(distmat,2,FUN=which.maxk,k=k+1)[-1,])
for(i in 1:n)
{
y[i,] <- new_obs2(x[neighbors[i,],],vartype=vartype)
}
return(y)
}
new_obs2 <- function(xx,vartype)
{
xnew <- xx[1,]
k <- nrow(xx)
for(j in 1:ncol(xnew))
{
if(vartype[j]=="continuous")
{
finiteind <- is.finite(xx[,j])
kfinite <- sum(finiteind)
if(kfinite>1)
{
xnew[j] <- sum(rdirichlet(rep(1,kfinite))*xx[finiteind,j])
} else {
xnew[j] <- NA
}
} else {
ind <- as.logical(rmultinom(1,1,prob=rep(1/k,k)))
xnew[j] <- xx[ind,j]
}
}
return(xnew)
}
which.maxk <- function(x,k)
{
return(order(x,decreasing=T)[1:k])
}
observation_similarity <- function(v1,d2,vartype,variance)
{
#v1 one row dataframe, d2 dataframe
p <- ncol(v1)
n <- nrow(d2)
score <- rep(0,n)
for(j in 1:p)
{
if(vartype[j]=="continuous")
{
score <- score + pmax(0,1-(v1[,j]-d2[,j])^2/variance[j],na.rm=T)
} else {
score <- score + as.numeric(as.character(d2[,j])==as.character(v1[,j]))
}
}
return(score)
}
navar <- function(x)
{
p <- ncol(x)
v <- rep(NA,p)
for(j in 1:p)
{
if(is.numeric(x[,j])) v[j] <- var(x[,j])
}
return(v)
}
rowwise_similarity <- function(x,y,vartype)
{
n <- nrow(x)
similmat <- matrix(NA,nrow=n,ncol=nrow(y))
variance <- navar(y)
for(j in 1:n)
{
similmat[j,] <- observation_similarity(x[j,],y,vartype=vartype,variance=variance)
}
return(similmat)
}
kernelCDF <- function(x,n=512,cut=3)
{
bw <- density(x,give.Rkern=T,bw="SJ")
xp <- seq(min(x)-cut*bw,max(x)+cut*bw,length.out=n)
A <- outer(xp,x,pnorm,sd=bw)/length(x)
cdf <- rowSums(A)
return(data.frame(x=xp,cdf=cdf))
}
replica_continuous <- function(x,osigma,umin=0.00000001,umax=0.99999999,kernelweight=0.9)
{
n <- dim(x)[1]
p <- dim(x)[2]
if(length(osigma)==1) osigma<-rep(osigma,p)
u <- matrix(NA,nrow=n,ncol=p)
uparam <- matrix(NA,nrow=n,ncol=p)
ukernel <- matrix(NA,nrow=n,ncol=p)
y <- x
yparam <- x
ykernel <- x
param <- list()
bestpdf <- list()
kcdf <- list()
ind <- 1:n
#Modeling of marginal distributions
for(j in 1:p)
{
xf <- na.omit(x[,j])
ind_finite <- is.finite(x[,j])
if(kernelweight>0 & kernelweight<1)
{
param[[j]] <- data2normpoly4(xf)
uparam[ind_finite,j] <- pnormpoly(x[ind_finite,j],param[[j]])
kcdf[[j]] <- kernelCDF(xf)
pspl <- splinefun(kcdf[[j]]$x,kcdf[[j]]$cdf,method="monoH.FC")
ukernel[ind_finite,j] <- pspl(x[ind_finite,j])
u[ind_finite,j] <- (1-kernelweight)*uparam[ind_finite,j] + kernelweight*ukernel[ind_finite,j]
u[!ind_finite,j] <- NA
}
if(kernelweight==1)
{
kcdf[[j]] <- kernelCDF(xf)
ukernel[ind_finite,j] <- kcdf[[j]]$cdf[ind_finite]
u[ind_finite,j] <- ukernel[ind_finite,j]
u[!ind_finite,j] <- NA
}
if(kernelweight==0)
{
param[[j]] <- data2normpoly4(xf)
uparam[ind_finite,j] <- pnormpoly(x[ind_finite,j],param[[j]])[ind]
u[ind_finite,j] <- uparam[ind_finite,j]
u[!ind_finite,j] <- NA
}
u[ind_finite,j] <- pmin(umax,pmax(u[ind_finite,j],umin))
}
#Generation of correlated noise in Gaussian space
unorm <- qnorm(u)
if(is.null(osigma))
{
dr <- dist(unorm[sample(1:n,1000),])
osigma <- quantile(as.vector(dr),0.001)
}
normcor <- cor(unorm,use="complete.obs")
epsilon <- mvrnorm(n,mu=rep(0,p),Sigma=diag(osigma)%*%normcor%*%diag(osigma))
#Generation of new observations
for(j in 1:p)
{
noisyu <- unorm[,j]+epsilon[,j]
ind_finite <- is.finite(x[,j])
noisyu[ind_finite] <- noisyu[ind_finite]/sd(noisyu[ind_finite])
if(kernelweight>0 & kernelweight<1)
{
yparam[ind_finite,j] <- qnormpoly(pmin(umax,pmax(umin,pnorm(noisyu[ind_finite]))),
param=param[[j]])
qspl <- splinefun(kcdf[[j]]$cdf,kcdf[[j]]$x,method="monoH.FC")
ykernel[ind_finite,j] <- qspl(pnorm(noisyu[ind_finite]))
y[ind_finite,j] <- (1-kernelweight)*yparam[ind_finite,j] + kernelweight*ykernel[ind_finite,j]
}
if(kernelweight==0)
{
yparam[ind_finite,j] <- qnormpoly(pmin(umax,pmax(umin,pnorm(noisyu[ind_finite]))),
param=param[[j]])
y[ind_finite,j] <- yparam[ind_finite,j]
}
if(kernelweight==1)
{
qspl <- splinefun(kcdf[[j]]$cdf,kcdf[[j]]$x,method="monoH.FC")
ykernel[ind_finite,j] <- qspl(pnorm(noisyu[ind_finite]))
y[ind_finite,j] <- ykernel[ind_finite,j]
}
}
names(y) <- names(x)
return(y)
}
objects.store(
ReplicaX,
replica_discrete,
new_obs2,
which.maxk,
observation_similarity,
navar,
rowwise_similarity,
kernelCDF,
replica_continuous
)
cat("Saved objects ReplicaX, replica_discrete, new_obs2, which.maxk, observation_similarity, navar, rowwise_similarity, kernelCDF,
replica_continuous. Remember to load packages Lmoments, bayesm, MASS, plyr, and compiler before using ReplicaX.\n")
| |