Opasnet Base Connection for R: Difference between revisions
Jump to navigation
Jump to search
m (→Functions) |
No edit summary |
||
Line 100: | Line 100: | ||
==Uploading data== | ==Uploading data== | ||
Slight problems with probabilistic data, sorting orders do not match between read and write. Very efficient. Probabilistic data supported. Appending to existing data not supported, a whole new set will always be produced. Varying numbers of iterations between cells not supported. | |||
=== | ===Functions=== | ||
op_baseWrite <- function(db, input, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL) { | |||
#Coerce input into a data frame if it isn't one already; get rid of empty cells | |||
if (is.array(input)) dataframe <- as.data.frame(as.table(input)) else dataframe <- input | |||
rescol <- colnames(dataframe) == "Freq" | |||
if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Freq" else { | |||
rescol <- colnames(dataframe) == "Result" | |||
if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Result" else { | |||
rescol <- colnames(dataframe) == "result" | |||
if (length(rescol[rescol==TRUE]) >= 1) rescol <- "result" | |||
} | |||
} | |||
values <- dataframe[,rescol] | |||
cond <- (is.na(values) == FALSE) | cond <- (is.na(values) == FALSE) | ||
dataframe <- dataframe[cond,] | |||
dataframe <- | |||
#Add page to database (if it doesn't already exist) | #Add page to database (if it doesn't already exist) | ||
if (is.null(ident)==TRUE) if (interactive()) ident <- | if (is.null(ident)==TRUE) if (interactive()) ident <- readline(paste("What is the identifier of this object?", | ||
"\n", sep = "")) else stop("indentifier of object no given") | |||
wiki_name <- substring(ident, 1, 5) | wiki_name <- substring(ident, 1, 5) | ||
if (wiki_name=="Op_en") {wiki_id <- 1; page <- substring(ident, 6, nchar(ident))} else if (wiki_name=="Op_fi") {wiki_id <- 2; page <- substring(ident, 6, nchar(ident))} else {wiki_id <- 0; page <- 0} | if (wiki_name=="Op_en") {wiki_id <- 1; page <- substring(ident, 6, nchar(ident))} else if (wiki_name=="Op_fi") { | ||
obj_id <- | wiki_id <- 2; page <- substring(ident, 6, nchar(ident))} else {wiki_id <- 0; page <- 0} | ||
<nowiki>obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep = ''))[1,1]</nowiki> | |||
if (is.na(obj_id)==TRUE) { | if (is.na(obj_id)==TRUE) { | ||
if (is.null(name)==TRUE) if (interactive()) name <- | if (is.null(name)==TRUE) if (interactive()) name <- readline(paste("What is the name of this object?", | ||
if (is.null(unit)==TRUE) if (interactive()) unit <- | "\n", sep = "")) else stop("object name not given") | ||
if (is.null(objtype_id)==TRUE) if (interactive()) objtype_id <- | if (is.null(unit)==TRUE) if (interactive()) unit <- readline(paste("What is the unit of this object?", | ||
sqlQuery(db, paste('INSERT INTO obj (ident, name, unit, objtype_id, page, wiki_id) VALUES ("', paste(ident, name, unit, sep = '","'), '",', paste(objtype_id, page, wiki_id, sep = ','), ')', sep = '')) | "\n", sep = "")) else stop("unit not given") | ||
obj_id <- | if (is.null(objtype_id)==TRUE) if (interactive()) objtype_id <- readline(paste("What type of object is", | ||
" this (id)?", paste(paste(sqlQuery(db, "SELECT id FROM objtype")[,1], sqlQuery(db, "SELECT objtype", | |||
" FROM objtype")[,1], sep = " - "), collapse = ", "), "\n", collapse = " ")) else { | |||
stop("object type not given")} | |||
sqlQuery(db, paste('INSERT INTO obj (ident, name, unit, objtype_id, page, wiki_id) VALUES ("', paste(ident, | |||
<nowiki>name, unit, sep = '","'), '",', paste(objtype_id, page, wiki_id, sep = ','), ')', sep = ''))</nowiki> | |||
<nowiki>obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep = ''))[1,1]</nowiki> | |||
} | } | ||
#Write act and actobj | #Write act and actobj | ||
if (is.null(who)==TRUE) if (interactive()) who <- | if (is.null(who)==TRUE) if (interactive()) {who <- readline(paste("What is the name of the uploader?", "\n", sep = "")) | ||
sqlQuery(db, paste('INSERT INTO act (acttype_id, who, comments) VALUES (4, "', who, '","R upload")', sep = '')) | } else stop("uploader name not given") | ||
<nowiki>sqlQuery(db, paste('INSERT INTO act (acttype_id, who, comments) VALUES (4, "', who, '","R upload")', sep = ''))</nowiki> | |||
act_id <- sqlQuery(db, "SELECT id FROM act ORDER BY id DESC LIMIT 1")[1,1] | act_id <- sqlQuery(db, "SELECT id FROM act ORDER BY id DESC LIMIT 1")[1,1] | ||
sqlQuery(db, paste('INSERT INTO actobj (act_id, obj_id, series_id) VALUES (', paste(act_id, obj_id, act_id, sep = ','), ')', sep = '')) | sqlQuery(db, paste('INSERT INTO actobj (act_id, obj_id, series_id) VALUES (', paste(act_id, obj_id, act_id, sep = ','), | ||
actobj_id <- | <nowiki>')', sep = ''))</nowiki> | ||
<nowiki>actobj_id <- sqlQuery(db, paste('SELECT id FROM actobj WHERE act_id = ', act_id, sep = ''))[1,1]</nowiki> | |||
#Write indexes | |||
#Write indexes | |||
ColNames <- colnames(dataframe[(colnames(dataframe) == "id") == FALSE]) | |||
ColNames <- ColNames[(ColNames == "obs") == FALSE] | |||
ColNames <- ColNames[(ColNames == "Result") == FALSE] | |||
ColNames <- ColNames[(ColNames == "result") == FALSE] | |||
for (i in 1: | ColNames <- ColNames[(ColNames == "Freq") == FALSE] | ||
<nowiki> | nind <- length(ColNames) | ||
<nowiki> | DimNames <- rep(vector("list", 1), nind) | ||
names(DimNames) <- ColNames | |||
indlengths <- 0 | |||
for (i in 1:nind) { | |||
<nowiki>DimNames[[i]] <- levels(factor(dataframe[, ColNames[i]]))</nowiki> | |||
<nowiki>indlengths[i] <- length(DimNames[[i]])</nowiki> | |||
} | |||
indQuery <- NA | |||
indQuery[1:nind*5 - 4] <- gsub(" ", "_", ColNames) | |||
indQuery[1:nind*5 - 3] <- '","' | |||
indQuery[1:nind*5 - 2] <- ColNames | |||
indQuery[1:nind*5 - 1] <- '",6' | |||
<nowiki> | indQuery[1:(nind - 1)*5] <- '),("' | ||
sqlQuery(db, paste('INSERT IGNORE INTO obj (ident, name, objtype_id) VALUES ("', paste(indQuery, collapse = ''), ')', | |||
<nowiki>sep = ''))</nowiki> | |||
IndIds <- sqlQuery(db, paste('SELECT id, ident FROM obj WHERE ident IN("', paste(gsub(" ", "_", ColNames), | |||
<nowiki>collapse = '","'), '")', sep = ''))</nowiki> | |||
DimIds <- DimNames | |||
DimN <- 1:nind | |||
names(DimN) <- gsub(" ", "_", ColNames) | |||
for (i in 1:nrow(IndIds)) { | |||
names(DimIds)[DimN[IndIds[i,2]]] <- IndIds[grep(gsub(" ", "_", ColNames[i]), IndIds$ident), 1] | |||
} | |||
#Write locations | |||
y <- 1 | |||
LocNames <- as.data.frame(matrix(rep(NA, 2*sum(indlengths)), sum(indlengths), 2)) | |||
for (i in 1:nind) { | |||
LocNames[y:(y + indlengths[i] - 1),1:2] <- matrix(c(rep(ColNames[i], indlengths[i]), | |||
levels(factor(dataframe[,ColNames[i]]))), indlengths[i], 2) | |||
y <- y + indlengths[i] | |||
} | |||
LocNames[,1] <- names(DimIds)[DimN[LocNames[,1]]] | |||
nloc <- nrow(LocNames) | |||
locQuery <- NA | |||
locQuery[1:nloc*4 - 3] <- LocNames[,1] | |||
locQuery[1:nloc*4 - 2] <- ',"' | |||
locQuery[1:nloc*4 - 1] <- LocNames[,2] | |||
locQuery[1:(nloc - 1)*4] <- '"),(' | |||
sqlQuery(db, paste('INSERT IGNORE INTO loc (obj_id_i, location) VALUES (', paste(locQuery, collapse = ''), '")', | |||
<nowiki>sep = ''))</nowiki> | |||
LocIds <- sqlQuery(db, paste('SELECT id, obj_id_i, location FROM loc WHERE location IN("', paste(LocNames[,2], | |||
<nowiki>collapse = '","'), '") AND obj_id_i IN(', paste(names(DimIds), collapse = ','), ')', sep = ''))</nowiki> | |||
LocMap <- NA | |||
y <- 1 | |||
for (i in 1:nind) { | |||
LocMap <- LocIds[grep(names(DimIds)[i], LocIds$obj_id_i), 1] | |||
names(LocMap) <- LocIds[grep(names(DimIds)[i], LocIds$obj_id_i), 3] | |||
<nowiki>DimIds[[i]] <- LocMap[DimNames[[i]]]</nowiki> | |||
} | } | ||
# Changing location names in table into ids | #Changing location names in table into ids | ||
for (i in 1: | for (i in 1:nind) { | ||
dataframe[,i] <- factor(dataframe[,i]) | dataframe[,ColNames[i]] <- factor(dataframe[,ColNames[i]]) | ||
<nowiki>levels(dataframe[,i]) <- DimIds[[i]]</nowiki> | <nowiki>levels(dataframe[,ColNames[i]]) <- DimIds[[i]]</nowiki> | ||
} | } | ||
maxrows <- | #A hidden parameter for adjusting query packet sizes, the higher the faster, though crash becomes likelier | ||
maxrows <- 50000 | |||
#Writing cell | #Writing cell | ||
n <- 1 | obscol <- colnames(dataframe) == "obs" | ||
if (length(obscol[obscol==TRUE]) >= 1) { | |||
obscol <- TRUE | |||
n <- length(levels(factor(dataframe[,"obs"])))} else { | |||
obscol <- FALSE | |||
n <- 1 | |||
} | |||
ncell <- nrow(dataframe)/n | |||
cellQuery <- NA | cellQuery <- NA | ||
cellQuery[1: | cellQuery[1:ncell*6-5] <- actobj_id | ||
cellQuery[1: | cellQuery[1:ncell*6-4] <- "," | ||
cellQuery[1: | if (n == 1) { | ||
cellQuery[1:ncell*6-3] <- dataframe[,rescol]} else { | |||
cellQuery[1: | cellQuery[1:ncell*6-3] <- apply(matrix(dataframe[,rescol], n, ncell), 2, mean) | ||
cellQuery[1:( | } | ||
if ( | cellQuery[1:ncell*6-2] <- "," | ||
for (i in 1:( | cellQuery[1:ncell*6-1] <- n | ||
sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[((i-1)*6*maxrows+1):(i*6*maxrows-1)], collapse = ''), ')', sep = '')) | cellQuery[1:(ncell-1)*6] <- "),(" | ||
if (ncell>=maxrows) { | |||
for (i in 1:(ncell%/%maxrows)) { | |||
sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[ | |||
<nowiki>((i-1)*6*maxrows+1):(i*6*maxrows-1)], collapse = ''), ')', sep = ''))</nowiki> | |||
} | } | ||
} | } | ||
sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[( | sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[(ncell%/% | ||
<nowiki>maxrows*6*maxrows+1):length(cellQuery)], collapse = ''), ')', sep = ''))</nowiki> | |||
#Writing res | #Writing res | ||
cell_id <- | cell_id <- sqlQuery(db, paste("SELECT id FROM cell WHERE actobj_id = ", actobj_id, " ORDER BY ID", sep = ""))[,1] | ||
if (ncell != length(cell_id)) stop("Number of cells written does not match with expected value") | |||
resQuery <- NA | resQuery <- NA | ||
resQuery[1: | resQuery[1:nrow(dataframe)*6-5] <- rep(cell_id, n) | ||
resQuery[1: | resQuery[1:nrow(dataframe)*6-4] <- "," | ||
resQuery[1: | if (n==1) resQuery[1:nrow(dataframe)*6-3] <- "0" else resQuery[1:nrow(dataframe)*6-3] <- dataframe[,"obs"] | ||
resQuery[1:( | resQuery[1:nrow(dataframe)*6-2] <- "," | ||
if ( | resQuery[1:nrow(dataframe)*6-1] <- dataframe[,rescol] | ||
for (i in 1:( | resQuery[1:(nrow(dataframe)-1)*6] <- "),(" | ||
sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', paste(resQuery[((i-1)*4*maxrows+1):(i*4*maxrows-1)], collapse = ''), ')', sep = '')) | if (nrow(dataframe)>=maxrows) { | ||
for (i in 1:(nrow(dataframe)%/%maxrows)) { | |||
sqlQuery(db, paste('INSERT INTO res (cell_id, obs, result) VALUES (', paste(resQuery[((i-1) | |||
<nowiki>*4*maxrows+1):(i*4*maxrows-1)], collapse = ''), ')', sep = ''))</nowiki> | |||
} | } | ||
} | } | ||
sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', paste(resQuery[( | sqlQuery(db, paste('INSERT INTO res (cell_id, obs, result) VALUES (', paste(resQuery[(nrow(dataframe)%/%maxrows | ||
<nowiki>*4*maxrows+1):length(resQuery)], collapse = ''), ')', sep = ''))</nowiki> | |||
#Writing loccell | #Writing loccell | ||
locidmatrix <- rep(NA, | locidmatrix <- rep(NA, nind*ncell) | ||
dim(locidmatrix) <- c( | dim(locidmatrix) <- c(nind,ncell) | ||
for (i in 1: | for (i in 1:nind) { | ||
locidmatrix[i,] <- as.character(dataframe[,i]) | locidmatrix[i,] <- as.character(dataframe[1:ncell*n - n + 1,ColNames[i]]) | ||
} | } | ||
loccellQuery <- rep(NA, 4* | loccellQuery <- rep(NA, 4*nind*ncell) | ||
dim(loccellQuery) <- c(4* | dim(loccellQuery) <- c(4*nind, ncell) | ||
loccellQuery[1: | loccellQuery[1:nind*4-3, 1:ncell] <- rep(cell_id, each = nind) | ||
loccellQuery[1: | loccellQuery[1:nind*4-2, 1:ncell] <- "," | ||
loccellQuery[1: | loccellQuery[1:nind*4-1, 1:ncell] <- locidmatrix | ||
loccellQuery[1: | loccellQuery[1:nind*4, 1:ncell] <- "),(" | ||
loccellQuery[ | loccellQuery[nind*4, ncell] <- "" | ||
if ( | if (ncell>=maxrows) { | ||
for (i in 1:( | for (i in 1:(ncell%/%maxrows)) { | ||
loccellQuery[ | loccellQuery[nind*4, i*maxrows] <- "" | ||
sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,((i-1)*maxrows+1):(i*maxrows)], collapse = ''), ')', sep = '')) | sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[, | ||
<nowiki>((i-1)*maxrows+1):(i*maxrows)], collapse = ''), ')', sep = ''))</nowiki> | |||
} | } | ||
} | } | ||
sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,( | sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,(ncell%/% | ||
<nowiki>maxrows*maxrows+1):ncell], collapse = ''), ')', sep = ''))</nowiki> | |||
} | } | ||
====Usage==== | ====Usage==== | ||
op_baseWrite(db, input, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL) | |||
Db and | *Db and input must be defined, the rest if not defined is prompted for by the function. | ||
*Input may be given in either array or data.frame form. | |||
**Input in data.frame form must contain a "Freq", "Result" or "result" column, this is where the numerical value is read. | |||
**Probabilistic input must contain a dimension (in case of array) or a column (in case of data.frame) named "obs". Also: all cells must have values for each "obs" value. |
Revision as of 11:12, 6 October 2010
Moderator:Teemu R (see all) |
|
Upload data
|
Code for R for the purpose of interacting with the Opasnet Base is collected on this page. To use it, copy paste the code to R; this defines the functions, after which they can be called for in R.
Package dependencies
These packages are required for most of the code to work. Load these first (copy-pasteable).
library(utils) library(RODBC)
Setup
Establishes the connection to the database. Copy paste first.
db <- odbcConnect("opasnet_base")
- Assuming "opasnet_base" is a correctly defined DSN.
- Note that uploading requires writing permissions.
Downloading data
Functions
- Now simplified, a lot faster. Copy paste second.
op_baseGetData <- function(ident) { obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep=''))[1,1] series_id <- sqlQuery(db, paste('SELECT series_id FROM actobj WHERE obj_id = ', obj_id, ' ORDER BY series_id DESC LIMIT 1'))[1,1] Data <- sqlQuery(db, paste('SELECT cell.id, res.obs, obj.name AS ind, loc.location AS loc, res.result', ' FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN res ON cell.id =', ' res.cell_id LEFT JOIN loccell ON cell.id = loccell.cell_id LEFT JOIN loc ON loccell.loc_id', ' = loc.id LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE actobj.obj_id = ', obj_id, ' AND actobj.series_id = ', series_id, ' ORDER BY cell.id, res.obs, obj.name', sep = '')) nind <- length(levels(Data[,3])) ncell <- nrow(Data)/nind dataframe <- data.frame(NA) dataframe <- Data[1:ncell*nind, c(1,2)] for (i in 1:nind) { dataframe[,2 + i] <- Data[1:ncell*nind - i + 1, 4] colnames(dataframe)[2 + i] <- as.character(Data[nind - i + 1, 3]) } dataframe[,3 + nind] <- Data[1:ncell*nind, 5] colnames(dataframe)[3 + nind] <- "Result" rownames(dataframe) <- 1:ncell dataframe }
Usage
variable <- op_baseGetData("page identifier")
- Result will be in a table format with columns: id, obs, ind1, ind2 ... indn, Result.
Manipulating data
Functions
DataframeToArray <- function(dataframe) { DimNames <- rep(vector("list", 1), ncol(dataframe) - 2) names(DimNames) <- c(colnames(dataframe)[2:(ncol(dataframe) - 1)]) nind <- length(names(DimNames)) indlengths <- 0 for (i in 1:nind) { DimNames[[i]] <- levels(factor(dataframe[,1 + i])) indlengths[i] <- length(DimNames[[i]]) } array <- rep(NA, prod(indlengths)) dim(array) <- indlengths dimnames(array) <- DimNames array[as.matrix(dataframe[,c(1:nind + 1)])] <- dataframe[,ncol(dataframe)] array }
Usage
variable2 <- DataframeToArray(variable1)
- variable 1 must be in the same format as the result when downloading.
- first column (id) is ignored
- variable1 <- function(variable1) is allowed
Other useful stuff
as.data.frame(as.table(array)) dataframe[is.na(dataframe[,ncol(dataframe)])==FALSE,]
- First line returns an array from a data frame.
- Second line returns rows without empty values.
dataframe[grep("location", dataframe$index, ignore.case = TRUE),]
- Returns all rows of a data frame where column "index" value is "location".
Uploading data
Slight problems with probabilistic data, sorting orders do not match between read and write. Very efficient. Probabilistic data supported. Appending to existing data not supported, a whole new set will always be produced. Varying numbers of iterations between cells not supported.
Functions
op_baseWrite <- function(db, input, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL) { #Coerce input into a data frame if it isn't one already; get rid of empty cells if (is.array(input)) dataframe <- as.data.frame(as.table(input)) else dataframe <- input rescol <- colnames(dataframe) == "Freq" if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Freq" else { rescol <- colnames(dataframe) == "Result" if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Result" else { rescol <- colnames(dataframe) == "result" if (length(rescol[rescol==TRUE]) >= 1) rescol <- "result" } } values <- dataframe[,rescol] cond <- (is.na(values) == FALSE) dataframe <- dataframe[cond,] #Add page to database (if it doesn't already exist) if (is.null(ident)==TRUE) if (interactive()) ident <- readline(paste("What is the identifier of this object?", "\n", sep = "")) else stop("indentifier of object no given") wiki_name <- substring(ident, 1, 5) if (wiki_name=="Op_en") {wiki_id <- 1; page <- substring(ident, 6, nchar(ident))} else if (wiki_name=="Op_fi") { wiki_id <- 2; page <- substring(ident, 6, nchar(ident))} else {wiki_id <- 0; page <- 0} obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep = ''))[1,1] if (is.na(obj_id)==TRUE) { if (is.null(name)==TRUE) if (interactive()) name <- readline(paste("What is the name of this object?", "\n", sep = "")) else stop("object name not given") if (is.null(unit)==TRUE) if (interactive()) unit <- readline(paste("What is the unit of this object?", "\n", sep = "")) else stop("unit not given") if (is.null(objtype_id)==TRUE) if (interactive()) objtype_id <- readline(paste("What type of object is", " this (id)?", paste(paste(sqlQuery(db, "SELECT id FROM objtype")[,1], sqlQuery(db, "SELECT objtype", " FROM objtype")[,1], sep = " - "), collapse = ", "), "\n", collapse = " ")) else { stop("object type not given")} sqlQuery(db, paste('INSERT INTO obj (ident, name, unit, objtype_id, page, wiki_id) VALUES ("', paste(ident, name, unit, sep = '","'), '",', paste(objtype_id, page, wiki_id, sep = ','), ')', sep = '')) obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep = ''))[1,1] } #Write act and actobj if (is.null(who)==TRUE) if (interactive()) {who <- readline(paste("What is the name of the uploader?", "\n", sep = "")) } else stop("uploader name not given") sqlQuery(db, paste('INSERT INTO act (acttype_id, who, comments) VALUES (4, "', who, '","R upload")', sep = '')) act_id <- sqlQuery(db, "SELECT id FROM act ORDER BY id DESC LIMIT 1")[1,1] sqlQuery(db, paste('INSERT INTO actobj (act_id, obj_id, series_id) VALUES (', paste(act_id, obj_id, act_id, sep = ','), ')', sep = '')) actobj_id <- sqlQuery(db, paste('SELECT id FROM actobj WHERE act_id = ', act_id, sep = ''))[1,1] #Write indexes ColNames <- colnames(dataframe[(colnames(dataframe) == "id") == FALSE]) ColNames <- ColNames[(ColNames == "obs") == FALSE] ColNames <- ColNames[(ColNames == "Result") == FALSE] ColNames <- ColNames[(ColNames == "result") == FALSE] ColNames <- ColNames[(ColNames == "Freq") == FALSE] nind <- length(ColNames) DimNames <- rep(vector("list", 1), nind) names(DimNames) <- ColNames indlengths <- 0 for (i in 1:nind) { DimNames[[i]] <- levels(factor(dataframe[, ColNames[i]])) indlengths[i] <- length(DimNames[[i]]) } indQuery <- NA indQuery[1:nind*5 - 4] <- gsub(" ", "_", ColNames) indQuery[1:nind*5 - 3] <- '","' indQuery[1:nind*5 - 2] <- ColNames indQuery[1:nind*5 - 1] <- '",6' indQuery[1:(nind - 1)*5] <- '),("' sqlQuery(db, paste('INSERT IGNORE INTO obj (ident, name, objtype_id) VALUES ("', paste(indQuery, collapse = ), ')', sep = '')) IndIds <- sqlQuery(db, paste('SELECT id, ident FROM obj WHERE ident IN("', paste(gsub(" ", "_", ColNames), collapse = '","'), '")', sep = '')) DimIds <- DimNames DimN <- 1:nind names(DimN) <- gsub(" ", "_", ColNames) for (i in 1:nrow(IndIds)) { names(DimIds)[DimN[IndIds[i,2]]] <- IndIds[grep(gsub(" ", "_", ColNames[i]), IndIds$ident), 1] } #Write locations y <- 1 LocNames <- as.data.frame(matrix(rep(NA, 2*sum(indlengths)), sum(indlengths), 2)) for (i in 1:nind) { LocNames[y:(y + indlengths[i] - 1),1:2] <- matrix(c(rep(ColNames[i], indlengths[i]), levels(factor(dataframe[,ColNames[i]]))), indlengths[i], 2) y <- y + indlengths[i] } LocNames[,1] <- names(DimIds)[DimN[LocNames[,1]]] nloc <- nrow(LocNames) locQuery <- NA locQuery[1:nloc*4 - 3] <- LocNames[,1] locQuery[1:nloc*4 - 2] <- ',"' locQuery[1:nloc*4 - 1] <- LocNames[,2] locQuery[1:(nloc - 1)*4] <- '"),(' sqlQuery(db, paste('INSERT IGNORE INTO loc (obj_id_i, location) VALUES (', paste(locQuery, collapse = ), '")', sep = '')) LocIds <- sqlQuery(db, paste('SELECT id, obj_id_i, location FROM loc WHERE location IN("', paste(LocNames[,2], collapse = '","'), '") AND obj_id_i IN(', paste(names(DimIds), collapse = ','), ')', sep = '')) LocMap <- NA y <- 1 for (i in 1:nind) { LocMap <- LocIds[grep(names(DimIds)[i], LocIds$obj_id_i), 1] names(LocMap) <- LocIds[grep(names(DimIds)[i], LocIds$obj_id_i), 3] DimIds[[i]] <- LocMap[DimNames[[i]]] } #Changing location names in table into ids for (i in 1:nind) { dataframe[,ColNames[i]] <- factor(dataframe[,ColNames[i]]) levels(dataframe[,ColNames[i]]) <- DimIds[[i]] } #A hidden parameter for adjusting query packet sizes, the higher the faster, though crash becomes likelier maxrows <- 50000 #Writing cell obscol <- colnames(dataframe) == "obs" if (length(obscol[obscol==TRUE]) >= 1) { obscol <- TRUE n <- length(levels(factor(dataframe[,"obs"])))} else { obscol <- FALSE n <- 1 } ncell <- nrow(dataframe)/n cellQuery <- NA cellQuery[1:ncell*6-5] <- actobj_id cellQuery[1:ncell*6-4] <- "," if (n == 1) { cellQuery[1:ncell*6-3] <- dataframe[,rescol]} else { cellQuery[1:ncell*6-3] <- apply(matrix(dataframe[,rescol], n, ncell), 2, mean) } cellQuery[1:ncell*6-2] <- "," cellQuery[1:ncell*6-1] <- n cellQuery[1:(ncell-1)*6] <- "),(" if (ncell>=maxrows) { for (i in 1:(ncell%/%maxrows)) { sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[ ((i-1)*6*maxrows+1):(i*6*maxrows-1)], collapse = ''), ')', sep = '')) } } sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[(ncell%/% maxrows*6*maxrows+1):length(cellQuery)], collapse = ''), ')', sep = '')) #Writing res cell_id <- sqlQuery(db, paste("SELECT id FROM cell WHERE actobj_id = ", actobj_id, " ORDER BY ID", sep = ""))[,1] if (ncell != length(cell_id)) stop("Number of cells written does not match with expected value") resQuery <- NA resQuery[1:nrow(dataframe)*6-5] <- rep(cell_id, n) resQuery[1:nrow(dataframe)*6-4] <- "," if (n==1) resQuery[1:nrow(dataframe)*6-3] <- "0" else resQuery[1:nrow(dataframe)*6-3] <- dataframe[,"obs"] resQuery[1:nrow(dataframe)*6-2] <- "," resQuery[1:nrow(dataframe)*6-1] <- dataframe[,rescol] resQuery[1:(nrow(dataframe)-1)*6] <- "),(" if (nrow(dataframe)>=maxrows) { for (i in 1:(nrow(dataframe)%/%maxrows)) { sqlQuery(db, paste('INSERT INTO res (cell_id, obs, result) VALUES (', paste(resQuery[((i-1) *4*maxrows+1):(i*4*maxrows-1)], collapse = ''), ')', sep = '')) } } sqlQuery(db, paste('INSERT INTO res (cell_id, obs, result) VALUES (', paste(resQuery[(nrow(dataframe)%/%maxrows *4*maxrows+1):length(resQuery)], collapse = ''), ')', sep = '')) #Writing loccell locidmatrix <- rep(NA, nind*ncell) dim(locidmatrix) <- c(nind,ncell) for (i in 1:nind) { locidmatrix[i,] <- as.character(dataframe[1:ncell*n - n + 1,ColNames[i]]) } loccellQuery <- rep(NA, 4*nind*ncell) dim(loccellQuery) <- c(4*nind, ncell) loccellQuery[1:nind*4-3, 1:ncell] <- rep(cell_id, each = nind) loccellQuery[1:nind*4-2, 1:ncell] <- "," loccellQuery[1:nind*4-1, 1:ncell] <- locidmatrix loccellQuery[1:nind*4, 1:ncell] <- "),(" loccellQuery[nind*4, ncell] <- "" if (ncell>=maxrows) { for (i in 1:(ncell%/%maxrows)) { loccellQuery[nind*4, i*maxrows] <- "" sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[, ((i-1)*maxrows+1):(i*maxrows)], collapse = ''), ')', sep = '')) } } sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,(ncell%/% maxrows*maxrows+1):ncell], collapse = ''), ')', sep = '')) }
Usage
op_baseWrite(db, input, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL)
- Db and input must be defined, the rest if not defined is prompted for by the function.
- Input may be given in either array or data.frame form.
- Input in data.frame form must contain a "Freq", "Result" or "result" column, this is where the numerical value is read.
- Probabilistic input must contain a dimension (in case of array) or a column (in case of data.frame) named "obs". Also: all cells must have values for each "obs" value.