Opasnet Base Connection for R
Moderator:Teemu R (see all) |
|
Upload data
|
Code for R for the purpose of interacting with the Opasnet Base is collected on this page. To use it, copy paste the code to R; this defines the functions, after which they can be called in the R instance.
Package dependencies
These packages are required for most of the code to work. Load these first (copy-pasteable).
library(utils) library(RODBC)
Setup
Establishes the connection to the database. Copy paste first.
db <- odbcConnect("opasnet_base")
- Assuming "opasnet_base" is a correctly defined DSN.
- Note that uploading requires writing permissions.
Downloading data
- Now simplified, a lot faster. Copy paste second.
op_baseGetData <- function(ident) { obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep=))[1,1] series_id <- sqlQuery(db, paste('SELECT series_id FROM actobj WHERE obj_id = ', obj_id, ' ORDER BY series_id DESC LIMIT 1'))[1,1] Indices <- sqlQuery(db, paste('SELECT DISTINCT obj.name AS ind, loc.location AS loc FROM', ' actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN loccell ON cell.id', ' = loccell.cell_id LEFT JOIN loc ON loccell.loc_id = loc.id LEFT JOIN obj ON loc.obj_id_i', ' = obj.id WHERE actobj.obj_id = ', obj_id, ' AND actobj.series_id = ', series_id, ' ORDER BY cell.id, obj.id, loc.id', sep = )) DimNames <- rep(vector("list", 1), length(levels(Indices[,1])) + 1) names(DimNames) <- c(levels(Indices[,1]), "Obs") nind <- length(names(DimNames)) - 1 indlengths <- 0 for (i in 1:nind) { DimNames[[i]] <- Indices[grep(names(DimNames)[i], Indices$ind),2] indlengths[i] <- length(DimNames[[i]]) } Data <- sqlQuery(db, paste('SELECT cell.id, res.obs, obj.name AS ind, loc.location AS loc, res.result', ' FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN res ON cell.id =', ' res.cell_id LEFT JOIN loccell ON cell.id = loccell.cell_id LEFT JOIN loc ON loccell.loc_id', ' = loc.id LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE actobj.obj_id = ', obj_id, ' AND actobj.series_id = ', series_id, ' ORDER BY cell.id, res.obs, obj.name', sep = )) DimNames[[nind + 1]] <- as.numeric(levels(factor(Data[,2]))) indlengths[nind + 1] <- length(DimNames[[nind + 1]]) array <- rep(NA, prod(indlengths)) dim(array) <- indlengths dimnames(array) <- DimNames ncell <- nrow(Data)/nind array[matrix(c(matrix(Data[,4], ncell, nind, byrow = TRUE), Data[1:ncell*nind, 2]), ncell, nind + 1)] <- Data[1:ncell*nind,5] array }
Usage
variable <- op_baseGetData("page identifier")
Uploading data
Tested and works. Cell writing is efficient with even with huge data, index writing requires more work. Probabilistic data not supported.
Low level
library(utils) library(RODBC) dbSimpleRead <- function(db, table, column, filtercol, filter) { sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol, ' = "', filter, '"', sep = ))[1,1] } dbLessSimpleRead <- function(db, table, column, filtercol1, filter1, filtercol2, filter2) { sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol1, ' = "', filter1, '" AND ', filtercol2, ' = "', filter2, '"', sep = ))[1,1] } SimplePrompt <- function(question) { readline(paste(question, "\n", sep = "")) }
High level
op_baseWriteArray <- function(db, array, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL) { originaldataframe <- as.data.frame(as.table(array)) values <- originaldataframe[,"Freq"] cond <- (is.na(values) == FALSE) dataframe <- originaldataframe[cond,] #Add page to database (if it doesn't already exist) if (is.null(ident)==TRUE) if (interactive()) ident <- SimplePrompt("What is the identifier of this object?") wiki_name <- substring(ident, 1, 5) if (wiki_name=="Op_en") {wiki_id <- 1; page <- substring(ident, 6, nchar(ident))} else if (wiki_name=="Op_fi") {wiki_id <- 2; page <- substring(ident, 6, nchar(ident))} else {wiki_id <- 0; page <- 0} obj_id <- dbSimpleRead(db, "obj", "id", "ident", ident) if (is.na(obj_id)==TRUE) { if (is.null(name)==TRUE) if (interactive()) name <- SimplePrompt("What is the name of this object?") else error if (is.null(unit)==TRUE) if (interactive()) unit <- SimplePrompt("What is the unit of this object?") else error if (is.null(objtype_id)==TRUE) if (interactive()) objtype_id <- SimplePrompt(paste("What type of object is this (id)?", paste(paste(sqlQuery(db, "SELECT id FROM objtype")[,1], sqlQuery(db, "SELECT objtype FROM objtype")[,1], sep = " - "), collapse = ", "), collapse = " ")) else error sqlQuery(db, paste('INSERT INTO obj (ident, name, unit, objtype_id, page, wiki_id) VALUES ("', paste(ident, name, unit, sep = '","'), '",', paste(objtype_id, page, wiki_id, sep = ','), ')', sep = )) obj_id <- dbSimpleRead(db, "obj", "id", "ident", ident) } #Write act and actobj if (is.null(who)==TRUE) if (interactive()) who <- SimplePrompt("What is the name of the uploader?") else error sqlQuery(db, paste('INSERT INTO act (acttype_id, who, comments) VALUES (4, "', who, '","R upload")', sep = )) act_id <- sqlQuery(db, "SELECT id FROM act ORDER BY id DESC LIMIT 1")[1,1] sqlQuery(db, paste('INSERT INTO actobj (act_id, obj_id, series_id) VALUES (', paste(act_id, obj_id, act_id, sep = ','), ')', sep = )) actobj_id <- dbSimpleRead(db, "actobj", "id", "act_id", act_id) #Write indexes and locations x <- 1 DimIds <- list() IndId <- 0 Ind <- 0 loc <- 0 LocId <- 0 for (i in 1:(ncol(dataframe) - x)) { DimIds[[i]] <- rep(NA, length(levels(factor(dataframe[,i])))) IndId <- dbSimpleRead(db, "obj", "id", "ident", dimnames(dataframe)[[2]][i]) if (is.na(IndId)==TRUE) { sqlQuery(db, paste('INSERT INTO obj (ident, name, objtype_id) VALUES ("', paste(dimnames(dataframe)[[2]][i], dimnames(dataframe)[[2]][i], sep = '","'), '",', 6, ')', sep = '')) IndId <- dbSimpleRead(db, "obj", "id", "ident", dimnames(dataframe)[[2]][i]) } for (j in 1:length(levels(factor(dataframe[,i])))) { #1:length(dimnames(array)[[i]])) { LocId <- dbLessSimpleRead(db, "loc", "id", "location", levels(factor(dataframe[,i]))[j], "obj_id_i", IndId) if (is.na(LocId)==TRUE) { sqlQuery(db, paste('INSERT INTO loc (obj_id_i, location) VALUES (', IndId, ',"', levels(factor(dataframe[,i]))[j], '")', sep = '')) LocId <- dbLessSimpleRead(db, "loc", "id", "location", levels(factor(dataframe[,i]))[j], "obj_id_i", IndId) } DimIds[[i]][j] <- LocId } names(DimIds)[i] <- IndId } # Changing location names in table into ids for (i in 1:(ncol(dataframe)-1)) { dataframe[,i] <- factor(dataframe[,i]) levels(dataframe[,i]) <- DimIds[[i]] } maxrows <- 5000 #Writing cell n <- 1 cellQuery <- NA cellQuery[1:nrow(dataframe)*6-5] <- actobj_id cellQuery[1:nrow(dataframe)*6-4] <- "," cellQuery[1:nrow(dataframe)*6-3] <- dataframe[,"Freq"] cellQuery[1:nrow(dataframe)*6-2] <- "," cellQuery[1:nrow(dataframe)*6-1] <- n cellQuery[1:(nrow(dataframe)-1)*6] <- "),(" if (nrow(dataframe)>=maxrows) { for (i in 1:(nrow(dataframe)%/%maxrows)) { sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[((i-1)*6*maxrows+1):(i*6*maxrows-1)], collapse = ), ')', sep = )) } } sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[(nrow(dataframe)%/%maxrows*6*maxrows+1):length(cellQuery)], collapse = ), ')', sep = )) #Writing res cell_id <- rev(sqlQuery(db, paste("SELECT id FROM cell WHERE actobj_id = ", actobj_id, " ORDER BY ID DESC LIMIT ", nrow(dataframe), sep = ""))[,1]) resQuery <- NA resQuery[1:length(cell_id)*4-3] <- cell_id resQuery[1:length(cell_id)*4-2] <- "," resQuery[1:length(cell_id)*4-1] <- dataframe[,"Freq"] resQuery[1:(length(cell_id)-1)*4] <- "),(" if (length(cell_id)>=maxrows) { for (i in 1:(length(cell_id)%/%maxrows)) { sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', paste(resQuery[((i-1)*4*maxrows+1):(i*4*maxrows-1)], collapse = ), ')', sep = )) } } sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', paste(resQuery[(length(cell_id)%/%maxrows*4*maxrows+1):length(resQuery)], collapse = ), ')', sep = )) #Writing loccell locidmatrix <- rep(NA, (ncol(dataframe)-1)*length(cell_id)) dim(locidmatrix) <- c(ncol(dataframe)-1,length(cell_id)) for (i in 1:(ncol(dataframe)-1)) { locidmatrix[i,] <- as.character(dataframe[,i]) } loccellQuery <- rep(NA, 4*(ncol(dataframe)-1)*length(cell_id)) dim(loccellQuery) <- c(4*(ncol(dataframe)-1), length(cell_id)) loccellQuery[1:(ncol(dataframe)-1)*4-3, 1:length(cell_id)] <- rep(cell_id, each = ncol(dataframe) - 1) loccellQuery[1:(ncol(dataframe)-1)*4-2, 1:length(cell_id)] <- "," loccellQuery[1:(ncol(dataframe)-1)*4-1, 1:length(cell_id)] <- locidmatrix loccellQuery[1:(ncol(dataframe)-1)*4, 1:length(cell_id)] <- "),(" loccellQuery[(ncol(dataframe)-1)*4, length(cell_id)] <- "" if (length(cell_id)>=maxrows) { for (i in 1:(length(cell_id)%/%maxrows)) { loccellQuery[(ncol(dataframe)-1)*4, i*maxrows] <- "" sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,((i-1)*maxrows+1):(i*maxrows)], collapse = ), ')', sep = )) } } sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,(length(cell_id)%/%maxrows*maxrows+1):length(cell_id)], collapse = ), ')', sep = )) }
Usage
op_baseWriteArray(db, array, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL)
Db and array must be defined, the rest if not defined is prompted for by the function.