Opasnet Base Connection for R: Difference between revisions
m (→Usage) |
|||
Line 317: | Line 317: | ||
==Uploading data== | ==Uploading data== | ||
Tested and works, | Tested and works. Cell writing is efficient with even with huge data, index writing requires more work. Probabilistic data not supported. | ||
===Low level=== | ===Low level=== | ||
library(utils) | |||
library(RODBC) | |||
dbSimpleRead <- function(db, table, column, filtercol, filter) { | dbSimpleRead <- function(db, table, column, filtercol, filter) { | ||
sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol, ' = "', | sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol, ' = "', filter, '"', sep = ''))[1,1] | ||
} | } | ||
dbLessSimpleRead <- function(db, table, column, filtercol1, filter1, filtercol2, filter2) { | dbLessSimpleRead <- function(db, table, column, filtercol1, filter1, filtercol2, filter2) { | ||
sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol1, ' = "', | sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol1, ' = "', filter1, '" AND ', filtercol2, ' = "', filter2, '"', sep = ''))[1,1] | ||
} | } | ||
SimplePrompt <- function(question) { | SimplePrompt <- function(question) { | ||
readline(paste(question, "\n", sep = "")) | readline(paste(question, "\n", sep = "")) | ||
} | } | ||
===High level=== | ===High level=== | ||
Line 347: | Line 340: | ||
op_baseWriteArray <- function(db, array, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL) { | op_baseWriteArray <- function(db, array, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL) { | ||
originaldataframe <- as.data.frame(as.table(array)) | |||
values <- originaldataframe[,"Freq"] | |||
cond <- (is.na(values) == FALSE) | |||
dataframe <- originaldataframe[cond,] | |||
#Add page to database (if it doesn't already exist) | #Add page to database (if it doesn't already exist) | ||
if (is.null(ident)==TRUE) if (interactive()) ident <- SimplePrompt("What is the identifier of this object?") | if (is.null(ident)==TRUE) if (interactive()) ident <- SimplePrompt("What is the identifier of this object?") | ||
wiki_name <- substring(ident, 1, 5) | wiki_name <- substring(ident, 1, 5) | ||
if (wiki_name=="Op_en") {wiki_id <- 1; page <- substring(ident, 6, nchar(ident))} else if (wiki_name=="Op_fi") { | if (wiki_name=="Op_en") {wiki_id <- 1; page <- substring(ident, 6, nchar(ident))} else if (wiki_name=="Op_fi") {wiki_id <- 2; page <- substring(ident, 6, nchar(ident))} else {wiki_id <- 0; page <- 0} | ||
obj_id <- dbSimpleRead(db, "obj", "id", "ident", ident) | |||
if (is.na(obj_id)==TRUE) { | |||
if ( | if (is.null(name)==TRUE) if (interactive()) name <- SimplePrompt("What is the name of this object?") else error | ||
if (is.null(name)==TRUE) if (interactive()) name <- SimplePrompt("What is the name of this object?") | if (is.null(unit)==TRUE) if (interactive()) unit <- SimplePrompt("What is the unit of this object?") else error | ||
if (is.null(unit)==TRUE) if (interactive()) unit <- SimplePrompt("What is the unit of this object?") | if (is.null(objtype_id)==TRUE) if (interactive()) objtype_id <- SimplePrompt(paste("What type of object is this (id)?", paste(paste(sqlQuery(db, "SELECT id FROM objtype")[,1], sqlQuery(db, "SELECT objtype FROM objtype")[,1], sep = " - "), collapse = ", "), collapse = " ")) else error | ||
if (is.null(objtype_id)==TRUE) if (interactive()) objtype_id <- SimplePrompt(paste("What type of object is this (id)?", | sqlQuery(db, paste('INSERT INTO obj (ident, name, unit, objtype_id, page, wiki_id) VALUES ("', paste(ident, name, unit, sep = '","'), '",', paste(objtype_id, page, wiki_id, sep = ','), ')', sep = '')) | ||
obj_id <- dbSimpleRead(db, "obj", "id", "ident", ident) | |||
} | } | ||
#Write act and actobj | #Write act and actobj | ||
if (is.null(who)==TRUE) if (interactive()) who <- SimplePrompt("What is the name of the uploader?") | if (is.null(who)==TRUE) if (interactive()) who <- SimplePrompt("What is the name of the uploader?") else error | ||
sqlQuery(db, paste('INSERT INTO act (acttype_id, who, comments) VALUES (4, "', who, '","R upload")', sep = '')) | |||
act_id <- sqlQuery(db, "SELECT id FROM act ORDER BY id DESC LIMIT 1")[1,1] | act_id <- sqlQuery(db, "SELECT id FROM act ORDER BY id DESC LIMIT 1")[1,1] | ||
sqlQuery(db, paste('INSERT INTO actobj (act_id, obj_id, series_id) VALUES (', paste(act_id, obj_id, act_id, sep = ','), ')', sep = '')) | |||
sqlQuery(db, paste('INSERT INTO actobj (act_id, obj_id, series_id) VALUES (', paste(act_id, obj_id, act_id, sep = ','), ')', | actobj_id <- dbSimpleRead(db, "actobj", "id", "act_id", act_id) | ||
actobj_id <- dbSimpleRead(db, "actobj", "id", "act_id", act_id) | |||
#Write indexes and locations | #Write indexes and locations | ||
x <- 1 | x <- 1 | ||
DimIds <- | DimIds <- list() | ||
IndId <- 0 | IndId <- 0 | ||
Ind <- 0 | Ind <- 0 | ||
Line 389: | Line 377: | ||
LocId <- 0 | LocId <- 0 | ||
for (i in 1:(ncol(dataframe) - x)) { | for (i in 1:(ncol(dataframe) - x)) { | ||
< | DimIds[[i]] <- rep(NA, length(levels(factor(dataframe[,i])))) | ||
IndId <- dbSimpleRead(db, "obj", "id", "ident", dimnames(dataframe)[[2]][i]) | |||
if (is.na(IndId)==TRUE) { | if (is.na(IndId)==TRUE) { | ||
sqlQuery(db, paste('INSERT INTO obj (ident, name, objtype_id) VALUES ("', paste(dimnames(dataframe)[[2]][i], dimnames(dataframe)[[2]][i], sep = '","'), '",', 6, ')', sep = '')) | |||
IndId <- dbSimpleRead(db, "obj", "id", "ident", dimnames(dataframe)[[2]][i]) | |||
} | } | ||
for (j in 1:length(levels(factor(dataframe[,i])))) { #1:length(dimnames(array)[[i]])) { | |||
LocId <- dbLessSimpleRead(db, "loc", "id", "location", levels(factor(dataframe[,i]))[j], "obj_id_i", IndId) | |||
if (is.na(LocId)==TRUE) { | if (is.na(LocId)==TRUE) { | ||
sqlQuery(db, paste('INSERT INTO loc (obj_id_i, location) VALUES (', IndId, ',"', levels(factor(dataframe[,i]))[j], '")', sep = '')) | |||
LocId <- dbLessSimpleRead(db, "loc", "id", "location", levels(factor(dataframe[,i]))[j], "obj_id_i", IndId) | |||
} | } | ||
DimIds[[i]][j] <- LocId | |||
} | } | ||
names(DimIds)[i] <- IndId | |||
} | } | ||
# | # Changing location names in table into ids | ||
for (i in 1:(ncol(dataframe)-1)) { | |||
dataframe[,i] <- factor(dataframe[,i]) | |||
levels(dataframe[,i]) <- DimIds[[i]] | |||
} | |||
maxrows <- 5000 | |||
#Writing cell | |||
n <- 1 | n <- 1 | ||
cellQuery <- NA | |||
cellQuery[1:nrow(dataframe)*6-5] <- actobj_id | |||
cellQuery[1:nrow(dataframe)*6-4] <- "," | |||
cellQuery[1:nrow(dataframe)*6-3] <- dataframe[,"Freq"] | |||
cellQuery[1:nrow(dataframe)*6-2] <- "," | |||
cellQuery[1:nrow(dataframe)*6-1] <- n | |||
cellQuery[1:(nrow(dataframe)-1)*6] <- "),(" | |||
if (nrow(dataframe)>=maxrows) { | |||
for (i in 1:(nrow(dataframe)%/%maxrows)) { | |||
sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[((i-1)*6*maxrows+1):(i*6*maxrows-1)], collapse = ''), ')', sep = '')) | |||
} | |||
sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', | } | ||
sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[(nrow(dataframe)%/%maxrows*6*maxrows+1):length(cellQuery)], collapse = ''), ')', sep = '')) | |||
#Writing res | |||
cell_id <- rev(sqlQuery(db, paste("SELECT id FROM cell WHERE actobj_id = ", actobj_id, " ORDER BY ID DESC LIMIT ", nrow(dataframe), sep = ""))[,1]) | |||
resQuery <- NA | |||
resQuery[1:length(cell_id)*4-3] <- cell_id | |||
resQuery[1:length(cell_id)*4-2] <- "," | |||
resQuery[1:length(cell_id)*4-1] <- dataframe[,"Freq"] | |||
resQuery[1:(length(cell_id)-1)*4] <- "),(" | |||
if (length(cell_id)>=maxrows) { | |||
for (i in 1:(length(cell_id)%/%maxrows)) { | |||
sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', paste(resQuery[((i-1)*4*maxrows+1):(i*4*maxrows-1)], collapse = ''), ')', sep = '')) | |||
} | |||
} | |||
sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', paste(resQuery[(length(cell_id)%/%maxrows*4*maxrows+1):length(resQuery)], collapse = ''), ')', sep = '')) | |||
#Writing loccell | |||
locidmatrix <- rep(NA, (ncol(dataframe)-1)*length(cell_id)) | |||
dim(locidmatrix) <- c(ncol(dataframe)-1,length(cell_id)) | |||
for (i in 1:(ncol(dataframe)-1)) { | |||
locidmatrix[i,] <- as.character(dataframe[,i]) | |||
} | |||
loccellQuery <- rep(NA, 4*(ncol(dataframe)-1)*length(cell_id)) | |||
dim(loccellQuery) <- c(4*(ncol(dataframe)-1), length(cell_id)) | |||
loccellQuery[1:(ncol(dataframe)-1)*4-3, 1:length(cell_id)] <- rep(cell_id, each = ncol(dataframe) - 1) | |||
loccellQuery[1:(ncol(dataframe)-1)*4-2, 1:length(cell_id)] <- "," | |||
loccellQuery[1:(ncol(dataframe)-1)*4-1, 1:length(cell_id)] <- locidmatrix | |||
loccellQuery[1:(ncol(dataframe)-1)*4, 1:length(cell_id)] <- "),(" | |||
loccellQuery[(ncol(dataframe)-1)*4, length(cell_id)] <- "" | |||
if (length(cell_id)>=maxrows) { | |||
for (i in 1:(length(cell_id)%/%maxrows)) { | |||
loccellQuery[(ncol(dataframe)-1)*4, i*maxrows] <- "" | |||
sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,((i-1)*maxrows+1):(i*maxrows)], collapse = ''), ')', sep = '')) | |||
} | } | ||
} | } | ||
sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,(length(cell_id)%/%maxrows*maxrows+1):length(cell_id)], collapse = ''), ')', sep = '')) | |||
} | } | ||
Revision as of 06:42, 27 September 2010
Moderator:Teemu R (see all) |
|
Upload data
|
Code for R for the purpose of interacting with the Opasnet Base is collected on this page. To use it, copy paste the code to R; this defines the functions, after which they can be called in R. I've tried to explain the code in comments (#). This is a work in progress, if you have comments or new ideas post them using the comment templates on a new line after the one you want to comment, I'm no expert in programming so input is appreciated.
Package dependencies
These packages are required for most of the code to work. Load these first.
- utils
- RODBC
Setup
Establishes the connection to the database. Copy paste first.
db <- odbcConnect("opasnet_base")
- Assuming "opasnet_base" is a correctly defined DSN.
- Note that uploading requires writing permissions.
Downloading data
- Tested and mostly works, in case of error with GetArray try GetDataFrame, it should work with data missing indexes and locations.
Low level functions (mainly sql queries)
Copy paste these second.
#Get latest series id op_baseGetLatest <- function(ident) { series_list <- sqlQuery(db, paste(' SELECT actobj.series_id FROM obj LEFT JOIN actobj ON obj.id = actobj.obj_id WHERE obj.ident = "', ident, '"', sep = '')) series_list[nrow(series_list),1] } #Get results (cell id, result, obs) op_baseGetData <- function(page_ident, series_id) { sqlQuery(db, paste(' SELECT cell.id, res.result, res.obs FROM obj LEFT JOIN actobj ON obj.id = actobj.obj_id LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN res ON cell.id = res.cell_id WHERE actobj.series_id = ', series_id, ' AND obj.ident = "', page_ident, '"', sep = '')) } #Not in use: Get indexes and locations per cell op_baseGetDims <- function(cell_id_list) { sqlQuery(db, paste(' SELECT loccell.cell_id, loc.id, loc.location, loc.obj_id_i, obj.name FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = '')) } #Experimental: match locations and indexes #tapply(dims_table[,3], dims_table[,5], paste) #Not in use: Get indexes from dims_table op_baseFindInd <- function(dims_table) { factor(op_baseDims[,4]) } #Get indexes op_baseGetInd <- function (cell_id_list) { indexes <- sqlQuery(db, paste(' SELECT DISTINCT loc.obj_id_i FROM cell LEFT JOIN loccell ON cell.id = loccell.cell_id LEFT JOIN loc ON loccell.loc_id = loc.id WHERE cell.n <> 0 AND cell.id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], ' ORDER BY loc.obj_id_i', sep = '')) as.numeric(indexes[,1]) } #Get locations op_baseGetLoc <- function(index, cell_id_list) { locations <- sqlQuery(db, paste(' SELECT DISTINCT loc.id FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = '')) as.numeric(locations[,1]) } #Not in use: Get location names op_baseGetLocNames <- function(index, cell_id_list) { locations <- sqlQuery(db, paste(' SELECT DISTINCT loc.location, loc.obj_id_i FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = '')) as.numeric(locations[,1]) } #Get location name (simple) op_baseGetLocName <- function(loc_id) { sqlQuery(db, paste(' SELECT location FROM loc WHERE id = ', loc_id, sep = '')) } #Get index name op_baseGetIndName <- function(ind_id) { sqlQuery(db, paste(' SELECT name FROM obj WHERE id = ', ind_id, sep = '')) } #Get locations for a specific cell op_baseGetLocs <- function(cell_id) { sqlQuery(db, paste(' SELECT DISTINCT loc.location, loc.id, loc.obj_id_i FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loccell.cell_id = ', cell_id, ' ORDER BY loc.obj_id_i', sep = '')) } #Get cell.n (cell id, cell.n) op_baseGetCelln <- function(cell_id_list) { sqlQuery(db, paste(' SELECT cell.id, cell.n FROM cell WHERE cell.id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = '')) } #Get results from probabilistic data (cell id, result, obs) op_baseGetDataProb <- function(page_ident, series_id, Iterations) { sqlQuery(db, paste(' SELECT cell.id, res.result, res.obs FROM obj LEFT JOIN actobj ON obj.id = actobj.obj_id LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN res ON cell.id = res.cell_id WHERE actobj.series_id = ', series_id, ' AND obj.ident = "', page_ident, '"', ' AND res.obs > 0 AND res.obs <= ', Iterations, sep = '')) } #Prompt user for amount of iterations to download PromptIterations <- function(op_baseCelln) { as.numeric(readline(paste("There are ", max(op_baseCelln[,2]), " iterations of this data.", " How many iterations would you like to download?\n"))) }
High Level function
Copy paste this last.
op_baseGetArray <- function(page_ident, Iterations) { #Basic data downloading op_baseLatest <- op_baseGetLatest(page_ident) op_baseData <- op_baseGetData(page_ident, op_baseLatest) #cell,result, obs #Differentiate between probabilistic and non-probabilistic probabilistic <- FALSE #variable indicator of whether the object is probabilistic for (i in 1:length(op_baseData)) { if (op_baseData[i,3] > 0) probabilistic <- TRUE } #final_array <- op_baseData[,2] #ideal case (data in proper order, achievable by tweaking upload order) #op_baseDims <- op_baseGetDims(op_baseData) #with further understanding of R number of queries could be reduced #op_baseIndexes <- op_baseFindInd(op_baseDims) #improved code here op_baseInd <- op_baseGetInd(op_baseData) if (probabilistic == TRUE) op_baseCelln <- op_baseGetCelln(op_baseData) #Array structure building IndLengths <- 1 #blank vector for index lengths IndList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for indexes and #locations for naming the dimensions of the array IndIdList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for ids of indexes IndNames <- c(1) #blank vector for index names for (i in 1:length(op_baseInd)) { IndIdList[[i]] <- op_baseGetLoc(op_baseInd[i], op_baseData) #fetch location ids in index to a list, #assumed every cell has locations for all unique index id values in loc.obj_id_i in the range of cells IndNames[i] <- paste(op_baseGetIndName(op_baseInd[i])[1,1]) #fetch index name to a vector IndLengths[i] <- length(IndIdList[[i]]) #backup: op_baseGetLoc(op_baseInd[i], op_baseData)) for (j in 1:IndLengths[i]) { IndList[[i]][j] <- paste(op_baseGetLocName(IndIdList[[i]][j])[1,1]) } } if (probabilistic == TRUE) { #Probabilistic exceptions (add index: Iteration) Iterations <- if (interactive()) PromptIterations(op_baseCelln) else if (Iterations < 1 && Iterations > max(op_baseCelln[,2])) max(op_baseCelln[,2]) #if not an interactive session, returns max while (Iterations < 1 && Iterations > max(op_baseCelln[,2])) Iterations <- PromptIterations(op_baseCelln) IndLengths[length(IndLengths) + 1] <- Iterations IndList[[length(IndList) + 1]] <- 1:Iterations IndNames[length(IndNames) + 1] <- "Iteration" op_baseData <- op_baseGetDataProb(page_ident, op_baseLatest, Iterations) #cell,result, obs } final_array <- rep(NA, prod(IndLengths)) dim(final_array) <- IndLengths names(IndList) <- IndNames names(IndLengths) <- op_baseInd dimnames(final_array) <- IndList #Cell population IndexNumber <- 1:length(op_baseInd) names(IndexNumber) <- op_baseInd LocList <- c(1) #blank vector for locations per cell LocMap <- c(1) #blank vector for location to position mapping LocMapNames <- c(1) CellPosition <- rep(1, length(IndLengths)) #blank vector for cell position dim(CellPosition) <- c(1, length(IndLengths)) #form into matrix of appropriate form for (i in 1:length(op_baseData[,2])) { #for each result LocList <- op_baseGetLocs(op_baseData[i,1]) #get list of locations for (j in 1:length(LocList[,1])) { #for each location per cell LocMap <- 1:IndLengths[paste(LocList[j,3])] #get available positions in given index names(LocMap) <- dimnames(final_array)[[paste(names(IndList)[IndexNumber[paste(LocList[j,3])]])]] #add names (locations) of positions in given index CellPosition[1,IndexNumber[paste(LocList[j,3])]] <- LocMap[paste(LocList[j,1])] #match cell specific locations to positions in index } if (probabilistic == TRUE) CellPosition[1,length(CellPosition)] <- op_baseData[i,3] final_array[CellPosition] <- op_baseData[i,2] #insert result in array in position determined by CellPosition } final_array #print/return the final array } ArraytoDataFrame <- function(array) { as.data.frame(as.table(array)) } op_baseGetDataFrame <- function(page_ident) { #Basic data downloading op_baseLatest <- op_baseGetLatest(page_ident) op_baseData <- op_baseGetData(page_ident, op_baseLatest) #cell,result, obs #Differentiate between probabilistic and non-probabilistic probabilistic <- FALSE #variable indicator of whether the object is probabilistic for (i in 1:length(op_baseData)) { if (op_baseData[i,3] > 0) probabilistic <- TRUE } if (probabilistic == TRUE) op_baseData <- op_baseGetDataProb(page_ident, op_baseLatest) #cell,result, obs op_baseInd <- op_baseGetInd(op_baseData) #Table building for (i in 1:length(op_baseData[,2])) { #for each result LocList <- paste(op_baseGetLocs(op_baseData[i,1])[,1]) #get list of locations if (length(LocList) != 0) { for (j in 1:length(LocList)) { #for each location per cell op_baseData[i,(3+j)] <- LocList[j] #put in place in table } } } final_table <- data.frame(1:length(op_baseData[,1])) #final_table[,1] <- op_baseData[,1] x <- 0 if (probabilistic == TRUE) { x <- 1 dimnames(final_table)[[2]][1] <- "Iteration" final_table[,1] <- op_baseData[,3] } if (is.na(op_baseInd[1]) == TRUE) y <- 0 else y <- length(op_baseInd) if (y > 0) { for (i in 1:y) { final_table[,x + i] <- op_baseData[,3+i] } } final_table[,(x + y + 1)] <- op_baseData[,2] if (y > 0) { for (i in 1:y) { dimnames(final_table)[[2]][x+i] <- paste(op_baseGetIndName(op_baseInd[i])[1,1]) } } dimnames(final_table)[[2]][(x + y + 1)] <- "Result" final_table }
Usage
variable <- op_baseGetArray("page identifier") variable <- ArraytoDataFrame(array name) variable <- op_baseGetDataFrame("page identifier")
Uploading data
Tested and works. Cell writing is efficient with even with huge data, index writing requires more work. Probabilistic data not supported.
Low level
library(utils) library(RODBC) dbSimpleRead <- function(db, table, column, filtercol, filter) { sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol, ' = "', filter, '"', sep = ))[1,1] } dbLessSimpleRead <- function(db, table, column, filtercol1, filter1, filtercol2, filter2) { sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol1, ' = "', filter1, '" AND ', filtercol2, ' = "', filter2, '"', sep = ))[1,1] } SimplePrompt <- function(question) { readline(paste(question, "\n", sep = "")) }
High level
op_baseWriteArray <- function(db, array, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL) { originaldataframe <- as.data.frame(as.table(array)) values <- originaldataframe[,"Freq"] cond <- (is.na(values) == FALSE) dataframe <- originaldataframe[cond,] #Add page to database (if it doesn't already exist) if (is.null(ident)==TRUE) if (interactive()) ident <- SimplePrompt("What is the identifier of this object?") wiki_name <- substring(ident, 1, 5) if (wiki_name=="Op_en") {wiki_id <- 1; page <- substring(ident, 6, nchar(ident))} else if (wiki_name=="Op_fi") {wiki_id <- 2; page <- substring(ident, 6, nchar(ident))} else {wiki_id <- 0; page <- 0} obj_id <- dbSimpleRead(db, "obj", "id", "ident", ident) if (is.na(obj_id)==TRUE) { if (is.null(name)==TRUE) if (interactive()) name <- SimplePrompt("What is the name of this object?") else error if (is.null(unit)==TRUE) if (interactive()) unit <- SimplePrompt("What is the unit of this object?") else error if (is.null(objtype_id)==TRUE) if (interactive()) objtype_id <- SimplePrompt(paste("What type of object is this (id)?", paste(paste(sqlQuery(db, "SELECT id FROM objtype")[,1], sqlQuery(db, "SELECT objtype FROM objtype")[,1], sep = " - "), collapse = ", "), collapse = " ")) else error sqlQuery(db, paste('INSERT INTO obj (ident, name, unit, objtype_id, page, wiki_id) VALUES ("', paste(ident, name, unit, sep = '","'), '",', paste(objtype_id, page, wiki_id, sep = ','), ')', sep = )) obj_id <- dbSimpleRead(db, "obj", "id", "ident", ident) } #Write act and actobj if (is.null(who)==TRUE) if (interactive()) who <- SimplePrompt("What is the name of the uploader?") else error sqlQuery(db, paste('INSERT INTO act (acttype_id, who, comments) VALUES (4, "', who, '","R upload")', sep = )) act_id <- sqlQuery(db, "SELECT id FROM act ORDER BY id DESC LIMIT 1")[1,1] sqlQuery(db, paste('INSERT INTO actobj (act_id, obj_id, series_id) VALUES (', paste(act_id, obj_id, act_id, sep = ','), ')', sep = )) actobj_id <- dbSimpleRead(db, "actobj", "id", "act_id", act_id) #Write indexes and locations x <- 1 DimIds <- list() IndId <- 0 Ind <- 0 loc <- 0 LocId <- 0 for (i in 1:(ncol(dataframe) - x)) { DimIdsi <- rep(NA, length(levels(factor(dataframe[,i])))) IndId <- dbSimpleRead(db, "obj", "id", "ident", dimnames(dataframe)2[i]) if (is.na(IndId)==TRUE) { sqlQuery(db, paste('INSERT INTO obj (ident, name, objtype_id) VALUES ("', paste(dimnames(dataframe)2[i], dimnames(dataframe)2[i], sep = '","'), '",', 6, ')', sep = )) IndId <- dbSimpleRead(db, "obj", "id", "ident", dimnames(dataframe)2[i]) } for (j in 1:length(levels(factor(dataframe[,i])))) { #1:length(dimnames(array)i)) { LocId <- dbLessSimpleRead(db, "loc", "id", "location", levels(factor(dataframe[,i]))[j], "obj_id_i", IndId) if (is.na(LocId)==TRUE) { sqlQuery(db, paste('INSERT INTO loc (obj_id_i, location) VALUES (', IndId, ',"', levels(factor(dataframe[,i]))[j], '")', sep = )) LocId <- dbLessSimpleRead(db, "loc", "id", "location", levels(factor(dataframe[,i]))[j], "obj_id_i", IndId) } DimIdsi[j] <- LocId } names(DimIds)[i] <- IndId } # Changing location names in table into ids for (i in 1:(ncol(dataframe)-1)) { dataframe[,i] <- factor(dataframe[,i]) levels(dataframe[,i]) <- DimIdsi } maxrows <- 5000 #Writing cell n <- 1 cellQuery <- NA cellQuery[1:nrow(dataframe)*6-5] <- actobj_id cellQuery[1:nrow(dataframe)*6-4] <- "," cellQuery[1:nrow(dataframe)*6-3] <- dataframe[,"Freq"] cellQuery[1:nrow(dataframe)*6-2] <- "," cellQuery[1:nrow(dataframe)*6-1] <- n cellQuery[1:(nrow(dataframe)-1)*6] <- "),(" if (nrow(dataframe)>=maxrows) { for (i in 1:(nrow(dataframe)%/%maxrows)) { sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[((i-1)*6*maxrows+1):(i*6*maxrows-1)], collapse = ), ')', sep = )) } } sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[(nrow(dataframe)%/%maxrows*6*maxrows+1):length(cellQuery)], collapse = ), ')', sep = )) #Writing res cell_id <- rev(sqlQuery(db, paste("SELECT id FROM cell WHERE actobj_id = ", actobj_id, " ORDER BY ID DESC LIMIT ", nrow(dataframe), sep = ""))[,1]) resQuery <- NA resQuery[1:length(cell_id)*4-3] <- cell_id resQuery[1:length(cell_id)*4-2] <- "," resQuery[1:length(cell_id)*4-1] <- dataframe[,"Freq"] resQuery[1:(length(cell_id)-1)*4] <- "),(" if (length(cell_id)>=maxrows) { for (i in 1:(length(cell_id)%/%maxrows)) { sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', paste(resQuery[((i-1)*4*maxrows+1):(i*4*maxrows-1)], collapse = ), ')', sep = )) } } sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', paste(resQuery[(length(cell_id)%/%maxrows*4*maxrows+1):length(resQuery)], collapse = ), ')', sep = )) #Writing loccell locidmatrix <- rep(NA, (ncol(dataframe)-1)*length(cell_id)) dim(locidmatrix) <- c(ncol(dataframe)-1,length(cell_id)) for (i in 1:(ncol(dataframe)-1)) { locidmatrix[i,] <- as.character(dataframe[,i]) } loccellQuery <- rep(NA, 4*(ncol(dataframe)-1)*length(cell_id)) dim(loccellQuery) <- c(4*(ncol(dataframe)-1), length(cell_id)) loccellQuery[1:(ncol(dataframe)-1)*4-3, 1:length(cell_id)] <- rep(cell_id, each = ncol(dataframe) - 1) loccellQuery[1:(ncol(dataframe)-1)*4-2, 1:length(cell_id)] <- "," loccellQuery[1:(ncol(dataframe)-1)*4-1, 1:length(cell_id)] <- locidmatrix loccellQuery[1:(ncol(dataframe)-1)*4, 1:length(cell_id)] <- "),(" loccellQuery[(ncol(dataframe)-1)*4, length(cell_id)] <- "" if (length(cell_id)>=maxrows) { for (i in 1:(length(cell_id)%/%maxrows)) { loccellQuery[(ncol(dataframe)-1)*4, i*maxrows] <- "" sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,((i-1)*maxrows+1):(i*maxrows)], collapse = ), ')', sep = )) } } sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,(length(cell_id)%/%maxrows*maxrows+1):length(cell_id)], collapse = ), ')', sep = )) }
Usage
op_baseWriteArray(db, array, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL)
Db and array must be defined, the rest if not defined is prompted for by the function.