Opasnet Base Connection for R: Difference between revisions

From Opasnet
Jump to navigation Jump to search
Line 317: Line 317:
==Uploading data==
==Uploading data==


Tested and works, except for probabilistic data.  
Tested and works. Cell writing is efficient with even with huge data, index writing requires more work. Probabilistic data not supported.  


===Low level===
===Low level===


library(utils)
library(RODBC)
  dbSimpleRead <- function(db, table, column, filtercol, filter) {
  dbSimpleRead <- function(db, table, column, filtercol, filter) {
  sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol, ' = "',  
  sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol, ' = "', filter, '"', sep = ''))[1,1]
filter, '"', sep = ''))
  }
  }
   
   
  dbLessSimpleRead <- function(db, table, column, filtercol1, filter1, filtercol2, filter2) {
  dbLessSimpleRead <- function(db, table, column, filtercol1, filter1, filtercol2, filter2) {
  sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol1, ' = "',  
  sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol1, ' = "', filter1, '" AND ', filtercol2, ' = "', filter2, '"', sep = ''))[1,1]
filter1, '" AND ', filtercol2, ' = "', filter2, '"', sep = ''))
  }
  }
#dbSimpleWrite <- function(db, table, data) {
# sqlUpdate(db, data, table, test = TRUE) #append = TRUE, rownames = FALSE, test = TRUE)
#}
   
   
  SimplePrompt <- function(question) {
  SimplePrompt <- function(question) {
  readline(paste(question, "\n", sep = ""))
  readline(paste(question, "\n", sep = ""))
  }
  }
#Example of writing query
#sqlQuery(db, paste('INSERT INTO obj (ident, name, unit, objtype_id, page, wiki_id)',
#'VALUES ("RTest", "Write test on R", "", 0, 0, 0)))


===High level===
===High level===
Line 347: Line 340:
  op_baseWriteArray <- function(db, array, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL) {
  op_baseWriteArray <- function(db, array, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL) {
 
 
  dataframe <- as.data.frame(as.table(array))
  originaldataframe <- as.data.frame(as.table(array))
values <- originaldataframe[,"Freq"]
cond <- (is.na(values) == FALSE)
dataframe <- originaldataframe[cond,]
 
 
  #Add page to database (if it doesn't already exist)
  #Add page to database (if it doesn't already exist)
 
 
#To be done: prompt for ident
  if (is.null(ident)==TRUE) if (interactive()) ident <- SimplePrompt("What is the identifier of this object?")
  if (is.null(ident)==TRUE) if (interactive()) ident <- SimplePrompt("What is the identifier of this object?")
  wiki_name <- substring(ident, 1, 5)
  wiki_name <- substring(ident, 1, 5)
  if (wiki_name=="Op_en") {wiki_id <- 1; page <- substring(ident, 6, nchar(ident))} else if (wiki_name=="Op_fi") {
  if (wiki_name=="Op_en") {wiki_id <- 1; page <- substring(ident, 6, nchar(ident))} else if (wiki_name=="Op_fi") {wiki_id <- 2; page <- substring(ident, 6, nchar(ident))} else {wiki_id <- 0; page <- 0}
wiki_id <- 2; page <- substring(ident, 6, nchar(ident))} else {wiki_id <- 0; page <- 0}
  obj_id <- dbSimpleRead(db, "obj", "id", "ident", ident)
  obj <- dbSimpleRead(db, "obj", "*", "ident", ident)
  if (is.na(obj_id)==TRUE) {
  if (nrow(obj)==0) {
  if (is.null(name)==TRUE) if (interactive()) name <- SimplePrompt("What is the name of this object?") else error
  if (is.null(name)==TRUE) if (interactive()) name <- SimplePrompt("What is the name of this object?")
  if (is.null(unit)==TRUE) if (interactive()) unit <- SimplePrompt("What is the unit of this object?") else error
  if (is.null(unit)==TRUE) if (interactive()) unit <- SimplePrompt("What is the unit of this object?")
  if (is.null(objtype_id)==TRUE) if (interactive()) objtype_id <- SimplePrompt(paste("What type of object is this (id)?", paste(paste(sqlQuery(db, "SELECT id FROM objtype")[,1], sqlQuery(db, "SELECT objtype FROM objtype")[,1], sep = " - "), collapse = ", "), collapse = " ")) else error
  if (is.null(objtype_id)==TRUE) if (interactive()) objtype_id <- SimplePrompt(paste("What type of object is this (id)?",  
  sqlQuery(db, paste('INSERT INTO obj (ident, name, unit, objtype_id, page, wiki_id) VALUES ("', paste(ident, name, unit, sep = '","'), '",', paste(objtype_id, page, wiki_id, sep = ','), ')', sep = ''))
paste(paste(sqlQuery(db, "SELECT id FROM objtype")[,1], sqlQuery(db, "SELECT objtype FROM objtype")[,1],  
  obj_id <- dbSimpleRead(db, "obj", "id", "ident", ident)
sep = " - "), collapse = ", "), collapse = " "))
  #obj <- data.frame(ident, name, unit, objtype_id, page, wiki_id, "")
<nowiki>sqlQuery(db, paste('INSERT INTO obj (ident, name, unit, objtype_id, page, wiki_id) VALUES ("', paste(</nowiki>
<nowiki>ident, name, unit, sep = '","'), '",', paste(objtype_id, page, wiki_id, sep = ','), ')', sep = ''))</nowiki>
  #dbSimpleWrite(db, "obj", obj)
  }
  }
obj_id <- dbSimpleRead(db, "obj", "id", "ident", ident)[1,1]
 
 
  #Write act and actobj
  #Write act and actobj
 
 
  if (is.null(who)==TRUE) if (interactive()) who <- SimplePrompt("What is the name of the uploader?")
  if (is.null(who)==TRUE) if (interactive()) who <- SimplePrompt("What is the name of the uploader?") else error
  #act <- data.frame(series_id = 0, acttype_id = 4, who, comments = NA, time = NA, temp_id = NA)
  sqlQuery(db, paste('INSERT INTO act (acttype_id, who, comments) VALUES (4, "', who, '","R upload")', sep = ''))
<nowiki>sqlQuery(db, paste('INSERT INTO act (acttype_id, who, comments) VALUES (4, "', who, '","R upload")', sep = ''))#dbSimpleWrite(db, "act", act)</nowiki>
  act_id <- sqlQuery(db, "SELECT id FROM act ORDER BY id DESC LIMIT 1")[1,1]
  act_id <- sqlQuery(db, "SELECT id FROM act ORDER BY id DESC LIMIT 1")[1,1]
#actobj <- data.frame(act_id, obj_id, act_id)
  sqlQuery(db, paste('INSERT INTO actobj (act_id, obj_id, series_id) VALUES (', paste(act_id, obj_id, act_id, sep = ','), ')', sep = ''))
  sqlQuery(db, paste('INSERT INTO actobj (act_id, obj_id, series_id) VALUES (', paste(act_id, obj_id, act_id, sep = ','), ')',  
  actobj_id <- dbSimpleRead(db, "actobj", "id", "act_id", act_id)
<nowiki>sep = ''))#dbSimpleWrite(db, "actobj", actobj)</nowiki>
  actobj_id <- dbSimpleRead(db, "actobj", "id", "act_id", act_id)[1,1]
  #Write indexes and locations
  #Write indexes and locations
  x <- 1
  x <- 1
  DimIds <- dimnames(array)
  DimIds <- list()
  IndId <- 0
  IndId <- 0
  Ind <- 0
  Ind <- 0
Line 389: Line 377:
  LocId <- 0
  LocId <- 0
  for (i in 1:(ncol(dataframe) - x)) {
  for (i in 1:(ncol(dataframe) - x)) {
  <nowiki>IndId <- dbSimpleRead(db, "obj", "id", "name", dimnames(dataframe)[[2]][i])[1,1]#First if multiple</nowiki>
  DimIds[[i]] <- rep(NA, length(levels(factor(dataframe[,i]))))
IndId <- dbSimpleRead(db, "obj", "id", "ident", dimnames(dataframe)[[2]][i])
  if (is.na(IndId)==TRUE) {
  if (is.na(IndId)==TRUE) {
  <nowiki>#Ind <- data.frame(dimnames(dataframe)[[2]][i], dimnames(dataframe)[[2]][i], "", 6, 0, 0, "")</nowiki>
  sqlQuery(db, paste('INSERT INTO obj (ident, name, objtype_id) VALUES ("', paste(dimnames(dataframe)[[2]][i], dimnames(dataframe)[[2]][i], sep = '","'), '",', 6, ')', sep = ''))
<nowiki>sqlQuery(db, paste('INSERT INTO obj (ident, name, objtype_id) VALUES ("', paste(dimnames(dataframe)[[2]][i], </nowiki>
  IndId <- dbSimpleRead(db, "obj", "id", "ident", dimnames(dataframe)[[2]][i])
<nowiki>dimnames(dataframe)[[2]][i], sep = '","'), '",', 6, ')', sep = ''))#dbSimpleWrite(db, "obj", Ind)</nowiki>
  <nowiki>IndId <- dbSimpleRead(db, "obj", "id", "name", dimnames(dataframe)[[2]][i])[1,1]</nowiki>
  }
  }
  <nowiki>names(DimIds)[i] <- IndId</nowiki>
  for (j in 1:length(levels(factor(dataframe[,i])))) { #1:length(dimnames(array)[[i]])) {  
<nowiki>for (j in 1:length(dimnames(array)[[i]])) {</nowiki>
  LocId <- dbLessSimpleRead(db, "loc", "id", "location", levels(factor(dataframe[,i]))[j], "obj_id_i", IndId)
  <nowiki>LocId <- dbLessSimpleRead(db, "loc", "id", "location", dimnames(array)[[i]][j], "obj_id_i", IndId)[1,1]</nowiki>
  if (is.na(LocId)==TRUE) {
  if (is.na(LocId)==TRUE) {
  <nowiki>#loc <- data.frame(0, IndId, dimnames(array)[[i]][j], 0, "")</nowiki>
  sqlQuery(db, paste('INSERT INTO loc (obj_id_i, location) VALUES (', IndId, ',"', levels(factor(dataframe[,i]))[j], '")', sep = ''))
<nowiki>sqlQuery(db, paste('INSERT INTO loc (obj_id_i, location) VALUES (', IndId, ',"', dimnames(array)[[i]][j], '")', </nowiki>
  LocId <- dbLessSimpleRead(db, "loc", "id", "location", levels(factor(dataframe[,i]))[j], "obj_id_i", IndId)
<nowiki>sep = ''))#dbSimpleWrite(db, "loc", loc)</nowiki>
  <nowiki>LocId <- dbLessSimpleRead(db, "loc", "id", "location", dimnames(array)[[i]][j], "obj_id_i", IndId)[1,1]</nowiki>
  }
  }
  <nowiki>DimIds[[i]][j] <- LocId</nowiki>
  DimIds[[i]][j] <- LocId
  }
  }
names(DimIds)[i] <- IndId
  }
  }
 
 
  #Write cells and loccells
  # Changing location names in table into ids
for (i in 1:(ncol(dataframe)-1)) {
dataframe[,i] <- factor(dataframe[,i])
levels(dataframe[,i]) <- DimIds[[i]]
}
maxrows <- 5000
#Writing cell
 
 
mean <- 0
sd <- 0
  n <- 1
  n <- 1
  LocMap <- 0
  cellQuery <- NA
  #dataframe[,ncol(dataframe)+1] <- rep(0, nrow(dataframe))
  cellQuery[1:nrow(dataframe)*6-5] <- actobj_id
  <nowiki>#dimnames(dataframe)[[2]][ncol(dataframe)] <- "cell_id"</nowiki>
cellQuery[1:nrow(dataframe)*6-4] <- ","
  for (i in 1:nrow(dataframe)) {
cellQuery[1:nrow(dataframe)*6-3] <- dataframe[,"Freq"]
  if (is.na(dataframe[i,"Freq"])==FALSE) {
  cellQuery[1:nrow(dataframe)*6-2] <- ","
  mean <- dataframe[i,"Freq"]
cellQuery[1:nrow(dataframe)*6-1] <- n
#cell <- data.frame(0,0,actobj_id, mean, sd, n, "")
cellQuery[1:(nrow(dataframe)-1)*6] <- "),("
sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(actobj_id, mean, n, sep = ','), ')',  
  if (nrow(dataframe)>=maxrows) {
<nowiki>sep = ''))#dbSimpleWrite(db, "cell", cell)</nowiki>
  for (i in 1:(nrow(dataframe)%/%maxrows)) {
cell_id <- sqlQuery(db, paste("SELECT id FROM cell WHERE actobj_id = ", actobj_id, " ORDER BY id DESC LIMIT 1",  
  sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[((i-1)*6*maxrows+1):(i*6*maxrows-1)], collapse = ''), ')', sep = ''))
sep = ""))[1,1]
}
  sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', cell_id, ',', dataframe[i,"Freq"], ')', sep = ''))
}
for (j in 1:length(names(DimIds))) {
sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[(nrow(dataframe)%/%maxrows*6*maxrows+1):length(cellQuery)], collapse = ''), ')', sep = ''))
<nowiki>LocMap <- DimIds[[j]]</nowiki>
<nowiki>names(LocMap) <- dimnames(array)[[j]]</nowiki>
#Writing res
#loccell <- data.frame(cell_id, LocMap[paste(dataframe[i,j])])
sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', cell_id, ',', LocMap[paste(dataframe[i,j])],  
cell_id <- rev(sqlQuery(db, paste("SELECT id FROM cell WHERE actobj_id = ", actobj_id, " ORDER BY ID DESC LIMIT ", nrow(dataframe), sep = ""))[,1])
<nowiki>')', sep = ''))#dbSimpleWrite(db, "loccell", loccell)</nowiki>
resQuery <- NA
}
resQuery[1:length(cell_id)*4-3] <- cell_id
resQuery[1:length(cell_id)*4-2] <- ","
resQuery[1:length(cell_id)*4-1] <- dataframe[,"Freq"]
resQuery[1:(length(cell_id)-1)*4] <- "),("
if (length(cell_id)>=maxrows) {
for (i in 1:(length(cell_id)%/%maxrows)) {
  sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', paste(resQuery[((i-1)*4*maxrows+1):(i*4*maxrows-1)], collapse = ''), ')', sep = ''))
}
}
sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', paste(resQuery[(length(cell_id)%/%maxrows*4*maxrows+1):length(resQuery)], collapse = ''), ')', sep = ''))
#Writing loccell
locidmatrix <- rep(NA, (ncol(dataframe)-1)*length(cell_id))
dim(locidmatrix) <- c(ncol(dataframe)-1,length(cell_id))
for (i in 1:(ncol(dataframe)-1)) {
locidmatrix[i,] <- as.character(dataframe[,i])
}
loccellQuery <- rep(NA, 4*(ncol(dataframe)-1)*length(cell_id))
dim(loccellQuery) <- c(4*(ncol(dataframe)-1), length(cell_id))
loccellQuery[1:(ncol(dataframe)-1)*4-3, 1:length(cell_id)] <- rep(cell_id, each = ncol(dataframe) - 1)
loccellQuery[1:(ncol(dataframe)-1)*4-2, 1:length(cell_id)] <- ","
loccellQuery[1:(ncol(dataframe)-1)*4-1, 1:length(cell_id)] <- locidmatrix
loccellQuery[1:(ncol(dataframe)-1)*4, 1:length(cell_id)] <- "),("
loccellQuery[(ncol(dataframe)-1)*4, length(cell_id)] <- ""
if (length(cell_id)>=maxrows) {
for (i in 1:(length(cell_id)%/%maxrows)) {
loccellQuery[(ncol(dataframe)-1)*4, i*maxrows] <- ""
sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,((i-1)*maxrows+1):(i*maxrows)], collapse = ''), ')', sep = ''))
  }
  }
  }
  }
sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,(length(cell_id)%/%maxrows*maxrows+1):length(cell_id)], collapse = ''), ')', sep = ''))
  }
  }



Revision as of 06:42, 27 September 2010


Code for R for the purpose of interacting with the Opasnet Base is collected on this page. To use it, copy paste the code to R; this defines the functions, after which they can be called in R. I've tried to explain the code in comments (#). This is a work in progress, if you have comments or new ideas post them using the comment templates on a new line after the one you want to comment, I'm no expert in programming so input is appreciated.

Package dependencies

These packages are required for most of the code to work. Load these first.

  1. utils
  2. RODBC

Setup

Establishes the connection to the database. Copy paste first.

db <- odbcConnect("opasnet_base") 
  • Assuming "opasnet_base" is a correctly defined DSN.
    • Note that uploading requires writing permissions.

Downloading data

  • Tested and mostly works, in case of error with GetArray try GetDataFrame, it should work with data missing indexes and locations.

Low level functions (mainly sql queries)

Copy paste these second.

#Get latest series id
op_baseGetLatest <- function(ident) {
	series_list <- sqlQuery(db, paste('
		SELECT actobj.series_id 
		FROM obj 
		LEFT JOIN actobj 
		ON obj.id = actobj.obj_id 
		WHERE obj.ident = "', ident, '"', sep = ''))
	series_list[nrow(series_list),1]
}

#Get results (cell id, result, obs)
op_baseGetData <- function(page_ident, series_id) {
	sqlQuery(db, paste('
		SELECT cell.id, res.result, res.obs 
		FROM obj 
		LEFT JOIN actobj 
		ON obj.id = actobj.obj_id
		LEFT JOIN cell 
		ON actobj.id = cell.actobj_id 
		LEFT JOIN res 
		ON cell.id = res.cell_id 
		WHERE actobj.series_id = ', series_id, 
		' AND obj.ident = "', page_ident, '"', sep = ''))
}

#Not in use: Get indexes and locations per cell
op_baseGetDims <- function(cell_id_list) {
	sqlQuery(db, paste('
		SELECT loccell.cell_id, loc.id, loc.location, loc.obj_id_i, obj.name 
		FROM loccell 
		LEFT JOIN loc 
		ON loccell.loc_id = loc.id 
		LEFT JOIN obj 
		ON loc.obj_id_i = obj.id 
		WHERE loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ''))
}

#Experimental: match locations and indexes
#tapply(dims_table[,3], dims_table[,5], paste)

#Not in use: Get indexes from dims_table
op_baseFindInd <- function(dims_table) {
	factor(op_baseDims[,4]) 
}

#Get indexes
op_baseGetInd <- function (cell_id_list) {
	indexes <- sqlQuery(db, paste('
		SELECT DISTINCT loc.obj_id_i
		FROM cell 
		LEFT JOIN loccell 
		ON cell.id = loccell.cell_id
		LEFT JOIN loc 
		ON loccell.loc_id = loc.id 
		WHERE cell.n <> 0 AND
		cell.id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1],
		' ORDER BY loc.obj_id_i', sep = ''))
	as.numeric(indexes[,1])
}

#Get locations
op_baseGetLoc <- function(index, cell_id_list) {
	locations <- sqlQuery(db, paste('
		SELECT DISTINCT loc.id 
		FROM loccell 
		LEFT JOIN loc 
		ON loccell.loc_id = loc.id 
		WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', 
		cell_id_list[nrow(cell_id_list),1], sep = ''))
	as.numeric(locations[,1])
}

#Not in use: Get location names
op_baseGetLocNames <- function(index, cell_id_list) {
	locations <- sqlQuery(db, paste('
		SELECT DISTINCT loc.location, loc.obj_id_i 
		FROM loccell 
		LEFT JOIN loc 
		ON loccell.loc_id = loc.id 
		WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', 
		cell_id_list[nrow(cell_id_list),1], sep = ''))
	as.numeric(locations[,1])
}

#Get location name (simple)
op_baseGetLocName <- function(loc_id) {
	sqlQuery(db, paste('
		SELECT location 
		FROM loc 
		WHERE id = ', loc_id, sep = ''))
}

#Get index name
op_baseGetIndName <- function(ind_id) {
	sqlQuery(db, paste('
		SELECT name 
		FROM obj 
		WHERE id = ', ind_id, sep = '')) 
}

#Get locations for a specific cell
op_baseGetLocs <- function(cell_id) {
	sqlQuery(db, paste('
		SELECT DISTINCT loc.location, loc.id, loc.obj_id_i 
		FROM loccell 
		LEFT JOIN loc 
		ON loccell.loc_id = loc.id 
		WHERE loccell.cell_id = ', cell_id,
		' ORDER BY loc.obj_id_i', sep = ''))
}

#Get cell.n (cell id, cell.n)
op_baseGetCelln <- function(cell_id_list) {
	sqlQuery(db, paste('
		SELECT cell.id, cell.n
		FROM cell 
		WHERE cell.id 
		BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ''))
}

#Get results from probabilistic data (cell id, result, obs)
op_baseGetDataProb <- function(page_ident, series_id, Iterations) {
	sqlQuery(db, paste('
		SELECT cell.id, res.result, res.obs 
		FROM obj 
		LEFT JOIN actobj 
		ON obj.id = actobj.obj_id
		LEFT JOIN cell 
		ON actobj.id = cell.actobj_id 
		LEFT JOIN res 
		ON cell.id = res.cell_id 
		WHERE actobj.series_id = ', series_id, 
		' AND obj.ident = "', page_ident, '"', 
		' AND res.obs > 0 AND res.obs <= ', Iterations, sep = ''))
}

#Prompt user for amount of iterations to download
PromptIterations <- function(op_baseCelln) {
	as.numeric(readline(paste("There are ", max(op_baseCelln[,2]), " iterations of this data.", 
	" How many iterations would you like to download?\n")))
}

High Level function

Copy paste this last.

op_baseGetArray <- function(page_ident, Iterations) {
	
	#Basic data downloading
	
	op_baseLatest <- op_baseGetLatest(page_ident)
	op_baseData <- op_baseGetData(page_ident, op_baseLatest) #cell,result, obs
	
	#Differentiate between probabilistic and non-probabilistic
	
	probabilistic <- FALSE #variable indicator of whether the object is probabilistic
	for (i in 1:length(op_baseData)) {
		if (op_baseData[i,3] > 0) probabilistic <- TRUE
	}
	
	#final_array <- op_baseData[,2] #ideal case (data in proper order, achievable by tweaking upload order)
	#op_baseDims <- op_baseGetDims(op_baseData) #with further understanding of R number of queries could be reduced
	#op_baseIndexes <- op_baseFindInd(op_baseDims) #improved code here
	op_baseInd <- op_baseGetInd(op_baseData)
	if (probabilistic == TRUE) op_baseCelln <- op_baseGetCelln(op_baseData)
	
	#Array structure building
	
	IndLengths <- 1 #blank vector for index lengths
	IndList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for indexes and 
	#locations for naming the dimensions of the array
	IndIdList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for ids of indexes
	IndNames <- c(1) #blank vector for index names
	for (i in 1:length(op_baseInd)) {
		IndIdList[[i]] <- op_baseGetLoc(op_baseInd[i], op_baseData) #fetch location ids in index to a list, 
 		#assumed every cell has locations for all unique index id values in loc.obj_id_i in the range of cells
		IndNames[i] <- paste(op_baseGetIndName(op_baseInd[i])[1,1]) #fetch index name to a vector
		IndLengths[i] <- length(IndIdList[[i]]) #backup: op_baseGetLoc(op_baseInd[i], op_baseData))
		for (j in 1:IndLengths[i]) { 
			IndList[[i]][j] <- paste(op_baseGetLocName(IndIdList[[i]][j])[1,1])
		}
	}
	if (probabilistic == TRUE) { #Probabilistic exceptions (add index: Iteration)
		Iterations <- if (interactive()) PromptIterations(op_baseCelln) else 
		if (Iterations < 1 && Iterations > max(op_baseCelln[,2])) max(op_baseCelln[,2])
		#if not an interactive session, returns max
		while (Iterations < 1 && Iterations > max(op_baseCelln[,2])) Iterations <- PromptIterations(op_baseCelln)
		IndLengths[length(IndLengths) + 1] <- Iterations
		IndList[[length(IndList) + 1]] <- 1:Iterations
		IndNames[length(IndNames) + 1] <- "Iteration"
		op_baseData <- op_baseGetDataProb(page_ident, op_baseLatest, Iterations) #cell,result, obs
	}
	final_array <- rep(NA, prod(IndLengths))
	dim(final_array) <- IndLengths
	names(IndList) <- IndNames
	names(IndLengths) <- op_baseInd
	dimnames(final_array) <- IndList
	
	#Cell population
	
	IndexNumber <- 1:length(op_baseInd)
	names(IndexNumber) <- op_baseInd
	LocList <- c(1) #blank vector for locations per cell
	LocMap <- c(1) #blank vector for location to position mapping
	LocMapNames <- c(1)
	CellPosition <- rep(1, length(IndLengths)) #blank vector for cell position
	dim(CellPosition) <- c(1, length(IndLengths)) #form into matrix of appropriate form
	for (i in 1:length(op_baseData[,2])) { #for each result
		LocList <- op_baseGetLocs(op_baseData[i,1]) #get list of locations
		for (j in 1:length(LocList[,1])) { #for each location per cell
			LocMap <- 1:IndLengths[paste(LocList[j,3])] #get available positions in given index 
			names(LocMap) <- dimnames(final_array)[[paste(names(IndList)[IndexNumber[paste(LocList[j,3])]])]] 
			#add names (locations) of positions in given index
			CellPosition[1,IndexNumber[paste(LocList[j,3])]] <- LocMap[paste(LocList[j,1])] 
			#match cell specific locations to positions in index
		}
		if (probabilistic == TRUE) CellPosition[1,length(CellPosition)] <- op_baseData[i,3]
		final_array[CellPosition] <- op_baseData[i,2] #insert result in array in position determined by CellPosition
	}
	final_array #print/return the final array
}

ArraytoDataFrame <- function(array) {
	as.data.frame(as.table(array))
}

op_baseGetDataFrame <- function(page_ident) {
	
	#Basic data downloading
	
	op_baseLatest <- op_baseGetLatest(page_ident)
	op_baseData <- op_baseGetData(page_ident, op_baseLatest) #cell,result, obs
	
	#Differentiate between probabilistic and non-probabilistic
	
	probabilistic <- FALSE #variable indicator of whether the object is probabilistic
	for (i in 1:length(op_baseData)) {
		if (op_baseData[i,3] > 0) probabilistic <- TRUE
	}
	if (probabilistic == TRUE) op_baseData <- op_baseGetDataProb(page_ident, op_baseLatest) #cell,result, obs
	
	op_baseInd <- op_baseGetInd(op_baseData)
	
	#Table building
	
	for (i in 1:length(op_baseData[,2])) { #for each result
		LocList <- paste(op_baseGetLocs(op_baseData[i,1])[,1]) #get list of locations
		if (length(LocList) != 0) {
			for (j in 1:length(LocList)) { #for each location per cell
				op_baseData[i,(3+j)] <- LocList[j] #put in place in table
			}
		}
	}
	final_table <- data.frame(1:length(op_baseData[,1]))
	#final_table[,1] <- op_baseData[,1]
	x <- 0
	if (probabilistic == TRUE) {
		x <- 1
		dimnames(final_table)[[2]][1] <- "Iteration"
		final_table[,1] <- op_baseData[,3]
	}
	if (is.na(op_baseInd[1]) == TRUE) y <- 0 else y <- length(op_baseInd)
	if (y > 0) {
		for (i in 1:y) {
			final_table[,x + i] <- op_baseData[,3+i]
		}
	}
	final_table[,(x + y + 1)] <- op_baseData[,2]
	if (y > 0) {
		for (i in 1:y) {
			dimnames(final_table)[[2]][x+i] <- paste(op_baseGetIndName(op_baseInd[i])[1,1])
		}
	}
	dimnames(final_table)[[2]][(x + y + 1)] <- "Result"
	final_table
}

Usage

variable <- op_baseGetArray("page identifier")
variable <- ArraytoDataFrame(array name)
variable <- op_baseGetDataFrame("page identifier")

Uploading data

Tested and works. Cell writing is efficient with even with huge data, index writing requires more work. Probabilistic data not supported.

Low level

library(utils)
library(RODBC)

dbSimpleRead <- function(db, table, column, filtercol, filter) {
	sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol, ' = "', filter, '"', sep = ))[1,1]
}

dbLessSimpleRead <- function(db, table, column, filtercol1, filter1, filtercol2, filter2) {
	sqlQuery(db, paste('SELECT ', column, ' FROM ', table, ' WHERE ', filtercol1, ' = "', filter1, '" AND ', filtercol2, ' = "', filter2, '"', sep = ))[1,1]
}

SimplePrompt <- function(question) {
	readline(paste(question, "\n", sep = ""))
}

High level

op_baseWriteArray <- function(db, array, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL) {
	
	originaldataframe <- as.data.frame(as.table(array))
	
	values <- originaldataframe[,"Freq"]
	cond <- (is.na(values) == FALSE)
	
	dataframe <- originaldataframe[cond,]
	
	#Add page to database (if it doesn't already exist)
	
	if (is.null(ident)==TRUE) if (interactive()) ident <- SimplePrompt("What is the identifier of this object?")
	wiki_name <- substring(ident, 1, 5)
	if (wiki_name=="Op_en") {wiki_id <- 1; page <- substring(ident, 6, nchar(ident))} else if (wiki_name=="Op_fi") {wiki_id <- 2; page <- substring(ident, 6, nchar(ident))} else {wiki_id <- 0; page <- 0}
	obj_id <- dbSimpleRead(db, "obj", "id", "ident", ident)
	if (is.na(obj_id)==TRUE) {
		if (is.null(name)==TRUE) if (interactive()) name <- SimplePrompt("What is the name of this object?") else error
		if (is.null(unit)==TRUE) if (interactive()) unit <- SimplePrompt("What is the unit of this object?") else error
		if (is.null(objtype_id)==TRUE) if (interactive()) objtype_id <- SimplePrompt(paste("What type of object is this (id)?", paste(paste(sqlQuery(db, "SELECT id FROM objtype")[,1], sqlQuery(db, "SELECT objtype FROM objtype")[,1], sep = " - "), collapse = ", "), collapse = " ")) else error
		sqlQuery(db, paste('INSERT INTO obj (ident, name, unit, objtype_id, page, wiki_id) VALUES ("', paste(ident, name, unit, sep = '","'), '",', paste(objtype_id, page, wiki_id, sep = ','), ')', sep = ))
		obj_id <- dbSimpleRead(db, "obj", "id", "ident", ident)
	}
	
	#Write act and actobj
	
	if (is.null(who)==TRUE) if (interactive()) who <- SimplePrompt("What is the name of the uploader?") else error
	sqlQuery(db, paste('INSERT INTO act (acttype_id, who, comments) VALUES (4, "', who, '","R upload")', sep = ))
	act_id <- sqlQuery(db, "SELECT id FROM act ORDER BY id DESC LIMIT 1")[1,1]
	sqlQuery(db, paste('INSERT INTO actobj (act_id, obj_id, series_id) VALUES (', paste(act_id, obj_id, act_id, sep = ','), ')', sep = ))
	actobj_id <- dbSimpleRead(db, "actobj", "id", "act_id", act_id)

	#Write indexes and locations
	x <- 1
	DimIds <- list()
	IndId <- 0
	Ind <- 0
	loc <- 0
	LocId <- 0
	for (i in 1:(ncol(dataframe) - x)) {
		DimIdsi <- rep(NA, length(levels(factor(dataframe[,i]))))
		IndId <- dbSimpleRead(db, "obj", "id", "ident", dimnames(dataframe)2[i])
		if (is.na(IndId)==TRUE) {
			sqlQuery(db, paste('INSERT INTO obj (ident, name, objtype_id) VALUES ("', paste(dimnames(dataframe)2[i], dimnames(dataframe)2[i], sep = '","'), '",', 6, ')', sep = ))
			IndId <- dbSimpleRead(db, "obj", "id", "ident", dimnames(dataframe)2[i])
		}
		for (j in 1:length(levels(factor(dataframe[,i])))) { #1:length(dimnames(array)i)) { 
			LocId <- dbLessSimpleRead(db, "loc", "id", "location", levels(factor(dataframe[,i]))[j], "obj_id_i", IndId)
			if (is.na(LocId)==TRUE) {
				sqlQuery(db, paste('INSERT INTO loc (obj_id_i, location) VALUES (', IndId, ',"', levels(factor(dataframe[,i]))[j], '")', sep = ))
				LocId <- dbLessSimpleRead(db, "loc", "id", "location", levels(factor(dataframe[,i]))[j], "obj_id_i", IndId)
			}
			DimIdsi[j] <- LocId
		}
		names(DimIds)[i] <- IndId
	}
	
	# Changing location names in table into ids
	
	for (i in 1:(ncol(dataframe)-1)) {
		dataframe[,i] <- factor(dataframe[,i])
		levels(dataframe[,i]) <- DimIdsi
	}
	
	maxrows <- 5000
	
	#Writing cell
	
	n <- 1
	cellQuery <- NA
	cellQuery[1:nrow(dataframe)*6-5] <- actobj_id
	cellQuery[1:nrow(dataframe)*6-4] <- ","
	cellQuery[1:nrow(dataframe)*6-3] <- dataframe[,"Freq"]
	cellQuery[1:nrow(dataframe)*6-2] <- ","
	cellQuery[1:nrow(dataframe)*6-1] <- n
	cellQuery[1:(nrow(dataframe)-1)*6] <- "),("
	if (nrow(dataframe)>=maxrows) {
		for (i in 1:(nrow(dataframe)%/%maxrows)) {
			sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[((i-1)*6*maxrows+1):(i*6*maxrows-1)], collapse = ), ')', sep = ))
		}
	}
	sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, n) VALUES (', paste(cellQuery[(nrow(dataframe)%/%maxrows*6*maxrows+1):length(cellQuery)], collapse = ), ')', sep = ))
	
	#Writing res
	
	cell_id <- rev(sqlQuery(db, paste("SELECT id FROM cell WHERE actobj_id = ", actobj_id, " ORDER BY ID DESC LIMIT ", nrow(dataframe), sep = ""))[,1])
	resQuery <- NA
	resQuery[1:length(cell_id)*4-3] <- cell_id
	resQuery[1:length(cell_id)*4-2] <- ","
	resQuery[1:length(cell_id)*4-1] <- dataframe[,"Freq"]
	resQuery[1:(length(cell_id)-1)*4] <- "),("
	if (length(cell_id)>=maxrows) {
		for (i in 1:(length(cell_id)%/%maxrows)) {
			sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', paste(resQuery[((i-1)*4*maxrows+1):(i*4*maxrows-1)], collapse = ), ')', sep = ))
		}
	}
	sqlQuery(db, paste('INSERT INTO res (cell_id, result) VALUES (', paste(resQuery[(length(cell_id)%/%maxrows*4*maxrows+1):length(resQuery)], collapse = ), ')', sep = ))
	
	#Writing loccell
	
	locidmatrix <- rep(NA, (ncol(dataframe)-1)*length(cell_id))
	dim(locidmatrix) <- c(ncol(dataframe)-1,length(cell_id))
	for (i in 1:(ncol(dataframe)-1)) {
		locidmatrix[i,] <- as.character(dataframe[,i])
	}
	loccellQuery <- rep(NA, 4*(ncol(dataframe)-1)*length(cell_id))
	dim(loccellQuery) <- c(4*(ncol(dataframe)-1), length(cell_id))
	loccellQuery[1:(ncol(dataframe)-1)*4-3, 1:length(cell_id)] <- rep(cell_id, each = ncol(dataframe) - 1)
	loccellQuery[1:(ncol(dataframe)-1)*4-2, 1:length(cell_id)] <- ","
	loccellQuery[1:(ncol(dataframe)-1)*4-1, 1:length(cell_id)] <- locidmatrix
	loccellQuery[1:(ncol(dataframe)-1)*4, 1:length(cell_id)] <- "),("
	loccellQuery[(ncol(dataframe)-1)*4, length(cell_id)] <- ""
	if (length(cell_id)>=maxrows) {
		for (i in 1:(length(cell_id)%/%maxrows)) {
			loccellQuery[(ncol(dataframe)-1)*4, i*maxrows] <- ""
			sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,((i-1)*maxrows+1):(i*maxrows)], collapse = ), ')', sep = ))
		}
	}
	sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,(length(cell_id)%/%maxrows*maxrows+1):length(cell_id)], collapse = ), ')', sep = ))
}

Usage

op_baseWriteArray(db, array, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL)

Db and array must be defined, the rest if not defined is prompted for by the function.