Opasnet Base Connection for R: Difference between revisions
Jump to navigation
Jump to search
(Created page with 'Category:Opasnet Category:Opasnet Base Category:R tool Category:SQL tool Category:Open assessment {{tool|moderator=Teemu R}} Code for R for the purpose of int…') |
mNo edit summary |
||
Line 30: | Line 30: | ||
#Get latest series id | #Get latest series id | ||
op_baseGetLatest <- function(ident) { | op_baseGetLatest <- function(ident) { | ||
series_list <- sqlQuery(db, paste('SELECT actobj.series_id FROM obj LEFT JOIN actobj ON obj.id = actobj.obj_id WHERE obj.ident = "', ident, '"', sep = '')) | series_list <- sqlQuery(db, paste(' | ||
SELECT actobj.series_id | |||
FROM obj | |||
LEFT JOIN actobj | |||
ON obj.id = actobj.obj_id | |||
WHERE obj.ident = "', ident, '"', sep = '')) | |||
series_list[nrow(series_list),1] | series_list[nrow(series_list),1] | ||
} | } | ||
Line 36: | Line 41: | ||
#Get results (cell id, result) | #Get results (cell id, result) | ||
op_baseGetData <- function(series_id) { | op_baseGetData <- function(series_id) { | ||
sqlQuery(db, paste('SELECT cell.id, res.result FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN res ON cell.id = res.cell_id WHERE actobj.series_id = ', series_id, sep = '')) | sqlQuery(db, paste(' | ||
SELECT cell.id, res.result | |||
FROM actobj | |||
LEFT JOIN cell | |||
ON actobj.id = cell.actobj_id | |||
LEFT JOIN res | |||
ON cell.id = res.cell_id | |||
WHERE actobj.series_id = ', series_id, sep = '')) | |||
} | } | ||
#Not in use: Get indexes and locations per cell | #Not in use: Get indexes and locations per cell | ||
op_baseGetDims <- function(cell_id_list) { | op_baseGetDims <- function(cell_id_list) { | ||
sqlQuery(db, paste('SELECT loccell.cell_id, loc.id, loc.location, loc.obj_id_i, obj.name FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = '')) | sqlQuery(db, paste(' | ||
SELECT loccell.cell_id, loc.id, loc.location, loc.obj_id_i, obj.name | |||
FROM loccell | |||
LEFT JOIN loc | |||
ON loccell.loc_id = loc.id | |||
LEFT JOIN obj | |||
ON loc.obj_id_i = obj.id | |||
WHERE loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = '')) | |||
} | } | ||
Line 54: | Line 73: | ||
#Get indexes | #Get indexes | ||
op_baseGetInd <- function (cell_id_list) { | op_baseGetInd <- function (cell_id_list) { | ||
indexes <- sqlQuery(db, paste('SELECT DISTINCT loc.obj_id_i FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = '')) | indexes <- sqlQuery(db, paste(' | ||
SELECT DISTINCT loc.obj_id_i | |||
FROM loccell | |||
LEFT JOIN loc | |||
ON loccell.loc_id = loc.id | |||
WHERE loccell.cell_id | |||
BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = '')) | |||
as.numeric(indexes[,1]) | as.numeric(indexes[,1]) | ||
} | } | ||
Line 60: | Line 85: | ||
#Get locations | #Get locations | ||
op_baseGetLoc <- function(index, cell_id_list) { | op_baseGetLoc <- function(index, cell_id_list) { | ||
locations <- sqlQuery(db, paste('SELECT DISTINCT loc.id FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = '')) | locations <- sqlQuery(db, paste(' | ||
SELECT DISTINCT loc.id | |||
FROM loccell | |||
LEFT JOIN loc | |||
ON loccell.loc_id = loc.id | |||
WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = '')) | |||
as.numeric(locations[,1]) | as.numeric(locations[,1]) | ||
} | } | ||
Line 66: | Line 96: | ||
#Get location names (will not work properly if different indexes have locations with similar name) | #Get location names (will not work properly if different indexes have locations with similar name) | ||
op_baseGetLocNames <- function(index, cell_id_list) { | op_baseGetLocNames <- function(index, cell_id_list) { | ||
locations <- sqlQuery(db, paste('SELECT DISTINCT loc.location FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = '')) | locations <- sqlQuery(db, paste(' | ||
SELECT DISTINCT loc.location | |||
FROM loccell | |||
LEFT JOIN loc | |||
ON loccell.loc_id = loc.id | |||
WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = '')) | |||
as.numeric(locations[,1]) | as.numeric(locations[,1]) | ||
} | } | ||
#Get location name (simple) | |||
op_baseGetLocName <- function(loc_id) { | |||
sqlQuery(db, paste(' | |||
SELECT location | |||
FROM loc | |||
WHERE id = ', loc_id, sep = '')) | |||
} | |||
#Get index name | |||
op_baseGetIndName <- function(ind_id) { | |||
sqlQuery(db, paste(' | |||
SELECT name | |||
FROM obj | |||
WHERE id = ', ind_id, sep = '')) } | |||
#Get locations for a specific cell | |||
op_baseGetLocs <- function(cell_id) { | |||
sqlQuery(db, paste(' | |||
SELECT DISTINCT loc.location, loc.id | |||
FROM loccell | |||
LEFT JOIN loc | |||
ON loccell.loc_id = loc.id | |||
WHERE loccell.cell_id = ', cell_id, sep = '')) #backup: 'AND loc.obj_id_i BETWEEN ', <<<INDEX>>>, ' AND ', <<<INDEX>>> | |||
} | |||
===High Level function=== | |||
Copy paste this last. | |||
# | op_baseArray <- function(page_ident) { | ||
#Basic data downloading | |||
} | |||
op_baseLatest <- op_baseGetLatest(page_ident) | |||
op_baseData <- op_baseGetData(op_baseLatest) #cell,result | |||
#final_array <- op_baseData[,2] #ideal case (data in proper order) | |||
#op_baseDims <- op_baseGetDims(op_baseData) | |||
#op_baseIndexes <- op_baseFindInd(op_baseDims) | |||
op_baseInd <- op_baseGetInd(op_baseData) | |||
#Array structure building | |||
IndLengths <- 1:3 #blank vector for index lengths | |||
IndList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for indexes and locations for naming the dimensions of the array | |||
IndIdList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for ids of indexes | |||
IndNames <- c(1) #blank vector for index names | |||
#temp <- c(1) #temporary vector | |||
for (i in 1:length(op_baseInd)) { | |||
IndIdList[[i]] <- op_baseGetLoc(op_baseInd[i], op_baseData) #fetch location ids in index to a list, assumed every cell has locations for all unique index id values in loc.obj_id_i in the range of cells | |||
IndNames[i] <- paste(op_baseGetIndName(op_baseInd[i])[1,1]) #fetch index name to a vector | |||
IndLengths[i] <- length(IndIdList[[i]]) #backup: op_baseGetLoc(op_baseInd[i], op_baseData)) | |||
for (j in 1:IndLengths[i]) { | |||
#temp[[i]][j] <- op_baseGetLocName(IndIdList[[i]][j]) | |||
IndList[[i]][j] <- paste(op_baseGetLocName(IndIdList[[i]][j])[1,1]) | |||
} | |||
#IndList[[i]] <- temp[[i]] | |||
} | |||
final_array <- rep(NA, prod(IndLengths)) | |||
dim(final_array) <- IndLengths | |||
names(IndList) <- IndNames | |||
dimnames(final_array) <- IndList | |||
#Cell population | |||
#IndLocNumbers <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length | |||
#for (i in 1:length(op_baseInd)) { | |||
# IndLocNumbers[[i]] <- 1:IndLengths[i] | |||
#} | |||
LocList <- c(1) #blank vector for locations per cell | |||
LocMap <- c(1) #blank vector for location to position mapping | |||
CellPosition <- c(1) #blank vector for cell position | |||
for (i in 1:length(op_baseData[,1])) { #for each cell | |||
LocList <- paste(op_baseGetLocs(op_baseData[i,1])[,1]) #get list of locations | |||
for (j in 1:length(LocList)) { #for each location per cell | |||
LocMap <- 1:IndLengths[j] #get available positions in given index, assumed correct order of indexes in LocList | |||
names(LocMap) <- dimnames(final_array)[[j]] #add names (locations) of positions in given index | |||
CellPosition[j] <- LocMap[LocList[j]] #match cell specific locations to positions in index | |||
} | |||
final_array[CellPosition] <- op_baseData[i,2] #insert result in array in position determined by CellPosition | |||
} | |||
final_array #print/return the final array | |||
} | |||
====Usage==== | |||
== | |||
variable <- op_baseArray("<page identifier>") | |||
Revision as of 09:43, 20 August 2010
Moderator:Teemu R (see all) |
|
Upload data
|
Code for R for the purpose of interacting with the Opasnet Base is collected on this page. To use it, copy paste the code to R, this defines the functions, which can then be called in R.
Package dependencies
These packages are required for most of the code to work. Load these first.
- utils
- RODBC
Setup
Establishes the connection to the database. Copy paste first.
db <- odbcConnect("opasnet_base") #assuming "opasnet_base" is the defined DSN
Downloading data
At the moment very sensitive to glitches in the database (multiple locations for the same index for a single cell), and does not handle large quantities of data well. Ideally should work, testing needed.
Low level functions (mainly sql queries)
Copy paste these second.
#Get latest series id op_baseGetLatest <- function(ident) { series_list <- sqlQuery(db, paste(' SELECT actobj.series_id FROM obj LEFT JOIN actobj ON obj.id = actobj.obj_id WHERE obj.ident = "', ident, '"', sep = )) series_list[nrow(series_list),1] } #Get results (cell id, result) op_baseGetData <- function(series_id) { sqlQuery(db, paste(' SELECT cell.id, res.result FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN res ON cell.id = res.cell_id WHERE actobj.series_id = ', series_id, sep = )) } #Not in use: Get indexes and locations per cell op_baseGetDims <- function(cell_id_list) { sqlQuery(db, paste(' SELECT loccell.cell_id, loc.id, loc.location, loc.obj_id_i, obj.name FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = )) } #Experimental: Sort locations by index #tapply(dims_table[,3], dims_table[,5], paste) #Not in use: Get indexes from dims_table op_baseFindInd <- function(dims_table) { factor(op_baseDims[,4]) #could also use "obj_id_i"(obj.name for index) instead of 5 for column name } #Get indexes op_baseGetInd <- function (cell_id_list) { indexes <- sqlQuery(db, paste(' SELECT DISTINCT loc.obj_id_i FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = )) as.numeric(indexes[,1]) } #Get locations op_baseGetLoc <- function(index, cell_id_list) { locations <- sqlQuery(db, paste(' SELECT DISTINCT loc.id FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = )) as.numeric(locations[,1]) } #Get location names (will not work properly if different indexes have locations with similar name) op_baseGetLocNames <- function(index, cell_id_list) { locations <- sqlQuery(db, paste(' SELECT DISTINCT loc.location FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = )) as.numeric(locations[,1]) } #Get location name (simple) op_baseGetLocName <- function(loc_id) { sqlQuery(db, paste(' SELECT location FROM loc WHERE id = ', loc_id, sep = )) } #Get index name op_baseGetIndName <- function(ind_id) { sqlQuery(db, paste(' SELECT name FROM obj WHERE id = ', ind_id, sep = )) } #Get locations for a specific cell op_baseGetLocs <- function(cell_id) { sqlQuery(db, paste(' SELECT DISTINCT loc.location, loc.id FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loccell.cell_id = ', cell_id, sep = )) #backup: 'AND loc.obj_id_i BETWEEN ', <<<INDEX>>>, ' AND ', <<<INDEX>>> }
High Level function
Copy paste this last.
op_baseArray <- function(page_ident) { #Basic data downloading op_baseLatest <- op_baseGetLatest(page_ident) op_baseData <- op_baseGetData(op_baseLatest) #cell,result #final_array <- op_baseData[,2] #ideal case (data in proper order) #op_baseDims <- op_baseGetDims(op_baseData) #op_baseIndexes <- op_baseFindInd(op_baseDims) op_baseInd <- op_baseGetInd(op_baseData) #Array structure building IndLengths <- 1:3 #blank vector for index lengths IndList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for indexes and locations for naming the dimensions of the array IndIdList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for ids of indexes IndNames <- c(1) #blank vector for index names #temp <- c(1) #temporary vector for (i in 1:length(op_baseInd)) { IndIdListi <- op_baseGetLoc(op_baseInd[i], op_baseData) #fetch location ids in index to a list, assumed every cell has locations for all unique index id values in loc.obj_id_i in the range of cells IndNames[i] <- paste(op_baseGetIndName(op_baseInd[i])[1,1]) #fetch index name to a vector IndLengths[i] <- length(IndIdListi) #backup: op_baseGetLoc(op_baseInd[i], op_baseData)) for (j in 1:IndLengths[i]) { #tempi[j] <- op_baseGetLocName(IndIdListi[j]) IndListi[j] <- paste(op_baseGetLocName(IndIdListi[j])[1,1]) } #IndListi <- tempi } final_array <- rep(NA, prod(IndLengths)) dim(final_array) <- IndLengths names(IndList) <- IndNames dimnames(final_array) <- IndList #Cell population #IndLocNumbers <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length #for (i in 1:length(op_baseInd)) { # IndLocNumbersi <- 1:IndLengths[i] #} LocList <- c(1) #blank vector for locations per cell LocMap <- c(1) #blank vector for location to position mapping CellPosition <- c(1) #blank vector for cell position for (i in 1:length(op_baseData[,1])) { #for each cell LocList <- paste(op_baseGetLocs(op_baseData[i,1])[,1]) #get list of locations for (j in 1:length(LocList)) { #for each location per cell LocMap <- 1:IndLengths[j] #get available positions in given index, assumed correct order of indexes in LocList names(LocMap) <- dimnames(final_array)j #add names (locations) of positions in given index CellPosition[j] <- LocMap[LocList[j]] #match cell specific locations to positions in index } final_array[CellPosition] <- op_baseData[i,2] #insert result in array in position determined by CellPosition } final_array #print/return the final array }
Usage
variable <- op_baseArray("<page identifier>")