Opasnet Base Connection for R: Difference between revisions

From Opasnet
Jump to navigation Jump to search
(Created page with 'Category:Opasnet Category:Opasnet Base Category:R tool Category:SQL tool Category:Open assessment {{tool|moderator=Teemu R}} Code for R for the purpose of int…')
 
mNo edit summary
Line 30: Line 30:
  #Get latest series id
  #Get latest series id
  op_baseGetLatest <- function(ident) {
  op_baseGetLatest <- function(ident) {
  series_list <- sqlQuery(db, paste('SELECT actobj.series_id FROM obj LEFT JOIN actobj ON obj.id = actobj.obj_id WHERE obj.ident = "', ident, '"', sep = ''))
  series_list <- sqlQuery(db, paste('
SELECT actobj.series_id  
FROM obj  
LEFT JOIN actobj  
ON obj.id = actobj.obj_id  
WHERE obj.ident = "', ident, '"', sep = ''))
  series_list[nrow(series_list),1]
  series_list[nrow(series_list),1]
  }
  }
Line 36: Line 41:
  #Get results (cell id, result)
  #Get results (cell id, result)
  op_baseGetData <- function(series_id) {
  op_baseGetData <- function(series_id) {
  sqlQuery(db, paste('SELECT cell.id, res.result FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN res ON cell.id = res.cell_id WHERE actobj.series_id = ', series_id, sep = ''))
  sqlQuery(db, paste('
SELECT cell.id, res.result  
FROM actobj  
LEFT JOIN cell  
ON actobj.id = cell.actobj_id  
LEFT JOIN res  
ON cell.id = res.cell_id  
WHERE actobj.series_id = ', series_id, sep = ''))
  }
  }
   
   
  #Not in use: Get indexes and locations per cell
  #Not in use: Get indexes and locations per cell
  op_baseGetDims <- function(cell_id_list) {
  op_baseGetDims <- function(cell_id_list) {
  sqlQuery(db, paste('SELECT loccell.cell_id, loc.id, loc.location, loc.obj_id_i, obj.name FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ''))
  sqlQuery(db, paste('
SELECT loccell.cell_id, loc.id, loc.location, loc.obj_id_i, obj.name  
FROM loccell  
LEFT JOIN loc  
ON loccell.loc_id = loc.id  
LEFT JOIN obj  
ON loc.obj_id_i = obj.id  
WHERE loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ''))
  }
  }
   
   
Line 54: Line 73:
  #Get indexes
  #Get indexes
  op_baseGetInd <- function (cell_id_list) {
  op_baseGetInd <- function (cell_id_list) {
  indexes <- sqlQuery(db, paste('SELECT DISTINCT loc.obj_id_i FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ''))
  indexes <- sqlQuery(db, paste('
SELECT DISTINCT loc.obj_id_i  
FROM loccell  
LEFT JOIN loc  
ON loccell.loc_id = loc.id  
WHERE loccell.cell_id  
BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ''))
  as.numeric(indexes[,1])
  as.numeric(indexes[,1])
  }
  }
Line 60: Line 85:
  #Get locations
  #Get locations
  op_baseGetLoc <- function(index, cell_id_list) {
  op_baseGetLoc <- function(index, cell_id_list) {
  locations <- sqlQuery(db, paste('SELECT DISTINCT loc.id FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ''))
  locations <- sqlQuery(db, paste('
SELECT DISTINCT loc.id  
FROM loccell  
LEFT JOIN loc  
ON loccell.loc_id = loc.id  
WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ''))
  as.numeric(locations[,1])
  as.numeric(locations[,1])
  }
  }
Line 66: Line 96:
  #Get location names (will not work properly if different indexes have locations with similar name)
  #Get location names (will not work properly if different indexes have locations with similar name)
  op_baseGetLocNames <- function(index, cell_id_list) {
  op_baseGetLocNames <- function(index, cell_id_list) {
  locations <- sqlQuery(db, paste('SELECT DISTINCT loc.location FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ''))
  locations <- sqlQuery(db, paste('
SELECT DISTINCT loc.location  
FROM loccell  
LEFT JOIN loc  
ON loccell.loc_id = loc.id  
WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ''))
  as.numeric(locations[,1])
  as.numeric(locations[,1])
  }
  }
#Get location name (simple)
op_baseGetLocName <- function(loc_id) {
sqlQuery(db, paste('
SELECT location
FROM loc
WHERE id = ', loc_id, sep = ''))
}
#Get index name
op_baseGetIndName <- function(ind_id) {
sqlQuery(db, paste('
SELECT name
FROM obj
WHERE id = ', ind_id, sep = '')) }
#Get locations for a specific cell
op_baseGetLocs <- function(cell_id) {
sqlQuery(db, paste('
SELECT DISTINCT loc.location, loc.id
FROM loccell
LEFT JOIN loc
ON loccell.loc_id = loc.id
WHERE loccell.cell_id = ', cell_id, sep = '')) #backup: 'AND loc.obj_id_i BETWEEN ', <<<INDEX>>>, ' AND ', <<<INDEX>>>
}
===High Level function===


#Get location name (simple)
Copy paste this last.
op_baseGetLocName <- function(loc_id) {
sqlQuery(db, paste('SELECT location FROM loc WHERE id = ', loc_id, sep = ''))
}


#Get index name
op_baseArray <- function(page_ident) {
op_baseGetIndName <- function(ind_id) {
sqlQuery(db, paste('SELECT name FROM obj WHERE id = ', ind_id, sep = ''))
#Basic data downloading
}
op_baseLatest <- op_baseGetLatest(page_ident)
op_baseData <- op_baseGetData(op_baseLatest) #cell,result
#final_array <- op_baseData[,2] #ideal case (data in proper order)
#op_baseDims <- op_baseGetDims(op_baseData)
#op_baseIndexes <- op_baseFindInd(op_baseDims)
op_baseInd <- op_baseGetInd(op_baseData)
#Array structure building
IndLengths <- 1:3 #blank vector for index lengths
IndList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for indexes and locations for naming the dimensions of the array
IndIdList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for ids of indexes
IndNames <- c(1) #blank vector for index names
#temp <- c(1) #temporary vector
for (i in 1:length(op_baseInd)) {
IndIdList[[i]] <- op_baseGetLoc(op_baseInd[i], op_baseData) #fetch location ids in index to a list, assumed every cell has locations for all unique index id values in loc.obj_id_i in the range of cells
IndNames[i] <- paste(op_baseGetIndName(op_baseInd[i])[1,1]) #fetch index name to a vector
IndLengths[i] <- length(IndIdList[[i]]) #backup: op_baseGetLoc(op_baseInd[i], op_baseData))
for (j in 1:IndLengths[i]) {
#temp[[i]][j] <- op_baseGetLocName(IndIdList[[i]][j])
IndList[[i]][j] <- paste(op_baseGetLocName(IndIdList[[i]][j])[1,1])
}
#IndList[[i]] <- temp[[i]]
}
final_array <- rep(NA, prod(IndLengths))
dim(final_array) <- IndLengths
names(IndList) <- IndNames
dimnames(final_array) <- IndList
#Cell population
#IndLocNumbers <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length
#for (i in 1:length(op_baseInd)) {
# IndLocNumbers[[i]] <- 1:IndLengths[i]
#}
LocList <- c(1) #blank vector for locations per cell
LocMap <- c(1) #blank vector for location to position mapping
CellPosition <- c(1) #blank vector for cell position
for (i in 1:length(op_baseData[,1])) { #for each cell
LocList <- paste(op_baseGetLocs(op_baseData[i,1])[,1]) #get list of locations
for (j in 1:length(LocList)) { #for each location per cell
LocMap <- 1:IndLengths[j] #get available positions in given index, assumed correct order of indexes in LocList
names(LocMap) <- dimnames(final_array)[[j]] #add names (locations) of positions in given index
CellPosition[j] <- LocMap[LocList[j]] #match cell specific locations to positions in index
}
final_array[CellPosition] <- op_baseData[i,2] #insert result in array in position determined by CellPosition
}
final_array #print/return the final array
}


#Get locations for a specific cell
====Usage====
op_baseGetLocs <- function(cell_id) {
sqlQuery(db, paste('SELECT DISTINCT loc.location, loc.id,  FROM loccell LEFT JOIN loc ON loccell.loc_id = loc.id WHERE loccell.cell_id = ', cell_id, sep = '')) #backup: 'AND loc.obj_id_i BETWEEN ', <<<INDEX>>>, ' AND ', <<<INDEX>>>
}
 
===High Level function===


op_baseArray <- function(page_ident) {
variable <- op_baseArray("<page identifier>")
#Basic data downloading
op_baseLatest <- op_baseGetLatest(page_ident)
op_baseData <- op_baseGetData(op_baseLatest) #cell,result
#final_array <- op_baseData[,2] #ideal case (data in proper order)
#op_baseDims <- op_baseGetDims(op_baseData)
#op_baseIndexes <- op_baseFindInd(op_baseDims)
op_baseInd <- op_baseGetInd(op_baseData)
#Array structure building
IndLengths <- 1:3 #blank vector for index lengths
IndList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for indexes and locations for naming the dimensions of the array
IndIdList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for ids of indexes
IndNames <- c(1) #blank vector for index names
#temp <- c(1) #temporary vector
for (i in 1:length(op_baseInd)) {
IndIdList[[i]] <- op_baseGetLoc(op_baseInd[i], op_baseData) #fetch location ids in index to a list, assumed every cell has locations for all unique index id values in loc.obj_id_i in the range of cells
IndNames[i] <- paste(op_baseGetIndName(op_baseInd[i])[1,1]) #fetch index name to a vector
IndLengths[i] <- length(IndIdList[[i]]) #backup: op_baseGetLoc(op_baseInd[i], op_baseData))
for (j in 1:IndLengths[i]) {
#temp[[i]][j] <- op_baseGetLocName(IndIdList[[i]][j])
IndList[[i]][j] <- paste(op_baseGetLocName(IndIdList[[i]][j])[1,1])
}
#IndList[[i]] <- temp[[i]]
}
final_array <- rep(NA, prod(IndLengths))
dim(final_array) <- IndLengths
names(IndList) <- IndNames
dimnames(final_array) <- IndList
#Cell population
#IndLocNumbers <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length
#for (i in 1:length(op_baseInd)) {
# IndLocNumbers[[i]] <- 1:IndLengths[i]
#}
LocList <- c(1) #blank vector for locations per cell
LocMap <- c(1) #blank vector for location to position mapping
CellPosition <- c(1) #blank vector for cell position
for (i in 1:length(op_baseData[,1])) { #for each cell
LocList <- paste(op_baseGetLocs(op_baseData[i,1])[,1]) #get list of locations
for (j in 1:length(LocList)) { #for each location per cell
LocMap <- 1:IndLengths[j] #get available positions in given index, assumed correct order of indexes in LocList
names(LocMap) <- dimnames(final_array)[[j]] #add names (locations) of positions in given index
CellPosition[j] <- LocMap[LocList[j]] #match cell specific locations to positions in index
}
final_array[CellPosition] <- op_baseData[i,2] #insert result in array in position determined by CellPosition
}
final_array #print/return the final array
}

Revision as of 09:43, 20 August 2010


Code for R for the purpose of interacting with the Opasnet Base is collected on this page. To use it, copy paste the code to R, this defines the functions, which can then be called in R.

Package dependencies

These packages are required for most of the code to work. Load these first.

  1. utils
  2. RODBC

Setup

Establishes the connection to the database. Copy paste first.

db <- odbcConnect("opasnet_base") #assuming "opasnet_base" is the defined DSN

Downloading data

At the moment very sensitive to glitches in the database (multiple locations for the same index for a single cell), and does not handle large quantities of data well. Ideally should work, testing needed.

Low level functions (mainly sql queries)

Copy paste these second.

#Get latest series id
op_baseGetLatest <- function(ident) {
	series_list <- sqlQuery(db, paste('
		SELECT actobj.series_id 
		FROM obj 
		LEFT JOIN actobj 
		ON obj.id = actobj.obj_id 
		WHERE obj.ident = "', ident, '"', sep = ))
	series_list[nrow(series_list),1]
}

#Get results (cell id, result)
op_baseGetData <- function(series_id) {
	sqlQuery(db, paste('
		SELECT cell.id, res.result 
		FROM actobj 
		LEFT JOIN cell 
		ON actobj.id = cell.actobj_id 
		LEFT JOIN res 
		ON cell.id = res.cell_id 
		WHERE actobj.series_id = ', series_id, sep = ))
}

#Not in use: Get indexes and locations per cell
op_baseGetDims <- function(cell_id_list) {
	sqlQuery(db, paste('
		SELECT loccell.cell_id, loc.id, loc.location, loc.obj_id_i, obj.name 
		FROM loccell 
		LEFT JOIN loc 
		ON loccell.loc_id = loc.id 
		LEFT JOIN obj 
		ON loc.obj_id_i = obj.id 
		WHERE loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ))
}

#Experimental: Sort locations by index
#tapply(dims_table[,3], dims_table[,5], paste)

#Not in use: Get indexes from dims_table
op_baseFindInd <- function(dims_table) {
	factor(op_baseDims[,4]) #could also use "obj_id_i"(obj.name for index) instead of 5 for column name
}

#Get indexes
op_baseGetInd <- function (cell_id_list) {
	indexes <- sqlQuery(db, paste('
		SELECT DISTINCT loc.obj_id_i 
		FROM loccell 
		LEFT JOIN loc 
		ON loccell.loc_id = loc.id 
		WHERE loccell.cell_id 
		BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ))
	as.numeric(indexes[,1])
}

#Get locations
op_baseGetLoc <- function(index, cell_id_list) {
	locations <- sqlQuery(db, paste('
		SELECT DISTINCT loc.id 
		FROM loccell 
		LEFT JOIN loc 
		ON loccell.loc_id = loc.id 
		WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ))
	as.numeric(locations[,1])
}

#Get location names (will not work properly if different indexes have locations with similar name)
op_baseGetLocNames <- function(index, cell_id_list) {
	locations <- sqlQuery(db, paste('
		SELECT DISTINCT loc.location 
		FROM loccell 
		LEFT JOIN loc 
		ON loccell.loc_id = loc.id 
		WHERE loc.obj_id_i = ', index, ' AND loccell.cell_id BETWEEN ', cell_id_list[1,1], ' AND ', cell_id_list[nrow(cell_id_list),1], sep = ))
	as.numeric(locations[,1])
}

#Get location name (simple)
op_baseGetLocName <- function(loc_id) {
	sqlQuery(db, paste('
		SELECT location 
		FROM loc 
		WHERE id = ', loc_id, sep = ))
}

#Get index name
op_baseGetIndName <- function(ind_id) {
	sqlQuery(db, paste('
		SELECT name 
		FROM obj 
		WHERE id = ', ind_id, sep = )) }

#Get locations for a specific cell
op_baseGetLocs <- function(cell_id) {
	sqlQuery(db, paste('
		SELECT DISTINCT loc.location, loc.id 
		FROM loccell 
		LEFT JOIN loc 
		ON loccell.loc_id = loc.id 
		WHERE loccell.cell_id = ', cell_id, sep = )) #backup: 'AND loc.obj_id_i BETWEEN ', <<<INDEX>>>, ' AND ', <<<INDEX>>>
}

High Level function

Copy paste this last.

op_baseArray <- function(page_ident) {
	
	#Basic data downloading
	
	op_baseLatest <- op_baseGetLatest(page_ident)
	op_baseData <- op_baseGetData(op_baseLatest) #cell,result
	#final_array <- op_baseData[,2] #ideal case (data in proper order)
	#op_baseDims <- op_baseGetDims(op_baseData)
	#op_baseIndexes <- op_baseFindInd(op_baseDims)
	op_baseInd <- op_baseGetInd(op_baseData)
	
	#Array structure building
	
	IndLengths <- 1:3 #blank vector for index lengths
	IndList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for indexes and locations for naming the dimensions of the array
	IndIdList <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length for ids of indexes
	IndNames <- c(1) #blank vector for index names
	#temp <- c(1) #temporary vector
	for (i in 1:length(op_baseInd)) {
		IndIdListi <- op_baseGetLoc(op_baseInd[i], op_baseData) #fetch location ids in index to a list, assumed every cell has locations for all unique index id values in loc.obj_id_i in the range of cells
		IndNames[i] <- paste(op_baseGetIndName(op_baseInd[i])[1,1]) #fetch index name to a vector
		IndLengths[i] <- length(IndIdListi) #backup: op_baseGetLoc(op_baseInd[i], op_baseData)) 
		for (j in 1:IndLengths[i]) {
			#tempi[j] <- op_baseGetLocName(IndIdListi[j])
			IndListi[j] <- paste(op_baseGetLocName(IndIdListi[j])[1,1])
		}
		#IndListi <- tempi
	}
	final_array <- rep(NA, prod(IndLengths))
	dim(final_array) <- IndLengths
	names(IndList) <- IndNames
	dimnames(final_array) <- IndList
	
	#Cell population
	
	#IndLocNumbers <- rep(vector("list", 1), length(op_baseInd)) #blank list of appropriate length
	#for (i in 1:length(op_baseInd)) {
	#	IndLocNumbersi <- 1:IndLengths[i]
	#}
	LocList <- c(1) #blank vector for locations per cell
	LocMap <- c(1) #blank vector for location to position mapping
	CellPosition <- c(1) #blank vector for cell position
	for (i in 1:length(op_baseData[,1])) { #for each cell
		LocList <- paste(op_baseGetLocs(op_baseData[i,1])[,1]) #get list of locations
		for (j in 1:length(LocList)) { #for each location per cell
			LocMap <- 1:IndLengths[j] #get available positions in given index, assumed correct order of indexes in LocList
			names(LocMap) <- dimnames(final_array)j #add names (locations) of positions in given index
			CellPosition[j] <- LocMap[LocList[j]] #match cell specific locations to positions in index
		}
		final_array[CellPosition] <- op_baseData[i,2] #insert result in array in position determined by CellPosition
	}
	final_array #print/return the final array
}

Usage

variable <- op_baseArray("<page identifier>")