Opasnet Base Connection for R: Difference between revisions

From Opasnet
Jump to navigation Jump to search
(Improved dataframe to array conversion to allow any order of columns)
Line 18: Line 18:
===Functions===
===Functions===


  op_baseGetData <- function(dsn, ident, include = NULL, exclude = NULL, series_id = NULL) {
  <nowiki>
db <- odbcConnect(dsn)
op_baseGetData <- function(dsn, ident, include = NULL, exclude = NULL, series_id = NULL) {
<nowiki>obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep=''))[1,1]</nowiki>
db <- odbcConnect(dsn)
if (length(series_id) == 0) {series_id <- sqlQuery(db, paste('SELECT series_id FROM actobj WHERE obj_id = ', obj_id,  
obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep=''))[1,1]
<nowiki>' ORDER BY series_id DESC LIMIT 1', sep = ''))[1,1]}</nowiki>
if (length(series_id) == 0) {series_id <- sqlQuery(db, paste('SELECT series_id FROM actobj WHERE obj_id = ', obj_id,  
sliced <- FALSE
' ORDER BY series_id DESC LIMIT 1', sep = ''))[1,1]}
locations <- NULL
sliced <- FALSE
x <- 1
locations <- NULL
basequery <- paste('SELECT loccell.cell_id FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN',  
x <- 1
' loccell ON cell.id = loccell.cell_id WHERE actobj.obj_id = ', obj_id, ' AND actobj.series_id = ',  
basequery <- paste('SELECT loccell.cell_id FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN',  
<nowiki>series_id, ' AND loccell.loc_id IN(', sep = '')</nowiki>
' loccell ON cell.id = loccell.cell_id WHERE actobj.obj_id = ', obj_id, ' AND actobj.series_id = ',  
if (length(include) != 0) {
series_id, ' AND loccell.loc_id IN(', sep = '')
sliced <- TRUE
if (length(include) != 0) {
locations[x] <- paste("IN(", basequery, paste(include, collapse = ","), ")", sep = "")
sliced <- TRUE
x <- x + 1
locations[x] <- paste("IN(", basequery, paste(include, collapse = ","), ")", sep = "")
}
x <- x + 1
if (length(exclude) != 0) {
}
sliced <- TRUE
if (length(exclude) != 0) {
locations[x] <- paste("NOT IN(", basequery, paste(exclude, collapse = ","), ")", sep = "")
sliced <- TRUE
}
locations[x] <- paste("NOT IN(", basequery, paste(exclude, collapse = ","), ")", sep = "")
if (sliced == FALSE) {
}
Data <- sqlQuery(db, paste('SELECT cell.id, res.obs, obj.ident AS ind, loc.location AS loc, res.result',  
if (sliced == FALSE) {
' FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN res ON cell.id =',  
Data <- sqlQuery(db, paste('SELECT cell.id, res.obs, obj.ident AS ind, loc.location AS loc, res.result',  
' res.cell_id LEFT JOIN loccell ON cell.id = loccell.cell_id LEFT JOIN loc ON loccell.loc_id',
' FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN res ON cell.id =',  
' = loc.id LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE actobj.obj_id = ', obj_id,  
' res.cell_id LEFT JOIN loccell ON cell.id = loccell.cell_id LEFT JOIN loc ON loccell.loc_id',
<nowiki>' AND actobj.series_id = ', series_id, sep = '')) } else {</nowiki>
' = loc.id LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE actobj.obj_id = ', obj_id,  
Data <- sqlQuery(db, paste('SELECT cell.id, res.obs, obj.ident AS ind, loc.location AS loc, res.result',  
' AND actobj.series_id = ', series_id, sep = '')) } else {
' FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN res ON cell.id =',  
Data <- sqlQuery(db, paste('SELECT cell.id, res.obs, obj.ident AS ind, loc.location AS loc, res.result',  
' res.cell_id LEFT JOIN loccell ON cell.id = loccell.cell_id LEFT JOIN loc ON loccell.loc_id',
' FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN res ON cell.id =',  
' = loc.id LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE actobj.obj_id = ', obj_id,  
' res.cell_id LEFT JOIN loccell ON cell.id = loccell.cell_id LEFT JOIN loc ON loccell.loc_id',
' AND actobj.series_id = ', series_id, ' AND (cell.id ', paste(locations, collapse = ') AND cell.id '),  
' = loc.id LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE actobj.obj_id = ', obj_id,  
<nowiki>'))', sep = ''))</nowiki>
' AND actobj.series_id = ', series_id, ' AND (cell.id ', paste(locations, collapse = ') AND cell.id '),  
}
'))', sep = ''))
odbcClose(db)
}
Data <- Data[order(Data[,1], Data[,2], Data[,3]),]
odbcClose(db)
nind <- length(levels(Data[,3]))
Data <- Data[order(Data[,1], Data[,2], Data[,3]),]
nres <- nrow(Data)/nind
nind <- length(levels(Data[,3]))
dataframe <- Data[1:nres*nind, c(1,2)]
nres <- nrow(Data)/nind
for (i in nind:1) {
dataframe <- Data[1:nres*nind, c(1,2)]
dataframe[,2 + nind - i + 1] <- Data[1:nres*nind - i + 1, 4]
for (i in nind:1) {
colnames(dataframe)[2 + nind - i + 1] <- as.character(Data[nind - i + 1, 3])
dataframe[,2 + nind - i + 1] <- Data[1:nres*nind - i + 1, 4]
}
colnames(dataframe)[2 + nind - i + 1] <- as.character(Data[nind - i + 1, 3])
dataframe[,3 + nind] <- Data[1:nres*nind, 5]
}
colnames(dataframe)[3 + nind] <- "Result"
dataframe[,3 + nind] <- Data[1:nres*nind, 5]
rownames(dataframe) <- 1:nres
colnames(dataframe)[3 + nind] <- "Result"
dataframe
rownames(dataframe) <- 1:nres
}
dataframe
}</nowiki>


====Usage====
====Usage====
Line 84: Line 85:
====Function====
====Function====


  op_baseGetLocs <- function(dsn, ident, series_id = NULL) {
  <nowiki>
db <- odbcConnect(dsn)
op_baseGetLocs <- function(dsn, ident, series_id = NULL) {
<nowiki>obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep=''))[1,1]</nowiki>
db <- odbcConnect(dsn)
if (length(series_id) == 0) {series_id <- sqlQuery(db, paste('SELECT series_id FROM actobj WHERE obj_id = ', obj_id,  
obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep=''))[1,1]
<nowiki>' ORDER BY series_id DESC LIMIT 1', sep = ''))[1,1]}</nowiki>
if (length(series_id) == 0) {series_id <- sqlQuery(db, paste('SELECT series_id FROM actobj WHERE obj_id = ', obj_id,  
Locs <- sqlQuery(db, paste("SELECT obj.ident AS ind, loc.location AS loc, loc.id AS loc_id",  
' ORDER BY series_id DESC LIMIT 1', sep = ''))[1,1]}
" FROM actobj LEFT JOIN actloc ON actobj.id = actloc.actobj_id LEFT JOIN loc ON actloc.loc_id = loc.id",  
Locs <- sqlQuery(db, paste("SELECT obj.ident AS ind, loc.location AS loc, loc.id AS loc_id",  
" LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE actobj.obj_id = ", obj_id, " AND actobj.series_id = ",  
" FROM actobj LEFT JOIN actloc ON actobj.id = actloc.actobj_id LEFT JOIN loc ON actloc.loc_id = loc.id",  
series_id, sep = ""))
" LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE actobj.obj_id = ", obj_id, " AND actobj.series_id = ",  
odbcClose(db)
series_id, sep = ""))
Locs <- Locs[order(Locs[,1]),]
odbcClose(db)
rownames(Locs) <- 1:nrow(Locs)
Locs <- Locs[order(Locs[,1]),]
Locs
rownames(Locs) <- 1:nrow(Locs)
}
Locs
}</nowiki>


*Returns all indexes and locations and their ids in a table of format: ind, loc, loc_id.
*Returns all indexes and locations and their ids in a table of format: ind, loc, loc_id.
Line 105: Line 107:
===Functions===
===Functions===


  DataframeToArray <- function(dataframe) {
  <nowiki>
DimNames <- rep(vector("list", 1), ncol(dataframe) - 2)
DataframeToArray <- function(dataframe, rescol = NULL) {
names(DimNames) <- c(colnames(dataframe)[2:(ncol(dataframe) - 1)])
ColNames <- colnames(dataframe[(colnames(dataframe) == "id") == FALSE])
nind <- length(names(DimNames))
if (length(ColNames[(ColNames == "obs")])>0) {if(length(levels(factor(dataframe[,"obs"]))) == 1) ColNames <- ColNames[(ColNames == "obs"
indlengths <- 0
) == FALSE]}
for (i in 1:nind) {
if (length(rescol)==0) {
<nowiki>DimNames[[i]] <- levels(factor(dataframe[,1 + i]))</nowiki>
rescol <- colnames(dataframe) == "Freq"
<nowiki>indlengths[i] <- length(DimNames[[i]])</nowiki>
if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Freq" else {
}
rescol <- colnames(dataframe) == "Result"
array <- rep(NA, prod(indlengths))
if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Result" else {
dim(array) <- indlengths
rescol <- colnames(dataframe) == "result"
dimnames(array) <- DimNames
if (length(rescol[rescol==TRUE]) >= 1) rescol <- "result"
array[as.matrix(format(dataframe[,c(1:nind + 1)], trim = TRUE))] <- dataframe[,ncol(dataframe)]
}
array
}
}
} else {ColNames <- ColNames[(ColNames == rescol) == FALSE]}
ColNames <- ColNames[(ColNames == "Result") == FALSE]
ColNames <- ColNames[(ColNames == "result") == FALSE]
ColNames <- ColNames[(ColNames == "Freq") == FALSE]
nind <- length(ColNames)
DimNames <- rep(vector("list", 1), nind)
names(DimNames) <- ColNames
indlengths <- 0
for (i in 1:nind) {
DimNames[[i]] <- levels(factor(dataframe[,ColNames[i]]))
indlengths[i] <- length(DimNames[[i]])
}
array <- rep(NA, prod(indlengths))
dim(array) <- indlengths
dimnames(array) <- DimNames
array[as.matrix(format(dataframe[,ColNames], trim = TRUE))] <- dataframe[,rescol]
array
}</nowiki>


{{attack|# |This does not work for probabilistic data. ''as.matrix(dataframe[,c(1:nind + 1)])'' gives values like "&nbsp;&nbsp;&nbsp;1", "&nbsp;&nbsp;&nbsp;2" for obs, but the array entries for obs are "1", "2" etc. They do not match.|--[[User:Jouni|Jouni]] 07:00, 4 November 2010 (UTC)}}
{{attack|# |This does not work for probabilistic data. ''as.matrix(dataframe[,c(1:nind + 1)])'' gives values like "&nbsp;&nbsp;&nbsp;1", "&nbsp;&nbsp;&nbsp;2" for obs, but the array entries for obs are "1", "2" etc. They do not match.|--[[User:Jouni|Jouni]] 07:00, 4 November 2010 (UTC)}}
{{defend|# |<nowiki>Fixed: ''array[as.matrix(format(dataframe[,c(1:nind + 1)], trim = TRUE))] <- dataframe[,ncol(dataframe)]''</nowiki>|--[[User:Teemu R|Teemu R]] 08:01, 4 November 2010 (UTC)}}
{{defend|# |<nowiki>Fixed: ''array[as.matrix(format(dataframe[,c(1:nind + 1)], trim = TRUE))] <- dataframe[,ncol(dataframe)]''</nowiki>|--[[User:Teemu R|Teemu R]] 08:01, 4 November 2010 (UTC)}}


====Usage====
====Usage====


  variable2 <- DataframeToArray(variable1)
  variable2 <- DataframeToArray(variable1, rescol = NULL)


*variable1 must be in similar format as the result when downloading.
*variable1 must be in similar format as the result when downloading.
**first column (id) is ignored
*Columns named "id" and various versions of "Result" are ignored for dimension creation.
*variable1 <- function(variable1) is allowed in R
**"obs" column will also be ignored if there's only one.
*The column containing the values may be defined in the parameters, otherwise it is assumed to be either "Freq", "Result" or "result" in that order.


===Other useful stuff===
===Other useful stuff===
Line 158: Line 179:
===Functions===
===Functions===


  op_baseWrite <- function(dsn, input, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL, acttype = NULL, sort = TRUE) {
  <nowiki>
#Open database connection
op_baseWrite <- function(dsn, input, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL, acttype = NULL, sort = TRUE) {
db <- odbcConnect(dsn)
#Open database connection
db <- odbcConnect(dsn)
#Coerce input into a data frame if it isn't one already; get rid of empty cells
if (is.array(input)) dataframe <- as.data.frame(as.table(input)) else dataframe <- input
#Coerce input into a data frame if it isn't one already; get rid of empty cells
rescol <- colnames(dataframe) == "Freq"
if (is.array(input)) dataframe <- as.data.frame(as.table(input)) else dataframe <- input
if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Freq" else {
rescol <- colnames(dataframe) == "Freq"
rescol <- colnames(dataframe) == "Result"
if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Freq" else {
if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Result" else {
rescol <- colnames(dataframe) == "Result"
rescol <- colnames(dataframe) == "result"
if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Result" else {
if (length(rescol[rescol==TRUE]) >= 1) rescol <- "result"
rescol <- colnames(dataframe) == "result"
}
if (length(rescol[rescol==TRUE]) >= 1) rescol <- "result"
}
}
values <- dataframe[,rescol]
}
cond <- (is.na(values) == FALSE)
dataframe <- dataframe[is.na(dataframe[,rescol]) == FALSE,]
dataframe <- dataframe[cond,]
#Add page to database (if it doesn't already exist)
#Add page to database (if it doesn't already exist)
if (is.null(ident)==TRUE) if (interactive()) ident <- readline(paste("What is the identifier of this object?",  
if (is.null(ident)==TRUE) if (interactive()) ident <- readline(paste("What is the identifier of this object?",  
"\n", sep = "")) else stop("indentifier of object no given")
"\n", sep = "")) else stop("indentifier of object no given")
wiki_name <- substring(ident, 1, 5)
wiki_name <- substring(ident, 1, 5)
if (wiki_name=="Op_en") {wiki_id <- 1; page <- substring(ident, 6, nchar(ident))} else if (wiki_name=="Op_fi") {
if (wiki_name=="Op_en") {wiki_id <- 1; page <- substring(ident, 6, nchar(ident))} else if (wiki_name=="Op_fi") {
wiki_id <- 2; page <- substring(ident, 6, nchar(ident))} else {wiki_id <- 0; page <- 0}
wiki_id <- 2; page <- substring(ident, 6, nchar(ident))} else {wiki_id <- 0; page <- 0}
obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep = ''))[1,1]
<nowiki>obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep = ''))[1,1]</nowiki>
if (is.na(obj_id)==TRUE) {
if (is.na(obj_id)==TRUE) {
if (is.null(name)==TRUE) if (interactive()) name <- readline(paste("What is the name of this object?",  
if (is.null(name)==TRUE) if (interactive()) name <- readline(paste("What is the name of this object?",  
"\n", sep = "")) else stop("object name not given")
"\n", sep = "")) else stop("object name not given")
if (is.null(unit)==TRUE) if (interactive()) unit <- readline(paste("What is the unit of this object?",  
if (is.null(unit)==TRUE) if (interactive()) unit <- readline(paste("What is the unit of this object?",  
"\n", sep = "")) else stop("unit not given")
"\n", sep = "")) else stop("unit not given")
if (is.null(objtype_id)==TRUE) if (interactive()) objtype_id <- readline(paste("What type of object is",  
if (is.null(objtype_id)==TRUE) if (interactive()) objtype_id <- readline(paste("What type of object is",  
" this (id)?", paste(paste(sqlQuery(db, "SELECT id FROM objtype")[,1], sqlQuery(db, paste("SELECT objtype",
" this (id)?", paste(paste(sqlQuery(db, "SELECT id FROM objtype")[,1], sqlQuery(db, paste("SELECT objtype",
" FROM objtype", sep = ""))[,1], sep = " - "), collapse = ", "), "\n", collapse = " ")) else {
" FROM objtype", sep = ""))[,1], sep = " - "), collapse = ", "), "\n", collapse = " ")) else {
stop("object type not given")}
stop("object type not given")}
sqlQuery(db, paste('INSERT INTO obj (ident, name, unit, objtype_id, page, wiki_id) VALUES ("', paste(ident,  
sqlQuery(db, paste('INSERT INTO obj (ident, name, unit, objtype_id, page, wiki_id) VALUES ("', paste(ident,  
name, unit, sep = '","'), '",', paste(objtype_id, page, wiki_id, sep = ','), ')', sep = ''))
<nowiki>name, unit, sep = '","'), '",', paste(objtype_id, page, wiki_id, sep = ','), ')', sep = ''))</nowiki>
obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep = ''))[1,1]
<nowiki>obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep = ''))[1,1]</nowiki>
}
}
#Write act and actobj
#Write act and actobj
if (is.null(who)==TRUE) if (interactive()) {who <- readline(paste("What is the name of the uploader?", "\n", sep = ""))  
if (is.null(who)==TRUE) if (interactive()) {who <- readline(paste("What is the name of the uploader?", "\n", sep = ""))  
} else stop("uploader name not given")
} else stop("uploader name not given")
series_id <- sqlQuery(db, paste("SELECT series_id FROM actobj WHERE obj_id = ", obj_id, " ORDER BY series_id DESC LIMIT 1",  
series_id <- sqlQuery(db, paste("SELECT series_id FROM actobj WHERE obj_id = ", obj_id, " ORDER BY series_id DESC LIMIT 1",  
sep = ""))[1,1]
sep = ""))[1,1]
if (is.na(series_id)==FALSE) {if (is.null(acttype)==TRUE) {if (interactive()) {acttype <- readline(paste("What type of upload",  
if (is.na(series_id)==FALSE) {if (is.null(acttype)==TRUE) {if (interactive()) {acttype <- readline(paste("What type of upload",  
" is this? 4 - new data to replace any existing, 5 - new data to be appended to existing data (must have the same",  
" is this? 4 - new data to replace any existing, 5 - new data to be appended to existing data (must have the same",  
" indices).", "\n", sep = ""))  
" indices).", "\n", sep = ""))  
} else acttype <- 4  
} else acttype <- 4  
}} else acttype <- 4
}} else acttype <- 4
if (acttype != 4 & acttype != 5) stop ("proper acttype not given")
if (acttype != 4 & acttype != 5) stop ("proper acttype not given")
sqlQuery(db, paste('INSERT INTO act (acttype_id, who, comments) VALUES (', acttype, ',"', who, '","R upload")', sep = ''))
<nowiki>sqlQuery(db, paste('INSERT INTO act (acttype_id, who, comments) VALUES (', acttype, ',"', who, '","R upload")', sep = ''))</nowiki>
act_id <- sqlQuery(db, "SELECT id FROM act ORDER BY id DESC LIMIT 1")[1,1]
act_id <- sqlQuery(db, "SELECT id FROM act ORDER BY id DESC LIMIT 1")[1,1]
if (acttype == 4) series_id <- act_id
if (acttype == 4) series_id <- act_id
sqlQuery(db, paste('INSERT INTO actobj (act_id, obj_id, series_id) VALUES (', paste(act_id, obj_id, series_id, sep = ','),  
sqlQuery(db, paste('INSERT INTO actobj (act_id, obj_id, series_id) VALUES (', paste(act_id, obj_id, series_id, sep = ','),  
')', sep = ''))
<nowiki>')', sep = ''))</nowiki>
actobj_id <- sqlQuery(db, paste('SELECT id FROM actobj WHERE act_id = ', act_id, sep = ''))[1,1]
<nowiki>actobj_id <- sqlQuery(db, paste('SELECT id FROM actobj WHERE act_id = ', act_id, sep = ''))[1,1]</nowiki>
#Write indexes
#Write indexes
ColNames <- colnames(dataframe[(colnames(dataframe) == "id") == FALSE])
ColNames <- colnames(dataframe[(colnames(dataframe) == "id") == FALSE])
ColNames <- ColNames[(ColNames == "obs") == FALSE]
ColNames <- ColNames[(ColNames == "obs") == FALSE]
ColNames <- ColNames[(ColNames == "Result") == FALSE]
ColNames <- ColNames[(ColNames == "Result") == FALSE]
ColNames <- ColNames[(ColNames == "result") == FALSE]
ColNames <- ColNames[(ColNames == "result") == FALSE]
ColNames <- ColNames[(ColNames == "Freq") == FALSE]
ColNames <- ColNames[(ColNames == "Freq") == FALSE]
nind <- length(ColNames)
nind <- length(ColNames)
DimNames <- rep(vector("list", 1), nind)
DimNames <- rep(vector("list", 1), nind)
names(DimNames) <- ColNames
names(DimNames) <- ColNames
indlengths <- 0
indlengths <- 0
for (i in 1:nind) {
for (i in 1:nind) {
DimNames[[i]] <- levels(factor(dataframe[, ColNames[i]]))
<nowiki>DimNames[[i]] <- levels(factor(dataframe[, ColNames[i]]))</nowiki>
indlengths[i] <- length(DimNames[[i]])
<nowiki>indlengths[i] <- length(DimNames[[i]])</nowiki>
}
}
for (i in 1:length(ColNames)) {
for (i in 1:length(ColNames)) {
sqlQuery(db, paste('INSERT IGNORE INTO obj (ident, name, objtype_id) VALUES ("', gsub(' ', '_', ColNames[i]), '","',  
<nowiki>sqlQuery(db, paste('INSERT IGNORE INTO obj (ident, name, objtype_id) VALUES ("', gsub(' ', '_', ColNames[i]), '","', </nowiki>
ColNames[i], '", 6)', sep = ''))
<nowiki>ColNames[i], '", 6)', sep = ''))</nowiki>
}
}
IndIds <- sqlQuery(db, paste('SELECT id, ident FROM obj WHERE ident IN("', paste(gsub(" ", "_", ColNames),  
IndIds <- sqlQuery(db, paste('SELECT id, ident FROM obj WHERE ident IN("', paste(gsub(" ", "_", ColNames),  
collapse = '","'), '")', sep = ''))
<nowiki>collapse = '","'), '")', sep = ''))</nowiki>
DimIds <- DimNames
DimIds <- DimNames
DimN <- 1:nind
DimN <- 1:nind
names(DimN) <- tolower(gsub(" ", "_", ColNames))
names(DimN) <- tolower(gsub(" ", "_", ColNames))
for (i in 1:nrow(IndIds)) {
for (i in 1:nrow(IndIds)) {
names(DimIds)[DimN[tolower(IndIds[i,2])]] <- IndIds[i, 1]
names(DimIds)[DimN[tolower(IndIds[i,2])]] <- IndIds[i, 1]
}
}
#Write locations
#Write locations
y <- 1
y <- 1
LocNames <- as.data.frame(matrix(rep(NA, 2*sum(indlengths)), sum(indlengths), 2))
LocNames <- as.data.frame(matrix(rep(NA, 2*sum(indlengths)), sum(indlengths), 2))
for (i in 1:nind) {
for (i in 1:nind) {
LocNames[y:(y + indlengths[i] - 1),1:2] <- matrix(c(rep(names(DimIds)[i], indlengths[i]),  
LocNames[y:(y + indlengths[i] - 1),1:2] <- matrix(c(rep(names(DimIds)[i], indlengths[i]),  
DimNames[[i]]), indlengths[i], 2)
<nowiki>DimNames[[i]]), indlengths[i], 2)</nowiki>
y <- y + indlengths[i]
y <- y + indlengths[i]
}
}
for (i in 1:nrow(LocNames)) {
for (i in 1:nrow(LocNames)) {
sqlQuery(db, paste('INSERT IGNORE INTO loc (obj_id_i, location) VALUES (', LocNames[i,1], ',"', LocNames[i,2], '")',  
<nowiki>sqlQuery(db, paste('INSERT IGNORE INTO loc (obj_id_i, location) VALUES (', LocNames[i,1], ',"', LocNames[i,2], '")', </nowiki>
sep = ''))
<nowiki>sep = ''))</nowiki>
}
}
LocIds <- data.frame(matrix(NA, nrow(LocNames), 3))
LocIds <- data.frame(matrix(NA, nrow(LocNames), 3))
for (i in 1:nind) {
for (i in 1:nind) {
LocIds[(sum(indlengths[1:i]) - indlengths[i] + 1):sum(indlengths[1:i]),1:3] <- format(sqlQuery(db, paste('SELECT id, obj_id_i',  
LocIds[(sum(indlengths[1:i]) - indlengths[i] + 1):sum(indlengths[1:i]),1:3] <- format(sqlQuery(db, paste('SELECT id, obj_id_i, location',  
', location FROM loc WHERE location IN("', paste(LocNames[(sum(indlengths[1:i]) - indlengths[i] + 1):sum(indlengths[1:i]  
' FROM loc WHERE location IN("', paste(LocNames[(sum(indlengths[1:i]) - indlengths[i] + 1):sum(indlengths[1:i]), 2],  
), 2], collapse = '","'), '") AND obj_id_i IN(', paste(names(DimIds)[i], collapse = ','), ')', sep = '')))
<nowiki>collapse = '","'), '") AND obj_id_i IN(', paste(names(DimIds)[i], collapse = ','), ')', sep = '')))</nowiki>
}
}
LocMap <- NA
LocMap <- NA
y <- 1
y <- 1
for (i in 1:nind) {
for (i in 1:nind) {
LocMap <- LocIds[LocIds[,2]==names(DimIds)[i], 1]
LocMap <- LocIds[LocIds[,2]==names(DimIds)[i], 1]
names(LocMap) <- tolower(LocIds[LocIds[,2]==names(DimIds)[i], 3])
names(LocMap) <- tolower(LocIds[LocIds[,2]==names(DimIds)[i], 3])
DimIds[[i]] <- LocMap[tolower(DimNames[[i]])]
<nowiki>DimIds[[i]] <- LocMap[tolower(DimNames[[i]])]</nowiki>
}
}
#Writing actloc
#Writing actloc
sqlQuery(db, paste("INSERT INTO actloc (actobj_id, loc_id) VALUES (", paste(actobj_id, LocIds[,1], sep = ",",  
sqlQuery(db, paste("INSERT INTO actloc (actobj_id, loc_id) VALUES (", paste(actobj_id, LocIds[,1], sep = ",",  
collapse = "),("), ")", sep = ""))
collapse = "),("), ")", sep = ""))
#Changing location names in table into ids
#Changing location names in table into ids
for (i in 1:nind) {
for (i in 1:nind) {
dataframe[,ColNames[i]] <- factor(dataframe[,ColNames[i]])
dataframe[,ColNames[i]] <- factor(dataframe[,ColNames[i]])
levels(dataframe[,ColNames[i]]) <- DimIds[[i]]
<nowiki>levels(dataframe[,ColNames[i]]) <- DimIds[[i]]</nowiki>
}
}
#A hidden parameter for adjusting query packet sizes, the higher the faster, though crash becomes likelier
#A hidden parameter for adjusting query packet sizes, the higher the faster, though crash becomes likelier
maxrows <- 50000
maxrows <- 50000
#Writing cell
#Writing cell
obscol <- colnames(dataframe) == "obs"
obscol <- colnames(dataframe) == "obs"
if (length(obscol[obscol==TRUE]) >= 1) {
if (length(obscol[obscol==TRUE]) >= 1) {
obscol <- TRUE
obscol <- TRUE
n <- length(levels(factor(dataframe[,"obs"])))} else {
n <- length(levels(factor(dataframe[,"obs"])))} else {
obscol <- FALSE
obscol <- FALSE
n <- 1
n <- 1
}
}
if (obscol) if (sort) {
if (obscol) if (sort) {
dataframe <- dataframe[order(dataframe[,"obs"]),]
dataframe <- dataframe[order(dataframe[,"obs"]),]
for (i in 1:nind) {
for (i in 1:nind) {
dataframe <- dataframe[order(dataframe[,ColNames[i]]),]
dataframe <- dataframe[order(dataframe[,ColNames[i]]),]
}
}
}
}
ncell <- nrow(dataframe)/n
ncell <- nrow(dataframe)/n
cellQuery <- NA
cellQuery <- NA
cellQuery[1:ncell*8-7] <- actobj_id
cellQuery[1:ncell*8-7] <- actobj_id
cellQuery[1:ncell*8-6] <- ","
cellQuery[1:ncell*8-6] <- ","
if (n == 1) {
if (n == 1) {
cellQuery[1:ncell*8-5] <- dataframe[,rescol]
cellQuery[1:ncell*8-5] <- dataframe[,rescol]
cellQuery[1:ncell*8-3] <- 0} else {
cellQuery[1:ncell*8-3] <- 0} else {
cellQuery[1:ncell*8-5] <- apply(matrix(dataframe[,rescol], n, ncell), 2, mean)
cellQuery[1:ncell*8-5] <- apply(matrix(dataframe[,rescol], n, ncell), 2, mean)
cellQuery[1:ncell*8-3] <- sd(matrix(dataframe[,rescol], n, ncell))
cellQuery[1:ncell*8-3] <- sd(matrix(dataframe[,rescol], n, ncell))
}
}
cellQuery[1:ncell*8-4] <- ","
cellQuery[1:ncell*8-4] <- ","
cellQuery[1:ncell*8-2] <- ","
cellQuery[1:ncell*8-2] <- ","
cellQuery[1:ncell*8-1] <- n
cellQuery[1:ncell*8-1] <- n
cellQuery[1:(ncell-1)*8] <- "),("
cellQuery[1:(ncell-1)*8] <- "),("
if (ncell >= maxrows) {
if (ncell >= maxrows) {
for (i in 1:(ncell%/%maxrows)) {
for (i in 1:(ncell%/%maxrows)) {
sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, sd, n) VALUES (', paste(cellQuery[
sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, sd, n) VALUES (', paste(cellQuery[
((i-1)*8*maxrows+1):(i*8*maxrows-1)], collapse = ''), ')', sep = ''))
<nowiki>((i-1)*8*maxrows+1):(i*8*maxrows-1)], collapse = ''), ')', sep = ''))</nowiki>
}
}
}
}
if (ncell%%maxrows != 0) sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, sd, n) VALUES (', paste(cellQuery[(ncell%/%
if (ncell%%maxrows != 0) sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, sd, n) VALUES (', paste(cellQuery[(ncell%/%
maxrows*8*maxrows+1):length(cellQuery)], collapse = ''), ')', sep = ''))
<nowiki>maxrows*8*maxrows+1):length(cellQuery)], collapse = ''), ')', sep = ''))</nowiki>
#Writing res
#Writing res
cell_id <- sqlQuery(db, paste("SELECT id FROM cell WHERE actobj_id = ", actobj_id, " ORDER BY ID", sep = ""))[,1]
cell_id <- sqlQuery(db, paste("SELECT id FROM cell WHERE actobj_id = ", actobj_id, " ORDER BY ID", sep = ""))[,1]
if (ncell != length(cell_id)) stop("Number of cells written does not match with expected value")
if (ncell != length(cell_id)) stop("Number of cells written does not match with expected value")
resQuery <- NA
resQuery <- NA
resQuery[1:nrow(dataframe)*6-5] <- rep(cell_id, each = n)
resQuery[1:nrow(dataframe)*6-5] <- rep(cell_id, each = n)
resQuery[1:nrow(dataframe)*6-4] <- ","
resQuery[1:nrow(dataframe)*6-4] <- ","
if (n==1) resQuery[1:nrow(dataframe)*6-3] <- "0" else resQuery[1:nrow(dataframe)*6-3] <- dataframe[,"obs"]
if (n==1) resQuery[1:nrow(dataframe)*6-3] <- "0" else resQuery[1:nrow(dataframe)*6-3] <- dataframe[,"obs"]
resQuery[1:nrow(dataframe)*6-2] <- ","
resQuery[1:nrow(dataframe)*6-2] <- ","
resQuery[1:nrow(dataframe)*6-1] <- dataframe[,rescol]
resQuery[1:nrow(dataframe)*6-1] <- dataframe[,rescol]
resQuery[1:(nrow(dataframe)-1)*6] <- "),("
resQuery[1:(nrow(dataframe)-1)*6] <- "),("
if (nrow(dataframe) >= maxrows) {
if (nrow(dataframe) >= maxrows) {
for (i in 1:(nrow(dataframe)%/%maxrows)) {
for (i in 1:(nrow(dataframe)%/%maxrows)) {
sqlQuery(db, paste('INSERT INTO res (cell_id, obs, result) VALUES (', paste(resQuery[((i-1)
sqlQuery(db, paste('INSERT INTO res (cell_id, obs, result) VALUES (', paste(resQuery[((i-1)
*6*maxrows+1):(i*6*maxrows-1)], collapse = ''), ')', sep = ''))
<nowiki>*6*maxrows+1):(i*6*maxrows-1)], collapse = ''), ')', sep = ''))</nowiki>
}
}
}
}
if (nrow(dataframe)%%maxrows != 0) sqlQuery(db, paste('INSERT INTO res (cell_id, obs, result) VALUES (', paste(resQuery[
if (nrow(dataframe)%%maxrows != 0) sqlQuery(db, paste('INSERT INTO res (cell_id, obs, result) VALUES (', paste(resQuery[
(nrow(dataframe)%/%maxrows*6*maxrows+1):length(resQuery)], collapse = ''), ')', sep = ''))
<nowiki>(nrow(dataframe)%/%maxrows*6*maxrows+1):length(resQuery)], collapse = ''), ')', sep = ''))</nowiki>
#Writing loccell
#Writing loccell
locidmatrix <- rep(NA, nind*ncell)
locidmatrix <- rep(NA, nind*ncell)
dim(locidmatrix) <- c(nind,ncell)
dim(locidmatrix) <- c(nind,ncell)
for (i in 1:nind) {
for (i in 1:nind) {
locidmatrix[i,] <- as.character(dataframe[1:ncell*n - n + 1,ColNames[i]])
locidmatrix[i,] <- as.character(dataframe[1:ncell*n - n + 1,ColNames[i]])
}
}
loccellQuery <- rep(NA, 4*nind*ncell)
loccellQuery <- rep(NA, 4*nind*ncell)
dim(loccellQuery) <- c(4*nind, ncell)
dim(loccellQuery) <- c(4*nind, ncell)
loccellQuery[1:nind*4-3, 1:ncell] <- rep(cell_id, each = nind)
loccellQuery[1:nind*4-3, 1:ncell] <- rep(cell_id, each = nind)
loccellQuery[1:nind*4-2, 1:ncell] <- ","
loccellQuery[1:nind*4-2, 1:ncell] <- ","
loccellQuery[1:nind*4-1, 1:ncell] <- locidmatrix
loccellQuery[1:nind*4-1, 1:ncell] <- locidmatrix
loccellQuery[1:nind*4, 1:ncell] <- "),("
loccellQuery[1:nind*4, 1:ncell] <- "),("
loccellQuery[nind*4, ncell] <- ""
loccellQuery[nind*4, ncell] <- ""
if (ncell >= maxrows) {
if (ncell >= maxrows) {
for (i in 1:(ncell%/%maxrows)) {
for (i in 1:(ncell%/%maxrows)) {
loccellQuery[nind*4, i*maxrows] <- ""
loccellQuery[nind*4, i*maxrows] <- ""
sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,
sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,
((i-1)*maxrows+1):(i*maxrows)], collapse = ''), ')', sep = ''))
<nowiki>((i-1)*maxrows+1):(i*maxrows)], collapse = ''), ')', sep = ''))</nowiki>
}
}
}
}
if (ncell%%maxrows != 0) sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,
if (ncell%%maxrows != 0) sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,
(ncell%/%maxrows*maxrows+1):ncell], collapse = ''), ')', sep = ''))
<nowiki>(ncell%/%maxrows*maxrows+1):ncell], collapse = ''), ')', sep = ''))</nowiki>
#Close database connection
#Close database connection
odbcClose(db)
odbcClose(db)
}</nowiki>
}


====Usage====
====Usage====

Revision as of 11:48, 17 November 2010


Code for R for the purpose of interacting with the Opasnet Base is collected on this page. To use it, copy paste the code you need to the R console; this defines the functions, after which they can be called for in that R session.

Package dependencies

These packages are required for most of the code to work. To install: from the top bar menu Packages>Install. To load: copy-paste.

library(utils)
library(RODBC)

Downloading data

Functions

op_baseGetData <- function(dsn, ident, include = NULL, exclude = NULL, series_id = NULL) {
	db <- odbcConnect(dsn)
	obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep=''))[1,1]
	if (length(series_id) == 0) {series_id <- sqlQuery(db, paste('SELECT series_id FROM actobj WHERE obj_id = ', obj_id, 
		' ORDER BY series_id DESC LIMIT 1', sep = ''))[1,1]}
	sliced <- FALSE
	locations <- NULL
	x <- 1
	basequery <- paste('SELECT loccell.cell_id FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN', 
			' loccell ON cell.id = loccell.cell_id WHERE actobj.obj_id = ', obj_id, ' AND actobj.series_id = ', 
			series_id, ' AND loccell.loc_id IN(', sep = '')
	if (length(include) != 0) {
		sliced <- TRUE
		locations[x] <- paste("IN(", basequery, paste(include, collapse = ","), ")", sep = "")
		x <- x + 1
	}
	if (length(exclude) != 0) {
		sliced <- TRUE
		locations[x] <- paste("NOT IN(", basequery, paste(exclude, collapse = ","), ")", sep = "")
	}
	if (sliced == FALSE) {
		Data <- sqlQuery(db, paste('SELECT cell.id, res.obs, obj.ident AS ind, loc.location AS loc, res.result', 
			' FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN res ON cell.id =', 
			' res.cell_id LEFT JOIN loccell ON cell.id = loccell.cell_id LEFT JOIN loc ON loccell.loc_id',
			' = loc.id LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE actobj.obj_id = ', obj_id, 
			' AND actobj.series_id = ', series_id, sep = '')) } else {
		Data <- sqlQuery(db, paste('SELECT cell.id, res.obs, obj.ident AS ind, loc.location AS loc, res.result', 
			' FROM actobj LEFT JOIN cell ON actobj.id = cell.actobj_id LEFT JOIN res ON cell.id =', 
			' res.cell_id LEFT JOIN loccell ON cell.id = loccell.cell_id LEFT JOIN loc ON loccell.loc_id',
			' = loc.id LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE actobj.obj_id = ', obj_id, 
			' AND actobj.series_id = ', series_id, ' AND (cell.id ', paste(locations, collapse = ') AND cell.id '), 
			'))', sep = ''))
	}
	odbcClose(db)
	Data <- Data[order(Data[,1], Data[,2], Data[,3]),]
	nind <- length(levels(Data[,3]))
	nres <- nrow(Data)/nind
	dataframe <- Data[1:nres*nind, c(1,2)]
	for (i in nind:1) {
		dataframe[,2 + nind - i + 1] <- Data[1:nres*nind - i + 1, 4]
		colnames(dataframe)[2 + nind - i + 1] <- as.character(Data[nind - i + 1, 3])
	}
	dataframe[,3 + nind] <- Data[1:nres*nind, 5]
	colnames(dataframe)[3 + nind] <- "Result"
	rownames(dataframe) <- 1:nres
	dataframe
}

Usage

variable <- op_baseGetData("opasnet_base", "page identifier", include = vector_of_loc_ids, exclude = vector_of_loc_ids)
  • Assuming "opasnet_base" is a correctly defined DSN (Data Service Name; in Windows XP: Control Panel\Administrative tools\Data Sources (ODBC)).
  • Include and exclude are optional.
    • Include picks all cells in the locations given.
      • The clearest case is when all the included locations belong to the same index: Any cells in the non-included locations of the index will be left out.
      • In case given locations are in multiple indices: The effect produced will be the same as picking separately for each index and removing duplicates.
    • Exclude unpicks any cells which are indexed by the locations given. Slower than include.
    • They can be used in unison.
  • Result will be in a table format with columns: id, obs, ind1, ind2 ... indn, Result.
  • series_id is an optional parameter, if it is not given the most current upload of the data will be downloaded.

Finding index data

Function

op_baseGetLocs <- function(dsn, ident, series_id = NULL) {
	db <- odbcConnect(dsn)
	obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep=''))[1,1]
	if (length(series_id) == 0) {series_id <- sqlQuery(db, paste('SELECT series_id FROM actobj WHERE obj_id = ', obj_id, 
		' ORDER BY series_id DESC LIMIT 1', sep = ''))[1,1]}
	Locs <- sqlQuery(db, paste("SELECT obj.ident AS ind, loc.location AS loc, loc.id AS loc_id", 
		" FROM actobj LEFT JOIN actloc ON actobj.id = actloc.actobj_id LEFT JOIN loc ON actloc.loc_id = loc.id", 
		" LEFT JOIN obj ON loc.obj_id_i = obj.id WHERE actobj.obj_id = ", obj_id, " AND actobj.series_id = ", 
		series_id, sep = ""))
	odbcClose(db)
	Locs <- Locs[order(Locs[,1]),]
	rownames(Locs) <- 1:nrow(Locs)
	Locs
}
  • Returns all indexes and locations and their ids in a table of format: ind, loc, loc_id.

Manipulating data

Functions

DataframeToArray <- function(dataframe, rescol = NULL) {
	ColNames <- colnames(dataframe[(colnames(dataframe) == "id") == FALSE])
	if (length(ColNames[(ColNames == "obs")])>0) {if(length(levels(factor(dataframe[,"obs"]))) == 1) ColNames <- ColNames[(ColNames == "obs"
	) == FALSE]}
	if (length(rescol)==0) {
	rescol <- colnames(dataframe) == "Freq"
	if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Freq" else {
		rescol <- colnames(dataframe) == "Result"
		if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Result" else {
			rescol <- colnames(dataframe) == "result"
			if (length(rescol[rescol==TRUE]) >= 1) rescol <- "result"
		}
	}
	} else {ColNames <- ColNames[(ColNames == rescol) == FALSE]}
	ColNames <- ColNames[(ColNames == "Result") == FALSE]
	ColNames <- ColNames[(ColNames == "result") == FALSE]
	ColNames <- ColNames[(ColNames == "Freq") == FALSE]
	nind <- length(ColNames)
	DimNames <- rep(vector("list", 1), nind)
	names(DimNames) <- ColNames
	indlengths <- 0
	for (i in 1:nind) {
		DimNames[[i]] <- levels(factor(dataframe[,ColNames[i]]))
		indlengths[i] <- length(DimNames[[i]])
	}
	array <- rep(NA, prod(indlengths))
	dim(array) <- indlengths
	dimnames(array) <- DimNames
	array[as.matrix(format(dataframe[,ColNames], trim = TRUE))] <- dataframe[,rescol]
	array
}

⇤--#: . This does not work for probabilistic data. as.matrix(dataframe[,c(1:nind + 1)]) gives values like "   1", "   2" for obs, but the array entries for obs are "1", "2" etc. They do not match. --Jouni 07:00, 4 November 2010 (UTC) (type: truth; paradigms: science: attack)

←--#: . Fixed: ''array[as.matrix(format(dataframe[,c(1:nind + 1)], trim = TRUE))] <- dataframe[,ncol(dataframe)]'' --Teemu R 08:01, 4 November 2010 (UTC) (type: truth; paradigms: science: defence)

Usage

variable2 <- DataframeToArray(variable1, rescol = NULL)
  • variable1 must be in similar format as the result when downloading.
  • Columns named "id" and various versions of "Result" are ignored for dimension creation.
    • "obs" column will also be ignored if there's only one.
  • The column containing the values may be defined in the parameters, otherwise it is assumed to be either "Freq", "Result" or "result" in that order.

Other useful stuff

as.data.frame(as.table(array))
dataframe[is.na(dataframe[,ncol(dataframe)])==FALSE,]
  • First line returns an array from a data frame.
  • Second line returns rows without empty values.
dataframe[grep("location", dataframe$index, ignore.case = TRUE),]
  • Returns all rows of a data frame where column "index" value is "location".
dataframe[order(dataframe[,"col1"],dataframe[,"col2"], ... ,dataframe[,"coln"]),]
  • Returns dataframe ordered by col1, col2, ... , coln.
read.csv("table.csv", sep = ",")
  • Returns a data.frame from a .csv file, sep is the separator used in the file.

Uploading data

  • Varying numbers of iterations between cells not supported.

Functions

op_baseWrite <- function(dsn, input, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL, acttype = NULL, sort = TRUE) {
	#Open database connection
	db <- odbcConnect(dsn)
	
	#Coerce input into a data frame if it isn't one already; get rid of empty cells
	if (is.array(input)) dataframe <- as.data.frame(as.table(input)) else dataframe <- input
	rescol <- colnames(dataframe) == "Freq"
	if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Freq" else {
		rescol <- colnames(dataframe) == "Result"
		if (length(rescol[rescol==TRUE]) >= 1) rescol <- "Result" else {
			rescol <- colnames(dataframe) == "result"
			if (length(rescol[rescol==TRUE]) >= 1) rescol <- "result"
		}
	}
	dataframe <- dataframe[is.na(dataframe[,rescol]) == FALSE,]
	
	#Add page to database (if it doesn't already exist)
	if (is.null(ident)==TRUE) if (interactive()) ident <- readline(paste("What is the identifier of this object?", 
		"\n", sep = "")) else stop("indentifier of object no given")
	wiki_name <- substring(ident, 1, 5)
	if (wiki_name=="Op_en") {wiki_id <- 1; page <- substring(ident, 6, nchar(ident))} else if (wiki_name=="Op_fi") {
		wiki_id <- 2; page <- substring(ident, 6, nchar(ident))} else {wiki_id <- 0; page <- 0}
	obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep = ''))[1,1]
	if (is.na(obj_id)==TRUE) {
		if (is.null(name)==TRUE) if (interactive()) name <- readline(paste("What is the name of this object?", 
			"\n", sep = "")) else stop("object name not given")
		if (is.null(unit)==TRUE) if (interactive()) unit <- readline(paste("What is the unit of this object?", 
			"\n", sep = "")) else stop("unit not given")
		if (is.null(objtype_id)==TRUE) if (interactive()) objtype_id <- readline(paste("What type of object is", 
			" this (id)?", paste(paste(sqlQuery(db, "SELECT id FROM objtype")[,1], sqlQuery(db, paste("SELECT objtype",
			" FROM objtype", sep = ""))[,1], sep = " - "), collapse = ", "), "\n", collapse = " ")) else {
			stop("object type not given")}
		sqlQuery(db, paste('INSERT INTO obj (ident, name, unit, objtype_id, page, wiki_id) VALUES ("', paste(ident, 
			name, unit, sep = '","'), '",', paste(objtype_id, page, wiki_id, sep = ','), ')', sep = ''))
		obj_id <- sqlQuery(db, paste('SELECT id FROM obj WHERE ident = "', ident, '"', sep = ''))[1,1]
	}
	
	#Write act and actobj
	if (is.null(who)==TRUE) if (interactive()) {who <- readline(paste("What is the name of the uploader?", "\n", sep = "")) 
		} else stop("uploader name not given")
	series_id <- sqlQuery(db, paste("SELECT series_id FROM actobj WHERE obj_id = ", obj_id, " ORDER BY series_id DESC LIMIT 1", 
		sep = ""))[1,1]
	if (is.na(series_id)==FALSE) {if (is.null(acttype)==TRUE) {if (interactive()) {acttype <- readline(paste("What type of upload", 
		" is this? 4 - new data to replace any existing, 5 - new data to be appended to existing data (must have the same", 
		" indices).", "\n", sep = "")) 
		} else acttype <- 4 
		}} else acttype <- 4
	if (acttype != 4 & acttype != 5) stop ("proper acttype not given")
	sqlQuery(db, paste('INSERT INTO act (acttype_id, who, comments) VALUES (', acttype, ',"', who, '","R upload")', sep = ''))
	act_id <- sqlQuery(db, "SELECT id FROM act ORDER BY id DESC LIMIT 1")[1,1]
	if (acttype == 4) series_id <- act_id
	sqlQuery(db, paste('INSERT INTO actobj (act_id, obj_id, series_id) VALUES (', paste(act_id, obj_id, series_id, sep = ','), 
		')', sep = ''))
	actobj_id <- sqlQuery(db, paste('SELECT id FROM actobj WHERE act_id = ', act_id, sep = ''))[1,1]
	
	#Write indexes
	ColNames <- colnames(dataframe[(colnames(dataframe) == "id") == FALSE])
	ColNames <- ColNames[(ColNames == "obs") == FALSE]
	ColNames <- ColNames[(ColNames == "Result") == FALSE]
	ColNames <- ColNames[(ColNames == "result") == FALSE]
	ColNames <- ColNames[(ColNames == "Freq") == FALSE]
	nind <- length(ColNames)
	DimNames <- rep(vector("list", 1), nind)
	names(DimNames) <- ColNames
	indlengths <- 0
	for (i in 1:nind) {
		DimNames[[i]] <- levels(factor(dataframe[, ColNames[i]]))
		indlengths[i] <- length(DimNames[[i]])
	}
	for (i in 1:length(ColNames)) {
		sqlQuery(db, paste('INSERT IGNORE INTO obj (ident, name, objtype_id) VALUES ("', gsub(' ', '_', ColNames[i]), '","', 
			ColNames[i], '", 6)', sep = ''))
	}
	IndIds <- sqlQuery(db, paste('SELECT id, ident FROM obj WHERE ident IN("', paste(gsub(" ", "_", ColNames), 
		collapse = '","'), '")', sep = ''))
	DimIds <- DimNames
	DimN <- 1:nind
	names(DimN) <- tolower(gsub(" ", "_", ColNames))
	for (i in 1:nrow(IndIds)) {
		names(DimIds)[DimN[tolower(IndIds[i,2])]] <- IndIds[i, 1]
	}
	
	#Write locations
	y <- 1
	LocNames <- as.data.frame(matrix(rep(NA, 2*sum(indlengths)), sum(indlengths), 2))
	for (i in 1:nind) {
		LocNames[y:(y + indlengths[i] - 1),1:2] <- matrix(c(rep(names(DimIds)[i], indlengths[i]), 
			DimNames[[i]]), indlengths[i], 2)
		y <- y + indlengths[i]
	}
	for (i in 1:nrow(LocNames)) {
		sqlQuery(db, paste('INSERT IGNORE INTO loc (obj_id_i, location) VALUES (', LocNames[i,1], ',"', LocNames[i,2], '")', 
			sep = ''))
	}
	LocIds <- data.frame(matrix(NA, nrow(LocNames), 3))
	for (i in 1:nind) {
		LocIds[(sum(indlengths[1:i]) - indlengths[i] + 1):sum(indlengths[1:i]),1:3] <- format(sqlQuery(db, paste('SELECT id, obj_id_i', 
			', location FROM loc WHERE location IN("', paste(LocNames[(sum(indlengths[1:i]) - indlengths[i] + 1):sum(indlengths[1:i] 
			), 2], collapse = '","'), '") AND obj_id_i IN(', paste(names(DimIds)[i], collapse = ','), ')', sep = '')))
	}
	LocMap <- NA
	y <- 1
	for (i in 1:nind) {
		LocMap <- LocIds[LocIds[,2]==names(DimIds)[i], 1]
		names(LocMap) <- tolower(LocIds[LocIds[,2]==names(DimIds)[i], 3])
		DimIds[[i]] <- LocMap[tolower(DimNames[[i]])]
	}
	
	#Writing actloc
	sqlQuery(db, paste("INSERT INTO actloc (actobj_id, loc_id) VALUES (", paste(actobj_id, LocIds[,1], sep = ",", 
	collapse = "),("), ")", sep = ""))
	
	#Changing location names in table into ids
	for (i in 1:nind) {
		dataframe[,ColNames[i]] <- factor(dataframe[,ColNames[i]])
		levels(dataframe[,ColNames[i]]) <- DimIds[[i]]
	}
	
	#A hidden parameter for adjusting query packet sizes, the higher the faster, though crash becomes likelier
	maxrows <- 50000
	
	#Writing cell
	obscol <- colnames(dataframe) == "obs"
	if (length(obscol[obscol==TRUE]) >= 1) {
		obscol <- TRUE
		n <- length(levels(factor(dataframe[,"obs"])))} else {
		obscol <- FALSE
		n <- 1
	}
	if (obscol) if (sort) {
		dataframe <- dataframe[order(dataframe[,"obs"]),]
		for (i in 1:nind) {
			dataframe <- dataframe[order(dataframe[,ColNames[i]]),]
		}
	}
	ncell <- nrow(dataframe)/n
	cellQuery <- NA
	cellQuery[1:ncell*8-7] <- actobj_id
	cellQuery[1:ncell*8-6] <- ","
	if (n == 1) {
		cellQuery[1:ncell*8-5] <- dataframe[,rescol]
	 	cellQuery[1:ncell*8-3] <- 0} else {
		cellQuery[1:ncell*8-5] <- apply(matrix(dataframe[,rescol], n, ncell), 2, mean)
		cellQuery[1:ncell*8-3] <- sd(matrix(dataframe[,rescol], n, ncell))
	}
	cellQuery[1:ncell*8-4] <- ","
	cellQuery[1:ncell*8-2] <- ","
	cellQuery[1:ncell*8-1] <- n
	cellQuery[1:(ncell-1)*8] <- "),("
	if (ncell >= maxrows) {
		for (i in 1:(ncell%/%maxrows)) {
			sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, sd, n) VALUES (', paste(cellQuery[
				((i-1)*8*maxrows+1):(i*8*maxrows-1)], collapse = ''), ')', sep = ''))
		}
	}
	if (ncell%%maxrows != 0) sqlQuery(db, paste('INSERT INTO cell (actobj_id, mean, sd, n) VALUES (', paste(cellQuery[(ncell%/%
		maxrows*8*maxrows+1):length(cellQuery)], collapse = ''), ')', sep = ''))
	
	#Writing res
	cell_id <- sqlQuery(db, paste("SELECT id FROM cell WHERE actobj_id = ", actobj_id, " ORDER BY ID", sep = ""))[,1]
	if (ncell != length(cell_id)) stop("Number of cells written does not match with expected value")
	resQuery <- NA
	resQuery[1:nrow(dataframe)*6-5] <- rep(cell_id, each = n)
	resQuery[1:nrow(dataframe)*6-4] <- ","
	if (n==1) resQuery[1:nrow(dataframe)*6-3] <- "0" else resQuery[1:nrow(dataframe)*6-3] <- dataframe[,"obs"]
	resQuery[1:nrow(dataframe)*6-2] <- ","
	resQuery[1:nrow(dataframe)*6-1] <- dataframe[,rescol]
	resQuery[1:(nrow(dataframe)-1)*6] <- "),("
	if (nrow(dataframe) >= maxrows) {
		for (i in 1:(nrow(dataframe)%/%maxrows)) {
			sqlQuery(db, paste('INSERT INTO res (cell_id, obs, result) VALUES (', paste(resQuery[((i-1)
				*6*maxrows+1):(i*6*maxrows-1)], collapse = ''), ')', sep = ''))
		}
	}
	if (nrow(dataframe)%%maxrows != 0) sqlQuery(db, paste('INSERT INTO res (cell_id, obs, result) VALUES (', paste(resQuery[
		(nrow(dataframe)%/%maxrows*6*maxrows+1):length(resQuery)], collapse = ''), ')', sep = ''))
	
	#Writing loccell
	locidmatrix <- rep(NA, nind*ncell)
	dim(locidmatrix) <- c(nind,ncell)
	for (i in 1:nind) {
		locidmatrix[i,] <- as.character(dataframe[1:ncell*n - n + 1,ColNames[i]])
	}
	loccellQuery <- rep(NA, 4*nind*ncell)
	dim(loccellQuery) <- c(4*nind, ncell)
	loccellQuery[1:nind*4-3, 1:ncell] <- rep(cell_id, each = nind)
	loccellQuery[1:nind*4-2, 1:ncell] <- ","
	loccellQuery[1:nind*4-1, 1:ncell] <- locidmatrix
	loccellQuery[1:nind*4, 1:ncell] <- "),("
	loccellQuery[nind*4, ncell] <- ""
	if (ncell >= maxrows) {
		for (i in 1:(ncell%/%maxrows)) {
			loccellQuery[nind*4, i*maxrows] <- ""
			sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,
				((i-1)*maxrows+1):(i*maxrows)], collapse = ''), ')', sep = ''))
		}
	}
	if (ncell%%maxrows != 0) sqlQuery(db, paste('INSERT INTO loccell (cell_id, loc_id) VALUES (', paste(loccellQuery[,
		(ncell%/%maxrows*maxrows+1):ncell], collapse = ''), ')', sep = ''))
	
	#Close database connection
	odbcClose(db)
}

Usage

op_baseWrite(dsn, input, ident = NULL, name = NULL, unit = NULL, objtype_id = NULL, who = NULL, acttype = NULL, sort = TRUE)
  • dsn and input must be defined, the rest of the object and act parameters if not defined are prompted for by the function as needed.
    • For uploading the DSN defined must have writers permissions.
  • sort (default = TRUE), sorts the data in the required order, if necessary (probabilistic data)
Restrictions
  • Input may only be given in either array or data.frame form.
    • Indexes used may not exceed the character limit of 20.
      • Indexes should preferably match an earlier entry: Special:OpasnetBaseIndices.
      • Indexes are treated as identifiers for indexes in the database, spaces in the indexes are converted to _. This ensures maximum compatibility and ease in operations in which data is downloaded and uplaoded again. Names and more specific details can be edited into the indexes separately.
    • Input in data.frame form must contain a "Freq", "Result" or "result" column, this is where the numerical value is read.
    • Probabilistic input must contain a dimension (in case of array) or a column (in case of data.frame) named "obs". Also: all cells must have values for each "obs" value. By default the function sorts the data as it requires, however, for some performance boost this may be disabled if the sorting order is taken into account elsewhere: indices first, obs last; for arrays this means that the first index must be "obs". Data downloaded by the op_baseGetData function is sorted correctly by default.