+ Show code- Hide code
library(OpasnetUtils)
library(ggplot2)
# Fetch the stored raw data (requires a password to open it).
objects.get("mg2Gf5eoR5FVZ8xw")
data <- objects.decode(etable, key)
# Rename columns.
titles <- c(
"Gender",
"Age",
"Country",
"Education",
"Expertise.Anthropology",
"Expertise.Animals",
"Expertise.Ecology",
"Expertise.Evolution",
"Expertise.Genetics",
"Expertise.Biology",
"Expertise.Geology",
"Expertise.Cardiovascular",
"Expertise.Musculoskeletal",
"Expertise.Nervous",
"Expertise.Nurtition",
"Expertise.Human",
"Expertise.Paleoanthropology",
"Expertise.Paleontology",
"Expertise.Other",
"Publications.scientific",
"Publications.general",
"EvolutionPublications.scientific",
"EvolutionPublications.general",
"Courses",
"Familiarity",
"Bipedalism.Running",
"Bipedalism.Orangutang",
"Bipedalism.Wading",
"Bipedalism.Thermoregulation",
"Bipedalism.Seeing",
"Bipedalism.Foraging",
"Bipedalism.CarryingFood",
"Bipedalism.CarryingOffspring",
"Bipedalism.Tools",
"Bipedalism.Sexual",
"Encephalization.Meat",
"Encephalization.Fish",
"Encephalization.Fire",
"Encephalization.Social",
"Encephalization.Hunting",
"Encephalization.Speech",
"Encephalization.Warfare",
"Encephalization.Neoteny",
"Encephalization.Bipedalism",
"Encephalization.Nakedness",
"Naked.OffspringContact",
"Naked.SexContact",
"Naked.Hygiene",
"Naked.Extoparasite",
"Naked.Swimming",
"Naked.Cooling",
"Naked.Size",
"Naked.Clothes",
"SubcutaneousFat.Storage",
"SubcutaneousFat.Insulation",
"SubcutaneousFat.Thermoregulation",
"SubcutaneousFat.Sexual",
"Larynx.Speech",
"Larynx.SexualVoice",
"Larynx.Diving",
"Speech.Larynx",
"Speech.WaterCommunication",
"Speech.VoluntaryBreathing",
"Speech.Reassuring",
"Speech.Social",
"Speech.Hunting",
"Speech.Culture",
"BabySwim",
"Nose",
"Smell",
"Fingerweb",
"EccrineGlands",
"Sweating",
"Swimming",
"DivingReflex",
"Bathing",
"AAH.AgainstProcess",
"AAH.EnvironDeterminism",
"AAH.Redundant",
"AAH.ComparativeAnatomy",
"AAH.FurryAquatics",
"AAH.Concidence",
"AAH.ApesSwim",
"AAH.SimplyFalse",
"AAH.NoFossils",
"AAH.FossilsAreTerrestrial",
"AAH.WhenAndWhere",
"AAH.NoTime",
"AAH.Simplistic",
"AAH.LessParsimonius",
"AAH.LessConsistent",
"AAH.Nonpredictive",
"AAH.Feministic",
"AAH.NotPeerReviewed",
"AAH.NotProfessional",
"AAH.Pseudoscience",
"AAHFamiliarity",
"AAHSource.Articles",
"AAHSource.Morgan",
"AAHSource.Books",
"AAHSource.Media",
"AAHSource.Courses",
"AAHSource.Personal",
"AAHSource.Blogs",
"AAHSource.Wikipedia",
"AAHAttitude.Rejected",
"AAHAttitude.Described",
"AAHAttitude.Plausible"
)
colnames(data) <- titles
data <- data[-(1:2) , ] # Remove old heading rows.
# Replace answer numbers with actual responses, and turn them into factors.
leve <- list()
leve[[1]] <- c("Male", "Female")
leve[[2]] <- c("29 or less", "30-39", "40-49", "50-59", "60 or more")
leve[[3]] <- c("None", "Bachelor's degree", "Master's degree", "Doctor's degree")
leve[[4]] <- c("Anthropology or archaeology", "Biology (animal physiology, anatomy or morphology)", "Biology (ecology)",
"Biology (evolution)", "Biology (genetics or molecular biology)", "Biology (other, please specify)", "Geology",
"Human cardiovascular or respiratory system", "Human musculoskeletal system", "Human nervous system", "Human nutrition",
"Other aspects of human biology (please specify)", "Paleoanthropology", "Paleontology", "Other, please specify")
leve[[5]] <- c("none", "1-10", "11-40", "41 or more")
leve[[6]] <- c("Yes", "No")
leve[[7]] <- c("Not at all", "I have some idea", "I know the hypotheses well")
leve[[8]] <- c("Very likely", "Moderately likely", "No opinion", "Moderately unlikely", "Very unlikely")
leve[[9]] <- c("Fully agree", "Mostly agree", "No opinion", "Mostly disagree", "Strongly disagree")
leve[[10]] <- c("No", "Yes")
leve[[11]] <- c("Considerably", "A little", "Not at all")
leve[[12]] <- c("Common", "Rare", "Not seen")
# Which answer list is used for which question (NA: do not use an answer list. This applies to country).
chooselevel <- c(1, 2, NA, 3, rep(4, 15), rep(5, 4), 6, 7, rep(8, 51), rep(9, 20), 10, rep(11, 8), rep(12, 3))
for(i in 1:length(chooselevel)) {
if(!is.na(chooselevel[i])) data[[i]] <- as.factor(leve[[chooselevel[i]]][as.numeric(as.character(data[[i]]))])
}
## New function for making uppercase Initials. See http://127.0.0.1:21251/library/base/html/chartr.html
capwords <- function(s, strict = FALSE) {
cap <- function(s) paste(toupper(substring(s,1,1)),
{s <- substring(s,2); if(strict) tolower(s) else s},
sep = "", collapse = " " )
sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s)))
}
# Rename written country names into standard names and turn into a factor.
data[[3]] <- ifelse(is.na(data[[3]]), "", as.character(data[[3]]))
data[[3]] <- capwords(data[[3]], strict = TRUE)
data[[3]] <- ifelse(data[[3]] == "Brasil", "Brazil", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Canada And France", "Canada", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Ch", "Switzerland", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Czech Republich", "Czech Republic", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Iyaly", "Italy", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Korea, South", "Korea", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Mexi", "Mexico", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Northern Ireland. U.k.", "UK", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Scotland Uk", "UK", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Sweden (currently, But Of U.s. Origin)", "Sweden", data[[3]])
data[[3]] <- ifelse(data[[3]] == "The Uk", "UK", data[[3]])
data[[3]] <- ifelse(data[[3]] == "United Kingdom", "UK", data[[3]])
data[[3]] <- ifelse(data[[3]] == "U,.s.", "USA", data[[3]])
data[[3]] <- ifelse(data[[3]] == "U. S. A.", "USA", data[[3]])
data[[3]] <- ifelse(data[[3]] == "U.s.a.", "USA", data[[3]])
data[[3]] <- ifelse(data[[3]] == "U.k.", "UK", data[[3]])
data[[3]] <- ifelse(data[[3]] == "United States", "USA", data[[3]])
data[[3]] <- ifelse(data[[3]] == "United States Of America", "USA", data[[3]])
data[[3]] <- ifelse(data[[3]] == "U.s.", "USA", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Us", "USA", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Usaf", "USA", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Wales, Uk", "UK", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Usa", "USA", data[[3]])
data[[3]] <- ifelse(data[[3]] == "Uk", "UK", data[[3]])
data[[3]] <- ifelse(data[[3]] == "", NA, data[[3]])
data[[3]] <- as.factor(data[[3]])
#MPF-731
#sininen farmariaudi
#16.40 10.3.2013
#Orivesi-Jämsä itään n. 150 km/h
# Print and plot examples of the cleaned data.
# oprint(head(data))
X <- colnames(data)[X]
Y <- colnames(data)[Y]
fill <- colnames(data)[fill]
ggplot(data, aes_string(x = X, weight = 1, fill = fill)) +
geom_bar(position = "fill", na.rm = TRUE) +
theme_grey(base_size = 24)
# Save the cleaned data table in a secure format.
etable <- objects.encode(data, key)
objects.put(etable)
cat("Cleaned table 'etable' successfully saved.\n")
| |