# N = "intervention-social-male-counter"
# O = "intervention-hobby-female-counter"
# P = "intervention-hobby-male-counter"
# run 1-participants.R first!
# All relative paths below are resolved against this analysis directory.
setwd("~/Documents/02 Career orientation/2 - Data/Analysis-ICER submission/")
folder <- "data/"
answers_file <- "d6rv660f6u4a3_public_participant_answers.csv"
answers_file_logfiles <- "logfiles-answers-output.csv"

# Answers exported from the database; the `X.` column is dropped if present.
answers_anonymized <- read.csv(paste0(folder, answers_file))
answers_anonymized[["X."]] <- NULL

# Most answers were saved in the database directly. Only for some participants
# the demographics and some Likert questions were not saved there, so those
# (question ids 1-5) are taken from the log files and appended.
answers_logfiles <- read.csv(paste0(folder, answers_file_logfiles))
answers_logfiles_subset <- answers_logfiles[
  which(answers_logfiles$question_id %in% 1:5), ,
  drop = FALSE
]
answers_anonymized <- rbind(answers_anonymized, answers_logfiles_subset)

# Keep only answers of participants known from 1-participants.R.
known_participant <- answers_anonymized$participant_id %in% participants_anonymized$id
answers_anonymized <- answers_anonymized[known_participant, ]
rm(known_participant)

# Control group = participants who got one of these quiz versions.
participants_control <- subset(
  participants_anonymized,
  participants_anonymized$quiz_version %in% c("A", "B", "C", "D", "I", "J", "K", "L")
)
answers_control <- subset(
  answers_anonymized,
  answers_anonymized$participant_id %in% participants_control$id
)
# age category
# Convert age to a number; values that cannot be parsed become NA (going through
# as.character keeps this correct if the column was read as a factor).
participants_control$age <- as.numeric(as.character(participants_control$age))

# Age bands: 1 = 7-8, 2 = 9-10, 3 = 11-12, 4 = 13-14.
# -1 marks participants whose age falls in none of the bands (or is missing).
# `%in%` never yields NA, so a missing age no longer aborts the script the way
# the scalar `if (age == 7 | age == 8)` test did.
participants_control$age_category <- -1
age_bands <- list(c(7, 8), c(9, 10), c(11, 12), c(13, 14))
for (band in seq_along(age_bands)) {
  in_band <- participants_control$age %in% age_bands[[band]]
  participants_control$age_category[in_band] <- band
}
rm(age_bands, band, in_band)

# Keep participants aged 14 or younger (rows with a missing age are dropped by
# subset()), and restrict the answers to the remaining participants.
participants_control <- subset(participants_control, participants_control$age <= 14)
answers_control <- subset(
  answers_anonymized,
  answers_anonymized$participant_id %in% participants_control$id
)
# Question ID 2 = "I am crazy about computers" (Dutch: "Ik ben gek op computers")
# Question ID 3 = "What I like best is ..." (1-video, 5-tennis) (Dutch: "Ik vind het het leukst om")
# Question ID 4 = "If I wanted to, I could become a programmer later"
#                 (Dutch: "Als ik dat zou willen, zou ik later programmeur kunnen worden")
# Question ID 5 = "I want to become a programmer later" (Dutch: "Ik wil later programmeur worden")
# Question IDs 6-13 are the explicit questions.
subset_question_5 <- subset(answers_control, answers_control$question_id == 5)

# Only participants who answered question 5 get their answers copied; everyone
# else keeps NA in q2..q13.
answered_question_5 <- participants_control$id %in% subset_question_5$participant_id

for (question in 2:13) {
  question_answers <- answers_anonymized[
    which(answers_anonymized$question_id == question), ,
    drop = FALSE
  ]
  # match() picks the first stored answer per participant and gives NA when a
  # participant has none, so missing or duplicated answers no longer stop the
  # script with a "replacement has N rows" error.
  answer_row <- match(participants_control$id, question_answers$participant_id)
  copied_answers <- question_answers$answers[answer_row]
  copied_answers[!answered_question_5] <- NA
  participants_control[[paste0("q", question)]] <- copied_answers
}
rm(answered_question_5, question, question_answers, answer_row, copied_answers)
# Compute per-participant IAT D-scores for one pair of IAT blocks.
#
# Arguments:
#   consistent_id       question_id of the stereotype-consistent block.
#   inconsistent_id     question_id of the stereotype-inconsistent block.
#   participants_subset data frame with one row per participant and an `id`
#                       column.
#   answers_subset      data frame of trials with columns `id`,
#                       `participant_id`, `question_id`, `response_time`
#                       (milliseconds) and optionally `timestamp`.
#   verbose             when TRUE, print group-level summaries and a one-sample
#                       t-test of the combined D-score. These diagnostics used to
#                       be computed and silently discarded on every call.
#
# Returns: `participants_subset` without the participants excluded for answering
# too fast, extended with the columns sd_practice, sd_test,
# mean_consistent_practice, mean_consistent_test, mean_inconsistent_practice,
# mean_inconsistent_test, diff_practice, diff_test, d_practice, d_test and
# d_result. Differences are inconsistent minus consistent, so a positive D means
# slower responses in the inconsistent block.
#
# Side effects: prints TRUE/FALSE for the "64 trials per participant" sanity
# check and raises warnings when the trial order or block sizes look unreliable.
IAT_algorithm <- function(consistent_id, inconsistent_id, participants_subset, answers_subset,
                          verbose = FALSE) {
  trials_per_block <- 32L   # trials per question_id and participant
  practice_trials <- 12L    # the first 12 trials are practice, the other 20 test
  max_latency_ms <- 10000   # step 2a cut-off
  fast_latency_ms <- 300    # step 2b: a trial faster than this counts as "too fast"
  max_fast_trials <- 6      # more than 10% of 64 trials means at least 7

  answers_subset <- subset(
    answers_subset,
    answers_subset$question_id == consistent_id |
      answers_subset$question_id == inconsistent_id
  )

  # Answer ids are used as the trial order.
  answers_subset <- answers_subset[order(answers_subset$id), ]

  # Sanity check: every participant should contribute 2 x 32 trials.
  print(length(answers_subset$id) / 64 == length(participants_subset$id))

  # Divide each block in 'practice' and 'test'. The position of a trial is
  # counted per participant and per block, so the labels stay correct when the
  # rows of several participants interleave after sorting by id.
  if (nrow(answers_subset) > 0) {
    trial_index <- ave(
      seq_len(nrow(answers_subset)),
      answers_subset$participant_id,
      answers_subset$question_id,
      FUN = seq_along
    )
    answers_subset$block <- ifelse(
      (trial_index - 1L) %% trials_per_block < practice_trials,
      "practice",
      "test"
    )
    trials_per_group <- table(answers_subset$participant_id, answers_subset$question_id)
    if (any(trials_per_group %% trials_per_block != 0)) {
      warning(
        "some participants do not have a multiple of ", trials_per_block,
        " trials per block; practice/test labels may be off for them",
        call. = FALSE
      )
    }
  } else {
    answers_subset$block <- character(0)
  }

  # Check that ids are usable as trial order: within a participant the timestamp
  # must never decrease while the id increases. Participants without timestamps
  # are skipped.
  for (part_id in participants_subset$id) {
    timestamps <- answers_subset$timestamp[which(answers_subset$participant_id == part_id)]
    if (is.factor(timestamps)) {
      timestamps <- as.character(timestamps)
    }
    if (length(timestamps) > 1 && !is.na(timestamps[1]) && timestamps[1] != "") {
      out_of_order <- timestamps[-length(timestamps)] > timestamps[-1]
      if (any(out_of_order, na.rm = TRUE)) {
        warning("order is not reliable for participant ", part_id, call. = FALSE)
      }
    }
  }

  # Step two: a. Eliminate trials with latencies > 10,000 ms
  answers_subset <- subset(answers_subset, answers_subset$response_time <= max_latency_ms)

  # Step two: b. Eliminate subjects for whom more than 10% of trials have a
  # latency below 300 ms (32 + 32 trials, so more than 6 fast trials).
  participant_ids <- as.vector(participants_subset$id)
  fast_trial_counts <- vapply(
    participant_ids,
    function(pid) {
      own_times <- answers_subset$response_time[which(answers_subset$participant_id == pid)]
      sum(own_times < fast_latency_ms)
    },
    numeric(1),
    USE.NAMES = FALSE
  )
  excluded_ids <- participant_ids[fast_trial_counts > max_fast_trials]
  answers_subset <- answers_subset[
    !(answers_subset$participant_id %in% excluded_ids), ,
    drop = FALSE
  ]
  participants_subset <- participants_subset[
    !(participants_subset$id %in% excluded_ids), ,
    drop = FALSE
  ]

  # Apply `stat` to the response times of every remaining participant, limited to
  # one block type and (optionally) one question id. Yields one value per row of
  # participants_subset, in row order.
  summarise_trials <- function(stat, block, question_id = NULL) {
    in_scope <- answers_subset$block == block
    if (!is.null(question_id)) {
      in_scope <- in_scope & answers_subset$question_id == question_id
    }
    vapply(
      as.vector(participants_subset$id),
      function(pid) {
        stat(answers_subset$response_time[
          which(in_scope & answers_subset$participant_id == pid)
        ])
      },
      numeric(1),
      USE.NAMES = FALSE
    )
  }

  # Steps 3-5 are not needed in the improved algorithm without error correction.
  # Step 6: SD over all trials (both question ids) of the practice / test part.
  participants_subset$sd_practice <- summarise_trials(sd, "practice")
  participants_subset$sd_test <- summarise_trials(sd, "test")

  # Step 7 (replace error latency with block mean + 600 ms): not for this research.
  # Step 8: no transformation.
  # Step 9: average latency per block part.
  participants_subset$mean_consistent_practice <-
    summarise_trials(mean, "practice", consistent_id)
  participants_subset$mean_consistent_test <-
    summarise_trials(mean, "test", consistent_id)
  participants_subset$mean_inconsistent_practice <-
    summarise_trials(mean, "practice", inconsistent_id)
  participants_subset$mean_inconsistent_test <-
    summarise_trials(mean, "test", inconsistent_id)

  # Step 10: differences between blocks (inconsistent - consistent).
  participants_subset$diff_practice <-
    participants_subset$mean_inconsistent_practice - participants_subset$mean_consistent_practice
  participants_subset$diff_test <-
    participants_subset$mean_inconsistent_test - participants_subset$mean_consistent_test

  # Step 11: divide the differences by the matching SD.
  participants_subset$d_practice <-
    participants_subset$diff_practice / participants_subset$sd_practice
  participants_subset$d_test <-
    participants_subset$diff_test / participants_subset$sd_test

  # Step 12: average the two quotients from step 11.
  participants_subset$d_result <- vapply(
    seq_len(nrow(participants_subset)),
    function(row) {
      mean(c(participants_subset$d_practice[row], participants_subset$d_test[row]))
    },
    numeric(1)
  )

  if (verbose) {
    is_consistent <- answers_subset$question_id == consistent_id
    cat(
      "Response time consistent block (mean, median):",
      mean(answers_subset$response_time[is_consistent]),
      median(answers_subset$response_time[is_consistent]), "\n"
    )
    cat(
      "Response time inconsistent block (mean, median):",
      mean(answers_subset$response_time[!is_consistent]),
      median(answers_subset$response_time[!is_consistent]), "\n"
    )
    cat(
      "Mean D (practice, test, combined):",
      mean(participants_subset$d_practice),
      mean(participants_subset$d_test),
      mean(participants_subset$d_result), "\n"
    )
    cat("Median combined D:", median(participants_subset$d_result), "\n")
    # t.test() stops on too few or constant observations; report instead of failing.
    tryCatch(
      print(t.test(participants_subset$d_result, mu = 0)),
      error = function(e) message("t-test on d_result skipped: ", conditionMessage(e))
    )
  }

  participants_subset
}
# Calculate IAT D-scores per stereotype (consistent id, inconsistent id).
gender_all <- IAT_algorithm(28, 26, participants_control, answers_control)
social_all <- IAT_algorithm(34, 36, participants_control, answers_control)
interests_all <- IAT_algorithm(32, 30, participants_control, answers_control)

# Combine the D measure of each stereotype in the participant dataset.
# Participants that IAT_algorithm() dropped have no row to match and get NA.
participants_control$gender_d <-
  gender_all$d_result[match(participants_control$id, gender_all$id)]
participants_control$social_d <-
  social_all$d_result[match(participants_control$id, social_all$id)]
participants_control$interests_d <-
  interests_all$d_result[match(participants_control$id, interests_all$id)]

# part_id may be left over from loops earlier in the script.
if (exists("part_id", inherits = FALSE)) {
  rm(part_id)
}
rm(gender_all, interests_all, social_all)

# Split by self-reported gender ("Meisje" = girl, "Jongen" = boy).
participants_girls <- participants_control[
  which(participants_control$gender == "Meisje"), ,
  drop = FALSE
]
participants_boys <- participants_control[
  which(participants_control$gender == "Jongen"), ,
  drop = FALSE
]
# Select participants by self-reported programming experience.
#
# Arguments:
#   participants data frame with an `experience` column holding the raw
#                multiple-choice answer, e.g. "{Nee}" or "{Ja, op school}".
#   experience   one of "No", "Yes", "Yes-school", "Yes-home", "Yes-outofschool".
#
# Returns the matching rows of `participants`:
#   "No"   answer is exactly "{Nee}" or contains "Ik weet niet wat programmeren is".
#   "Yes"  answer is not "{Nee}" and not one of the listed combinations that
#          include "Ik weet niet wat programmeren is".
#   "Yes-school" / "Yes-home" / "Yes-outofschool"
#          the "Yes" group narrowed to answers mentioning that location.
# Rows with a missing answer are never selected. An unknown `experience` value
# stops with an error naming the accepted values.
participants_experience <- function(participants, experience) {
  known_filters <- c("No", "Yes", "Yes-school", "Yes-home", "Yes-outofschool")
  if (length(experience) != 1 || !(experience %in% known_filters)) {
    stop(
      "`experience` must be one of: ", paste(known_filters, collapse = ", "),
      call. = FALSE
    )
  }

  answer <- as.character(participants$experience)

  if (experience == "No") {
    keep <- answer == "{Nee}" |
      grepl("Ik weet niet wat programmeren is", answer, fixed = TRUE)
    return(participants[which(keep), , drop = FALSE])
  }

  # Exact answers that do not count as having experience.
  # NOTE(review): this list only covers specific combinations; other answers that
  # contain "Ik weet niet wat programmeren is" would count as both "Yes" and "No".
  not_experienced <- c(
    "{Nee}",
    "{Ik weet niet wat programmeren is}",
    "{Nee,Ik weet niet wat programmeren is}",
    "{Ja, op school,Ik weet niet wat programmeren is}",
    "{Ja, op school,Nee,Ik weet niet wat programmeren is}",
    "{Ja, op een activiteit buiten school bijvoorbeeld in de bibliotheek of bij een codeclub,Ja, bij familie, vrienden of thuis,Ik weet niet wat programmeren is}",
    "{Ja, op school,Ja, op een activiteit buiten school bijvoorbeeld in de bibliotheek of bij een codeclub,Ja, bij familie, vrienden of thuis,Ik weet niet wat programmeren is}"
  )
  keep <- !is.na(answer) & !(answer %in% not_experienced)

  # Optional narrowing to one place where the experience was gained.
  location <- switch(
    experience,
    "Yes-school" = "Ja, op school",
    "Yes-home" = "Ja, bij familie, vrienden of thuis",
    "Yes-outofschool" = "Ja, op een activiteit buiten school bijvoorbeeld in de bibliotheek of bij een codeclub"
  )
  if (!is.null(location)) {
    keep <- keep & grepl(location, answer, fixed = TRUE)
  }

  participants[which(keep), , drop = FALSE]
}
# Programming-experience subsets for the whole control group.
participants_control_exp_yes <- participants_experience(participants_control, "Yes")
participants_control_exp_yes_school <- participants_experience(participants_control, "Yes-school")
participants_control_exp_yes_home <- participants_experience(participants_control, "Yes-home")
participants_control_exp_yes_outofschool <- participants_experience(participants_control, "Yes-outofschool")
participants_control_exp_no <- participants_experience(participants_control, "No")

# The same subsets for boys only.
participants_boys_exp_yes <- participants_experience(participants_boys, "Yes")
participants_boys_exp_yes_school <- participants_experience(participants_boys, "Yes-school")
participants_boys_exp_yes_home <- participants_experience(participants_boys, "Yes-home")
participants_boys_exp_yes_outofschool <- participants_experience(participants_boys, "Yes-outofschool")
participants_boys_exp_no <- participants_experience(participants_boys, "No")

# The same subsets for girls only.
participants_girls_exp_yes <- participants_experience(participants_girls, "Yes")
participants_girls_exp_yes_school <- participants_experience(participants_girls, "Yes-school")
participants_girls_exp_yes_home <- participants_experience(participants_girls, "Yes-home")
participants_girls_exp_yes_outofschool <- participants_experience(participants_girls, "Yes-outofschool")
participants_girls_exp_no <- participants_experience(participants_girls, "No")

# Subsets per age category (1 = 7-8, 2 = 9-10, 3 = 11-12, 4 = 13-14).
participants_age_7_8 <- subset(participants_control, participants_control$age_category == 1)
participants_age_9_10 <- subset(participants_control, participants_control$age_category == 2)
participants_age_11_12 <- subset(participants_control, participants_control$age_category == 3)
participants_age_13_14 <- subset(participants_control, participants_control$age_category == 4)

# Clean up intermediate data, file names and helper functions.
rm(
  answers_anonymized, participants_anonymized,
  answers_logfiles, answers_logfiles_subset,
  subset_question_5,
  answers_file, folder, answers_file_logfiles,
  participants_experience, IAT_algorithm,
  answers_control
)
# No temporary `data` variable is created here any more; remove one only if it
# was already present in the workspace.
if (exists("data", inherits = FALSE)) {
  rm(data)
}
#24	Programmeur	Schrijver
#25	Meisje	Jongen
#26	Programmeur & Meisje	Schrijver & Jongen
#27	Jongen	Meisje
#28	Programmeur & Jongen	Schrijver & Meisje
#29	Tennissen	Videospelletjes spelen
#30	Programmeur & Tennis	Schrijver & Videospelletjes spelen
#31	Videospelletjes spelen	Tennissen
#32	Programmeur & Videospelletjes spelen	Schrijver & Tennissen
#33	Alleen	Samen
#34	Programmeur & Alleen	Schrijver & Samen
#35	Samen	Alleen
#36	Programmeur & Samen	Schrijver & Alleen
#37	Fruit	Groente
library(ggplot2)
# Work on a copy of the control group for the descriptive statistics.
dataset <- participants_control

# Age: frequency table, mean and median (missing ages ignored).
dataset[["age"]] <- as.numeric(dataset[["age"]])
table(dataset[["age"]])
mean(dataset[["age"]], na.rm = TRUE)
median(dataset[["age"]], na.rm = TRUE)
# Draw a histogram of the `age` column of `dataset` (one bar per year of age)
# with the count printed for each bar, and display it on the current device.
plot_age_histogram <- function(dataset) {
  bars <- geom_histogram(binwidth = 1, colour = "grey", fill = "gray88")
  bar_counts <- stat_bin(binwidth = 1, geom = "text", aes(label = ..count..))
  text_sizes <- theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.text = element_text(size = 12)
  )
  age_axis <- scale_x_continuous(breaks = seq(7, 18, 1))

  ageHistogram <- ggplot(dataset, aes(age)) + bars + bar_counts + text_sizes + age_axis
  plot(ageHistogram)
}
plot_age_histogram(dataset)

# Cross table of gender by age.
table <- table(dataset$gender, dataset$age)

# Stacked bar chart of participants per age and gender. The frequencies are
# typed in by hand, eight ages (7-14) per gender group.
ages <- as.numeric(rep(7:14, times = 3))
gender <- rep(c("Boys", "Girls", "Neither or not shared"), each = 8)
frequenty <- c(
  5, 7, 18, 21, 15, 12, 4, 3,
  18, 11, 12, 16, 13, 17, 5, 5,
  6, 2, 3, 5, 2, 0, 0, 0
)
data <- data.frame(ages, gender, frequenty)

# Only segments whose frequency is in this set get a text label (leaves out 0).
labelled_rows <- data[data$frequenty %in% c(1:18, 21), ]

plot <- ggplot(data, aes(fill = gender, y = frequenty, x = ages)) +
  geom_bar(position = "stack", stat = "identity") +
  scale_fill_manual(values = c("#ffc107", "#004d40", "#d81b60")) +
  scale_x_continuous(breaks = seq(7, 18, 1)) +
  geom_text(
    aes(label = frequenty),
    position = position_stack(vjust = 0.5),
    colour = "white",
    size = 12,
    data = labelled_rows
  ) +
  theme(legend.position = "top") +
  guides(fill = guide_legend(title = "Gender: ")) +
  xlab("Ages") +
  ylab("Number of participants") +
  theme(
    text = element_text(size = 18),
    axis.title = element_text(size = 24),
    axis.text = element_text(size = 20)
  )
rm(labelled_rows)

plot(plot)
ggsave("plot.png", plot, width = 10, height = 10)
# gender
table(dataset$gender)

# ethnicity
table(dataset$ethnicity)

# Only "Nederland" selected.
sum(dataset$ethnicity %in% "{Nederland}")
# Unknown / rather not say.
sum(dataset$ethnicity %in% "{Weet ik niet of zeg ik liever niet}")

# "Nederland" combined with another answer.
# NOTE(review): two literals have a space after the last comma
# (", Nederland}") while the others do not - confirm against table() output.
sum(dataset$ethnicity %in% c(
  "{Nederland,Marokko}",
  "{Nederland,Indonesië}",
  "{Nederland,Polen}",
  "{Nederland,Suriname}",
  "{Nederland,Duitsland}",
  "{Nederland,Turkije}",
  "{Nederland,Weet ik niet of zeg ik liever niet}",
  "{Geen van bovenstaande, maar een ander land in Europa, Nederland}",
  "{Geen van bovenstaande, maar een ander land buiten Europa, Nederland}"
))

# Answers that do not include "Nederland".
# NOTE(review): "{Marokko, Suriname}" contains a space after the comma, unlike
# the other combined answers - confirm it matches the stored value.
amount_eth_non_NL <- sum(dataset$ethnicity %in% c(
  "{Marokko, Suriname}",
  "{Indonesië}",
  "{Polen}",
  "{Suriname}",
  "{Duitsland}",
  "{Turkije}",
  "{Marokko}",
  "{Geen van bovenstaande, maar een ander land in Europa}",
  "{Geen van bovenstaande, maar een ander land buiten Europa}"
))
# Programming experience: raw answer frequencies.
table(dataset$experience)

# No experience: answered "{Nee}" or ticked "Ik weet niet wat programmeren is".
amount_experience_no <- sum(
  dataset$experience %in% "{Nee}" |
    grepl("Ik weet niet wat programmeren is", dataset$experience)
)

# With experience: every answered row except the listed "no / don't know" answers.
no_experience_answers <- c(
  "{Nee}",
  "{Ik weet niet wat programmeren is}",
  "{Nee,Ik weet niet wat programmeren is}",
  "{Ja, op school,Ik weet niet wat programmeren is}",
  "{Ja, op school,Nee,Ik weet niet wat programmeren is}",
  "{Ja, op een activiteit buiten school bijvoorbeeld in de bibliotheek of bij een codeclub,Ja, bij familie, vrienden of thuis,Ik weet niet wat programmeren is}",
  "{Ja, op school,Ja, op een activiteit buiten school bijvoorbeeld in de bibliotheek of bij een codeclub,Ja, bij familie, vrienden of thuis,Ik weet niet wat programmeren is}"
)
subset_experience_yes <- dataset[
  which(!is.na(dataset$experience) & !(dataset$experience %in% no_experience_answers)), ,
  drop = FALSE
]
rm(no_experience_answers)

# Where the experience was gained (a participant can tick several places).
amount_experience_yes_school <- sum(
  grepl("Ja, op school", subset_experience_yes$experience)
)
amount_experience_yes_home <- sum(
  grepl("Ja, bij familie, vrienden of thuis", subset_experience_yes$experience)
)
amount_experience_yes_activity_outside_school <- sum(
  grepl(
    "Ja, op een activiteit buiten school bijvoorbeeld in de bibliotheek of bij een codeclub",
    subset_experience_yes$experience
  )
)

# Bar chart of experience by gender. The counts and group sizes (97 and 85) are
# typed in by hand rather than taken from the variables above.
gender <- rep(c("Girls", "Boys"), each = 5)
answers <- rep(c('no', 'yes - all', 'yes - home', 'yes - out-of-school', ' yes-school'), 2)
percentage <- c(
  c(48, 49, 14, 8, 37) / 97,
  c(27, 58, 19, 13, 38) / 85
) * 100
toplot <- data.frame(gender, answers, percentage)

barChartExperienceGender <- ggplot(toplot, aes(answers, percentage, fill = gender)) +
  scale_fill_manual(values = c("#ffc107", "#004d40")) +
  labs(x = "programming experience", y = "percentage") +
  scale_x_discrete(
    limits = c('no', 'yes - all', 'yes - home', 'yes - out-of-school', ' yes-school')
  ) +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 20),
    legend.text = element_text(size = 14)
  ) +
  stat_summary(geom = "bar", position = "dodge")
plot(barChartExperienceGender)
ggsave("barChartExperienceGender.png", barChartExperienceGender, width = 10, height = 6)

# Disabled debug output of the counts computed above:
#print('no experience incl unknown')
#print(amount_experience_no)
#print('yes')
#print(nrow(subset_experience_yes))
#print('yes,  school')
#print(amount_experience_yes_school)
#print('yes, home')
#print(amount_experience_yes_home)
#print('yes, outside school activity')
#print(amount_experience_yes_activity_outside_school)

rm(dataset, amount_eth_non_NL, plot_age_histogram)
library(ggplot2)
# Return the most frequent value of `v`; on a tie, the value that occurs first.
getmode <- function(v) {
  distinct_values <- unique(v)
  # Position of every element within the distinct values, then count per position.
  positions <- match(v, distinct_values)
  occurrences <- tabulate(positions)
  most_frequent <- which.max(occurrences)
  distinct_values[most_frequent]
}
### Interest in becoming a programmer (question 5)
# Answer distribution, mode and median over the whole control group.
table(participants_control$q5)
print(paste("mode on question 5 -", getmode(participants_control$q5)))
print(paste("median on question 5 -", median(as.numeric(participants_control$q5))))

# gender: the same descriptives for girls and for boys
table(participants_girls$q5)
getmode(participants_girls$q5)
median(participants_girls$q5)

table(participants_boys$q5)
getmode(participants_boys$q5)
median(participants_boys$q5)

# Girls versus boys on question 5 (Wilcoxon rank-sum test): shown once, then
# stored for the effect size.
wilcox.test(participants_girls$q5, participants_boys$q5)
model <- wilcox.test(participants_girls$q5, participants_boys$q5)

# Effect size r = z / sqrt(N), with z derived from the two-sided p-value.
# NOTE(review): N is hard-coded as 97 + 85, while the percentages below divide
# by 101 and 89 - confirm which group sizes are correct.
N <- 97 + 85
z <- qnorm(model$p.value / 2)
r <- z / sqrt(N)

# Percentage per answer option and gender.
# NOTE(review): assumes each table() has exactly five cells, ordered as the
# labels in `answers` - confirm the coding of the answers.
gender <- rep(c("Girls", "Boys"), each = 5)
answers <- rep(c("Agree", "Slightly agree", "Neutral", "Slightly disagree", "Disagree"), 2)
percentages <- c(
  table(participants_girls$q5) / 101 * 100,
  table(participants_boys$q5) / 89 * 100
)
toplot <- data.frame(gender, answers, percentages)
# age & gender
# Question-5 answers compared between age categories and between genders with
# Wilcoxon rank-sum tests. Every comparison is run exactly once; the earlier
# copy-pasted repeats only re-printed identical results.
# NOTE(review): 16 tests without a multiple-comparison correction - confirm
# whether p-values should be adjusted before reporting.
participants_girls_7_8 <- subset(participants_girls, participants_girls$age_category == 1)
participants_girls_9_10 <- subset(participants_girls, participants_girls$age_category == 2)
participants_girls_11_12 <- subset(participants_girls, participants_girls$age_category == 3)
participants_girls_13_14 <- subset(participants_girls, participants_girls$age_category == 4)
participants_boys_7_8 <- subset(participants_boys, participants_boys$age_category == 1)
participants_boys_9_10 <- subset(participants_boys, participants_boys$age_category == 2)
participants_boys_11_12 <- subset(participants_boys, participants_boys$age_category == 3)
participants_boys_13_14 <- subset(participants_boys, participants_boys$age_category == 4)

# Girls: every pair of age categories.
wilcox.test(participants_girls_7_8$q5, participants_girls_9_10$q5)
wilcox.test(participants_girls_7_8$q5, participants_girls_11_12$q5)
wilcox.test(participants_girls_7_8$q5, participants_girls_13_14$q5)
wilcox.test(participants_girls_9_10$q5, participants_girls_11_12$q5)
wilcox.test(participants_girls_9_10$q5, participants_girls_13_14$q5)
wilcox.test(participants_girls_11_12$q5, participants_girls_13_14$q5)

# Boys: every pair of age categories.
wilcox.test(participants_boys_7_8$q5, participants_boys_9_10$q5)
wilcox.test(participants_boys_7_8$q5, participants_boys_11_12$q5)
wilcox.test(participants_boys_7_8$q5, participants_boys_13_14$q5)
wilcox.test(participants_boys_9_10$q5, participants_boys_11_12$q5)
wilcox.test(participants_boys_9_10$q5, participants_boys_13_14$q5)
wilcox.test(participants_boys_11_12$q5, participants_boys_13_14$q5)

# Girls versus boys within each age category.
wilcox.test(participants_girls_7_8$q5, participants_boys_7_8$q5)
wilcox.test(participants_girls_9_10$q5, participants_boys_9_10$q5)
wilcox.test(participants_girls_11_12$q5, participants_boys_11_12$q5)
wilcox.test(participants_girls_13_14$q5, participants_boys_13_14$q5)
