library("ggplot2") library("readxl") library("dplyr") library("tidyr") evF <- function(ch, so) { # For each input vector, keeps track of running experienced EV for two possible gambles. # ch - choice (sampkey) # so - experienced outcome for a trial (sampout) # output is boolean, whether the larger EEV value is chosen. # To be used with dplyr's group_by and mutate. out <- rep(NA, length(ch)) ev <- rep(0, 2) den <- rep(0, 2) lr <- NA # risk is 1, safe is 2 for (i in 1:length(ch)) { lr <- ifelse(ch[i] == "R", 1, 2) out[i] <- ifelse((ev[1] > ev[2] & ch[i] == "R") | (ev[1] < ev[2] & ch[i] == "S"), 1, 0) den[lr] <- den[lr] + 1 ev[lr] <- (ev[lr] * (den[lr] - 1) + so[i]) / (den[lr]) } out } maxEVLarger <- function(ch, so, low, med, high, phigh) { # For each input vector, keeps track of running EEV for two possible gambles. # ch - choice (sampkey) # so - experienced outcome for a trial (sampout) # low - low payoff # med - certain, medium payoff # high - high payoff # phigh - probability of experiencing high payoff # output is boolean, whether the higher EEV is also the higher EV outcome. out <- rep(NA, length(ch)) ev <- rep(0, 2) # eev den <- rep(0, 2) lr <- NA ev2 <- c(high[1] * phigh[1] + low[1] * (1 - phigh[1]), med[1]) #ev # risk is 1, safe is 2 for (i in 1:length(ch)) { lr <- ifelse(ch[i] == "R", 1, 2) out[i] <- ifelse((ev[1] >= ev[2] & ev2[1] >= ev2[2]) | (ev[1] <= ev[2] & ev2[1] <= ev2[2]), 1, 0) ev[lr] <- (ev[lr] * den[lr] + so[i]) / (den[lr] + 1) den[lr] <- den[lr] + 1 } out } # load raw data from uncompressed file structure # assumes that the TPT data are the only .xls files in the working directory dat <- do.call("rbind", lapply(dir()[grepl(".xls", dir()) & !grepl("sx", dir())], readxl::read_excel)) dat <- dat %>% dplyr::group_by(ID, Problem, set) %>% dplyr::summarise(distinct = length(unique(sampout)), # count number of distinct outcomes d_poss = length(unique(c(med, high, low)))) %>% # number distinct outcomes dplyr::left_join(dat, .) %>% dplyr::filter(distinct >= d_poss) %>% dplyr::group_by(ID, Problem, set) %>% # remove questions with unsampled outcomes dplyr::mutate(cmax = evF(sampkey, sampout), orig = maxEVLarger(sampkey, sampout, low, med, high, phigh)) tmp <- dat %>% # remove small-sample trial numbers dplyr::filter(Trial < 134) %>% dplyr::group_by(ID, Problem, set) %>% dplyr::mutate(choice = ifelse(sampkey == "R", 1, 0), rcl = dplyr::lag(choice, 1)) %>% dplyr::group_by(Trial, ID, Problem) %>% # determine exploratory status of each choice dplyr::mutate(explore = abs(choice - rcl)) # munge the long data into proportions by trial. fig3 <- tmp %>% dplyr::group_by(Trial) %>% dplyr::summarise(arate = mean(explore, na.rm = TRUE), samph = mean(cmax), orig = mean(orig)) %>% tidyr::gather(measure, obs, -Trial) # plot figure 3 fig3 <- ggplot2::ggplot(fig3, ggplot2::aes(Trial, obs, color = measure)) + ggplot2::geom_line(ggplot2::aes(group = measure)) + ggplot2::labs(y = "Rate") + ggplot2::theme_bw() + ggplot2::ylim(c(0, 1)) + ggplot2::scale_color_brewer(palette = 4, type = "qual") # save pdf of plot ggplot2::ggsave(file = "fig3.pdf", plot = fig3)