# install.packages("tidyverse")
library(tidyverse)

# Data wrangling
library(tidyverse) # using tidyr
# Simulate twelve monthly weights for three mice. The seed is fixed so the
# draws are reproducible; the columns are filled in the order mickey, minnie,
# mighty, which fixes which random numbers each mouse receives.
set.seed(1)
mouse.weights.sim <- data.frame(
  time   = seq(from = as.Date("2017/1/1"), by = "month", length.out = 12),
  mickey = rnorm(n = 12, mean = 20, sd = 1),
  minnie = rnorm(n = 12, mean = 20, sd = 2),
  mighty = rnorm(n = 12, mean = 20, sd = 4)
)

# Reshape the wide simulation (one column per mouse) into long form with a
# `mouse` column and a `weight` column. The three calls below differ only in
# how the columns to stack are specified; each one overwrites mouse.weights.

# 1. Name the mouse columns one by one.
mouse.weights <- mouse.weights.sim %>%
  gather(key = mouse, value = weight, mickey, minnie, mighty)

# 2. Exclude the column that should be left alone.
mouse.weights <- mouse.weights.sim %>%
  gather(key = mouse, value = weight, -time)

# 3. Give a range of adjacent columns.
mouse.weights <- mouse.weights.sim %>%
  gather(key = mouse, value = weight, mickey:mighty)

# Distribution of weights per mouse, one filled box per mouse.
ggplot(mouse.weights, aes(mouse, weight)) +
  geom_boxplot(aes(fill = mouse))

# Distribution of weights per time point (one box per value of time).
ggplot(mouse.weights, aes(time, weight)) +
  geom_boxplot(aes(group = time))

# Same boxes with the individual observations drawn on top, coloured by mouse.
ggplot(mouse.weights, aes(time, weight)) +
  geom_boxplot(aes(group = time)) +
  geom_point(aes(colour = mouse))

# Weight trajectories: one coloured point series and connecting line per mouse.
ggplot(mouse.weights, aes(time, weight)) +
  geom_point(aes(colour = mouse)) +
  geom_line(aes(group = mouse, colour = mouse))

#rm(USPersonalExpenditure)
# Turn USPersonalExpenditure into a data frame, keep its row names as an
# explicit Category column, then stack every other column into Year/Amount
# pairs (one row per Category and Year).
uspe.df <- as.data.frame(USPersonalExpenditure)
uspe.df[["Category"]] <- rownames(USPersonalExpenditure)
uspe <- uspe.df %>%
  gather(key = Year, value = Amount, -Category)

# Stacked bars: one bar per Year, bar segments are the Amount of each Category.
# stat = "identity" makes the bar heights the Amount values themselves.
ggplot(uspe, aes(x = Year, y = Amount, fill = Category)) +
  geom_bar(stat = "identity")

# Same stacked bars with the legend anchored at the top-left corner
# (position (0, 1), justified by the legend's own top-left corner).
ggplot(uspe, aes(x = Year, y = Amount, fill = Category)) +
  geom_bar(stat = "identity") +
  theme(legend.position = c(0, 1), legend.justification = c(0, 1))

# Side-by-side ("dodge") bars instead of stacked, legend in the same place.
ggplot(uspe, aes(x = Year, y = Amount, fill = Category)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(legend.position = c(0, 1), legend.justification = c(0, 1))


# Long -> wide: one column per mouse again.
mouse.weights %>%
  spread(key = mouse, value = weight)

# NOTE(review): `ablation` is not created anywhere in the visible script; it is
# assumed to already exist in the workspace when this runs.
# One Score column per CellType.
ablation %>%
  spread(key = CellType, value = Score)

# Combine Experiment and CellType into a single "Experiment.CellType" key ...
abl.united <- ablation %>%
  unite(col = expt_cell, Experiment, CellType, sep = ".")

# ... so that spreading gives one Score column per experiment/cell combination.
abl.united %>%
  spread(key = expt_cell, value = Score)

# Split the combined key back into two columns. The separator is a regular
# expression, so the literal dot is escaped.
abl.united %>%
  separate(col = expt_cell, into = c("Expt", "Cell"), sep = "\\.")

library(dplyr)
# Lookup table recording which technician ran each experiment. Both columns
# are stored as factors because stringsAsFactors = TRUE; str() shows this.
experiment.log <- data.frame(Experiment = c("E1909", "E1915", "E1921"),
                             Tech = c("Goneril", "Regan", "Cordelia"),
                             stringsAsFactors = TRUE)
str(experiment.log)
experiment.log
# Attach the technician to every ablation row whose Experiment appears in the
# log. The key is spelled out so the join does not depend on whichever column
# names the two tables happen to share, and so it runs without the
# "Joining with by = ..." message.
# NOTE(review): assumes Experiment is the only column shared by `ablation` and
# `experiment.log` -- confirm against the real `ablation` data.
inner_join(ablation, experiment.log, by = "Experiment")

# Round trip: write the `ablation` object to ablation.Rdata in the working
# directory, then read it back under the same name.
save(list = "ablation", file = "ablation.Rdata")
load(file = "ablation.Rdata")

# --- Keeping columns -------------------------------------------------------
# dplyr and base-R ways of keeping just name and sleep_total.
head(select(msleep, c(name, sleep_total)))
head(msleep[c("name", "sleep_total")])
class(msleep)

# The same selection written as a pipeline, on one line and spread over lines.
msleep %>% select(name, sleep_total) %>% head()
msleep %>%
  select(name, sleep_total) %>%
  head()

# --- Dropping columns ------------------------------------------------------
# Drop the first column by position (base) or by name (dplyr).
head(msleep[-1])
msleep %>% select(-name) %>% head()

# Drop several columns at once.
head(select(msleep, -name, -sleep_total))
msleep %>%
  select(-name, -sleep_total) %>%
  head()

# --- Selecting by name prefix ----------------------------------------------
# Columns whose names start with "sl", via the dplyr helper and via base R.
head(select(msleep, starts_with("sl")))
head(msleep[startsWith(names(msleep), "sl")])

# Rows with at least 16 hours of total sleep: base indexing, then dplyr.
msleep[msleep[["sleep_total"]] >= 16, ]
filter(msleep, sleep_total >= 16)

# Rows belonging to one of two orders: dplyr, then base indexing.
filter(msleep, order %in% c("Perissodactyla", "Primates"))
msleep[is.element(msleep[["order"]], c("Perissodactyla", "Primates")), ]

# Two conditions that must both hold. In filter() they can be given as
# separate arguments or joined with `&`; base indexing needs the explicit `&`.
filter(msleep, sleep_total >= 16, bodywt >= 1)
filter(msleep, sleep_total >= 16 & bodywt >= 1)
msleep[msleep[["sleep_total"]] >= 16 & msleep[["bodywt"]] >= 1, ]

# Sort by order, ascending; show the first rows.
head(arrange(msleep, order))

# Sort by order, descending.
head(arrange(msleep, desc(order)))

# Keep three columns, then sort by order and, within order, by sleep_total.
msleep %>%
  select(name, order, sleep_total) %>%
  arrange(order, sleep_total) %>%
  head()

# Sort first, then select: sleep_total is still available for sorting even
# though it is dropped from the displayed columns afterwards.
msleep %>%
  arrange(order, sleep_total) %>%
  select(name, order) %>%
  head()

# Overall mean tooth length.
summarise(ToothGrowth, meanLen = mean(len))

# Mean tooth length per supplement.
summarise(group_by(ToothGrowth, supp), meanLen = mean(len))

# Mean length and number of observations per supplement/dose combination.
ToothGrowth %>%
  group_by(supp, dose) %>%
  summarise(meanLen = mean(len), n = n())

# Per-group transformations that keep every row: standardise len within its
# supplement/dose group and record the group maximum. print(n = 60) asks for
# up to 60 rows to be shown.
ToothGrowth %>%
  group_by(supp, dose) %>%
  mutate(
    norm.len = (len - mean(len)) / sd(len),
    max = max(len)
  ) %>%
  print(n = 60)

# Mean Score for every Time/Measurement/CellType combination, then spread so
# that each CellType gets its own column of means.
ablation %>%
  select(Time, Measurement, CellType, Score) %>%
  group_by(Time, Measurement, CellType) %>%
  summarise(mean.score = mean(Score)) %>%
  spread(key = CellType, value = mean.score)

# Smallest and largest Score within each Time/Measurement/CellType group.
ablation %>%
  select(Time, Measurement, CellType, Score) %>%
  group_by(Time, Measurement, CellType) %>%
  summarise(
    the.min = min(Score),
    the.max = max(Score)
  )

# Mean and standard deviation of Score per Time/Measurement/CellType group.
ablation.mean.sd <- ablation %>%
  select(Time, Measurement, CellType, Score) %>%
  group_by(Time, Measurement, CellType) %>%
  summarise(
    the.mean = mean(Score),
    the.sd = sd(Score)
  )

# Group means over Time with +/- 1 SD error bars, one panel per
# Measurement (rows) and CellType (columns). The raw scores from `ablation`
# are overlaid as open blue circles.
ggplot(ablation.mean.sd, aes(Time, the.mean)) +
  geom_point(size = 4) +
  geom_errorbar(
    aes(ymin = the.mean - the.sd, ymax = the.mean + the.sd),
    width = 0.4
  ) +
  geom_line() +
  geom_point(aes(y = Score), data = ablation, colour = "blue", shape = 1) +
  facet_grid(Measurement ~ CellType) +
  labs(title = "+/- 1 SD")

# Mean Score per Time/Measurement/CellType group together with the lower and
# upper ends of the confidence interval reported by t.test() (its default
# confidence level is used, since none is passed).
ablation.mean.ci <- ablation %>%
  select(Time, Measurement, CellType, Score) %>%
  group_by(Time, Measurement, CellType) %>%
  summarise(
    the.mean = mean(Score),
    lower.limit = t.test(Score)[["conf.int"]][1],
    upper.limit = t.test(Score)[["conf.int"]][2]
  )

# Group means with +/- 1 SD error bars (as in the previous SD plot), with the
# raw scores overlaid as open circles coloured by whether they lie more than
# one standard deviation from their own Measurement/CellType/Time group mean.
#
# The outlier flag is turned into a factor whose levels carry the legend text.
# That ties each label to its value (FALSE -> "Within 1 SD",
# TRUE -> "Outside 1 SD") instead of relying on the positional order of the
# scale's breaks, which mislabels or fails when only one of the two values
# occurs in the data. drop = FALSE keeps both levels in the legend and keeps
# their colours fixed even if one level is absent.
ggplot(ablation.mean.sd, aes(x = Time, y = the.mean)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = the.mean - the.sd,
                    ymax = the.mean + the.sd), width = 0.4) +
  facet_grid(Measurement ~ CellType) + geom_line() +
  geom_point(data = ablation %>%
               group_by(Measurement, CellType, Time) %>%
               mutate(outlier = factor(
                 abs((Score - mean(Score)) / sd(Score)) > 1,
                 levels = c(FALSE, TRUE),
                 labels = c("Within 1 SD", "Outside 1 SD")
               )) %>%
               ungroup(),
             aes(y = Score, color = outlier), size = 4, shape = 1) +
  labs(title = "+/- 1 SD", y = "Mean") +
  scale_colour_discrete(name = "Outlier Status", drop = FALSE)