#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#                   Intro to the Tidyverse by Colleen O'Briant
#                             Koan #10: ggplot geoms
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# In order to progress:
# 1. Read all instructions carefully.
# 2. When you come to an exercise, fill in the blank, un-comment the line
#    (Ctrl/Cmd Shift C), and execute the code in the console (Ctrl/Cmd Return).
#    If the piece of code spans multiple lines, highlight the whole chunk or
#    simply put your cursor at the end of the last line.
# 3. Save (Ctrl/Cmd S).
# 4. Test that your answers are correct (Ctrl/Cmd Shift T).

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# In this koan, you'll practice using some of the most commonly used 'geom's,
# one by one. Keep in mind that you can use multiple 'geoms' at a time by
# layering them on top of each other.

# Run this code to get started:

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(gapminder)

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# My favorite 'geom's for looking at the distribution of one variable:

#  'geom_histogram()',
#  'geom_freqpoly()',
#  'geom_area()',
#  'geom_density()'

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

#                           ----- geom_histogram -----

# 1. Draw a histogram to visualize lifeExp using geom_histogram(). -------------

#01@

# ggplot(
#   data = gapminder,
#   aes(x = lifeExp, color = continent, fill = continent)
#   ) +
#   __

#@01

# 2. Experiment with different binwidths for your histogram. -------------------

#02@

# ggplot(
#   data = gapminder,
#   aes(x = lifeExp, color = continent, fill = continent)
#   ) +
#   __(binwidth = __)

#@02

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

#                           ----- geom_freqpoly -----
#
# 3. geom_freqpoly() is just like a histogram, but it uses lines instead -------
# of bars to communicate the number of observations in each bin. Again,
# experiment with binwidth.

#03@

# ggplot(
#   data = gapminder, aes(x = lifeExp, color = continent)
#   ) +
#   __(binwidth = __)

#@03

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

#                             ----- geom_area -----
#
# 4. Make a filled frequency polygon using an area plot: geom_area(). In -------
# the blank, experiment with setting 'color' versus 'fill' as 'continent'.

#04@

# ggplot(data = gapminder, aes(x = lifeExp, __ = continent)) +
#   geom_area(stat = "bin")

#@04

# 5. Notice that the continents are stacked up on top of each other in ---------
# the previous plot. To change that behavior, set position = "dodge".

#05@

# ggplot(data = gapminder, aes(x = lifeExp, fill = continent)) +
#   geom_area(stat = "bin", position = __)

#@05

# 6. The issue now is that there's overplotting. Data for the Americas ---------
# is totally hidden behind the other continents! One way to fix this is
# to adjust the transparency of the filled areas through 'alpha'. Setting
# 'alpha = .5' makes the 'geom' 50% transparent. Experiment with different
# alphas.

#06@

# ggplot(data = gapminder, aes(x = lifeExp, fill = continent)) +
#   geom_area(stat = "bin", position = "dodge", alpha = __)

#@06

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

#                            ----- geom_density -----
#
# An alternative to bin visualizations is density visualizations. They smooth
# out the distributions and account for factors like the fact that there aren't
# very many observations from Oceania. The downside is what's measured on the
# y-axis is less intuitive than the 'count' we've seen in the other geoms.

# 7. Experiment with setting 'color' and 'fill' to 'continent'. ----------------

#07@

# ggplot(data = gapminder, aes(x = lifeExp, __)) +
#   __()

#@07

# 8. With geom_density and fill, experiment with different alphas. -------------

#08@

# ggplot(data = gapminder, aes(x = lifeExp, __)) +
#   __(alpha = __)

#@08

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

#                    ----- Graphical primitives to know -----
#
# geom_vline() adds a vertical line
# geom_hline() adds a horizontal line
# geom_segment() adds a line segment
# geom_rect() adds a rectangle
# annotate() adds annotations inside the plot

# 9. Add a vertical line to this plot. -----------------------------------------
#     Hint: for a vertical line, you'll need to specify an x-intercept
#     (and for a horizontal line, you'd need to specify a y-intercept).
#     Do this with 'xintercept = 70'.

#09@

# ggplot(
#   data = gapminder,
#   aes(x = lifeExp, color = continent, fill = continent)
#   ) +
#   geom_density(alpha = .3) +
#   __

#@09


# 10. Include the vertical line you did above, and also note how the -----------
# annotation ('annotate()') labels the line.

#10@

# ggplot(
#   data = gapminder,
#   aes(x = lifeExp, color = continent, fill = continent)
#   ) +
#   geom_density(alpha = .3) +
#   __ +
#   annotate(
#     "text", x = 70, y = .075, label = "70 years", angle = 90
#     )

#@10

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

#                       ----- Variables on both axes -----
#
# My favorite 'geom's for plots comparing a variable on the x-axis to a
# variable on the y-axis:

# geom_point()
# geom_smooth()
# geom_line()
# geom_hex()
# geom_boxplot()
# geom_violin()

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# 11. We've already talked about basic scatterplots. Try to recall how to ------
# draw a ggplot that plots 'gdpPercap' on the x-axis and 'lifeExp' on the
# y-axis.

#11@

# __

#@11


# 12. Add a smoothed line layer to the plot you did above using ----------------
# geom_smooth().

#12@

# __

#@12


# 13. Remove the standard error ribbon using geom_smooth(se = FALSE). ----------

#13@

# __

#@13


# 14. Visualize the OLS fit by using geom_smooth(method = "lm"). ---------------
#    "lm" stands for "linear model".

#14@

# __

#@14

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# The visualization we made in 14) needs some work. The first issue is that a
# linear specification does not seem to fit very well. Let's try applying a log
# transformation to gdpPercap.

# 15. Instead of plotting 'gdpPercap' on the x-axis, plot the log of -----------
# 'gdpPercap' using log(). You should find that a linear model seems to fit this
# transformation much better.

#15@

# ggplot(data = gapminder, aes(x = __, y = lifeExp)) +
#   geom_point() +
#   geom_smooth(method = "lm")

#@15


# Notice that the units on the x-axis of the previous plot are in log terms,
# which is hard to interpret. I prefer this method: Do a log transformation of
# the x-axis using 'scale_x_log10()', and use 'labels = scales::comma' to
# suppress scientific notation on the labels for the x-axis.

# Scatterplot plus OLS fit, drawn on a log10 x-axis. 'scales::comma' prints the
# axis labels as ordinary numbers instead of scientific notation. The scale can
# be added before or after the geoms; the resulting plot is the same.
gapminder %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_x_log10(labels = scales::comma)
## `geom_smooth()` using formula 'y ~ x'

# 16. The second issue is that there's over-plotting near the center. I'd ------
# like a way to visualize how dense the points are getting. For this, we can
# replace 'geom_point()' with 'geom_hex()'.

#16@

# ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
#   scale_x_log10(labels = scales::comma) +
#   __ +
#   geom_smooth(method = "lm")

#@16

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# 17. Draw a line plot using 'geom_line'. Compare US 'gdpPercap' over time -----
# to two other countries of your choice.

#17@

# gapminder %>%
#   filter(country %in% c("United States", __, __)) %>%
#   ggplot(aes(x = year, y = gdpPercap, color = country)) +
#   geom_line()

#@17

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# 18. Draw a boxplot with geom_boxplot() that compares the gdpPercap (on -------
# the y-axis) of different continents (on the x-axis, also using color).
# Experiment with applying a log transformation to gdpPercap (now on the y
# axis).

#18@

# ggplot(
#   data = gapminder,
#   aes(x = continent, y = gdpPercap, color = continent)
#   ) +
#   __

#@18

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# 19. Change the plot from the previous question to be a violin plot -----------
# using 'geom_violin'. Also include 'fill'. Make sure you use 'scale_y_log10()'
# to transform the y-axis into log terms.

#19@

# ggplot(
#   data = gapminder,
#   aes(x = __, y = __, color = __, fill = __)
#   ) +
#   scale__ +
#   __

#@19

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# Finally, check out how simple it is to change your 'facet_wrap' to a slick
# animation with 'gganimate'! The 'facet_wrap' version:

# Small multiples: one panel per year. 'color = continent' is mapped only
# inside geom_point(), so the grey density contours in each panel are drawn
# over all continents together rather than once per continent.
gapminder %>%
  ggplot(mapping = aes(x = log(gdpPercap), y = lifeExp)) +
  geom_point(mapping = aes(color = continent)) +
  geom_density2d(alpha = .5, color = "grey") +
  facet_wrap(vars(year))

# The animated version (this may take a second to render). Some of you may
# have trouble installing gganimate; if that's you, don't stress over it and
# just leave the code below commented out.

# ggplot(
#   data = gapminder,
#   aes(x = log(gdpPercap), y = lifeExp)
#   ) +
#   geom_point(aes(color = continent)) +
#   geom_density2d(color = "grey", alpha = .5) +
#   gganimate::transition_time(year) +
#   labs(title = "Year: {frame_time}")

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# Great work! You're one step closer to tidyverse enlightenment. Make sure to
# return to this topic to meditate on it later.

# To keep learning about ggplot (a great skill for your resume!), this is an
# excellent manual: https://ggplot2-book.org/introduction.html