#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# Intro to the Tidyverse by Colleen O'Briant
# Koan #6: arrange and slice
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# In order to progress:
# 1. Read all instructions carefully.
# 2. When you come to an exercise, fill in the blank, un-comment the line
# (Ctrl/Cmd Shift C), and execute the code in the console (Ctrl/Cmd Return).
# If the piece of code spans multiple lines, highlight the whole chunk or
# simply put your cursor at the end of the last line.
# 3. Save (Ctrl/Cmd S).
# 4. Test that your answers are correct (Ctrl/Cmd Shift T).
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# In this koan, you'll learn the final two dplyr verbs: arrange() and slice().
# Run this code to get started.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(gapminder)
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# ----- arrange() -----
# arrange() reorders the rows of a tibble, sorting them by the variable
# you name.
gapminder %>%
  arrange(pop)
## # A tibble: 1,704 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Sao Tome and Principe Africa 1952 46.5 60011 880.
## 2 Sao Tome and Principe Africa 1957 48.9 61325 861.
## 3 Djibouti Africa 1952 34.8 63149 2670.
## 4 Sao Tome and Principe Africa 1962 51.9 65345 1072.
## 5 Sao Tome and Principe Africa 1967 54.4 70787 1385.
## 6 Djibouti Africa 1957 37.3 71851 2865.
## 7 Sao Tome and Principe Africa 1972 56.5 76595 1533.
## 8 Sao Tome and Principe Africa 1977 58.6 86796 1738.
## 9 Djibouti Africa 1962 39.7 89898 3021.
## 10 Sao Tome and Principe Africa 1982 60.4 98593 1890.
## # … with 1,694 more rows
# The code above rearranges 'gapminder' to put the observations with the
# lowest population first, so that the variable 'pop' is ascending.
# To sort 'pop' in descending order instead (largest populations first),
# wrap the variable in 'desc()':
gapminder %>%
  arrange(desc(pop))
## # A tibble: 1,704 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 China Asia 2007 73.0 1318683096 4959.
## 2 China Asia 2002 72.0 1280400000 3119.
## 3 China Asia 1997 70.4 1230075000 2289.
## 4 China Asia 1992 68.7 1164970000 1656.
## 5 India Asia 2007 64.7 1110396331 2452.
## 6 China Asia 1987 67.3 1084035000 1379.
## 7 India Asia 2002 62.9 1034172547 1747.
## 8 China Asia 1982 65.5 1000281000 962.
## 9 India Asia 1997 61.8 959000000 1459.
## 10 China Asia 1977 64.0 943455000 741.
## # … with 1,694 more rows
# 1. Take all the observations in Asia and sort them from the lowest -----------
# life expectancies to the highest.
#1@
# __
#@1
# 2. Take all the observations in Asia and sort them from the highest ----------
# life expectancies to the lowest.
#2@
# __
#@2
# To learn more, read the qelp help page for arrange():
?qelp::arrange
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# ----- slice() -----
# slice() picks out rows by their integer position. For example, this
# selects the second row of 'gapminder':
gapminder %>%
  slice(2)
## # A tibble: 1 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1957 30.3 9240934 821.
# To select the second, third, and fourth rows, pass a vector of positions:
gapminder %>%
  slice(c(2, 3, 4))
## # A tibble: 3 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1957 30.3 9240934 821.
## 2 Afghanistan Asia 1962 32.0 10267083 853.
## 3 Afghanistan Asia 1967 34.0 11537966 836.
# Or equivalently, using the ':' shorthand for a run of consecutive integers:
gapminder %>%
  slice(2:4)
## # A tibble: 3 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1957 30.3 9240934 821.
## 2 Afghanistan Asia 1962 32.0 10267083 853.
## 3 Afghanistan Asia 1967 34.0 11537966 836.
# 3. Select row 853 to row 864. ------------------------------------------------
#3@
# __
#@3
# slice_head() and slice_tail() are two close relatives of slice().
# slice_head() keeps the first 'n' rows of a tibble:
gapminder %>%
  slice_head(n = 2)
## # A tibble: 2 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
# slice_tail() keeps the last 'n' rows of a tibble:
gapminder %>%
  slice_tail(n = 2)
## # A tibble: 2 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Zimbabwe Africa 2002 40.0 11926563 672.
## 2 Zimbabwe Africa 2007 43.5 12311143 470.
# 4. Take all the observations in Asia and return the five with the ------------
# highest life expectancies (hint: use arrange()).
#4@
# __
#@4
# 5. Which (one) country has the highest life expectancy in each continent? ----
# (hint: combine group_by() and arrange() with slice_head()).
#5@
# __
#@5
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# Great work! You're one step closer to tidyverse enlightenment. Make sure to
# return to this topic to meditate on it later.
# If you're ready, you can move on to koan 7: left_join.