K06_dplyr3.R

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#                   Intro to the Tidyverse by Colleen O'Briant
#                           Koan #6: arrange and slice
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# In order to progress:
# 1. Read all instructions carefully.
# 2. When you come to an exercise, fill in the blank, un-comment the line
#    (Ctrl/Cmd Shift C), and execute the code in the console (Ctrl/Cmd Return).
#    If the piece of code spans multiple lines, highlight the whole chunk or
#    simply put your cursor at the end of the last line.
# 3. Save (Ctrl/Cmd S).
# 4. Test that your answers are correct (Ctrl/Cmd Shift T).

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# In this koan, you'll learn the final two dplyr verbs: arrange() and slice().

# Run this code to get started.

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(gapminder)

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

#                             ----- arrange() -----

# arrange() rearranges the rows of your tibble by the variable you specify.

gapminder %>%
  arrange(pop)

## # A tibble: 1,704 × 6
##    country               continent  year lifeExp   pop gdpPercap
##    <fct>                 <fct>     <int>   <dbl> <int>     <dbl>
##  1 Sao Tome and Principe Africa     1952    46.5 60011      880.
##  2 Sao Tome and Principe Africa     1957    48.9 61325      861.
##  3 Djibouti              Africa     1952    34.8 63149     2670.
##  4 Sao Tome and Principe Africa     1962    51.9 65345     1072.
##  5 Sao Tome and Principe Africa     1967    54.4 70787     1385.
##  6 Djibouti              Africa     1957    37.3 71851     2865.
##  7 Sao Tome and Principe Africa     1972    56.5 76595     1533.
##  8 Sao Tome and Principe Africa     1977    58.6 86796     1738.
##  9 Djibouti              Africa     1962    39.7 89898     3021.
## 10 Sao Tome and Principe Africa     1982    60.4 98593     1890.
## # … with 1,694 more rows

# The code above rearranges 'gapminder' to put the observations with the
# lowest population first, so that the variable 'pop' is ascending.

# If instead you wanted 'pop' to be descending (the observations with the
# largest populations first), you can use 'desc()':

gapminder %>%
  arrange(desc(pop))

## # A tibble: 1,704 × 6
##    country continent  year lifeExp        pop gdpPercap
##    <fct>   <fct>     <int>   <dbl>      <int>     <dbl>
##  1 China   Asia       2007    73.0 1318683096     4959.
##  2 China   Asia       2002    72.0 1280400000     3119.
##  3 China   Asia       1997    70.4 1230075000     2289.
##  4 China   Asia       1992    68.7 1164970000     1656.
##  5 India   Asia       2007    64.7 1110396331     2452.
##  6 China   Asia       1987    67.3 1084035000     1379.
##  7 India   Asia       2002    62.9 1034172547     1747.
##  8 China   Asia       1982    65.5 1000281000      962.
##  9 India   Asia       1997    61.8  959000000     1459.
## 10 China   Asia       1977    64.0  943455000      741.
## # … with 1,694 more rows

# 1. Take all the observations in Asia and sort them from the lowest -----------
# life expectancies to the highest.

#1@

# __

#@1

# 2. Take all the observations in Asia and sort them from the highest ----------
# life expectancies to the lowest.

#2@

# __

#@2

# Read the qelp docs on arrange():

?qelp::arrange

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

#                              ----- slice() -----

# slice() lets you select rows by their integer locations. So if you
# want to select the second row of 'gapminder':

gapminder %>%
  slice(2)

## # A tibble: 1 × 6
##   country     continent  year lifeExp     pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>   <int>     <dbl>
## 1 Afghanistan Asia       1957    30.3 9240934      821.

# If you want to select the second, third, and fourth rows:

gapminder %>%
  slice(c(2, 3, 4))

## # A tibble: 3 × 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       1957    30.3  9240934      821.
## 2 Afghanistan Asia       1962    32.0 10267083      853.
## 3 Afghanistan Asia       1967    34.0 11537966      836.

# Or equivalently:

gapminder %>%
  slice(2:4)

## # A tibble: 3 × 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       1957    30.3  9240934      821.
## 2 Afghanistan Asia       1962    32.0 10267083      853.
## 3 Afghanistan Asia       1967    34.0 11537966      836.

# 3. Select row 853 to row 864. ------------------------------------------------

#3@

# __

#@3

# Two related functions are slice_head() and slice_tail().

# slice_head() takes the first 'n' rows of a tibble:

gapminder %>%
  slice_head(n = 2)

## # A tibble: 2 × 6
##   country     continent  year lifeExp     pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>   <int>     <dbl>
## 1 Afghanistan Asia       1952    28.8 8425333      779.
## 2 Afghanistan Asia       1957    30.3 9240934      821.

# slice_tail() takes the last 'n' rows of a tibble:

gapminder %>%
  slice_tail(n = 2)

## # A tibble: 2 × 6
##   country  continent  year lifeExp      pop gdpPercap
##   <fct>    <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Zimbabwe Africa     2002    40.0 11926563      672.
## 2 Zimbabwe Africa     2007    43.5 12311143      470.

# 4. Take all the observations in Asia and return the five with the ------------
# highest life expectancies (hint: use arrange()).

#4@

# __

#@4

# 5. Which (one) country has the highest life expectancy in each continent?-----
# (hint: use arrange() and also group_by()).

#5@

# __

#@5

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# Great work! You're one step closer to tidyverse enlightenment. Make sure to
# return to this topic to meditate on it later.

# If you're ready, you can move on to koan 7: left_join.

K06_dplyr3.R

colleenobriant

2022-08-24