13 Data for Edmans, Garcia, and Norli (2007); DellaVigna and Pollet (2009)

Part 1: Edmans, Garcia, and Norli (2007)

There are two sources of data for this replication:

  1. International soccer match results
  2. Daily national stock-market returns

1.1 Soccer Results

We’ll use this data set: International soccer scores since 1872 (Kaggle/Github). It looks like this:

date home_team away_team home_score away_score tournament
1872-11-30 Scotland England 0 0 Friendly
1873-03-08 England Scotland 4 2 Friendly
1874-03-07 Scotland England 2 1 Friendly

But we need it in a “longer” format like this, with one row per team and whether they won or lost:

date team outcome tournament
1872-11-30 Scotland tie Friendly
1872-11-30 England tie Friendly
1873-03-08 England win Friendly
1873-03-08 Scotland lose Friendly
1874-03-07 Scotland win Friendly
1874-03-07 England lose Friendly

The solution is to use pivot_longer from tidyr (loaded as part of the tidyverse):

library(tidyverse)

# Restrict the analysis to the major international tournaments
# studied in Edmans, Garcia, and Norli (2007).
tournaments <- c("FIFA World Cup", "UEFA Euro", "Copa América",
                 "AFC Asian Cup", "Africa Cup of Nations",
                 "CONCACAF Gold Cup")

# Download match-level results, keep the major tournaments, classify each
# match from the home side's perspective, reshape to one row per
# team-match, and harmonize country names for the later join with the
# stock-return data.
soccer <- read_csv("https://raw.githubusercontent.com/martj42/international_results/master/results.csv") %>%
  filter(tournament %in% tournaments) %>%
  mutate(
    home_outcome = case_when(
      home_score > away_score ~ "win",
      home_score < away_score ~ "lose",
      .default = "tie"
    ),
    # The away outcome is just the home outcome flipped.
    away_outcome = case_when(
      home_outcome == "win" ~ "lose",
      home_outcome == "lose" ~ "win",
      .default = "tie"
    )
  ) %>%
  pivot_longer(
    cols = c(home_team, away_team, home_outcome, away_outcome),
    names_to = c("home_away", ".value"),
    names_pattern = "(home|away)_(team|outcome)"
  ) %>%
  select(date, tournament, team, outcome) %>%
  mutate(
    # Fix names so they line up with country names in the stock data.
    team = case_when(
      team == "China PR" ~ "China",
      team %in% c("England", "Scotland", "Wales", "Northern Ireland") ~ "United Kingdom",
      team == "Republic of Ireland" ~ "Ireland",
      team == "Korea" ~ "South Korea",
      .default = team
    )
  )

# Write this dataset to your computer so you can
# use it on Wednesday.
soccer %>% write_csv("edmans-soccer.csv")

Dplyr and ggplot2

Here are a few dplyr and ggplot2 questions so you can gain familiarity with this soccer data set.

library(tidyverse)
# Reload the long-format soccer data written in Part 1.1
# (one row per team-match, columns: date, tournament, team, outcome).
soccer <- read_csv("edmans-soccer.csv")

# Question 1: What years had a FIFA World Cup?


# Question 2: How many different countries are in this data set?


# Question 3: How many different countries have at least 
# 40 games in this data set?


# Question 4: What teams won the most games in the 
# 1974 FIFA World Cup?


# Question 5: Visualize the distribution of outcomes
# (win, lose, and tie) for Brazil since 1970.

1.2 Stock Market Returns (1973-present)

Edmans, Garcia, and Norli (2007) is an analysis of how a country’s soccer wins and losses affect the stock markets in those countries. So the next thing to do is to get a daily national equity index for as many countries as possible.

library(DBI)
library(RPostgres)

# Connect to WRDS (Wharton Research Data Services) over PostgreSQL.
#
# Enter your WRDS username and password. 
# Once you successfully connect and get the
# data, delete your username and password
# from this document: I don't want that
# information to be in the document you
# share and turn in. (A safer habit is to keep
# credentials in environment variables and read
# them with Sys.getenv().)
wrds <- dbConnect(
  Postgres(),
  host = "wrds-pgdata.wharton.upenn.edu",
  port = 9737, # for WRDS PostgreSQL
  dbname = "wrds",
  sslmode = "require",
  user = "___",
  password = "___"
)

# Daily country-level equity-index returns from 1973 on, ordered by
# country code (fic) and date. A plain string literal suffices here:
# the original wrapped a single string in paste0(), which is a no-op.
sql <- "
  SELECT *
  FROM wrdsapps_windices.dwcountryreturns
  WHERE date BETWEEN '1973-01-01' AND '2025-12-31'
  ORDER BY fic, date
"

# Write this data set to your computer so you can use it on Wednesday.
dbGetQuery(wrds, sql) %>%
  as_tibble() %>%
  select(country, fic, date, ret = portret) %>%
  write_csv("edmans_stocks.csv")
stocks <- read_csv("edmans_stocks.csv")

# Question 6: What is the maximum and minimum date in `stocks`?


# Question 7: Visualize a time series of all world
# indices. What shocks were shared globally?


# Question 8: join `stocks` and `soccer`.
# Fill in the blanks: the second table of the join and the key columns.
# NOTE(review): str_to_title() normalizes the case of `country` —
# presumably so it matches the case used in `soccer$team`; the join keys
# must pair the country/team columns (and dates, if joining on date).
joined_data <- stocks %>%
  mutate(country = str_to_title(country)) %>%
  full_join(___, join_by(___))

# Question 9: Filter `joined_data` for Australia during FIFA World Cups.

Part 2: DellaVigna and Pollet (2009)

This paper is all about earnings announcements and how they impact stock returns, especially with respect to whether the earnings announcement happened on a Friday.

We’ll get two sources of data from WRDS:

  1. Earnings announcements events
  2. Daily stock returns
# First: earnings announcement events.
# Quarterly Compustat fundamentals (comp.fundq) joined to the CRSP link
# table so that each announcement carries a CRSP permno. The query keeps
# standard, consolidated, domestic records with a non-missing report date
# (rdq) and EPS, restricted to fiscal quarters 2017-2024, and only link
# rows that are valid on the announcement date. Written as one multi-line
# string literal; its contents are identical to the original
# paste(..., sep = "\n") construction.
sql_events <- "SELECT
    e.gvkey,
    e.datadate,
    e.fyr,
    e.fqtr,
    e.rdq AS announcement_date,
    e.tic,
    e.cusip,
    e.conm,
    e.epspxq,
    e.epsfxq,
    e.cshoq,
    e.prccq,
    e.atq,
    e.saleq,
    l.lpermno AS permno
FROM comp.fundq e
JOIN crsp.ccmxpf_linktable l
  ON e.gvkey = l.gvkey
WHERE e.indfmt = 'INDL'
  AND e.datafmt = 'STD'
  AND e.popsrc = 'D'
  AND e.consol = 'C'
  AND e.rdq IS NOT NULL
  AND e.epspxq IS NOT NULL
  AND e.datadate BETWEEN '2017-01-01' AND '2025-01-01'
  AND l.lpermno IS NOT NULL
  AND l.linktype IN ('LU','LC')
  AND l.linkprim IN ('P','C')
  AND e.rdq >= l.linkdt
  AND e.rdq <= COALESCE(l.linkenddt, DATE '2100-01-01')"

# Run the events query on WRDS: one row per firm-quarter announcement
# (rdq) with a matched CRSP permno.
events <- dbGetQuery(wrds, sql_events)

# Cache locally so the rest of the assignment doesn't need WRDS access.
write_csv(events, "dellavigna-announcements.csv")

# Second: daily stock returns
# All distinct permnos that had at least one announcement.
permnos <- events %>%
  pull(permno) %>%
  unique()

# Query CRSP in chunks of at most 500 permnos so each IN (...) list and
# each result set stays a manageable size.
permno_chunks <- split(permnos, ceiling(seq_along(permnos) / 500))

# Date range wide enough to cover the event windows used later:
# 5 days before the earliest announcement through 75 days after the last.
min_date <- min(as.Date(events$announcement_date)) - 5
max_date <- max(as.Date(events$announcement_date)) + 75

# progress bar
pb <- progress::progress_bar$new(
  format = "  pulling CRSP [:bar] :current/:total (:percent) eta: :eta",
  total = length(permno_chunks),
  clear = FALSE,
  width = 80
)

# Pull CRSP daily returns chunk by chunk and row-bind the results.
# The SQL keeps only rows with |price| >= $5 and a non-missing return.
daily <- imap_dfr(permno_chunks, function(chunk, i) {
  pb$tick()
  permnos_string <- paste(chunk, collapse = ",")
  sql_daily <- paste(
    "SELECT",
    "    permno,",
    "    date,",
    "    ret,",
    "    prc,",
    "    vol,",
    "    shrout",
    "FROM crsp.dsf",
    paste0("WHERE date BETWEEN '", min_date, "' AND '", max_date, "'"),
    paste0("  AND permno IN (", permnos_string, ")"),
    "  AND ABS(prc) >= 5",
    "  AND ret IS NOT NULL",
    sep = "\n"
  )
  out <- DBI::dbGetQuery(wrds, sql_daily)
  # be polite to WRDS
  Sys.sleep(0.25)
  out
}) %>%
  mutate(
    date = as.Date(date),
    # abs(): per CRSP convention, a negative prc flags an imputed
    # (bid/ask midpoint) close — the magnitude is still the price level.
    price = abs(prc)
  )

write_csv(daily, "dellavigna-stocks.csv")

# You can now disconnect from WRDS.
dbDisconnect(wrds)

Joining earnings announcements with daily stocks

The join is fuzzy (daily stocks up to 5 days before and up to 75 days after an earnings announcement event), and the daily stocks data set is huge. For these reasons, we’ll use the data.table package instead of the tidyverse for big gains in speed.

library(data.table)

# Announcement events for the event study: keep only the columns we need,
# drop weekend announcements (wday 2:6 = Monday through Friday), and
# build the fuzzy-join window of 5 days before to 75 days after each
# announcement. Converted to a data.table for the range join below.
events <- read_csv("dellavigna-announcements.csv") %>%
  filter(wday(announcement_date) %in% 2:6) %>%
  select(
    permno,
    announcement_date,
    earnings_news = epspxq,
    fiscal_quarter = datadate,
    company_name = conm
  ) %>%
  mutate(
    window_start = announcement_date - 5,
    window_end   = announcement_date + 75
  ) %>%
  as.data.table()

# Daily CRSP returns, trimmed to the span of dates any event window can
# reach, keeping only the columns used downstream.
first_day <- min(events$announcement_date) - 5
last_day  <- max(events$announcement_date) + 75

daily <- read_csv("dellavigna-stocks.csv") %>%
  select(permno, date, ret, price) %>%
  filter(date >= first_day, date <= last_day) %>%
  as.data.table()

setkey(daily, permno, date)
setkey(events, permno, window_start, window_end)

# Non-equi (range) join: for each event row, pull every daily observation
# with the same permno whose date falls inside [window_start, window_end].
# `x.` columns come from `daily` (the outer table), `i.` columns from
# `events`; the explicit prefixes avoid data.table's column renaming in
# non-equi joins.
joined_data <- daily[
  events,
  on = .(permno,
         date >= window_start,
         date <= window_end),
  nomatch = 0L,           # inner join: drop events with no trading days in window
  allow.cartesian = TRUE, # windows overlap, so one daily row can match many events
  .(
    permno = x.permno,
    date = x.date,
    ret = x.ret,
    price = x.price,
    announcement_date = i.announcement_date,
    earnings_news = i.earnings_news,
    fiscal_quarter = i.fiscal_quarter,
    company_name = i.company_name
  )
]

setorder(joined_data, permno, announcement_date, date)

# event_time: trading-day index relative to the announcement (0 on the
# announcement day, negative before, positive after), computed per
# permno-announcement window on the date-sorted rows.
#
# Bug fix: the original used match(announcement_date[1], date), which
# returns NA — wiping out event_time for the entire window — whenever the
# announcement date itself is not a trading day (e.g. a market holiday;
# weekends are already filtered out of `events`). Counting the dates
# strictly before the announcement is identical when the date is present
# and degrades gracefully (day 0 becomes the first trading day on or
# after the announcement) when it is not.
joined_data[, event_time :=
  seq_len(.N) - (sum(date < announcement_date[1]) + 1L),
  by = .(permno, announcement_date)
]

# Write the joined data set to your computer.
joined_data %>% 
  as_tibble() %>% 
  write_csv("dellavigna_event_study.csv")

Taking a Look at the DellaVigna and Pollet (2009) Data

# Joined event-study data from Part 2 (one row per permno-announcement-day).
eventstudy <- read_csv("dellavigna_event_study.csv")

# Question 10: Visualize a time series of "ORACLE CORP" returns before 2018.
# Add red dots for earnings announcements. Is there usually 
# a strong and immediate price adjustment to the new information?
# Fill in the blanks: filter on company_name and date, then choose the
# x and y aesthetics for the line.
eventstudy %>%
  filter(___) %>%
  ggplot(aes(x = ___, y = ___)) +
  geom_line() +
    geom_point(
    # One red dot per announcement; earnings_news/10 rescales EPS so the
    # dots sit on a comparable range as the plotted series.
    data = ~ distinct(.x, announcement_date, earnings_news),
    aes(x = announcement_date, y = earnings_news/10),
    color = "red",
    size = 3
  )

# Question 11: Do the same analysis for ALPHABET INC (google's 
# parent company). Do you see a similar pattern?

# Question 11: Do the same analysis for ALPHABET INC (google's 
# parent company). Do you see a similar pattern?

Download this assignment

Here’s a link to download this assignment.