library(tidyverse)
# Restrict the analysis to the six major international tournaments.
tournaments <- c(
  "FIFA World Cup",
  "UEFA Euro",
  "Copa América",
  "AFC Asian Cup",
  "Africa Cup of Nations",
  "CONCACAF Gold Cup"
)
# Download the full history of international matches (one row per match
# with home/away teams and scores), then reshape it to one row per
# team-match with that team's outcome.
soccer <- read_csv("https://raw.githubusercontent.com/martj42/international_results/master/results.csv") %>%
mutate(
# Outcome from each side's perspective, decided by comparing goals.
home_outcome = case_when(
home_score > away_score ~ "win",
home_score < away_score ~ "lose",
.default = "tie"
),
away_outcome = case_when(
home_score < away_score ~ "win",
home_score > away_score ~ "lose",
.default = "tie"
)
) %>%
# Pivot to long format: "home_away" records which side the row is, and
# the ".value" sentinel sends the second regex group to two output
# columns, `team` and `outcome` (one row per team per match).
pivot_longer(
cols = c(home_team, away_team, home_outcome, away_outcome),
names_to = c("home_away", ".value"),
names_pattern = "(home|away)_(team|outcome)"
) %>%
select(date, tournament, team, outcome) %>%
# Keep only the major tournaments listed above.
filter(tournament %in% tournaments) %>%
mutate(
# Fix names for the join with the stock-market data: recode teams so
# they match country names there. NOTE(review): the four UK home
# nations are deliberately collapsed into one "United Kingdom",
# presumably because they share a single national stock index.
team = case_when(
team == "China PR" ~ "China",
team %in% c("England","Scotland","Wales","Northern Ireland") ~ "United Kingdom",
team == "Republic of Ireland" ~ "Ireland",
team == "Korea" ~ "South Korea",
.default = team
)
)
# Write this dataset to your computer so you can
# use it on Wednesday.
soccer %>% write_csv("edmans-soccer.csv")

13 Data for Edmans, Garcia, and Norli (2007); DellaVigna and Pollet (2009)
Part 1: Edmans, Garcia, and Norli (2007)
There are two sources of data for this replication:
- International soccer match results
- Daily national stock-market returns
1.1 Soccer Results
We’ll use this data set: International soccer scores since 1872 (Kaggle/Github). It looks like this:
| date | home_team | away_team | home_score | away_score | tournament |
|---|---|---|---|---|---|
| 1872-11-30 | Scotland | England | 0 | 0 | Friendly |
| 1873-03-08 | England | Scotland | 4 | 2 | Friendly |
| 1874-03-07 | Scotland | England | 2 | 1 | Friendly |
But we need it in a “longer” format like this, with one row per team and whether they won or lost:
| date | team | outcome | tournament |
|---|---|---|---|
| 1872-11-30 | Scotland | tie | Friendly |
| 1872-11-30 | England | tie | Friendly |
| 1873-03-08 | England | win | Friendly |
| 1873-03-08 | Scotland | lose | Friendly |
| 1874-03-07 | Scotland | win | Friendly |
| 1874-03-07 | England | lose | Friendly |
The solution is to use pivot_longer from tidyr:
Dplyr and ggplot2
Here are a few dplyr and ggplot2 questions so you can gain familiarity with this soccer data set.
library(tidyverse)
# Reload the cleaned soccer data set written out in the previous step.
soccer <- read_csv("edmans-soccer.csv")
# Question 1: What years had a FIFA World Cup?
# Question 2: How many different countries are in this data set?
# Question 3: How many different countries have at least
# 40 games in this data set?
# Question 4: What teams won the most games in the
# 1974 FIFA World Cup?
# Question 5: Visualize the distribution of outcomes
# (win, lose, and tie) for Brazil since 1970.

1.2 Stock Market Returns (1973-present)
Edmans, Garcia, and Norli (2007) is an analysis of how a country’s soccer wins and losses affect the stock markets in those countries. So the next thing to do is to get a daily national equity index for as many countries as possible.
library(DBI)
library(RPostgres)
# Enter your WRDS username and password.
# Once you successfully connect and get the
# data, delete your username and password
# from this document: I don't want that
# information to be in the document you
# share and turn in.
# Open a connection to WRDS's PostgreSQL server. Fill in your own
# credentials, then remove them before sharing this document.
wrds <- dbConnect(
Postgres(),
host = "wrds-pgdata.wharton.upenn.edu",
port = 9737, # for WRDS PostgreSQL
dbname = "wrds",
sslmode = "require", # WRDS requires encrypted connections
user = "___", # <- your WRDS username
password = "___" # <- your WRDS password
)
# Daily national equity-index returns from 1973 through 2025, ordered
# by country code (fic) and then by date.
sql <- "
SELECT *
FROM wrdsapps_windices.dwcountryreturns
WHERE date BETWEEN '1973-01-01' AND '2025-12-31'
ORDER BY fic, date
"
# Write this data set to your computer so you can use it on Wednesday.
# Pull the country index returns, keep only the columns we need, and
# rename portret to ret on the way.
dbGetQuery(wrds, sql) %>%
as_tibble() %>%
select(country, fic, date, ret = portret) %>%
write_csv("edmans_stocks.csv")

stocks <- read_csv("edmans_stocks.csv")
# Question 6: What is the maximum and minimum date in `stocks`?
# Question 7: Visualize a time series of all world
# indices. What shocks were shared globally?
# Question 8: join `stocks` and `soccer`.
joined_data <- stocks %>%
# str_to_title() normalizes the capitalization of the stock data's
# country names so they can match the soccer team names — fill in the
# blanks with the other table and the join keys.
mutate(country = str_to_title(country)) %>%
full_join(___, join_by(___))
# Question 9: Filter `joined_data` for Australia during FIFA World Cups.

Part 2: DellaVigna and Pollet (2009)
This paper is all about earnings announcements and how they impact stock returns, especially with respect to whether the earnings announcement happened on a Friday.
We’ll get two sources of data from WRDS:
- Earnings announcements events
- Daily stock returns
# First: earnings announcement events.
# Quarterly Compustat fundamentals (comp.fundq) joined to the
# CRSP/Compustat link table so that each announcement also carries a
# CRSP permno, which we need later to attach daily stock returns.
sql_events <- paste(
"SELECT",
" e.gvkey,",
" e.datadate,", # fiscal quarter-end date
" e.fyr,",
" e.fqtr,",
" e.rdq AS announcement_date,", # report date of quarterly earnings
" e.tic,",
" e.cusip,",
" e.conm,", # company name
" e.epspxq,", # EPS excluding extraordinary items (per Compustat)
" e.epsfxq,",
" e.cshoq,",
" e.prccq,",
" e.atq,",
" e.saleq,",
" l.lpermno AS permno",
"FROM comp.fundq e",
"JOIN crsp.ccmxpf_linktable l",
" ON e.gvkey = l.gvkey",
# Standard Compustat screens: industrial format, standardized data,
# domestic population source, consolidated statements.
"WHERE e.indfmt = 'INDL'",
" AND e.datafmt = 'STD'",
" AND e.popsrc = 'D'",
" AND e.consol = 'C'",
" AND e.rdq IS NOT NULL", # must have an announcement date
" AND e.epspxq IS NOT NULL",
" AND e.datadate BETWEEN '2017-01-01' AND '2025-01-01'",
" AND l.lpermno IS NOT NULL",
# Keep only reliable, primary CRSP-Compustat links ...
" AND l.linktype IN ('LU','LC')",
" AND l.linkprim IN ('P','C')",
# ... that were in effect on the announcement date.
" AND e.rdq >= l.linkdt",
" AND e.rdq <= COALESCE(l.linkenddt, DATE '2100-01-01')",
sep = "\n"
)
events <- dbGetQuery(wrds, sql_events)
write_csv(events, "dellavigna-announcements.csv")
# Second: daily stock returns
# Every distinct CRSP permno that appears among the announcements.
permnos <- unique(events$permno)
# Query permnos in batches of 500 so no single SQL IN (...) list gets
# unreasonably long.
permno_chunks <- split(permnos, ceiling(seq_along(permnos) / 500))
# Pull returns from 5 days before the earliest announcement through 75
# days after the latest one, matching the event window used below.
min_date <- min(as.Date(events$announcement_date)) - 5
max_date <- max(as.Date(events$announcement_date)) + 75
# progress bar: one tick per chunk so long pulls show their status
pb <- progress::progress_bar$new(
format = " pulling CRSP [:bar] :current/:total (:percent) eta: :eta",
total = length(permno_chunks),
clear = FALSE,
width = 80
)
# Download daily CRSP returns one chunk of permnos at a time and bind
# the results into a single data frame.
daily <- imap_dfr(permno_chunks, function(chunk, i) {
pb$tick()
# Interpolate this chunk's permnos into the SQL IN (...) list.
permnos_string <- paste(chunk, collapse = ",")
sql_daily <- paste(
"SELECT",
" permno,",
" date,",
" ret,", # holding-period return
" prc,", # closing price; CRSP stores bid/ask midpoints as negatives
" vol,",
" shrout",
"FROM crsp.dsf",
paste0("WHERE date BETWEEN '", min_date, "' AND '", max_date, "'"),
paste0(" AND permno IN (", permnos_string, ")"),
" AND ABS(prc) >= 5", # keep stocks priced at $5 or more
" AND ret IS NOT NULL",
sep = "\n"
)
out <- DBI::dbGetQuery(wrds, sql_daily)
# be polite to WRDS
Sys.sleep(0.25)
out
}) %>%
mutate(
date = as.Date(date),
# Absolute value turns CRSP's negative bid/ask-midpoint prices into
# usable positive prices.
price = abs(prc)
)
write_csv(daily, "dellavigna-stocks.csv")
# You can now disconnect from WRDS.
dbDisconnect(wrds)

Joining earnings announcements with daily stocks
The join is fuzzy (daily stocks up to 5 days before and up to 75 days after an earnings announcement event), and the daily stocks data set is huge. For these reasons, we’ll use the data.table package instead of the tidyverse for big gains in speed.
library(data.table)
# Announcement events trimmed to the columns the event study needs,
# with an event window around each announcement date.
events <- read_csv("dellavigna-announcements.csv") %>%
select(
permno,
announcement_date,
# NOTE(review): epspxq is the raw EPS level, not an earnings
# surprise — interpret "earnings_news" accordingly.
earnings_news = epspxq,
fiscal_quarter = datadate,
company_name = conm
) %>%
mutate(
# Window: 5 calendar days before through 75 days after each event.
window_start = announcement_date - 5,
window_end = announcement_date + 75
) %>%
# Keep weekday announcements only (lubridate wday(): 1 = Sunday by
# default, so 2:6 is Monday through Friday).
filter(wday(announcement_date) %in% 2:6) %>%
as.data.table()
# Daily returns, pre-filtered to dates that can possibly fall inside
# some event window, then converted for the fast data.table join.
daily <- read_csv("dellavigna-stocks.csv") %>%
filter(
date >= min(events$announcement_date) - 5,
date <= max(events$announcement_date) + 75
) %>%
select(permno, date, ret, price) %>%
as.data.table()
# Keys let data.table binary-search instead of scanning.
setkey(daily, permno, date)
setkey(events, permno, window_start, window_end)
# Non-equi (fuzzy) join: attach every daily return whose date falls in
# an event's [window_start, window_end] range for the same permno.
# nomatch = 0L makes it an inner join; allow.cartesian permits one
# daily row to match several overlapping event windows.
joined_data <- daily[
events,
on = .(permno,
date >= window_start,
date <= window_end),
nomatch = 0L,
allow.cartesian = TRUE,
# In a non-equi join data.table would otherwise report the join
# bounds instead of the actual trading date, so select each column
# explicitly from the daily (x.) or events (i.) side.
.(
permno = x.permno,
date = x.date,
ret = x.ret,
price = x.price,
announcement_date = i.announcement_date,
earnings_news = i.earnings_news,
fiscal_quarter = i.fiscal_quarter,
company_name = i.company_name
)
]
# Order each event window chronologically so that row position within a
# (permno, announcement_date) group corresponds to trading-day order.
setorder(joined_data, permno, announcement_date, date)
# Event time in trading days relative to the announcement (0 = the
# announcement day). Anchoring with match() alone returns NA whenever
# the announcement date is not itself a trading date in the window
# (e.g. a market holiday), which would set event_time to NA for the
# entire event; in that case, anchor on the first trading day on or
# after the announcement instead.
joined_data[, event_time := {
  anchor <- match(announcement_date[1], date)
  if (is.na(anchor)) {
    on_or_after <- which(date >= announcement_date[1])
    anchor <- if (length(on_or_after) > 0L) on_or_after[1L] else .N
  }
  seq_len(.N) - anchor
}, by = .(permno, announcement_date)]
# Write the joined data set to your computer.
joined_data %>%
# Convert the data.table back to a tibble before writing out.
as_tibble() %>%
write_csv("dellavigna_event_study.csv")

Taking a Look at the DellaVigna and Pollet (2009) Data
# Reload the joined event-study data set.
eventstudy <- read_csv("dellavigna_event_study.csv")
# Question 10: Visualize a time series of "ORACLE CORP" returns before 2018.
# Add red dots for earnings announcements. Is there usually
# a strong and immediate price adjustment to the new information?
eventstudy %>%
filter(___) %>%
ggplot(aes(x = ___, y = ___)) +
geom_line() +
# One red dot per announcement date. NOTE(review): earnings_news is
# divided by 10 presumably to place the dots on roughly the same
# scale as the plotted series — confirm against your y variable.
geom_point(
data = ~ distinct(.x, announcement_date, earnings_news),
aes(x = announcement_date, y = earnings_news/10),
color = "red",
size = 3
)
# Question 11: Do the same analysis for ALPHABET INC (google's
# parent company). Do you see a similar pattern?

Download this assignment
Here’s a link to download this assignment.