Chapter 9 Instrumental Variables

9.1 Libraries

library(AER)      # ivreg command
library(ivpack)   # robust and clustered standard errors
library(dplyr)    # data manipulation
library(ggplot2)  # graphs
library(tibble)   # nice dataframes
library(haven)    # dta files
library(stargazer)# tables
library(sandwich) # robust se
library(lmtest)   # for print robust

9.2 Functions We Might Want to Have

# function to compute robust standard errors for lm
compute_lm_rse = function(lmodel) {
  vcov = vcovHC(lmodel, "HC1")
  se = sqrt(diag(vcov))
}

# function to easily pass lm results to stargazer
lm_robust = function(lm_formula, data) {
  lm_result = lm(lm_formula, data)
  lm_se = compute_lm_rse(lm_result)
  return(list("result" = lm_result, "rse" = lm_se))
}

# summary of OLS with robust standard errors
print_lm_robust = function(lmodel){
  results_robust = coeftest(lmodel, vcov = vcovHC(lmodel, "HC1"))
  print(results_robust)
}

9.3 A Brief Review of OLS: Replicating Table 2

ajr_world = read_dta("data/ajr/maketable2/maketable2.dta") %>% 
  mutate(baseco = if_else(is.na(baseco), 0, 1)) %>% 
  mutate(africa = factor(africa),
         asia = factor(asia),
         other = factor(other))  

ajr_base = ajr_world %>% 
  filter(baseco == 1) 
lm_1 = lm_robust(logpgp95 ~ avexpr, data = ajr_world)
lm_2 = lm_robust(logpgp95 ~ avexpr, data = ajr_base)
lm_3 = lm_robust(logpgp95 ~ avexpr + lat_abst, data = ajr_world)
lm_4 = lm_robust(logpgp95 ~ avexpr + lat_abst, data = ajr_base)
lm_5 = lm_robust(logpgp95 ~ avexpr + lat_abst + africa + asia + other, 
                 data = ajr_world)
lm_6 = lm_robust(logpgp95 ~ avexpr + lat_abst + africa + asia + other, 
                 data = ajr_base)
stargazer(lm_1$result, lm_2$result, lm_3$result, lm_4$result, lm_5$result, lm_6$result, 
          type = 'latex',
          dep.var.labels = "Log GDP per Capita in 1995",
          covariate.labels = c("Expropriation Risk", "Latitude", "Africa", "Asia", "Other", "Constant"),
          omit.stat = c('f', 'ser'),
          se = list(lm_1$rse, lm_2$rse, lm_3$rse, lm_4$rse, lm_5$rse, lm_6$rse),
          title = "OLS Results")
% Table created by stargazer v.5.2.2 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu % Date and time: Mon, Sep 27, 2021 - 17:19:45

9.4 A Brief Review of Making Figures: Replicating Figure 2

ggplot(ajr_base, aes(x = avexpr, y = logpgp95)) + 
  geom_text(aes(label = shortnam), hjust = 0, vjust = 0, size = 3) + 
  geom_smooth(method = "lm", color = "Black", size = 0.5) + 
  labs(x = "Average Expropriation Risk 1985-95",
       y = "Log GDP per Capita, 1995") +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

9.5 IV: Replicating Table 4

9.5.1 Loading the data

ajr_world = read_dta("data/ajr/maketable4/maketable4.dta") %>% 
  mutate(rich4 = factor(rich4))
ajr_base = ajr_world %>% 
  filter(baseco == 1)

9.5.2 Doing it by hand…

# first stage
iv_1_fs = lm(avexpr ~ logem4, ajr_base)
print_lm_robust(iv_1_fs)
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  9.34141    0.70429 13.2635 < 2.2e-16 ***
## logem4      -0.60678    0.15020 -4.0399 0.0001499 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# add predicted variable to the dataset
ajr_base = ajr_base %>% 
  mutate(avexpr_pred = iv_1_fs$fitted.values)

# second stage
iv_2_ss = lm(logpgp95 ~ avexpr_pred, ajr_base)
print_lm_robust(iv_2_ss)
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  1.90967    0.75692  2.5229   0.01422 *  
## avexpr_pred  0.94428    0.12026  7.8522 7.142e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Note that the robust standard errors are still not correct, because we did not account for the 2SLS standard error correction. The next command will do all of this for us.

9.5.3 The ivreg Command

iv_1 = ivreg(logpgp95 ~ avexpr | logem4, data = ajr_base)
summary(iv_1)
## 
## Call:
## ivreg(formula = logpgp95 ~ avexpr | logem4, data = ajr_base)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.44903 -0.56242  0.07311  0.69564  1.71752 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.9097     1.0267   1.860   0.0676 .  
## avexpr        0.9443     0.1565   6.033  9.8e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9483 on 62 degrees of freedom
## Multiple R-Squared: 0.187,   Adjusted R-squared: 0.1739 
## Wald test: 36.39 on 1 and 62 DF,  p-value: 9.799e-08

If we want to use robust or clustered standard errors, we can use the functionalities from the ivpack.

# print robust standard errors
robust.se(iv_1)
## [1] "Robust Standard Errors"
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  1.90967    1.17395  1.6267    0.1089    
## avexpr       0.94428    0.17610  5.3623 1.289e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Again, we can write a wrapper function that gives us both the model as well as the robust standard errors so that we can easily pass them to stargazer. Unfortunately, it is not perfect yet, because it also prints out some stuff.

# function to easily pass robust iv results to stargazer
iv_robust = function(iv_formula, data_iv) {
  iv_result = ivreg(formula = iv_formula, data = data_iv)
  iv_rse = robust.se(iv_result)[, 2]
  return(list("result" = iv_result, "rse" = iv_rse))
}

OK, now let’s replicate table 4.

iv_1 = iv_robust(logpgp95 ~ avexpr | logem4, ajr_base)
## [1] "Robust Standard Errors"
iv_2 = iv_robust(logpgp95 ~ avexpr + lat_abst | logem4  + lat_abst, ajr_base)
## [1] "Robust Standard Errors"
ajr_noneo = ajr_base %>% 
  filter(!rich4 == 1)

iv_3 = iv_robust(logpgp95 ~ avexpr | logem4, ajr_noneo)
## [1] "Robust Standard Errors"
iv_4 = iv_robust(logpgp95 ~ avexpr + lat_abst | logem4  + lat_abst, ajr_noneo)
## [1] "Robust Standard Errors"
stargazer(iv_1$result, iv_2$result, iv_3$result, iv_4$result, 
          se = list(iv_1$rse, iv_2$rse, iv_3$rse, iv_4$rse),
          dep.var.labels = "Log GDP in 1995",
          covariate.labels = c("Expropriation Risk", "Latitude", "Constant"),
          omit.stat = "ser",
          title = "Instrumental Variable Results", 
          type = "latex")
% Table created by stargazer v.5.2.2 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu % Date and time: Mon, Sep 27, 2021 - 17:19:46

9.6 Adding Statistics to Stargazer

So far, we have not talked at all about additional test statistics. To look at those for our first specification, we can do this as follows:

# generate value of robust f-test
iv_1_fs = lm_robust(avexpr ~ logem4, ajr_base)
iv_1_fs_null = lm_robust(avexpr ~ 1, ajr_base)
f_iv_1 = waldtest(iv_1_fs$result, iv_1_fs_null$result, 
                  vcov = vcovHC(iv_1_fs$result, type="HC1"))$F[2]
## Error in waldtest(iv_1_fs$result, iv_1_fs_null$result, vcov = vcovHC(iv_1_fs$result, : unused argument (vcov = vcovHC(iv_1_fs$result, type = "HC1"))
stargazer(iv_1_fs$result,
          se = list(iv_1_fs$rse),
          add.lines = list(c("Robust F", format(round(f_iv_1, 2), nsmall = 2))),
          dep.var.labels = "Expropriation Risk",
          covariate.labels = c("Log of Settler Mortality", "Constant"),
          title = "First Stage",
          omit.stat = c("ser", "f"),
          type = "latex")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'format': object 'f_iv_1' not found