Studying Policy Design Quality in Comparative Perspective

Xavier Fernández i Marín, Christoph Knill, Yves Steinebach

2021-03-11 09:19:16

Introduction

This report presents the data analysis for “Studying Policy Design Quality in Comparative Perspective”, published in the American Review of Political Science.

First it contains a brief chapter with the description of the data, and reproduces the figures in the article and the online appendix.

Then in contains two chapters on the analysis of environmental performance, one with the main model and another one with the robustness check from a variation of the main model.

Later, it contains four chapters on the analysis of portfolio diversity, one with the main model and three more with various robustness checks for different specifications.

Finally, one chapter compares the different robustness models of portfolio diversity against the main model.

Data description

library(PolicyPortfolios)
data(consensus)

Show the portfolios of selected countries / years for illustration purposes.

D <- consensus %>%
  filter(Sector == "Environmental") %>%
  droplevels() %>%
  pp_measures()

consensus.without.labels <- consensus %>%
  mutate(Target = as.factor(as.numeric(Target))) %>%
  mutate(Instrument = as.factor(as.numeric(Instrument)))


rM <- pp_array(filter(consensus, Sector == "Environmental" & Country == "France" & Year == 1976), return_matrix = TRUE)

manual.aid.fr.1976 <- round(D$value[D$Country == "France" & D$Year == 1976 & D$Sector == "Environmental" & D$Measure == "Div.aid"], 3)
manual.aid.fr.2005 <- round(D$value[D$Country == "France" & D$Year == 2005 & D$Sector == "Environmental" & D$Measure == "Div.aid"], 3)
manual.aid.us.1976 <- round(D$value[D$Country == "United States" & D$Year == 1976 & D$Sector == "Environmental" & D$Measure == "Div.aid"], 3)
manual.aid.us.2005 <- round(D$value[D$Country == "United States" & D$Year == 2005 & D$Sector == "Environmental" & D$Measure == "Div.aid"], 3)

# France: growth in size and diversity
f1 <- pp_plot(droplevels(filter(consensus.without.labels, Sector == "Environmental")), 
              id = list(Country = "France", Year = 1976),
              subtitle = FALSE, caption = "") +
  labs(subtitle = paste0("AID: ", manual.aid.fr.1976)) +
  theme(plot.subtitle = element_text(size = 3.0))
f2 <- pp_plot(droplevels(filter(consensus.without.labels, Sector == "Environmental")), 
              id = list(Country = "France", Year = 2005),
              subtitle = FALSE, caption = "") +
  labs(subtitle = paste0("AID: ", manual.aid.fr.2005)) +
  theme(plot.subtitle = element_text(size = 3.0))
#grid.arrange(f1, f2, ncol = 2)

f3 <- pp_plot(droplevels(filter(consensus.without.labels, Sector == "Environmental")), 
              id = list(Country = "United States", Year = 1976),
              subtitle = FALSE, caption = "") +
  labs(subtitle = paste0("AID: ", manual.aid.us.1976)) +
  theme(plot.subtitle = element_text(size = 3.0))
f4 <- pp_plot(droplevels(filter(consensus.without.labels, Sector == "Environmental")), 
              id = list(Country = "United States", Year = 2005),
              subtitle = FALSE, caption = "") +
  labs(subtitle = paste0("AID: ", manual.aid.us.2005)) +
  theme(plot.subtitle = element_text(size = 3.0))
#grid.arrange(f1, f2, ncol = 2)

grid.arrange(f1, f2, f3, f4, ncol = 2)
Example portfolios comparing Average Instrument Diversity. France: +size +diversity; United States: +size =diversity.

Figure 1: Example portfolios comparing Average Instrument Diversity. France: +size +diversity; United States: +size =diversity.

Description of the distribution of Average Instrument Diversity.

consensus %>%
  filter(Sector == "Environmental") %>%
  droplevels() %>%
  pp_measures() %>%
  filter(Measure == "Div.aid") %>%
  select(Country, Year, Measure.label, value) %>%
  spread(Measure.label, value) %>%
  ggplot(aes(y = reorder(Country, `Diversity (Average Instrument Diversity)`, mean))) +
  geom_boxplot(aes(x = `Diversity (Average Instrument Diversity)`)) +
  expand_limits(x = c(0, 1)) +
  xlab("Average Instrument Diversity") +
  ylab("Country")

Figure 2: Descriptive statistics for portfolio diversity (Average Instrument Diversity). Environmental sector.

Descriptive statistics for portfolio diversity (Average Instrument Diversity). Environmental sector.
consensus %>%
  filter(Sector == "Environmental") %>%
  droplevels() %>%
  pp_measures() %>%
  filter(Measure == "Div.aid") %>%
ggplot(aes(x = Year, y = value)) +
  geom_line() +
  ylab("Diversity (Average Instrument Diversity") +
  facet_wrap(~ Country) +
  ggtitle("Diversity (Average Instrument Diversity)")

Figure 3: Temporal evolution of average instrument diversity, by country.

Temporal evolution of average instrument diversity, by country.

Several measures of diversity, compared

consensus %>%
  filter(Sector == "Environmental") %>%
  droplevels() %>%
  pp_measures() %>%
  filter(Measure %in% c("Size", "Div.aid", "Div.gs", "C.eq", "Eq.sh")) %>%
  mutate(Measure = Measure.label) %>%
  select(Country, Sector, Year, Measure, value) %>%
  spread(Measure, value) %>%
  gather(Measure, Diversity, -c(Country, Sector, Year,
            `Diversity (Average Instrument Diversity)`,
            `Portfolio size`)) %>%
  ggplot(aes(x = `Diversity (Average Instrument Diversity)`,
             y = Diversity,
             color = Measure,
             size = `Portfolio size`)) +
  geom_point(alpha = 0.5) +
  facet_grid(Measure ~ Sector)
Average instrument diversity compared against other measures of diversity, by sector.

Figure 4: Average instrument diversity compared against other measures of diversity, by sector.

Explanatory model of performance

library(PolicyPortfolios)
data(consensus)

ca <- consensus %>%
  filter(Sector == "Environmental") %>%
  droplevels() %>%
  pp_measures() %>%
  filter(Measure %in% c("Div.aid", "Size")) %>%
  select(-Measure.label) %>%
  spread(Measure, value) %>%
  rename(Diversity = Div.aid)


countries <- country.coverage <- as.character(levels(ca$Country))
nC <- length(countries)
years <- range(ca$Year)
save(countries, years, country.coverage, file = "details.RData")

Performance

perf <- foreign::read.dta("jahn/PoEP_Replication_Data/Environmental_Performance_Chapter5.dta") %>%
  as_tibble() %>%
  select(Country = country, Year = year, 
         General = PolGen100,
         Water = PolWat100,
         Mundane = Mundane100,
         Successfully = Success100,
         Specific1980 = LUPI82_1200,
         Specific2010 = LUPI07_1200) %>%
  mutate(Country = as.character(Country)) %>%
  mutate(Country = ifelse(Country == "UK", "United Kingdom", Country)) %>%
  mutate(Country = ifelse(Country == "US", "United States", Country)) %>%
  filter(Year %in% 1980:2010)

Covariates

World Development Indicators - Revenue.

load("wdi/wdi-tax.RData")

tax.rev.l <- tax.rev %>%
  select(Country = country, tax.revenue = GC.TAX.TOTL.GD.ZS, Year = year) %>%
  group_by(Country) %>%
  summarize(tax.revenue = median(tax.revenue, na.rm = TRUE))

World Development Indicators:

countries <- as.character(levels(ca$Country))
load("wdi/wdi.RData")
wdi <- wdi[,c("country", "year", "gdp", "population", "gdp.capita", "trade")]
wdi <- subset(wdi, year >= 1976 & year <=2005)
wdi$country[wdi$country=="Korea, Rep."] <- "Korea, Republic of"
wdi <- subset(wdi, country %in% countries)

# GDP pc in Ireland is bad
ireland.wdi <- subset(wdi, country=="Ireland")

# So we use the combination of GDP and population 
# to make a regression against the observed GDP per capita 
# and impute accordingly.
ireland.wdi <- cbind(ireland.wdi, gdp.capita.div = ireland.wdi$gdp/ireland.wdi$population)
m.ireland <- lm(gdp.capita ~ gdp.capita.div, data=ireland.wdi)
wdi$gdp.capita[wdi$country=="Ireland" & is.na(wdi$gdp.capita)] <-
  predict(m.ireland, ireland.wdi)[1:(length(predict(m.ireland, ireland.wdi)) - (length(predict(m.ireland))))]

ireland.wdi <- subset(wdi, country=="Ireland")
ireland.wdi <- cbind(ireland.wdi, gdp.capita.div = ireland.wdi$gdp/ireland.wdi$population)
m.ireland <- lm(gdp.capita ~ year, data=ireland.wdi)
wdi$gdp.capita[wdi$country=="Ireland" & is.na(wdi$gdp.capita)] <-
  predict(m.ireland, ireland.wdi)[1:(length(predict(m.ireland, ireland.wdi)) - (length(predict(m.ireland))))]

# Switzerland is not so bad, but still problematic until 1979. 
# But the procedure does not work, because GDP is also missing. 
# So a simple imputation based on evolution over time is performed.
switzerland.wdi <- subset(wdi, country=="Switzerland")
switzerland.wdi <- cbind(switzerland.wdi, gdp.capita.div = switzerland.wdi$gdp/switzerland.wdi$population)
m.switzerland <- lm(gdp.capita ~ year, data=switzerland.wdi)
wdi$gdp.capita[wdi$country=="Switzerland" & is.na(wdi$gdp.capita)] <-
  predict(m.switzerland, switzerland.wdi)[1:(length(predict(m.switzerland, switzerland.wdi)) - (length(predict(m.switzerland))))]

#New Zealand only misses 1976' GDP per capita, 
# so the same procedure than with Switzerland is used.
newzealand.wdi <- subset(wdi, country=="New Zealand")
newzealand.wdi <- cbind(newzealand.wdi, gdp.capita.div = newzealand.wdi$gdp/newzealand.wdi$population)
m.newzealand <- lm(gdp.capita ~ year, data=newzealand.wdi)
wdi$gdp.capita[wdi$country=="New Zealand" & is.na(wdi$gdp.capita)] <-
  predict(m.newzealand, newzealand.wdi)[1:(length(predict(m.newzealand, newzealand.wdi)) - (length(predict(m.newzealand))))]


switzerland.wdi <- subset(wdi, country=="Switzerland")
m.switzerland <- lm(trade ~ year, data=switzerland.wdi)
wdi$trade[wdi$country=="Switzerland" & is.na(wdi$trade)] <-
  predict(m.switzerland, switzerland.wdi)[1:(length(predict(m.switzerland, switzerland.wdi)) - (length(predict(m.switzerland))))]

# GDP per capita growth
# Another way at looking at resources, is to calculate 
# how many times is the overall wealth per capita at the 
# end of the period compared to the beginning.
wdi.gdp.capita.ratio <- subset(wdi[,c("country", "year", "gdp.capita")], year==min(year) | year==max(year))
wdi.gcr.w <- wdi.gdp.capita.ratio %>%
  spread(year, gdp.capita)
wdi.gcr.w <- cbind(wdi.gcr.w, gdpc.ratio=wdi.gcr.w$`2005`/wdi.gcr.w$`1976`)


# Data is averaged by country through all years.
wdi.c <- wdi %>%
  gather(variable, value, -country, -year) %>%
  group_by(country, variable) %>%
  summarize(m = median(value, na.rm = TRUE)) %>%
  ungroup()


# Include GDP per capita growth, ratio
wdi.l <- wdi.gcr.w %>%
  select(country, gdpc.ratio) %>%
  mutate(variable = "gdpc.ratio") %>%
  rename(m = gdpc.ratio) %>%
  select(country, variable, m) %>%
  bind_rows(wdi.c)

V-Dem:

load("vdem/v_dem-ra-1950_2018.RData") # loads vdem

Government effectiveness. Data retrieved manually from the World Bank page on Governance indicators. Only the “Government Effectiveness: Estimation” is used. The data is only available between 1996 and 2005.

load("wgi/government_effectiveness.RData")

gov.eff.original <- gov.eff
gov.eff.original$country <- as.character(gov.eff.original$country)
gov.eff.original$country[gov.eff.original$country=="Korea, Rep."] <- "Korea, Republic of"
gov.eff.original$country <- as.character(gov.eff.original$country)

gov.eff.l <- gov.eff.original %>%
  rename(gov.eff = value) %>%
  gather(variable, value, -country, -year) %>%
  group_by(country, variable) %>%
  summarize(m = median(value, na.rm = TRUE))

Political constraints

load("./polcon/polcon2017.RData") # loads polcon

For the “Green parties”, data comes from Volkens (2013). It provides dates of elections as well as shares of seats of several families of parties. We generate two indicators for the “green” and “socialist” ideology of the countries, with a weighted average of the proportion of seats and the duration of each legislature.

load("./manifesto/cpm-consensus.RData")
cpm$country <- as.character(cpm$country)
cpm$country[cpm$country=="Korea"] <- "Korea, Republic of"
cpm$country[cpm$country=="Great Britain"] <- "United Kingdom"
cpm <- subset(cpm, country %in% countries)
cpm$country <- factor(cpm$country)
cpm <- subset(cpm, date>="1970-01-01" & date<="2005-12-31")

# Take only Green parties and Socialist=social democrats + communists
cpm$family <- as.character(cpm$family)
cpm$family[cpm$family=="Social democratic"] <- "Socialist"
cpm$family[cpm$family=="Communist"] <- "Socialist"
cpm <- subset(cpm, family=="Green" | family=="Socialist")
cpm$family <- factor(cpm$family)

# Aggregate duplications in Socialist
cpm <- cpm %>%
  group_by(country, date, family) %>%
  summarize(p.seats=sum(p.seats))

# Calculate the weighted means.
# Unfortunately, a ddply approach would be too complicated and a loop solves it quite quickly.
families <- c("Green", "Socialist")
wmsf <- data.frame(country=countries, Green=NA, Socialist=NA)
for (C in 1:nC) {
  for (F in 1:length(families)) {
    series <- subset(cpm, country==countries[C] & family==families[F])[,c(2, 4)]
    series <- series[order(series$date),]
    #v <- weighted.mean(series$p.seats, diff(c(as.Date("1976-01-01"), series$date)))
    v <- weighted.mean(series$p.seats, as.numeric(diff(c(as.Date("1976-01-01"), series$date))))
    v[is.nan(v)] <- 0
    wmsf[C, 1+F] <- v
  }
}

Save and arrange for analysis.

d.perf <- perf %>%
  # Delete Turkey and Korea
  filter(!Country %in% c("Korea, Republic of", "Turkey")) %>%
  # Delete Years for which we don't have data
  filter(Year >= 1980 & Year <= 2005) %>%
  gather(Indicator, Performance, -Country, -Year) %>%
  # Select specific performance indicators
  filter(Indicator %in% c("General", "Specific1980")) %>%
  droplevels()

Y <- reshape2::acast(d.perf, Indicator ~ Country ~ Year, value.var = "Performance")
nS <- dim(Y)[1]
nC <- dim(Y)[2]
nY <- dim(Y)[3]


country.label <- dimnames(Y)[[2]]
nC <- length(country.label)

indicator.label <- dimnames(Y)[[1]]
sector.label <- dimnames(Y)[[2]]
nI <- length(indicator.label)

year.label <- dimnames(Y)[[3]]
year.label.numeric <- as.integer(as.numeric(year.label))
nY <- length(year.label)



# Function to assign zeros to the fake countries (mean value)
zero.fk <- function(x, id = id.fake.countries) { # zero to fake countries
  x[id] <- 0
  return(x)
}

source("get-eu_time.R") # generates eu.ms

diversity <- ca %>%
  select(Sector, Country, Year, Diversity) %>%
  # Delete Turkey and Korea
  filter(!Country %in% c("South Korea", "Turkey")) %>%
  filter(Sector == "Environmental") %>%
  filter(Year >= 1980 & Year <= 2005) %>%
  select(-Sector) %>%
  droplevels() %>%
  reshape2::acast(Country ~ Year, value.var = "Diversity")
if ( length(which(!dimnames(diversity)[[1]] == country.label)) > 0) stop("Ep! There is a mistake here")


GDPpc <- wdi %>%
  select(country, year, gdp.capita) %>%
  filter(country %in% country.label) %>%
  filter(year >= 1980 & year <= 2005) %>%
  mutate(gdp.capita = std(gdp.capita)) %>%
  reshape2::acast(country ~ year, value.var = "gdp.capita")
if ( length(which(!dimnames(GDPpc)[[1]] == country.label)) > 0) stop("Ep! There is a mistake here")

load("wdi/wdi-gdpgrowth.RData") # gdp.growth
gdp.growth <- gdp.growth %>%
  select(country, year, gdp.growth = NY.GDP.MKTP.KD.ZG) %>%
  filter(country %in% country.label) %>%
  filter(year >= 1980 & year <= 2005) %>%
  mutate(gdp.growth = std(gdp.growth)) %>%
  reshape2::acast(country ~ year, value.var = "gdp.growth")
if ( length(which(!dimnames(gdp.growth)[[1]] == country.label)) > 0) stop("Ep! There is a mistake here")

load("wdi/wdi-urban.RData") # urban
urban <- urban %>%
  select(country, year, urban = SP.URB.TOTL.IN.ZS) %>%
  filter(country %in% country.label) %>%
  filter(year >= 1980 & year <= 2005) %>%
  mutate(urban = std(urban)) %>%
  reshape2::acast(country ~ year, value.var = "urban")
if ( length(which(!dimnames(urban)[[1]] == country.label)) > 0) stop("Ep! There is a mistake here")

load("wdi/wdi-industry.RData") # industry
industry <- industry %>%
  select(country, year, industry = NV.IND.TOTL.ZS) %>%
  filter(country %in% country.label) %>%
  filter(year >= 1980 & year <= 2005) %>%
  mutate(industry = std(industry)) %>%
  reshape2::acast(country ~ year, value.var = "industry")
if ( length(which(!dimnames(industry)[[1]] == country.label)) > 0) stop("Ep! There is a mistake here")
industry.means <- apply(industry, 1, mean, na.rm = TRUE)

trade <- wdi %>%
  select(country, year, trade) %>%
  filter(country %in% country.label) %>%
  filter(year >= 1980 & year <= 2005) %>%
  mutate(trade = std(trade)) %>%
  reshape2::acast(country ~ year, value.var = "trade")
if ( length(which(!dimnames(trade)[[1]] == country.label)) > 0) stop("Ep! There is a mistake here")

deliberation <- vdem %>%
  mutate(Country = as.character(Country)) %>%
  mutate(Country = ifelse(Country == "United States of America", "United States", Country)) %>%
  mutate(Country = ifelse(Country == "Germany (RDA)", "Germany", Country)) %>%
  filter(Democracy == "Deliberative") %>%
  filter(Country %in% country.label) %>%
  filter(Year >= 1980 & Year <= 2005) %>%
  mutate(value = std(value)) %>%
  select(Country, Year, value) %>%
  reshape2::acast(Country ~ Year, value.var = "value")
if ( length(which(!dimnames(deliberation)[[1]] == country.label)) > 0) stop("Ep! There is a mistake here")

constraints <- polcon %>%
  mutate(country.polity = as.character(country.polity)) %>%
  mutate(country.polity = ifelse(country.polity == "Germany West", "Germany", country.polity)) %>%
  filter(country.polity %in% country.label) %>%
  filter(year >= 1980 & year <= 2005) %>%
  mutate(polcon = std(polcon)) %>%
  select(country.polity, year, polcon) %>%
  reshape2::acast(country.polity ~ year, value.var = "polcon")
if ( length(which(!dimnames(constraints)[[1]] == country.label)) > 0) stop("Ep! There is a mistake here")


gov.eff <- expand.grid(country = country.label, year = year.label.numeric) %>%
  left_join(gov.eff.original) %>%
  filter(country %in% country.label) %>%
  filter(year >= 1980 & year <= 2005) %>%
  mutate(value = std(value)) %>%
  select(country, year, value) %>%
  reshape2::acast(country ~ year, value.var = "value")
if ( length(which(!dimnames(gov.eff)[[1]] == country.label)) > 0) stop("Ep! There is a mistake here")


portfolio.size <- ca %>%
  filter(Sector == "Environmental") %>%
  filter(Country %in% country.label) %>%
  filter(Year >= 1980 & Year <= 2005) %>%
  mutate(Size = std(logit(Size))) %>%
  ungroup() %>%
  reshape2::acast(Country ~ Year, variable.var = "Size")
if ( length(which(!dimnames(portfolio.size)[[1]] == country.label)) > 0) stop("Ep! There is a mistake here")

eu <- expand.grid(country = country.label, year = 1958:2020) %>%
  as_tibble() %>%
  mutate(eu = 0) %>%
  left_join(eu.ms, by = c("country" = "ms")) %>%
  mutate(eu = ifelse(year == ms.y, 1, eu)) %>%
  mutate(eu = ifelse(is.na(eu), 0, eu)) %>%
  group_by(country) %>%
  arrange(country, year) %>%
  mutate(eu = cumsum(eu)) %>%
  ungroup() %>%
  select(country, year, eu) %>%
  filter(country %in% country.label) %>%
  filter(year %in% year.label.numeric) %>%
  reshape2::acast(country ~ year, value.var = "eu")
if ( length(which(!dimnames(eu)[[1]] == country.label)) > 0) stop("Ep! There is a mistake here")

green.socialist <- cpm %>%
  ungroup() %>%
  mutate(calendar.year = as.numeric(format(date, "%Y"))) %>%
  mutate(calendar.month = as.numeric(format(date, "%m"))) %>%
  mutate(year = ifelse(calendar.month > 6, calendar.year + 1, calendar.year)) %>%
  # Manually delete some elections that are held too close
  # and assigned both to the same year
  # Delete the first one, as in case of instability it is unlikely 
  # that they have passed legislation
  filter(!(country == "Japan" & date == as.Date("1979-10-07"))) %>%
  filter(!(country == "Greece" & date == as.Date("1989-11-05"))) %>%
  filter(!(country == "Denmark" & date == as.Date("1987-09-08"))) %>%
  #
  select(country, year, family, p.seats) %>%
  full_join(expand.grid(country = country.label, 
                        year = (min(year.label.numeric)-5):max(year.label.numeric),
                        family = c("Socialist", "Green"))) %>%
  group_by(country, family) %>%
  arrange(country, family, year) %>%
  # A bit clumsy, but works
  mutate(p.seats = ifelse(is.na(p.seats), lag(p.seats), p.seats)) %>%
  mutate(p.seats = ifelse(is.na(p.seats), lag(p.seats), p.seats)) %>%
  mutate(p.seats = ifelse(is.na(p.seats), lag(p.seats), p.seats)) %>%
  mutate(p.seats = ifelse(is.na(p.seats), lag(p.seats), p.seats)) %>%
  mutate(p.seats = ifelse(is.na(p.seats), lag(p.seats), p.seats)) %>%
  ungroup() %>%
  select(country, year, family, p.seats) %>%
  filter(country %in% country.label) %>%
  filter(year %in% year.label.numeric) %>%
  mutate(p.seats = ifelse(is.na(p.seats), 0, p.seats)) %>%
  group_by(family) %>%
  mutate(p.seats = std(p.seats)) %>%
  ungroup() %>%
  unique() %>%
  reshape2::acast(family ~ country ~ year, value.var = "p.seats")
if ( length(which(!dimnames(green.socialist)[[2]] == country.label)) > 0) stop("Ep! There is a mistake here")
if (!dimnames(green.socialist)[[1]][1] == "Green" & sector.label[1] == "Environmental") stop("Ep! There is a mistake here")

# Use only the green dimension
green <- green.socialist[1,,]


DJ <- list(
  Y = unname(Y), 
  diversity = unname(diversity),
  GDPpc = unname(GDPpc),
  trade = unname(trade),
  gdp.growth = unname(gdp.growth),
  urban = unname(urban),
  industry = unname(industry),
  industry.means = industry.means,
  deliberation = unname(deliberation),
  constraints = unname(constraints),
  gov.eff = unname(gov.eff),
  gov.eff.mean.observed = unname(apply(gov.eff, 1, mean, na.rm = TRUE)),
  portfolio.size = unname(portfolio.size),
  eu = unname(eu),
  green = green,
  nC = nC,
  nI = nI,
  nY = nY)

nC   # Number of countries
## [1] 21
nS   # Number of sectors
## [1] 2
years  # Range of years
## [1] 1976 2005

Model

M <- "performance-tscs"
M.lab <- "Baseline"
m <- "model {
  #
  # Data part at the observational level
  #
  for (i in 1:nI) {
    for (c in 1:nC) {
      for (t in 2:nY) {
        Y[i,c,t] ~ dnorm(mu[i,c,t], tau[i,c,t])
        mu[i,c,t] <- #alpha[i]
                   delta[i,c] * Y[i,c,t-1]
                 + beta[1,i] * GDPpc[c,t-1]
                 + beta[2,i] * trade[c,t-1]
                 + beta[3,i] * eu[c,t-1]
                 + beta[5,i] * gdp.growth[c,t-1]
                 + beta[6,i] * urban[c,t-1]
                 + beta[7,i] * industry[c,t-1]
                 + theta[1,i] * portfolio.size[c,t]
                 + theta[2,i] * diversity[c,t]
                 + rho[i,c] * (Y[i,c,t-1] - mu[i,c,t-1] )
        tau[i,c,t] <- 1 / sigma.sq[i,c,t]
        sigma.sq[i,c,t] <- exp(
                                 lambda_c[i,c])
        resid[i,c,t] <- Y[i,c,t] - mu[i,c,t]
      }
      mu[i,c,1] <- alpha[i]
                 + beta[1,i] * GDPpc[c,1]
                 + beta[2,i] * trade[c,1]
                 + beta[3,i] * eu[c,1]
                 + beta[5,i] * gdp.growth[c,1]
                 + beta[6,i] * urban[c,1]
                 + beta[7,i] * industry[c,1]
                 + theta[1,i] * portfolio.size[c,1]
                 + theta[2,i] * diversity[c,1]
      rho[i,c] ~ dunif(-1, 1)
      delta[i,c] ~ dunif(0, 1)
      resid[i,c,1] <- Y[i,c,1] - mu[i,c,1]
    }

    #
    # Priors for variance component
    #
    lambda[1,i] ~ dnorm(0, 2^-2)
    lambda[2,i] ~ dnorm(0, 2^-2)
    
    #
    # Priors for the intercept
    #
    alpha[i] ~ dunif(0, 100)

    #
    # Priors for the control variables
    #
    for (b in 1:8) {
      beta[b,i] ~ dnorm(0, 1^-2)
    }

    #
    # Priors for main effects
    #
    for (t in 1:2) {
      theta[t,i] ~ dnorm(0, 1^-2)
    }
  }

  # Variance component, intercepts by country
  for (i in 1:nI) {
    for (c in 1:nC) {
      lambda_c[i,c] ~ dt(0, 0.1^-2, 3)
    }
  }

  #
  # Missing data
  #
  for (c in 1:nC) {
    for (t in 1:nY) {
      industry[c,t] ~ dnorm(industry.means[c], 0.05^-2)
    }
  }
}"
write(m, file= paste("models/model-", M, ".bug", sep = ""))
par <- NULL
par <- c(par, "alpha", "beta", "theta", "sigma")
par <- c(par, "lambda", "lambda_c")
par <- c(par, "delta")
par <- c(par, "nu")
par <- c(par, "rho")
par <- c(par, "resid")
inits <- list(
  list(.RNG.name="base::Super-Duper", .RNG.seed=1),
  list(.RNG.name="base::Super-Duper", .RNG.seed=2),
  list(.RNG.name="base::Wichmann-Hill", .RNG.seed=3),
  list(.RNG.name="base::Wichmann-Hill", .RNG.seed=2))
t0 <- proc.time()
rj <- run.jags(model = paste("models/model-", M, ".bug", sep = ""),
  data = dump.format(DJ, checkvalid=FALSE),
  inits = inits,
  modules = "glm",
  n.chains = 4, adapt = 1e2, burnin = 1e3, sample = 5e2, thin = 10,
  monitor = par, method = "parallel", summarise = FALSE)
s <- as.mcmc.list(rj)
save(s, file = paste("sample-", M, ".RData", sep = ""))
proc.time() - t0
load(file = paste("sample-", M, ".RData", sep = ""))
ggmcmc(ggs(s, family = "alpha|beta|theta|delta|lambda|rho"), 
       param_page = 10, file = paste("ggmcmc-full-", M, ".pdf", sep = ""))

ggmcmc(ggs(s, family = "sigma|alpha|beta|lambda|nu|rho"), 
       plot = c("traceplot", "crosscorrelation", "caterpillar"),
       param_page = 8, file = paste("ggmcmc-partial-", M, ".pdf", sep = ""))

Model results

Variance components.

L.lambda <- plab("lambda", list(Variable = c("(Intercept)", "Portfolio size"),
                                Sector = sector.label))
S.lambda <- ggs(s, family = "lambda\\[", par_labels = L.lambda)
ggs_caterpillar(S.lambda) +
  ggtitle("Variance component")

Figure 5: Variance component.

Variance component.

Variance components (country varying intercepts).

L.lambda.c <- plab("lambda_c", list(Indicator = indicator.label, Country = country.label))
S.lambda.c <- ggs(s, family = "lambda_c\\[", par_labels = L.lambda.c)
ggs_caterpillar(S.lambda.c, label = "Country") +
  facet_grid(~ Indicator) +
  ggtitle("Variance component (countries)")

Figure 6: Variance component (country varying intercepts).

Variance component (country varying intercepts).

Auto-regressive components.

L.rho <- plab("rho", list(Indicator = indicator.label, Country = country.label))
S.rho <- ggs(s, family = "rho", par_labels = L.rho)
ggs_caterpillar(S.rho, label = "Country") +
  facet_wrap(~ Indicator, scales="free") +
  aes(color=Indicator) +
  expand_limits(x = c(-1, 1)) +
  ggtitle("Auto-regressive component (countries)") +
  scale_colour_xfim()

Figure 7: Auto-regressive component.

Auto-regressive component.

Lagged dependent variable.

L.delta <- plab("delta", list(Indicator = indicator.label, Country = country.label))
S.delta <- ggs(s, family = "delta", par_labels = L.delta)
ggs_caterpillar(S.delta, label = "Country") +
  facet_wrap(~ Indicator, scales="free") +
  aes(color=Indicator) +
  ggtitle("Lagged dependent variable (countries)") +
  scale_colour_xfim()

Figure 8: Lagged dependent variable.

Lagged dependent variable.
L.thetas <- plab("theta", list(Variable = c("Portfolio size", "Diversity"),
                        Indicator = indicator.label))

L.betas <- plab("beta", list(Variable = c("GDP pc", "Trade", "EU", "Green",
                                          "GDP growth", "Urban", "Industry",
                                          "Consensus"),
                        Indicator = indicator.label))
L.betas <- bind_rows(L.thetas, L.betas)

S.betas <- ggs(s, family = "^beta\\[|^theta\\[", par_labels = L.betas) %>%
  filter(value != 0)
S.betas <- filter(S.betas, !Variable %in% c("Green", "Consensus"))

S.betas %>%
  group_by(Variable, Indicator) %>%
  summarize(median = median(value), sd = sd(value),
           `Prob > 0` = length(which(value > 0)) / n(),
    `Prob < 0` = length(which(value < 0)) / n(),
    `Mean expected effect` = mean(value)) %>%
  kable()
ggs_caterpillar(S.betas, label = "Variable") +
  facet_wrap(~ Indicator, scales="free") +
  aes(color = Indicator) +
  geom_vline(xintercept = 0, lty = 3) +
  scale_colour_xfim()
Slopes with the effects of control and main variables on performance, by performance indicator.

Figure 9: Slopes with the effects of control and main variables on performance, by performance indicator.

S.betas %>%
  filter(Indicator %in% c("General", "Specific1980")) %>%
  mutate(Variable = factor(as.character(Variable), 
                              levels = rev(c("Diversity", "Portfolio size",
                                         "EU", 
                                         "GDP pc", "GDP growth",
                                         "Industry", "Urban",
                                         "Trade")))) %>%
ggs_caterpillar(label = "Variable", sort = FALSE) +
  facet_wrap(~ Indicator, scales="free") +
  geom_vline(xintercept = 0, lty = 3) #+
Slopes with the effects of control and main variables on performance, by performance indicator.

Figure 10: Slopes with the effects of control and main variables on performance, by performance indicator.

S.betas %>%
  filter(Indicator %in% c("General")) %>%
  mutate(Variable = factor(as.character(Variable), 
                              levels = rev(c("Diversity", 
                                         "Portfolio size",
                                         "EU", 
                                         "GDP pc", "GDP growth",
                                         "Industry", "Urban",
                                         "Trade")))) %>%
ggs_caterpillar(label = "Variable", sort = FALSE) +
  geom_vline(xintercept = 0, lty = 3) +
  ggtitle("General performance")

Figure 11: Slopes with the effects of control and main variables on performance. General performance

Slopes with the effects of control and main variables on performance. General performance
S.betas %>%
  filter(Indicator %in% c("Specific1980")) %>%
  mutate(Variable = factor(as.character(Variable), 
                              levels = rev(c("Diversity", 
                                         "Portfolio size",
                                         "EU", 
                                         "GDP pc", "GDP growth",
                                         "Industry", "Urban",
                                         "Trade")))) %>%
ggs_caterpillar(label = "Variable", sort = FALSE) +
  geom_vline(xintercept = 0, lty = 3) +
  ggtitle("Specific 1980 performance")

Figure 12: Slopes with the effects of control and main variables on performance. Specific 1980 performance

Slopes with the effects of control and main variables on performance. Specific 1980 performance
S.betas %>%
  filter(Indicator %in% c("General", "Specific1980")) %>%
  mutate(Variable = factor(as.character(Variable), 
                              levels = rev(c("Diversity", 
                                         "Portfolio size",
                                         "EU", 
                                         "GDP pc", "GDP growth",
                                         "Industry", "Urban",
                                         "Trade")))) %>%
ggs_caterpillar(label = "Variable", sort = FALSE) +
  facet_wrap(~ Indicator, scales="free") +
  geom_vline(xintercept = 0, lty = 3) #+
Slopes with the effects of control and main variables on performance, by performance indicator.

Figure 13: Slopes with the effects of control and main variables on performance, by performance indicator.

ci(S.betas) %>%
ggplot(aes(x = Variable, 
              y = median,
              group = Indicator, color = Indicator)) +
  coord_flip() +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  geom_linerange(aes(ymin = Low, ymax = High), size = 1.5, position = position_dodge(width = 0.5)) +
  geom_linerange(aes(ymin = low, ymax = high), size = 0.5, position = position_dodge(width = 0.5)) +
  ylab("HPD") + xlab("Parameter") +
  geom_vline(xintercept = 0, lty = 3)

Figure 14: Slopes with the effects of control and main variables on performance.

Slopes with the effects of control and main variables on performance.
ggplot(S.betas, aes(x = value, color = Indicator, fill = Indicator)) +
  geom_density(alpha = 0.5) +
  facet_wrap(~ Variable, ncol = 5, scales = "free") +
  xlab("HPD") +
  geom_vline(xintercept = 0, lty = 3)
Slopes with the effects of control and main variables on performance.

Figure 15: Slopes with the effects of control and main variables on performance.

Variables by evidence.

S.betas %>%
  filter(value != 0) %>%
  group_by(Variable, Indicator) %>%
  summarize(`Prob > 0` = length(which(value > 0)) / n(),
    `Prob < 0` = length(which(value < 0)) / n(),
    `Mean expected effect` = mean(value)) %>%
  group_by(Variable, Indicator) %>%
  mutate(max = max(abs(`Prob > 0`), abs(`Prob < 0`))) %>%
  arrange(desc(max)) %>%
  select(-max) %>%
  kable()
S.betas.gral <- S.betas %>%
  filter(Indicator == "General") %>%
  mutate(Parameter = Variable)

ggs_caterpillar(S.betas.gral) +
  geom_vline(xintercept = 0, lty = 3)

Figure 16: Slopes for the effects of control variables on performance (Global performance indicator).

Slopes for the effects of control variables on performance (Global performance indicator).

Model evaluation

What is the model fit?

L.data <- plab("resid", list(Indicator = indicator.label,
                             Country = country.label,
                             Year = year.label))

Obs.sd <- Y %>%
  as.data.frame.table() %>%
  as_tibble() %>%
  rename(Indicator = Var1, Country = Var2, Year = Var3, value = Freq) %>%
  mutate(Year = as.integer(as.numeric(as.character(Year)))) %>%
  group_by(Indicator) %>%
  summarize(obs.sd = sd(value, na.rm = TRUE))

S.rsd <- ggs(s, family = "resid", par_labels = L.data) %>%
  filter(!str_detect(Country, "Z-")) %>%
  group_by(Iteration, Chain, Indicator) %>%
  summarize(rsd = sd(value))

ggplot(S.rsd, aes(x = rsd)) +
  geom_histogram(binwidth = 0.001) +
  geom_vline(data = Obs.sd, aes(xintercept = obs.sd)) +
  facet_grid(Indicator ~ .) +
  expand_limits(x = 0)

Figure 17: Model fit. Residual standard deviation (distribution) against observed standard deviation (vertical line).