This report presents the data analysis for “Discrimination against mobile EU citizens before and during the first Covid-19 lockdown: evidence from a conjoint experiment in Germany”, published in European Union Politics.
source("load_packages.R")
library(readxl)
Generate a variable of exposure to COVID made up of the combination between:
# Population by region, used further below as the denominator of the
# per-100,000 rates (joined to the COVID data on `Region`).
# Original data can be retrieved from:
# https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/02-bundeslaender.html
url.population <-
  "https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/02-bundeslaender.xlsx"
# Uncomment the following line to download the spreadsheet automatically
#download.file(url.population, destfile = "02-bundeslaender.xlsx")
population <-
  read_excel("02-bundeslaender.xlsx",
             sheet = 2, skip = 7,
             col_names = FALSE) %>%
  # With col_names = FALSE the columns are auto-named `...1`, `...2`, ...;
  # keep the 1st as Region and the 4th as Population
  select(Region = `...1`,
         Population = `...4`) %>%
  # Drop rows where either value is missing
  filter(!is.na(Population)) %>%
  filter(!is.na(Region)) %>%
  # Keep the label from its 5th character onwards (drops a 4-character prefix)
  mutate(Region = str_sub(Region, 5, -1)) %>%
  mutate(Population = as.integer(Population))
source("load_packages.R")
# Case-level COVID-19 data. The available columns (Bundesland, Landkreis,
# Meldedatum, AnzahlFall, AnzahlTodesfall, ...) are shown by head() below.
url.covid.rki <-
  "https://opendata.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0.csv"
# Uncomment the following line if you need to download the file automatically
#download.file(url.covid.rki, destfile = "dd4580c810204019a7b8eb3e0b329dd6_0.csv")
# or better yet, download it manually, as it is very large as of June 2021 (>300
# Mb), and download.file() may not handle it.
covid.orig <-
  read.csv("dd4580c810204019a7b8eb3e0b329dd6_0.csv")
# Preview the first rows of the raw table
head(covid.orig)
## ObjectId IdBundesland Bundesland Landkreis Altersgruppe Geschlecht
## 1 1 1 Schleswig-Holstein SK Flensburg A00-A04 M
## 2 2 1 Schleswig-Holstein SK Flensburg A00-A04 M
## 3 3 1 Schleswig-Holstein SK Flensburg A00-A04 M
## 4 4 1 Schleswig-Holstein SK Flensburg A00-A04 M
## 5 5 1 Schleswig-Holstein SK Flensburg A00-A04 M
## 6 6 1 Schleswig-Holstein SK Flensburg A00-A04 M
## AnzahlFall AnzahlTodesfall Meldedatum IdLandkreis
## 1 1 0 2020/09/30 00:00:00+00 1001
## 2 1 0 2020/10/29 00:00:00+00 1001
## 3 1 0 2020/11/03 00:00:00+00 1001
## 4 1 0 2020/11/20 00:00:00+00 1001
## 5 1 0 2020/11/23 00:00:00+00 1001
## 6 1 0 2020/12/18 00:00:00+00 1001
## Datenstand NeuerFall NeuerTodesfall Refdatum
## 1 18.06.2021, 00:00 Uhr 0 -9 2020/09/30 00:00:00+00
## 2 18.06.2021, 00:00 Uhr 0 -9 2020/10/29 00:00:00+00
## 3 18.06.2021, 00:00 Uhr 0 -9 2020/11/03 00:00:00+00
## 4 18.06.2021, 00:00 Uhr 0 -9 2020/11/19 00:00:00+00
## 5 18.06.2021, 00:00 Uhr 0 -9 2020/11/18 00:00:00+00
## 6 18.06.2021, 00:00 Uhr 0 -9 2020/12/14 00:00:00+00
## NeuGenesen AnzahlGenesen IstErkrankungsbeginn Altersgruppe2
## 1 0 1 0 Nicht übermittelt
## 2 0 1 0 Nicht übermittelt
## 3 0 1 0 Nicht übermittelt
## 4 0 1 1 Nicht übermittelt
## 5 0 1 1 Nicht übermittelt
## 6 0 1 1 Nicht übermittelt
Only work with the data up to the maximum date in which sampling was done. Calculate a 5-day moving average for infection rates per 100,000 inhabitants.
# Load the survey sample. The code below uses the object `I` and its
# `DateCompleted` and `Sample` columns (presumably provided by this file).
load("sample-eup.RData")
# First and last completion date of each sample
dates.main <- range(I$DateCompleted[I$Sample == "Main"])
dates.crisis <- range(I$DateCompleted[I$Sample == "Crisis"])

# Original data contains entries by age group and sex, and at the Landkreis (local) level
# We remove that and group by region and by date
covid <- covid.orig %>%
  tibble() %>%
  select(Region = Bundesland,
         Date = Meldedatum,
         Infected = AnzahlFall,
         Death = AnzahlTodesfall) %>%
  # Meldedatum looks like "2020/09/30 00:00:00+00"; keep only the date part
  mutate(Date = as.Date(str_sub(Date, start = 1, end = 10),
                        format = "%Y/%m/%d")) %>%
  # Avoid days greater than one month after the last day of sampling
  filter(Date <= (max(I$DateCompleted) + 30)) %>%
  # Drop rows with negative counts (e.g. -1) in Infected or Death
  filter(Infected >= 0 & Death >= 0)
# Calculate total cases grouping age groups and sex:
# one row per Region and Date with the summed counts
covid <- covid %>%
  group_by(Region, Date) %>%
  summarize(Infected = sum(Infected),
            Death = sum(Death)) %>%
  ungroup() %>%
  # Merge with population; `Region` is the only shared column, so naming it
  # explicitly gives the same result without the "Joining, by" message
  left_join(population, by = "Region")
# Calculate cumulative cases and infection rates.
# The result stays grouped by Region, as the per-region cumsum()/lag() require.
covid.cum <- covid %>%
  group_by(Region) %>%
  # Sort by date within region so the cumulative sums and lags follow time
  arrange(Region, Date) %>%
  # Running totals per region
  mutate(Infected = cumsum(Infected),
         Death = cumsum(Death)) %>%
  # Cumulative counts per 100,000 inhabitants
  mutate(`Infection rate` = Infected / Population * 1e5,
         `Death rate` = Death / Population * 1e5) %>%
  # Relative change of the cumulative count versus the previous row of the
  # same region (NA for each region's first row, where lag() has no value)
  mutate(`Infection growth rate` = (Infected - lag(Infected)) / lag(Infected),
         `Death growth rate` = (Death - lag(Death)) / lag(Death)) %>%
  # 5-row rolling mean of the growth rates; positions without a full window are NA
  mutate(`Infection growth rate smoothed` = zoo::rollmean(`Infection growth rate`, k = 5, fill = NA),
         `Death growth rate smoothed` = zoo::rollmean(`Death growth rate`, k = 5, fill = NA))
Figure 1: Infection rate per 100,000 inhabitants.
# One line per region: cumulative infection rate over time
covid.cum %>%
  ggplot(aes(x = Date, y = `Infection rate`, group = Region)) +
  geom_line()
Figure 2: Daily growth rate of the infection rate.
# One line per region: growth rate of cumulative infections over time
covid.cum %>%
  ggplot(aes(x = Date, y = `Infection growth rate`, group = Region)) +
  geom_line()