I’m just using Hein-bound speech data for now; I’ll replace it with our own data scraped from the Congressional Record once we nail down exactly what we want to extract from speeches.
# Read one pipe-delimited speaker-map file and return its distinct
# speaker/speech pairs, tagged with the congress taken from the file name.
#
# Arguments:
#   file  File name (not a full path) inside the "hein-bound" directory.
#
# Returns a data frame with columns `speakerid`, `speech_id`, and `congress`.
# `congress` is a character string: the part of `file` before the first "_".
get_speeches <- function(file) {
  # progress indicator: one message per file read
  message(file)

  # Build the path from the project root, then drop the first "/cr" in it.
  # NOTE(review): presumably the data directory sits beside, not inside, the
  # project's "cr" folder -- confirm. "/cr" is a regex and would also match
  # any earlier path component that begins with "cr".
  path <- here("hein-bound/", file) %>%
    str_remove("/cr")

  read_delim(path, delim = "|") %>%
    # extract congress from file name
    mutate(congress = str_remove(file, "_.*")) %>%
    select(speakerid, speech_id, congress) %>%
    distinct()
}
# files: every file name in the hein-bound directory, one row per file
# (the "/cr" component is stripped from the project-root path before listing)
heinbound <- list.files(here("hein-bound") %>% str_remove("/cr")) %>%
  enframe(value = "file")

# subset to speakermap files from recent congresses
speakermaps <- heinbound %>%
  filter(
    str_detect(file, "SpeakerMap"),
    str_detect(file, "^1") # FIXME subsetting to >99th congress
  )
# speakers dataframe: read every selected speaker-map file and stack the rows
speeches <- map_dfr(speakermaps$file, get_speeches) %>%
  # congress comes back from get_speeches() as text; make it numeric so it
  # can be compared below
  mutate(congress = as.numeric(congress)) %>%
  # FIXME focusing on the 107-111th for now
  # (only the lower bound is enforced here)
  filter(congress > 106)
# Devin's members data (expanded from voteview)
# NOTE(review): presumably this .Rdata file defines `members` -- confirm.
load(here("data", "members.Rdata"))

# Make mixed case names, like "McConnell" upper case (to go the other way, we
# would need to fix them one by one)
members <- members %>%
  mutate(
    last_name = str_to_upper(last_name),
    # one-letter party code: the first character of party_name
    party = str_sub(party_name, 1, 1)
  )
# GET CROSSWALK
# NOTE(review): presumably this .Rdata file defines `crosswalk` -- confirm.
load(here::here("data", "crosswalk.Rdata"))

# Collapse speeches to one row per congress x speaker, where `n` is the number
# of distinct speeches, then attach member identifiers and NOMINATE scores.
speeches <- speeches %>%
  select(congress, speakerid, speech_id) %>%
  distinct() %>%
  count(congress, speakerid) %>%
  # Add ICPSRs
  # NOTE(review): natural join on whatever columns the two tables share;
  # the key columns of `crosswalk` are not visible here -- confirm.
  left_join(crosswalk) %>%
  # add NOMINATE
  # NOTE(review): also a natural join; `congress` is not among the selected
  # member columns, so a member with more than one distinct
  # chamber/party/score combination would duplicate rows -- confirm.
  left_join(
    members %>%
      select(chamber, party, icpsr, nominate.dim1) %>%
      distinct()
  ) %>%
  # display name: text before the first comma of `bioname`, in title case
  mutate(name = bioname %>% str_remove(",.*") %>% str_to_title()) %>%
  # keep the two major parties only
  filter(party %in% c("D", "R"))
# Scatter plot of speech counts against a chosen covariate, pooled over
# congresses, with a smoothed trend per party and member-name labels.
#
# Arguments:
#   data  Data frame with columns `n`, `name`, `party`, and the column named
#         by `var`.
#   var   Name of the x-axis column, as a string.
#   lab   x-axis label.
#
# Returns the ggplot object.
plot_speeches <- function(data, var, lab) {
  data %>%
    # look the column up by its string name; `.data[[var]]` stays unambiguous
    # even if `data` happens to have a column called "var"
    mutate(x = .data[[var]]) %>%
    ggplot() +
    aes(x = x, y = n, label = name, color = party) +
    geom_point(alpha = .2) +
    geom_smooth() +
    geom_text(check_overlap = TRUE, color = "black") +
    labs(
      y = "Number of Speeches per Congress",
      x = lab,
      color = "Party"
    ) +
    # named values pin each colour to its party code, so the mapping does not
    # depend on which parties are present or on their sort order
    scale_color_manual(values = c(D = "#0015BC", R = "#FF0000"))
}
# Scatter plot of speech counts against a chosen covariate, drawn in one
# panel per congress (free axis scales), with member-name labels.
#
# Arguments:
#   data  Data frame with columns `n`, `name`, `party`, `congress`, and the
#         column named by `var`.
#   var   Name of the x-axis column, as a string.
#   lab   x-axis label.
#
# Returns the ggplot object.
plot_speeches_congress <- function(data, var, lab) {
  data %>%
    # look the column up by its string name; `.data[[var]]` stays unambiguous
    # even if `data` happens to have a column called "var"
    mutate(x = .data[[var]]) %>%
    ggplot() +
    aes(x = x, y = n, label = name, color = party) +
    geom_point(alpha = .2) +
    geom_text(check_overlap = TRUE, color = "black") +
    labs(
      y = "Number of Speeches per Congress",
      x = lab,
      color = "Party"
    ) +
    # named values pin each colour to its party code, so the mapping does not
    # depend on which parties are present or on their sort order
    scale_color_manual(values = c(D = "#0015BC", R = "#FF0000")) +
    facet_wrap("congress", scales = "free")
}
# Speech counts against the nominate.dim1 score, all congresses pooled
plot_speeches(speeches, var = "nominate.dim1", lab = "NOMINATE 1 Score")
# Same plot, one panel per congress
plot_speeches_congress(speeches, var = "nominate.dim1", lab = "NOMINATE 1 Score")