options(stringsAsFactors = F)
library(magrittr)
library(tidyverse)
library(here)
library(knitr)

Get PAC ID–industry code crosswalk from CRP

(notice industry codes included and excluded)

read_crp <- . %>% {read_delim(file = here(str_c("CRP Data/cmtes/", .)),
                  delim = ",", 
                  quote = "|",
                  col_names = c("Cycle", "CmteID", "PACShort", "Affiliate",
                    "UltOrg","RecipID","RecipCode", "FECCandID",
                    "Party", "PrimCode","Source", "Sensitive", 
                    "Foreign", "Active"))}

crp <- map_dfr(list.files(here("CRP Data/cmtes")), read_crp)

## INDUSTRY CODES
industry <- readxl::read_xls(here("CRP Data/CRP_IDs.xls"), sheet = "CRP Industry Codes")
names(industry) <- industry[5,]

industry %<>% 
  filter(Sector %in% "Energy/Nat Resource",
         !Industry %in% c("Mining", "Waste Management" , "Fisheries & Wildlife", "Misc. Energy"),
         !Catname %in% c("Nuclear energy", 
                         "Nuclear plant construction, equipment & svcs",
                         "Water Utilities",
                         "Oilfield service, equipment & exploration"))



crpEnergycodes <- industry$Catcode

crp <- industry %>%
  left_join(crp, by = c( "Catcode" = "PrimCode")) %>% 
  mutate(comid = CmteID) %>% 
  select(comid, UltOrg, Affiliate, PACShort, Catcode) %>% 
  distinct() %>% 
  group_by(comid) %>%
  summarise(UltOrg = str_c(unique(UltOrg), collapse = "; "),
            Affiliate = paste(unique(Affiliate), collapse = "; "),
            PACShort = str_c(unique(PACShort), collapse = "; "),
            Catcode = str_c(unique(Catcode), collapse = "; ")) %>%
  distinct()

# Energy industry 
industry %>% kable()
Catcode Catname Catorder Industry Sector Sector Long
E0000 Energy, Natural Resources and Environment E07 Misc Energy Energy/Nat Resource Energy & Natural Resources
E1000 Energy production & distribution E07 Misc Energy Energy/Nat Resource Energy & Natural Resources
E1100 Oil & Gas E01 Oil & Gas Energy/Nat Resource Energy & Natural Resources
E1110 Major (multinational) oil & gas producers E01 Oil & Gas Energy/Nat Resource Energy & Natural Resources
E1120 Independent oil & gas producers E01 Oil & Gas Energy/Nat Resource Energy & Natural Resources
E1140 Natural Gas transmission & distribution E01 Oil & Gas Energy/Nat Resource Energy & Natural Resources
E1160 Petroleum refining & marketing E01 Oil & Gas Energy/Nat Resource Energy & Natural Resources
E1170 Gasoline service stations E01 Oil & Gas Energy/Nat Resource Energy & Natural Resources
E1180 Fuel oil dealers E01 Oil & Gas Energy/Nat Resource Energy & Natural Resources
E1190 LPG/Liquid Propane dealers & producers E01 Oil & Gas Energy/Nat Resource Energy & Natural Resources
E1500 Alternate energy production & services E07 Misc Energy Energy/Nat Resource Energy & Natural Resources
E1600 Electric Power utilities E08 Electric Utilities Energy/Nat Resource Energy & Natural Resources
E1610 Rural electric cooperatives E08 Electric Utilities Energy/Nat Resource Energy & Natural Resources
E1620 Gas & Electric Utilities E08 Electric Utilities Energy/Nat Resource Energy & Natural Resources
E1630 Independent power generation & cogeneration E08 Electric Utilities Energy/Nat Resource Energy & Natural Resources
E1700 Power plant construction & equipment E07 Misc Energy Energy/Nat Resource Energy & Natural Resources
E2000 Environmental services, equipment & consulting E09 Environmental Svcs/Equipment Energy/Nat Resource Energy & Natural Resources
# Energy company PACs
crp %>% head() %>% kable()
comid UltOrg Affiliate PACShort Catcode
C00001289 Chevron Corp; Union Oil; Unocal Corp Chevron Corp; NA Unocal Corp; Union Oil E1110; E1120
C00002972 National Rural Electric Cooperative Assn National Rural Electric Cooperative Assn National Rural Electric Cooperative Assn E1610
C00004952 National Rural Electric Cooperative Assn National Rural Electric Cooperative Assn Mississippi ACRE E1610
C00004960 Burlington Resources; ConocoPhillips NA; ConocoPhillips Burlington Resources E1120
C00007450 American Gas Assn NA American Gas Assn E1140
C00007849 National Rural Electric Cooperative Assn National Rural Electric Cooperative Assn Speak Up for Rural Electrification E1610
# write_csv(crpEnergy, path = "data/crpEnergy.csv")

PAC totals

load(here("CRP Data/pac_money.RData"))
d <- pac_money
d  # ~4 million observations  
## # A tibble: 4,261,052 x 29
##    PACID CID   FECRecNo Amount Date  RealCode Type  DI    FirstLastP
##    <chr> <chr> <chr>     <dbl> <chr> <chr>    <chr> <chr> <chr>     
##  1 C000… N000… 1258633    2000 06/0… M2300    24K   D     Mark Bake…
##  2 C000… N000… 1525884     500 08/0… M2300    24K   D     Bob Ehrli…
##  3 C000… N000… 1258638    3000 06/0… M2300    24K   D     Dennis Ha…
##  4 C000… N000… 1525890    1000 08/0… M2300    24K   D     Bill McCo…
##  5 C000… N000… 1525890    1000 08/0… M2300    24K   D     Bill McCo…
##  6 C000… N000… 1258636    1000 06/0… M2300    24K   D     Mark Gree…
##  7 C000… N000… 1258641     500 06/0… M2300    24K   D     Denny Reh…
##  8 C000… N000… 1525894     500 08/0… M2300    24K   D     John M Sh…
##  9 C000… N000… 1258639     500 06/0… M2300    24K   D     Connie Mo…
## 10 C000… N000… 1258634     500 06/0… M2300    24K   D     Tom Barre…
## # … with 4,261,042 more rows, and 20 more variables: DistIDRunFor <chr>,
## #   DistIDCurr <chr>, CurrCand <chr>, CycleCand <chr>, CRPICO <chr>, `No
## #   Pacs` <chr>, PACShort <chr>, Affiliate <chr>, Ultorg <chr>,
## #   RecipID <chr>, PrimCode <chr>, Source <chr>, Sensitive <chr>,
## #   Foreign <chr>, Active <chr>, Cycle <dbl>, RecipCode <chr>,
## #   Party <chr>, FECCandID <chr>, cycle <chr>
# instructions for corp pacs 
# https://www.opensecrets.org/resources/datadictionary/UserGuide.pdf pages 8-9

# drop losing candidates
d %<>% filter(!str_detect(RecipCode, "L"))

# drop presidential candidates
d %<>% filter(!str_detect(FECCandID, "^P"))

#  eliminate transfers and joint fund raising committees
d %<>% filter(!str_detect(RealCode, "Z4|Z9|z4|z9"))

# direct contributions to candidates, DI == "D". Drops DI == "I", indirect expenditures made for/against cands
d %<>% filter(DI == "D")

# identify industry 
d %<>% mutate(industry = ifelse(PrimCode %in% crpEnergycodes, "energy", "other") %>% as.factor())

# # drop negative?
d %>% filter(industry == "energy") %>% group_by(CID, PACID, FECCandID, Cycle, FirstLastP) %>% tally(Amount) %>% arrange(n) 
## # A tibble: 94,449 x 6
## # Groups:   CID, PACID, FECCandID, Cycle [94,415]
##    CID       PACID     FECCandID Cycle FirstLastP                  n
##    <chr>     <chr>     <chr>     <dbl> <chr>                   <dbl>
##  1 N00001093 C00104810 S8NY00082  2000 Charles E Schumer (D)  -10000
##  2 N00001604 C00002972 S6PA00100  2006 Arlen Specter (R)      -10000
##  3 N00003280 C00040568 S8MS00196  2010 Roger Wicker (R)       -10000
##  4 N00005404 C00019653 S4LA00016  1996 J Bennett Johnston (D) -10000
##  5 N00005709 C00081455 S6TX00040  2002 Phil Gramm (R)         -10000
##  6 N00009869 C00064774 ""         2018 Orrin G Hatch (R)      -10000
##  7 N00009869 C00107771 S6UT00063  2018 Orrin G Hatch (R)      -10000
##  8 N00033363 C00002972 S4AR00103  2016 Tom Cotton (R)         -10000
##  9 N00031129 C00430157 S4OK00232  2018 James Lankford (R)      -7000
## 10 N00009573 C00161570 S2AZ00141  2018 Jeff Flake (R)          -6500
## # … with 94,439 more rows
# yes, most of these look like refunds from previous cycles 

# # drop negative?
d %<>% filter(Amount > -1)


d %>% select(industry, everything()) %>% head() %>% kable()
industry PACID CID FECRecNo Amount Date RealCode Type DI FirstLastP DistIDRunFor DistIDCurr CurrCand CycleCand CRPICO No Pacs PACShort Affiliate Ultorg RecipID PrimCode Source Sensitive Foreign Active Cycle RecipCode Party FECCandID cycle
other C00000042 N00001925 1525884 500 08/09/2000 M2300 24K D Bob Ehrlich (R) MD02 MD02 Y Y I Illinois Tool Works Illinois Tool Works C00000042 M2300 Hoovers N 0 1 2000 RW R H4MD02059 2000
other C00000042 N00004781 1258638 3000 06/05/2000 M2300 24K D Dennis Hastert (R) IL14 IL14 Y Y I Illinois Tool Works Illinois Tool Works C00000042 M2300 Hoovers N 0 1 2000 RW R H6IL14095 2000
other C00000042 N00004391 1258636 1000 06/05/2000 M2300 24K D Mark Green (R) WI08 WI08 Y Y I Illinois Tool Works Illinois Tool Works C00000042 M2300 Hoovers N 0 1 2000 RW R H8WI00026 2000
other C00000042 N00004645 1258641 500 06/05/2000 M2300 24K D Denny Rehberg (R) MT01 Y Y O Illinois Tool Works Illinois Tool Works C00000042 M2300 Hoovers N 0 1 2000 RW R H0MT00033 2000
other C00000042 N00004961 1525894 500 08/09/2000 M2300 24K D John M Shimkus (R) IL20 IL20 Y Y I Illinois Tool Works Illinois Tool Works C00000042 M2300 Hoovers N 0 1 2000 RW R H2IL20042 2000
other C00000042 N00001805 1258639 500 06/05/2000 M2300 24K D Connie Morella (R) MD08 MD08 Y Y I Illinois Tool Works Illinois Tool Works C00000042 M2300 Hoovers N 0 1 2000 RW R H0MD08021 2000

Unique PACs per cycle

# Number of distinct pacs 
d %>% 
  group_by(Cycle, industry) %>%
  summarise(PACs = length(unique(PACID))) %>% 
  arrange(industry) %>% 
  knitr::kable()
Cycle industry PACs
1994 energy 251
1996 energy 253
1998 energy 250
2000 energy 227
2002 energy 200
2004 energy 177
2006 energy 176
2008 energy 194
2010 energy 198
2012 energy 203
2014 energy 204
2016 energy 218
2018 energy 209
1994 other 3110
1996 other 3243
1998 other 3258
2000 other 3291
2002 other 3392
2004 other 3586
2006 other 3656
2008 other 3748
2010 other 3813
2012 other 3664
2014 other 3861
2016 other 3770
2018 other 3808

Total by member and industry code

# consolidate by industry, dropping PrimCode
# Contributions total by member and industry code
d1 <- d %>%
  group_by(CID, industry, Cycle) %>%
  tally(Amount) %>%
  ungroup() %>% 
  rename(amounts = n) %>% 
  distinct()
d1 %>% head() %>% kable()
CID industry Cycle amounts
N00000010 energy 1994 10250
N00000010 energy 1996 12750
N00000010 energy 1998 6100
N00000010 energy 2000 6000
N00000010 energy 2002 6250
N00000010 energy 2004 9250

Crosswalk CRP IDs with ICPSR

load(here("data/icpsr_crosswalk_crp.Rdata"))
icpsr_crosswalk_crp$Cycle %<>% as.numeric()

d1 %<>% left_join(icpsr_crosswalk_crp) %>% distinct()


#source(here("members/nameCongress.R")) # augments voteview member names
## Load augmented member names without having to load voteview package
load(here("members/members.Rdata"))
members %<>% filter(chamber %in% c("House", "Senate"))

# merge with voteview 
d1 <- members %>% 
  select(congress, icpsr, party_name, state_abbrev, chamber, bioname) %>% 
  left_join(d1) 

# add 0s
d1 %<>% 
  mutate(industry= replace_na(industry, "other") )%>% # assume missing members are 0 (this is reasonable, but should check in crosswalk-crp.R)
   ungroup() %>%
    complete(industry, congress, icpsr, chamber,
             fill = list(amounts = 0)) %>% 
  distinct() 

# remerge with voteview
d1 <- members %>% select(congress, icpsr, chamber, bioname, party_name) %>% 
  left_join(d1 %>% select(industry, congress, icpsr, chamber, amounts)) 

# add back in cycle
d1 %<>% mutate(Cycle = (congress - 107)*2 + 2000) 

# members failing to match
# d1 %>% filter(is.na(industry)) %>% distinct(bioname)

d1 %>% head() %>% kable()
congress icpsr chamber bioname party_name industry amounts Cycle
108 99342 House DEAL, John Nathan Republican Party energy 23999 2002
108 99342 House DEAL, John Nathan Republican Party other 154639 2002
108 95407 Senate CAMPBELL, Ben Nighthorse Republican Party energy 0 2002
108 95407 Senate CAMPBELL, Ben Nighthorse Republican Party other 9000 2002
108 94828 House HALL, Ralph Moody Republican Party energy 0 2002
108 94828 House HALL, Ralph Moody Republican Party other 0 2002

Check for zero counts

# Make sure there are indeed 0 counts if we expect them
d1 %>% 
  group_by(industry) %>%
  count(amounts == 0) %>% 
  mutate(industry_total = sum(n)) %>% 
  kable()
industry amounts == 0 n industry_total
energy FALSE 5924 6551
energy TRUE 627 6551
other FALSE 6312 6551
other TRUE 239 6551
# Make sure we have equal numbers of observations per industry
d1 %>% count(industry) %>% kable()
industry n
energy 6551
other 6551

Duplicated member-year total observations?

# count observations (there should be one per industry)
d1 %>% group_by(bioname, icpsr, congress, chamber, industry) %>% 
  add_count() %>% 
  arrange(bioname) %>%
  filter(n > 1) %>% kable()
congress icpsr chamber bioname party_name industry amounts Cycle n

Total and average PAC contributions per member

d %<>% mutate(cycle = Cycle)
d1 %<>% mutate(cycle = Cycle)

# total
d1 %>%
  group_by(Cycle, party_name, industry) %>% 
  summarise(total = sum(amounts, na.rm = TRUE)/1000000) %>% 
  ungroup() %>% 
  ggplot() + 
  aes(x = factor(Cycle), y = total, fill = party_name) + 
  geom_col() + 
  facet_wrap("industry", scales = "free") +
  labs(y = "Total Industry PAC contributions (millions)", 
       x = "FEC Electoral Cycle")

# average per member 
d1 %>%
  group_by(Cycle, icpsr, party_name, industry) %>% 
  summarise(total = sum(amounts, na.rm = TRUE)/1000000) %>% 
  ungroup() %>% 
  group_by(Cycle, party_name, industry) %>% 
  summarise(mean = mean(total)) %>% 
  ggplot() + 
  aes(x = factor(Cycle), y = mean, fill = party_name) + 
  geom_col(position = "dodge") + 
  facet_wrap("industry", scales = "free") +
  labs(y = "Average Industry PAC contributions (millions)", 
       x = "FEC Electoral Cycle")

Save results

pac_contributions <- d
save(pac_contributions, file = here("data/pac_contributions.Rdata"))

pac_amounts <- d1
save(pac_amounts, file = here("data/pac_amounts.Rdata"))