Data scraped using this script: https://judgelord.github.io/cr/scraper.html
# Note, we don't need this metadata to parse the files
# metadata
# Loads the objects stored in data/cr_metadata.Rdata; the code below expects
# one of them to be the data frame `cr_metadata`.
load(here::here("data", "cr_metadata.Rdata"))

# clean up data for plot clarity
# - year:    first four characters of `date`, as a number
# - chamber: `section` with everything from the first "-" dropped,
#            title-cased, and "Extensions" spelled out in full
cr_metadata %<>%
  mutate(year = str_sub(date, 1,4) %>% as.numeric(),
         chamber = section %>%
           str_remove("-.*") %>%
           str_to_title() %>%
           str_replace("Extensions", "Extensions of Remarks"))
# load cr text file names
cr_file <- list.files("/Users/devin/cr_bulk/data/htm")

# extract date from file name
# `year` is the first run of four digits in the name; `date` is the first
# yyyy-mm-dd substring.
cr <- tibble(file = cr_file,
             year = str_extract(cr_file, "[0-9]{4}") %>% as.numeric(),
             date = str_extract(cr_file, "[0-9]{4}-[0-9]{2}-[0-9]{2}") %>%
               as.Date())

# get congress from year
# Subtracting 2001.1 (rather than 2001) keeps the even year of each two-year
# congress from rounding up into the next one.
cr %<>% mutate(congress = as.numeric(round((year - 2001.1)/2)) + 107) # the 107th congress began in 2001

# extract chamber from URL
# The single character after "Pg" in the file name is the section code.
cr %<>% mutate(chamber = str_extract(file, "Pg.") %>%
                 str_remove("Pg") %>%
                 str_replace("E", "Extensions of Remarks") %>%
                 str_replace("H", "House") %>%
                 str_replace("S", "Senate"))

# reconstruct URLs from file names
cr %<>% mutate(url_txt = str_c("https://www.congress.gov/", congress, "/crec/",
                               date %>% str_replace_all("-", "/"),
                               "/modified/",
                               file))
NOTE: I’m just using a sample of documents for now.
#FIXME
# just using a few documents for now
# d %<>% top_n(22, date)
d <- filter(cr, date == as.Date("2018-03-08"))

# Keep at most the first 30 rows. head() is used instead of d[1:30, ] because
# the latter pads the result with all-NA rows when fewer than 30 rows match.
d <- head(d, 30)
# /FIXME
# TEXT AND TEXT VARS
# a function to get the first bit of text
#
# file: name of one file inside /Users/devin/cr_bulk/data/htm/
# Returns a single string: the file's lines collapsed with spaces, whitespace
# squished, everything up to and including the "www.gpo.gov</a>] " header
# removed, cut to the first 500 characters, with "..." appended.
head_text <- function(file){
  text <- read_lines(str_c("/Users/devin/cr_bulk/data/htm/", file)) %>%
    str_c(collapse = " ") %>%
    str_squish() %>%
    str_remove(".*?www.gpo.gov</a>\\] ") %>%
    str_sub(0, 500) %>%
    str_c("...")

  return(text)
}
# a function to get all text
#
# file: name of one file inside /Users/devin/cr_bulk/data/htm/
# Returns a single string: the file's lines collapsed with spaces, whitespace
# squished, and everything up to and including the "www.gpo.gov</a>] " header
# removed.
all_text <- function(file){
  text <- read_lines(str_c("/Users/devin/cr_bulk/data/htm/", file)) %>%
    str_c(collapse = " ") %>%
    str_squish() %>%
    str_remove(".*?www.gpo.gov</a>\\] ")

  return(text)
}
## test
# head_text(d$file[5])

# the first bit of text
# possibly() turns any error from head_text() (e.g. an unreadable file) into "".
d$text_head <- d$file %>% map_chr(possibly(head_text, otherwise = ""))

# fill in procedural roles
# Headings that mark a procedural item when they open the text. Alternation is
# tried left to right, so the order below is kept exactly. The original pattern
# had an empty alternative ("REMOTE VOTING||ENROLLED BILL"): it matched the
# empty string first, so every label listed after it could never be returned
# and non-procedural text got "" instead of NA.
procedural_headings <- c(
  "ANNOUNCEMENT", "RECESS", "PRAYER", "PLEDGE", "MESSAGE",
  "EXECUTIVE MESSAGE", "EXECUTIVE COMMUNICATION",
  "EXECUTIVE AND OTHER COMMUNICATION", "MEASURE", "ADJOURNMENT",
  "DESIGNATION", "THE JOURNAL", "RESIGNATION", "ELECTING", "CONSTITUTIONAL",
  "ADDITIONAL SPONSORS", "SWEARING IN", "MOMENT OF SILENCE",
  "SENATE COMMITTEE MEETING", "BUDGETARY", "EFFECTS", "REAPPOINTMENT",
  "APPOINTMENT", "RECALL", "COMMUNICATION", "REMOTE COMMITTEE PROCEEDINGS",
  "REMOTE VOTING", "ENROLLED BILL", "ADDITIONAL COSPONSORS",
  "DISCHARGED NOMINATION", "CONFIRMATION", "JOINT RESOLUTION",
  "SENATE ENROLLED BILLS", "PUBLICATION", "EXPLANATORY STATEMENT",
  "WITHDRAWAL"
)
process_pattern <- str_c("^(", str_c(procedural_headings, collapse = "|"), ")")

d %<>%
  mutate(process = str_extract(text_head, process_pattern))

# order by date (just for presentation)
# Sorting ascending and then by the reversed column leaves rows in descending
# date order.
d %<>% arrange(date) %>% arrange(rev(date))
# Extract speaker names
# a function to grab speaker names
#
# file: name of one file inside /Users/devin/cr_bulk/data/htm/
# Returns a single string with every match of the speaker pattern, in order of
# appearance and separated by ";". Matches are not de-duplicated.
extract_names <- function(file){
  # Titles followed by a capitalised surname, plus the chair titles.
  # (Named name_pattern rather than `names` to avoid shadowing base::names.)
  name_pattern <- "(Mr.|Mrs.|Ms.|Miss|HON.) (([A-Z]|\\.| )* |-|)(Mc|Mac|Des|De|La|[A-Z])[A-Z][A-Z]+|The PRESIDING OFFICER|The SPEAKER\\.|The SPEAKER pro tempore \\(.*?\\)|The ACTING PRESIDENT|The VICE PRESIDENT"

  # for testing
  #file <- d$file[41]

  text <- read_lines(str_c("/Users/devin/cr_bulk/data/htm/", file)) %>%
    str_c(collapse = " ") %>%
    str_squish()

  #if( str_detect(text, name_pattern) ){
  text %<>%
    str_extract_all(name_pattern) %>%
    unlist() %>%
    # drop first letter of first sentence
    str_remove("\\. [A-Z]$|\\.$") %>%
    str_squish() %>%
    #unique() %>%
    #str_sub(0, 240) %>% # trim just in case pattern overmatches
    str_c(collapse = ";")
  #} else {
  # text <- NA
  #}

  return(text)
}
## Test
# extract_names(d$file[22])

# Speaker names per file; a file that cannot be parsed is marked "404error".
# NOTE(review): extract_names() returns a string rather than NA, so this
# coalesce() looks like it never falls back to `process` - confirm whether
# empty strings were meant to be filled as well.
d %<>%
  mutate(speaker = file %>% map_chr(possibly(extract_names, otherwise = "404error")),
         speaker = coalesce(speaker, process))

# files with more than one speaker
d %>%
  filter(str_detect(speaker, ";")) %>%
  select(speaker, url_txt) %>% kablebox()

# files with no speaker found
d %>%
  filter(is.na(speaker)|speaker == "") %>%
  select(speaker, text_head, url_txt) %>% kablebox()
speaker | text_head | url_txt |
---|---|---|
| | PRAYER Pastor Tony Perkins, Greenwell Springs Baptist Church, Greenwell Springs, Louisiana, offered the following prayer: Father, we thank You for today. I thank You for the men and women in this distinguished body, whom You have given the privilege of serving the people of this country. May they understand the unique, but fleeting, moment they occupy in the history of man. The challenges of our day are certainly many, and, as a result, the burden that those in this Chamber carry is very heavy. … | https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgH1479-2.htm |
# a function to parse files with more than one speaker
#
# speakers: one string of speaker names separated by ";"
# file:     file name, passed on to all_text()
# Returns a list holding one character vector with one element per speaker:
# the text from that speaker's name up to the next speaker's name (or to the
# end of the text for the last speaker), NA where nothing matches.
# NOTE(review): this name masks base::parse(); it is kept because
# parse_text() calls it by name.
# for testing
# speakers <- d$speaker[22]
# file <- d$file[22]
parse <- function(speakers, file){
  speakers %<>% str_split(";") %>% unlist()

  text <- all_text(file) # all text

  speech <- rep(NA_character_, length(speakers)) # init

  # add one to speakers vector to mark the end of the last speech
  # (loop-invariant, so built once)
  s <- c(speakers, "$")

  for(i in seq_along(speakers)){
    # speech of speaker i
    # NOTE(review): names are used as regular expressions here, so "." and
    # "(" ")" inside a name are not matched literally - confirm.
    speech[i] <- text %>%
      str_extract(str_c(s[i],".*?", s[i+1])) %>%
      str_remove(s[i+1])

    # remove speech i from text
    # NOTE(review): the trailing "$" means this only matches when the text
    # ends with the speaker's name, so the text is usually left unchanged -
    # confirm whether that is intended.
    text %<>%
      str_remove(str_c(".*?", s[i], "$"))
  }

  return( list(speech) )
}
## test
# parse(d$speaker[1], d$file[1])

# Split each file into one row per speaker.
#
# d: data frame with a `speaker` column (names separated by ";") and a `file`
#    column.
# Returns d with `speaker` and `text` unnested to one row per speaker,
# duplicate rows dropped, and `text_head` rebuilt from each row's `text`.
parse_text <- function(d){
  # one character vector of speeches per file
  d$text <- map2(.x = d$speaker,
                 .y = d$file,
                 .f = parse) %>% flatten()

  # split speakers into a list
  d$speaker %<>% str_split(";")

  # unnest
  d %<>% unnest(c(speaker, text))

  d %<>% distinct()

  # extract names
  # cr_metadata %<>%
  # mutate(speaker = file %>% map_chr(possibly(extract_names, otherwise = "")))
  ############################################
  # join metadata to file names
  #FIXME for some reason, urls without a -[1-9] at the end seem to be missing from the metadata
  ## Commented out because we don't need html metadata to parse
  # d %<>% left_join(cr_metadata)

  # first 500 characters of each speech, for display
  d %<>% mutate(text_head = text %>%
                  str_sub(0,500) %>%
                  str_c("..."))

  return(d)
  #END parse
}
# split every file into one row per speaker and speech
d %<>% parse_text()

# show rows that have a speaker, excluding speaker strings that still hold
# two or more ";" separators
d %>%
  filter(!str_detect(speaker, ";.*;"),
         !is.na(speaker)) %>%
  select(speaker, text_head, url_txt) %>%
  kablebox()
speaker | text_head | url_txt |
---|---|---|
HON. DAVE BRAT | HON. DAVE BRAT of virginia in the house of representatives Thursday, March 8, 2018 … | https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE279-2.htm |
Mr. BRAT | Mr. BRAT. Mr. Speaker, I rise to recognize Corporal Robert H. Meier. Corporal Meier served in the 88th Division which served in continuous combat operations for 14 months. Corporal Meier would eventually make the ultimate sacrifice as a soldier in Castleforte, Italy in 1944 where he was killed in action. Corporal Meier’s nephew, Bob Meier, recognized that his uncle should have, in addition to receiving the Purple Heart, received Combat Infantryman’s Badge and Bronze Star. Because of Bob Meier’s … | https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE279-2.htm |
HON. CHRISTOPHER H. SMITH | HON. CHRISTOPHER H. SMITH of new jersey in the house of representatives Thursday, March 8, 2018 … | https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE279-3.htm |
Mr. SMITH |
Mr. SMITH of New Jersey. Mr. Speaker, yesterday we held a hearing on China in Africa. The hearing analyzed China’s activity and engagement in sub-Saharan Africa. In particular, we looked into what motivates China and how Chinese involvement has affected African countries. While a number of African nations have welcomed Chinese engagement and investment, it often comes at a cost: a focus on extractive industries, entanglement with a neo-mercantilist trade policy and a tendency to adopt worst pr... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE279-3.htm </td> </tr> <tr> <td style="text-align:left;"> HON. DOUG COLLINS </td> <td style="text-align:left;"> HON. DOUG COLLINS of georgia in the house of representatives Thursday, March 8, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE280-2.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. COLLINS </td> <td style="text-align:left;"> Mr. COLLINS of Georgia. Mr. Speaker, I rise today to recognize three northeast Georgians whom neighbors recently honored at the 2018 Gainesville American Values Dinner: Tommy Aaron, Tony Herdener, and Ben Lancaster. Tommy Aaron graduated from Gainesville High School, where he won the 1955 state title in golf. After joining the Professional Golfers' Association Tour, he won the Master's Tournament in 1973. Tony Herdener has served the people of northeast Georgia as the Chief Financial Officer of ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE280-2.htm </td> </tr> <tr> <td style="text-align:left;"> HON. MATT CARTWRIGHT </td> <td style="text-align:left;"> HON. MATT CARTWRIGHT of pennsylvania in the house of representatives Thursday, March 8, 2018 ... 
</td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE280-3.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. CARTWRIGHT </td> <td style="text-align:left;"> Mr. CARTWRIGHT. Mr. Speaker, I rise today to recognize James Jay Delaney, who will receive the W. Francis Swingle Award from the Greater Pittston Friendly Sons of St. Patrick. Jay will be honored formally during the Friendly Sons' 104th annual celebration on March 17, 2018. He has been part of the Wilkes-Barre Fire Department since 1981 and has served as the city's fire chief for the past 13 years. Chief Delaney is a longtime resident of Avoca, Pennsylvania and a graduate of St. John the Evangel... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE280-3.htm </td> </tr> <tr> <td style="text-align:left;"> HON. STEVE COHEN </td> <td style="text-align:left;"> HON. STEVE COHEN of tennessee in the house of representatives Thursday, March 8, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE280-4.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. COHEN </td> <td style="text-align:left;"> Mr. COHEN. Mr. Speaker, I rise today in support of the DUI Reporting Act, a bill I introduced today with my colleague Steve Chabot [[Page E281]] along with the support of Mothers Against Drunk Driving and a bipartisan coalition of Representatives from across the United States. If enacted, it would address the loophole in our nation's drunken- driving laws that enables repeat DUI offenders to be charged and tried; as first-time offenders because of inconsistent reporting. Currently, when police m... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE280-4.htm </td> </tr> <tr> <td style="text-align:left;"> HON. MARCIA L. FUDGE </td> <td style="text-align:left;"> HON. MARCIA L. 
FUDGE of ohio in the house of representatives Monday, March 5, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE280.htm </td> </tr> <tr> <td style="text-align:left;"> Ms. FUDGE </td> <td style="text-align:left;"> Ms. FUDGE. Mr. Speaker, I rise today to commemorate the 50th anniversary of the Kerner Report. In 1968, the National Advisory Commission on Civil Disorders, known as the Kerner Commission, found that the civil unrest in the African American community was a result of white racism. From employment and housing discrimination to segregated and underfunded schools, racism was the root cause of systemic poverty plaguing African Americans. In 1969, the theme for Delta Sigma Theta under the leadership o... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE280.htm </td> </tr> <tr> <td style="text-align:left;"> HON. SHEILA JACKSON LEE </td> <td style="text-align:left;"> HON. SHEILA JACKSON LEE of texas in the house of representatives Monday, March 5, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE281-2.htm </td> </tr> <tr> <td style="text-align:left;"> Ms. JACKSON LEE </td> <td style="text-align:left;"> Ms. JACKSON LEE. Mr. Speaker, I thank Congressman Evans for anchoring this very important Special Order on the subject of the famous Kerner Commission Report and the persistence of economic inequality and poverty in America. Together, we stand, firmly committed to combating poverty. Poverty in America reaches into all regions of the country, urban and rural, and affects millions of persons of all races, ethnicities, creed, ages, and gender. However, it seems that far too often, and for far too l... 
</td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE281-2.htm </td> </tr> <tr> <td style="text-align:left;"> HON. LUCILLE ROYBAL </td> <td style="text-align:left;"> HON. LUCILLE ROYBAL-ALLARD of california in the house of representatives Thursday, March 8, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE281.htm </td> </tr> <tr> <td style="text-align:left;"> Ms. ROYBAL </td> <td style="text-align:left;"> Ms. ROYBAL-ALLARD. Mr. Speaker, I rise to salute Jonathan dos Santos Ramirez for his many positive contributions to Los Angeles and beyond, both as a prominent soccer player and as an active member of the community. Jonathan was born April 26, 1990, in Monterey, Mexico, as the youngest of three children born to Geraldo Francisco dos Santos and Liliana Ramirez. A member of FC Barcelona's famed youth academy system, La Masia, Jonathan made his eventual debut with the senior team as a 19-year-old d... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE281.htm </td> </tr> <tr> <td style="text-align:left;"> HON. JOHN J. FASO </td> <td style="text-align:left;"> HON. JOHN J. FASO of new york in the house of representatives Thursday, March 8, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE282-2.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. FASO </td> <td style="text-align:left;"> Mr. FASO. Mr. Speaker, it is with great respect that I rise today to recognize Mr. Tom Cole who has served as a member of the East Kingston Fire Department for fifty years. A broad thinker and steward of the Ulster County community, I admire Mr. Cole's curiosity. Even though he has many years of experience, having served as Fire Chief and responded to many unique situations, Mr. 
Cole is an eager student, always willing to improve his skills and expand his knowledge. Members of our local fire ser... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE282-2.htm </td> </tr> <tr> <td style="text-align:left;"> HON. SANDER M. LEVIN </td> <td style="text-align:left;"> HON. SANDER M. LEVIN of michigan in the house of representatives Thursday, March 8, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE282-3.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. LEVIN </td> <td style="text-align:left;"> Mr. LEVIN. Mr. Speaker, I rise today to recognize the City of Mount Clemens, Michigan, as it celebrates its 200th Anniversary of its founding and designation as the county seat of Macomb County. I have been proud to represent this vibrant community for more than 25 years. In 1818, Governor of the Michigan Territory, Lewis Cass, established the County of Macomb, and designated Mount Clemens, which had been settled by explorer Christian Clemens, as the county seat. When Michigan became the 26th st... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE282-3.htm </td> </tr> <tr> <td style="text-align:left;"> HON. NEAL P. DUNN </td> <td style="text-align:left;"> HON. NEAL P. DUNN of florida in the house of representatives Thursday, March 8, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE282.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. DUNN </td> <td style="text-align:left;"> Mr. DUNN. Mr. Speaker, I rise today to honor the life and legacy of Walter Dartland--a leader in Florida who recently lost his battle with lymphoma. He was a gentleman and an honest man. 
Throughout his life, Walter worked to help people, whether that be in the Marine Corps, or as a university professor back home in Florida, he always gave of himself to better the lives of others. Walter was an attorney by trade, and spent much of his life fighting for the little guy’’ in consumer advocacy. In …
|
https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE282.htm |
HON. J. LUIS CORREA | HON. J. LUIS CORREA of california in the house of representatives Thursday, March 8, 2018 … | https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE283-2.htm |
Mr. CORREA | Mr. CORREA. Mr. Speaker, the month of March celebrates Women’s History Month. As a proud father, I am honored to recognize Latinas this month and believe in the importance of investing in the next generation of Latinas. One in six women in the United States is a Latina. There are currently over 27 million Latina women living in the United States. Latinas are vital contributing members of our American society through their work in business, education, science and technology, engineering, mathemat… | https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE283-2.htm |
HON. DOUG COLLINS | HON. DOUG COLLINS of georgia in the house of representatives Thursday, March 8, 2018 … | https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE283-3.htm |
Mr. COLLINS | Mr. COLLINS of Georgia. Mr. Speaker, I rise today to recognize Frances Norris, a neighbor from Dawsonville who celebrated her 100th birthday on January 31. Originally from the state of Kansas, Mrs. Norris lived there with her family until the Dust Bowl hit in 1939, forcing them to make a new home in southern California. Five years ago, Mrs. Norris moved to northeast Georgia with her son and his family. Mrs. Norris built a rich career that included managing both a school cafeteria and a construct… | https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE283-3.htm |
HON. SHEILA JACKSON LEE | HON. SHEILA JACKSON LEE of texas in the house of representatives Thursday, March 8, 2018 … | https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE283-4.htm |
Ms. JACKSON LEE |
Ms. JACKSON LEE. Mr. Speaker, fifty-three years ago, in Selma, Alabama, hundreds of heroic souls risked their lives for freedom and to secure the right to vote for all Americans by their participation in marches for voting rights on Bloody Sunday,'' Turnaround Tuesday,’’ or the final, completed march from Selma to Montgomery. Those foot soldiers'' of Selma, brave and determined men and women, boys and girls, persons of all races and creeds, loved their country so much that they were willin... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE283-4.htm </td> </tr> <tr> <td style="text-align:left;"> HON. EMANUEL CLEAVER </td> <td style="text-align:left;"> HON. EMANUEL CLEAVER of missouri in the house of representatives Thursday, March 8, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE283.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. CLEAVER </td> <td style="text-align:left;"> Mr. CLEAVER. Mr. Speaker, I regrettably missed votes on Tuesday March 6, 2018. I had intended to vote yes’’ on Roll Call vote 94, and no'' on vote 95. ____________________ </pre></body></html>... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE283.htm </td> </tr> <tr> <td style="text-align:left;"> HON. TODD ROKITA </td> <td style="text-align:left;"> HON. TODD ROKITA of indiana in the house of representatives Thursday, March 8, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE284-2.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. ROKITA </td> <td style="text-align:left;"> Mr. ROKITA. Mr. Speaker, I rise today to honor a Hoosier hero who paid the ultimate sacrifice. Deputy Jacob Pickett of the Boone County Sheriff's Office was killed in the line of duty on March 2, 2018. 
Jacob was a five year veteran in law enforcement and served as the leader in the department's K-9 unit. He held the rank of Sheriff Deputy in Boone County. One of Indiana's finest, Jacob is a hero. He acted selflessly on a day that took an unexpected and fatal turn. But he died doing what he was t... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE284-2.htm </td> </tr> <tr> <td style="text-align:left;"> HON. LOU BARLETTA </td> <td style="text-align:left;"> HON. LOU BARLETTA of pennsylvania in the house of representatives Thursday, March 8, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE284-3.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. BARLETTA </td> <td style="text-align:left;"> Mr. BARLETTA. Mr. Speaker, it is an honor to recognize Mr. Rick Rovegno who is receiving the Exchange Club of Carlisle's prestigious Molly Pitcher Award. I know I will neither be the first, nor the last, to applaud Mr. Rovegno upon his receipt of this well-deserved honor. Chartered by the National Exchange Club in 1949, the Exchange Club of Carlisle gives individuals the opportunity to use their time and talents to benefit their local communities and the country as a whole. The Molly Pitcher Awa... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE284-3.htm </td> </tr> <tr> <td style="text-align:left;"> HON. MO BROOKS </td> <td style="text-align:left;"> HON. MO BROOKS of alabama in the house of representatives Thursday, March 8, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE284-4.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. BROOKS </td> <td style="text-align:left;"> Mr. BROOKS of Alabama. Mr. 
Speaker, it is with great pride that I recognize the City of Florence, Alabama, as it celebrates its 200th Anniversary. I am honored to serve the people of Florence, which is located in Alabama's 5th District. By way of background, on March 12, 1818, General John Coffee saw the potential for a settlement on the banks of the Tennessee River, and he founded Florence, Alabama. General Coffee led the Cypress Land Company, and these visionaries established what they saw as ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE284-4.htm </td> </tr> <tr> <td style="text-align:left;"> HON. ALCEE L. HASTINGS </td> <td style="text-align:left;"> HON. ALCEE L. HASTINGS of florida in the house of representatives Thursday, March 8, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE284.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. HASTINGS </td> <td style="text-align:left;"> Mr. HASTINGS. Mr. Speaker, I rise today with great sadness to honor the life and legacy of my dear friend, Captain George Frank Myles, Jr. George passed away peacefully on March 4, 2018 at the age of 69 in Boca Raton, Florida. He was a husband, father, grandfather, brother, nephew, uncle, friend, and dedicated public servant. George is survived by his wife of 46 years, Jacqueline Williams Myles; their three daughters: Tequisha, Kendyll, and Kersti Myles; one grandchild Savannah; sister, Fossteen... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE284.htm </td> </tr> <tr> <td style="text-align:left;"> HON. SUSAN W. BROOKS </td> <td style="text-align:left;"> HON. SUSAN W. BROOKS of indiana in the house of representatives Thursday, March 8, 2018 ... 
</td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE285-2.htm </td> </tr> <tr> <td style="text-align:left;"> Mrs. BROOKS </td> <td style="text-align:left;"> Mrs. BROOKS of Indiana. Mr. Speaker, it is with a heavy heart that I rise today to honor the life of an outstanding public servant, Boone County Deputy Jacob Pickett, who served his city and country with principle and integrity. Tragically, Deputy Pickett was killed in the line of duty on March 2, 2018. Deputy Pickett, or Jake’’ as he was known to his family and friends, served in law enforcement for the last eight years. Jake made the decision every day to put his life on the line to make Ind…
|
https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE285-2.htm |
HON. MARKWAYNE MULLIN | HON. MARKWAYNE MULLIN of oklahoma in the house of representatives Thursday, March 8, 2018 … | https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE285-3.htm |
Mr. MULLIN |
Mr. MULLIN. Mr. Speaker, I rise today to recognize and applaud Robert Holden, a member of the Choctaw and Chickasaw Tribes, who joined the staff of the National Congress of American Indians (NCAI) in 1984 and retired last December after 33 years of dedicated service to NCAI and to the millions of Native people that were foremost in his mind every day of his life. Many of you know that NCAI was begun in 1944 at the peak of the Federal government’s Termination Policy,'' which sought to dissolve ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE285-3.htm </td> </tr> <tr> <td style="text-align:left;"> HON. MARK POCAN </td> <td style="text-align:left;"> HON. MARK POCAN of wisconsin in the house of representatives Thursday, March 8, 2018 ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE285.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. POCAN </td> <td style="text-align:left;"> Mr. POCAN. Mr. Speaker, public education is the foundation of our 21st Century democracy. Our public schools are where our students come to be educated in the fullest sense of the word as citizens of this great country. We strive every day to make every public school a place where we prepare the nation's young people to contribute to our society, economy, and citizenry. Ninety percent of American children attend public schools. We call on local, state, and federal lawmakers to prioritize support... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE285.htm </td> </tr> <tr> <td style="text-align:left;"> HON. LUCILLE ROYBAL </td> <td style="text-align:left;"> HON. LUCILLE ROYBAL-ALLARD of california in the house of representatives Thursday, March 8, 2018 ... 
</td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE286.htm </td> </tr> <tr> <td style="text-align:left;"> Ms. ROYBAL </td> <td style="text-align:left;"> Ms. ROYBAL-ALLARD. Mr. Speaker, I rise to recognize Mr. Giovani Gio’’ dos Santos Ramirez on his athletic and philanthropic contributions to the Los Angeles community and beyond. Giovani, who was born May 11, 1989, is a Mexican professional soccer player who currently plays for the Major League Soccer club the Los Angeles Galaxy, as well as the Mexico national team. He began his football career at a very young age, being recruited by Spanish club FC Barcelona and playing for their B team until …
|
https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE286.htm |
HON. SHEILA JACKSON LEE | HON. SHEILA JACKSON LEE of texas in the house of representatives Wednesday, March 7, 2018 … | https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE287.htm |
Ms. JACKSON LEE | Ms. JACKSON LEE. Mr. Speaker, I rise today to call upon the President and the Congress of the United States to act without delay and pass legislation to prevent gun violence and mass casualties from shootings. The community of Parkland, Florida–along with the rest of the country–is still reeling from the horrific tragedy that took place at the Marjorie Stoneman Douglass High School on Valentine’s Day, February 14, 2018. Our hearts still ache with sadness and disbelief for the families and love… | https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE287.htm |
HON. DOUG COLLINS | HON. DOUG COLLINS of georgia in the house of representatives Thursday, March 8, 2018 … | https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE288.htm |
Mr. COLLINS |
Mr. COLLINS of Georgia. Mr. Speaker, I rise to recognize two Gainesville students, Hannah Stewart and Rasmit Devkota, who recently earned the title of the area’s top spellers after competing in the local spelling bees. Hannah Stewart, a fifth grade student at Enota Multiple Intelligences Academy, won the Gainesville City School System’s elementary spelling bee competition, clinching the victory by correctly spelling infraction.'' Rasmit Devkota, a seventh grade student at Gainesville Middle Sc... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgE288.htm </td> </tr> <tr> <td style="text-align:left;"> </td> <td style="text-align:left;"> PRAYER Pastor Tony Perkins, Greenwell Springs Baptist Church, Greenwell Springs, Louisiana, offered the following prayer: Father, we thank You for today. I thank You for the men and women in this distinguished body, whom You have given the privilege of serving the people of this country. May they understand the unique, but fleeting, moment they occupy in the history of man. The challenges of our day are certainly many, and, as a result, the burden that those in this Chamber carry is very heavy. ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgH1479-2.htm </td> </tr> <tr> <td style="text-align:left;"> The SPEAKER </td> <td style="text-align:left;"> The SPEAKER. The Chair has examined the Journal of the last day's proceedings and announces to the House his approval thereof. Pursuant to clause 1, rule I, the Journal stands approved. ____________________ </pre></body></html>... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgH1479-3.htm </td> </tr> <tr> <td style="text-align:left;"> The SPEAKER </td> <td style="text-align:left;"> The SPEAKER. Will the gentleman from Massachusetts (Mr. 
Capuano) come forward and lead the House in the Pledge of Allegiance. ... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgH1479-4.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. CAPUANO </td> <td style="text-align:left;"> Mr. CAPUANO led the Pledge of Allegiance as follows: I pledge allegiance to the Flag of the United States of America, and to the Republic for which it stands, one nation under God, indivisible, with liberty and justice for all. ____________________ </pre></body></html>... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgH1479-4.htm </td> </tr> <tr> <td style="text-align:left;"> The SPEAKER </td> <td style="text-align:left;"> The SPEAKER. The Chair will entertain up to five requests for 1- minute speeches on each side of the aisle. ____________________ </pre></body></html>... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgH1479-5.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. CHABOT </td> <td style="text-align:left;"> asked and was given permission to address the House for 1 minute.) Mr. CHABOT... </td> <td style="text-align:left;"> https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgH1479-6.htm </td> </tr> <tr> <td style="text-align:left;"> Mr. CHABOT </td> <td style="text-align:left;"> Mr. CHABOT asked and was given permission to address the House for 1 minute.) Mr. CHABOT. Mr. Speaker, in recognition of International Women's Day, I would like to acknowledge the efforts of two women from Cincinnati. Long ago, Harriet Beecher Stowe wrote Uncle Tom’s Cabin’’ in Cincinnati. When she later met Abraham Lincoln in the White House, he reportedly said: ``So you are the little woman who wrote the book that started this great war.’’ More recently, Elina Govil, a high school student fr…
|
https://www.congress.gov/115/crec/2018/03/08/modified/CREC-2018-03-08-pt1-PgH1479-6.htm |
# number of speeches per speaker, most frequent first
d %>%
  count(speaker, sort = TRUE) %>%
  kablebox()
speaker | n |
---|---|
HON. DOUG COLLINS | 3 |
HON. SHEILA JACKSON LEE | 3 |
Mr. COLLINS | 3 |
Ms. JACKSON LEE | 3 |
The SPEAKER | 3 |
HON. LUCILLE ROYBAL | 2 |
Mr. CHABOT | 2 |
Ms. ROYBAL | 2 |
 | 1 |
HON. ALCEE L. HASTINGS | 1 |
HON. CHRISTOPHER H. SMITH | 1 |
HON. DAVE BRAT | 1 |
HON. EMANUEL CLEAVER | 1 |
HON. J. LUIS CORREA | 1 |
HON. JOHN J. FASO | 1 |
HON. LOU BARLETTA | 1 |
HON. MARCIA L. FUDGE | 1 |
HON. MARK POCAN | 1 |
HON. MARKWAYNE MULLIN | 1 |
HON. MATT CARTWRIGHT | 1 |
HON. MO BROOKS | 1 |
HON. NEAL P. DUNN | 1 |
HON. SANDER M. LEVIN | 1 |
HON. STEVE COHEN | 1 |
HON. SUSAN W. BROOKS | 1 |
HON. TODD ROKITA | 1 |
Mr. BARLETTA | 1 |
Mr. BRAT | 1 |
Mr. BROOKS | 1 |
Mr. CAPUANO | 1 |
Mr. CARTWRIGHT | 1 |
Mr. CLEAVER | 1 |
Mr. COHEN | 1 |
Mr. CORREA | 1 |
Mr. DUNN | 1 |
Mr. FASO | 1 |
Mr. HASTINGS | 1 |
Mr. LEVIN | 1 |
Mr. MULLIN | 1 |
Mr. POCAN | 1 |
Mr. ROKITA | 1 |
Mr. SMITH | 1 |
Mrs. BROOKS | 1 |
Ms. FUDGE | 1 |
# Devin's members data (expanded from voteview)
load(here("data", "members.Rdata"))

# Devin's name matching function
source(here("code", "nameMethods.R"))

# common typos and known permutations and nicknames
source(here("code", "MemberNameTypos.R"))

# get congress from year
d %<>% mutate(congress = as.numeric(round((year - 2001.1) / 2)) + 107) # the 107th congress began in 2001

# clean up speaker names and add chamber titles for better matching
d %<>% mutate(
  # Only "Extensions of Remarks" rows need a chamber inferred from the text;
  # rows already labelled with a chamber keep their label.
  chamber = case_when(
    chamber == "Extensions of Remarks" &
      str_detect(text_head, "(Mr|Mrs|Ms|Miss)\\. Speaker\\,|in the house of representatives") ~ "House",
    chamber == "Extensions of Remarks" ~ "Senate",
    TRUE ~ chamber
  ),
  # drop the leading title (first match only) and tidy whitespace
  speaker = speaker %>%
    str_remove("(^|;)(Mr|Mrs|Ms|Miss|HON)(\\.| )") %>%
    str_squish())

# when the previous row's speaker contains this row's speaker (e.g. a full
# name followed by the surname alone), reuse the previous, fuller name
d %<>% mutate(speaker =
                ifelse(row_number() > 1 & str_detect(lag(speaker), speaker),
                       lag(speaker),
                       speaker))

d1 <- d %>% extractMemberName(col_name = "speaker", members = members)

d1 %>%
  select(speaker, bioname, icpsr, chamber) %>%
  distinct() %>%
  kablebox()
speaker | bioname | icpsr | chamber |
---|---|---|---|
DAVE BRAT | BRAT, David A. | 21553 | House |
CHRISTOPHER H. SMITH | SMITH, Christopher Henry | 14863 | House |
DOUG COLLINS | COLLINS, Doug | 21323 | House |
MATT CARTWRIGHT | CARTWRIGHT, Matt | 21358 | House |
STEVE COHEN | COHEN, Stephen | 20748 | House |
MARCIA L. FUDGE | FUDGE, Marcia L. | 20941 | House |
SHEILA JACKSON LEE | JACKSON LEE, Sheila | 29573 | House |
LUCILLE ROYBAL | ROYBAL-ALLARD, Lucille | 29317 | House |
JOHN J. FASO | FASO, John J. | 21716 | House |
SANDER M. LEVIN | LEVIN, Sander Martin | 15033 | House |
NEAL P. DUNN | DUNN, Neal Patrick | 21714 | House |
J. LUIS CORREA | CORREA, Jose Luis | 21711 | House |
EMANUEL CLEAVER | CLEAVER, Emanuel, II | 20517 | House |
TODD ROKITA | ROKITA, Todd | 21131 | House |
LOU BARLETTA | BARLETTA, Lou | 21171 | House |
MO BROOKS | BROOKS, Mo | 21193 | House |
ALCEE L. HASTINGS | HASTINGS, Alcee Lamar | 29337 | House |
SUSAN W. BROOKS | NA | NA | NA |
MARKWAYNE MULLIN | MULLIN, Markwayne | 21355 | House |
MARK POCAN | POCAN, Mark | 21370 | House |
NA | NA | NA | NA |
The SPEAKER | NA | NA | NA |
CAPUANO | CAPUANO, Michael Everett | 29919 | House |
CHABOT | CHABOT, Steve | 29550 | House |
# Parse one day of the Congressional Record and write each speech to its own
# text file at data/txt/<year>/<icpsr>/<file>-<ID>-<icpsr>.txt under the
# project root (as resolved by here::here()).
#
# Arguments:
#   cr_date  the day to process; anything as.Date() accepts, e.g. "2020-02-06"
#
# Uses objects defined outside this function: `cr`, `members`, head_text(),
# extract_names(), parse_text() and extractMemberName().
#
# Called for its side effect of writing files. Returns, invisibly, the
# speech texts that were written (the value of walk2()), or an empty
# character vector when `cr` has no documents for `cr_date`.
cr_write <- function(cr_date) {
  d <- filter(cr, date == as.Date(cr_date))

  # nothing to parse or write for this day
  if (nrow(d) == 0) {
    return(invisible(character(0)))
  }

  # keep at most the first 30 documents; head() does not pad short days
  # with all-NA rows the way d[1:30, ] does
  d <- head(d, 30)

  # the first bit of text (faster because procedural titles are at the
  # beginning, no need to search full text)
  d$text_head <- d$file %>% map_chr(possibly(head_text, otherwise = ""))

  # fill in procedural roles
  process_titles <- c(
    "ANNOUNCEMENT", "RECESS", "PRAYER", "PLEDGE", "MESSAGE",
    "EXECUTIVE MESSAGE", "EXECUTIVE COMMUNICATION",
    "EXECUTIVE AND OTHER COMMUNICATION", "MEASURE", "ADJOURNMENT",
    "DESIGNATION", "THE JOURNAL", "RESIGNATION", "ELECTING",
    "CONSTITUTIONAL", "ADDITIONAL SPONSORS", "SWEARING IN",
    "MOMENT OF SILENCE", "SENATE COMMITTEE MEETING", "BUDGETARY", "EFFECTS",
    "REAPPOINTMENT", "APPOINTMENT", "RECALL", "COMMUNICATION",
    "REMOTE COMMITTEE PROCEEDINGS", "REMOTE VOTING", "ENROLLED BILL",
    "ADDITIONAL COSPONSORS", "DISCHARGED NOMINATION", "CONFIRMATION",
    "JOINT RESOLUTION", "SENATE ENROLLED BILLS", "PUBLICATION",
    "EXPLANATORY STATEMENT", "WITHDRAWAL"
  )
  # anchored alternation of the titles; built from the vector so that no
  # empty alternative can slip in between two "|"
  process_pattern <- str_c("^(", str_c(process_titles, collapse = "|"), ")")

  # `process` is NA when the document does not open with one of the titles
  d %<>%
    mutate(process = str_extract(text_head, process_pattern))

  # speakers named in each file, falling back to the procedural title
  d %<>%
    mutate(speaker = file %>% map_chr(possibly(extract_names, otherwise = "404error")),
           speaker = coalesce(speaker, process))

  d %<>% parse_text()

  # NOTE(review): this recomputes `speaker` per file after parse_text() has
  # split it to one name per row -- confirm this second pass is intended
  d %<>%
    mutate(speaker = file %>% map_chr(possibly(extract_names, otherwise = "404error")),
           speaker = coalesce(speaker, process))

  # get congress from year
  d %<>% mutate(congress = as.numeric(round((year - 2001.1) / 2)) + 107) # the 107th congress began in 2001

  # clean up speaker names and add chamber titles for better matching
  d %<>% mutate(
    # Only "Extensions of Remarks" rows need a chamber inferred from the
    # text; rows already labelled with a chamber keep their label.
    chamber = case_when(
      chamber == "Extensions of Remarks" &
        str_detect(text_head, "(Mr|Mrs|Ms|Miss)\\. Speaker\\,|in the house of representatives") ~ "House",
      chamber == "Extensions of Remarks" ~ "Senate",
      TRUE ~ chamber
    ),
    # drop the leading title (first match only) and tidy whitespace
    speaker = speaker %>%
      str_remove("(^|;)(Mr|Mrs|Ms|Miss|HON)(\\.| )") %>%
      str_squish())

  # when the previous row's speaker contains this row's speaker, reuse the
  # previous, fuller name
  d %<>% mutate(speaker =
                  ifelse(row_number() > 1 & str_detect(lag(speaker), speaker),
                         lag(speaker),
                         speaker))

  d1 <- d %>% extractMemberName(col_name = "speaker", members = members)

  # fill in empty
  d1 %<>%
    mutate(file = file %>% replace_na("CREC-missing"),
           # convert first so the "NA" placeholder is valid whatever type
           # icpsr has; unmatched speakers are filed under an "NA" directory
           icpsr = icpsr %>% as.character() %>% replace_na("NA"))

  d1 %<>% mutate(path = str_c("data",
                              "txt",
                              year,
                              icpsr,
                              str_c(file %>% str_remove(".htm"),
                                    "-", ID, "-", icpsr, ".txt"),
                              sep = "/"))

  # make missing directories, in the same place the files are written
  # (under the project root) and including any missing parent directories
  make_dir <- function(x) {
    if (!dir.exists(x)) {
      dir.create(x, recursive = TRUE)
    }
  }

  out_paths <- here::here(d1$path)

  walk(unique(dirname(out_paths)),
       .f = make_dir)

  ## May want to change directory
  # here(other_dir) %>% str_remove("project_root/")

  # save
  walk2(d1$text,
        out_paths,
        .f = write_lines)
}
## One day of the CR
# cr_write("2020-02-06")

## Many days of the CR (e.g. February 2020)
# distinct() returns a one-column tibble; walk() over a tibble iterates over
# its columns, which would call cr_write() once with the whole date vector.
# Pull the column out (as "YYYY-MM-DD" strings, which cr_write() passes to
# as.Date()) so that cr_write() runs once per day.
dates <- cr %>%
  filter(str_detect(date, "2020-02")) %>%
  distinct(date) %>%
  pull(date) %>%
  as.character()

walk(dates,
     cr_write)

# list everything written so far
list.files(here::here("data", "txt"), recursive = TRUE)
# Preview the output path built for each parsed text, next to the matched
# member's name and district label.
# NOTE(review): `d1` inside cr_write() is local to that function, so this chunk
# needs a `d1` created in the calling environment; confirm where it comes from.

# fill in empty
d1 %<>%
  mutate(
    file = file %>% replace_na("CREC-missing"),
    # unmatched speakers get icpsr 0, so their texts land in data/txt/<year>/0/
    icpsr = icpsr %>% as.integer() %>% replace_na(0L)
  )

d1 %<>%
  mutate(
    path = str_c(
      "data",
      "txt",
      year,
      icpsr,
      str_c(file %>% str_remove(".htm"), "-", ID, "-", icpsr, ".txt"),
      sep = "/"
    )
  )

# left_join() without `by` joins on all columns the two tables share
d1 %>%
  left_join(members) %>%
  select(bioname, cqlabel, path) %>%
  kablebox()
bioname | cqlabel | path |
---|---|---|
BRAT, David A. | (VA-07) | data/txt/2018/21553/CREC-2018-03-08-pt1-PgE279-2-000001-21553.txt |
BRAT, David A. | (VA-07) | data/txt/2018/21553/CREC-2018-03-08-pt1-PgE279-2-000002-21553.txt |
SMITH, Christopher Henry | (NJ-04) | data/txt/2018/14863/CREC-2018-03-08-pt1-PgE279-3-000003-14863.txt |
SMITH, Christopher Henry | (NJ-04) | data/txt/2018/14863/CREC-2018-03-08-pt1-PgE279-3-000004-14863.txt |
COLLINS, Doug | (GA-09) | data/txt/2018/21323/CREC-2018-03-08-pt1-PgE280-2-000005-21323.txt |
COLLINS, Doug | (GA-09) | data/txt/2018/21323/CREC-2018-03-08-pt1-PgE280-2-000006-21323.txt |
CARTWRIGHT, Matt | (PA-17) | data/txt/2018/21358/CREC-2018-03-08-pt1-PgE280-3-000007-21358.txt |
CARTWRIGHT, Matt | (PA-17) | data/txt/2018/21358/CREC-2018-03-08-pt1-PgE280-3-000008-21358.txt |
COHEN, Stephen | (TN-09) | data/txt/2018/20748/CREC-2018-03-08-pt1-PgE280-4-000009-20748.txt |
COHEN, Stephen | (TN-09) | data/txt/2018/20748/CREC-2018-03-08-pt1-PgE280-4-000010-20748.txt |
FUDGE, Marcia L. | (OH-11) | data/txt/2018/20941/CREC-2018-03-08-pt1-PgE280-000011-20941.txt |
FUDGE, Marcia L. | (OH-11) | data/txt/2018/20941/CREC-2018-03-08-pt1-PgE280-000012-20941.txt |
JACKSON LEE, Sheila | (TX-18) | data/txt/2018/29573/CREC-2018-03-08-pt1-PgE281-2-000013-29573.txt |
JACKSON LEE, Sheila | (TX-18) | data/txt/2018/29573/CREC-2018-03-08-pt1-PgE281-2-000014-29573.txt |
ROYBAL-ALLARD, Lucille | (CA-40) | data/txt/2018/29317/CREC-2018-03-08-pt1-PgE281-000015-29317.txt |
ROYBAL-ALLARD, Lucille | (CA-40) | data/txt/2018/29317/CREC-2018-03-08-pt1-PgE281-000016-29317.txt |
FASO, John J. | (NY-19) | data/txt/2018/21716/CREC-2018-03-08-pt1-PgE282-2-000017-21716.txt |
FASO, John J. | (NY-19) | data/txt/2018/21716/CREC-2018-03-08-pt1-PgE282-2-000018-21716.txt |
LEVIN, Sander Martin | (MI-09) | data/txt/2018/15033/CREC-2018-03-08-pt1-PgE282-3-000019-15033.txt |
LEVIN, Sander Martin | (MI-09) | data/txt/2018/15033/CREC-2018-03-08-pt1-PgE282-3-000020-15033.txt |
DUNN, Neal Patrick | (FL-02) | data/txt/2018/21714/CREC-2018-03-08-pt1-PgE282-000021-21714.txt |
DUNN, Neal Patrick | (FL-02) | data/txt/2018/21714/CREC-2018-03-08-pt1-PgE282-000022-21714.txt |
CORREA, Jose Luis | (CA-46) | data/txt/2018/21711/CREC-2018-03-08-pt1-PgE283-2-000023-21711.txt |
CORREA, Jose Luis | (CA-46) | data/txt/2018/21711/CREC-2018-03-08-pt1-PgE283-2-000024-21711.txt |
COLLINS, Doug | (GA-09) | data/txt/2018/21323/CREC-2018-03-08-pt1-PgE283-3-000025-21323.txt |
COLLINS, Doug | (GA-09) | data/txt/2018/21323/CREC-2018-03-08-pt1-PgE283-3-000026-21323.txt |
JACKSON LEE, Sheila | (TX-18) | data/txt/2018/29573/CREC-2018-03-08-pt1-PgE283-4-000027-29573.txt |
JACKSON LEE, Sheila | (TX-18) | data/txt/2018/29573/CREC-2018-03-08-pt1-PgE283-4-000028-29573.txt |
CLEAVER, Emanuel, II | (MO-05) | data/txt/2018/20517/CREC-2018-03-08-pt1-PgE283-000029-20517.txt |
CLEAVER, Emanuel, II | (MO-05) | data/txt/2018/20517/CREC-2018-03-08-pt1-PgE283-000030-20517.txt |
ROKITA, Todd | (IN-04) | data/txt/2018/21131/CREC-2018-03-08-pt1-PgE284-2-000031-21131.txt |
ROKITA, Todd | (IN-04) | data/txt/2018/21131/CREC-2018-03-08-pt1-PgE284-2-000032-21131.txt |
BARLETTA, Lou | (PA-11) | data/txt/2018/21171/CREC-2018-03-08-pt1-PgE284-3-000033-21171.txt |
BARLETTA, Lou | (PA-11) | data/txt/2018/21171/CREC-2018-03-08-pt1-PgE284-3-000034-21171.txt |
BROOKS, Mo | (AL-05) | data/txt/2018/21193/CREC-2018-03-08-pt1-PgE284-4-000035-21193.txt |
BROOKS, Mo | (AL-05) | data/txt/2018/21193/CREC-2018-03-08-pt1-PgE284-4-000036-21193.txt |
HASTINGS, Alcee Lamar | (FL-20) | data/txt/2018/29337/CREC-2018-03-08-pt1-PgE284-000037-29337.txt |
HASTINGS, Alcee Lamar | (FL-20) | data/txt/2018/29337/CREC-2018-03-08-pt1-PgE284-000038-29337.txt |
NA | NA | data/txt/2018/0/CREC-2018-03-08-pt1-PgE285-2-000039-0.txt |
NA | NA | data/txt/2018/0/CREC-2018-03-08-pt1-PgE285-2-000040-0.txt |
MULLIN, Markwayne | (OK-02) | data/txt/2018/21355/CREC-2018-03-08-pt1-PgE285-3-000041-21355.txt |
MULLIN, Markwayne | (OK-02) | data/txt/2018/21355/CREC-2018-03-08-pt1-PgE285-3-000042-21355.txt |
POCAN, Mark | (WI-02) | data/txt/2018/21370/CREC-2018-03-08-pt1-PgE285-000043-21370.txt |
POCAN, Mark | (WI-02) | data/txt/2018/21370/CREC-2018-03-08-pt1-PgE285-000044-21370.txt |
ROYBAL-ALLARD, Lucille | (CA-40) | data/txt/2018/29317/CREC-2018-03-08-pt1-PgE286-000045-29317.txt |
ROYBAL-ALLARD, Lucille | (CA-40) | data/txt/2018/29317/CREC-2018-03-08-pt1-PgE286-000046-29317.txt |
JACKSON LEE, Sheila | (TX-18) | data/txt/2018/29573/CREC-2018-03-08-pt1-PgE287-000047-29573.txt |
JACKSON LEE, Sheila | (TX-18) | data/txt/2018/29573/CREC-2018-03-08-pt1-PgE287-000048-29573.txt |
COLLINS, Doug | (GA-09) | data/txt/2018/21323/CREC-2018-03-08-pt1-PgE288-000049-21323.txt |
COLLINS, Doug | (GA-09) | data/txt/2018/21323/CREC-2018-03-08-pt1-PgE288-000050-21323.txt |
NA | NA | data/txt/2018/0/CREC-2018-03-08-pt1-PgH1479-2-000051-0.txt |
NA | NA | data/txt/2018/0/CREC-2018-03-08-pt1-PgH1479-3-000052-0.txt |
NA | NA | data/txt/2018/0/CREC-2018-03-08-pt1-PgH1479-4-000053-0.txt |
CAPUANO, Michael Everett | (MA-07) | data/txt/2018/29919/CREC-2018-03-08-pt1-PgH1479-4-000054-29919.txt |
NA | NA | data/txt/2018/0/CREC-2018-03-08-pt1-PgH1479-5-000055-0.txt |
CHABOT, Steve | (OH-01) | data/txt/2018/29550/CREC-2018-03-08-pt1-PgH1479-6-000056-29550.txt |
CHABOT, Steve | (OH-01) | data/txt/2018/29550/CREC-2018-03-08-pt1-PgH1479-6-000057-29550.txt |
Exclude prayer, the pledge, the journal, resignation, adjournment, executive communications, announcements, communications, appointment, reappointment, recess, recall, designations, additional sponsors, and other procedural sections. (These texts are parsed and saved in the “NA” folder.)
Parse sections with more than one speaker, starting with “[SPEAKER NAME]. (Mister|Madam) Speaker,”. For example, see the impeachment speeches, where speakers yield time to other speakers.
Check members with irregular capitalization beyond “Mc|Mac|Des|De|La”
Match speaker names to ICPSR IDs like I did here for the hein-bound data using the crosswalk created here.
Names to fix in function: Susan W. Brooks