parse_cr(bulk_directory = here::here("data", "htm"), skip_parsed = T, dates = "all")

Arguments

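bulk_directory

Directory whose files are listed to build the table of Congressional Record files. Defaults to here::here("data", "htm").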

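skip_parsed

Logical. If true (the default), files whose date already appears in a file name under the matching "txt" directory (the bulk_directory path with "/htm" replaced by "/txt") are dropped from the file table.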

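dates

Either "all" (the default), meaning every date before 2021 found in the file names, or a vector of dates. write_cr() is called once for each unique date.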

Details

Value

References

Author

Note

See also

Examples

##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--  or do  help(data=index)  for the standard data sets.

## The function is currently defined as
## Build a table of the Congressional Record files found in `bulk_directory`
## and call `write_cr()` once for every requested date.
##
## Arguments:
##   bulk_directory  directory whose files are listed (non-recursively).
##   skip_parsed     if TRUE, drop from the file table every file whose date
##                   already appears in a file name under the matching "txt"
##                   directory ("/htm" replaced by "/txt" in the path).
##   dates           "all", or a vector of dates handed to `write_cr()`.
##
## The value of the final `walk()` call is returned.
function (bulk_directory = here::here("data", "htm"), skip_parsed = TRUE,
    dates = "all")
{
    date_pattern <- "[0-9]{4}-[0-9]{2}-[0-9]{2}"

    # One row per file; year and date are parsed out of the file name.
    cr_file <- list.files(bulk_directory)
    cr <- tibble(
        file = cr_file,
        year = str_extract(cr_file, "[0-9]{4}") %>% as.numeric(),
        date = str_extract(cr_file, date_pattern) %>% as.Date()
    )

    # Sort by date, then re-order by the reversed date column. The row order
    # determines the order of `dates` when dates = "all".
    cr <- cr %>% arrange(date) %>% arrange(rev(date))

    # identical() keeps the condition scalar: a vector of several dates would
    # otherwise give a length > 1 condition, which is an error in R >= 4.2.
    if (identical(as.character(dates), "all")) {
        dates <- cr %>% filter(year < 2021) %>% pull(date) %>% unique()
    }

    if (isTRUE(skip_parsed)) {
        # Dates that already occur in file names under the "txt" directory.
        txt_directory <- bulk_directory %>% str_replace("/htm", "/txt")
        cr_parsed <- list.files(txt_directory, recursive = TRUE) %>%
            str_extract(date_pattern) %>%
            unique() %>%
            as.Date()
        cr <- cr %>% filter(!date %in% cr_parsed)
        # NOTE(review): `dates` was computed before this filter, so already
        # parsed dates are still passed to write_cr() below -- confirm
        # whether `dates` should be restricted here as well.
    }

    # Congress number derived from the year (2001 maps to 107).
    cr <- cr %>% mutate(congress = as.numeric(round((year - 2001.1)/2)) + 107)

    # Chamber from the single character following "Pg" in the file name.
    cr <- cr %>% mutate(chamber = str_extract(file, "Pg.") %>%
        str_remove("Pg") %>%
        str_replace("E", "Extensions of Remarks") %>%
        str_replace("H", "House") %>%
        str_replace("S", "Senate"))

    cr <- cr %>% mutate(url_txt = str_c("https://www.congress.gov/",
        congress, "/crec/", date %>% str_replace_all("-", "/"),
        "/modified/", file))

    # NOTE(review): `cr` is local and is not passed to write_cr(); presumably
    # write_cr() obtains the file table some other way -- verify.
    walk(.x = unique(dates), .f = write_cr)
}
#> function (bulk_directory = here::here("data", "htm"), skip_parsed = T,
#>     dates = "all")
#> {
#>     cr_file <- list.files(bulk_directory)
#>     cr <- tibble(file = cr_file, year = str_extract(cr_file,
#>         "[0-9]{4}") %>% as.numeric(), date = str_extract(cr_file,
#>         "[0-9]{4}-[0-9]{2}-[0-9]{2}") %>% as.Date())
#>     cr %<>% arrange(date) %>% arrange(rev(date))
#>     if (as.character(dates) == "all") {
#>         dates <- cr %>% filter(year < 2021) %>% pull(date) %>%
#>             unique()
#>     }
#>     if (skip_parsed == T) {
#>         cr_parsed <- list.files(bulk_directory %>% str_replace("/htm",
#>             "/txt"), recursive = T)
#>         length(cr_parsed)
#>         cr_parsed %<>% str_extract("[0-9]{4}-[0-9]{2}-[0-9]{2}") %<>%
#>             unique() %>% as.Date()
#>         length(cr_parsed)
#>         cr %<>% filter(!date %in% cr_parsed)
#>         dim(cr)
#>     }
#>     cr %<>% mutate(congress = as.numeric(round((year - 2001.1)/2)) +
#>         107)
#>     cr %<>% mutate(chamber = str_extract(file, "Pg.") %>% str_remove("Pg") %>%
#>         str_replace("E", "Extensions of Remarks") %>% str_replace("H",
#>         "House") %>% str_replace("S", "Senate"))
#>     cr %<>% mutate(url_txt = str_c("https://www.congress.gov/",
#>         congress, "/crec/", date %>% str_replace_all("-", "/"),
#>         "/modified/", file))
#>     walk(.x = unique(dates), .f = write_cr)
#>   }
#> <environment: 0x1388e9f20>