Medicare

Catalog

The data.json file is an Open Data catalog containing all available datasets. As new data is added, data.json is automatically updated.

data_json <- read_json_arrow(
  file = "https://data.cms.gov/data.json",
  col_select = c("dataset"),
    as_data_frame = TRUE) |> 
  to_duckdb()

data_json |> 
  glimpse()
Rows: ??
Columns: 1
Database: DuckDB v1.2.1 [Andrew@Windows 10 x64:R 4.4.2/:memory:]
$ dataset <list> [<data.frame[139 x 22]>]

Note: Removing col_select = c("dataset") from the above call returns the following metadata:

context     <chr> https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld
id          <chr> https://data.cms.gov/data.json
type        <chr> dcat:Catalog
conformsTo  <chr> https://project-open-data.cio.gov/v1.1/schema
describedBy <chr> https://project-open-data.cio.gov/v1.1/schema/catalog.json
dataset     <list> [<data.frame[138 x 22]>]

Dataset

Within data.json, there is an array called dataset:

dataset <- data_json |> 
  pull(dataset) |> 
  pluck(1) |> 
  as_tibble()

dataset
# A tibble: 139 × 22
   `@type`      accessLevel accrualPeriodicity bureauCode contactPoint$`@type`
   <chr>        <chr>       <chr>              <list>     <chr>               
 1 dcat:Dataset public      R/P1Y              <chr [1]>  vcard:Contact       
 2 dcat:Dataset public      R/P1Y              <chr [1]>  vcard:Contact       
 3 dcat:Dataset public      R/P1Y              <chr [1]>  vcard:Contact       
 4 dcat:Dataset public      R/P1Y              <chr [1]>  vcard:Contact       
 5 dcat:Dataset public      R/P1Y              <chr [1]>  vcard:Contact       
 6 dcat:Dataset public      R/P1Y              <chr [1]>  vcard:Contact       
 7 dcat:Dataset public      R/P1Y              <chr [1]>  vcard:Contact       
 8 dcat:Dataset public      R/P1Y              <chr [1]>  vcard:Contact       
 9 dcat:Dataset public      R/P1Y              <chr [1]>  vcard:Contact       
10 dcat:Dataset public      R/P1Y              <chr [1]>  vcard:Contact       
# ℹ 129 more rows
# ℹ 19 more variables: contactPoint$fn <chr>, $hasEmail <chr>,
#   describedBy <chr>, dataQuality <lgl>, description <chr>,
#   distribution <list>, identifier <chr>, keyword <list>, landingPage <chr>,
#   language <list>, license <chr>, modified <dttm>, programCode <list>,
#   publisher <df[,2]>, references <list>, temporal <chr>, theme <list>,
#   title <chr>, describedByType <chr>

One can search through this array using the dataset’s title, such as "Payroll Based Journal Daily Nurse Staffing":

dataset |> 
  filter(
    grepl(
      "Payroll Based Journal Daily Nurse Staffing", 
      title)) |> 
  unnest_wider(contactPoint, names_sep = "_") |>
  unnest_wider(publisher, names_sep = "_") |>
  mutate(
    bureauCode  = unlist(bureauCode, use.names = FALSE),
    keyword     = map_chr(keyword, \(x) paste0(unlist(x, use.names = FALSE), collapse = ", ")),
    language    = unlist(language, use.names = FALSE),
    programCode = unlist(programCode, use.names = FALSE),
    references  = unlist(references, use.names = FALSE),
    theme       = unlist(theme, use.names = FALSE)) |> 
  rename_with(remove_at_symbol) |> 
  purse()
- $type                  c1  dcat:Dataset                                   ...
- $accessLevel           c1  public                                         ...
- $accrualPeriodicity    c1  R/P3M                                          ...
- $bureauCode            c1  009:38                                         ...
- $contactPoint_type     c1  vcard:Contact                                  ...
- $contactPoint_fn       c1  Nursing Home Staffing - CCSQ (PBJ Reports)     ...
- $contactPoint_hasEmail c1  mailto:nhstaffing@cms.hhs.gov                  ...
- $describedBy           c1  https://data.cms.gov/resources/payroll-based-j ...
- $dataQuality           l1  T                                              ...
- $description           c1  The Payroll Based Journal (PBJ) Nurse Staffing ...
- $distribution          L1  list(structure(list("@type" = c("dcat:Distribu ...
- $identifier            c1  https://data.cms.gov/data-api/v1/dataset/7e0d5 ...
- $keyword               c1  Medicare, Original Medicare, Hospitals & Facil ...
- $landingPage           c1  https://data.cms.gov/quality-of-care/payroll-b ...
- $language              c1  en-US                                          ...
- $license               c1  https://www.usa.gov/government-works           ...
- $modified              p1  2025-04-02                                     ...
- $programCode           c1  009:000                                        ...
- $publisher_type        c1  org:Organization                               ...
- $publisher_name        c1  Centers for Medicare & Medicaid Services       ...
- $references            c1  https://data.cms.gov/resources/payroll-based-j ...
- $temporal              c1  2017-01-01/2024-09-30                          ...
- $theme                 c1  Medicare                                       ...
- $title                 c1  Payroll Based Journal Daily Nurse Staffing     ...
- $describedByType       c1  NA                                             ...

Distribution

Within dataset, there is an array called distribution which contains all dataset versions, in all available formats:

distribution <- dataset |> 
  select(distribution) |> 
  unnest(distribution) |> 
  rename_with(remove_at_symbol)

distribution |> 
  purse()
- $type         c4156 dcat:Distribution                                     ...
- $format       c4156 API                                                   ...
- $accessURL    c4156 https://data.cms.gov/data-api/v1/dataset/9767cb68-8ea ...
- $resourcesAPI c4156 https://data.cms.gov/data-api/v1/dataset-resources/97 ...
- $description  c4156 latest                                                ...
- $title        c4156 Accountable Care Organization Participants : 2025-01- ...
- $modified     p4156 2025-01-15                                            ...
- $temporal     c4156 2025-01-01/2025-12-31                                 ...
- $downloadURL  c4156 NA                                                    ...
- $mediaType    c4156 NA                                                    ...

Formats

  • description: "latest" == URL always pointing to latest data
  • mediaType: "text/csv" == downloadable CSV file
  • mediaType: "application/zip" == downloadable ZIP file
  • format: "API" == API endpoint
  • temporal: Data at fixed points in time
    description format                mediaType    n
1 latest API - 112
2 - API - 1752
3 - - application/vnd.ms-excel 21
4 - - application/zip 540
5 - - text/csv 1731

For instance, the following URL will always point to the Q2 2021 Payroll Based Journal Daily Nurse Staffing data:

staffing <- distribution |> 
  filter(
    grepl("Payroll Based Journal Daily Nurse Staffing", title),
    format == "API", 
    grepl("^2021-04", temporal))

staffing |> 
  purse()
- $type         c1  dcat:Distribution                                       ...
- $format       c1  API                                                     ...
- $accessURL    c1  https://data.cms.gov/data-api/v1/dataset/d10d792e-ea6e- ...
- $resourcesAPI c1  https://data.cms.gov/data-api/v1/dataset-resources/d10d ...
- $description  c1  NA                                                      ...
- $title        c1  Payroll Based Journal Daily Nurse Staffing : 2021-06-30 ...
- $modified     p1  2021-10-14                                              ...
- $temporal     c1  2021-04-01/2021-06-30                                   ...
- $downloadURL  c1  NA                                                      ...
- $mediaType    c1  NA                                                      ...
staffing |> 
  pull(accessURL) |> 
  request() |> 
  req_perform() |> 
  resp_body_json(simplifyVector = TRUE) |> 
  head() |> 
  purse()
- $PROVNUM          c6  015010                         015010               ...
- $PROVNAME         c6  COOSA VALLEY HEALTHCARE CENTER COOSA VALLEY HEALTHC ...
- $CITY             c6  SYLACAUGA                      SYLACAUGA            ...
- $STATE            c6  AL                             AL                   ...
- $COUNTY_NAME      c6  Talladega                      Talladega            ...
- $COUNTY_FIPS      c6  121                            121                  ...
- $CY_Qtr           c6  2021Q2                         2021Q2               ...
- $WorkDate         c6  20210401                       20210402             ...
- $MDScensus        c6  70                             68                   ...
- $Hrs_RNDON        c6  8                              8                    ...
- $Hrs_RNDON_emp    c6  8                              8                    ...
- $Hrs_RNDON_ctr    c6  0                              0                    ...
- $Hrs_RNadmin      c6  48.25                          34.25                ...
- $Hrs_RNadmin_emp  c6  48.25                          34.25                ...
- $Hrs_RNadmin_ctr  c6  0                              0                    ...
- $Hrs_RN           c6  32                             28                   ...
- $Hrs_RN_emp       c6  32                             28                   ...
- $Hrs_RN_ctr       c6  0                              0                    ...
- $Hrs_LPNadmin     c6  0                              0                    ...
- $Hrs_LPNadmin_emp c6  0                              0                    ...
- $Hrs_LPNadmin_ctr c6  0                              0                    ...
- $Hrs_LPN          c6  63.25                          69.75                ...
- $Hrs_LPN_emp      c6  63.25                          69.75                ...
- $Hrs_LPN_ctr      c6  0                              0                    ...
- $Hrs_CNA          c6  189.5                          182.75               ...
- $Hrs_CNA_emp      c6  189.5                          182.75               ...
- $Hrs_CNA_ctr      c6  0                              0                    ...
- $Hrs_NAtrn        c6  0                              0                    ...
- $Hrs_NAtrn_emp    c6  0                              0                    ...
- $Hrs_NAtrn_ctr    c6  0                              0                    ...
- $Hrs_MedAide      c6  0                              0                    ...
- $Hrs_MedAide_emp  c6  0                              0                    ...
- $Hrs_MedAide_ctr  c6  0                              0                    ...

Temporal Data

Datasets with multiple historical versions available will have a temporal field in the distribution array of the data.json.

Format: YYYY-mm-dd/YYYY-mm-dd

The following example returns the 2017 Medicare Inpatient Hospitals - by Provider and Service:

temporal <- distribution |> 
  filter(
    grepl("Medicare Inpatient Hospitals - by Provider and Service", title), 
    format == "API",
    temporal == "2017-01-01/2017-12-31")

temporal |> 
  purse()
- $type         c1  dcat:Distribution                                       ...
- $format       c1  API                                                     ...
- $accessURL    c1  https://data.cms.gov/data-api/v1/dataset/b61ba5eb-021b- ...
- $resourcesAPI c1  https://data.cms.gov/data-api/v1/dataset-resources/b61b ...
- $description  c1  NA                                                      ...
- $title        c1  Medicare Inpatient Hospitals - by Provider and Service  ...
- $modified     p1  2023-05-10                                              ...
- $temporal     c1  2017-01-01/2017-12-31                                   ...
- $downloadURL  c1  NA                                                      ...
- $mediaType    c1  NA                                                      ...
temporal |> 
  pull(accessURL) |> 
  request() |> 
  req_perform() |> 
  resp_body_json(simplifyVector = TRUE) |> 
  head() |> 
  purse()
- $Rndrng_Prvdr_CCN          c6  010001                                     ...
- $Rndrng_Prvdr_Org_Name     c6  Southeast Alabama Medical Center           ...
- $Rndrng_Prvdr_City         c6  Dothan                                     ...
- $Rndrng_Prvdr_St           c6  1108 Ross Clark Circle                     ...
- $Rndrng_Prvdr_State_FIPS   c6  01                                         ...
- $Rndrng_Prvdr_Zip5         c6  36301                                      ...
- $Rndrng_Prvdr_State_Abrvtn c6  AL                                         ...
- $Rndrng_Prvdr_RUCA         c6  1                                          ...
- $Rndrng_Prvdr_RUCA_Desc    c6  Metropolitan area core: primary flow withi ...
- $DRG_Cd                    c6  023                                        ...
- $DRG_Desc                  c6  CRANIOTOMY WITH MAJOR DEVICE IMPLANT OR AC ...
- $Tot_Dschrgs               c6  32                                         ...
- $Avg_Submtd_Cvrd_Chrg      c6  126083.1875                                ...
- $Avg_Tot_Pymt_Amt          c6  26993.15625                                ...
- $Avg_Mdcr_Pymt_Amt         c6  25320.75                                   ...

Different JSON Methods

There are two methods of accessing the latest data.

Both result in a URL pointing to the most recent version of the dataset. This URL is canonical, i.e., it will not change as new versions are added.

For this reason, it is recommended to always start with the data.json object as opposed to hardcoding any URL.

Standard JSON

Use the distribution with the "latest" description:

<data_json>
    => <dataset> 
    => <distribution> 
    -> ($description == "latest")
    -> ($accessURL)

For example, this URL for Opt Out Affidavits is:

https://data.cms.gov/
   data-api/v1/dataset/
   9887a515-7552-4693-bf58-735c77af46d7/
   data
   ^^^^
distribution |> 
  filter(
    grepl("Order and Referring", title),
    description == "latest") |> 
  purse()
- $type         c1  dcat:Distribution                                       ...
- $format       c1  API                                                     ...
- $accessURL    c1  https://data.cms.gov/data-api/v1/dataset/c99b5865-1119- ...
- $resourcesAPI c1  https://data.cms.gov/data-api/v1/dataset-resources/c99b ...
- $description  c1  latest                                                  ...
- $title        c1  Order and Referring : 2025-04-15                        ...
- $modified     p1  2025-04-15                                              ...
- $temporal     c1  2025-04-06/2025-04-12                                   ...
- $downloadURL  c1  NA                                                      ...
- $mediaType    c1  NA                                                      ...

JSON:API

The JSON:API form has a slightly different structure that includes metadata about the dataset. Otherwise it is identical to the Standard JSON method.

Use the URL in the identifier field.

<data_json>
    => <dataset> 
    -> ($title == "Order and Referring")
    -> ($identifier)

For example, this URL for Opt Out Affidavits is:

https://data.cms.gov/
   data-api/v1/dataset/
   9887a515-7552-4693-bf58-735c77af46d7/
   data-viewer
   ^^^^^^^^^^^
dataset |> 
  filter(grepl("Order and Referring", title)) |> 
  unnest_wider(contactPoint, names_sep = "_") |>
  unnest_wider(publisher, names_sep = "_") |>
  mutate(
    bureauCode  = unlist(bureauCode, use.names = FALSE),
    keyword     = map_chr(keyword, \(x) paste0(unlist(x, use.names = FALSE), collapse = ", ")),
    language    = unlist(language, use.names = FALSE),
    programCode = unlist(programCode, use.names = FALSE),
    references  = unlist(references, use.names = FALSE),
    theme       = unlist(theme, use.names = FALSE)) |> 
  rename_with(remove_at_symbol) |> 
  purse()
- $type                  c1  dcat:Dataset                                   ...
- $accessLevel           c1  public                                         ...
- $accrualPeriodicity    c1  R/P3.5D                                        ...
- $bureauCode            c1  009:38                                         ...
- $contactPoint_type     c1  vcard:Contact                                  ...
- $contactPoint_fn       c1  Provider Enrollment Data Requests - CPI        ...
- $contactPoint_hasEmail c1  mailto:ProviderEnrollmentDataRequests@cms.hhs. ...
- $describedBy           c1  https://data.cms.gov/resources/order-and-refer ...
- $dataQuality           l1  T                                              ...
- $description           c1  The Order and Referring dataset provides infor ...
- $distribution          L1  list(structure(list("@type" = c("dcat:Distribu ...
- $identifier            c1  https://data.cms.gov/data-api/v1/dataset/c99b5 ...
- $keyword               c1  Medicare, Original Medicare, Provider Enrollme ...
- $landingPage           c1  https://data.cms.gov/provider-characteristics/ ...
- $language              c1  en-US                                          ...
- $license               c1  https://www.usa.gov/government-works           ...
- $modified              p1  2025-04-15                                     ...
- $programCode           c1  009:000                                        ...
- $publisher_type        c1  org:Organization                               ...
- $publisher_name        c1  Centers for Medicare & Medicaid Services       ...
- $references            c1  https://data.cms.gov/resources/order-and-refer ...
- $temporal              c1  2023-05-21/2025-04-12                          ...
- $theme                 c1  Medicare                                       ...
- $title                 c1  Order and Referring                            ...
- $describedByType       c1  NA                                             ...

Pagination

The default is to provide the first 1,000 rows per request.

However, there is an ability to increase the limit to 5,000 rows per request. You can use pagination to retrieve the entire dataset.

For example, with the Opt Out Affidavits dataset, start with the following request to get the number of rows:

base <- request("https://data.cms.gov/data-api/v1/dataset") |> 
  req_url_path_append("9887a515-7552-4693-bf58-735c77af46d7") |> 
  req_url_path_append("data")

base

stats <- base |> 
  req_url_path_append("stats")

stats

stats <- stats |> 
  req_perform() |> 
  resp_body_json(simplifyVector = TRUE)

stats |> 
  purse()
- $found_rows i1  49923
- $total_rows i1  49923
sq <- providertwo:::offset_seq(stats$found_rows, 5000)

glue::glue(
  "{base$url}",
  "?size=5000&",
  "offset=",
  "{sq}"
)
https://data.cms.gov/data-api/v1/dataset/9887a515-7552-4693-bf58-735c77af46d7/data?size=5000&offset=0
https://data.cms.gov/data-api/v1/dataset/9887a515-7552-4693-bf58-735c77af46d7/data?size=5000&offset=5000
https://data.cms.gov/data-api/v1/dataset/9887a515-7552-4693-bf58-735c77af46d7/data?size=5000&offset=10000
https://data.cms.gov/data-api/v1/dataset/9887a515-7552-4693-bf58-735c77af46d7/data?size=5000&offset=15000
https://data.cms.gov/data-api/v1/dataset/9887a515-7552-4693-bf58-735c77af46d7/data?size=5000&offset=20000
https://data.cms.gov/data-api/v1/dataset/9887a515-7552-4693-bf58-735c77af46d7/data?size=5000&offset=25000
https://data.cms.gov/data-api/v1/dataset/9887a515-7552-4693-bf58-735c77af46d7/data?size=5000&offset=30000
https://data.cms.gov/data-api/v1/dataset/9887a515-7552-4693-bf58-735c77af46d7/data?size=5000&offset=35000
https://data.cms.gov/data-api/v1/dataset/9887a515-7552-4693-bf58-735c77af46d7/data?size=5000&offset=40000
https://data.cms.gov/data-api/v1/dataset/9887a515-7552-4693-bf58-735c77af46d7/data?size=5000&offset=45000

Pagination Example

Paginates through the Opt Out Affidavits data:

Find the Number of Rows

Use the stats endpoints i.e.,

  • /data-viewer/stats
  • /data/stats
data/stats?
   filter[f1][path]=State Code&
   filter[f1][value]=GA

data?
   size=5&
   filter[f1][path]=State Code&
   filter[f1][value]=GA
query <- glue::glue(
  'list(
  "filter[id-{FID}][path]" = "{PATH}",
  "filter[id-{FID}][value]" = "{VALUE}"
  )', 
  FID = 1, 
  PATH = "State Code", 
  VALUE = "GA")

query
list(
"filter[id-1][path]" = "State Code",
"filter[id-1][value]" = "GA"
)
query <- query |> 
  rlang::parse_expr() |> 
  rlang::eval_bare()

query
$`filter[id-1][path]`
[1] "State Code"

$`filter[id-1][value]`
[1] "GA"
accessURL <- distribution |> 
  filter(
    grepl("Opt Out Affidavits : ", title), 
    format == "API",
    description == "latest") |> 
  pull(accessURL) |> 
  request() |> 
  req_url_query(size = 5000, !!!query)

resp_found <- accessURL |> 
  req_url_path_append("stats") |> 
  req_perform() |> 
  resp_body_json() |> 
  fuimus::gelm("found")

resp_found
[1] 1160
accessURL |> 
  req_perform() |> 
  resp_body_json(simplifyVector = TRUE) |> 
  tibble() |> 
  purse()
- $First Name                  c1160 Ana                    Joan            ...
- $Last Name                   c1160 Adelstein              Miller          ...
- $NPI                         c1160 1881778967             1982776308      ...
- $Specialty                   c1160 Clinical Psychologist  Clinical Psycho ...
- $Optout Effective Date       c1160 07/01/2012             07/01/2014      ...
- $Optout End Date             c1160 07/01/2026             07/01/2026      ...
- $First Line Street Address   c1160 675 SEMINOLE AVENUE NE 2520 WINDY HILL ...
- $Second Line Street Address  c1160 SUITE 307              SUITE 106       ...
- $City Name                   c1160 ATLANTA                MARIETTA        ...
- $State Code                  c1160 GA                     GA              ...
- $Zip code                    c1160 30307                  300678633       ...
- $Eligible to Order and Refer c1160 N                      N               ...
- $Last updated                c1160 08/15/2024             08/15/2024      ...

CSV Downloads

  1. Start with a request to the data.json.
  2. Match the dataset title with its name.
  3. The most recent release will be at the top of the distribution array.
  4. There is also a temporal field which can be used to find earlier releases.
  5. Datasets available as either CSV or ZIP files will be designated as such in the mediaType field.
  6. The downloadURL field will provide a direct download link for the data.
orderrefer_url <- distribution |> 
  filter(
    grepl("Order and Referring", title),
    mediaType == "text/csv") |> 
  slice(1) |> 
  pull(downloadURL)
  
orderrefer_csv <- read_csv_arrow(
  file = orderrefer_url, 
  as_data_frame = TRUE) |> 
  to_duckdb()

# approx 2 million rows
orderrefer_csv
# Source:   table<arrow_002> [?? x 8]
# Database: DuckDB v1.2.1 [Andrew@Windows 10 x64:R 4.4.2/:memory:]
          NPI LAST_NAME     FIRST_NAME PARTB DME   HHA   PMD   HOSPICE
        <int> <chr>         <chr>      <chr> <chr> <chr> <chr> <chr>  
 1 1558467555 .MCINDOE      THOMAS     N     Y     N     Y     N      
 2 1770667479 A             SCOTT      N     Y     N     Y     N      
 3 1417051921 A BELLE       N          Y     Y     Y     Y     N      
 4 1356025894 A FOLEY       MEGAN      Y     Y     Y     Y     N      
 5 1972040137 A NOVOTNY     ELIZABETH  Y     Y     Y     Y     N      
 6 1760465553 A SATTAR      MUHAMMAD   Y     Y     Y     Y     Y      
 7 1295400745 A'NEAL        BROGAN     Y     Y     N     N     N      
 8 1265446264 A'VANT FOWLER CATHERINE  Y     Y     N     N     N      
 9 1700562584 AAB           BAILEY     Y     Y     Y     N     Y      
10 1205257284 AAB           KATIE      Y     Y     N     N     N      
# ℹ more rows

CMS Summary Statistics/COVID data

Likely not relevant to package scope

New: Resources

Resources (supplemental documents to the main dataset) are now available for download through the public API. Included are sub-files, tables, supplementary data, reports, and documentation. The new endpoints appear in the resourcesAPI field, a secondary endpoint from data.json.

Note: Limit by the name of the resource you want to download. This name may change between versions.

Example: Reassignment SubFile (csv)

Site: Public Provider Enrollment Reassignment SubFile

resp_resources <- distribution |> 
  filter(
  grepl("Medicare Fee-For-Service", title),
  description == "latest") |> 
  pull(resourcesAPI) |> 
  request() |> 
  req_perform() |> 
  resp_body_json(simplifyVector = FALSE) |> 
  list_flatten() |> 
  list_flatten()

reassign_url <- tibble(
  name = fuimus::gelm(resp_resources, "name$") |> unlist(use.names = FALSE),
  size = fuimus::gelm(resp_resources, "fileSize$") |> unlist(use.names = FALSE),
  url =  fuimus::gelm(resp_resources, "downloadURL$") |> unlist(use.names = FALSE))

reassign_url
# A tibble: 7 × 3
  name                                                                size url  
  <chr>                                                              <int> <chr>
1 Medicare FFS Public Provider Enrollment Q1 2025                   3.09e8 http…
2 Medicare Fee-For-Service Public Provider Enrollment Data Diction… 5.60e5 http…
3 Medicare FFS  Public Provider Enrollment Methodology              3.60e5 http…
4 Reassignment Sub-File 2025 Q1                                     1.11e8 http…
5 Address Sub-File 2025 Q1                                          4.28e7 http…
6 Secondary Specialty Sub-File 2025 Q1                              2.81e7 http…
7 Medicare Fee-For-Service Public Provider Enrollment Fact Sheet    3.27e4 http…
reassign_url <- reassign_url|> 
  filter(grepl("Reassignment", name))

reassign_url |> 
  purse()
- $name c1  Reassignment Sub-File 2025 Q1                                   ...
- $size i1  110876247                                                       ...
- $url  c1  https://data.cms.gov/sites/default/files/2025-04/PPEF_Reassignm ...
reassign_csv <- read_csv_arrow(
  file = reassign_url$url, 
  as_data_frame = TRUE) |> 
  to_duckdb()

reassign_csv
# Source:   table<arrow_003> [?? x 2]
# Database: DuckDB v1.2.1 [Andrew@Windows 10 x64:R 4.4.2/:memory:]
   REASGN_BNFT_ENRLMT_ID RCV_BNFT_ENRLMT_ID
   <chr>                 <chr>             
 1 I20031103000001       O20031216000213   
 2 I20031103000001       O20111004000177   
 3 I20031103000007       O20040308000883   
 4 I20031103000007       O20051206000046   
 5 I20031103000014       O20070303000050   
 6 I20031103000014       O20221026001638   
 7 I20031103000020       O20190812002967   
 8 I20031103000033       O20181107003537   
 9 I20031103000033       O20181204002777   
10 I20031103000037       O20051216000183   
# ℹ more rows