6 Downloads

Run {duckplyr} Commands

library(duckplyr)

# Install, then load, the httpfs and json DuckDB extensions (one at a time,
# INSTALL before LOAD for each).
for (ext in c("httpfs", "json")) {
  db_exec(paste("INSTALL", ext))
  db_exec(paste("LOAD", ext))
}

# Query DuckDB's current memory_limit setting; the result has one column,
# memlimit.
read_sql_duckdb("SELECT current_setting('memory_limit') AS memlimit")

# A duckplyr data frame: 1 variable
  memlimit
  <chr>   
1 12.5 GiB

# Set DuckDB's memory limit to '1GB'.
db_exec("PRAGMA memory_limit = '1GB'")

# Read the setting back to confirm the change (the printed result reports the
# new limit in MiB).
read_sql_duckdb("SELECT current_setting('memory_limit') AS memlimit")

# A duckplyr data frame: 1 variable
  memlimit 
  <chr>    
1 953.6 MiB

# Turn on DuckDB's progress bar.
db_exec("PRAGMA enable_progress_bar = true")

# Query the current value of the threads setting.
read_sql_duckdb("SELECT current_setting('threads') AS threads")

# A duckplyr data frame: 1 variable
  threads
    <dbl>
1       4

# List DuckDB's settings: one row per setting, with its name, value,
# description, input type and scope.
read_sql_duckdb("SELECT * FROM duckdb_settings()")

# A duckplyr data frame: 5 variables
   name                                       value description input_type scope
   <chr>                                      <chr> <chr>       <chr>      <chr>
 1 access_mode                                auto… Access mod… VARCHAR    GLOB…
 2 allocator_background_threads               false Whether to… BOOLEAN    GLOB…
 3 allocator_bulk_deallocation_flush_thresho… 512.… If a bulk … VARCHAR    GLOB…
 4 allocator_flush_threshold                  128.… Peak alloc… VARCHAR    GLOB…
 5 allow_community_extensions                 true  Allow to l… BOOLEAN    GLOB…
 6 allow_extensions_metadata_mismatch         false Allow to l… BOOLEAN    GLOB…
 7 allow_persistent_secrets                   true  Allow the … BOOLEAN    GLOB…
 8 allow_unredacted_secrets                   false Allow prin… BOOLEAN    GLOB…
 9 allow_unsigned_extensions                  false Allow to l… BOOLEAN    GLOB…
10 allowed_directories                        []    List of di… VARCHAR[]  GLOB…
# ℹ more rows

Provider Enrollment Sub-Files

# "resources" property of the public "enrollees" dataset.
resources <- public_Dataset("enrollees") |>
  prop("resources")

# From the "files" property, keep the CSV entries that sf_detect() matches
# against "Sub-File" in `name`, and extract their download URLs.
subfiles <- resources |>
  prop("files") |>
  filter(fileType == "csv", sf_detect(name, "Sub-File")) |>
  pull(downloadURL)

# File names only, without the URL path.
basename(subfiles)

[1] "PPEF_Reassignment_Extract_2025.01.02.csv"       
[2] "PPEF_Practice_Location_Extract_2025.01.02.csv"  
[3] "PPEF_Secondary_Specialty_Extract_2025.01.02.csv"

Reassignment Sub-File 2024 Q4

# Download the first sub-file (reassignment) to a temporary CSV and read it
# through DuckDB.
path <- tempfile("reassign_subfile", fileext = ".csv")

# mode = "wb" writes the bytes exactly as served. The default text-mode
# transfer rewrites "\n" as "\r\n" on Windows, which alters the line endings
# of the saved file.
download.file(url = subfiles[1], destfile = path, mode = "wb")

# Read both columns as VARCHAR, then collect the result into R.
dk <- duckplyr::read_csv_duckdb(
  path,
  prudence = "lavish",
  options = list(types = list(c("VARCHAR", "VARCHAR")))
) |>
  collect()

# Compute enid_ind (REASGN_BNFT_ENRLMT_ID with every newline character
# removed) and enid_org (RCV_BNFT_ENRLMT_ID unchanged). The pattern is a
# literal "\n", so a fixed-pattern replacement is used rather than a regex.
dk <- fcompute(
  dk,
  enid_ind = stringi::stri_replace_all_fixed(REASGN_BNFT_ENRLMT_ID, "\n", ""),
  enid_org = RCV_BNFT_ENRLMT_ID
)

# Number of observed enid_ind values, printed with thousands separators.
dk[["enid_ind"]] |>
  fnobs() |>
  prettyNum(big.mark = ",")

[1] "3,386,477"

# Number of distinct enid_ind values, printed with thousands separators.
dk[["enid_ind"]] |>
  fndistinct() |>
  prettyNum(big.mark = ",")

[1] "2,024,798"

# Number of distinct enid_org values, printed with thousands separators.
dk[["enid_org"]] |>
  fndistinct() |>
  prettyNum(big.mark = ",")

[1] "262,076"

Address Sub-File 2024 Q4

# Download the second sub-file (practice location / address) to a temporary
# CSV and read it through DuckDB.
path <- tempfile("address_subfile", fileext = ".csv")

# mode = "wb" writes the bytes exactly as served. The default text-mode
# transfer rewrites "\n" as "\r\n" on Windows, which alters the line endings
# of the saved file.
download.file(url = subfiles[2], destfile = path, mode = "wb")

# NOTE(review): in the printed result, rows 2-10 show ENRLMT_ID starting with
# "\n". The reassignment section strips that character; this section does
# not. Confirm whether the IDs should be cleaned here as well.
dk2 <- read_csv_duckdb(path)

dk2

# A duckplyr data frame: 4 variables
   ENRLMT_ID           CITY_NAME     STATE_CD ZIP_CD   
   <chr>               <chr>         <chr>    <chr>    
 1 "I20031103000005"   MECHANICSBURG PA       170501925
 2 "\nI20031103000013" SAN JUAN      PR       009175030
 3 "\nI20031103000015" TOMS RIVER    NJ       08757    
 4 "\nI20031103000028" JERSEY CITY   NJ       073062305
 5 "\nI20031103000030" AGUADILLA     PR       006055256
 6 "\nI20031103000032" BINGHAM FARMS MI       480255810
 7 "\nI20031103000036" LAJAS         PR       006672082
 8 "\nI20031103000037" PONCE         PR       00780    
 9 "\nI20031103000063" CAGUAS        PR       00726    
10 "\nI20031103000096" SAN JUAN      PR       00921    
# ℹ more rows

Secondary Specialty Sub-File 2024 Q4

# Download the third sub-file (secondary specialty) to a temporary CSV and
# read it through DuckDB.
path <- tempfile("second_specialty", fileext = ".csv")

# mode = "wb" writes the bytes exactly as served. The default text-mode
# transfer rewrites "\n" as "\r\n" on Windows, which alters the line endings
# of the saved file.
download.file(url = subfiles[3], destfile = path, mode = "wb")

# NOTE(review): in the printed result, rows 2-10 show ENRLMT_ID starting with
# "\n". The reassignment section strips that character; this section does
# not. Confirm whether the IDs should be cleaned here as well.
dk3 <- read_csv_duckdb(path)

dk3

# A duckplyr data frame: 3 variables
   ENRLMT_ID           PROVIDER_TYPE_CD PROVIDER_TYPE_DESC                      
   <chr>               <chr>            <chr>                                   
 1 "I20031103000037"   14-11            PRACTITIONER - INTERNAL MEDICINE        
 2 "\nI20031103000037" 14-81            PRACTITIONER - CRITICAL CARE (INTENSIVI…
 3 "\nI20031103000039" 14-19            PRACTITIONER - ORAL SURGERY             
 4 "\nI20031103000089" 14-09            PRACTITIONER - INTERVENTIONAL PAIN MANA…
 5 "\nI20031103000123" 14-30            PRACTITIONER - DIAGNOSTIC RADIOLOGY     
 6 "\nI20031103000159" 14-11            PRACTITIONER - INTERNAL MEDICINE        
 7 "\nI20031103000159" 14-17            PRACTITIONER - HOSPICE/PALLIATIVE CARE  
 8 "\nI20031103000159" 14-83            PRACTITIONER - HEMATOLOGY/ONCOLOGY      
 9 "\nI20031103000197" 14-25            PRACTITIONER - PHYSICAL MEDICINE AND RE…
10 "\nI20031103000203" 14-11            PRACTITIONER - INTERNAL MEDICINE        
# ℹ more rows