# Query {#sec-querying}
```{r}
#| label: setup
#| include: false
source ("includes/_common.R" )
# test_url <- glue::as_glue("https://data.cms.gov/data-api/v1/dataset/d10d792e-ea6e-4145-8512-34efbc1be04b/data?size=10&offset=0&")
# test_url <- test_url + greater_or_equal("overall_rating", 3)
```
## Quick Query
```{r}
#| label: quick_query
#| message: true
x <- pro_endpoint ("asc_facility" )
` %|||% ` <- function (x, y) {
if (! is.null (x)) y else NULL
}
cli_query <- function (x) {
x <- unlist (x)
glue_col ("{silver {names(x)}=}{red {bold {unname(x)}}}" ) |> glue_collapse (sep = "&" )
}
quick_query_ <- new_generic ("quick_query_" , "x" )
method (quick_query_, providertwo::: class_endpoint) <- function (x, query = NULL ) {
n <- providertwo::: identifier_ (x) |>
request () |>
req_url_query (
count = "true" ,
format = "json" ,
keys = "true" ,
results = "false" ,
rowIds = "false" ,
schema = "false" ,
offset = 0 L,
limit = 1 L,
!!! query
) |>
providertwo::: perform_simple () |>
_$ count
if (n == 0 ) {
cli:: cli_alert_danger ("Query {.var {cli_query(query)}} returned {.emph 0} results." , wrap = TRUE )
return (invisible (NULL ))
}
if (n <= providertwo::: limit_ (x)) {
cli:: cli_alert_success ("Returning {.emph {n}} results..." , wrap = TRUE )
return (
providertwo::: identifier_ (x) |>
map (
function (i)
request (i) |>
req_url_query (
count = "false" ,
format = "json" ,
keys = "true" ,
results = "true" ,
rowIds = "false" ,
schema = "false" ,
offset = 0 L,
limit = providertwo::: limit_ (x),
!!! query
)
) |>
req_perform_parallel (on_error = "continue" ) |>
map (
function (x)
providertwo::: parse_string (x, query = "results" ) |>
as_tbl () |>
providertwo::: map_na_if ()
) |>
pluck (1 ) |>
providertwo::: name_fields_ (x)
)
}
if (n > providertwo::: limit_ (x)) {
cli:: cli_alert_danger (
"UNIMPLEMENTED: Query returned {.emph {n}} results, exceeding the limit of {providertwo:::limit_(x)}." ,
wrap = TRUE
)
return (invisible (NULL ))
}
}
q1 <- list (
"conditions[0][property]" = "state" ,
"conditions[0][operator]" = "=" ,
"conditions[0][value]" = "NY"
)
q2 <- list (
"conditions[0][property]" = "state" ,
"conditions[0][operator]" = "IN" ,
"conditions[0][value][1]" = "CA" ,
"conditions[0][value][2]" = "GA" ,
"conditions[0][value][3]" = "NY"
)
q3 <- list (
"conditions[0][property]" = "asc2_rate" ,
"conditions[0][operator]" = "<" ,
"conditions[0][value]" = "0.02"
)
r1 <- quick_query_ (pro_endpoint ("asc_facility" ), query = q1)
r1 |> str ()
quick_query_ (pro_endpoint ("asc_facility" ), query = q2)
r3 <- quick_query_ (pro_endpoint ("asc_facility" ), query = q3)
r3 |> str ()
```
### Params with length > 1
```{r}
#| label: params
foo <- function (x) {
x_name <- deparse (substitute (x))
glue:: glue ("{x_name}[{seq_along(x)}]={x}" ) |>
glue:: glue_collapse (sep = "&" )
}
ID <- 5 : 7
foo (ID)
foo (x = list (ID = 1 : 2 ))
```
### Multi-value Params
```{r}
#| label: multi
multi_value <- function (x) {
glue:: glue ("[{seq_along(x)}]={x}" ) |>
glue:: glue_collapse (sep = "&" )
}
multi_value (state.abb[1 : 5 ])
```
## Query Generation
1. Select Dataset/Method
1. Build JSON Query
1. Build Request
1. Retrieve Number of Results
1. Build Offset Sequence
### Allowed Operators
```{r}
#| label: allowed_operators
#| echo: false
list (
"=" = "equals(x)" ,
"<>" = "equals(x, negate = TRUE)" ,
">" = "greater_than(x)" ,
">=" = "greater_than(x, equals = TRUE)" ,
"<" = "less_than(x)" ,
"<=" = "less_than(x, equals = TRUE)" ,
"STARTS_WITH" = "starts_with(x)" ,
"CONTAINS" = "contains(x)" ,
"ENDS_WITH" = "ends_with(x)" ,
"IN" = "is_in(x)" ,
"NOT IN" = "is_in(x, negate = TRUE)" ,
"BETWEEN" = "between(x)" ,
"NOT BETWEEN" = "between(x, negate = TRUE)" ,
"IS NULL" = "blank(x)" ,
"IS NOT NULL" = "blank(x, negate = TRUE)"
) |>
providertwo::: print_list ()
```
```{r}
#| label: query_generation
new_query <- function (args, operator) {
len <- cheapr:: lengths_ (args, names = TRUE )
single <- args[cheapr:: which_ (len == 1 )]
multi <- args[cheapr:: which_ (len > 1 )]
x <- list (s = if (vctrs:: vec_is_empty (single)) NULL else single,
m = if (vctrs:: vec_is_empty (multi)) NULL else multi)
idx <- rlang:: set_names (seq_along (args), names (args))
opr <- rlang:: set_names (operator, names (args))
g_single <- glue:: glue (
"filter[{idx[names(x$s)]}][condition][path]={names(x$s)} \n " ,
"filter[{idx[names(x$s)]}][condition][operator]={opr[names(x$s)]} \n " ,
"filter[{idx[names(x$s)]}][condition][value]={providertwo:::delist(x$s)}" )
g_mult <- glue:: as_glue (
c (
glue:: glue (
"filter[{idx[names(x$m)]}][condition][path]={names(x$m)} \n " ,
"filter[{idx[names(x$m)]}][condition][operator]={opr[names(x$m)]}" ),
glue:: glue (
"filter[{idx[names(x$m)]}][condition][value]" ,
"[{seq_along(providertwo:::delist(x$m))}]={providertwo:::delist(x$m)}" ))
)
glue:: glue_collapse (c (g_single, g_mult), sep = " \n " )
}
new_query (args = list (state = c ("GA" , "MD" ),
last_name = "SMITH" ,
npi = 1234567890 ),
operator = c ("IN" , "STARTS_WITH" , "=" ))
```
### `class_query`
```{r}
#| label: class_query
class_query <- new_class (
"class_query" ,
properties = list (
path = class_character,
operator = class_character,
value = class_vector,
limit = class_numeric,
sort = class_character,
column = class_vector,
group = new_property (
class_list,
default = list (
conjunction = "AND" ,
memberOf = "" ))
)
)
```
### `query` Formatters
```{r}
#| label: query_test
ex <- list (
state = c ("GA" , "MD" ),
name = "Jerry" ,
number = 300.12 )
list_lengths (ex)
args <- list_combine (ex, i = set_names (seq_along (ex), names (ex)))
list_assign (x = ex, values = set_names (as.list (seq_along (ex)), names (ex)))
```
#### `query_comparison`
```{r}
#| label: query_comparison
query_comparison <- function (args,
operator = c ("=" , ">" , ">=" , "<" , "<=" , "<>" )) {
o <- match.arg (operator)
x <- args[cheapr:: which_ (cheapr:: lengths_ (args, names = TRUE ) == 1 )]
i <- args$ i[names (x)]
glue:: glue (
"
filter[{i}][condition][path]={names(x)}
filter[{i}][condition][operator]={o}
filter[{i}][condition][value]={providertwo:::delist(x)}
"
)
}
query_comparison (args)
query_comparison (args, ">=" )
```
#### `query_equals_simple`
```{r}
#| label: query_equals_simple
query_equals_simple <- function (args) {
args <- args[cheapr:: which_ (cheapr:: lengths_ (args, names = TRUE ) == 1 )]
glue:: glue ("filter[{names(args)}]={providertwo:::delist(args)}" )
}
query_equals_simple (ex)
```
#### `query_in_notin`
```{r}
#| label: query_in_notin
query_in_notin <- function (args, operator = "IN" ) {
o <- match.arg (operator, c ("IN" , "NOT IN" ))
x <- args[cheapr:: which_ (cheapr:: lengths_ (args, names = TRUE ) > 1 )]
i <- args$ i[names (x)]
i <- names (i)[! cheapr:: is_na (names (i))]
x$ i <- NULL
glue:: as_glue (
c (
glue:: glue (
"filter[{i}][condition][path]={names(x)} " ,
"filter[{i}][condition][operator]={o}"
),
glue:: glue (
"filter[{i}]" ,
"[condition][value]" ,
"[{seq_along(providertwo:::delist(x))}]={providertwo:::delist(x)}"
)
)
)
}
query_in_notin (args, "IN" )
query_in_notin (ex, "NOT IN" )
```
#### Finalizing `query` Components
```{r}
glue:: as_glue (c (query_in_notin (ex), query_comparison (ex)))
```
### New Function
```{r}
fields <- c (
"year" ,
"covered_recipient_npi" ,
"covered_recipient_type" ,
"covered_recipient_first_name" ,
"covered_recipient_last_name" ,
"recipient_city" ,
"recipient_state" ,
"recipient_zip_code" ,
"teaching_hospital_name" ,
"form_of_payment_or_transfer_of_value" ,
"nature_of_payment_or_transfer_of_value" ,
"applicable_manufacturer_or_applicable_gpo_making_payment_name" ,
"applicable_manufacturer_or_applicable_gpo_making_payment_id" )
process_params <- \(arg_names, field_names) {
nms <- set_names (arg_names, field_names)
parse_expr (
paste0 (
"list2(" ,
glue_collapse (
glue ('{names(nms)} = {unname(nms)}' ),
sep = ", " ), ")" ))
}
has_operator <- \(args) {
map_lgl (args, function (x) inherits (x, "query_operator" ))
}
name_path <- \(args) {
map (args, \(x) x[[names (x)]][["path" ]] <- names (x))
}
```
```{r}
open_pay <- function (year,
npi = NULL ,
covered_type = NULL ,
first = NULL ,
last = NULL ,
city = NULL ,
state = NULL ,
zip = NULL ,
teaching_hospital = NULL ,
payment_form = NULL ,
payment_nature = NULL ,
payer = NULL ,
payer_id = NULL ) {
fn_args <- process_params (fn_fmls_names (), fields) |>
eval_bare () |>
compact ()
# if (any(has_operator(fn_args)))
# fn_args[has_operator(fn_args)] <- name_path(fn_args[has_operator(fn_args)])
fn_args
}
```
```{r}
open_pay (year = 2021 , npi = "1043218118" )
open_pay (year = 2021 , payment_nature = "Royalty or License" )
open_pay (year = 2021 , payment_form = "Stock option" )
open_pay (year = 2021 , payer = "Adaptive Biotechnologies Corporation" )
open_pay (year = 2021 , teaching_hospital = "Nyu Langone Hospitals" )
open_pay (year = 2017 : 2023 , npi = "1043477615" )
```
```{r}
starts_with <- \(x) {
structure (
c (path = NA_character_ ,
operator = "STARTS_WITH" ,
value = x),
class = "query_operator" )
}
starts_with ("Royalty or License" )
x <- open_pay (
year = 2021 ,
payment_nature = starts_with ("Royalty or License" ),
teaching_hospital = starts_with ("NYU" ))
x
names (x[has_operator (x)])
x[has_operator (x)][[1 ]][["path" ]] <- names (x[has_operator (x)])[[1 ]]
x
# map(args, \(x) x[[names(x)]][["path"]] <- names(x))
#
# x[has_operator(x)] <- providertwo:::map2(
# x[has_operator(x)],
# names(x[has_operator(x)]),
# function(x, y) x[["path"]] <- y)
#
# x
```
## Query Format
```{r}
url_parse (
base_url = "https://openpaymentsdata.cms.gov/api/1/datastore/" ,
url = "query/fdc3c773-018a-412c-8a81-d7b8a13a037b/0" ) |>
url_modify_query (
` conditions[0][property] ` = "covered_recipient_last_name" ,
` conditions[0][value] ` = "SMITH" ,
` conditions[0][operator] ` = "=" ,
schema = "false" ,
keys = "false" ,
results = "false" ,
offset = 0 ,
limit = 500 ) |>
url_build () |>
request () |>
providertwo::: perform_simple () |>
_[["count" ]]
```
```{r}
url_parse (
base_url = "https://openpaymentsdata.cms.gov/api/1/datastore/" ,
url = "query/fdc3c773-018a-412c-8a81-d7b8a13a037b/0" ) |>
url_modify_query (
` conditions[0][property] ` = "covered_recipient_first_name" ,
` conditions[0][value] ` = "PATRICK" ,
` conditions[0][operator] ` = "=" ,
` conditions[1][property] ` = "covered_recipient_last_name" ,
` conditions[1][value] ` = "WARD" ,
` conditions[1][operator] ` = "=" ,
schema = "false" ,
keys = "false" ,
results = "false" ,
offset = 0 ) |>
url_build () |>
request () |>
providertwo::: perform_simple () |>
_[["count" ]]
```
### Other Examples
```{r}
exquery <- glue:: glue (
'
"filter[{i}][path]" = "{PATH}",
"filter[{i}][operator]" = "{OPERATOR}",
"filter[{i}][value]" = "{VALUE}"
' ,
i = 1 : 2 ,
PATH = c ("STATE_CD" , "LAST_NAME" ),
OPERATOR = "=" ,
VALUE = c ("GA" , "SMITH" )
) |>
glue:: glue_collapse (sep = ", \n " )
exquery
glue:: glue ('c({exquery})' ) |>
rlang:: parse_expr () |>
rlang:: eval_bare ()
```
```{r}
glue:: glue (
'
filter[{PATH}][operator]={OPERATOR}
filter[{PATH}][value]={VALUE}
' ,
PATH = c ("STATE_CD" , "LAST_NAME" ),
OPERATOR = "IN" ,
VALUE = c ("GA" , "SMITH" )
)
```
```{r}
#| label: format_syntax
format_syntax <- \(name, value) {
setNames (
value,
paste0 (
name,
"[id-" ,
seq_along (value),
"][condition][value]" ))
}
format_syntax ("filter" , c ("GA" , "NY" ))
# query_syntax <- c(
# "filter[STATE_CD]" = "GA",
# "filter[LAST_NAME]" = "SMITH"
# )
```
## Pagination
```{r}
base <- request ("https://data.cms.gov/data-api/v1/dataset" ) |>
req_url_path_append ("9887a515-7552-4693-bf58-735c77af46d7" ) |>
req_url_path_append ("data" )
base
stats <- base |>
req_url_path_append ("stats" )
stats
stats <- stats |>
req_perform () |>
resp_body_json (simplifyVector = TRUE )
stats |> purse ()
# Offset shouldn't go to 50000
providertwo::: offset_seq (stats$ found_rows, 5000 )
urls <- glue:: glue (
"{base$url}" ,
"?size=5000&" ,
"offset=" ,
"{providertwo:::offset_seq(stats$found_rows, 5000)}"
)
urls
resp <- urls[1 : 3 ] |>
map (
function (x) {
request (x) |>
req_perform () |>
resp_body_json (simplifyVector = TRUE ) |>
tibble ()
}) |>
list_rbind (names_to = "reqID" ) |>
janitor:: clean_names ()
resp |>
janitor:: get_dupes ()
dupes <- resp |>
count (npi, sort = TRUE ) |>
filter (n > 1 ) |>
pull (npi)
resp |>
filter (npi %in% dupes) |>
arrange (npi)
resp |>
filter (npi %in% c ("1225232226" , "1740295518" , "1326103029" )) |>
relocate (last_updated) |>
arrange (npi)
```
## Example 2
Select Dataset
```{r}
#| label: select_dataset
#| eval: false
distribution <- read_json_arrow (
file = "https://data.cms.gov/data.json" ,
col_select = c ("dataset" ),
as_data_frame = TRUE ) |>
to_duckdb () |>
pull (dataset) |>
pluck (1 ) |>
select (distribution) |>
as_tibble () |>
unnest (distribution) |>
# rename_with(remove_at_symbol) |>
filter (format == "API" ,
description == "latest" )
distribution |>
filter (grepl ("Enrollment" , title)) |>
pull (title)
accessURL <- distribution |>
filter (grepl ("Opt Out Affidavits : " , title)) |>
pull (accessURL)
accessURL
```
Format Query
```{r}
#| eval: false
query <- glue:: glue (
'c(
"filter[{i}][path]" = "{PATH}",
"filter[{i}][value]" = "{VALUE}"
)' ,
i = 1 ,
PATH = "State Code" ,
VALUE = "CA" )
query
query <- query |>
parse_expr () |>
eval_bare ()
query
```
Retrieve Number of Results
```{r}
#| eval: false
queryurl <- accessURL |>
request () |>
req_url_query (size = 5000 , !!! query)
nresults <- queryurl |>
req_url_path_append ("stats" ) |>
req_perform () |>
resp_body_json () |>
fuimus:: gelm ("found" )
nresults
```
```{r}
#| eval: false
url_parse (queryurl$ url)
```
Generate and Send Requests
```{r}
#| eval: false
reqs <- map (
glue (
"{url}" ,
"&offset={off}" ,
url = queryurl$ url,
off = providertwo::: offset_seq (nresults, 5000 )),
httr2:: request)
res <- req_perform_parallel (reqs) |>
resps_successes () |>
map (\(x) tibble (resp_body_json (x, simplifyVector = TRUE ))) |>
bind_rows ()
providertwo::: set_clean (res, names (res)) |>
providertwo::: map_na_if () |>
mtt (
optout_effective_date = providertwo::: as_date (optout_effective_date, fmt = "%m/%d/%Y" ),
optout_end_date = providertwo::: as_date (optout_end_date, fmt = "%m/%d/%Y" ),
last_updated = providertwo::: as_date (last_updated, fmt = "%m/%d/%Y" )
)
```
### Testing in a Function
```{r}
#| eval: false
protofunc <- function (id = NULL ,
state = NULL ,
name = NULL ,
number = NULL ) {
arg <- class_args (
id = id,
state = state,
name = name,
number = number)
query_comparison (compact (props (arg)))
}
protofunc (state = c ("GA" ),
name = "Jerry" ,
number = 300.12 )
```
## Try Again
```{r}
arg_list <- new_class (
"arg_list" ,
properties = list (params = class_list),
constructor = function (...) new_object (S7_object (), params = rlang:: list2 (...)),
validator = function (self) if (! rlang:: is_named (self@ params)) "all @params must all be named" )
less_than <- function (x, equals = FALSE ) {
` attr<- ` (x, "operator" , if (equals) "<=" else "<" )
}
is_in <- function (x, negate = FALSE ) {
` attr<- ` (x, "operator" , ifelse (negate, "NOT IN" , "IN" ))
}
a <- arg_list (
npi = less_than ("1234567890" ),
state = is_in (state.abb),
name = "Jerry" )
# get_operator <- purrr::attr_getter("operator")
# attr(prop(a, "params")$npi, "operator")
has_operator <- \(x) names (attributes (x)) %in% "operator"
has_operator (a@ params$ npi)
has_operator (a@ params$ name)
map (a@ params, has_operator) |>
compact () |>
names ()
a@ params[names (a@ params) %in% "npi" ]
```
```{r}
#| eval: false
startswith <- \(x) {
list (
OPERATOR = "STARTS_WITH" ,
VALUE = as.character (x)
)
}
startswith ("Royalty or License" )
between <- \(x, negate = FALSE ) {
list (
OPERATOR = ifelse (negate, "NOT BETWEEN" , "BETWEEN" ),
VALUE = as.character (x)
)
}
between (c (1 , 10 ))
between (c (1 , 10 ), negate = TRUE )
endswith <- \(x) {
list (
OPERATOR = "ENDS_WITH" ,
VALUE = as.character (x)
)
}
endswith ("NYU" )
is_in <- \(x, negate = FALSE ) {
list (
OPERATOR = ifelse (negate, "NOT IN" , "IN" ),
VALUE = as.character (x)
)
}
is_in (c ("GA" , "NY" ))
is_in (c ("GA" , "NY" ), negate = TRUE )
```
```{r}
x <- list (npi = ~ providertwo::: starts_with ("gdklfgj" ))
x
x$ npi[1 ]
rlang:: is_formula (x$ npi, lhs = TRUE )
foo <- function (npi = NULL ,
entity = NULL ,
first = NULL ,
last = NULL ,
organization = NULL ,
name_type = NULL ,
taxonomy_desc = NULL ,
city = NULL ,
state = NULL ,
zip = NULL ,
country = NULL ) {
rlang:: list2 (
number = npi,
enumeration_type = entity,
first_name = first,
last_name = last,
name_purpose = name_type,
organization_name = organization,
taxonomy_description = taxonomy_desc,
city = city,
state = state,
postal_code = zip,
country_code = country,
skip = 0 L
)
}
x <- foo (npi = ~ providertwo::: starts_with (1234567890 ))
rlang:: f_lhs (x$ number) <- rlang:: expr (NPI)
x$ number
rlang:: f_lhs (x$ number)
rlang:: f_rhs (x$ number) |>
rlang:: eval_tidy ()
```