Skip to content

Commit

Permalink
Adding U2M OAuth Support (#49)
Browse files Browse the repository at this point in the history
* fixing connection pane tests

* Adding support for U2M OAuth by default when `DATABRICKS_TOKEN` isn't specified.
`read_env_var` now accepts an `error` param (boolean); `db_token` calls it with `error = FALSE` so that it now returns `NULL` when the token is not set (previously errored). This is a breaking change.

* Adding U2M OAuth

* Changing fake token structure to avoid security triggers

* Documentation fixes

---------

Co-authored-by: Zac Davies <zac@databricks.com>
  • Loading branch information
zacdav-db and Zac Davies authored Jul 11, 2024
1 parent b22d404 commit bab0033
Show file tree
Hide file tree
Showing 11 changed files with 146 additions and 29 deletions.
Binary file modified .DS_Store
Binary file not shown.
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ export(db_workspace_list)
export(db_workspace_mkdirs)
export(db_wsid)
export(dbfs_storage_info)
export(determine_brickster_venv)
export(docker_image)
export(email_notifications)
export(file_storage_info)
Expand Down
2 changes: 1 addition & 1 deletion R/connection-pane.R
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ get_warehouse <- function(id, host, token) {
"name" = x$name,
"id" = id,
"creator" = x$creator_name,
"channel" = x$channel$name,
"channel" = dplyr::coalesce(x$channel$name, NA),
"serverless" = x$enable_serverless_compute,
"size" = x$cluster_size,
"# clusters" = x$num_clusters,
Expand Down
54 changes: 48 additions & 6 deletions R/package-auth.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ db_host <- function(id = NULL, prefix = NULL, profile = getOption("db_profile",
#'
#' @description
#' Token must be specified as an environment variable `DATABRICKS_TOKEN`.
#' If `DATABRICKS_TOKEN` is not set, authentication defaults to the OAuth U2M flow.
#'
#' Refer to [api authentication docs](https://docs.databricks.com/dev-tools/api/latest/authentication.html)
#'
Expand All @@ -87,7 +88,7 @@ db_token <- function(profile = getOption("db_profile")) {
return(token)
}

read_env_var(key = "token", profile = profile)
read_env_var(key = "token", profile = profile, error = FALSE)
}

#' Fetch Databricks Workspace ID
Expand Down Expand Up @@ -193,10 +194,13 @@ read_databrickscfg <- function(key = c("token", "host", "wsid"), profile = NULL)
#'
#' @param key The value to fetch from profile. One of `token`, `host`, or `wsid`
#' @param profile Character, the name of the profile to retrieve values
#' @param error Boolean, whether an error should be raised when the key isn't found (when `FALSE`, `NULL` is returned instead)
#'
#' @return named list of values associated with profile
#' @keywords internal
read_env_var <- function(key = c("token", "host", "wsid"), profile = NULL) {
read_env_var <- function(key = c("token", "host", "wsid"),
profile = NULL, error = TRUE) {

key <- match.arg(key)

# fetch value based on profile
Expand All @@ -208,14 +212,52 @@ read_env_var <- function(key = c("token", "host", "wsid"), profile = NULL) {

value <- Sys.getenv(key_name)


if (value == "") {
stop(cli::format_error(c(
"Environment variable {.var {key_name}} not found:",
"x" = "Need to specify {.var {key_name}} environment variable."
)))
if (error) {
stop(cli::format_error(c(
"Environment variable {.var {key_name}} not found:",
"x" = "Need to specify {.var {key_name}} environment variable."
)))
} else {
value <- NULL
}
}

value
}


#' Create OAuth 2.0 Client
#'
#' @details Creates an OAuth 2.0 Client, support for U2M flows only.
#' May later be extended for account U2M and all M2M flows.
#'
#' As a side effect the returned list is also stored in the
#' `brickster_oauth_client` option so that the request helpers can fetch it.
#'
#' @inheritParams auth_params
#'
#' @return List with two elements: `client`, the `httr2_oauth_client` created
#' by `httr2::oauth_client()`, and `auth_url`, the workspace authorize URL.
#' @keywords internal
db_oauth_client <- function(host = db_host()) {

  # fail early with a package-level message when host cannot form a URL;
  # an empty string is deliberately still accepted
  if (!is.character(host) || length(host) != 1L || is.na(host)) {
    stop(cli::format_error(c(
      "{.arg host} must be a single string:",
      "x" = "Need to specify the workspace host to create an OAuth client."
    )))
  }

  # workspace-level OIDC endpoints derived from the host
  ws_token_url <- glue::glue("https://{host}/oidc/v1/token", host = host)
  ws_auth_url <- glue::glue("https://{host}/oidc/v1/authorize", host = host)

  client <- httr2::oauth_client(
    id = "databricks-cli",
    token_url = ws_token_url,
    name = "brickster"
  )

  client_and_auth <- list(
    client = client,
    auth_url = ws_auth_url
  )

  # add option for client to be fetched via request helpers
  options(brickster_oauth_client = client_and_auth)

  client_and_auth

}



24 changes: 23 additions & 1 deletion R/request-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,35 @@ db_request <- function(endpoint, method, version = NULL, body = NULL, host, toke
user_agent_str <- paste0("brickster/", utils::packageVersion("brickster"))

req <- httr2::request(base_url = url) %>%
httr2::req_auth_bearer_token(token) %>%
httr2::req_headers("User-Agent" = user_agent_str) %>%
httr2::req_user_agent(string = user_agent_str) %>%
httr2::req_url_path_append(endpoint) %>%
httr2::req_method(method) %>%
httr2::req_retry(max_tries = 3, backoff = ~ 2)

# if token is present use directly
# otherwise initiate OAuth 2.0 U2M Workspace flow
if (!is.null(token)) {
req <- httr2::req_auth_bearer_token(req = req, token = token)
} else {

# fetch client
oauth_client <- getOption(
x = "brickster_oauth_client",
db_oauth_client(host = host)
)

# use client to auth
req <- httr2::req_oauth_auth_code(
req,
client = oauth_client$client,
scope = "all-apis",
auth_url = oauth_client$auth_url,
redirect_uri = "http://localhost:8020"
)

}

if (!is.null(body)) {
body <- base::Filter(length, body)
req <- req %>%
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Docs website has [an article](https://zacdav-db.github.io/brickster/articles/set
## API Coverage

| API | Available | Version |
|----------------------------------------------|-------------|-------------|
|-------------------------------------------------------------------------------------------------------|-----------|---------|
| [DBFS](https://docs.databricks.com/dev-tools/api/latest/dbfs.html) | Yes | 2.0 |
| [Secrets](https://docs.databricks.com/dev-tools/api/latest/secrets.html) | Yes | 2.0 |
| [Repos](https://docs.databricks.com/dev-tools/api/latest/repos.html) | Yes | 2.0 |
Expand Down
22 changes: 22 additions & 0 deletions man/db_oauth_client.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/db_token.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/read_env_var.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions tests/testthat/test-auth.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,22 @@ test_that("auth functions - baseline behaviour", {
"mock.cloud.databricks.com"
)

# oauth functions require host to be specified and valid
expect_error(db_oauth_client(host = NULL))
expect_identical(
db_oauth_client(host = "")$auth_url,
"https:///oidc/v1/authorize"
)
expect_identical(
db_oauth_client(host = "")$client$token_url,
"https:///oidc/v1/token"
)
expect_s3_class(
db_oauth_client(host = "")$client,
"httr2_oauth_client"
)


})

test_that("auth functions - switching profile", {
Expand Down
49 changes: 31 additions & 18 deletions vignettes/setup-auth.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -17,33 +17,42 @@ knitr::opts_chunk$set(

## Defining Credentials

The `{brickster}` package connects to a Databricks workspace via personal access tokens. Tokens can be generated in a few steps, for a step-by-step breakdown [refer to the documentation](https://docs.databricks.com/dev-tools/api/latest/authentication.html).
The `{brickster}` package connects to a Databricks workspace in two ways:

1. [OAuth user-to-machine (U2M) authentication](https://docs.databricks.com/en/dev-tools/auth/oauth-u2m.html#oauth-user-to-machine-u2m-authentication)
2. [Personal Access Tokens (PAT)](https://docs.databricks.com/en/dev-tools/auth/pat.html)

It's recommended to use option (1) when using `{brickster}` interactively. If you need to run code via an automated process, the only option currently is (2).

Personal Access Tokens can be generated in a few steps, for a step-by-step breakdown [refer to the documentation](https://docs.databricks.com/dev-tools/api/latest/authentication.html).

Once you have a token you'll be able to store it alongside the workspace URL in an `.Renviron` file. The `.Renviron` is used for storing the variables, such as those which may be sensitive (e.g. credentials) and de-couple them from the code (additional reading: [1](https://support.rstudio.com/hc/en-us/articles/360047157094-Managing-R-with-Rprofile-Renviron-Rprofile-site-Renviron-site-rsession-conf-and-repos-conf), [2](https://cran.r-project.org/web/packages/startup/vignettes/startup-intro.html)).

To get started add the following to your `.Renviron`:

- `DATABRICKS_HOST`: The workspace URL

- `DATABRICKS_TOKEN`: Personal access token
- `DATABRICKS_TOKEN`: Personal access token (*not required if using OAuth U2M*)

- `DATABRICKS_WSID`: The workspace ID ([docs](https://docs.databricks.com/workspace/workspace-details.html#workspace-instance-names-urls-and-ids))

`DATABRICKS_WSID` is only required for the RStudio IDE integration with the connection pane.

Example of entries in `.Renviron`:

DATABRICKS_HOST=https://xxxxxxx.cloud.databricks.com/
DATABRICKS_TOKEN=dapi123456789012345678a9bc01234defg5
DATABRICKS_WSID=123123123123123
```
DATABRICKS_HOST=xxxxxxx.cloud.databricks.com
DATABRICKS_TOKEN=dapi123456789012345678a9bc01234defg5
DATABRICKS_WSID=123123123123123
```

**Note**: Recommend creating an `.Renviron` for each project. You can create `.Renviron` within your user home directory if required.

Restarting your R session will allow those variables to be picked up via the `{brickster}` package.

## Using Credentials with `{brickster}`

Authentication should now be possible without specifying the credentials in your R code. We can now load `{brickster}` and list the clusters within the workspace using `db_cluster_list()`, to access our host/token we use `db_host()`/`db_token()` respectively.
Authentication should now be possible without specifying the credentials in your R code. You can load `{brickster}` and list the clusters within the workspace using `db_cluster_list()`, to access the host/token use `db_host()`/`db_token()` respectively.

```{r setup}
library(brickster)
Expand All @@ -59,6 +68,8 @@ All `{brickster}` functions have their host/token parameters default to calling
clusters <- db_cluster_list()
```

When using OAuth U2M authentication you don't define a token in `.Renviron` and therefore `db_token()` will return `NULL`.

## Managing Multiple Credentials

There are two methods that `{brickster}` supports to simplify switching of credentials within an R project/session:
Expand Down Expand Up @@ -113,18 +124,20 @@ It is expected that profiles in `.Renviron` will adhere to the same naming conve

Here is an example of an `.Renviron` file that has three profiles (default, dev, prod):

# default
DATABRICKS_HOST=https://xxxxxxx.cloud.databricks.com/
DATABRICKS_TOKEN=dapi123456789012345678a9bc01234defg5
DATABRICKS_WSID=123123123123123
# dev
DATABRICKS_HOST_DEV=https://xxxxxxx-dev.cloud.databricks.com/
DATABRICKS_TOKEN_DEV=dapi123456789012345678a9bc01234defg6
DATABRICKS_WSID_DEV=123123123123124
# prod
DATABRICKS_HOST_PROD=https://xxxxxxx-prod.cloud.databricks.com/
DATABRICKS_TOKEN_PROD=dapi123456789012345678a9bc01234defg7
DATABRICKS_WSID_PROD=123123123123125
```
# default
DATABRICKS_HOST=xxxxxxx.cloud.databricks.com
DATABRICKS_TOKEN=dapixxxxxxxxxxxxxxxxxxxxxxxxx
DATABRICKS_WSID=123123123123123
# dev
DATABRICKS_HOST_DEV=xxxxxxx-dev.cloud.databricks.com
DATABRICKS_TOKEN_DEV=dapixxxxxxxxxxxxxxxxxxxxxxxxx
DATABRICKS_WSID_DEV=123123123123124
# prod
DATABRICKS_HOST_PROD=xxxxxxx-prod.cloud.databricks.com
DATABRICKS_TOKEN_PROD=dapixxxxxxxxxxxxxxxxxxxxxxxxx
DATABRICKS_WSID_PROD=123123123123125
```

### Configuring `.databrickscfg`

Expand Down

0 comments on commit bab0033

Please sign in to comment.