Skip to content

Commit

Permalink
Merge remote-tracking branch 'oss-datahub/master' into vertex_src_temp
Browse files Browse the repository at this point in the history
  • Loading branch information
ryota-cloud committed Mar 3, 2025
2 parents 6c43ecc + 5790b84 commit d381b9e
Show file tree
Hide file tree
Showing 52 changed files with 3,491 additions and 139 deletions.
40 changes: 40 additions & 0 deletions datahub-frontend/app/auth/GuestAuthenticationConfigs.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package auth;

/**
 * Guest-authentication settings for the frontend, read once from a Typesafe {@code Config}.
 *
 * <p>Guest authentication is off unless {@link #GUEST_ENABLED_CONFIG_PATH} is present and {@code
 * true}. The guest user and guest path fall back to {@link #DEFAULT_GUEST_USER_NAME} and {@link
 * #DEFAULT_GUEST_PATH} when their config paths are not defined.
 *
 * <p>Instances are immutable: every value is resolved in the constructor and never changes.
 */
public class GuestAuthenticationConfigs {
  public static final String GUEST_ENABLED_CONFIG_PATH = "auth.guest.enabled";
  public static final String GUEST_USER_CONFIG_PATH = "auth.guest.user";
  public static final String GUEST_PATH_CONFIG_PATH = "auth.guest.path";
  public static final String DEFAULT_GUEST_USER_NAME = "guest";
  public static final String DEFAULT_GUEST_PATH = "/public";

  // Primitive on purpose: the flag is never null, so there is nothing to box/unbox.
  private final boolean isEnabled;
  // Falls back to DEFAULT_GUEST_USER_NAME if not defined but guest auth is enabled.
  private final String guestUser;
  // The path for initial access to login as guest and bypass the login page.
  private final String guestPath;

  /**
   * Resolves the guest-authentication settings from the given configuration.
   *
   * @param configs configuration to read; a path that is absent leaves the corresponding setting
   *     at its default (disabled, {@code "guest"}, {@code "/public"})
   */
  public GuestAuthenticationConfigs(final com.typesafe.config.Config configs) {
    isEnabled =
        configs.hasPath(GUEST_ENABLED_CONFIG_PATH) && configs.getBoolean(GUEST_ENABLED_CONFIG_PATH);
    guestUser =
        configs.hasPath(GUEST_USER_CONFIG_PATH)
            ? configs.getString(GUEST_USER_CONFIG_PATH)
            : DEFAULT_GUEST_USER_NAME;
    guestPath =
        configs.hasPath(GUEST_PATH_CONFIG_PATH)
            ? configs.getString(GUEST_PATH_CONFIG_PATH)
            : DEFAULT_GUEST_PATH;
  }

  /**
   * @return {@code true} only when the enabled path was present and set to {@code true}
   */
  public boolean isGuestEnabled() {
    return isEnabled;
  }

  /**
   * @return the configured guest user name, or {@link #DEFAULT_GUEST_USER_NAME} if none was set
   */
  public String getGuestUser() {
    return guestUser;
  }

  /**
   * @return the configured guest entry path, or {@link #DEFAULT_GUEST_PATH} if none was set
   */
  public String getGuestPath() {
    return guestPath;
  }
}
21 changes: 21 additions & 0 deletions datahub-frontend/app/controllers/AuthenticationController.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import auth.AuthUtils;
import auth.CookieConfigs;
import auth.GuestAuthenticationConfigs;
import auth.JAASConfigs;
import auth.NativeAuthenticationConfigs;
import auth.sso.SsoManager;
Expand Down Expand Up @@ -58,6 +59,8 @@ public class AuthenticationController extends Controller {
private final CookieConfigs cookieConfigs;
private final JAASConfigs jaasConfigs;
private final NativeAuthenticationConfigs nativeAuthenticationConfigs;
private final GuestAuthenticationConfigs guestAuthenticationConfigs;

private final boolean verbose;

@Inject private org.pac4j.core.config.Config ssoConfig;
Expand All @@ -73,6 +76,7 @@ public AuthenticationController(@Nonnull Config configs) {
cookieConfigs = new CookieConfigs(configs);
jaasConfigs = new JAASConfigs(configs);
nativeAuthenticationConfigs = new NativeAuthenticationConfigs(configs);
guestAuthenticationConfigs = new GuestAuthenticationConfigs(configs);
verbose = configs.hasPath(AUTH_VERBOSE_LOGGING) && configs.getBoolean(AUTH_VERBOSE_LOGGING);
}

Expand Down Expand Up @@ -110,6 +114,23 @@ public Result authenticate(Http.Request request) {
return Results.redirect(redirectPath);
}

if (guestAuthenticationConfigs.isGuestEnabled()
&& guestAuthenticationConfigs.getGuestPath().equals(redirectPath)) {
final String accessToken =
authClient.generateSessionTokenForUser(guestAuthenticationConfigs.getGuestUser());
redirectPath =
"/"; // We requested guest login by accessing {guestPath} URL. It is not really a target.
CorpuserUrn guestUserUrn = new CorpuserUrn(guestAuthenticationConfigs.getGuestUser());
return Results.redirect(redirectPath)
.withSession(createSessionMap(guestUserUrn.toString(), accessToken))
.withCookies(
createActorCookie(
guestUserUrn.toString(),
cookieConfigs.getTtlInHours(),
cookieConfigs.getAuthCookieSameSite(),
cookieConfigs.getAuthCookieSecure()));
}

// 1. If SSO is enabled, redirect to IdP if not authenticated.
if (ssoManager.isSsoEnabled()) {
return redirectToIdentityProvider(request, redirectPath)
Expand Down
5 changes: 5 additions & 0 deletions datahub-frontend/conf/application.conf
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,11 @@ auth.oidc.grantType = ${?AUTH_OIDC_GRANT_TYPE}
#
auth.jaas.enabled = ${?AUTH_JAAS_ENABLED}
auth.native.enabled = ${?AUTH_NATIVE_ENABLED}
auth.guest.enabled = ${?GUEST_AUTHENTICATION_ENABLED}
# The name of the guest user id
auth.guest.user = ${?GUEST_AUTHENTICATION_USER}
# The path to bypass login page and get logged in as guest
auth.guest.path = ${?GUEST_AUTHENTICATION_PATH}

# Enforces the usage of a valid email for user sign up
auth.native.signUp.enforceValidEmail = true
Expand Down
61 changes: 61 additions & 0 deletions datahub-frontend/test/security/GuestAuthenticationConfigsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package security;

import static org.junit.jupiter.api.Assertions.*;

import auth.GuestAuthenticationConfigs;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;
import org.junitpioneer.jupiter.ClearEnvironmentVariable;
import org.junitpioneer.jupiter.SetEnvironmentVariable;

@TestInstance(TestInstance.Lifecycle.PER_METHOD)
@SetEnvironmentVariable(key = "DATAHUB_SECRET", value = "test")
@SetEnvironmentVariable(key = "KAFKA_BOOTSTRAP_SERVER", value = "")
@SetEnvironmentVariable(key = "DATAHUB_ANALYTICS_ENABLED", value = "false")
@SetEnvironmentVariable(key = "AUTH_OIDC_ENABLED", value = "true")
@SetEnvironmentVariable(key = "AUTH_OIDC_JIT_PROVISIONING_ENABLED", value = "false")
@SetEnvironmentVariable(key = "AUTH_OIDC_CLIENT_ID", value = "testclient")
@SetEnvironmentVariable(key = "AUTH_OIDC_CLIENT_SECRET", value = "testsecret")
@SetEnvironmentVariable(key = "AUTH_VERBOSE_LOGGING", value = "true")
// The guest variables are cleared at class level because JUnit Pioneer only honours its
// environment annotations on test classes and test methods, not on @BeforeEach lifecycle
// methods. Method-level @SetEnvironmentVariable annotations below still override these clears.
@ClearEnvironmentVariable(key = "GUEST_AUTHENTICATION_ENABLED")
@ClearEnvironmentVariable(key = "GUEST_AUTHENTICATION_USER")
@ClearEnvironmentVariable(key = "GUEST_AUTHENTICATION_PATH")
class GuestAuthenticationConfigsTest {

  /** Drops cached configuration so each test reloads it under its own environment variables. */
  @BeforeEach
  public void clearConfigCache() {
    ConfigFactory.invalidateCaches();
  }

  /** With no guest variables set, guest authentication must be reported as disabled. */
  @Test
  public void testGuestConfigDisabled() {
    Config config = ConfigFactory.load();
    GuestAuthenticationConfigs guestAuthConfig = new GuestAuthenticationConfigs(config);
    assertFalse(guestAuthConfig.isGuestEnabled());
  }

  /** Enabling guest authentication alone must yield the default guest user and path. */
  @Test
  @SetEnvironmentVariable(key = "GUEST_AUTHENTICATION_ENABLED", value = "true")
  public void testGuestConfigEnabled() {
    Config config = ConfigFactory.load();
    GuestAuthenticationConfigs guestAuthConfig = new GuestAuthenticationConfigs(config);
    assertTrue(guestAuthConfig.isGuestEnabled());
    assertEquals("guest", guestAuthConfig.getGuestUser());
    assertEquals("/public", guestAuthConfig.getGuestPath());
  }

  /** Explicit user and path variables must override the defaults. */
  @Test
  @SetEnvironmentVariable(key = "GUEST_AUTHENTICATION_ENABLED", value = "true")
  @SetEnvironmentVariable(key = "GUEST_AUTHENTICATION_USER", value = "publicUser")
  @SetEnvironmentVariable(key = "GUEST_AUTHENTICATION_PATH", value = "/publicPath")
  public void testGuestConfigWithUserEnabled() {
    Config config = ConfigFactory.load();
    GuestAuthenticationConfigs guestAuthConfig = new GuestAuthenticationConfigs(config);
    assertTrue(guestAuthConfig.isGuestEnabled());
    assertEquals("publicUser", guestAuthConfig.getGuestUser());
    assertEquals("/publicPath", guestAuthConfig.getGuestPath());
  }
}
2 changes: 2 additions & 0 deletions docs/api/tutorials/owners.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,14 @@ Save this `user.yaml` as a local file.
email: bar@acryl.io
slack: "@the_bar_raiser"
description: "I like raising the bar higher"
title: "Analytics Engineer"
groups:
- foogroup@acryl.io
- id: datahub
slack: "@datahubproject"
phone: "1-800-GOT-META"
description: "The DataHub Project"
title: "Data Engineer"
picture_link: "https://raw.githubusercontent.com/datahub-project/datahub/master/datahub-web-react/src/images/datahub-logo-color-stable.svg"
```
Expand Down
9 changes: 8 additions & 1 deletion docs/authentication/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ DataHub provides 3 mechanisms for authentication at login time:
In subsequent requests, the session token is used to represent the authenticated identity of the user, and is validated by DataHub's backend service (discussed below).
Eventually, the session token is expired (24 hours by default), at which point the end user is required to log in again.

DataHub also supports Guest access, which lets users access the system without an explicit login. Guest authentication is disabled by default.
When Guest access is enabled, accessing DataHub through a configurable URL path logs the user in as an existing user that is designated as the guest. The privileges of the guest
are controlled by adjusting the privileges of that designated guest user.

### Authentication in the Backend (Metadata Service)

When a user makes a request for Data within DataHub, the request is authenticated by DataHub's Backend (Metadata Service) via a JSON Web Token. This applies to both requests originating from the DataHub application,
Expand All @@ -40,12 +44,15 @@ more about Personal Access Tokens [here](personal-access-tokens.md).

To learn more about DataHub's backend authentication, check out [Introducing Metadata Service Authentication](introducing-metadata-service-authentication.md).

Credentials must be provided as Bearer Tokens inside of the **Authorization** header in any request made to DataHub's API layer. To learn
Credentials must be provided as Bearer Tokens inside of the **Authorization** header in any request made to DataHub's API layer.

```shell
Authorization: Bearer <your-token>
```

As with the frontend, the backend can also optionally enable Guest authentication. If Guest authentication is enabled, all API calls made to the backend
without an Authorization header are treated as coming from the guest user, and the privileges associated with the designated guest user apply to those requests.

Note that in DataHub local quickstarts, Authentication at the backend layer is disabled for convenience. This leaves the backend
vulnerable to unauthenticated requests and should not be used in production. To enable
backend (token-based) authentication, simply set the `METADATA_SERVICE_AUTH_ENABLED=true` environment variable
Expand Down
51 changes: 49 additions & 2 deletions docs/authentication/concepts.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,17 @@ There can be many types of Authenticator. For example, there can be Authenticato
and more! A key goal of the abstraction is *extensibility*: a custom Authenticator can be developed to authenticate requests
based on an organization's unique needs.

DataHub ships with 2 Authenticators by default:
DataHub ships with 3 Authenticators by default:

- **DataHubSystemAuthenticator**: Verifies that inbound requests have originated from inside DataHub itself using a shared system identifier
and secret. This authenticator is always present.

- **DataHubTokenAuthenticator**: Verifies that inbound requests contain a DataHub-issued Access Token (discussed further in the "DataHub Access Token" section below) in their
'Authorization' header. This authenticator is required if Metadata Service Authentication is enabled.

- **DataHubGuestAuthenticator**: Checks whether guest authentication is enabled with a guest user configured and, if so, allows unauthenticated users to perform operations as the designated
guest user. By default, this Authenticator is disabled. To use it, it must be explicitly enabled, which requires a restart of the DataHub GMS service.

## What is an AuthenticatorChain?

An **AuthenticatorChain** is a series of **Authenticators** that are configured to run one-after-another. This allows
Expand Down Expand Up @@ -124,4 +127,48 @@ Today, Access Tokens are granted by the Token Service under two scenarios:
> At present, the Token Service supports the symmetric signing method `HS256` to generate and verify tokens.
Now that we're familiar with the concepts, we will talk concretely about what new capabilities have been built on top
of Metadata Service Authentication.
of Metadata Service Authentication.

## How do I enable Guest Authentication?

The Guest Authentication configuration is present in two configuration files - the `application.conf` for DataHub frontend, and
`application.yaml` for GMS. To enable Guest Authentication, set the environment variable `GUEST_AUTHENTICATION_ENABLED` to `true`
for both the GMS and the frontend service and restart those services.
If enabled, the default user designated as guest is called `guest`. This user must be explicitly created, and the privileges
assigned to it determine what guest users are allowed to do.

A recommended approach to operationalize guest access is, first, create a designated guest user account with login credentials,
but keep guest access disabled. This allows you to configure and test the exact permissions this user should have. Once you've
confirmed the privileges are set correctly, you can then enable guest access, which removes the need for login/credentials
while maintaining the verified permission settings.

The name of the designated guest user can be changed by defining the env var `GUEST_AUTHENTICATION_USER`.
The entry URL to authenticate as the guest user is `/public` and can be changed via the env var `GUEST_AUTHENTICATION_PATH`.

Here are the relevant portions of the two configs:

For the Frontend
```yaml
#application.conf
...
auth.guest.enabled = ${?GUEST_AUTHENTICATION_ENABLED}
# The name of the guest user id
auth.guest.user = ${?GUEST_AUTHENTICATION_USER}
# The path to bypass login page and get logged in as guest
auth.guest.path = ${?GUEST_AUTHENTICATION_PATH}
...
```

and for GMS
```yaml
#application.yaml
# Required if enabled is true! A configurable chain of Authenticators
...
authenticators:
...
- type: com.datahub.authentication.authenticator.DataHubGuestAuthenticator
configs:
guestUser: ${GUEST_AUTHENTICATION_USER:guest}
enabled: ${GUEST_AUTHENTICATION_ENABLED:false}
...
```
3 changes: 3 additions & 0 deletions li-utils/src/main/java/com/linkedin/metadata/Constants.java
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,9 @@ public class Constants {
"dataHubStepStateProperties";

// Authorization
// Do not use this env var directly to determine if REST API Auth is to be enabled. Instead, use
// the spring property "authorization.restApiAuthorization" from application.yaml for
// consistency. The spring property can be initialized by this env var (among other methods).
public static final String REST_API_AUTHORIZATION_ENABLED_ENV = "REST_API_AUTHORIZATION_ENABLED";

// Metadata Change Event Parameter Names
Expand Down
3 changes: 2 additions & 1 deletion metadata-auth/auth-api/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,15 @@ dependencies() {

implementation externalDependency.guava
compileOnly externalDependency.lombok
compileOnly externalDependency.springContext

annotationProcessor externalDependency.lombok

testImplementation externalDependency.testng
testImplementation externalDependency.mockito
testImplementation project(path: ':metadata-operation-context')
testImplementation 'uk.org.webcompere:system-stubs-testng:2.1.6'

testImplementation externalDependency.springBootTest
}

task sourcesJar(type: Jar) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import static com.linkedin.metadata.Constants.ML_MODEL_GROUP_ENTITY_NAME;
import static com.linkedin.metadata.Constants.ML_PRIMARY_KEY_ENTITY_NAME;
import static com.linkedin.metadata.Constants.NOTEBOOK_ENTITY_NAME;
import static com.linkedin.metadata.Constants.REST_API_AUTHORIZATION_ENABLED_ENV;
import static com.linkedin.metadata.authorization.ApiGroup.ENTITY;
import static com.linkedin.metadata.authorization.ApiOperation.CREATE;
import static com.linkedin.metadata.authorization.ApiOperation.DELETE;
Expand Down Expand Up @@ -53,7 +52,10 @@
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.annotation.PostConstruct;
import org.apache.http.HttpStatus;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

/**
* Notes: This class is an attempt to unify privilege checks across APIs.
Expand All @@ -67,8 +69,25 @@
* <p>isAPI...() functions are intended for OpenAPI and Rest.li since they are governed by an enable
* flag. GraphQL is always enabled and should use is...() functions.
*/
@Component
public class AuthUtil {

// Since all methods of this class are static, need to postConstruct to initialize the static var
// from the instance var that spring can initialize
// TODO: Some unit tests seem to rely on this being false, so setting the default to false.
// When running as the spring boot application, the default property value is true.
private static boolean isRestApiAuthorizationEnabled = false;

// Eliminating the dependency on the env var REST_API_AUTHORIZATION_ENABLED and instead using the
// application property to keep it consistent with all other usage of that property.
@Value("${authorization.restApiAuthorization:true}")
protected Boolean restApiAuthorizationEnabled;

/**
 * Copies the instance-level {@code restApiAuthorizationEnabled} value, which is populated through
 * its {@code @Value} annotation, into the static {@code isRestApiAuthorizationEnabled} flag so
 * that the static methods of this class can consult it.
 */
@PostConstruct
protected void init() {
  final boolean enabled = this.restApiAuthorizationEnabled;
  AuthUtil.isRestApiAuthorizationEnabled = enabled;
}

/**
* This should generally follow the policy creation UI with a few exceptions for users, groups,
* containers, etc so that the platform still functions as expected.
Expand Down Expand Up @@ -340,7 +359,7 @@ private static boolean isAPIAuthorized(
@Nonnull final AuthorizationSession session,
@Nonnull final Disjunctive<Conjunctive<PoliciesConfig.Privilege>> privileges,
@Nonnull final Collection<EntitySpec> resources) {
if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV))) {
if (AuthUtil.isRestApiAuthorizationEnabled) {
return isAuthorized(session, buildDisjunctivePrivilegeGroup(privileges), resources);
} else {
return true;
Expand Down Expand Up @@ -583,5 +602,5 @@ private static boolean isDenied(
return AuthorizationResult.Type.DENY.equals(result.getType());
}

private AuthUtil() {}
protected AuthUtil() {}
}
Loading

0 comments on commit d381b9e

Please sign in to comment.