Skip to content

Commit 79c362c

Browse files
yuqi1129jerryshao
andauthored
[#5492] feat(hadoop-catalog): Support Azure blob storage for Gravitino server and GVFS Java client (#5508)
### What changes were proposed in this pull request? Add support for Azure Blob Storage in the Gravitino server and the GVFS Java client. ### Why are the changes needed? It's a big improvement for fileset usage. Fix: #5492 ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? ITs --------- Co-authored-by: Jerry Shao <jerryshao@datastrato.com>
1 parent 3907b04 commit 79c362c

File tree

14 files changed

+585
-4
lines changed

14 files changed

+585
-4
lines changed

LICENSE.bin

+1
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@
285285
Apache Hadoop Aliyun connector
286286
Apache Hadoop GCS connector
287287
Apache Hadoop AWS connector
288+
Apache Hadoop Azure connector
288289
Apache Hadoop Annotatations
289290
Apache Hadoop Auth
290291
Apache Hadoop Client Aggregator

build.gradle.kts

+2-2
Original file line numberDiff line numberDiff line change
@@ -774,7 +774,7 @@ tasks {
774774
!it.name.startsWith("client") && !it.name.startsWith("filesystem") && !it.name.startsWith("spark") && !it.name.startsWith("iceberg") && it.name != "trino-connector" &&
775775
it.name != "integration-test" && it.name != "bundled-catalog" && !it.name.startsWith("flink") &&
776776
it.name != "integration-test" && it.name != "hive-metastore-common" && !it.name.startsWith("flink") &&
777-
it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name != "aws-bundle"
777+
it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name != "azure-bundle"
778778
) {
779779
from(it.configurations.runtimeClasspath)
780780
into("distribution/package/libs")
@@ -796,7 +796,7 @@ tasks {
796796
!it.name.startsWith("trino-connector") &&
797797
it.name != "bundled-catalog" &&
798798
it.name != "hive-metastore-common" && it.name != "gcp-bundle" &&
799-
it.name != "aliyun-bundle" && it.name != "aws-bundle"
799+
it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name != "azure-bundle"
800800
) {
801801
dependsOn("${it.name}:build")
802802
from("${it.name}/build/libs")

bundles/azure-bundle/build.gradle.kts

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
20+
21+
plugins {
  `maven-publish`
  id("java")
  alias(libs.plugins.shadow)
}

dependencies {
  // compileOnly dependencies are needed to compile the provider but are not part of
  // runtimeClasspath, so the ShadowJar task below does not bundle them.
  compileOnly(project(":api"))
  compileOnly(project(":core"))
  compileOnly(project(":catalogs:catalog-hadoop"))

  compileOnly(libs.hadoop3.common)

  implementation(libs.commons.lang3)
  // runtime used
  implementation(libs.commons.logging)
  implementation(libs.hadoop3.abs)
  // Only the catalog-common classes themselves are wanted; every transitive
  // dependency of that project is excluded.
  implementation(project(":catalogs:catalog-common")) {
    exclude("*")
  }
}

tasks.withType(ShadowJar::class.java) {
  // Zip64 lifts the 65535-entry limit of the classic zip format.
  isZip64 = true
  configurations = listOf(project.configurations.runtimeClasspath.get())
  // An empty classifier makes the shaded jar the module's primary artifact.
  archiveClassifier.set("")

  // Relocate dependencies to avoid conflicts.
  // Shadow matches relocation patterns against Java package names, not Maven
  // coordinates: Apache HttpComponents classes live under "org.apache.http" and
  // Guava classes under "com.google.common". Using the group/artifact ids here
  // would match no classes and leave those libraries unrelocated.
  relocate("org.apache.http", "org.apache.gravitino.azure.shaded.org.apache.http")
  relocate("org.apache.commons", "org.apache.gravitino.azure.shaded.org.apache.commons")
  relocate("com.fasterxml", "org.apache.gravitino.azure.shaded.com.fasterxml")
  relocate("com.google.common", "org.apache.gravitino.azure.shaded.com.google.common")
}

tasks.jar {
  // Build the shaded jar whenever the plain jar is requested, and move the plain
  // jar out of the way with the "empty" classifier so the two do not collide.
  dependsOn(tasks.named("shadowJar"))
  archiveClassifier.set("empty")
}

tasks.compileJava {
  // Compilation waits for the catalog-hadoop "runtimeJars" task.
  dependsOn(":catalogs:catalog-hadoop:runtimeJars")
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.gravitino.abs.fs;
21+
22+
import com.google.common.annotations.VisibleForTesting;
23+
import com.google.common.collect.ImmutableMap;
24+
import java.io.IOException;
25+
import java.util.Map;
26+
import javax.annotation.Nonnull;
27+
import org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider;
28+
import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils;
29+
import org.apache.gravitino.storage.ABSProperties;
30+
import org.apache.hadoop.conf.Configuration;
31+
import org.apache.hadoop.fs.FileSystem;
32+
import org.apache.hadoop.fs.Path;
33+
34+
public class AzureFileSystemProvider implements FileSystemProvider {
35+
36+
@VisibleForTesting public static final String ABS_PROVIDER_SCHEME = "abfss";
37+
38+
@VisibleForTesting public static final String ABS_PROVIDER_NAME = "abs";
39+
40+
private static final String ABFS_IMPL = "org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem";
41+
42+
private static final String ABFS_IMPL_KEY = "fs.abfss.impl";
43+
44+
@Override
45+
public FileSystem getFileSystem(@Nonnull Path path, @Nonnull Map<String, String> config)
46+
throws IOException {
47+
Configuration configuration = new Configuration();
48+
49+
Map<String, String> hadoopConfMap =
50+
FileSystemUtils.toHadoopConfigMap(config, ImmutableMap.of());
51+
52+
if (config.containsKey(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME)
53+
&& config.containsKey(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY)) {
54+
hadoopConfMap.put(
55+
String.format(
56+
"fs.azure.account.key.%s.dfs.core.windows.net",
57+
config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME)),
58+
config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY));
59+
}
60+
61+
if (!config.containsKey(ABFS_IMPL_KEY)) {
62+
configuration.set(ABFS_IMPL_KEY, ABFS_IMPL);
63+
}
64+
65+
hadoopConfMap.forEach(configuration::set);
66+
67+
return FileSystem.get(path.toUri(), configuration);
68+
}
69+
70+
@Override
71+
public String scheme() {
72+
return ABS_PROVIDER_SCHEME;
73+
}
74+
75+
@Override
76+
public String name() {
77+
return ABS_PROVIDER_NAME;
78+
}
79+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
org.apache.gravitino.abs.fs.AzureFileSystemProvider
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.gravitino.storage;
21+
22+
/**
 * Property keys used to configure access to Azure Blob Storage.
 *
 * <p>This class only holds constants and is not meant to be instantiated or extended.
 */
public final class ABSProperties {

  /** The account name of the Azure Blob Storage. */
  public static final String GRAVITINO_ABS_ACCOUNT_NAME = "abs-account-name";

  /** The account key of the Azure Blob Storage. */
  public static final String GRAVITINO_ABS_ACCOUNT_KEY = "abs-account-key";

  // Constants holder: prevent instantiation.
  private ABSProperties() {}
}

catalogs/catalog-hadoop/build.gradle.kts

+1
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ dependencies {
8080
testImplementation(project(":bundles:aws-bundle"))
8181
testImplementation(project(":bundles:gcp-bundle"))
8282
testImplementation(project(":bundles:aliyun-bundle"))
83+
testImplementation(project(":bundles:azure-bundle"))
8384

8485
testImplementation(libs.minikdc)
8586
testImplementation(libs.hadoop3.minicluster)

0 commit comments

Comments
 (0)