Skip to content

Commit c21b022

Browse files
charliecheng630Charlie Cheng
and
Charlie Cheng
authored
[#2736] feat(spark-connector) support hive external table (#2739)
### What changes were proposed in this pull request? support hive external table format `CREATE EXTERNAL TABLE family (id INT, name STRING)` ### Why are the changes needed? Fix: #2736 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT and IT --------- Co-authored-by: Charlie Cheng <charlie.cheng@cacafly.com>
1 parent f7f12d8 commit c21b022

File tree

5 files changed

+89
-3
lines changed

5 files changed

+89
-3
lines changed

integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java

+26-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import java.util.Set;
1818
import java.util.stream.Collectors;
1919
import org.apache.commons.io.FileUtils;
20+
import org.apache.hadoop.fs.FileStatus;
2021
import org.apache.hadoop.fs.Path;
2122
import org.apache.spark.sql.AnalysisException;
2223
import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException;
@@ -650,6 +651,21 @@ protected void checkDirExists(Path dir) {
650651
}
651652
}
652653

654+
protected void checkDataFileExists(Path dir) {
655+
Boolean isExists = false;
656+
try {
657+
for (FileStatus fileStatus : hdfs.listStatus(dir)) {
658+
if (fileStatus.isFile()) {
659+
isExists = true;
660+
break;
661+
}
662+
}
663+
Assertions.assertTrue(isExists);
664+
} catch (IOException e) {
665+
throw new RuntimeException(e);
666+
}
667+
}
668+
653669
@Test
654670
void testTableOptions() {
655671
String tableName = "options_table";
@@ -726,9 +742,17 @@ protected String getExpectedTableData(SparkTableInfo table) {
726742
}
727743

728744
protected String getCreateSimpleTableString(String tableName) {
745+
return getCreateSimpleTableString(tableName, false);
746+
}
747+
748+
protected String getCreateSimpleTableString(String tableName, boolean isExternal) {
749+
String external = "";
750+
if (isExternal) {
751+
external = "EXTERNAL";
752+
}
729753
return String.format(
730-
"CREATE TABLE %s (id INT COMMENT 'id comment', name STRING COMMENT '', age INT)",
731-
tableName);
754+
"CREATE %s TABLE %s (id INT COMMENT 'id comment', name STRING COMMENT '', age INT)",
755+
external, tableName);
732756
}
733757

734758
protected List<SparkColumnInfo> getSimpleTableColumn() {

integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java

+21
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,27 @@ void testHiveFormatWithStoredAs() {
206206
checkParquetFile(tableInfo);
207207
}
208208

209+
@Test
210+
void testHiveFormatWithExternalTable() {
211+
String tableName = "test_hive_format_with_external_table";
212+
dropTableIfExists(tableName);
213+
String createTableSql = getCreateSimpleTableString(tableName, true);
214+
sql(createTableSql);
215+
SparkTableInfo tableInfo = getTableInfo(tableName);
216+
217+
SparkTableInfoChecker checker =
218+
SparkTableInfoChecker.create()
219+
.withName(tableName)
220+
.withTableProperties(
221+
ImmutableMap.of(HivePropertiesConstants.SPARK_HIVE_EXTERNAL, "true"));
222+
checker.check(tableInfo);
223+
checkTableReadWrite(tableInfo);
224+
225+
dropTableIfExists(tableName);
226+
Path tableLocation = new Path(tableInfo.getTableLocation());
227+
checkDataFileExists(tableLocation);
228+
}
229+
209230
@Test
210231
void testHiveFormatWithUsing() {
211232
String tableName = "test_hive_format_using_table";

spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/hive/HivePropertiesConstants.java

+5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
package com.datastrato.gravitino.spark.connector.hive;
77

8+
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.EXTERNAL_TABLE;
9+
810
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata;
911
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.StorageFormat;
1012
import com.google.common.annotations.VisibleForTesting;
@@ -27,11 +29,14 @@ public class HivePropertiesConstants {
2729
public static final String GRAVITINO_HIVE_FORMAT_AVRO = StorageFormat.AVRO.toString();
2830
public static final String GRAVITINO_HIVE_FORMAT_JSON = StorageFormat.JSON.toString();
2931
public static final String GRAVITINO_HIVE_FORMAT_CSV = StorageFormat.CSV.toString();
32+
public static final String GRAVITINO_HIVE_EXTERNAL_TABLE = EXTERNAL_TABLE.name();
33+
public static final String GRAVITINO_HIVE_TABLE_TYPE = "table-type";
3034

3135
public static final String SPARK_HIVE_STORED_AS = "hive.stored-as";
3236
public static final String SPARK_HIVE_INPUT_FORMAT = "input-format";
3337
public static final String SPARK_HIVE_OUTPUT_FORMAT = "output-format";
3438
public static final String SPARK_HIVE_SERDE_LIB = "serde-lib";
39+
public static final String SPARK_HIVE_EXTERNAL = "external";
3540

3641
@VisibleForTesting
3742
public static final String TEXT_INPUT_FORMAT_CLASS =

spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/hive/HivePropertiesConverter.java

+19-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
package com.datastrato.gravitino.spark.connector.hive;
77

8+
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata;
89
import com.datastrato.gravitino.spark.connector.PropertiesConverter;
910
import com.google.common.annotations.VisibleForTesting;
1011
import com.google.common.collect.ImmutableMap;
@@ -60,6 +61,10 @@ public Map<String, String> toGravitinoTableProperties(Map<String, String> proper
6061
String provider = gravitinoTableProperties.get(TableCatalog.PROP_PROVIDER);
6162
String storeAs = gravitinoTableProperties.get(HivePropertiesConstants.SPARK_HIVE_STORED_AS);
6263
String fileFormat = Optional.ofNullable(storeAs).orElse(provider);
64+
String isExternal =
65+
Optional.ofNullable(gravitinoTableProperties.get(TableCatalog.PROP_EXTERNAL))
66+
.orElse("false");
67+
6368
if (fileFormat != null) {
6469
String gravitinoFormat = fileFormatMap.get(fileFormat.toLowerCase(Locale.ROOT));
6570
if (gravitinoFormat != null) {
@@ -70,6 +75,11 @@ public Map<String, String> toGravitinoTableProperties(Map<String, String> proper
7075
}
7176
}
7277

78+
if (isExternal.equalsIgnoreCase("true")) {
79+
gravitinoTableProperties.put(
80+
HivePropertiesConstants.GRAVITINO_HIVE_TABLE_TYPE,
81+
HiveTablePropertiesMetadata.TableType.EXTERNAL_TABLE.name());
82+
}
7383
sparkToGravitinoPropertyMap.forEach(
7484
(sparkProperty, gravitinoProperty) -> {
7585
if (gravitinoTableProperties.containsKey(sparkProperty)) {
@@ -83,7 +93,15 @@ public Map<String, String> toGravitinoTableProperties(Map<String, String> proper
8393

8494
@Override
8595
public Map<String, String> toSparkTableProperties(Map<String, String> properties) {
86-
return toOptionProperties(properties);
96+
Map<String, String> sparkTableProperties = toOptionProperties(properties);
97+
String hiveTableType =
98+
sparkTableProperties.get(HivePropertiesConstants.GRAVITINO_HIVE_TABLE_TYPE);
99+
if (HivePropertiesConstants.GRAVITINO_HIVE_EXTERNAL_TABLE.equalsIgnoreCase(hiveTableType)) {
100+
sparkTableProperties.remove(HivePropertiesConstants.GRAVITINO_HIVE_TABLE_TYPE);
101+
sparkTableProperties.put(HivePropertiesConstants.SPARK_HIVE_EXTERNAL, "true");
102+
}
103+
104+
return sparkTableProperties;
87105
}
88106

89107
@VisibleForTesting

spark-connector/src/test/java/com/datastrato/gravitino/spark/connector/hive/TestHivePropertiesConverter.java

+18
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,24 @@ void testTableFormat() {
7878
ImmutableMap.of(TableCatalog.OPTION_PREFIX + "a", "a", "b", "b"), hiveProperties);
7979
}
8080

81+
@Test
82+
void testExternalTable() {
83+
Map<String, String> hiveProperties =
84+
hivePropertiesConverter.toGravitinoTableProperties(
85+
ImmutableMap.of(HivePropertiesConstants.SPARK_HIVE_EXTERNAL, "true"));
86+
Assertions.assertEquals(
87+
hiveProperties.get(HivePropertiesConstants.GRAVITINO_HIVE_TABLE_TYPE),
88+
HivePropertiesConstants.GRAVITINO_HIVE_EXTERNAL_TABLE);
89+
90+
hiveProperties =
91+
hivePropertiesConverter.toSparkTableProperties(
92+
ImmutableMap.of(
93+
HivePropertiesConstants.GRAVITINO_HIVE_TABLE_TYPE,
94+
HivePropertiesConstants.GRAVITINO_HIVE_EXTERNAL_TABLE));
95+
Assertions.assertEquals(
96+
ImmutableMap.of(HivePropertiesConstants.SPARK_HIVE_EXTERNAL, "true"), hiveProperties);
97+
}
98+
8199
@Test
82100
void testOptionProperties() {
83101
Map<String, String> properties =

0 commit comments

Comments
 (0)