From 4f2d60dd387c095b448365931492249208671b57 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Sun, 3 Mar 2024 22:09:12 -0800 Subject: [PATCH] [HUDI-7471] Use existing util method to get Spark conf in tests (#10802) --- .../hudi/testutils/HoodieClientTestUtils.java | 2 +- .../hudi/testutils/providers/SparkProvider.java | 2 +- .../datasources/TestHoodieInMemoryFileIndex.scala | 5 ++--- .../org/apache/hudi/TestHoodieSparkSqlWriter.scala | 9 +++++++-- .../org/apache/hudi/TestHoodieSparkUtils.scala | 13 +++---------- .../deltastreamer/TestSourceFormatAdapter.java | 5 ++--- .../sources/helpers/TestSanitizationUtils.java | 6 ++---- .../hudi/utilities/testutils/UtilitiesTestBase.java | 2 +- .../transform/TestSqlQueryBasedTransformer.java | 4 ++-- 9 files changed, 21 insertions(+), 27 deletions(-) diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java index 2413bf2dffd43..57a2793f0f660 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java @@ -87,7 +87,7 @@ public class HoodieClientTestUtils { */ public static SparkConf getSparkConfForTest(String appName) { SparkConf sparkConf = new SparkConf().setAppName(appName) - .setMaster("local[4]") + .setMaster("local[8]") .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") .set("spark.kryo.registrator", "org.apache.spark.HoodieSparkKryoRegistrar") .set("spark.sql.shuffle.partitions", "4") diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/providers/SparkProvider.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/providers/SparkProvider.java index 3a8bb1a300f1d..91045034e5f3e 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/providers/SparkProvider.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/providers/SparkProvider.java @@ -38,7 +38,7 @@ public interface SparkProvider extends org.apache.hudi.testutils.providers.Hoodi default SparkConf conf(Map overwritingConfigs) { SparkConf sparkConf = new SparkConf(); sparkConf.set("spark.app.name", getClass().getName()); - sparkConf.set("spark.master", "local[*]"); + sparkConf.set("spark.master", "local[8]"); sparkConf.set("spark.default.parallelism", "4"); sparkConf.set("spark.sql.shuffle.partitions", "4"); sparkConf.set("spark.driver.maxResultSize", "2g"); diff --git a/hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/execution/datasources/TestHoodieInMemoryFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/execution/datasources/TestHoodieInMemoryFileIndex.scala index 8e7f6bf14b7e5..c9052a952e687 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/execution/datasources/TestHoodieInMemoryFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/spark/execution/datasources/TestHoodieInMemoryFileIndex.scala @@ -18,6 +18,7 @@ package org.apache.spark.execution.datasources import org.apache.hadoop.fs.Path +import org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest import org.apache.spark.sql.SparkSession import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Test @@ -31,9 +32,7 @@ class TestHoodieInMemoryFileIndex { @Test def testCreateInMemoryIndex(@TempDir tempDir: File): Unit = { val spark = SparkSession.builder - .appName("Hoodie Datasource test") - .master("local[2]") - .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .config(getSparkConfForTest("Hoodie Datasource test")) .getOrCreate val folders: Seq[Path] = Seq( diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala index c57785e5ffea7..d7a1f9331ae1f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala @@ -243,10 +243,15 @@ class TestHoodieSparkSqlWriter { @Test def testThrowExceptionInvalidSerializer(): Unit = { spark.stop() - val session = SparkSession.builder().appName("hoodie_test").master("local").getOrCreate() + val session = SparkSession.builder() + // Here we intentionally remove the "spark.serializer" config to test failure + .config(getSparkConfForTest("hoodie_test").remove("spark.serializer")) + .getOrCreate() try { val sqlContext = session.sqlContext - val options = Map("path" -> "hoodie/test/path", HoodieWriteConfig.TBL_NAME.key -> "hoodie_test_tbl") + val options = Map( + "path" -> (tempPath.toUri.toString + "/testThrowExceptionInvalidSerializer/basePath"), + HoodieWriteConfig.TBL_NAME.key -> "hoodie_test_tbl") val e = intercept[HoodieException](HoodieSparkSqlWriter.write(sqlContext, SaveMode.ErrorIfExists, options, session.emptyDataFrame)) assert(e.getMessage.contains("spark.serializer")) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala index 15b6b2b35da76..85c3c619111b6 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala @@ -20,6 +20,7 @@ package org.apache.hudi import org.apache.avro.generic.GenericRecord import org.apache.hudi.testutils.DataSourceTestUtils +import org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest import org.apache.spark.sql.types.{ArrayType, StructField, StructType} import org.apache.spark.sql.{DataFrame, Row, SparkSession} import org.junit.jupiter.api.Assertions._ @@ -88,11 +89,7 @@ class TestHoodieSparkUtils { @Test def testCreateRddSchemaEvol(): Unit = { val spark = SparkSession.builder - .appName("Hoodie Datasource test") - .master("local[2]") - .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") - .config("spark.kryo.registrator", "org.apache.spark.HoodieSparkKryoRegistrar") - .config("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension") + .config(getSparkConfForTest("Hoodie Datasource test")) .getOrCreate val schema = DataSourceTestUtils.getStructTypeExampleSchema @@ -126,11 +123,7 @@ class TestHoodieSparkUtils { @Test def testCreateRddWithNestedSchemas(): Unit = { val spark = SparkSession.builder - .appName("Hoodie Datasource test") - .master("local[2]") - .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") - .config("spark.kryo.registrator", "org.apache.spark.HoodieSparkKryoRegistrar") - .config("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension") + .config(getSparkConfForTest("Hoodie Datasource test")) .getOrCreate val innerStruct1 = new StructType().add("innerKey","string",false).add("innerValue", "long", true) diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSourceFormatAdapter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSourceFormatAdapter.java index 1d6f2f110b2b2..788105c202843 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSourceFormatAdapter.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSourceFormatAdapter.java @@ -49,6 +49,7 @@ import java.util.stream.Stream; +import static org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -64,9 +65,7 @@ public class TestSourceFormatAdapter { public static void start() { spark = SparkSession .builder() - .master("local[*]") - .appName(TestSourceFormatAdapter.class.getName()) - .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .config(getSparkConfForTest(TestSourceFormatAdapter.class.getName())) .getOrCreate(); jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java index 1a660ac713534..39dfa430268e3 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java @@ -22,7 +22,6 @@ import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; -import org.apache.hudi.utilities.deltastreamer.TestSourceFormatAdapter; import org.apache.hudi.utilities.testutils.SanitizationTestUtils; import org.apache.avro.Schema; @@ -45,6 +44,7 @@ import java.io.InputStream; import java.util.stream.Stream; +import static org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest; import static org.apache.hudi.utilities.testutils.SanitizationTestUtils.generateProperFormattedSchema; import static org.apache.hudi.utilities.testutils.SanitizationTestUtils.generateRenamedSchemaWithConfiguredReplacement; import static org.apache.hudi.utilities.testutils.SanitizationTestUtils.generateRenamedSchemaWithDefaultReplacement; @@ -61,9 +61,7 @@ public class TestSanitizationUtils { public static void start() { spark = SparkSession .builder() - .master("local[*]") - .appName(TestSourceFormatAdapter.class.getName()) - .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .config(getSparkConfForTest(TestSanitizationUtils.class.getName())) .getOrCreate(); jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java index f68d88253e2aa..298a76a2aff34 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java @@ -159,7 +159,7 @@ public static void initTestServices(boolean needsHdfs, boolean needsHive, boolea zookeeperTestService.start(); } - jsc = UtilHelpers.buildSparkContext(UtilitiesTestBase.class.getName() + "-hoodie", "local[4]"); + jsc = UtilHelpers.buildSparkContext(UtilitiesTestBase.class.getName() + "-hoodie", "local[8]"); context = new HoodieSparkEngineContext(jsc); sqlContext = new SQLContext(jsc); sparkSession = SparkSession.builder().config(jsc.getConf()).getOrCreate(); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/transform/TestSqlQueryBasedTransformer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/transform/TestSqlQueryBasedTransformer.java index b6fdc25824226..6f05dc1b184fa 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/transform/TestSqlQueryBasedTransformer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/transform/TestSqlQueryBasedTransformer.java @@ -29,6 +29,7 @@ import java.util.Collections; +import static org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -39,8 +40,7 @@ public void testSqlQuery() { SparkSession spark = SparkSession .builder() - .master("local[2]") - .appName(TestSqlQueryBasedTransformer.class.getName()) + .config(getSparkConfForTest(TestSqlQueryBasedTransformer.class.getName())) .getOrCreate(); JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());