From c3ed78122d499479292c4065035ffb0d54688fe8 Mon Sep 17 00:00:00 2001
From: Hongfei Li
Date: Wed, 15 May 2024 14:26:51 +0000
Subject: [PATCH] Add GPU test for unbiased LambdaRank (position bias)

Add a "Ranking: test position bias" case to GpuXGBoostRegressorSuite. It
fits an XGBoostRegressor with objective rank:ndcg, lambdarank_unbiased=true
and tree_method gpu_hist on a 70/30 split of rank.train.csv, then asserts
that transform() returns one output row per test-set row.

Also remove a leftover debug println from the CPU-side
"ranking: test position bias" test in XGBoostRegressorSuite.
---
Reviewer notes (this section is not part of the commit message):

 * Dropped the commons-logging import, the `logger` field and the
   "hongfeili-scala: begin execute test" log calls from both suites;
   they were debugging aids only.
 * Wrapped the xgbParam map so that no added line exceeds 100 columns.
 * This patch was edited by hand: hunk offsets and the diffstat were
   recomputed, context-line whitespace was reconstructed, the trailing
   context of the last hunk was trimmed, and the `index` lines were
   omitted because the blob hashes of the revised content were not
   recomputed. Regenerate with git format-patch before sending.

 .../spark/GpuXGBoostRegressorSuite.scala      | 20 ++++++++++++++++++++
 .../scala/spark/XGBoostRegressorSuite.scala   |  1 -
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostRegressorSuite.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostRegressorSuite.scala
--- a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostRegressorSuite.scala
+++ b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostRegressorSuite.scala
@@ -255,4 +255,24 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite {
       assert(testDf.count() === ret.length)
     }
   }
+
+  // Smoke test for unbiased LambdaRank ("lambdarank_unbiased" -> true) on the GPU path:
+  // fit a rank:ndcg model on grouped data and check transform() keeps the row count.
+  test("Ranking: test position bias") {
+    withGpuSparkSession(enableCsvConf()) { spark =>
+      val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "rank:ndcg",
+        "num_round" -> 10, "num_workers" -> 1, "tree_method" -> "gpu_hist",
+        "features_cols" -> featureNames, "label_col" -> labelName,
+        "lambdarank_unbiased" -> true, "eval_metric" -> "ndcg")
+      val Array(trainingDf, testDf) = spark.read.option("header", "true").schema(schema)
+        .csv(getResourcePath("/rank.train.csv")).randomSplit(Array(0.7, 0.3), seed = 1)
+
+      val model = new XGBoostRegressor(xgbParam)
+        .setGroupCol(groupName)
+        .fit(trainingDf)
+
+      val ret = model.transform(testDf).collect()
+      assert(testDf.count() === ret.length)
+    }
+  }
 }
diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala
--- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala
+++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala
@@ -142,6 +142,5 @@ class XGBoostRegressorSuite extends AnyFunSuite with PerTest with TmpFolderPerSu
     val model = new XGBoostRegressor(paramMap).fit(trainingDF)
 
     val prediction = model.transform(testDF).collect()
-    println("hello---------hongfei")
     assert(testDF.count() === prediction.length)
   }