diff --git a/conf/benchmarks.lst b/conf/benchmarks.lst
index 3ef7717b5..4d62ba3a4 100644
--- a/conf/benchmarks.lst
+++ b/conf/benchmarks.lst
@@ -24,5 +24,6 @@ ml.linear
ml.lda
ml.svm
ml.gmm
+ml.xgboost
-graph.nweight
\ No newline at end of file
+graph.nweight
diff --git a/conf/workloads/ml/xgboost.conf b/conf/workloads/ml/xgboost.conf
index 7788f6242..9c04e6da6 100644
--- a/conf/workloads/ml/xgboost.conf
+++ b/conf/workloads/ml/xgboost.conf
@@ -17,7 +17,7 @@ hibench.xgboost.features ${hibench.xgboost.${hibench.scale.pr
hibench.xgboost.partitions ${hibench.default.map.parallelism}
hibench.xgboost.numClasses 2
-hibench.xgboost.maxDepth 30
+hibench.xgboost.maxDepth 8
hibench.xgboost.maxBins 32
hibench.xgboost.numIterations 20
hibench.xgboost.learningRate 0.1
diff --git a/pom.xml b/pom.xml
index 6bfe47d04..4e90e1178 100644
--- a/pom.xml
+++ b/pom.xml
@@ -79,6 +79,17 @@
Scala-tools Maven 2 Repository
https://oss.sonatype.org/content/groups/scala-tools/
+
+ xgboostrepo
+ XGBoost Maven Repo
+ https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release
+
+ true
+
+
+ false
+
+
diff --git a/sparkbench/ml/pom.xml b/sparkbench/ml/pom.xml
index 44ae66cc9..32075c625 100644
--- a/sparkbench/ml/pom.xml
+++ b/sparkbench/ml/pom.xml
@@ -56,12 +56,12 @@
ml.dmlc
xgboost4j_${scala.binary.version}
- 1.0.0
+ 1.1.0
ml.dmlc
xgboost4j-spark_${scala.binary.version}
- 1.0.0
+ 1.1.0
diff --git a/sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/XGBoost.scala b/sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/XGBoost.scala
index 9f68e5abd..7664dab1d 100644
--- a/sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/XGBoost.scala
+++ b/sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/XGBoost.scala
@@ -18,11 +18,9 @@
package com.intel.hibench.sparkbench.ml
import org.apache.spark.{SparkConf, SparkContext}
-import org.apache.spark.mllib.tree.GradientBoostedTrees
-import org.apache.spark.mllib.tree.configuration.BoostingStrategy
-import org.apache.spark.mllib.tree.model.GradientBoostedTreesModel
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.{LabeledPoint => NewLabeledPoint}
import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
@@ -33,7 +31,7 @@ object XGBoost {
case class Params(
numClasses: Int = 2,
- maxDepth: Int = 30,
+ maxDepth: Int = 8,
maxBins: Int = 32,
numIterations: Int = 20,
learningRate: Double = 0.1,
@@ -93,7 +91,7 @@ object XGBoost {
val mllibRDD: RDD[LabeledPoint] = sc.objectFile(dataPath)
// Convert to ML LabeledPoint and to DataFrame
val mlRDD: RDD[NewLabeledPoint] = mllibRDD.map { p => NewLabeledPoint(p.label, p.features.asML) }
- val data = mlRDD.toDF
+ val data = mlRDD.toDF("label", "features")
// Split the data into training and test sets (30% held out for testing)
val splits = data.randomSplit(Array(0.7, 0.3))
@@ -123,7 +121,9 @@ object XGBoost {
setFeaturesCol("features").
setLabelCol("label")
- val model = xgbClassifier.fit(trainingData)
+ val pipeline = new Pipeline().setStages(Array(xgbClassifier))
+
+ val model = pipeline.fit(trainingData)
// Make predictions.
val predictions = model.transform(testData)
diff --git a/travis/benchmarks_ml.lst b/travis/benchmarks_ml.lst
index 6e4894a5a..362df64d4 100644
--- a/travis/benchmarks_ml.lst
+++ b/travis/benchmarks_ml.lst
@@ -9,3 +9,4 @@ ml.linear
ml.lda
ml.svm
ml.gmm
+ml.xgboost