diff --git a/src/main/scala/com/amazon/deequ/suggestions/rules/RetainCompletenessRule.scala b/src/main/scala/com/amazon/deequ/suggestions/rules/RetainCompletenessRule.scala index 7ac015fb..c7a079ca 100644 --- a/src/main/scala/com/amazon/deequ/suggestions/rules/RetainCompletenessRule.scala +++ b/src/main/scala/com/amazon/deequ/suggestions/rules/RetainCompletenessRule.scala @@ -21,7 +21,7 @@ import com.amazon.deequ.profiles.ColumnProfile import com.amazon.deequ.suggestions.CommonConstraintSuggestion import com.amazon.deequ.suggestions.ConstraintSuggestion import com.amazon.deequ.suggestions.rules.RetainCompletenessRule._ -import com.amazon.deequ.suggestions.rules.interval.{ConfidenceIntervalStrategy, WilsonScoreIntervalStrategy} +import com.amazon.deequ.suggestions.rules.interval.{ConfidenceIntervalStrategy, WaldIntervalStrategy, WilsonScoreIntervalStrategy} /** * If a column is incomplete in the sample, we model its completeness as a binomial variable, @@ -71,5 +71,5 @@ case class RetainCompletenessRule( object RetainCompletenessRule { private val defaultMinCompleteness: Double = 0.2 private val defaultMaxCompleteness: Double = 1.0 - private val defaultIntervalStrategy: ConfidenceIntervalStrategy = WilsonScoreIntervalStrategy() + private val defaultIntervalStrategy: ConfidenceIntervalStrategy = WaldIntervalStrategy() } diff --git a/src/test/scala/com/amazon/deequ/suggestions/rules/interval/IntervalStrategyTest.scala b/src/test/scala/com/amazon/deequ/suggestions/rules/interval/IntervalStrategyTest.scala index 7759477e..54e6cd1e 100644 --- a/src/test/scala/com/amazon/deequ/suggestions/rules/interval/IntervalStrategyTest.scala +++ b/src/test/scala/com/amazon/deequ/suggestions/rules/interval/IntervalStrategyTest.scala @@ -26,10 +26,12 @@ import org.scalatest.wordspec.AnyWordSpec class IntervalStrategyTest extends AnyWordSpec with FixtureSupport with SparkContextSpec with MockFactory { + "ConfidenceIntervalStrategy" should { "be calculated correctly" in { val waldStrategy = WaldIntervalStrategy() val wilsonStrategy = WilsonScoreIntervalStrategy() + val table = Table( ("strategy", "pHat", "numRecord", "lowerBound", "upperBound"), (waldStrategy, 1.0, 20L, 1.0, 1.0), @@ -38,14 +40,16 @@ class IntervalStrategyTest extends AnyWordSpec with FixtureSupport with SparkCon (waldStrategy, 0.6, 100L, 0.5, 0.7), (waldStrategy, 0.9, 100L, 0.84, 0.96), (waldStrategy, 1.0, 100L, 1.0, 1.0), + (wilsonStrategy, 0.01, 20L, 0.00, 0.18), (wilsonStrategy, 1.0, 20L, 0.83, 1.0), (wilsonStrategy, 0.5, 100L, 0.4, 0.6), (wilsonStrategy, 0.4, 100L, 0.3, 0.5), (wilsonStrategy, 0.6, 100L, 0.5, 0.7), (wilsonStrategy, 0.9, 100L, 0.82, 0.95), - (wilsonStrategy, 1.0, 100L, 0.96, 1.0), + (wilsonStrategy, 1.0, 100L, 0.96, 1.0) ) + forAll(table) { case (strategy, pHat, numRecords, lowerBound, upperBound) => val actualInterval = strategy.calculateTargetConfidenceInterval(pHat, numRecords) assert(actualInterval == ConfidenceInterval(lowerBound, upperBound))