Skip to content

Commit

Permalink
Make WaldInterval the default strategy for now
Browse files Browse the repository at this point in the history
  • Loading branch information
zeotuan committed May 21, 2024
1 parent 71d6e3f commit 387ab81
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import com.amazon.deequ.profiles.ColumnProfile
import com.amazon.deequ.suggestions.CommonConstraintSuggestion
import com.amazon.deequ.suggestions.ConstraintSuggestion
import com.amazon.deequ.suggestions.rules.RetainCompletenessRule._
import com.amazon.deequ.suggestions.rules.interval.{ConfidenceIntervalStrategy, WilsonScoreIntervalStrategy}
import com.amazon.deequ.suggestions.rules.interval.{ConfidenceIntervalStrategy, WaldIntervalStrategy, WilsonScoreIntervalStrategy}

/**
* If a column is incomplete in the sample, we model its completeness as a binomial variable,
Expand Down Expand Up @@ -71,5 +71,5 @@ case class RetainCompletenessRule(
/**
* Companion object holding the private default values of the completeness rule:
* a minimum completeness, a maximum completeness and a confidence-interval strategy.
*
* NOTE(review): this text is a rendered commit diff without +/- markers, so both the old
* and the new definition of `defaultIntervalStrategy` appear below. Going by the commit
* title ("Make WaldInterval the default strategy for now"), the Wilson line is presumably
* the removed one and the Wald line the one that remains -- confirm against the real file.
* Presumably these values feed the default arguments of the `RetainCompletenessRule`
* case class; its parameter list is not visible here -- TODO confirm.
*/
object RetainCompletenessRule {
// Default lower completeness value (0.2).
private val defaultMinCompleteness: Double = 0.2
// Default upper completeness value (1.0).
private val defaultMaxCompleteness: Double = 1.0
// Pre-commit default (presumably the removed diff line): Wilson score interval strategy.
private val defaultIntervalStrategy: ConfidenceIntervalStrategy = WilsonScoreIntervalStrategy()
// Post-commit default (presumably the added diff line): Wald interval strategy.
private val defaultIntervalStrategy: ConfidenceIntervalStrategy = WaldIntervalStrategy()
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ import org.scalatest.wordspec.AnyWordSpec

class IntervalStrategyTest extends AnyWordSpec with FixtureSupport with SparkContextSpec
with MockFactory {

"ConfidenceIntervalStrategy" should {
"be calculated correctly" in {
val waldStrategy = WaldIntervalStrategy()
val wilsonStrategy = WilsonScoreIntervalStrategy()

val table = Table(
("strategy", "pHat", "numRecord", "lowerBound", "upperBound"),
(waldStrategy, 1.0, 20L, 1.0, 1.0),
Expand All @@ -38,14 +40,16 @@ class IntervalStrategyTest extends AnyWordSpec with FixtureSupport with SparkCon
(waldStrategy, 0.6, 100L, 0.5, 0.7),
(waldStrategy, 0.9, 100L, 0.84, 0.96),
(waldStrategy, 1.0, 100L, 1.0, 1.0),

(wilsonStrategy, 0.01, 20L, 0.00, 0.18),
(wilsonStrategy, 1.0, 20L, 0.83, 1.0),
(wilsonStrategy, 0.5, 100L, 0.4, 0.6),
(wilsonStrategy, 0.4, 100L, 0.3, 0.5),
(wilsonStrategy, 0.6, 100L, 0.5, 0.7),
(wilsonStrategy, 0.9, 100L, 0.82, 0.95),
(wilsonStrategy, 1.0, 100L, 0.96, 1.0),
(wilsonStrategy, 1.0, 100L, 0.96, 1.0)
)

forAll(table) { case (strategy, pHat, numRecords, lowerBound, upperBound) =>
val actualInterval = strategy.calculateTargetConfidenceInterval(pHat, numRecords)
assert(actualInterval == ConfidenceInterval(lowerBound, upperBound))
Expand Down

0 comments on commit 387ab81

Please sign in to comment.