-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy path: Chapter8_Splink_Settings.json
49 lines (49 loc) · 1.98 KB
/
Chapter8_Splink_Settings.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
{
"link_type": "link_only",
"blocking_rules_to_generate_predictions": [
"l.Postcode = r.Postcode",
"l.CompanyName = r.CompanyName"
],
"comparisons": [
{
"output_column_name": "CompanyName",
"comparison_levels": [
{
"sql_condition": "`CompanyName_l` IS NULL OR `CompanyName_r` IS NULL",
"label_for_charts": "Null",
"is_null_level": true
},
{
"sql_condition": "`CompanyName_l` = `CompanyName_r`",
"label_for_charts": "Exact match",
"m_probability": 0.40195033252096213,
"u_probability": 1.1259854624767596e-07
},
{
"sql_condition": "jaro_winkler(`CompanyName_l`, `CompanyName_r`) >= 0.9",
"label_for_charts": "Jaro_winkler >= 0.9",
"m_probability": 0.19262495295577506,
"u_probability": 3.1715257193095396e-06
},
{
"sql_condition": "jaro_winkler(`CompanyName_l`, `CompanyName_r`) >= 0.8",
"label_for_charts": "Jaro_winkler >= 0.8",
"m_probability": 0.40324986252162015,
"u_probability": 0.00024773556816926176
},
{
"sql_condition": "ELSE",
"label_for_charts": "All other comparisons",
"m_probability": 0.0021748520016426133,
"u_probability": 0.9997489803075652
}
],
"comparison_description": "Exact match vs. Companyname within jaro_winkler thresholds 0.9, 0.8 vs. anything else"
}
],
"retain_intermediate_calculation_columns": true,
"retain_matching_columns": true,
"sql_dialect": "spark",
"linker_uid": "tgZUJPpg",
"probability_two_random_records_match": 0.0001
}