Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor hints sqlresult processing to remove duplicate code #1171

Merged
merged 1 commit into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@
import bio.terra.tanagra.api.query.hint.HintInstance;
import bio.terra.tanagra.api.query.hint.HintQueryRequest;
import bio.terra.tanagra.api.query.hint.HintQueryResult;
import bio.terra.tanagra.api.shared.DataType;
import bio.terra.tanagra.api.shared.Literal;
import bio.terra.tanagra.api.shared.OrderByDirection;
import bio.terra.tanagra.api.shared.ValueDisplay;
import bio.terra.tanagra.app.configuration.ExportConfiguration;
import bio.terra.tanagra.app.configuration.UnderlayConfiguration;
import bio.terra.tanagra.indexing.job.bigquery.WriteEntityLevelDisplayHints;
Expand All @@ -34,6 +31,7 @@
import bio.terra.tanagra.service.accesscontrol.ResourceId;
import bio.terra.tanagra.underlay.ConfigReader;
import bio.terra.tanagra.underlay.Underlay;
import bio.terra.tanagra.underlay.entitymodel.Attribute;
import bio.terra.tanagra.underlay.entitymodel.Entity;
import bio.terra.tanagra.underlay.serialization.SZService;
import bio.terra.tanagra.underlay.serialization.SZUnderlay;
Expand All @@ -44,6 +42,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.slf4j.Logger;
Expand Down Expand Up @@ -216,13 +215,12 @@ public HintQueryResult getEntityLevelHints(
String bqFilterSql = bqTranslator.translator(entityFilter).buildSql(sqlParams, null);

// For each attribute with a hint, calculate new hints with the filter
StringBuilder allSql = new StringBuilder();
List<HintInstance> allHintInstances = new ArrayList<>();
entityLevelHints.getHintInstances().stream()
.map(HintInstance::getAttribute)
.parallel()
StringBuffer allSql = new StringBuffer();
List<HintInstance> allHintInstances = Collections.synchronizedList(new ArrayList<>());
entityLevelHints.getHintInstances().parallelStream()
.map(
attribute -> {
hintInstance -> {
Attribute attribute = hintInstance.getAttribute();
if (isRangeHint(attribute)) {
String sql =
WriteEntityLevelDisplayHints.buildRangeHintSql(
Expand All @@ -233,22 +231,10 @@ public HintQueryResult getEntityLevelHints(
underlay
.getQueryRunner()
.run(new SqlQueryRequest(sql, sqlParams, null, null, false));

List<HintInstance> attrHintInstances = new ArrayList<>();
sqlQueryResult
.rowResults()
.iterator()
.forEachRemaining(
sqlRowResult -> {
Double min =
sqlRowResult.get(MIN_VAL_ALIAS, DataType.DOUBLE).getDoubleVal();
Double max =
sqlRowResult.get(MAX_VAL_ALIAS, DataType.DOUBLE).getDoubleVal();
if (min != null && max != null) {
attrHintInstances.add(new HintInstance(attribute, min, max));
}
});
return new HintQueryResult(sql, attrHintInstances);
return new HintQueryResult(
sql,
HintInstance.rangeInstances(
sqlQueryResult, attribute, MIN_VAL_ALIAS, MAX_VAL_ALIAS));

} else if (isEnumHintForValueDisplay(attribute)) {
String sql =
Expand All @@ -260,22 +246,14 @@ public HintQueryResult getEntityLevelHints(
underlay
.getQueryRunner()
.run(new SqlQueryRequest(sql, sqlParams, null, null, false));

Map<ValueDisplay, Long> attrEnumValues = new HashMap<>();
sqlQueryResult
.rowResults()
.iterator()
.forEachRemaining(
sqlRowResult -> {
Literal enumVal = sqlRowResult.get(ENUM_VAL_ALIAS, DataType.INT64);
String enumDisplay =
sqlRowResult.get(ENUM_DISP_ALIAS, DataType.STRING).getStringVal();
Long enumCount =
sqlRowResult.get(ENUM_COUNT_ALIAS, DataType.INT64).getInt64Val();
attrEnumValues.put(new ValueDisplay(enumVal, enumDisplay), enumCount);
});
return new HintQueryResult(
sql, List.of(new HintInstance(attribute, attrEnumValues)));
sql,
HintInstance.valueDisplayInstance(
sqlQueryResult,
attribute,
ENUM_VAL_ALIAS,
ENUM_DISP_ALIAS,
ENUM_COUNT_ALIAS));

} else if (isEnumHintForRepeatedStringValue(attribute)) {
String sql =
Expand All @@ -287,20 +265,10 @@ public HintQueryResult getEntityLevelHints(
underlay
.getQueryRunner()
.run(new SqlQueryRequest(sql, sqlParams, null, null, false));

Map<ValueDisplay, Long> attrEnumValues = new HashMap<>();
sqlQueryResult
.rowResults()
.iterator()
.forEachRemaining(
sqlRowResult -> {
Literal enumVal = sqlRowResult.get(ENUM_VAL_ALIAS, DataType.STRING);
Long enumCount =
sqlRowResult.get(ENUM_COUNT_ALIAS, DataType.INT64).getInt64Val();
attrEnumValues.put(new ValueDisplay(enumVal), enumCount);
});
return new HintQueryResult(
sql, List.of(new HintInstance(attribute, attrEnumValues)));
sql,
HintInstance.repeatedStringInstance(
sqlQueryResult, attribute, ENUM_VAL_ALIAS, ENUM_COUNT_ALIAS));

} else {
LOGGER.info(
Expand All @@ -310,7 +278,7 @@ public HintQueryResult getEntityLevelHints(
return null;
}
})
.toList()
.filter(Objects::nonNull)
.forEach(
hqr -> {
allSql.append(hqr.getSql()).append(';');
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
package bio.terra.tanagra.api.query.hint;

import bio.terra.tanagra.api.shared.DataType;
import bio.terra.tanagra.api.shared.Literal;
import bio.terra.tanagra.api.shared.ValueDisplay;
import bio.terra.tanagra.query.sql.SqlQueryResult;
import bio.terra.tanagra.query.sql.SqlRowResult;
import bio.terra.tanagra.underlay.entitymodel.Attribute;
import com.google.common.collect.ImmutableMap;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;

Expand All @@ -15,6 +22,8 @@ public final class HintInstance {
private final double min;
private final double max;
private final Map<ValueDisplay, Long> enumValueCounts;
// TODO(dexamundsen): BENCH-5178: use attribute.getEmptyValueDisplay()
private static final String ATTR_EMPTY_VALUE_DISPLAY = "n/a";

public HintInstance(Attribute attribute, double min, double max) {
this.attribute = attribute;
Expand All @@ -32,6 +41,10 @@ public HintInstance(Attribute attribute, Map<ValueDisplay, Long> enumValueCounts
this.enumValueCounts = new HashMap<>(enumValueCounts);
}

/**
 * Stores {@code count} for {@code valueDisplay} in this hint's enum value counts, overwriting any
 * count previously stored under an equal key.
 *
 * <p>NOTE(review): this relies on {@code enumValueCounts} being a mutable map; the min/max
 * constructor is not fully visible here, so confirm it initializes one.
 *
 * @param valueDisplay the enum value (and display) used as the map key
 * @param count the count to associate with that value
 */
public void addEnumValueCount(ValueDisplay valueDisplay, Long count) {
  enumValueCounts.put(valueDisplay, count);
}

/**
 * Returns the attribute stored on this hint instance.
 *
 * @return the {@code attribute} field, exactly as it was supplied to the constructor
 */
public Attribute getAttribute() {
  return this.attribute;
}
Expand Down Expand Up @@ -63,6 +76,86 @@ public Optional<String> getEnumDisplay(Literal enumValue) {
.findAny();
}

/**
 * Builds range hints from the rows of a query result.
 *
 * <p>Each row is handed to {@link #newRangeInstance}; a row only contributes a hint when that
 * method returns a non-empty {@link Optional}.
 *
 * @param sqlQueryResult query result whose rows are iterated
 * @param attribute attribute attached to every hint that is created
 * @param minAlias column alias the minimum value is read from
 * @param maxAlias column alias the maximum value is read from
 * @return a new list holding one hint per contributing row, in iteration order; may be empty
 */
public static List<HintInstance> rangeInstances(
    SqlQueryResult sqlQueryResult, Attribute attribute, String minAlias, String maxAlias) {
  List<HintInstance> collected = new ArrayList<>();
  var rows = sqlQueryResult.rowResults().iterator();
  while (rows.hasNext()) {
    Optional<HintInstance> candidate =
        newRangeInstance(rows.next(), attribute, minAlias, maxAlias);
    if (candidate.isPresent()) {
      collected.add(candidate.get());
    }
  }
  return collected;
}

/**
 * Builds a single range hint from one result row.
 *
 * <p>Both bounds are read as {@code DataType.DOUBLE}. If either one comes back {@code null} no
 * hint is produced.
 *
 * @param sqlRowResult row to read the bounds from
 * @param attribute attribute attached to the created hint
 * @param minAlias column alias the minimum value is read from
 * @param maxAlias column alias the maximum value is read from
 * @return the hint, or {@link Optional#empty()} when the minimum or maximum is {@code null}
 */
public static Optional<HintInstance> newRangeInstance(
    SqlRowResult sqlRowResult, Attribute attribute, String minAlias, String maxAlias) {
  Double lower = sqlRowResult.get(minAlias, DataType.DOUBLE).getDoubleVal();
  Double upper = sqlRowResult.get(maxAlias, DataType.DOUBLE).getDoubleVal();
  if (lower == null || upper == null) {
    return Optional.empty();
  }
  return Optional.of(new HintInstance(attribute, lower, upper));
}

/**
 * Builds one enum hint from the rows of a value/display/count query result.
 *
 * <p>Every row is converted with {@link #newValueDisplayInstance} and put into a single map; a
 * later row with an equal key overwrites the count of an earlier one.
 *
 * @param sqlQueryResult query result whose rows are iterated
 * @param attribute attribute attached to the created hint
 * @param valueAlias column alias the enum value is read from
 * @param displayAlias column alias the enum display string is read from
 * @param countAlias column alias the count is read from
 * @return an immutable single-element list holding the hint; the hint is created even when the
 *     result has no rows
 */
public static List<HintInstance> valueDisplayInstance(
    SqlQueryResult sqlQueryResult,
    Attribute attribute,
    String valueAlias,
    String displayAlias,
    String countAlias) {
  Map<ValueDisplay, Long> countsByValue = new HashMap<>();
  var rows = sqlQueryResult.rowResults().iterator();
  while (rows.hasNext()) {
    Entry<ValueDisplay, Long> parsed =
        newValueDisplayInstance(rows.next(), attribute, valueAlias, displayAlias, countAlias);
    countsByValue.put(parsed.getKey(), parsed.getValue());
  }
  return List.of(new HintInstance(attribute, countsByValue));
}

/**
 * Converts one result row into a (value display, count) entry.
 *
 * <p>The value is read as {@code DataType.INT64}, the display as {@code DataType.STRING} and the
 * count as {@code DataType.INT64}. A {@code null} display string is replaced with {@code
 * ATTR_EMPTY_VALUE_DISPLAY}.
 *
 * @param sqlRowResult row to read from
 * @param attribute not used by this method's body; kept as part of the signature
 * @param valueAlias column alias the enum value is read from
 * @param displayAlias column alias the enum display string is read from
 * @param countAlias column alias the count is read from
 * @return a mutable entry pairing the value display with its count
 */
public static Entry<ValueDisplay, Long> newValueDisplayInstance(
    SqlRowResult sqlRowResult,
    Attribute attribute,
    String valueAlias,
    String displayAlias,
    String countAlias) {
  Literal value = sqlRowResult.get(valueAlias, DataType.INT64);
  String rawDisplay = sqlRowResult.get(displayAlias, DataType.STRING).getStringVal();
  String display = rawDisplay != null ? rawDisplay : ATTR_EMPTY_VALUE_DISPLAY;
  Long count = sqlRowResult.get(countAlias, DataType.INT64).getInt64Val();
  ValueDisplay key = new ValueDisplay(value, display);
  return new AbstractMap.SimpleEntry<>(key, count);
}

/**
 * Builds one enum hint from the rows of a repeated-string value/count query result.
 *
 * <p>Every row is converted with {@link #newRepeatedStringEntry} and put into a single map; a
 * later row with an equal key overwrites the count of an earlier one.
 *
 * @param sqlQueryResult query result whose rows are iterated
 * @param attribute attribute attached to the created hint
 * @param valAlias column alias the string value is read from
 * @param countAlias column alias the count is read from
 * @return an immutable single-element list holding the hint; the hint is created even when the
 *     result has no rows
 */
public static List<HintInstance> repeatedStringInstance(
    SqlQueryResult sqlQueryResult, Attribute attribute, String valAlias, String countAlias) {
  Map<ValueDisplay, Long> countsByValue = new HashMap<>();
  var rows = sqlQueryResult.rowResults().iterator();
  while (rows.hasNext()) {
    Entry<ValueDisplay, Long> parsed =
        newRepeatedStringEntry(rows.next(), attribute, valAlias, countAlias);
    countsByValue.put(parsed.getKey(), parsed.getValue());
  }
  return List.of(new HintInstance(attribute, countsByValue));
}

/**
 * Converts one result row into a (value display, count) entry for a repeated-string attribute.
 *
 * <p>The value is read as {@code DataType.STRING} and doubles as the display text; when its
 * string value is {@code null} the display falls back to {@code ATTR_EMPTY_VALUE_DISPLAY}. The
 * count is read as {@code DataType.INT64}.
 *
 * @param sqlRowResult row to read from
 * @param attribute not used by this method's body; kept as part of the signature
 * @param valAlias column alias the string value is read from
 * @param countAlias column alias the count is read from
 * @return a mutable entry pairing the value display with its count
 */
public static Entry<ValueDisplay, Long> newRepeatedStringEntry(
    SqlRowResult sqlRowResult, Attribute attribute, String valAlias, String countAlias) {
  Literal value = sqlRowResult.get(valAlias, DataType.STRING);
  String rawDisplay = value.getStringVal();
  String display = rawDisplay != null ? rawDisplay : ATTR_EMPTY_VALUE_DISPLAY;
  Long count = sqlRowResult.get(countAlias, DataType.INT64).getInt64Val();
  ValueDisplay key = new ValueDisplay(value, display);
  return new AbstractMap.SimpleEntry<>(key, count);
}

@Override
public boolean equals(Object o) {
if (this == o) {
Expand Down
Loading
Loading