diff --git a/service/src/main/java/bio/terra/tanagra/service/UnderlayService.java b/service/src/main/java/bio/terra/tanagra/service/UnderlayService.java index 235a4cbc6..06ac04f3f 100644 --- a/service/src/main/java/bio/terra/tanagra/service/UnderlayService.java +++ b/service/src/main/java/bio/terra/tanagra/service/UnderlayService.java @@ -19,10 +19,7 @@ import bio.terra.tanagra.api.query.hint.HintInstance; import bio.terra.tanagra.api.query.hint.HintQueryRequest; import bio.terra.tanagra.api.query.hint.HintQueryResult; -import bio.terra.tanagra.api.shared.DataType; -import bio.terra.tanagra.api.shared.Literal; import bio.terra.tanagra.api.shared.OrderByDirection; -import bio.terra.tanagra.api.shared.ValueDisplay; import bio.terra.tanagra.app.configuration.ExportConfiguration; import bio.terra.tanagra.app.configuration.UnderlayConfiguration; import bio.terra.tanagra.indexing.job.bigquery.WriteEntityLevelDisplayHints; @@ -34,6 +31,7 @@ import bio.terra.tanagra.service.accesscontrol.ResourceId; import bio.terra.tanagra.underlay.ConfigReader; import bio.terra.tanagra.underlay.Underlay; +import bio.terra.tanagra.underlay.entitymodel.Attribute; import bio.terra.tanagra.underlay.entitymodel.Entity; import bio.terra.tanagra.underlay.serialization.SZService; import bio.terra.tanagra.underlay.serialization.SZUnderlay; @@ -44,6 +42,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; import org.slf4j.Logger; @@ -216,13 +215,12 @@ public HintQueryResult getEntityLevelHints( String bqFilterSql = bqTranslator.translator(entityFilter).buildSql(sqlParams, null); // For each attribute with a hint, calculate new hints with the filter - StringBuilder allSql = new StringBuilder(); - List allHintInstances = new ArrayList<>(); - entityLevelHints.getHintInstances().stream() - .map(HintInstance::getAttribute) - .parallel() + StringBuffer allSql = new StringBuffer(); + List allHintInstances = Collections.synchronizedList(new ArrayList<>()); + entityLevelHints.getHintInstances().parallelStream() .map( - attribute -> { + hintInstance -> { + Attribute attribute = hintInstance.getAttribute(); if (isRangeHint(attribute)) { String sql = WriteEntityLevelDisplayHints.buildRangeHintSql( @@ -233,22 +231,10 @@ public HintQueryResult getEntityLevelHints( underlay .getQueryRunner() .run(new SqlQueryRequest(sql, sqlParams, null, null, false)); - - List attrHintInstances = new ArrayList<>(); - sqlQueryResult - .rowResults() - .iterator() - .forEachRemaining( - sqlRowResult -> { - Double min = - sqlRowResult.get(MIN_VAL_ALIAS, DataType.DOUBLE).getDoubleVal(); - Double max = - sqlRowResult.get(MAX_VAL_ALIAS, DataType.DOUBLE).getDoubleVal(); - if (min != null && max != null) { - attrHintInstances.add(new HintInstance(attribute, min, max)); - } - }); - return new HintQueryResult(sql, attrHintInstances); + return new HintQueryResult( + sql, + HintInstance.rangeInstances( + sqlQueryResult, attribute, MIN_VAL_ALIAS, MAX_VAL_ALIAS)); } else if (isEnumHintForValueDisplay(attribute)) { String sql = @@ -260,22 +246,14 @@ public HintQueryResult getEntityLevelHints( underlay .getQueryRunner() .run(new SqlQueryRequest(sql, sqlParams, null, null, false)); - - Map attrEnumValues = new HashMap<>(); - sqlQueryResult - .rowResults() - .iterator() - .forEachRemaining( - sqlRowResult -> { - Literal enumVal = sqlRowResult.get(ENUM_VAL_ALIAS, DataType.INT64); - String enumDisplay = - sqlRowResult.get(ENUM_DISP_ALIAS, DataType.STRING).getStringVal(); - Long enumCount = - sqlRowResult.get(ENUM_COUNT_ALIAS, DataType.INT64).getInt64Val(); - attrEnumValues.put(new ValueDisplay(enumVal, enumDisplay), enumCount); - }); return new HintQueryResult( - sql, List.of(new HintInstance(attribute, attrEnumValues))); + sql, + HintInstance.valueDisplayInstance( + sqlQueryResult, + attribute, + ENUM_VAL_ALIAS, + ENUM_DISP_ALIAS, + ENUM_COUNT_ALIAS)); } else if (isEnumHintForRepeatedStringValue(attribute)) { String sql = @@ -287,20 +265,10 @@ public HintQueryResult getEntityLevelHints( underlay .getQueryRunner() .run(new SqlQueryRequest(sql, sqlParams, null, null, false)); - - Map attrEnumValues = new HashMap<>(); - sqlQueryResult - .rowResults() - .iterator() - .forEachRemaining( - sqlRowResult -> { - Literal enumVal = sqlRowResult.get(ENUM_VAL_ALIAS, DataType.STRING); - Long enumCount = - sqlRowResult.get(ENUM_COUNT_ALIAS, DataType.INT64).getInt64Val(); - attrEnumValues.put(new ValueDisplay(enumVal), enumCount); - }); return new HintQueryResult( - sql, List.of(new HintInstance(attribute, attrEnumValues))); + sql, + HintInstance.repeatedStringInstance( + sqlQueryResult, attribute, ENUM_VAL_ALIAS, ENUM_COUNT_ALIAS)); } else { LOGGER.info( @@ -310,7 +278,7 @@ public HintQueryResult getEntityLevelHints( return null; } }) - .toList() + .filter(Objects::nonNull) .forEach( hqr -> { allSql.append(hqr.getSql()).append(';'); diff --git a/underlay/src/main/java/bio/terra/tanagra/api/query/hint/HintInstance.java b/underlay/src/main/java/bio/terra/tanagra/api/query/hint/HintInstance.java index 12233799c..4e0b5386f 100644 --- a/underlay/src/main/java/bio/terra/tanagra/api/query/hint/HintInstance.java +++ b/underlay/src/main/java/bio/terra/tanagra/api/query/hint/HintInstance.java @@ -1,11 +1,18 @@ package bio.terra.tanagra.api.query.hint; +import bio.terra.tanagra.api.shared.DataType; import bio.terra.tanagra.api.shared.Literal; import bio.terra.tanagra.api.shared.ValueDisplay; +import bio.terra.tanagra.query.sql.SqlQueryResult; +import bio.terra.tanagra.query.sql.SqlRowResult; import bio.terra.tanagra.underlay.entitymodel.Attribute; import com.google.common.collect.ImmutableMap; +import java.util.AbstractMap; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Objects; import java.util.Optional; @@ -15,6 +22,8 @@ public final class HintInstance { private final double min; private final double max; private final Map enumValueCounts; + // TODO(dexamundsen): BENCH-5178: use attribute.getEmptyValueDisplay() + private static final String ATTR_EMPTY_VALUE_DISPLAY = "n/a"; public HintInstance(Attribute attribute, double min, double max) { this.attribute = attribute; @@ -32,6 +41,10 @@ public HintInstance(Attribute attribute, Map enumValueCounts this.enumValueCounts = new HashMap<>(enumValueCounts); } + public void addEnumValueCount(ValueDisplay valueDisplay, Long count) { + this.enumValueCounts.put(valueDisplay, count); + } + public Attribute getAttribute() { return attribute; } @@ -63,6 +76,86 @@ public Optional getEnumDisplay(Literal enumValue) { .findAny(); } + public static List rangeInstances( + SqlQueryResult sqlQueryResult, Attribute attribute, String minAlias, String maxAlias) { + List hintInstances = new ArrayList<>(); + sqlQueryResult + .rowResults() + .iterator() + .forEachRemaining( + sqlRowResult -> + newRangeInstance(sqlRowResult, attribute, minAlias, maxAlias) + .ifPresent(hintInstances::add)); + return hintInstances; + } + + public static Optional newRangeInstance( + SqlRowResult sqlRowResult, Attribute attribute, String minAlias, String maxAlias) { + Double min = sqlRowResult.get(minAlias, DataType.DOUBLE).getDoubleVal(); + Double max = sqlRowResult.get(maxAlias, DataType.DOUBLE).getDoubleVal(); + return (min != null && max != null) + ? Optional.of(new HintInstance(attribute, min, max)) + : Optional.empty(); + } + + public static List valueDisplayInstance( + SqlQueryResult sqlQueryResult, + Attribute attribute, + String valueAlias, + String displayAlias, + String countAlias) { + Map attrEnumValues = new HashMap<>(); + sqlQueryResult + .rowResults() + .iterator() + .forEachRemaining( + sqlRowResult -> { + Entry entry = + newValueDisplayInstance( + sqlRowResult, attribute, valueAlias, displayAlias, countAlias); + attrEnumValues.put(entry.getKey(), entry.getValue()); + }); + return List.of(new HintInstance(attribute, attrEnumValues)); + } + + public static Entry newValueDisplayInstance( + SqlRowResult sqlRowResult, + Attribute attribute, + String valueAlias, + String displayAlias, + String countAlias) { + Literal enumVal = sqlRowResult.get(valueAlias, DataType.INT64); + String enumDisplay = + Optional.ofNullable(sqlRowResult.get(displayAlias, DataType.STRING).getStringVal()) + .orElse(ATTR_EMPTY_VALUE_DISPLAY); + Long enumCount = sqlRowResult.get(countAlias, DataType.INT64).getInt64Val(); + return new AbstractMap.SimpleEntry<>(new ValueDisplay(enumVal, enumDisplay), enumCount); + } + + public static List repeatedStringInstance( + SqlQueryResult sqlQueryResult, Attribute attribute, String valAlias, String countAlias) { + Map attrEnumValues = new HashMap<>(); + sqlQueryResult + .rowResults() + .iterator() + .forEachRemaining( + sqlRowResult -> { + Entry entry = + newRepeatedStringEntry(sqlRowResult, attribute, valAlias, countAlias); + attrEnumValues.put(entry.getKey(), entry.getValue()); + }); + return List.of(new HintInstance(attribute, attrEnumValues)); + } + + public static Entry newRepeatedStringEntry( + SqlRowResult sqlRowResult, Attribute attribute, String valAlias, String countAlias) { + Literal enumVal = sqlRowResult.get(valAlias, DataType.STRING); + String enumDisplay = + Optional.ofNullable(enumVal.getStringVal()).orElse(ATTR_EMPTY_VALUE_DISPLAY); + Long enumCount = sqlRowResult.get(countAlias, DataType.INT64).getInt64Val(); + return new AbstractMap.SimpleEntry<>(new ValueDisplay(enumVal, enumDisplay), enumCount); + } + @Override public boolean equals(Object o) { if (this == o) { diff --git a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java index 1b708dd62..36335a17b 100644 --- a/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java +++ b/underlay/src/main/java/bio/terra/tanagra/query/bigquery/BQQueryRunner.java @@ -18,7 +18,6 @@ import bio.terra.tanagra.api.query.list.ListQueryResult; import bio.terra.tanagra.api.query.list.OrderBy; import bio.terra.tanagra.api.shared.DataType; -import bio.terra.tanagra.api.shared.Literal; import bio.terra.tanagra.api.shared.ValueDisplay; import bio.terra.tanagra.exception.InvalidConfigException; import bio.terra.tanagra.exception.InvalidQueryException; @@ -48,6 +47,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.lang3.NotImplementedException; @@ -253,84 +253,83 @@ public HintQueryResult run(HintQueryRequest hintQueryRequest) { SqlQueryResult sqlQueryResult = bigQueryExecutor.run(sqlQueryRequest); // Process the rows returned. - List hintInstances = new ArrayList<>(); - Map> enumValues = new HashMap<>(); + String attributeColName; + String minColName; + String maxColName; + String enumValColName; + String enumDisplayColName; + String enumCountColName; + if (hintQueryRequest.isEntityLevel()) { + attributeColName = + ITEntityLevelDisplayHints.Column.ATTRIBUTE_NAME.getSchema().getColumnName(); + minColName = ITEntityLevelDisplayHints.Column.MIN.getSchema().getColumnName(); + maxColName = ITEntityLevelDisplayHints.Column.MAX.getSchema().getColumnName(); + enumValColName = ITEntityLevelDisplayHints.Column.ENUM_VALUE.getSchema().getColumnName(); + enumDisplayColName = + ITEntityLevelDisplayHints.Column.ENUM_DISPLAY.getSchema().getColumnName(); + enumCountColName = ITEntityLevelDisplayHints.Column.ENUM_COUNT.getSchema().getColumnName(); + } else { + attributeColName = + ITInstanceLevelDisplayHints.Column.ATTRIBUTE_NAME.getSchema().getColumnName(); + minColName = ITInstanceLevelDisplayHints.Column.MIN.getSchema().getColumnName(); + maxColName = ITInstanceLevelDisplayHints.Column.MAX.getSchema().getColumnName(); + enumValColName = ITInstanceLevelDisplayHints.Column.ENUM_VALUE.getSchema().getColumnName(); + enumDisplayColName = + ITInstanceLevelDisplayHints.Column.ENUM_DISPLAY.getSchema().getColumnName(); + enumCountColName = ITInstanceLevelDisplayHints.Column.ENUM_COUNT.getSchema().getColumnName(); + } + + List allHintInstances = new ArrayList<>(); + Map attrHintInstanceMap = new HashMap<>(); sqlQueryResult .rowResults() .iterator() .forEachRemaining( sqlRowResult -> { - String attributeColName; - String minColName; - String maxColName; - String enumValColName; - String enumDisplayColName; - String enumCountColName; - if (hintQueryRequest.isEntityLevel()) { - attributeColName = - ITEntityLevelDisplayHints.Column.ATTRIBUTE_NAME.getSchema().getColumnName(); - minColName = ITEntityLevelDisplayHints.Column.MIN.getSchema().getColumnName(); - maxColName = ITEntityLevelDisplayHints.Column.MAX.getSchema().getColumnName(); - enumValColName = - ITEntityLevelDisplayHints.Column.ENUM_VALUE.getSchema().getColumnName(); - enumDisplayColName = - ITEntityLevelDisplayHints.Column.ENUM_DISPLAY.getSchema().getColumnName(); - enumCountColName = - ITEntityLevelDisplayHints.Column.ENUM_COUNT.getSchema().getColumnName(); - } else { - attributeColName = - ITInstanceLevelDisplayHints.Column.ATTRIBUTE_NAME.getSchema().getColumnName(); - minColName = ITInstanceLevelDisplayHints.Column.MIN.getSchema().getColumnName(); - maxColName = ITInstanceLevelDisplayHints.Column.MAX.getSchema().getColumnName(); - enumValColName = - ITInstanceLevelDisplayHints.Column.ENUM_VALUE.getSchema().getColumnName(); - enumDisplayColName = - ITInstanceLevelDisplayHints.Column.ENUM_DISPLAY.getSchema().getColumnName(); - enumCountColName = - ITInstanceLevelDisplayHints.Column.ENUM_COUNT.getSchema().getColumnName(); - } - Attribute attribute = hintQueryRequest .hintedEntity() .getAttribute( sqlRowResult.get(attributeColName, DataType.STRING).getStringVal()); + if (attribute.isValueDisplay()) { // This is one (value,count) pair of an enum values hint. - Literal enumVal = sqlRowResult.get(enumValColName, DataType.INT64); - String enumDisplay = - sqlRowResult.get(enumDisplayColName, DataType.STRING).getStringVal(); - Long enumCount = sqlRowResult.get(enumCountColName, DataType.INT64).getInt64Val(); - Map enumValuesForAttr = - enumValues.containsKey(attribute) ? enumValues.get(attribute) : new HashMap<>(); - enumValuesForAttr.put(new ValueDisplay(enumVal, enumDisplay), enumCount); - enumValues.put(attribute, enumValuesForAttr); + HintInstance hintInstance = attrHintInstanceMap.get(attribute); + if (hintInstance == null) { + hintInstance = new HintInstance(attribute, new HashMap<>()); + allHintInstances.add(hintInstance); + attrHintInstanceMap.put(attribute, hintInstance); + } + Entry entry = + HintInstance.newValueDisplayInstance( + sqlRowResult, + attribute, + enumValColName, + enumDisplayColName, + enumCountColName); + hintInstance.addEnumValueCount(entry.getKey(), entry.getValue()); } else if (attribute.getRuntimeDataType().equals(DataType.STRING)) { // repeated attribute - Literal enumVal = sqlRowResult.get(enumDisplayColName, DataType.STRING); - Long enumCount = sqlRowResult.get(enumCountColName, DataType.INT64).getInt64Val(); - Map enumValuesForAttr = - enumValues.containsKey(attribute) ? enumValues.get(attribute) : new HashMap<>(); - enumValuesForAttr.put(new ValueDisplay(enumVal), enumCount); - enumValues.put(attribute, enumValuesForAttr); + HintInstance hintInstance = attrHintInstanceMap.get(attribute); + if (hintInstance == null) { + hintInstance = new HintInstance(attribute, new HashMap<>()); + allHintInstances.add(hintInstance); + attrHintInstanceMap.put(attribute, hintInstance); + } + Entry entry = + HintInstance.newRepeatedStringEntry( + sqlRowResult, attribute, enumDisplayColName, enumCountColName); + hintInstance.addEnumValueCount(entry.getKey(), entry.getValue()); } else { // This is a range hint. - Double min = sqlRowResult.get(minColName, DataType.DOUBLE).getDoubleVal(); - Double max = sqlRowResult.get(maxColName, DataType.DOUBLE).getDoubleVal(); - if (min != null && max != null) { - hintInstances.add(new HintInstance(attribute, min, max)); - } + HintInstance.newRangeInstance(sqlRowResult, attribute, minColName, maxColName) + .ifPresent(allHintInstances::add); } }); - // Assemble the value/count pairs into a single enum values hint for each attribute. - enumValues.forEach( - (attribute, enumValuesForAttr) -> - hintInstances.add(new HintInstance(attribute, enumValuesForAttr))); - - return new HintQueryResult(sql.toString(), hintInstances); + return new HintQueryResult(sql.toString(), allHintInstances); } @Override @@ -555,8 +554,6 @@ public SqlQueryRequest buildListQuerySqlAgainstSourceData(ListQueryRequest listQ // Build the inner SQL query against the index data first. // Select only the id attribute, which we need to JOIN to the source table. // SELECT id FROM [index table] WHERE [filter] - StringBuilder sql = new StringBuilder(50); - BQApiTranslator bqTranslator = new BQApiTranslator(); AttributeField indexIdAttributeField = new AttributeField( listQueryRequest.getUnderlay(), @@ -595,6 +592,7 @@ public SqlQueryRequest buildListQuerySqlAgainstSourceData(ListQueryRequest listQ // e.g. tableJoinAliases: sourceQuery.valueFieldName, sourceQuery.displayFieldTable -> // joinTableAlias Table tableJoinAliases = HashBasedTable.create(); + BQApiTranslator bqTranslator = new BQApiTranslator(); listQueryRequest .getSelectFields() .forEach( @@ -625,20 +623,17 @@ public SqlQueryRequest buildListQuerySqlAgainstSourceData(ListQueryRequest listQ .getDisplayFieldTable(); if (displayTableJoins.get(valueFieldName, displayFieldTable) == null) { // Add new table joins and aliases - StringBuilder joinSql = - new StringBuilder() - .append(" LEFT JOIN ") - .append( - fromFullTablePath(attrSourcePointer.getDisplayFieldTable()) - .render()) - .append(" AS ") - .append(joinTableAlias) - .append(" ON ") - .append(displayTableJoinField.renderForSelect(joinTableAlias)) - .append(" = ") - .append(valueSqlField.renderForSelect(sourceTableAlias)); - - displayTableJoins.put(valueFieldName, displayFieldTable, joinSql.toString()); + String joinSql = + " LEFT JOIN " + + fromFullTablePath(attrSourcePointer.getDisplayFieldTable()).render() + + " AS " + + joinTableAlias + + " ON " + + displayTableJoinField.renderForSelect(joinTableAlias) + + " = " + + valueSqlField.renderForSelect(sourceTableAlias); + + displayTableJoins.put(valueFieldName, displayFieldTable, joinSql); tableJoinAliases.put(valueFieldName, displayFieldTable, joinTableAlias); selectFields.add(displaySqlField.renderForSelect(joinTableAlias)); } else { @@ -663,20 +658,21 @@ public SqlQueryRequest buildListQuerySqlAgainstSourceData(ListQueryRequest listQ // SELECT [select fields] FROM [source table] // JOIN [display table] ON [display join field] // WHERE [source id field] IN [inner query against index data] - sql.append("SELECT ") - .append(String.join(", ", selectFields)) - .append(" FROM ") - .append(fromFullTablePath(listQueryRequest.getEntity().getSourceQueryTableName()).render()) - .append(" AS ") - .append(sourceTableAlias) - .append(String.join("", displayTableJoins.values().stream().toList())) - .append(" WHERE ") - .append(sourceIdAttrSqlField.renderForSelect(sourceTableAlias)) - .append(" IN (") - .append(indexDataSqlRequest.sql()) - .append(')'); + String sql = + "SELECT " + + String.join(", ", selectFields) + + " FROM " + + fromFullTablePath(listQueryRequest.getEntity().getSourceQueryTableName()).render() + + " AS " + + sourceTableAlias + + String.join("", displayTableJoins.values().stream().toList()) + + " WHERE " + + sourceIdAttrSqlField.renderForSelect(sourceTableAlias) + + " IN (" + + indexDataSqlRequest.sql() + + ')'; return new SqlQueryRequest( - sql.toString(), + sql, sqlParams, listQueryRequest.getPageMarker(), listQueryRequest.getPageSize(),