Skip to content

Commit 30ea1dc

Browse files
FourFriendsluoxin5
and
luoxin5
authored
[#6238] improvement(storage): Improve get role performance when roles is bound to many metadata. (#6455)
### What changes were proposed in this pull request? fix issue #6238 improve performance when a single role is bound to many metadata. ### Why are the changes needed? Use batch queries when getting role securable object full names instead of loop queries to get each securable object full name. Fix: #6238 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests and integration tests have all passed, this feature has been running internally at Xiaomi for two weeks. Co-authored-by: luoxin5 <luoxin5@xiaomi.com>
1 parent 74435c9 commit 30ea1dc

12 files changed

+255
-26
lines changed

core/src/main/java/org/apache/gravitino/storage/relational/mapper/CatalogMetaMapper.java

+3
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ public interface CatalogMetaMapper {
4141
@SelectProvider(type = CatalogMetaSQLProviderFactory.class, method = "listCatalogPOsByMetalakeId")
4242
List<CatalogPO> listCatalogPOsByMetalakeId(@Param("metalakeId") Long metalakeId);
4343

44+
@SelectProvider(type = CatalogMetaSQLProviderFactory.class, method = "listCatalogPOsByCatalogIds")
45+
List<CatalogPO> listCatalogPOsByCatalogIds(@Param("catalogIds") List<Long> catalogIds);
46+
4447
@SelectProvider(
4548
type = CatalogMetaSQLProviderFactory.class,
4649
method = "selectCatalogIdByMetalakeIdAndName")

core/src/main/java/org/apache/gravitino/storage/relational/mapper/CatalogMetaSQLProviderFactory.java

+5
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
package org.apache.gravitino.storage.relational.mapper;
2121

2222
import com.google.common.collect.ImmutableMap;
23+
import java.util.List;
2324
import java.util.Map;
2425
import org.apache.gravitino.storage.relational.JDBCBackend.JDBCBackendType;
2526
import org.apache.gravitino.storage.relational.mapper.provider.base.CatalogMetaBaseSQLProvider;
@@ -56,6 +57,10 @@ public static String listCatalogPOsByMetalakeId(@Param("metalakeId") Long metala
5657
return getProvider().listCatalogPOsByMetalakeId(metalakeId);
5758
}
5859

60+
public static String listCatalogPOsByCatalogIds(@Param("catalogIds") List<Long> catalogIds) {
61+
return getProvider().listCatalogPOsByCatalogIds(catalogIds);
62+
}
63+
5964
public static String selectCatalogIdByMetalakeIdAndName(
6065
@Param("metalakeId") Long metalakeId, @Param("catalogName") String name) {
6166
return getProvider().selectCatalogIdByMetalakeIdAndName(metalakeId, name);

core/src/main/java/org/apache/gravitino/storage/relational/mapper/FilesetMetaMapper.java

+25
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,31 @@ public interface FilesetMetaMapper {
6767
@SelectProvider(type = FilesetMetaSQLProviderFactory.class, method = "listFilesetPOsBySchemaId")
6868
List<FilesetPO> listFilesetPOsBySchemaId(@Param("schemaId") Long schemaId);
6969

70+
@Results({
71+
@Result(property = "filesetId", column = "fileset_id"),
72+
@Result(property = "filesetName", column = "fileset_name"),
73+
@Result(property = "metalakeId", column = "metalake_id"),
74+
@Result(property = "catalogId", column = "catalog_id"),
75+
@Result(property = "schemaId", column = "schema_id"),
76+
@Result(property = "type", column = "type"),
77+
@Result(property = "auditInfo", column = "audit_info"),
78+
@Result(property = "currentVersion", column = "current_version"),
79+
@Result(property = "lastVersion", column = "last_version"),
80+
@Result(property = "deletedAt", column = "deleted_at"),
81+
@Result(property = "filesetVersionPO.id", column = "id"),
82+
@Result(property = "filesetVersionPO.metalakeId", column = "version_metalake_id"),
83+
@Result(property = "filesetVersionPO.catalogId", column = "version_catalog_id"),
84+
@Result(property = "filesetVersionPO.schemaId", column = "version_schema_id"),
85+
@Result(property = "filesetVersionPO.filesetId", column = "version_fileset_id"),
86+
@Result(property = "filesetVersionPO.version", column = "version"),
87+
@Result(property = "filesetVersionPO.filesetComment", column = "fileset_comment"),
88+
@Result(property = "filesetVersionPO.properties", column = "properties"),
89+
@Result(property = "filesetVersionPO.storageLocation", column = "storage_location"),
90+
@Result(property = "filesetVersionPO.deletedAt", column = "version_deleted_at")
91+
})
92+
@SelectProvider(type = FilesetMetaSQLProviderFactory.class, method = "listFilesetPOsByFilesetIds")
93+
List<FilesetPO> listFilesetPOsByFilesetIds(@Param("filesetIds") List<Long> filesetIds);
94+
7095
@SelectProvider(
7196
type = FilesetMetaSQLProviderFactory.class,
7297
method = "selectFilesetIdBySchemaIdAndName")

core/src/main/java/org/apache/gravitino/storage/relational/mapper/FilesetMetaSQLProviderFactory.java

+5
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
package org.apache.gravitino.storage.relational.mapper;
2121

2222
import com.google.common.collect.ImmutableMap;
23+
import java.util.List;
2324
import java.util.Map;
2425
import org.apache.gravitino.storage.relational.JDBCBackend.JDBCBackendType;
2526
import org.apache.gravitino.storage.relational.mapper.provider.base.FilesetMetaBaseSQLProvider;
@@ -55,6 +56,10 @@ public static String listFilesetPOsBySchemaId(@Param("schemaId") Long schemaId)
5556
return getProvider().listFilesetPOsBySchemaId(schemaId);
5657
}
5758

59+
public static String listFilesetPOsByFilesetIds(@Param("filesetIds") List<Long> filesetIds) {
60+
return getProvider().listFilesetPOsByFilesetIds(filesetIds);
61+
}
62+
5863
public static String selectFilesetIdBySchemaIdAndName(
5964
@Param("schemaId") Long schemaId, @Param("filesetName") String name) {
6065
return getProvider().selectFilesetIdBySchemaIdAndName(schemaId, name);

core/src/main/java/org/apache/gravitino/storage/relational/mapper/SchemaMetaMapper.java

+3
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ public interface SchemaMetaMapper {
4141
@SelectProvider(type = SchemaMetaSQLProviderFactory.class, method = "listSchemaPOsByCatalogId")
4242
List<SchemaPO> listSchemaPOsByCatalogId(@Param("catalogId") Long catalogId);
4343

44+
@SelectProvider(type = SchemaMetaSQLProviderFactory.class, method = "listSchemaPOsBySchemaIds")
45+
List<SchemaPO> listSchemaPOsBySchemaIds(@Param("schemaIds") List<Long> schemaIds);
46+
4447
@SelectProvider(
4548
type = SchemaMetaSQLProviderFactory.class,
4649
method = "selectSchemaIdByCatalogIdAndName")

core/src/main/java/org/apache/gravitino/storage/relational/mapper/SchemaMetaSQLProviderFactory.java

+5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package org.apache.gravitino.storage.relational.mapper;
2020

2121
import com.google.common.collect.ImmutableMap;
22+
import java.util.List;
2223
import java.util.Map;
2324
import org.apache.gravitino.storage.relational.JDBCBackend.JDBCBackendType;
2425
import org.apache.gravitino.storage.relational.mapper.provider.base.SchemaMetaBaseSQLProvider;
@@ -50,6 +51,10 @@ static class SchemaMetaMySQLProvider extends SchemaMetaBaseSQLProvider {}
5051

5152
static class SchemaMetaH2Provider extends SchemaMetaBaseSQLProvider {}
5253

54+
public static String listSchemaPOsBySchemaIds(@Param("schemaIds") List<Long> schemaIds) {
55+
return getProvider().listSchemaPOsBySchemaIds(schemaIds);
56+
}
57+
5358
public static String listSchemaPOsByCatalogId(@Param("catalogId") Long catalogId) {
5459
return getProvider().listSchemaPOsByCatalogId(catalogId);
5560
}

core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/CatalogMetaBaseSQLProvider.java

+19
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import static org.apache.gravitino.storage.relational.mapper.CatalogMetaMapper.TABLE_NAME;
2323

24+
import java.util.List;
2425
import org.apache.gravitino.storage.relational.po.CatalogPO;
2526
import org.apache.ibatis.annotations.Param;
2627

@@ -36,6 +37,24 @@ public String listCatalogPOsByMetalakeId(@Param("metalakeId") Long metalakeId) {
3637
+ " WHERE metalake_id = #{metalakeId} AND deleted_at = 0";
3738
}
3839

40+
public String listCatalogPOsByCatalogIds(@Param("catalogIds") List<Long> catalogIds) {
41+
return "<script>"
42+
+ "SELECT catalog_id as catalogId, catalog_name as catalogName,"
43+
+ " metalake_id as metalakeId, type, provider,"
44+
+ " catalog_comment as catalogComment, properties, audit_info as auditInfo,"
45+
+ " current_version as currentVersion, last_version as lastVersion,"
46+
+ " deleted_at as deletedAt"
47+
+ " FROM "
48+
+ TABLE_NAME
49+
+ " WHERE catalog_id in ("
50+
+ "<foreach collection='catalogIds' item='catalogId' separator=','>"
51+
+ "#{catalogId}"
52+
+ "</foreach>"
53+
+ ") "
54+
+ " AND deleted_at = 0"
55+
+ "</script>";
56+
}
57+
3958
public String selectCatalogIdByMetalakeIdAndName(
4059
@Param("metalakeId") Long metalakeId, @Param("catalogName") String name) {
4160
return "SELECT catalog_id as catalogId FROM "

core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/FilesetMetaBaseSQLProvider.java

+23
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import static org.apache.gravitino.storage.relational.mapper.FilesetMetaMapper.META_TABLE_NAME;
2323
import static org.apache.gravitino.storage.relational.mapper.FilesetMetaMapper.VERSION_TABLE_NAME;
2424

25+
import java.util.List;
2526
import org.apache.gravitino.storage.relational.po.FilesetPO;
2627
import org.apache.ibatis.annotations.Param;
2728

@@ -50,6 +51,28 @@ public String selectFilesetIdBySchemaIdAndName(
5051
+ " AND deleted_at = 0";
5152
}
5253

54+
public String listFilesetPOsByFilesetIds(@Param("filesetIds") List<Long> filesetIds) {
55+
return "<script>"
56+
+ "SELECT fm.fileset_id, fm.fileset_name, fm.metalake_id, fm.catalog_id, fm.schema_id,"
57+
+ " fm.type, fm.audit_info, fm.current_version, fm.last_version, fm.deleted_at,"
58+
+ " vi.id, vi.metalake_id as version_metalake_id, vi.catalog_id as version_catalog_id,"
59+
+ " vi.schema_id as version_schema_id, vi.fileset_id as version_fileset_id,"
60+
+ " vi.version, vi.fileset_comment, vi.properties, vi.storage_location,"
61+
+ " vi.deleted_at as version_deleted_at"
62+
+ " FROM "
63+
+ META_TABLE_NAME
64+
+ " fm INNER JOIN "
65+
+ VERSION_TABLE_NAME
66+
+ " vi ON fm.fileset_id = vi.fileset_id AND fm.current_version = vi.version"
67+
+ " WHERE fm.fileset_id in ("
68+
+ "<foreach collection='filesetIds' item='filesetId' separator=','>"
69+
+ "#{filesetId}"
70+
+ "</foreach>"
71+
+ ") "
72+
+ " AND fm.deleted_at = 0 AND vi.deleted_at = 0"
73+
+ "</script>";
74+
}
75+
5376
public String selectFilesetMetaBySchemaIdAndName(
5477
@Param("schemaId") Long schemaId, @Param("filesetName") String name) {
5578
return "SELECT fm.fileset_id, fm.fileset_name, fm.metalake_id, fm.catalog_id, fm.schema_id,"

core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/SchemaMetaBaseSQLProvider.java

+19
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import static org.apache.gravitino.storage.relational.mapper.SchemaMetaMapper.TABLE_NAME;
2222

23+
import java.util.List;
2324
import org.apache.gravitino.storage.relational.po.SchemaPO;
2425
import org.apache.ibatis.annotations.Param;
2526

@@ -35,6 +36,24 @@ public String listSchemaPOsByCatalogId(@Param("catalogId") Long catalogId) {
3536
+ " WHERE catalog_id = #{catalogId} AND deleted_at = 0";
3637
}
3738

39+
public String listSchemaPOsBySchemaIds(@Param("schemaIds") List<Long> schemaIds) {
40+
return "<script>"
41+
+ "SELECT schema_id as schemaId, schema_name as schemaName,"
42+
+ " metalake_id as metalakeId, catalog_id as catalogId,"
43+
+ " schema_comment as schemaComment, properties, audit_info as auditInfo,"
44+
+ " current_version as currentVersion, last_version as lastVersion,"
45+
+ " deleted_at as deletedAt"
46+
+ " FROM "
47+
+ TABLE_NAME
48+
+ " WHERE schema_id in ("
49+
+ "<foreach collection='schemaIds' item='schemaId' separator=','>"
50+
+ "#{schemaId}"
51+
+ "</foreach>"
52+
+ ") "
53+
+ " AND deleted_at = 0"
54+
+ "</script>";
55+
}
56+
3857
public String selectSchemaIdByCatalogIdAndName(
3958
@Param("catalogId") Long catalogId, @Param("schemaName") String name) {
4059
return "SELECT schema_id as schemaId FROM "

core/src/main/java/org/apache/gravitino/storage/relational/service/RoleMetaService.java

+124-15
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,17 @@
1818
*/
1919
package org.apache.gravitino.storage.relational.service;
2020

21+
import com.google.common.base.Joiner;
2122
import com.google.common.base.Preconditions;
2223
import com.google.common.collect.Lists;
2324
import com.google.common.collect.Sets;
2425
import java.io.IOException;
2526
import java.util.Collections;
27+
import java.util.Comparator;
28+
import java.util.HashMap;
2629
import java.util.HashSet;
2730
import java.util.List;
31+
import java.util.Map;
2832
import java.util.Objects;
2933
import java.util.Set;
3034
import java.util.function.Function;
@@ -38,12 +42,18 @@
3842
import org.apache.gravitino.authorization.SecurableObject;
3943
import org.apache.gravitino.exceptions.NoSuchEntityException;
4044
import org.apache.gravitino.meta.RoleEntity;
45+
import org.apache.gravitino.storage.relational.mapper.CatalogMetaMapper;
46+
import org.apache.gravitino.storage.relational.mapper.FilesetMetaMapper;
4147
import org.apache.gravitino.storage.relational.mapper.GroupRoleRelMapper;
4248
import org.apache.gravitino.storage.relational.mapper.OwnerMetaMapper;
4349
import org.apache.gravitino.storage.relational.mapper.RoleMetaMapper;
50+
import org.apache.gravitino.storage.relational.mapper.SchemaMetaMapper;
4451
import org.apache.gravitino.storage.relational.mapper.SecurableObjectMapper;
4552
import org.apache.gravitino.storage.relational.mapper.UserRoleRelMapper;
53+
import org.apache.gravitino.storage.relational.po.CatalogPO;
54+
import org.apache.gravitino.storage.relational.po.FilesetPO;
4655
import org.apache.gravitino.storage.relational.po.RolePO;
56+
import org.apache.gravitino.storage.relational.po.SchemaPO;
4757
import org.apache.gravitino.storage.relational.po.SecurableObjectPO;
4858
import org.apache.gravitino.storage.relational.utils.ExceptionUtils;
4959
import org.apache.gravitino.storage.relational.utils.POConverters;
@@ -54,6 +64,8 @@
5464

5565
/** The service class for role metadata. It provides the basic database operations for role. */
5666
public class RoleMetaService {
67+
private static final String DOT = ".";
68+
private static final Joiner DOT_JOINER = Joiner.on(DOT);
5769

5870
private static final Logger LOG = LoggerFactory.getLogger(RoleMetaService.class);
5971
private static final RoleMetaService INSTANCE = new RoleMetaService();
@@ -353,21 +365,58 @@ private static List<SecurableObject> listSecurableObjects(RolePO po) {
353365
List<SecurableObjectPO> securableObjectPOs = listSecurableObjectsByRoleId(po.getRoleId());
354366
List<SecurableObject> securableObjects = Lists.newArrayList();
355367

356-
for (SecurableObjectPO securableObjectPO : securableObjectPOs) {
357-
String fullName =
358-
MetadataObjectService.getMetadataObjectFullName(
359-
securableObjectPO.getType(), securableObjectPO.getMetadataObjectId());
360-
if (fullName != null) {
361-
securableObjects.add(
362-
POConverters.fromSecurableObjectPO(
363-
fullName, securableObjectPO, getType(securableObjectPO.getType())));
364-
} else {
365-
LOG.warn(
366-
"The securable object {} {} may be deleted",
367-
securableObjectPO.getMetadataObjectId(),
368-
securableObjectPO.getType());
369-
}
370-
}
368+
securableObjectPOs.stream()
369+
.collect(Collectors.groupingBy(SecurableObjectPO::getType))
370+
.forEach(
371+
(type, objects) -> {
372+
// If the type is Fileset, use the batch retrieval interface;
373+
// otherwise, use the single retrieval interface
374+
if (type.equals(MetadataObject.Type.FILESET.name())) {
375+
List<Long> filesetIds =
376+
objects.stream()
377+
.map(SecurableObjectPO::getMetadataObjectId)
378+
.collect(Collectors.toList());
379+
380+
Map<Long, String> filesetIdAndNameMap = getFilesetObjectFullNames(filesetIds);
381+
382+
for (SecurableObjectPO securableObjectPO : objects) {
383+
String fullName =
384+
filesetIdAndNameMap.get(securableObjectPO.getMetadataObjectId());
385+
if (fullName != null) {
386+
securableObjects.add(
387+
POConverters.fromSecurableObjectPO(
388+
fullName, securableObjectPO, getType(securableObjectPO.getType())));
389+
} else {
390+
LOG.warn(
391+
"The securable object {} {} may be deleted",
392+
securableObjectPO.getMetadataObjectId(),
393+
securableObjectPO.getType());
394+
}
395+
}
396+
} else {
397+
// todo:to get other securable object fullNames using batch retrieving
398+
for (SecurableObjectPO securableObjectPO : objects) {
399+
String fullName =
400+
MetadataObjectService.getMetadataObjectFullName(
401+
securableObjectPO.getType(), securableObjectPO.getMetadataObjectId());
402+
if (fullName != null) {
403+
securableObjects.add(
404+
POConverters.fromSecurableObjectPO(
405+
fullName, securableObjectPO, getType(securableObjectPO.getType())));
406+
} else {
407+
LOG.warn(
408+
"The securable object {} {} may be deleted",
409+
securableObjectPO.getMetadataObjectId(),
410+
securableObjectPO.getType());
411+
}
412+
}
413+
}
414+
});
415+
416+
// To ensure that the order of the returned securable objects remains consistent,
417+
// the securable objects are sorted by fullName here,
418+
// since the order of securable objects after grouping by is different each time.
419+
securableObjects.sort(Comparator.comparing(MetadataObject::fullName));
371420

372421
return securableObjects;
373422
}
@@ -394,4 +443,64 @@ private static MetadataObject.Type getType(String type) {
394443
private static String getEntityType(SecurableObject securableObject) {
395444
return securableObject.type().name();
396445
}
446+
447+
public static Map<Long, String> getFilesetObjectFullNames(List<Long> ids) {
448+
List<FilesetPO> filesetPOs =
449+
SessionUtils.getWithoutCommit(
450+
FilesetMetaMapper.class, mapper -> mapper.listFilesetPOsByFilesetIds(ids));
451+
452+
if (filesetPOs == null || filesetPOs.isEmpty()) {
453+
return new HashMap<>();
454+
}
455+
456+
List<Long> catalogIds =
457+
filesetPOs.stream().map(FilesetPO::getCatalogId).collect(Collectors.toList());
458+
List<Long> schemaIds =
459+
filesetPOs.stream().map(FilesetPO::getSchemaId).collect(Collectors.toList());
460+
461+
Map<Long, String> catalogIdAndNameMap = getCatalogIdAndNameMap(catalogIds);
462+
Map<Long, String> schemaIdAndNameMap = getSchemaIdAndNameMap(schemaIds);
463+
464+
HashMap<Long, String> filesetIdAndNameMap = new HashMap<>();
465+
466+
filesetPOs.forEach(
467+
filesetPO -> {
468+
// since the catalog or schema can be deleted, we need to check the null value,
469+
// and when catalog or schema is deleted, we will set catalogName or schemaName to null
470+
String catalogName = catalogIdAndNameMap.getOrDefault(filesetPO.getCatalogId(), null);
471+
if (catalogName == null) {
472+
LOG.warn("The catalog of fileset {} may be deleted", filesetPO.getFilesetId());
473+
filesetIdAndNameMap.put(filesetPO.getFilesetId(), null);
474+
return;
475+
}
476+
477+
String schemaName = schemaIdAndNameMap.getOrDefault(filesetPO.getSchemaId(), null);
478+
if (schemaName == null) {
479+
LOG.warn("The schema of fileset {} may be deleted", filesetPO.getFilesetId());
480+
filesetIdAndNameMap.put(filesetPO.getFilesetId(), null);
481+
return;
482+
}
483+
484+
String fullName = DOT_JOINER.join(catalogName, schemaName, filesetPO.getFilesetName());
485+
filesetIdAndNameMap.put(filesetPO.getFilesetId(), fullName);
486+
});
487+
488+
return filesetIdAndNameMap;
489+
}
490+
491+
public static Map<Long, String> getSchemaIdAndNameMap(List<Long> schemaIds) {
492+
List<SchemaPO> schemaPOS =
493+
SessionUtils.getWithoutCommit(
494+
SchemaMetaMapper.class, mapper -> mapper.listSchemaPOsBySchemaIds(schemaIds));
495+
return schemaPOS.stream()
496+
.collect(Collectors.toMap(SchemaPO::getSchemaId, SchemaPO::getSchemaName));
497+
}
498+
499+
public static Map<Long, String> getCatalogIdAndNameMap(List<Long> catalogIds) {
500+
List<CatalogPO> catalogPOs =
501+
SessionUtils.getWithoutCommit(
502+
CatalogMetaMapper.class, mapper -> mapper.listCatalogPOsByCatalogIds(catalogIds));
503+
return catalogPOs.stream()
504+
.collect(Collectors.toMap(CatalogPO::getCatalogId, CatalogPO::getCatalogName));
505+
}
397506
}

0 commit comments

Comments
 (0)