Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ntuple] Move public utility types out of experimental #17665

Merged
merged 9 commits into from
Feb 12, 2025
4 changes: 2 additions & 2 deletions gui/browsable/src/RFieldHolder.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ class RFieldHolder : public ROOT::Browsable::RHolder {
std::shared_ptr<ROOT::Experimental::RNTupleReader> fNtplReader;
std::string fParentName;

ROOT::Experimental::DescriptorId_t fFieldId;
ROOT::DescriptorId_t fFieldId;

public:
RFieldHolder(std::shared_ptr<ROOT::Experimental::RNTupleReader> ntplReader, const std::string &parent_name,
ROOT::Experimental::DescriptorId_t id)
ROOT::DescriptorId_t id)
: fNtplReader(ntplReader), fParentName(parent_name), fFieldId(id)
{
}
Expand Down
12 changes: 6 additions & 6 deletions gui/browsable/src/RNTupleBrowseProvider.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ class RFieldElement : public RElement {

std::string fParentName;

ROOT::Experimental::DescriptorId_t fFieldId;
ROOT::DescriptorId_t fFieldId;

public:
RFieldElement(std::shared_ptr<ROOT::Experimental::RNTupleReader> ntplReader, const std::string &parent_name,
const ROOT::Experimental::DescriptorId_t id)
const ROOT::DescriptorId_t id)
: RElement(), fNtplReader(ntplReader), fParentName(parent_name), fFieldId(id)
{
}
Expand Down Expand Up @@ -153,13 +153,13 @@ class RNTupleElement : public RElement {
class RFieldsIterator : public RLevelIter {

std::shared_ptr<ROOT::Experimental::RNTupleReader> fNtplReader;
std::vector<ROOT::Experimental::DescriptorId_t> fFieldIds;
std::vector<ROOT::DescriptorId_t> fFieldIds;
std::string fParentName;
int fCounter{-1};

public:
RFieldsIterator(std::shared_ptr<ROOT::Experimental::RNTupleReader> ntplReader,
std::vector<ROOT::Experimental::DescriptorId_t> &&ids, const std::string &parent_name = ""s)
std::vector<ROOT::DescriptorId_t> &&ids, const std::string &parent_name = ""s)
: fNtplReader(ntplReader), fFieldIds(ids), fParentName(parent_name)
{
}
Expand Down Expand Up @@ -210,7 +210,7 @@ class RFieldsIterator : public RLevelIter {

std::unique_ptr<RLevelIter> RFieldElement::GetChildsIter()
{
std::vector<ROOT::Experimental::DescriptorId_t> ids;
std::vector<ROOT::DescriptorId_t> ids;
std::string prefix;

for (auto &f : fNtplReader->GetDescriptor().GetFieldIterable(fFieldId))
Expand All @@ -229,7 +229,7 @@ std::unique_ptr<RLevelIter> RFieldElement::GetChildsIter()

std::unique_ptr<RLevelIter> RNTupleElement::GetChildsIter()
{
std::vector<ROOT::Experimental::DescriptorId_t> ids;
std::vector<ROOT::DescriptorId_t> ids;

for (auto &f : fNtplReader->GetDescriptor().GetTopLevelFields())
ids.emplace_back(f.GetId());
Expand Down
11 changes: 7 additions & 4 deletions tree/dataframe/inc/ROOT/RNTupleDS.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class RNTupleDS final : public ROOT::RDF::RDataSource {
/// Connects the IDs of active proto fields and their subfields to their fully qualified name (a.b.c.d).
/// This enables the column reader to rewire the field IDs when the file changes (chain),
/// using the fully qualified name as a search key in the descriptor of the other page sources.
std::unordered_map<ROOT::Experimental::DescriptorId_t, std::string> fFieldId2QualifiedName;
std::unordered_map<ROOT::DescriptorId_t, std::string> fFieldId2QualifiedName;
std::vector<std::string> fColumnNames;
std::vector<std::string> fColumnTypes;
/// List of column readers returned by GetColumnReaders() organized by slot. Used to reconnect readers
Expand Down Expand Up @@ -120,10 +120,13 @@ class RNTupleDS final : public ROOT::RDF::RDataSource {

/// \brief Holds useful information about fields added to the RNTupleDS
struct RFieldInfo {
DescriptorId_t fFieldId;
ROOT::DescriptorId_t fFieldId;
std::size_t fNRepetitions;
// Enable `std::vector::emplace_back` for this type
RFieldInfo(DescriptorId_t fieldId, std::size_t nRepetitions) : fFieldId(fieldId), fNRepetitions(nRepetitions) {}
RFieldInfo(ROOT::DescriptorId_t fieldId, std::size_t nRepetitions)
: fFieldId(fieldId), fNRepetitions(nRepetitions)
{
}
};

/// Provides the RDF column "colName" given the field identified by fieldID. For records and collections,
Expand All @@ -135,7 +138,7 @@ class RNTupleDS final : public ROOT::RDF::RDataSource {
/// float eta;
/// };
/// AddField will recurse into Jet.pt and Jet.eta and provide the two inner fields as std::vector<float> each.
void AddField(const RNTupleDescriptor &desc, std::string_view colName, DescriptorId_t fieldId,
void AddField(const RNTupleDescriptor &desc, std::string_view colName, ROOT::DescriptorId_t fieldId,
std::vector<RFieldInfo> fieldInfos);

/// The main function of the fThreadStaging background thread
Expand Down
25 changes: 14 additions & 11 deletions tree/dataframe/src/RNTupleDS.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class RRDFCardinalityField final : public ROOT::Experimental::RFieldBase {
public:
static std::string TypeName() { return "std::size_t"; }
RRDFCardinalityField()
: ROOT::Experimental::RFieldBase("", TypeName(), ENTupleStructure::kLeaf, false /* isSimple */)
: ROOT::Experimental::RFieldBase("", TypeName(), ROOT::ENTupleStructure::kLeaf, false /* isSimple */)
{
}
RRDFCardinalityField(RRDFCardinalityField &&other) = default;
Expand All @@ -94,19 +94,19 @@ class RRDFCardinalityField final : public ROOT::Experimental::RFieldBase {
size_t GetAlignment() const final { return alignof(std::size_t); }

/// Get the number of elements of the collection identified by globalIndex
void ReadGlobalImpl(ROOT::Experimental::NTupleSize_t globalIndex, void *to) final
void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) final
{
RNTupleLocalIndex collectionStart;
NTupleSize_t size;
ROOT::NTupleSize_t size;
fPrincipalColumn->GetCollectionInfo(globalIndex, &collectionStart, &size);
*static_cast<std::size_t *>(to) = size;
}

/// Get the number of elements of the collection identified by localIndex
void ReadInClusterImpl(ROOT::Experimental::RNTupleLocalIndex localIndex, void *to) final
void ReadInClusterImpl(ROOT::RNTupleLocalIndex localIndex, void *to) final
{
RNTupleLocalIndex collectionStart;
NTupleSize_t size;
ROOT::NTupleSize_t size;
fPrincipalColumn->GetCollectionInfo(localIndex, &collectionStart, &size);
*static_cast<std::size_t *>(to) = size;
}
Expand All @@ -128,15 +128,18 @@ class RArraySizeField final : public ROOT::Experimental::RFieldBase {
}
void GenerateColumns() final { assert(false && "RArraySizeField fields must only be used for reading"); }
void GenerateColumns(const ROOT::Experimental::RNTupleDescriptor &) final {}
void ReadGlobalImpl(NTupleSize_t /*globalIndex*/, void *to) final { *static_cast<std::size_t *>(to) = fArrayLength; }
void ReadGlobalImpl(ROOT::NTupleSize_t /*globalIndex*/, void *to) final
{
*static_cast<std::size_t *>(to) = fArrayLength;
}
void ReadInClusterImpl(RNTupleLocalIndex /*localIndex*/, void *to) final
{
*static_cast<std::size_t *>(to) = fArrayLength;
}

public:
RArraySizeField(std::size_t arrayLength)
: ROOT::Experimental::RFieldBase("", "std::size_t", ENTupleStructure::kLeaf, false /* isSimple */),
: ROOT::Experimental::RFieldBase("", "std::size_t", ROOT::ENTupleStructure::kLeaf, false /* isSimple */),
fArrayLength(arrayLength)
{
}
Expand Down Expand Up @@ -227,7 +230,7 @@ class RNTupleColumnReader : public ROOT::Detail::RDF::RColumnReaderBase {

RNTupleDS::~RNTupleDS() = default;

void RNTupleDS::AddField(const RNTupleDescriptor &desc, std::string_view colName, DescriptorId_t fieldId,
void RNTupleDS::AddField(const RNTupleDescriptor &desc, std::string_view colName, ROOT::DescriptorId_t fieldId,
std::vector<RNTupleDS::RFieldInfo> fieldInfos)
{
// As an example for the mapping of RNTuple fields to RDF columns, let's consider an RNTuple
Expand Down Expand Up @@ -262,14 +265,14 @@ void RNTupleDS::AddField(const RNTupleDescriptor &desc, std::string_view colName

const auto &fieldDesc = desc.GetFieldDescriptor(fieldId);
const auto &nRepetitions = fieldDesc.GetNRepetitions();
if ((fieldDesc.GetStructure() == ENTupleStructure::kCollection) || (nRepetitions > 0)) {
if ((fieldDesc.GetStructure() == ROOT::ENTupleStructure::kCollection) || (nRepetitions > 0)) {
// The field is a collection or a fixed-size array.
// We open a new collection scope with fieldId being the innermost collection. E.g. for "event.tracks.hits",
// fieldInfos would already contain the fieldId of "event.tracks"
fieldInfos.emplace_back(fieldId, nRepetitions);
}

if (fieldDesc.GetStructure() == ENTupleStructure::kCollection) {
if (fieldDesc.GetStructure() == ROOT::ENTupleStructure::kCollection) {
// Inner fields of collections are provided as projected collections of only that inner field,
// E.g. we provide a projected collection RVec<RVec<float>> for "event.tracks.hits.x" in the example
// above.
Expand Down Expand Up @@ -299,7 +302,7 @@ void RNTupleDS::AddField(const RNTupleDescriptor &desc, std::string_view colName
const auto &f = *desc.GetFieldIterable(fieldDesc.GetId()).begin();
AddField(desc, colName, f.GetId(), fieldInfos);
return;
} else if (fieldDesc.GetStructure() == ENTupleStructure::kRecord) {
} else if (fieldDesc.GetStructure() == ROOT::ENTupleStructure::kRecord) {
// Inner fields of records are provided as individual RDF columns, e.g. "event.id"
for (const auto &f : desc.GetFieldIterable(fieldDesc.GetId())) {
auto innerName = colName.empty() ? f.GetFieldName() : (std::string(colName) + "." + f.GetFieldName());
Expand Down
1 change: 0 additions & 1 deletion tree/ntuple/v7/doc/Architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ and it provides the `Merge(...)` interface for the `TFileMerger`.
The page source and sink can read and write pages and clusters from and to a storage backend.
There are concrete class implementations for an RNTuple stored in a ROOT file (local or remote), and for an RNTuple stored in a DAOS object store.
There is a virtual page sink for buffered writes, which also groups pages of the same column before flushing them to disk.
There is a virtual page source for aligned friend datasets (horizontal data combination).

Page sources and sinks do not operate entry-based but based on pages/indices of columns.
For instance, there is no API in the page sink to write an entry, but only to write pages of columns.
Expand Down
24 changes: 12 additions & 12 deletions tree/ntuple/v7/inc/ROOT/RCluster.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ public:
/// On-disk pages within a page source are identified by the column and page number. The key is used for
/// associative collections of on-disk pages.
struct Key {
DescriptorId_t fPhysicalColumnId;
ROOT::DescriptorId_t fPhysicalColumnId;
std::uint64_t fPageNo;
Key(DescriptorId_t columnId, std::uint64_t pageNo) : fPhysicalColumnId(columnId), fPageNo(pageNo) {}
Key(ROOT::DescriptorId_t columnId, std::uint64_t pageNo) : fPhysicalColumnId(columnId), fPageNo(pageNo) {}
friend bool operator ==(const Key &lhs, const Key &rhs) {
return lhs.fPhysicalColumnId == rhs.fPhysicalColumnId && lhs.fPageNo == rhs.fPageNo;
}
Expand All @@ -79,9 +79,9 @@ struct hash<ROOT::Experimental::Internal::ROnDiskPage::Key> {
// TODO(jblomer): quick and dirty hash, likely very sub-optimal, to be revised later.
size_t operator()(const ROOT::Experimental::Internal::ROnDiskPage::Key &key) const
{
return ((std::hash<ROOT::Experimental::DescriptorId_t>()(key.fPhysicalColumnId) ^
(hash<ROOT::Experimental::NTupleSize_t>()(key.fPageNo) << 1)) >>
1);
return (
(std::hash<ROOT::DescriptorId_t>()(key.fPhysicalColumnId) ^ (hash<ROOT::NTupleSize_t>()(key.fPageNo) << 1)) >>
1);
}
};
}
Expand Down Expand Up @@ -151,16 +151,16 @@ Binds together several page maps that represent all the pages of certain columns
// clang-format on
class RCluster {
public:
using ColumnSet_t = std::unordered_set<DescriptorId_t>;
using ColumnSet_t = std::unordered_set<ROOT::DescriptorId_t>;
/// The identifiers that specify the content of a (partial) cluster
struct RKey {
DescriptorId_t fClusterId = kInvalidDescriptorId;
ROOT::DescriptorId_t fClusterId = ROOT::kInvalidDescriptorId;
ColumnSet_t fPhysicalColumnSet;
};

protected:
/// References the cluster identifier in the page source that created the cluster
DescriptorId_t fClusterId;
ROOT::DescriptorId_t fClusterId;
/// Multiple page maps can be combined in a single RCluster
std::vector<std::unique_ptr<ROnDiskPageMap>> fPageMaps;
/// Set of the (complete) columns represented by the RCluster
Expand All @@ -169,7 +169,7 @@ protected:
std::unordered_map<ROnDiskPage::Key, ROnDiskPage> fOnDiskPages;

public:
explicit RCluster(DescriptorId_t clusterId) : fClusterId(clusterId) {}
explicit RCluster(ROOT::DescriptorId_t clusterId) : fClusterId(clusterId) {}
RCluster(const RCluster &other) = delete;
RCluster(RCluster &&other) = default;
RCluster &operator =(const RCluster &other) = delete;
Expand All @@ -189,12 +189,12 @@ public:
/// Marks the column as complete; must be done for all columns, even empty ones without associated pages,
/// before the cluster is given from the page storage to the cluster pool. Marking the available columns is
/// typically the last step of RPageSource::LoadCluster().
void SetColumnAvailable(DescriptorId_t physicalColumnId);
void SetColumnAvailable(ROOT::DescriptorId_t physicalColumnId);
const ROnDiskPage *GetOnDiskPage(const ROnDiskPage::Key &key) const;

DescriptorId_t GetId() const { return fClusterId; }
ROOT::DescriptorId_t GetId() const { return fClusterId; }
const ColumnSet_t &GetAvailPhysicalColumns() const { return fAvailPhysicalColumns; }
bool ContainsColumn(DescriptorId_t colId) const { return fAvailPhysicalColumns.count(colId) > 0; }
bool ContainsColumn(ROOT::DescriptorId_t colId) const { return fAvailPhysicalColumns.count(colId) > 0; }
size_t GetNOnDiskPages() const { return fOnDiskPages.size(); }
}; // class RCluster

Expand Down
6 changes: 3 additions & 3 deletions tree/ntuple/v7/inc/ROOT/RClusterPool.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ private:
std::thread fThreadIo;

/// Every cluster id has at most one corresponding RCluster pointer in the pool
RCluster *FindInPool(DescriptorId_t clusterId) const;
RCluster *FindInPool(ROOT::DescriptorId_t clusterId) const;
/// Returns an index of an unused element in fPool; callers of this function (GetCluster() and WaitFor())
/// make sure that a free slot actually exists
size_t FindFreeSlot() const;
Expand All @@ -115,7 +115,7 @@ private:
/// Returns the given cluster from the pool, which needs to contain at least the columns `physicalColumns`.
/// Executed at the end of GetCluster when all missing data pieces have been sent to the load queue.
/// Ideally, the function returns without blocking if the cluster is already in the pool.
RCluster *WaitFor(DescriptorId_t clusterId, const RCluster::ColumnSet_t &physicalColumns);
RCluster *WaitFor(ROOT::DescriptorId_t clusterId, const RCluster::ColumnSet_t &physicalColumns);

public:
static constexpr unsigned int kDefaultClusterBunchSize = 1;
Expand All @@ -131,7 +131,7 @@ public:
/// `physicalColumns` and possibly pages of other columns, too. If implicit multi-threading is turned on, the
/// uncompressed pages of the returned cluster are already pushed into the page pool associated with the page source
/// upon return. The cluster remains valid until the next call to GetCluster().
RCluster *GetCluster(DescriptorId_t clusterId, const RCluster::ColumnSet_t &physicalColumns);
RCluster *GetCluster(ROOT::DescriptorId_t clusterId, const RCluster::ColumnSet_t &physicalColumns);

/// Used by the unit tests to drain the queue of clusters to be preloaded
void WaitForInFlightClusters();
Expand Down
Loading
Loading