Skip to content

Commit

Permalink
[ntuple] RNTupleMerger: skip auxiliary missing columns
Browse files Browse the repository at this point in the history
  • Loading branch information
silverweed committed Sep 20, 2024
1 parent 4899fec commit 79e97d8
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 14 deletions.
2 changes: 1 addition & 1 deletion tree/ntuple/v7/inc/ROOT/RNTupleMerger.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class RNTupleMerger final {

public:
RNTupleMerger();

/// Merge a given set of sources into the destination.
RResult<void> Merge(std::span<RPageSource *> sources, RPageSink &destination,
const RNTupleMergeOptions &mergeOpts = RNTupleMergeOptions());
Expand Down
5 changes: 2 additions & 3 deletions tree/ntuple/v7/inc/ROOT/RPageStorage.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -707,9 +707,8 @@ public:
/// Helper for unstreaming a page. This is commonly used in derived, concrete page sources. The implementation
/// currently always makes a memory copy, even if the sealed page is uncompressed and in the final memory layout.
/// The optimization of directly mapping pages is left to the concrete page source implementations.
RResult<RPage>
static UnsealPage(const RSealedPage &sealedPage, const RColumnElementBase &element, DescriptorId_t physicalColumnId,
RPageAllocator &pageAlloc);
RResult<RPage> static UnsealPage(const RSealedPage &sealedPage, const RColumnElementBase &element,
DescriptorId_t physicalColumnId, RPageAllocator &pageAlloc);

EPageStorageType GetType() final { return EPageStorageType::kSource; }
const RNTupleReadOptions &GetReadOptions() const { return fOptions; }
Expand Down
26 changes: 16 additions & 10 deletions tree/ntuple/v7/src/RNTupleMerger.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -271,15 +271,13 @@ CompareDescriptorStructure(const RNTupleDescriptor &dst, const RNTupleDescriptor
errors.push_back(ss.str());
} else if (field.fSrc->IsProjectedField()) {
// if both fields are projected, verify that they point to the same real field
// FIXME: we cannot compare the ids as they belong to different RNTuples.
// Check the real field's FQ name instead.
const auto srcId = field.fSrc->GetProjectionSourceId();
const auto dstId = field.fDst->GetProjectionSourceId();
if (srcId != dstId) {
const auto srcName = src.GetQualifiedFieldName(field.fSrc->GetProjectionSourceId());
const auto dstName = dst.GetQualifiedFieldName(field.fDst->GetProjectionSourceId());
if (srcName != dstName) {
std::stringstream ss;
ss << "Field `" << fieldName
<< "` is projected to a different field than a previously-seen field with the same name (old: " << dstId
<< ", new: " << srcId << ")";
<< "` is projected to a different field than a previously-seen field with the same name (old: " << dstName
<< ", new: " << srcName << ")";
errors.push_back(ss.str());
}
}
Expand Down Expand Up @@ -331,6 +329,9 @@ CompareDescriptorStructure(const RNTupleDescriptor &dst, const RNTupleDescriptor
res.fCommonFields.emplace_back(srcField);
}

// TODO(gparolini): we should exhaustively check the field tree rather than just the top level fields,
// in case the user forgets to change the version number on one field.

return RResult(res);
}

Expand Down Expand Up @@ -377,8 +378,9 @@ void RNTupleMerger::MergeCommonColumns(RClusterPool &clusterPool, DescriptorId_t
return;

const RCluster *cluster = clusterPool.GetCluster(clusterId, commonColumnSet);
if (!cluster)
return;
// we expect the cluster pool to contain the requested set of columns, since they were
// validated by CompareDescriptorStructures().
assert(cluster);

const auto &clusterDesc = mergeData.fSrcDescriptor->GetClusterDescriptor(clusterId);

Expand Down Expand Up @@ -466,10 +468,14 @@ static void GenerateExtraDstColumns(size_t nClusterEntries, std::span<RColumnInf
for (const auto &column : extraDstColumns) {
const auto &columnId = column.fInputId;
const auto &columnDesc = mergeData.fDstDescriptor.GetColumnDescriptor(columnId);
const RFieldDescriptor *field = column.fParentField;

// Skip all auxiliary columns
if (field->GetLogicalColumnIds()[0] != columnId)
continue;

// Check if this column is a child of a Collection or a Variant. If so, it has no data
// and can be skipped.
const RFieldDescriptor *field = column.fParentField;
bool skipColumn = false;
auto nRepetitions = std::max<std::uint64_t>(field->GetNRepetitions(), 1);
for (auto parentId = field->GetParentId(); parentId != kInvalidDescriptorId;) {
Expand Down

0 comments on commit 79e97d8

Please sign in to comment.