Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion tree/dataframe/inc/ROOT/RDF/RInterface.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -1523,7 +1523,17 @@ public:
// RemoveDuplicates should preserve ordering of the columns: it might be meaningful.
RDFInternal::RemoveDuplicates(columnNames);

auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot");
// Resolve the columns to snapshot. The first attempt matches the regular expression against
// `columnNames`; if that throws, a second attempt is made against the column names reported
// by the data source (when one is available).
std::vector<std::string> selectedColumns;
try {
   selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot");
} catch (const std::runtime_error &) {
   // No columns were found, try again but consider all input data source columns
   if (auto ds = GetDataSource())
      selectedColumns = RDFInternal::ConvertRegexToColumns(ds->GetColumnNames(), columnNameRegexp, "Snapshot");
   else
      throw; // rethrow the original exception object: `throw e;` would copy it and slice derived types
}

if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS") {
RDFInternal::RemoveRNTupleSubFields(selectedColumns);
Expand Down
1 change: 1 addition & 0 deletions tree/dataframe/inc/ROOT/RDF/RInterfaceBase.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ public:
RInterfaceBase(RDFDetail::RLoopManager &lm, const RDFInternal::RColumnRegister &colRegister);

ColumnNames_t GetColumnNames();
ColumnNames_t GetDatasetTopLevelFieldNames();

std::string GetColumnType(std::string_view column);

Expand Down
2 changes: 2 additions & 0 deletions tree/dataframe/inc/ROOT/RNTupleDS.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ class RNTupleDS final : public ROOT::RDF::RDataSource {
std::unordered_map<ROOT::DescriptorId_t, std::string> fFieldId2QualifiedName;
std::vector<std::string> fColumnNames;
std::vector<std::string> fColumnTypes;
std::vector<std::string> fTopLevelFieldNames;
/// List of column readers returned by GetColumnReaders() organized by slot. Used to reconnect readers
/// to new page sources when the files in the chain change.
std::vector<std::vector<ROOT::Internal::RDF::RNTupleColumnReader *>> fActiveColumnReaders;
Expand Down Expand Up @@ -222,6 +223,7 @@ public:
void SetNSlots(unsigned int nSlots) final;
std::size_t GetNFiles() const final { return fFileNames.empty() ? 1 : fFileNames.size(); }
const std::vector<std::string> &GetColumnNames() const final { return fColumnNames; }
const std::vector<std::string> &GetTopLevelFieldNames() const final { return fTopLevelFieldNames; }
bool HasColumn(std::string_view colName) const final;
std::string GetTypeName(std::string_view colName) const final;
std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final;
Expand Down
22 changes: 22 additions & 0 deletions tree/dataframe/src/RInterfaceBase.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,28 @@ ROOT::RDF::ColumnNames_t ROOT::RDF::RInterfaceBase::GetColumnNames()
return ret;
}

/////////////////////////////////////////////////////////////////////////////
/// \brief Retrieve the names of the top-level fields of the dataset.
///
/// Data sources with a hierarchical dataset schema (e.g. TTree or RNTuple)
/// only report their top-level fields here: if the schema holds a user class
/// with a data member, the field containing the user class object is listed,
/// whereas the sub-field of the data member is not.
///
/// Any other data source reports the list of all available dataset columns.
///
/// \return The field names in sorted order (same convention as GetColumnNames);
///         an empty list when there is no data source.
ROOT::RDF::ColumnNames_t ROOT::RDF::RInterfaceBase::GetDatasetTopLevelFieldNames()
{
   auto dataSource = GetDataSource();
   if (!dataSource)
      return {};

   ROOT::RDF::ColumnNames_t fieldNames = ROOT::Internal::RDF::GetTopLevelFieldNames(*dataSource);
   // Keep the ordering consistent with GetColumnNames
   std::sort(fieldNames.begin(), fieldNames.end());
   return fieldNames;
}

/////////////////////////////////////////////////////////////////////////////
/// \brief Return the type of a given column as a string.
/// \return the type of the required column.
Expand Down
6 changes: 6 additions & 0 deletions tree/dataframe/src/RNTupleDS.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,12 @@ ROOT::RDF::RNTupleDS::RNTupleDS(std::unique_ptr<ROOT::Internal::RPageSource> pag

AddField(fPrincipalDescriptor, "", fPrincipalDescriptor.GetFieldZeroId(),
std::vector<ROOT::RDF::RNTupleDS::RFieldInfo>());

auto topLevelFields = fPrincipalDescriptor.GetTopLevelFields();
const auto nTopLevelFields = std::distance(topLevelFields.begin(), topLevelFields.end());
fTopLevelFieldNames.reserve(nTopLevelFields);
for (const auto &field : topLevelFields)
fTopLevelFieldNames.push_back(field.GetFieldName());
}

namespace {
Expand Down
12 changes: 12 additions & 0 deletions tree/dataframe/test/datasource_ntuple.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -828,3 +828,15 @@ TEST(RNTupleDS, Int8)
std::vector<std::int8_t> expected{0, 1, 2, 3, 4};
EXPECT_EQ(expected, df.Take<std::int8_t>("x").GetValue());
}

// Checks that the top-level field list omits the sub-field columns ("VecElectron.pt",
// "electron.pt") that the full column list of the same dataset contains.
TEST_F(RNTupleDSTest, GetTopLevelFieldNames)
{
   ROOT::RDataFrame df{fNtplName, fFileName};

   // Top-level fields only
   const std::vector<std::string> expectedTopLevelFields{"VecElectron", "electron", "energy", "jets", "nElectron",
                                                         "nnlo",        "pt",       "rvec",   "tag"};
   EXPECT_VEC_EQ(df.GetDatasetTopLevelFieldNames(), expectedTopLevelFields);

   // All columns, sub-fields included
   const std::vector<std::string> expectedColumns{"VecElectron", "VecElectron.pt", "electron", "electron.pt",
                                                  "energy",      "jets",           "nElectron", "nnlo",
                                                  "pt",          "rvec",           "tag"};
   EXPECT_VEC_EQ(df.GetColumnNames(), expectedColumns);
}
9 changes: 9 additions & 0 deletions tree/dataframe/test/datasource_tree.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,15 @@ TEST(RTTreeDS, BranchWithNestedSameName)
expect_vec_eq(branchNames, expectedBranchNames);
}

// Checks that "toplevel" is the only top-level field name reported for the input tree.
TEST(RTTreeDS, GetDatasetTopLevelFieldNames)
{
   InputTreeRAII input{};
   ROOT::RDataFrame rdf{input.fTreeName, input.fFileName};

   const std::vector<std::string> expectedTopLevelNames{"toplevel"};
   const auto topLevelNames = rdf.GetDatasetTopLevelFieldNames();
   expect_vec_eq(topLevelNames, expectedTopLevelNames);
}

#ifdef R__USE_IMT
struct Dataset20164RAIII {
const char *fTreeName{"tree_20164"};
Expand Down
Loading