diff --git a/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageDevice.h b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageDevice.h new file mode 100644 index 0000000000000..ef4105f4195e6 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageDevice.h @@ -0,0 +1,17 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_interface_SiPixelImageDevice_h +#define RecoLocalTracker_SiPixelClusterizer_interface_SiPixelImageDevice_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageHost.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using SiPixelImageDevice = PortableCollection; + using SiPixelImageMorphDevice = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(SiPixelImageDevice, SiPixelImageHost) +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(SiPixelImageMorphDevice, SiPixelImageMorphHost) + +#endif // RecoLocalTracker_SiPixelClusterizer_interface_SiPixelImageDevice_h diff --git a/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageHost.h b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageHost.h new file mode 100644 index 0000000000000..811cc63951deb --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageHost.h @@ -0,0 +1,10 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_interface_SiPixelImageHost_h +#define RecoLocalTracker_SiPixelClusterizer_interface_SiPixelImageHost_h + +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +using SiPixelImageHost = PortableHostCollection; +using SiPixelImageMorphHost = PortableHostCollection; + +#endif // RecoLocalTracker_SiPixelClusterizer_interface_SiPixelImageHost_h diff --git a/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageSoA.h b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageSoA.h new file mode 100644 index 0000000000000..a0bf2e7105161 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageSoA.h @@ -0,0 +1,23 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_interface_SiPixelImageSoA_h +#define RecoLocalTracker_SiPixelClusterizer_interface_SiPixelImageSoA_h + +#include + +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +using SiPixelImage = std::array, 418>; +using SiPixelImageMorph = std::array, 420>; + +GENERATE_SOA_LAYOUT(SiPixelImageLayout, SOA_COLUMN(SiPixelImage, clus)) + +GENERATE_SOA_LAYOUT(SiPixelImageMorphLayout, SOA_COLUMN(SiPixelImageMorph, clus)) + +using SiPixelImageSoA = SiPixelImageLayout<>; +using SiPixelImageSoAView = SiPixelImageSoA::View; +using SiPixelImageSoAConstView = SiPixelImageSoA::ConstView; + +using SiPixelImageMorphSoA = SiPixelImageMorphLayout<>; +using SiPixelImageMorphSoAView = SiPixelImageMorphSoA::View; +using SiPixelImageMorphSoAConstView = SiPixelImageMorphSoA::ConstView; + +#endif // RecoLocalTracker_SiPixelClusterizer_interface_SiPixelImageDevice_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index 83bdae62636e0..797f948314b80 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -9,6 +9,8 @@ + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h index a8c89f7484722..fdc3c9a7bb5cc 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h @@ -16,6 +16,9 @@ #include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/warpsize.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageSoA.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageDevice.h" +#include "SiPixelMorphingConfig.h" //#define GPU_DEBUG @@ -29,7 +32,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { // Phase-1 pixel modules constexpr uint32_t pixelSizeX = pixelTopology::Phase1::numRowsInModule; constexpr uint32_t pixelSizeY = pixelTopology::Phase1::numColsInModule; - + constexpr uint16_t empVal = std::numeric_limits::max() - + 2; // TODO: Move this to DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h + constexpr uint16_t fakeVal = std::numeric_limits::max() - + 4; // TODO: Move this to DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h + constexpr uint16_t eroded = std::numeric_limits::max() - + 3; // TODO: Move this to DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h // Use 0x00, 0x01, 0x03 so each can be OR'ed on top of the previous ones enum Status : uint32_t { kEmpty = 0x00, kFound = 0x01, kDuplicate = 0x03 }; @@ -90,6 +98,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { } // namespace pixelStatus + ALPAKA_FN_ACC + bool isMorphingModule(uint32_t moduleId, const uint32_t* morphingModules, uint32_t nMorphingModules) { + for (uint32_t i = 0; i < nMorphingModules; ++i) { + if (morphingModules[i] == moduleId) + return true; + } + return false; + } + template struct CountModules { ALPAKA_FN_ACC void operator()(Acc1D const& acc, @@ -128,7 +145,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { } }; - template + template struct FindClus { // assume that we can cover the whole module with up to 16 blockDimension-wide iterations static constexpr uint32_t maxIterGPU = 16; @@ -139,16 +156,27 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { ALPAKA_FN_ACC void operator()(Acc1D const& acc, SiPixelDigisSoAView digi_view, + typename ImageType::View images, + int offset, + int32_t* kernel1, + int32_t* kernel2, + uint32_t* morphingModules, + uint32_t nMorphingModules, SiPixelClustersSoAView clus_view, const unsigned int numElements) const { static_assert(TrackerTraits::numberOfModules < ::pixelClustering::maxNumModules); auto& lastPixel = alpaka::declareSharedVar(acc); + // block id, used to choose the image to use as scratch space + int block = alpaka::getIdx(acc)[0]; + auto image = images[block]; + const uint32_t lastModule = clus_view[0].moduleStart(); for (uint32_t module : cms::alpakatools::independent_groups(acc, lastModule)) { auto firstPixel = clus_view[1 + module].moduleStart(); uint32_t thisModuleId = digi_view[firstPixel].moduleId(); + uint32_t rawModuleId = digi_view[firstPixel].rawIdArr(); ALPAKA_ASSERT_ACC(thisModuleId < TrackerTraits::numberOfModules); #ifdef GPU_DEBUG @@ -176,19 +204,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { } } - using Hist = cms::alpakatools::HistoContainer; - constexpr int warpSize = cms::alpakatools::warpSize; - auto& hist = alpaka::declareSharedVar(acc); - auto& ws = alpaka::declareSharedVar(acc); - for (uint32_t j : cms::alpakatools::independent_group_elements(acc, Hist::totbins())) { - hist.off[j] = 0; - } - alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_ACC((lastPixel == numElements) or ((lastPixel < numElements) and (digi_view[lastPixel].moduleId() != thisModuleId))); // limit to maxPixInModule (FIXME if recurrent (and not limited to simulation with low threshold) one will need to implement something cleverer) @@ -204,13 +219,39 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { alpaka::syncBlockThreads(acc); ALPAKA_ASSERT_ACC(lastPixel - firstPixel <= TrackerTraits::maxPixInModule); -#ifdef GPU_DEBUG - auto& totGood = alpaka::declareSharedVar(acc); - totGood = 0; + uint32_t imageSizeX = pixelStatus::pixelSizeX + offset * 2; + uint32_t imageSizeY = pixelStatus::pixelSizeY + offset * 2; + uint32_t imgsize = imageSizeX * imageSizeY; + uint32_t pixsize = pixelStatus::pixelSizeX * pixelStatus::pixelSizeY; + for (uint32_t j : cms::alpakatools::independent_group_elements(acc, imgsize)) { + uint16_t row = j / imageSizeY; + uint16_t col = j % imageSizeY; + image.clus()[col][row] = pixelStatus::empVal; + } alpaka::syncBlockThreads(acc); -#endif // remove duplicate pixels +#if 0 + constexpr bool isPhase2 = std::is_base_of::value; + for (uint32_t i : cms::alpakatools::independent_group_elements(acc, firstPixel, lastPixel)) { + if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId) + continue; + int fpX = digi_view[i].xx() + offset; + int fpY = digi_view[i].yy() + offset; + if constexpr (not isPhase2) { + int32_t kEmp = pixelStatus::empVal; + int32_t old_value = alpaka::atomicCas(acc, &images[module].clus()[fpY][fpX], kEmp, digi_view[i].clus()); + if (old_value != pixelStatus::empVal) { + digi_view[i].moduleId() = ::pixelClustering::invalidModuleId; + digi_view[i].rawIdArr() = 0; + } + } else { + images[module].clus()[fpY][fpX] = digi_view[i].clus(); + } + } + alpaka::syncBlockThreads(acc); +#endif + constexpr bool isPhase2 = std::is_base_of::value; if constexpr (not isPhase2) { // packed words array used to store the pixelStatus of each pixel @@ -222,7 +263,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { } alpaka::syncBlockThreads(acc); - for (uint32_t i : cms::alpakatools::independent_group_elements(acc, firstPixel, lastPixel - 1)) { + for (uint32_t i : cms::alpakatools::independent_group_elements(acc, firstPixel, lastPixel)) { // skip invalid pixels if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId) continue; @@ -230,188 +271,112 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { } alpaka::syncBlockThreads(acc); - for (uint32_t i : cms::alpakatools::independent_group_elements(acc, firstPixel, lastPixel - 1)) { + alpaka::syncBlockThreads(acc); + for (uint32_t i : cms::alpakatools::independent_group_elements(acc, firstPixel, lastPixel)) { // skip invalid pixels if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId) continue; + if (pixelStatus::isDuplicate(status, digi_view[i].xx(), digi_view[i].yy())) { digi_view[i].moduleId() = ::pixelClustering::invalidModuleId; digi_view[i].rawIdArr() = 0; + continue; } + int fpX = digi_view[i].xx() + offset; + int fpY = digi_view[i].yy() + offset; + image.clus()[fpY][fpX] = static_cast(digi_view[i].clus() - firstPixel); } alpaka::syncBlockThreads(acc); } - } - - // fill histo - for (uint32_t i : cms::alpakatools::independent_group_elements(acc, firstPixel, lastPixel)) { - // skip invalid pixels - if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) { - hist.count(acc, digi_view[i].yy()); -#ifdef GPU_DEBUG - alpaka::atomicAdd(acc, &totGood, 1u, alpaka::hierarchy::Blocks{}); -#endif + } else { + auto& clusterCounter = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) { + clusterCounter = 0; } - } - alpaka::syncBlockThreads(acc); // FIXME this can be removed - for (uint32_t i : cms::alpakatools::independent_group_elements(acc, warpSize)) { - ws[i] = 0; // used by prefix scan... - } - alpaka::syncBlockThreads(acc); - hist.finalize(acc, ws); - alpaka::syncBlockThreads(acc); -#ifdef GPU_DEBUG - ALPAKA_ASSERT_ACC(hist.size() == totGood); - if (thisModuleId % 100 == 1) - if (cms::alpakatools::once_per_block(acc)) - printf("histo size %d\n", hist.size()); -#endif - for (uint32_t i : cms::alpakatools::independent_group_elements(acc, firstPixel, lastPixel)) { - // skip invalid pixels - if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) { - hist.fill(acc, digi_view[i].yy(), i - firstPixel); + alpaka::syncBlockThreads(acc); + for (uint32_t i : cms::alpakatools::independent_group_elements(acc, firstPixel, lastPixel)) { + if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId) + continue; + int fpX = digi_view[i].xx() + offset; + int fpY = digi_view[i].yy() + offset; + int32_t uniqueClusterId = + alpaka::atomicInc(acc, &clusterCounter, int(0xFFFFFFFF), alpaka::hierarchy::Blocks{}); + ALPAKA_ASSERT_ACC(uniqueClusterId < static_cast(std::numeric_limits::max() - 5)); + image.clus()[fpY][fpX] = static_cast(uniqueClusterId); } } -#ifdef GPU_DEBUG - // look for anomalous high occupancy - auto& n40 = alpaka::declareSharedVar(acc); - auto& n60 = alpaka::declareSharedVar(acc); - if (cms::alpakatools::once_per_block(acc)) { - n40 = 0; - n60 = 0; - } - alpaka::syncBlockThreads(acc); - for (uint32_t j : cms::alpakatools::independent_group_elements(acc, Hist::nbins())) { - if (hist.size(j) > 60) - alpaka::atomicAdd(acc, &n60, 1u, alpaka::hierarchy::Blocks{}); - if (hist.size(j) > 40) - alpaka::atomicAdd(acc, &n40, 1u, alpaka::hierarchy::Blocks{}); - } - alpaka::syncBlockThreads(acc); - if (cms::alpakatools::once_per_block(acc)) { - if (n60 > 0) - printf("columns with more than 60 px %d in %d\n", n60, thisModuleId); - else if (n40 > 0) - printf("columns with more than 40 px %d in %d\n", n40, thisModuleId); - } - alpaka::syncBlockThreads(acc); -#endif - - [[maybe_unused]] const uint32_t blockDimension = alpaka::getWorkDiv(acc)[0u]; - // assume that we can cover the whole module with up to maxIterGPU blockDimension-wide iterations - ALPAKA_ASSERT_ACC((hist.size() / blockDimension) < maxIterGPU); - - // number of elements per thread - constexpr uint32_t maxElements = - cms::alpakatools::requires_single_thread_per_block_v ? maxElementsPerBlock : 1; - ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0u] <= maxElements)); - - constexpr unsigned int maxIter = maxIterGPU * maxElements; - - // nearest neighbours (nn) - // allocate space for duplicate pixels: a pixel can appear more than once with different charge in the same event - constexpr int maxNeighbours = 10; - uint16_t nn[maxIter][maxNeighbours]; - uint8_t nnn[maxIter]; // number of nn - for (uint32_t k = 0; k < maxIter; ++k) { - nnn[k] = 0; - } - - alpaka::syncBlockThreads(acc); // for hit filling! - - // fill the nearest neighbours - uint32_t k = 0; - for (uint32_t j : cms::alpakatools::independent_group_elements(acc, hist.size())) { - ALPAKA_ASSERT_ACC(k < maxIter); - auto p = hist.begin() + j; - auto i = *p + firstPixel; - ALPAKA_ASSERT_ACC(digi_view[i].moduleId() != ::pixelClustering::invalidModuleId); - ALPAKA_ASSERT_ACC(digi_view[i].moduleId() == thisModuleId); // same module - auto bin = Hist::bin(digi_view[i].yy() + 1); - auto end = hist.end(bin); - ++p; - ALPAKA_ASSERT_ACC(0 == nnn[k]); - for (; p < end; ++p) { - auto m = *p + firstPixel; - ALPAKA_ASSERT_ACC(m != i); - ALPAKA_ASSERT_ACC(int(digi_view[m].yy()) - int(digi_view[i].yy()) >= 0); - ALPAKA_ASSERT_ACC(int(digi_view[m].yy()) - int(digi_view[i].yy()) <= 1); - if (std::abs(int(digi_view[m].xx()) - int(digi_view[i].xx())) <= 1) { - auto l = nnn[k]++; - ALPAKA_ASSERT_ACC(l < maxNeighbours); - nn[k][l] = *p; + if (offset > 1 && isMorphingModule(rawModuleId, morphingModules, nMorphingModules)) { + uint32_t morphingsize = (pixelStatus::pixelSizeX + 2) * (pixelStatus::pixelSizeY + 2); + //Morphing: Dilation + for (uint32_t j : cms::alpakatools::independent_group_elements(acc, morphingsize)) { + uint16_t row = j / (pixelStatus::pixelSizeY + 2) + 1; + uint16_t col = j % (pixelStatus::pixelSizeY + 2) + 1; + for (int i = 0; i < 3 && image.clus()[col][row] == pixelStatus::empVal; i++) { + for (int jj = 0; jj < 3; jj++) { + if (image.clus()[col + i - 1][row + jj - 1] != pixelStatus::empVal && + image.clus()[col + i - 1][row + jj - 1] != pixelStatus::fakeVal && kernel1[i * 3 + jj]) { + image.clus()[col][row] = pixelStatus::fakeVal; + } + } + } + } + alpaka::syncBlockThreads(acc); + //Morphing: Erosion + for (uint32_t j : cms::alpakatools::independent_group_elements(acc, morphingsize)) { + uint16_t row = j / (pixelStatus::pixelSizeY + 2) + 1; + uint16_t col = j % (pixelStatus::pixelSizeY + 2) + 1; + for (int i = 0; i < 3 && image.clus()[col][row] == pixelStatus::fakeVal; i++) { + for (int jj = 0; jj < 3; jj++) { + if (image.clus()[col + i - 1][row + jj - 1] == pixelStatus::empVal && kernel2[i * 3 + jj]) { + image.clus()[col][row] = pixelStatus::eroded; + break; + } + } } } - ++k; + alpaka::syncBlockThreads(acc); } - // for each pixel, look at all the pixels until the end of the module; // when two valid pixels within +/- 1 in x or y are found, set their id to the minimum; // after the loop, all the pixel in each cluster should have the id equeal to the lowest // pixel in the cluster ( clus[i] == i ). bool more = true; - /* - int nloops = 0; - */ + //Clustering while (alpaka::syncBlockThreadsPredicate(acc, more)) { - /* - if (nloops % 2 == 0) { - // even iterations of the outer loop - */ more = false; - uint32_t k = 0; - for (uint32_t j : cms::alpakatools::independent_group_elements(acc, hist.size())) { - ALPAKA_ASSERT_ACC(k < maxIter); - auto p = hist.begin() + j; - auto i = *p + firstPixel; - for (int kk = 0; kk < nnn[k]; ++kk) { - auto l = nn[k][kk]; - auto m = l + firstPixel; - ALPAKA_ASSERT_ACC(m != i); - // FIXME ::Threads ? - auto old = alpaka::atomicMin(acc, &digi_view[m].clus(), digi_view[i].clus(), alpaka::hierarchy::Blocks{}); - if (old != digi_view[i].clus()) { - // end the loop only if no changes were applied - more = true; + for (uint32_t j : cms::alpakatools::independent_group_elements(acc, pixsize)) { + uint16_t row = j / pixelStatus::pixelSizeY + offset; + uint16_t col = j % pixelStatus::pixelSizeY + offset; + if (image.clus()[col][row] >= pixelStatus::eroded) { + continue; + } + int32_t tmp = pixelStatus::empVal; + + for (int kk = -1; kk < 2; kk++) { + for (int jj = -1; jj < 2; jj++) { + int32_t clus = image.clus()[col + kk][row + jj]; + tmp = alpaka::math::min(acc, tmp, clus); } - // FIXME ::Threads ? - alpaka::atomicMin(acc, &digi_view[i].clus(), old, alpaka::hierarchy::Blocks{}); - } // neighbours loop - ++k; - } // pixel loop - /* - // use the outer loop to force a synchronisation - } else { - // odd iterations of the outer loop - */ - alpaka::syncBlockThreads(acc); - for (uint32_t j : cms::alpakatools::independent_group_elements(acc, hist.size())) { - auto p = hist.begin() + j; - auto i = *p + firstPixel; - auto m = digi_view[i].clus(); - while (m != digi_view[m].clus()) - m = digi_view[m].clus(); - digi_view[i].clus() = m; - } - /* } - ++nloops; - */ + if (image.clus()[col][row] != tmp) { + image.clus()[col][row] = tmp; + more = true; + } + } + alpaka::syncBlockThreads(acc); } // end while - /* - // check that all threads in the block have executed the same number of iterations - auto& n0 = alpaka::declareSharedVar(acc); - if (cms::alpakatools::once_per_block(acc)) - n0 = nloops; - alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_ACC(alpaka::syncBlockThreadsPredicate(acc, nloops == n0)); - if (thisModuleId % 100 == 1) - if (cms::alpakatools::once_per_block(acc)) - printf("# loops %d\n", nloops); - */ + alpaka::syncBlockThreads(acc); + for (uint32_t i : cms::alpakatools::independent_group_elements(acc, firstPixel, lastPixel)) { + int fpX = digi_view[i].xx() + offset; + int fpY = digi_view[i].yy() + offset; + if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId) + continue; + digi_view[i].clus() = image.clus()[fpY][fpX] + firstPixel; + } + alpaka::syncBlockThreads(acc); auto& foundClusters = alpaka::declareSharedVar(acc); foundClusters = 0; alpaka::syncBlockThreads(acc); @@ -451,21 +416,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelClustering { } } alpaka::syncBlockThreads(acc); - if (cms::alpakatools::once_per_block(acc)) { clus_view[thisModuleId].clusInModule() = foundClusters; clus_view[module].moduleId() = thisModuleId; -#ifdef GPU_DEBUG - if (foundClusters > gMaxHit) { - gMaxHit = foundClusters; - if (foundClusters > 8) - printf("max hit %d in %d\n", foundClusters, thisModuleId); - } - if (thisModuleId % 100 == 1) - printf("%d clusters in module %d\n", foundClusters, thisModuleId); -#endif } - } // module loop + + alpaka::syncBlockThreads(acc); + + } // block loop } }; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelMorphingConfig.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelMorphingConfig.h new file mode 100644 index 0000000000000..2689293bc8df2 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelMorphingConfig.h @@ -0,0 +1,13 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_SiPixelMorphingConfig_h +#define RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_SiPixelMorphingConfig_h + +#include +#include + +struct SiPixelMorphingConfig { + std::vector morphingModules; + std::array kernel1; + std::array kernel2; +}; + +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_SiPixelMorphingConfig_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc index b687a8f02a465..5c05fc373ebaf 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc @@ -4,6 +4,7 @@ #include #include #include +#include #include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h" #include "CalibTracker/Records/interface/SiPixelMappingSoARecord.h" @@ -35,8 +36,18 @@ #include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageSoA.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageDevice.h" +#include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "DataFormats/DetId/interface/DetId.h" #include "SiPixelRawToClusterKernel.h" +#include "SiPixelMorphingConfig.h" + +typedef std::pair range; +typedef std::vector region; namespace ALPAKA_ACCELERATOR_NAMESPACE { @@ -52,6 +63,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: void acquire(device::Event const& iEvent, device::EventSetup const& iSetup) override; void produce(device::Event& iEvent, device::EventSetup const& iSetup) override; + void beginRun(edm::Run const& iRun, edm::EventSetup const& iSetup) override; + std::vector parseRegions(const std::vector& regionStrings, size_t size); + bool skipDetId(const TrackerTopology* tTopo, + const DetId& detId, + const std::vector& theBarrelRegions, + const std::vector& theEndcapRegions) const; edm::EDGetTokenT rawGetToken_; edm::EDPutTokenT fmtErrorToken_; @@ -63,6 +80,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const device::ESGetToken mapToken_; const device::ESGetToken gainsToken_; const edm::ESGetToken cablingMapToken_; + const edm::ESGetToken cablingMapTokenBeginRun_; std::unique_ptr cabling_; std::vector fedIds_; @@ -74,9 +92,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const bool includeErrors_; const bool useQuality_; + const bool doDigiMorphing_; const bool verbose_; uint32_t nDigis_; const SiPixelClusterThresholds clusterThresholds_; + SiPixelMorphingConfig digiMorphingConfig_; + edm::ESGetToken trackerTopologyToken_; + + const std::vector theBarrelRegions_; + const std::vector theEndcapRegions_; }; template @@ -89,15 +113,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { gainsToken_(esConsumes()), cablingMapToken_(esConsumes( edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), + cablingMapTokenBeginRun_(esConsumes( + edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), includeErrors_(iConfig.getParameter("IncludeErrors")), useQuality_(iConfig.getParameter("UseQualityInfo")), + doDigiMorphing_(iConfig.getParameter("DoDigiMorphing")), verbose_(iConfig.getParameter("verbose")), clusterThresholds_{iConfig.getParameter("clusterThreshold_layer1"), iConfig.getParameter("clusterThreshold_otherLayers"), static_cast(iConfig.getParameter("VCaltoElectronGain")), static_cast(iConfig.getParameter("VCaltoElectronGain_L1")), static_cast(iConfig.getParameter("VCaltoElectronOffset")), - static_cast(iConfig.getParameter("VCaltoElectronOffset_L1"))} { + static_cast(iConfig.getParameter("VCaltoElectronOffset_L1"))}, + trackerTopologyToken_(esConsumes()), + theBarrelRegions_(parseRegions(iConfig.getParameter>("barrelRegions"), 3)), + theEndcapRegions_(parseRegions(iConfig.getParameter>("endcapRegions"), 4)) { if (includeErrors_) { digiErrorPutToken_ = produces(); fmtErrorToken_ = produces(); @@ -107,8 +137,109 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (!iConfig.getParameter("Regions").getParameterNames().empty()) { regions_ = std::make_unique(iConfig, consumesCollector()); } + if (doDigiMorphing_) { + edm::ParameterSet digiPSet = iConfig.getParameter("DigiMorphing"); + digiMorphingConfig_.kernel1 = digiPSet.getParameter>("kernel1"); + digiMorphingConfig_.kernel2 = digiPSet.getParameter>("kernel2"); + } + } + + template + std::vector SiPixelRawToCluster::parseRegions(const std::vector& regionStrings, + size_t size) { + std::vector regions; + + for (auto const& str : regionStrings) { + region reg; + + std::vector ranges; + boost::split(ranges, str, boost::is_any_of(",")); + + if (ranges.size() != size) { + throw cms::Exception("Configuration") << "[SiPixelDigiMorphing]:" + << " invalid number of coordinates provided in " << str << " (" << size + << " expected, " << ranges.size() << " provided)\n"; + } + + for (auto const& r : ranges) { + std::vector limits; + boost::split(limits, r, boost::is_any_of("-")); + + try { + // if range specified + if (limits.size() > 1) { + reg.push_back(std::make_pair(std::stoi(limits.at(0)), std::stoi(limits.at(1)))); + // otherwise store single value as a range + } else { + reg.push_back(std::make_pair(std::stoi(limits.at(0)), std::stoi(limits.at(0)))); + } + } catch (...) { + throw cms::Exception("Configuration") << "[SiPixelDigiMorphing]:" + << " invalid coordinate value provided in " << str << "\n"; + } + } + regions.push_back(reg); + } + + return regions; } + // apply regional digi morphing logic + template + bool SiPixelRawToCluster::skipDetId(const TrackerTopology* tTopo, + const DetId& detId, + const std::vector& theBarrelRegions, + const std::vector& theEndcapRegions) const { + // barrel + if (detId.subdetId() == static_cast(PixelSubdetector::PixelBarrel)) { + // no barrel region specified + if (theBarrelRegions.empty()) { + return true; + } else { + uint32_t layer = tTopo->pxbLayer(detId.rawId()); + uint32_t ladder = tTopo->pxbLadder(detId.rawId()); + uint32_t module = tTopo->pxbModule(detId.rawId()); + + bool inRegion = false; + + for (auto const& reg : theBarrelRegions) { + if ((layer >= reg.at(0).first && layer <= reg.at(0).second) && + (ladder >= reg.at(1).first && ladder <= reg.at(1).second) && + (module >= reg.at(2).first && module <= reg.at(2).second)) { + inRegion = true; + break; + } + } + + return !inRegion; + } + // endcap + } else { + // no endcap region specified + if (theEndcapRegions.empty()) { + return true; + } else { + uint32_t disk = tTopo->pxfDisk(detId.rawId()); + uint32_t blade = tTopo->pxfBlade(detId.rawId()); + uint32_t side = tTopo->pxfSide(detId.rawId()); + uint32_t panel = tTopo->pxfPanel(detId.rawId()); + + bool inRegion = false; + + for (auto const& reg : theEndcapRegions) { + if ((disk >= reg.at(0).first && disk <= reg.at(0).second) && + (blade >= reg.at(1).first && blade <= reg.at(1).second) && + (side >= reg.at(2).first && side <= reg.at(2).second) && + (panel >= reg.at(3).first && panel <= reg.at(3).second)) { + inRegion = true; + break; + } + } + + return !inRegion; + } + } + } template void SiPixelRawToCluster::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; @@ -125,6 +256,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { desc.add("VCaltoElectronGain_L1", 50.f); desc.add("VCaltoElectronOffset", -60.f); desc.add("VCaltoElectronOffset_L1", -670.f); + desc.add("DoDigiMorphing", false); desc.add("InputLabel", edm::InputTag("rawDataCollector")); { @@ -136,9 +268,41 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { desc.add("Regions", psd0) ->setComment("## Empty Regions PSet means complete unpacking"); } + { + edm::ParameterSetDescription psd1; + psd1.addOptional>("kernel1"); + psd1.addOptional>("kernel2"); + desc.add("DigiMorphing", psd1) + ->setComment("## Parameter settings for digi morphing to heal split clusters"); + } + + // LAYER,LADDER,MODULE (coordinates can also be specified as a range FIRST-LAST where appropriate) + desc.add>("barrelRegions", {"1,1-12,1-2", "1,1-12,7-8", "2,1-28,1", "2,1-28,8"}); + // DISK,BLADE,SIDE,PANEL (coordinates can also be specified as a range FIRST-LAST where appropriate) + desc.add>("endcapRegions", {}); + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); //Tav descriptions.addWithDefaultLabel(desc); } + template + void SiPixelRawToCluster::beginRun(edm::Run const&, edm::EventSetup const& iSetup) { + // TODO: update the morphingModules only if the eventsetup configuration has changed, instead of for every run, using an ESWatcher: + // https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideHowToGetDataFromES#Getting_data_each_time_a_Record + const SiPixelFedCablingMap& cablingMapBeginRun_ = iSetup.getData(cablingMapTokenBeginRun_); + const TrackerTopology* tTopo = &iSetup.getData(trackerTopologyToken_); + + digiMorphingConfig_.morphingModules.clear(); + for (const auto& connection : cablingMapBeginRun_.det2fedMap()) { + auto rawId = connection.first; + if (rawId == 0) + continue; + + DetId detId(rawId); + if (!skipDetId(tTopo, detId, theBarrelRegions_, theEndcapRegions_)) { + digiMorphingConfig_.morphingModules.push_back(rawId); + } + } + } template void SiPixelRawToCluster::acquire(device::Event const& iEvent, device::EventSetup const& iSetup) { @@ -153,6 +317,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { cabling_ = cablingMap_->cablingTree(); LogDebug("map version:") << cablingMap_->version(); } +#if 0 + if(!digiMorphingConfig_.morphingModules) + { + for (const auto& connection : cablingMap_->det2fedMap()) { + auto rawId = connection.first; + if (rawId == 0) continue; + + DetId detId(rawId); + if (!skipDetId(tTopo, detId, theBarrelRegions_, theEndcapRegions_)) { + digiMorphingConfig_.morphingModules.push_back(rawId); + } + } + } +#endif // if used, the buffer is guaranteed to stay alive until the after the execution of makePhase1ClustersAsync completes std::optional> modulesToUnpackRegional; @@ -245,17 +423,37 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { for (uint32_t i = 0; i < fedIds_.size(); ++i) { wordFedAppender.initializeWordFed(fedIds_[i], index[i], start[i], words[i]); } - Algo_.makePhase1ClustersAsync(iEvent.queue(), - clusterThresholds_, - hMap.const_view(), - modulesToUnpack, - dGains.const_view(), - wordFedAppender, - wordCounter, - fedCounter, - useQuality_, - includeErrors_, - verbose_); + if (doDigiMorphing_) { + Algo_.template makePhase1ClustersAsync( + iEvent.queue(), + clusterThresholds_, + doDigiMorphing_, + &digiMorphingConfig_, // TODO it may be more efficient to pass these to the kernel by value + hMap.const_view(), + modulesToUnpack, + dGains.const_view(), + wordFedAppender, + wordCounter, + fedCounter, + useQuality_, + includeErrors_, + verbose_); + } else { + Algo_.template makePhase1ClustersAsync( + iEvent.queue(), + clusterThresholds_, + doDigiMorphing_, + &digiMorphingConfig_, // TODO it may be more efficient to pass these to the kernel by value + hMap.const_view(), + modulesToUnpack, + dGains.const_view(), + wordFedAppender, + wordCounter, + fedCounter, + useQuality_, + includeErrors_, + verbose_); + } } template diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc index 924b8dc9cbf0c..ce4b59f1b1888 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc @@ -29,6 +29,8 @@ #include "HeterogeneousCore/AlpakaInterface/interface/warpsize.h" #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageSoA.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageDevice.h" // local includes #include "CalibPixel.h" @@ -442,9 +444,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // Interface to outside template + template void SiPixelRawToClusterKernel::makePhase1ClustersAsync( Queue &queue, const SiPixelClusterThresholds clusterThresholds, + bool doDigiMorphing, + const SiPixelMorphingConfig *digiMorphingConfig, const SiPixelMappingSoAConstView &cablingMap, const unsigned char *modToUnp, const SiPixelGainCalibrationForHLTSoAConstView &gains, @@ -522,14 +527,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { using namespace pixelClustering; // calibrations using namespace calibPixel; - const int threadsPerBlockOrElementsPerThread = []() { - if constexpr (std::is_same_v) { - // NB: MPORTANT: This could be tuned to benefit from innermost loop. - return 32; - } else { - return 256; - } - }(); + + const int threadsPerBlockOrElementsPerThread = + cms::alpakatools::requires_single_thread_per_block_v ? 32 : 256; const auto blocks = cms::alpakatools::divide_up_by(std::max(wordCounter, numberOfModules), threadsPerBlockOrElementsPerThread); const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlockOrElementsPerThread); @@ -565,15 +565,46 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u); alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement); - const auto elementsPerBlockFindClus = FindClus::maxElementsPerBlock; - const auto workDivMaxNumModules = - cms::alpakatools::make_workdiv(numberOfModules, elementsPerBlockFindClus); + const auto elementsPerBlockFindClus = FindClus::maxElementsPerBlock; #ifdef GPU_DEBUG std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus << " threadsPerBlockOrElementsPerThread\n"; #endif - alpaka::exec( - queue, workDivMaxNumModules, FindClus{}, digis_d->view(), clusters_d->view(), wordCounter); + int offset = int(doDigiMorphing) + 1; + + auto kernel1_d = cms::alpakatools::make_device_buffer(queue, digiMorphingConfig->kernel1.size()); + auto kernel2_d = cms::alpakatools::make_device_buffer(queue, digiMorphingConfig->kernel2.size()); + auto morphingModules_d = + cms::alpakatools::make_device_buffer(queue, digiMorphingConfig->morphingModules.size()); + auto kernel1_h = + cms::alpakatools::make_host_view(digiMorphingConfig->kernel1.data(), digiMorphingConfig->kernel1.size()); + auto kernel2_h = + cms::alpakatools::make_host_view(digiMorphingConfig->kernel2.data(), digiMorphingConfig->kernel2.size()); + auto morphingModules_h = cms::alpakatools::make_host_view(digiMorphingConfig->morphingModules.data(), + digiMorphingConfig->morphingModules.size()); + + alpaka::memcpy(queue, kernel1_d, kernel1_h); + alpaka::memcpy(queue, kernel2_d, kernel2_h); + alpaka::memcpy(queue, morphingModules_d, morphingModules_h); + + const uint32_t blocksFindClus = 64; // TODO arbitrary, to be optimised + const auto workDivMaxNumModules = + cms::alpakatools::make_workdiv(blocksFindClus, elementsPerBlockFindClus); + ImageType images(blocksFindClus, queue); + alpaka::exec(queue, + workDivMaxNumModules, + FindClus{}, + digis_d->view(), + images.view(), + offset, + kernel1_d.data(), + kernel2_d.data(), + morphingModules_d.data(), + digiMorphingConfig->morphingModules.size(), + clusters_d->view(), + wordCounter); + //alpaka::exec( + // queue, workDivMaxNumModules, FindClus{}, digis_d->view(), clusters_d->view(), wordCounter); #ifdef GPU_DEBUG alpaka::wait(queue); #endif @@ -634,6 +665,70 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // end clusterizer scope } + using Phase1Kernel = SiPixelRawToClusterKernel; + using Phase2Kernel = SiPixelRawToClusterKernel; + using HIonPhase1Kernel = SiPixelRawToClusterKernel; + template void Phase1Kernel::makePhase1ClustersAsync( + Queue &, + const SiPixelClusterThresholds, + // SiPixelImageDevice::View, + bool, + const SiPixelMorphingConfig *, + const SiPixelMappingSoAConstView &, + const unsigned char *, + const SiPixelGainCalibrationForHLTSoAConstView &, + const WordFedAppender &, + const uint32_t, + const uint32_t, + bool, + bool, + bool); + template void Phase1Kernel::makePhase1ClustersAsync( + Queue &, + const SiPixelClusterThresholds, + // SiPixelImageMorphDevice::View, + bool, + const SiPixelMorphingConfig *, + const SiPixelMappingSoAConstView &, + const unsigned char *, + const SiPixelGainCalibrationForHLTSoAConstView &, + const WordFedAppender &, + const uint32_t, + const uint32_t, + bool, + bool, + bool); + template void HIonPhase1Kernel::makePhase1ClustersAsync( + Queue &, + const SiPixelClusterThresholds, + // SiPixelImageDevice::View, + bool, + const SiPixelMorphingConfig *, + const SiPixelMappingSoAConstView &, + const unsigned char *, + const SiPixelGainCalibrationForHLTSoAConstView &, + const WordFedAppender &, + const uint32_t, + const uint32_t, + bool, + bool, + bool); + template void HIonPhase1Kernel::makePhase1ClustersAsync( + Queue &, + const SiPixelClusterThresholds, + // SiPixelImageMorphDevice::View, + bool, + const SiPixelMorphingConfig *, + const SiPixelMappingSoAConstView &, + const unsigned char *, + const SiPixelGainCalibrationForHLTSoAConstView &, + const WordFedAppender &, + const uint32_t, + const uint32_t, + bool, + bool, + bool); + template void SiPixelRawToClusterKernel::makePhase2ClustersAsync( Queue &queue, @@ -664,16 +759,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto moduleStartFirstElement = cms::alpakatools::make_device_view(queue, clusters_d->view().moduleStart(), 1u); alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement); - const auto elementsPerBlockFindClus = FindClus::maxElementsPerBlock; - const auto workDivMaxNumModules = - cms::alpakatools::make_workdiv(numberOfModules, elementsPerBlockFindClus); + /**const auto elementsPerBlockFindClus = FindClus::maxElementsPerBlock; + //const auto workDivMaxNumModules = + // cms::alpakatools::make_workdiv(numberOfModules, elementsPerBlockFindClus); #ifdef GPU_DEBUG alpaka::wait(queue); std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << elementsPerBlockFindClus << " threadsPerBlockOrElementsPerThread\n"; #endif - alpaka::exec( - queue, workDivMaxNumModules, FindClus{}, digis_view, clusters_d->view(), numDigis); + //alpaka::exec( + // queue, workDivMaxNumModules, SparseToDense{}, digis_view, clusters_d->view(), numDigis); + //alpaka::exec( + // queue, workDivMaxNumModules, FindClus{}, digis_view, clusters_d->view(), numDigis); #ifdef GPU_DEBUG alpaka::wait(queue); #endif @@ -720,6 +817,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u); alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart); + **/ #ifdef GPU_DEBUG alpaka::wait(queue); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h index aed00c9a48ed2..e01ce510b9217 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h @@ -23,6 +23,9 @@ #include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" #include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" #include "DataFormats/SiPixelDetId/interface/PixelChannelIdentifier.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageSoA.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelImageDevice.h" +#include "SiPixelMorphingConfig.h" namespace pixelDetails { @@ -150,8 +153,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { SiPixelRawToClusterKernel& operator=(const SiPixelRawToClusterKernel&) = delete; SiPixelRawToClusterKernel& operator=(SiPixelRawToClusterKernel&&) = delete; + template void makePhase1ClustersAsync(Queue& queue, const SiPixelClusterThresholds clusterThresholds, + //ImageType::View images_, + bool doDigiMorphing, + const SiPixelMorphingConfig* digiMorphingConfig, const SiPixelMappingSoAConstView& cablingMap, const unsigned char* modToUnp, const SiPixelGainCalibrationForHLTSoAConstView& gains,