diff --git a/CUDADataFormats/BeamSpot/BuildFile.xml b/CUDADataFormats/BeamSpot/BuildFile.xml deleted file mode 100644 index f639ac4de6f9b..0000000000000 --- a/CUDADataFormats/BeamSpot/BuildFile.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - diff --git a/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h b/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h deleted file mode 100644 index 7b04fac67b9f1..0000000000000 --- a/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h -#define CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h - -#include - -#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -class BeamSpotCUDA { -public: - // default constructor, required by cms::cuda::Product - BeamSpotCUDA() = default; - - // constructor that allocates cached device memory on the given CUDA stream - BeamSpotCUDA(cudaStream_t stream) { data_d_ = cms::cuda::make_device_unique(stream); } - - // movable, non-copiable - BeamSpotCUDA(BeamSpotCUDA const&) = delete; - BeamSpotCUDA(BeamSpotCUDA&&) = default; - BeamSpotCUDA& operator=(BeamSpotCUDA const&) = delete; - BeamSpotCUDA& operator=(BeamSpotCUDA&&) = default; - - BeamSpotPOD* data() { return data_d_.get(); } - BeamSpotPOD const* data() const { return data_d_.get(); } - - cms::cuda::device::unique_ptr& ptr() { return data_d_; } - cms::cuda::device::unique_ptr const& ptr() const { return data_d_; } - -private: - cms::cuda::device::unique_ptr data_d_; -}; - -#endif // CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h diff --git a/CUDADataFormats/BeamSpot/src/classes.h b/CUDADataFormats/BeamSpot/src/classes.h deleted file mode 100644 index 5aebe536f8a01..0000000000000 --- a/CUDADataFormats/BeamSpot/src/classes.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef CUDADataFormats_BeamSpot_classes_h -#define CUDADataFormats_BeamSpot_classes_h - -#include 
"CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" -#include "DataFormats/Common/interface/Wrapper.h" - -#endif // CUDADataFormats_BeamSpot_classes_h diff --git a/CUDADataFormats/BeamSpot/src/classes_def.xml b/CUDADataFormats/BeamSpot/src/classes_def.xml deleted file mode 100644 index 198edeebe7c73..0000000000000 --- a/CUDADataFormats/BeamSpot/src/classes_def.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - diff --git a/CUDADataFormats/SiPixelCluster/BuildFile.xml b/CUDADataFormats/SiPixelCluster/BuildFile.xml deleted file mode 100644 index 1bf72a85ddc0a..0000000000000 --- a/CUDADataFormats/SiPixelCluster/BuildFile.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h deleted file mode 100644 index 7f461bef6d2f9..0000000000000 --- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h -#define CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h - -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" - -#include "DataFormats/SoATemplate/interface/SoALayout.h" -#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h" - -#include - -GENERATE_SOA_LAYOUT(SiPixelClustersCUDALayout, - SOA_COLUMN(uint32_t, moduleStart), - SOA_COLUMN(uint32_t, clusInModule), - SOA_COLUMN(uint32_t, moduleId), - SOA_COLUMN(uint32_t, clusModuleStart)) - -using SiPixelClustersCUDASoA = SiPixelClustersCUDALayout<>; -using SiPixelClustersCUDASOAView = SiPixelClustersCUDALayout<>::View; -using SiPixelClustersCUDASOAConstView = SiPixelClustersCUDALayout<>::ConstView; - -// TODO: The class is created via inheritance of 
the PortableDeviceCollection. -// This is generally discouraged, and should be done via composition, i.e., -// by adding a public class attribute like: -// cms::cuda::Portabledevicecollection> collection; -// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -class SiPixelClustersCUDA : public cms::cuda::PortableDeviceCollection> { -public: - SiPixelClustersCUDA() = default; - ~SiPixelClustersCUDA() = default; - - explicit SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream) - : PortableDeviceCollection>(maxModules + 1, stream) {} - - SiPixelClustersCUDA(SiPixelClustersCUDA &&) = default; - SiPixelClustersCUDA &operator=(SiPixelClustersCUDA &&) = default; - - void setNClusters(uint32_t nClusters, int32_t offsetBPIX2) { - nClusters_h = nClusters; - offsetBPIX2_h = offsetBPIX2; - } - - uint32_t nClusters() const { return nClusters_h; } - int32_t offsetBPIX2() const { return offsetBPIX2_h; } - -private: - uint32_t nClusters_h = 0; - int32_t offsetBPIX2_h = 0; -}; - -#endif // CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h diff --git a/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h b/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h deleted file mode 100644 index 923ebaaa5446c..0000000000000 --- a/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h -#define CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h - -#include -#include - -namespace gpuClustering { -#ifdef GPU_SMALL_EVENTS - // kept for testing and debugging - constexpr uint32_t maxHitsInIter() { return 64; } -#else - // optimized for real data PU 50 - // tested on MC events with 55-75 pileup events - constexpr uint32_t maxHitsInIter() { return 160; } //TODO better tuning for PU 140-200 -#endif - - constexpr uint16_t clusterThresholdLayerOne = 2000; - constexpr uint16_t clusterThresholdOtherLayers = 
4000; - - constexpr uint32_t maxNumDigis = 3 * 256 * 1024; // @PU=200 µ=530 σ=50k this is >4σ away - constexpr uint16_t maxNumModules = 4000; - - constexpr uint16_t invalidModuleId = std::numeric_limits::max() - 1; - constexpr int invalidClusterId = -9999; - static_assert(invalidModuleId > maxNumModules); // invalidModuleId must be > maxNumModules - -} // namespace gpuClustering - -#endif // CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h diff --git a/CUDADataFormats/SiPixelCluster/src/classes.h b/CUDADataFormats/SiPixelCluster/src/classes.h deleted file mode 100644 index 3eee5a1fce009..0000000000000 --- a/CUDADataFormats/SiPixelCluster/src/classes.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef CUDADataFormats_SiPixelCluster_src_classes_h -#define CUDADataFormats_SiPixelCluster_src_classes_h - -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" -#include "DataFormats/Common/interface/Wrapper.h" - -#endif // CUDADataFormats_SiPixelCluster_src_classes_h diff --git a/CUDADataFormats/SiPixelCluster/src/classes_def.xml b/CUDADataFormats/SiPixelCluster/src/classes_def.xml deleted file mode 100644 index 70decb9f27df7..0000000000000 --- a/CUDADataFormats/SiPixelCluster/src/classes_def.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - diff --git a/CUDADataFormats/SiPixelDigi/BuildFile.xml b/CUDADataFormats/SiPixelDigi/BuildFile.xml deleted file mode 100644 index 784f42c4441a4..0000000000000 --- a/CUDADataFormats/SiPixelDigi/BuildFile.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h deleted file mode 100644 index eff550feeb22e..0000000000000 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h -#define 
CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h - -#include - -#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" -#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" -#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" - -class SiPixelDigiErrorsCUDA { -public: - using SiPixelErrorCompactVector = cms::cuda::SimpleVector; - - SiPixelDigiErrorsCUDA() = default; - explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream); - ~SiPixelDigiErrorsCUDA() = default; - - SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete; - SiPixelDigiErrorsCUDA& operator=(const SiPixelDigiErrorsCUDA&) = delete; - SiPixelDigiErrorsCUDA(SiPixelDigiErrorsCUDA&&) = default; - SiPixelDigiErrorsCUDA& operator=(SiPixelDigiErrorsCUDA&&) = default; - - const SiPixelFormatterErrors& formatterErrors() const { return formatterErrors_h; } - - SiPixelErrorCompactVector* error() { return error_d.get(); } - SiPixelErrorCompactVector const* error() const { return error_d.get(); } - - using HostDataError = std::pair>; - HostDataError dataErrorToHostAsync(cudaStream_t stream) const; - - void copyErrorToHostAsync(cudaStream_t stream); - int nErrorWords() const { return nErrorWords_; } - -private: - cms::cuda::device::unique_ptr data_d; - cms::cuda::device::unique_ptr error_d; - cms::cuda::host::unique_ptr error_h; - SiPixelFormatterErrors formatterErrors_h; - int nErrorWords_ = 0; -}; - -#endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h deleted file mode 100644 index 3beeaa4830c83..0000000000000 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef 
CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h -#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h - -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" -#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h" -#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" -#include "DataFormats/SoATemplate/interface/SoALayout.h" - -// TODO: The class is created via inheritance of the PortableDeviceCollection. -// This is generally discouraged, and should be done via composition. -// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -class SiPixelDigisCUDA : public cms::cuda::PortableDeviceCollection { -public: - SiPixelDigisCUDA() = default; - explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) - : PortableDeviceCollection(maxFedWords + 1, stream) {} - - ~SiPixelDigisCUDA() = default; - - SiPixelDigisCUDA(SiPixelDigisCUDA &&) = default; - SiPixelDigisCUDA &operator=(SiPixelDigisCUDA &&) = default; - - void setNModulesDigis(uint32_t nModules, uint32_t nDigis) { - nModules_h = nModules; - nDigis_h = nDigis; - } - - uint32_t nModules() const { return nModules_h; } - uint32_t nDigis() const { return nDigis_h; } - -private: - uint32_t nModules_h = 0; - uint32_t nDigis_h = 0; -}; - -#endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc deleted file mode 100644 index e81b1b2b592af..0000000000000 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ /dev/null @@ -1,42 +0,0 @@ -#include - -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -#include 
"HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/memsetAsync.h" - -SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream) - : data_d(cms::cuda::make_device_unique(maxFedWords, stream)), - error_d(cms::cuda::make_device_unique(stream)), - error_h(cms::cuda::make_host_unique(stream)), - formatterErrors_h(std::move(errors)), - nErrorWords_(maxFedWords) { - assert(maxFedWords != 0); - cms::cuda::memsetAsync(data_d, 0x00, maxFedWords, stream); - - cms::cuda::make_SimpleVector(error_h.get(), maxFedWords, data_d.get()); - assert(error_h->empty()); - assert(error_h->capacity() == static_cast(maxFedWords)); - - cms::cuda::copyAsync(error_d, error_h, stream); -} - -void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) { - cms::cuda::copyAsync(error_h, error_d, stream); -} - -SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const { - // On one hand size() could be sufficient. On the other hand, if - // someone copies the SimpleVector<>, (s)he might expect the data - // buffer to actually have space for capacity() elements. 
- auto data = cms::cuda::make_host_unique(error_h->capacity(), stream); - - // but transfer only the required amount - if (not error_h->empty()) { - cms::cuda::copyAsync(data, data_d, error_h->size(), stream); - } - auto err = *error_h; - err.set_data(data.get()); - return HostDataError(err, std::move(data)); -} diff --git a/CUDADataFormats/SiPixelDigi/src/classes.h b/CUDADataFormats/SiPixelDigi/src/classes.h deleted file mode 100644 index fc5d318fad688..0000000000000 --- a/CUDADataFormats/SiPixelDigi/src/classes.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef CUDADataFormats_SiPixelDigi_src_classes_h -#define CUDADataFormats_SiPixelDigi_src_classes_h - -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "DataFormats/Common/interface/Wrapper.h" - -#endif // CUDADataFormats_SiPixelDigi_src_classes_h diff --git a/CUDADataFormats/SiPixelDigi/src/classes_def.xml b/CUDADataFormats/SiPixelDigi/src/classes_def.xml deleted file mode 100644 index ff775afdc2046..0000000000000 --- a/CUDADataFormats/SiPixelDigi/src/classes_def.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/CUDADataFormats/Track/BuildFile.xml b/CUDADataFormats/Track/BuildFile.xml deleted file mode 100644 index cf07e3b540f24..0000000000000 --- a/CUDADataFormats/Track/BuildFile.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - diff --git a/CUDADataFormats/Track/README.md b/CUDADataFormats/Track/README.md deleted file mode 100644 index 8f66d9e4c4467..0000000000000 --- a/CUDADataFormats/Track/README.md +++ /dev/null @@ -1,50 +0,0 @@ -# Track CUDA Data Formats - -`CUDADataFormat`s meant to be used on Host (CPU) or Device (CUDA GPU) for -storing information about `Track`s created during the Pixel-local Reconstruction -chain. It stores data in an SoA manner. 
It combines the data contained in the -deprecated `TrackSoAHeterogeneousT` and `TrajectoryStateSoAT` classes. - -The host format is inheriting from `CUDADataFormats/Common/interface/PortableHostCollection.h`, -while the device format is inheriting from `CUDADataFormats/Common/interface/PortableDeviceCollection.h` - -Both formats use the same SoA Layout (`TrackSoAHeterogeneousLayout`) which is generated -via the `GENERATE_SOA_LAYOUT` macro in the `PixelTrackUtilities.h` file. - -## Notes - --`hitIndices` and `detIndices`, instances of `HitContainer`, have been added into the -layout as `SOA_SCALAR`s, meaning that they manage their own data independently from the SoA -`Layout`. This could be improved in the future, if `HitContainer` (aka a `OneToManyAssoc` of fixed size) -is replaced, but there don't seem to be any conflicts in including it in the `Layout` like this. -- Host and Device classes should **not** be created via inheritance, as they're done here, -but via composition. See [this discussion](https://github.com/cms-sw/cmssw/pull/40465#discussion_r1066039309). - -## TrackSoAHeterogeneousHost - -The version of the data format to be used for storing `Track` data on the CPU. -Instances of this class are to be used for: - -- Having a place to copy data to host from device, via `cudaMemcpy`, or -- Running host-side algorithms using data stored in an SoA manner. - -## TrackSoAHeterogeneousDevice - -The version of the data format to be used for storing `Track` data on the GPU. - -Instances of `TrackSoAHeterogeneousDevice` are to be created on host and be -used on device only. To do so, the instance's `view()` method is to be called -to pass a `View` to any kernel launched. Accessing data from the `view()` is not -possible on the host side. 
- -## Utilities - -`PixelTrackUtilities.h` contains a collection of methods which were originally -defined as class methods inside either `TrackSoAHeterogeneousT` and `TrajectoryStateSoAT` -which have been adapted to operate on `View` instances, so that they are callable -from within `__global__` kernels, on both CPU and CPU. - -## Use case - -See `test/TrackSoAHeterogeneous_test.cpp` for a simple example of instantiation, -processing and copying from device to host. diff --git a/CUDADataFormats/Track/interface/PixelTrackUtilities.h b/CUDADataFormats/Track/interface/PixelTrackUtilities.h deleted file mode 100644 index 6d7ea258be8d2..0000000000000 --- a/CUDADataFormats/Track/interface/PixelTrackUtilities.h +++ /dev/null @@ -1,243 +0,0 @@ -#ifndef CUDADataFormats_Track_PixelTrackUtilities_h -#define CUDADataFormats_Track_PixelTrackUtilities_h - -#include -#include -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "DataFormats/SoATemplate/interface/SoALayout.h" - -namespace pixelTrack { - - enum class Quality : uint8_t { bad = 0, edup, dup, loose, strict, tight, highPurity, notQuality }; - constexpr uint32_t qualitySize{uint8_t(Quality::notQuality)}; - const std::string qualityName[qualitySize]{"bad", "edup", "dup", "loose", "strict", "tight", "highPurity"}; - inline Quality qualityByName(std::string const &name) { - auto qp = std::find(qualityName, qualityName + qualitySize, name) - qualityName; - return static_cast(qp); - } - -} // namespace pixelTrack - -template -struct TrackSoA { - static constexpr int32_t S = TrackerTraits::maxNumberOfTuples; - static constexpr int32_t H = TrackerTraits::avgHitsPerTrack; - // Aliases in order to not confuse the GENERATE_SOA_LAYOUT - // macro with weird colons and angled brackets. 
- using Vector5f = Eigen::Matrix; - using Vector15f = Eigen::Matrix; - using Quality = pixelTrack::Quality; - - using hindex_type = uint32_t; - - using HitContainer = cms::cuda::OneToManyAssoc; - - GENERATE_SOA_LAYOUT(TrackSoALayout, - SOA_COLUMN(Quality, quality), - SOA_COLUMN(float, chi2), - SOA_COLUMN(int8_t, nLayers), - SOA_COLUMN(float, eta), - SOA_COLUMN(float, pt), - SOA_EIGEN_COLUMN(Vector5f, state), - SOA_EIGEN_COLUMN(Vector15f, covariance), - SOA_SCALAR(int, nTracks), - SOA_SCALAR(HitContainer, hitIndices), - SOA_SCALAR(HitContainer, detIndices)) -}; - -// Methods that operate on View and ConstView of the TrackSoA, and cannot be class methods. - -template -struct TracksUtilities { - using TrackSoAView = typename TrackSoA::template TrackSoALayout<>::View; - using TrackSoAConstView = typename TrackSoA::template TrackSoALayout<>::ConstView; - using hindex_type = typename TrackSoA::hindex_type; - - // State at the Beam spot - // phi,tip,1/pt,cotan(theta),zip - static __host__ __device__ inline float charge(const TrackSoAConstView &tracks, int32_t i) { - return std::copysign(1.f, tracks[i].state()(2)); - } - - static constexpr __host__ __device__ inline float phi(const TrackSoAConstView &tracks, int32_t i) { - return tracks[i].state()(0); - } - - static constexpr __host__ __device__ inline float tip(const TrackSoAConstView &tracks, int32_t i) { - return tracks[i].state()(1); - } - - static constexpr __host__ __device__ inline float zip(const TrackSoAConstView &tracks, int32_t i) { - return tracks[i].state()(4); - } - - static constexpr __host__ __device__ inline bool isTriplet(const TrackSoAConstView &tracks, int i) { - return tracks[i].nLayers() == 3; - } - - template - static constexpr __host__ __device__ inline void copyFromCircle( - TrackSoAView &tracks, V3 const &cp, M3 const &ccov, V2 const &lp, M2 const &lcov, float b, int32_t i) { - tracks[i].state() << cp.template cast(), lp.template cast(); - - tracks[i].state()(2) = tracks[i].state()(2) * b; - auto 
cov = tracks[i].covariance(); - cov(0) = ccov(0, 0); - cov(1) = ccov(0, 1); - cov(2) = b * float(ccov(0, 2)); - cov(4) = cov(3) = 0; - cov(5) = ccov(1, 1); - cov(6) = b * float(ccov(1, 2)); - cov(8) = cov(7) = 0; - cov(9) = b * b * float(ccov(2, 2)); - cov(11) = cov(10) = 0; - cov(12) = lcov(0, 0); - cov(13) = lcov(0, 1); - cov(14) = lcov(1, 1); - } - - template - static constexpr __host__ __device__ inline void copyFromDense(TrackSoAView &tracks, - V5 const &v, - M5 const &cov, - int32_t i) { - tracks[i].state() = v.template cast(); - for (int j = 0, ind = 0; j < 5; ++j) - for (auto k = j; k < 5; ++k) - tracks[i].covariance()(ind++) = cov(j, k); - } - - template - static constexpr __host__ __device__ inline void copyToDense(const TrackSoAConstView &tracks, - V5 &v, - M5 &cov, - int32_t i) { - v = tracks[i].state().template cast(); - for (int j = 0, ind = 0; j < 5; ++j) { - cov(j, j) = tracks[i].covariance()(ind++); - for (auto k = j + 1; k < 5; ++k) - cov(k, j) = cov(j, k) = tracks[i].covariance()(ind++); - } - } - - static constexpr __host__ __device__ inline int computeNumberOfLayers(const TrackSoAConstView &tracks, int32_t i) { - auto pdet = tracks.detIndices().begin(i); - int nl = 1; - auto ol = pixelTopology::getLayer(*pdet); - for (; pdet < tracks.detIndices().end(i); ++pdet) { - auto il = pixelTopology::getLayer(*pdet); - if (il != ol) - ++nl; - ol = il; - } - return nl; - } - - static constexpr __host__ __device__ inline int nHits(const TrackSoAConstView &tracks, int i) { - return tracks.detIndices().size(i); - } -}; - -namespace pixelTrack { - - template - struct QualityCutsT {}; - - template - struct QualityCutsT> { - using TrackSoAView = typename TrackSoA::template TrackSoALayout<>::View; - using TrackSoAConstView = typename TrackSoA::template TrackSoALayout<>::ConstView; - using tracksHelper = TracksUtilities; - // chi2 cut = chi2Scale * (chi2Coeff[0] + pT/GeV * (chi2Coeff[1] + pT/GeV * (chi2Coeff[2] + pT/GeV * chi2Coeff[3]))) - float chi2Coeff[4]; - 
float chi2MaxPt; // GeV - float chi2Scale; - - struct Region { - float maxTip; // cm - float minPt; // GeV - float maxZip; // cm - }; - - Region triplet; - Region quadruplet; - - __device__ __forceinline__ bool isHP(const TrackSoAConstView &tracks, int nHits, int it) const { - // impose "region cuts" based on the fit results (phi, Tip, pt, cotan(theta)), Zip) - // default cuts: - // - for triplets: |Tip| < 0.3 cm, pT > 0.5 GeV, |Zip| < 12.0 cm - // - for quadruplets: |Tip| < 0.5 cm, pT > 0.3 GeV, |Zip| < 12.0 cm - // (see CAHitNtupletGeneratorGPU.cc) - auto const ®ion = (nHits > 3) ? quadruplet : triplet; - return (std::abs(tracksHelper::tip(tracks, it)) < region.maxTip) and (tracks.pt(it) > region.minPt) and - (std::abs(tracksHelper::zip(tracks, it)) < region.maxZip); - } - - __device__ __forceinline__ bool strictCut(const TrackSoAConstView &tracks, int it) const { - auto roughLog = [](float x) { - // max diff [0.5,12] at 1.25 0.16143 - // average diff 0.0662998 - union IF { - uint32_t i; - float f; - }; - IF z; - z.f = x; - uint32_t lsb = 1 < 21; - z.i += lsb; - z.i >>= 21; - auto f = z.i & 3; - int ex = int(z.i >> 2) - 127; - - // log2(1+0.25*f) - // averaged over bins - const float frac[4] = {0.160497f, 0.452172f, 0.694562f, 0.901964f}; - return float(ex) + frac[f]; - }; - - float pt = std::min(tracks.pt(it), chi2MaxPt); - float chi2Cut = chi2Scale * (chi2Coeff[0] + roughLog(pt) * chi2Coeff[1]); - if (tracks.chi2(it) >= chi2Cut) { -#ifdef NTUPLE_FIT_DEBUG - printf("Bad chi2 %d pt %f eta %f chi2 %f\n", it, tracks.pt(it), tracks.eta(it), tracks.chi2(it)); -#endif - return true; - } - return false; - } - }; - - template - struct QualityCutsT> { - using TrackSoAView = typename TrackSoA::template TrackSoALayout<>::View; - using TrackSoAConstView = typename TrackSoA::template TrackSoALayout<>::ConstView; - using tracksHelper = TracksUtilities; - - float maxChi2; - float minPt; - float maxTip; - float maxZip; - - __device__ __forceinline__ bool isHP(const 
TrackSoAConstView &tracks, int nHits, int it) const { - return (std::abs(tracksHelper::tip(tracks, it)) < maxTip) and (tracks.pt(it) > minPt) and - (std::abs(tracksHelper::zip(tracks, it)) < maxZip); - } - __device__ __forceinline__ bool strictCut(const TrackSoAConstView &tracks, int it) const { - return tracks.chi2(it) >= maxChi2; - } - }; - -} // namespace pixelTrack - -template -using TrackLayout = typename TrackSoA::template TrackSoALayout<>; -template -using TrackSoAView = typename TrackSoA::template TrackSoALayout<>::View; -template -using TrackSoAConstView = typename TrackSoA::template TrackSoALayout<>::ConstView; - -template struct TracksUtilities; -template struct TracksUtilities; - -#endif diff --git a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h b/CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h deleted file mode 100644 index 04d286a767ab0..0000000000000 --- a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef CUDADataFormats_Track_TrackHeterogeneousDevice_H -#define CUDADataFormats_Track_TrackHeterogeneousDevice_H - -#include - -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h" - -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" - -// TODO: The class is created via inheritance of the PortableDeviceCollection. -// This is generally discouraged, and should be done via composition. 
-// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -template -class TrackSoAHeterogeneousDevice : public cms::cuda::PortableDeviceCollection> { -public: - using cms::cuda::PortableDeviceCollection>::view; - using cms::cuda::PortableDeviceCollection>::const_view; - using cms::cuda::PortableDeviceCollection>::buffer; - using cms::cuda::PortableDeviceCollection>::bufferSize; - - TrackSoAHeterogeneousDevice() = default; // cms::cuda::Product needs this - - // Constructor which specifies the SoA size - explicit TrackSoAHeterogeneousDevice(cudaStream_t stream) - : cms::cuda::PortableDeviceCollection>(TrackerTraits::maxNumberOfTuples, stream) {} -}; - -namespace pixelTrack { - - using TrackSoADevicePhase1 = TrackSoAHeterogeneousDevice; - using TrackSoADevicePhase2 = TrackSoAHeterogeneousDevice; - using TrackSoADeviceHIonPhase1 = TrackSoAHeterogeneousDevice; - -} // namespace pixelTrack - -#endif // CUDADataFormats_Track_TrackHeterogeneousT_H diff --git a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h b/CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h deleted file mode 100644 index 39e83491e1769..0000000000000 --- a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef CUDADataFormats_Track_TrackHeterogeneousHost_H -#define CUDADataFormats_Track_TrackHeterogeneousHost_H - -#include - -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Common/interface/PortableHostCollection.h" - -// TODO: The class is created via inheritance of the PortableHostCollection. -// This is generally discouraged, and should be done via composition. 
-// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -template -class TrackSoAHeterogeneousHost : public cms::cuda::PortableHostCollection> { -public: - static constexpr int32_t S = TrackerTraits::maxNumberOfTuples; //TODO: this could be made configurable at runtime - explicit TrackSoAHeterogeneousHost() : cms::cuda::PortableHostCollection>(S) {} - - using cms::cuda::PortableHostCollection>::view; - using cms::cuda::PortableHostCollection>::const_view; - using cms::cuda::PortableHostCollection>::buffer; - using cms::cuda::PortableHostCollection>::bufferSize; - - // Constructor which specifies the SoA size - explicit TrackSoAHeterogeneousHost(cudaStream_t stream) - : cms::cuda::PortableHostCollection>(S, stream) {} -}; - -namespace pixelTrack { - - using TrackSoAHostPhase1 = TrackSoAHeterogeneousHost; - using TrackSoAHostPhase2 = TrackSoAHeterogeneousHost; - using TrackSoAHostHIonPhase1 = TrackSoAHeterogeneousHost; -} // namespace pixelTrack - -#endif // CUDADataFormats_Track_TrackHeterogeneousT_H diff --git a/CUDADataFormats/Track/src/classes.h b/CUDADataFormats/Track/src/classes.h deleted file mode 100644 index 2e07adddcddd0..0000000000000 --- a/CUDADataFormats/Track/src/classes.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef CUDADataFormats_Track_src_classes_h -#define CUDADataFormats_Track_src_classes_h - -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/Common/interface/HostProduct.h" - -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" - -#include "DataFormats/Common/interface/Wrapper.h" - -#endif // CUDADataFormats_Track_src_classes_h diff --git a/CUDADataFormats/Track/src/classes_def.xml b/CUDADataFormats/Track/src/classes_def.xml deleted file mode 100644 index 5314f3f20b0d7..0000000000000 --- a/CUDADataFormats/Track/src/classes_def.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - diff 
--git a/CUDADataFormats/Track/test/BuildFile.xml b/CUDADataFormats/Track/test/BuildFile.xml deleted file mode 100644 index 32256c87ed577..0000000000000 --- a/CUDADataFormats/Track/test/BuildFile.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - diff --git a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cpp b/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cpp deleted file mode 100644 index dafa75e2e18d7..0000000000000 --- a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/** - Simple test for the pixelTrack::TrackSoA data structure - which inherits from PortableDeviceCollection. - - Creates an instance of the class (automatically allocates - memory on device), passes the view of the SoA data to - the CUDA kernels which: - - Fill the SoA with data. - - Verify that the data written is correct. - - Then, the SoA data are copied back to Host, where - a temporary host-side view (tmp_view) is created using - the same Layout to access the data on host and print it. - */ - -#include -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" - -namespace testTrackSoA { - - template - void runKernels(TrackSoAView &tracks_view, cudaStream_t stream); -} - -int main() { - cms::cudatest::requireDevices(); - - cudaStream_t stream; - cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); - - // Inner scope to deallocate memory before destroying the stream - { - // Instantiate tracks on device. PortableDeviceCollection allocates - // SoA on device automatically. 
- TrackSoAHeterogeneousDevice tracks_d(stream); - testTrackSoA::runKernels(tracks_d.view(), stream); - - // Instantate tracks on host. This is where the data will be - // copied to from device. - TrackSoAHeterogeneousHost tracks_h(stream); - - cudaCheck(cudaMemcpyAsync( - tracks_h.buffer().get(), tracks_d.const_buffer().get(), tracks_d.bufferSize(), cudaMemcpyDeviceToHost, stream)); - cudaCheck(cudaStreamSynchronize(stream)); - - // Print results - std::cout << "pt" - << "\t" - << "eta" - << "\t" - << "chi2" - << "\t" - << "quality" - << "\t" - << "nLayers" - << "\t" - << "hitIndices off" << std::endl; - - for (int i = 0; i < 10; ++i) { - std::cout << tracks_h.view()[i].pt() << "\t" << tracks_h.view()[i].eta() << "\t" << tracks_h.view()[i].chi2() - << "\t" << (int)tracks_h.view()[i].quality() << "\t" << (int)tracks_h.view()[i].nLayers() << "\t" - << tracks_h.view().hitIndices().off[i] << std::endl; - } - } - cudaCheck(cudaStreamDestroy(stream)); - - return 0; -} diff --git a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cu b/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cu deleted file mode 100644 index 8e8595eb43e94..0000000000000 --- a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cu +++ /dev/null @@ -1,63 +0,0 @@ -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "HeterogeneousCore/CUDAUtilities/interface/OneToManyAssoc.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -namespace testTrackSoA { - - // Kernel which fills the TrackSoAView with data - // to test writing to it - template - __global__ void fill(TrackSoAView tracks_view) { - int i = threadIdx.x; - if (i == 0) { - tracks_view.nTracks() = 420; - } - - for (int j = i; j < tracks_view.metadata().size(); j += blockDim.x) { - tracks_view[j].pt() = (float)j; - tracks_view[j].eta() = (float)j; - tracks_view[j].chi2() = (float)j; - tracks_view[j].quality() = (pixelTrack::Quality)(j % 256); - tracks_view[j].nLayers() = j % 128; - 
tracks_view.hitIndices().off[j] = j; - } - } - - // Kernel which reads from the TrackSoAView to verify - // that it was written correctly from the fill kernel - template - __global__ void verify(TrackSoAConstView tracks_view) { - int i = threadIdx.x; - - if (i == 0) { - printf("SoA size: % d, block dims: % d\n", tracks_view.metadata().size(), blockDim.x); - assert(tracks_view.nTracks() == 420); - } - for (int j = i; j < tracks_view.metadata().size(); j += blockDim.x) { - assert(abs(tracks_view[j].pt() - (float)j) < .0001); - assert(abs(tracks_view[j].eta() - (float)j) < .0001); - assert(abs(tracks_view[j].chi2() - (float)j) < .0001); - assert(tracks_view[j].quality() == (pixelTrack::Quality)(j % 256)); - assert(tracks_view[j].nLayers() == j % 128); - assert(tracks_view.hitIndices().off[j] == j); - } - } - - // Host function which invokes the two kernels above - template - void runKernels(TrackSoAView& tracks_view, cudaStream_t stream) { - fill<<<1, 1024, 0, stream>>>(tracks_view); - cudaCheck(cudaGetLastError()); - cudaCheck(cudaDeviceSynchronize()); - - verify<<<1, 1024, 0, stream>>>(tracks_view); - cudaCheck(cudaGetLastError()); - cudaCheck(cudaDeviceSynchronize()); - } - - template void runKernels(TrackSoAView& tracks_view, - cudaStream_t stream); - template void runKernels(TrackSoAView& tracks_view, - cudaStream_t stream); - -} // namespace testTrackSoA diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp deleted file mode 100644 index d6ff539a642b0..0000000000000 --- a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "TrajectoryStateSOA_t.h" diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu deleted file mode 100644 index d6ff539a642b0..0000000000000 --- a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu +++ /dev/null @@ -1 +0,0 @@ -#include "TrajectoryStateSOA_t.h" diff --git 
a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h deleted file mode 100644 index 6ba0eaa5c986e..0000000000000 --- a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h +++ /dev/null @@ -1,85 +0,0 @@ -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" - -using Vector5d = Eigen::Matrix; -using Matrix5d = Eigen::Matrix; -using helper = TracksUtilities; - -__host__ __device__ Matrix5d loadCov(Vector5d const& e) { - Matrix5d cov; - for (int i = 0; i < 5; ++i) - cov(i, i) = e(i) * e(i); - for (int i = 0; i < 5; ++i) { - for (int j = 0; j < i; ++j) { - double v = 0.3 * std::sqrt(cov(i, i) * cov(j, j)); // this makes the matrix pos defined - cov(i, j) = (i + j) % 2 ? -0.4 * v : 0.1 * v; - cov(j, i) = cov(i, j); - } - } - return cov; -} - -template -__global__ void testTSSoA(TrackSoAView ts) { - Vector5d par0; - par0 << 0.2, 0.1, 3.5, 0.8, 0.1; - Vector5d e0; - e0 << 0.01, 0.01, 0.035, -0.03, -0.01; - auto cov0 = loadCov(e0); - - int first = threadIdx.x + blockIdx.x * blockDim.x; - - for (int i = first; i < ts.metadata().size(); i += blockDim.x * gridDim.x) { - helper::copyFromDense(ts, par0, cov0, i); - Vector5d par1; - Matrix5d cov1; - helper::copyToDense(ts, par1, cov1, i); - Vector5d delV = par1 - par0; - Matrix5d delM = cov1 - cov0; - for (int j = 0; j < 5; ++j) { - assert(std::abs(delV(j)) < 1.e-5); - for (auto k = j; k < 5; ++k) { - assert(cov0(k, j) == cov0(j, k)); - assert(cov1(k, j) == cov1(j, k)); - assert(std::abs(delM(k, j)) < 1.e-5); - } - } - } -} - -#ifdef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#endif - -int main() { -#ifdef __CUDACC__ - cms::cudatest::requireDevices(); - 
cudaStream_t stream; - cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); -#endif - -#ifdef __CUDACC__ - // Since we are going to copy data from ts_d to ts_h, we - // need to initialize the Host collection with a stream. - TrackSoAHeterogeneousHost ts_h(stream); - TrackSoAHeterogeneousDevice ts_d(stream); -#else - // If CUDA is not available, Host collection must not be initialized - // with a stream. - TrackSoAHeterogeneousHost ts_h; -#endif - -#ifdef __CUDACC__ - testTSSoA<<<1, 64, 0, stream>>>(ts_d.view()); - cudaCheck(cudaGetLastError()); - cudaCheck(cudaMemcpyAsync( - ts_h.buffer().get(), ts_d.const_buffer().get(), ts_d.bufferSize(), cudaMemcpyDeviceToHost, stream)); - cudaCheck(cudaGetLastError()); - cudaCheck(cudaStreamSynchronize(stream)); -#else - testTSSoA(ts_h.view()); -#endif -} diff --git a/CUDADataFormats/TrackingRecHit/BuildFile.xml b/CUDADataFormats/TrackingRecHit/BuildFile.xml deleted file mode 100644 index 388d342a44497..0000000000000 --- a/CUDADataFormats/TrackingRecHit/BuildFile.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - diff --git a/CUDADataFormats/TrackingRecHit/interface/SiPixelHitStatus.h b/CUDADataFormats/TrackingRecHit/interface/SiPixelHitStatus.h deleted file mode 100644 index 13322ce3952b7..0000000000000 --- a/CUDADataFormats/TrackingRecHit/interface/SiPixelHitStatus.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef CUDADataFormats_TrackingRecHit_interface_SiPixelHitStatus_H -#define CUDADataFormats_TrackingRecHit_interface_SiPixelHitStatus_H - -#include - -// more information on bit fields : https://en.cppreference.com/w/cpp/language/bit_field -struct SiPixelHitStatus { - bool isBigX : 1; // ∈[0,1] - bool isOneX : 1; // ∈[0,1] - bool isBigY : 1; // ∈[0,1] - bool isOneY : 1; // ∈[0,1] - uint8_t qBin : 3; // ∈[0,1,...,7] -}; - -struct SiPixelHitStatusAndCharge { - SiPixelHitStatus status; - uint32_t charge : 24; -}; - -#endif diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h 
b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h deleted file mode 100644 index 89a70369fa08f..0000000000000 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h +++ /dev/null @@ -1,82 +0,0 @@ -#ifndef CUDADataFormats_RecHits_TrackingRecHitsDevice_h -#define CUDADataFormats_RecHits_TrackingRecHitsDevice_h - -#include - -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -template -class TrackingRecHitSoADevice : public cms::cuda::PortableDeviceCollection> { -public: - using hitSoA = TrackingRecHitSoA; - //Need to decorate the class with the inherited portable accessors being now a template - using cms::cuda::PortableDeviceCollection>::view; - using cms::cuda::PortableDeviceCollection>::const_view; - using cms::cuda::PortableDeviceCollection>::buffer; - using cms::cuda::PortableDeviceCollection>::bufferSize; - - TrackingRecHitSoADevice() = default; // cms::cuda::Product needs this - - using AverageGeometry = typename hitSoA::AverageGeometry; - using ParamsOnGPU = typename hitSoA::ParamsOnGPU; - - // Constructor which specifies the SoA size - explicit TrackingRecHitSoADevice(uint32_t nHits, - int32_t offsetBPIX2, - ParamsOnGPU const* cpeParams, - uint32_t const* hitsModuleStart, - cudaStream_t stream) - : cms::cuda::PortableDeviceCollection>(nHits, stream), - offsetBPIX2_(offsetBPIX2) { - cudaCheck(cudaMemcpyAsync(&(view().nHits()), &nHits, sizeof(uint32_t), cudaMemcpyDefault, stream)); - // hitsModuleStart is on Device - cudaCheck(cudaMemcpyAsync(view().hitsModuleStart().data(), - hitsModuleStart, - sizeof(uint32_t) * int(TrackerTraits::numberOfModules + 1), - cudaMemcpyDefault, - stream)); - cudaCheck(cudaMemcpyAsync(&(view().offsetBPIX2()), &offsetBPIX2, sizeof(int32_t), cudaMemcpyDefault, 
stream)); - - // cpeParams argument is a pointer to device memory, copy - // its contents into the Layout. - cudaCheck(cudaMemcpyAsync(&(view().cpeParams()), cpeParams, int(sizeof(ParamsOnGPU)), cudaMemcpyDefault, stream)); - } - - cms::cuda::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const { - auto ret = cms::cuda::make_host_unique(4 * nHits(), stream); - size_t rowSize = sizeof(float) * nHits(); - - size_t srcPitch = ptrdiff_t(view().yLocal()) - ptrdiff_t(view().xLocal()); - cudaCheck( - cudaMemcpy2DAsync(ret.get(), rowSize, view().xLocal(), srcPitch, rowSize, 4, cudaMemcpyDeviceToHost, stream)); - - return ret; - } //move to utilities - - cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const { - auto ret = cms::cuda::make_host_unique(TrackerTraits::numberOfModules + 1, stream); - cudaCheck(cudaMemcpyAsync(ret.get(), - view().hitsModuleStart().data(), - sizeof(uint32_t) * (TrackerTraits::numberOfModules + 1), - cudaMemcpyDefault, - stream)); - return ret; - } - - uint32_t nHits() const { return view().metadata().size(); } - uint32_t offsetBPIX2() const { - return offsetBPIX2_; - } //offsetBPIX2 is used on host functions so is useful to have it also stored in the class and not only in the layout -private: - uint32_t offsetBPIX2_ = 0; -}; - -//Classes definition for Phase1/Phase2, to make the classes_def lighter. Not actually used in the code. 
-using TrackingRecHitSoADevicePhase1 = TrackingRecHitSoADevice; -using TrackingRecHitSoADevicePhase2 = TrackingRecHitSoADevice; -using TrackingRecHitSoADeviceHIonPhase1 = TrackingRecHitSoADevice; - -#endif // CUDADataFormats_Track_TrackHeterogeneousT_H diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoAHost.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoAHost.h deleted file mode 100644 index bfac27b2b71e6..0000000000000 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoAHost.h +++ /dev/null @@ -1,72 +0,0 @@ -#ifndef CUDADataFormats_RecHits_TrackingRecHitsHost_h -#define CUDADataFormats_RecHits_TrackingRecHitsHost_h - -#include - -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -#include "CUDADataFormats/Common/interface/PortableHostCollection.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -template -class TrackingRecHitSoAHost : public cms::cuda::PortableHostCollection> { -public: - using hitSoA = TrackingRecHitSoA; - //Need to decorate the class with the inherited portable accessors being now a template - using cms::cuda::PortableHostCollection>::view; - using cms::cuda::PortableHostCollection>::const_view; - using cms::cuda::PortableHostCollection>::buffer; - using cms::cuda::PortableHostCollection>::bufferSize; - - TrackingRecHitSoAHost() = default; - - using AverageGeometry = typename hitSoA::AverageGeometry; - using ParamsOnGPU = typename hitSoA::ParamsOnGPU; - using PhiBinnerStorageType = typename hitSoA::PhiBinnerStorageType; - using PhiBinner = typename hitSoA::PhiBinner; - - // This SoA Host is used basically only for DQM - // so we just need a slim constructor - explicit TrackingRecHitSoAHost(uint32_t nHits) - : cms::cuda::PortableHostCollection>(nHits) {} - - explicit TrackingRecHitSoAHost(uint32_t nHits, cudaStream_t stream) - : cms::cuda::PortableHostCollection>(nHits, stream) {} - 
- explicit TrackingRecHitSoAHost(uint32_t nHits, - int32_t offsetBPIX2, - ParamsOnGPU const* cpeParams, - uint32_t const* hitsModuleStart) - : cms::cuda::PortableHostCollection>(nHits), offsetBPIX2_(offsetBPIX2) { - view().nHits() = nHits; - std::copy(hitsModuleStart, hitsModuleStart + TrackerTraits::numberOfModules + 1, view().hitsModuleStart().begin()); - memcpy(&(view().cpeParams()), cpeParams, sizeof(ParamsOnGPU)); - view().offsetBPIX2() = offsetBPIX2; - } - - explicit TrackingRecHitSoAHost(uint32_t nHits, - int32_t offsetBPIX2, - ParamsOnGPU const* cpeParams, - uint32_t const* hitsModuleStart, - cudaStream_t stream) - : cms::cuda::PortableHostCollection>(nHits, stream), - offsetBPIX2_(offsetBPIX2) { - view().nHits() = nHits; - std::copy(hitsModuleStart, hitsModuleStart + TrackerTraits::numberOfModules + 1, view().hitsModuleStart().begin()); - memcpy(&(view().cpeParams()), cpeParams, sizeof(ParamsOnGPU)); - view().offsetBPIX2() = offsetBPIX2; - } - - uint32_t nHits() const { return view().metadata().size(); } - uint32_t offsetBPIX2() const { - return offsetBPIX2_; - } //offsetBPIX2 is used on host functions so is useful to have it also stored in the class and not only in the layout -private: - uint32_t offsetBPIX2_ = 0; -}; - -using TrackingRecHitSoAHostPhase1 = TrackingRecHitSoAHost; -using TrackingRecHitSoAHostPhase2 = TrackingRecHitSoAHost; -using TrackingRecHitSoAHostHIonPhase1 = TrackingRecHitSoAHost; - -#endif // CUDADataFormats_Track_TrackHeterogeneousT_H diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h deleted file mode 100644 index 7e28cb97becc8..0000000000000 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef CUDADataFormats_RecHits_TrackingRecHitsUtilities_h -#define CUDADataFormats_RecHits_TrackingRecHitsUtilities_h - -#include -#include 
"Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "DataFormats/SoATemplate/interface/SoALayout.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "SiPixelHitStatus.h" - -template -struct TrackingRecHitSoA { - using hindex_type = typename TrackerTraits::hindex_type; - using PhiBinner = cms::cuda::HistoContainer; //28 for phase2 geometry - - using PhiBinnerStorageType = typename PhiBinner::index_type; - using AverageGeometry = pixelTopology::AverageGeometryT; - using ParamsOnGPU = pixelCPEforGPU::ParamsOnGPUT; - - using HitLayerStartArray = std::array; - using HitModuleStartArray = std::array; - - //Is it better to have two split? - GENERATE_SOA_LAYOUT(TrackingRecHitSoALayout, - SOA_COLUMN(float, xLocal), - SOA_COLUMN(float, yLocal), - SOA_COLUMN(float, xerrLocal), - SOA_COLUMN(float, yerrLocal), - SOA_COLUMN(float, xGlobal), - SOA_COLUMN(float, yGlobal), - SOA_COLUMN(float, zGlobal), - SOA_COLUMN(float, rGlobal), - SOA_COLUMN(int16_t, iphi), - SOA_COLUMN(SiPixelHitStatusAndCharge, chargeAndStatus), - SOA_COLUMN(int16_t, clusterSizeX), - SOA_COLUMN(int16_t, clusterSizeY), - SOA_COLUMN(uint16_t, detectorIndex), - - SOA_SCALAR(uint32_t, nHits), - SOA_SCALAR(int32_t, offsetBPIX2), - //These above could be separated in a specific - //layout since they don't depends on the template - //for the moment I'm keeping them here - SOA_COLUMN(PhiBinnerStorageType, phiBinnerStorage), - SOA_SCALAR(HitModuleStartArray, hitsModuleStart), - SOA_SCALAR(HitLayerStartArray, hitsLayerStart), - SOA_SCALAR(ParamsOnGPU, cpeParams), - SOA_SCALAR(AverageGeometry, averageGeometry), - SOA_SCALAR(PhiBinner, phiBinner)); -}; - -template -using TrackingRecHitLayout = typename TrackingRecHitSoA::template TrackingRecHitSoALayout<>; -template -using TrackingRecHitSoAView = typename TrackingRecHitSoA::template 
TrackingRecHitSoALayout<>::View; -template -using TrackingRecHitSoAConstView = - typename TrackingRecHitSoA::template TrackingRecHitSoALayout<>::ConstView; - -#endif diff --git a/CUDADataFormats/TrackingRecHit/src/classes.h b/CUDADataFormats/TrackingRecHit/src/classes.h deleted file mode 100644 index 1f494d0517450..0000000000000 --- a/CUDADataFormats/TrackingRecHit/src/classes.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef CUDADataFormats_TrackingRecHit_src_classes_h -#define CUDADataFormats_TrackingRecHit_src_classes_h - -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoAHost.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h" -#include "DataFormats/Common/interface/Wrapper.h" - -#endif // CUDADataFormats_TrackingRecHit_src_classes_h diff --git a/CUDADataFormats/TrackingRecHit/src/classes_def.xml b/CUDADataFormats/TrackingRecHit/src/classes_def.xml deleted file mode 100644 index dfc2c6d748e0f..0000000000000 --- a/CUDADataFormats/TrackingRecHit/src/classes_def.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - diff --git a/CUDADataFormats/TrackingRecHit/test/BuildFile.xml b/CUDADataFormats/TrackingRecHit/test/BuildFile.xml deleted file mode 100644 index 7baacbac416a1..0000000000000 --- a/CUDADataFormats/TrackingRecHit/test/BuildFile.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHitSoA_test.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHitSoA_test.cpp deleted file mode 100644 index 146bb9133d9d8..0000000000000 --- a/CUDADataFormats/TrackingRecHit/test/TrackingRecHitSoA_test.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoAHost.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h" - -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -#include 
"HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/allocate_device.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" - -namespace testTrackingRecHitSoA { - - template - void runKernels(TrackingRecHitSoADevice& hits, cudaStream_t stream); - -} - -int main() { - using ParamsOnGPU = TrackingRecHitSoADevice::ParamsOnGPU; - cms::cudatest::requireDevices(); - - cudaStream_t stream; - cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamDefault)); - - // inner scope to deallocate memory before destroying the stream - { - uint32_t nHits = 2000; - int32_t offset = 100; - uint32_t moduleStart[1856]; - - for (size_t i = 0; i < 1856; i++) { - moduleStart[i] = i * 2; - } - ParamsOnGPU* cpeParams_d; - cudaCheck(cudaMalloc(&cpeParams_d, sizeof(ParamsOnGPU))); - TrackingRecHitSoADevice tkhit(nHits, offset, cpeParams_d, &moduleStart[0], stream); - - testTrackingRecHitSoA::runKernels(tkhit, stream); - printf("tkhit hits %d \n", tkhit.nHits()); - auto test = tkhit.localCoordToHostAsync(stream); - printf("test[9] %.2f\n", test[9]); - - auto ret = tkhit.hitsModuleStartToHostAsync(stream); - printf("mods[9] %d\n", ret[9]); - cudaCheck(cudaFree(cpeParams_d)); - } - - cudaCheck(cudaStreamDestroy(stream)); - - return 0; -} diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHitSoA_test.cu b/CUDADataFormats/TrackingRecHit/test/TrackingRecHitSoA_test.cu deleted file mode 100644 index 48e8dea96911e..0000000000000 --- a/CUDADataFormats/TrackingRecHit/test/TrackingRecHitSoA_test.cu +++ /dev/null @@ -1,64 +0,0 @@ -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h" - -namespace testTrackingRecHitSoA { - - template - __global__ void fill(TrackingRecHitSoAView soa) { - int i = threadIdx.x; - int j = blockIdx.x; - if (i == 0 and j == 
0) { - soa.offsetBPIX2() = 22; - soa[10].xLocal() = 1.11; - } - - soa[i].iphi() = i % 10; - soa.hitsLayerStart()[j] = j; - __syncthreads(); - } - - template - __global__ void show(TrackingRecHitSoAView soa) { - int i = threadIdx.x; - int j = blockIdx.x; - - if (i == 0 and j == 0) { - printf("nbins = %d \n", soa.phiBinner().nbins()); - printf("offsetBPIX %d ->%d \n", i, soa.offsetBPIX2()); - printf("nHits %d ->%d \n", i, soa.nHits()); - printf("hitsModuleStart %d ->%d \n", i, soa.hitsModuleStart().at(28)); - } - - if (i < 10) // can be increased to soa.nHits() for debugging - printf("iPhi %d ->%d \n", i, soa[i].iphi()); - - if (j * blockDim.x + i < 10) // can be increased to soa.phiBinner().nbins() for debugging - printf(">bin size %d ->%d \n", j * blockDim.x + i, soa.phiBinner().size(j * blockDim.x + i)); - __syncthreads(); - } - - template - void runKernels(TrackingRecHitSoADevice& hits, cudaStream_t stream) { - printf("> RUN!\n"); - fill<<<10, 100, 0, stream>>>(hits.view()); - - cudaCheck(cudaDeviceSynchronize()); - cms::cuda::fillManyFromVector(&(hits.view().phiBinner()), - 10, - hits.view().iphi(), - hits.view().hitsLayerStart().data(), - 2000, - 256, - hits.view().phiBinnerStorage(), - stream); - cudaCheck(cudaDeviceSynchronize()); - show<<<10, 1000, 0, stream>>>(hits.view()); - cudaCheck(cudaDeviceSynchronize()); - } - - template void runKernels(TrackingRecHitSoADevice& hits, - cudaStream_t stream); - template void runKernels(TrackingRecHitSoADevice& hits, - cudaStream_t stream); - -} // namespace testTrackingRecHitSoA diff --git a/CUDADataFormats/Vertex/BuildFile.xml b/CUDADataFormats/Vertex/BuildFile.xml deleted file mode 100644 index c6b918ec4b12b..0000000000000 --- a/CUDADataFormats/Vertex/BuildFile.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - diff --git a/CUDADataFormats/Vertex/README.md b/CUDADataFormats/Vertex/README.md deleted file mode 100644 index 3e495d15f776e..0000000000000 --- a/CUDADataFormats/Vertex/README.md +++ /dev/null @@ -1,45 
+0,0 @@ -# Vertex CUDA Data Formats - -`CUDADataFormat`s meant to be used on Host (CPU) or Device (CUDA GPU) for -storing information about vertices created during the Pixel-local Reconstruction -chain. It stores data in an SoA manner. It contains the data that was previously -contained in the deprecated `ZVertexSoA` class. - -The host format is inheriting from `CUDADataFormats/Common/interface/PortableHostCollection.h`, -while the device format is inheriting from `CUDADataFormats/Common/interface/PortableDeviceCollection.h` - -Both formats use the same SoA Layout (`ZVertexSoAHeterogeneousLayout`) which is generated -via the `GENERATE_SOA_LAYOUT` macro in the `ZVertexUtilities.h` file. - -## Notes - -- Initially, `ZVertexSoA` had distinct array sizes for each attribute (e.g. `zv` was `MAXVTX` elements -long, `ndof` was `MAXTRACKS` elements long). All columns are now of uniform `MAXTRACKS` size, -meaning that there will be some wasted space (appx. 190kB). -- Host and Device classes should **not** be created via inheritance, as they're done here, -but via composition. See [this discussion](https://github.com/cms-sw/cmssw/pull/40465#discussion_r1066039309). - -## ZVertexHeterogeneousHost - -The version of the data format to be used for storing vertex data on the CPU. -Instances of this class are to be used for: - -- Having a place to copy data to host from device, via `cudaMemcpy`, or -- Running host-side algorithms using data stored in an SoA manner. - -## ZVertexHeterogeneousDevice - -The version of the data format to be used for storing vertex data on the GPU. - -Instances of `ZVertexHeterogeneousDevice` are to be created on host and be -used on device only. To do so, the instance's `view()` method is to be called -to pass a `View` to any kernel launched. Accessing data from the `view()` is not -possible on the host side. 
- -## Utilities - -Apart from `ZVertexSoAHeterogeneousLayout`, `ZVertexUtilities.h` also contains -a collection of methods which were originally -defined as class methods inside the `ZVertexSoA` class -which have been adapted to operate on `View` instances, so that they are callable -from within `__global__` kernels, on both CPU and CPU. diff --git a/CUDADataFormats/Vertex/interface/ZVertexHeterogeneous.h b/CUDADataFormats/Vertex/interface/ZVertexHeterogeneous.h deleted file mode 100644 index 417a960951fb1..0000000000000 --- a/CUDADataFormats/Vertex/interface/ZVertexHeterogeneous.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef CUDADataFormatsVertexZVertexHeterogeneous_H -#define CUDADataFormatsVertexZVertexHeterogeneous_H - -#include "CUDADataFormats/Vertex/interface/ZVertexSoA.h" -#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h" - -using ZVertexHeterogeneous = HeterogeneousSoA; -#ifndef __CUDACC__ -#include "CUDADataFormats/Common/interface/Product.h" -using ZVertexCUDAProduct = cms::cuda::Product; -#endif - -#endif diff --git a/CUDADataFormats/Vertex/interface/ZVertexSoA.h b/CUDADataFormats/Vertex/interface/ZVertexSoA.h deleted file mode 100644 index 95106050f3d7a..0000000000000 --- a/CUDADataFormats/Vertex/interface/ZVertexSoA.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef CUDADataFormats_Vertex_ZVertexSoA_h -#define CUDADataFormats_Vertex_ZVertexSoA_h - -#include -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" - -// SOA for vertices -// These vertices are clusterized and fitted only along the beam line (z) -// to obtain their global coordinate the beam spot position shall be added (eventually correcting for the beam angle as well) -struct ZVertexSoA { - static constexpr uint32_t MAXTRACKS = 128 * 1024; - static constexpr uint32_t MAXVTX = 1024; - - int16_t idv[MAXTRACKS]; // vertex index for each associated (original) track (-1 == not associate) - float zv[MAXVTX]; // output z-posistion of found vertices - float wv[MAXVTX]; // output 
weight (1/error^2) on the above - float chi2[MAXVTX]; // vertices chi2 - float ptv2[MAXVTX]; // vertices pt^2 - int32_t ndof[MAXTRACKS]; // vertices number of dof (reused as workspace for the number of nearest neighbours FIXME) - uint16_t sortInd[MAXVTX]; // sorted index (by pt2) ascending - uint32_t nvFinal; // the number of vertices - - __host__ __device__ void init() { nvFinal = 0; } -}; - -#endif // CUDADataFormats_Vertex_ZVertexSoA_h diff --git a/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h b/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h deleted file mode 100644 index ae662d7fd5f9a..0000000000000 --- a/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef CUDADataFormats_Vertex_ZVertexHeterogeneousDevice_H -#define CUDADataFormats_Vertex_ZVertexHeterogeneousDevice_H - -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h" - -// TODO: The class is created via inheritance of the PortableDeviceCollection. -// This is generally discouraged, and should be done via composition. 
-// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -template -class ZVertexSoAHeterogeneousDevice : public cms::cuda::PortableDeviceCollection> { -public: - ZVertexSoAHeterogeneousDevice() = default; // cms::cuda::Product needs this - - // Constructor which specifies the SoA size - explicit ZVertexSoAHeterogeneousDevice(cudaStream_t stream) - : PortableDeviceCollection>(S, stream) {} -}; - -using ZVertexSoADevice = ZVertexSoAHeterogeneousDevice; - -#endif // CUDADataFormats_Vertex_ZVertexHeterogeneousDevice_H diff --git a/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h b/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h deleted file mode 100644 index 6b62d615e1d11..0000000000000 --- a/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef CUDADataFormats_Vertex_ZVertexHeterogeneousHost_H -#define CUDADataFormats_Vertex_ZVertexHeterogeneousHost_H - -#include - -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "CUDADataFormats/Common/interface/PortableHostCollection.h" - -// TODO: The class is created via inheritance of the PortableHostCollection. -// This is generally discouraged, and should be done via composition. 
-// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -template -class ZVertexSoAHeterogeneousHost : public cms::cuda::PortableHostCollection> { -public: - explicit ZVertexSoAHeterogeneousHost() : cms::cuda::PortableHostCollection>(S) {} - - // Constructor which specifies the SoA size and CUDA stream - explicit ZVertexSoAHeterogeneousHost(cudaStream_t stream) - : PortableHostCollection>(S, stream) {} -}; - -using ZVertexSoAHost = ZVertexSoAHeterogeneousHost; - -#endif // CUDADataFormats_Vertex_ZVertexHeterogeneousHost_H diff --git a/CUDADataFormats/Vertex/interface/ZVertexUtilities.h b/CUDADataFormats/Vertex/interface/ZVertexUtilities.h deleted file mode 100644 index 2403652377971..0000000000000 --- a/CUDADataFormats/Vertex/interface/ZVertexUtilities.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef CUDADataFormats_Vertex_ZVertexUtilities_h -#define CUDADataFormats_Vertex_ZVertexUtilities_h - -#include -#include "DataFormats/SoATemplate/interface/SoALayout.h" - -GENERATE_SOA_LAYOUT(ZVertexSoAHeterogeneousLayout, - SOA_COLUMN(int16_t, idv), - SOA_COLUMN(float, zv), - SOA_COLUMN(float, wv), - SOA_COLUMN(float, chi2), - SOA_COLUMN(float, ptv2), - SOA_COLUMN(int32_t, ndof), - SOA_COLUMN(uint16_t, sortInd), - SOA_SCALAR(uint32_t, nvFinal)) - -// Previous ZVertexSoA class methods. -// They operate on View and ConstView of the ZVertexSoA. 
-namespace zVertex { - // Common types for both Host and Device code - using ZVertexSoALayout = ZVertexSoAHeterogeneousLayout<>; - using ZVertexSoAView = ZVertexSoAHeterogeneousLayout<>::View; - using ZVertexSoAConstView = ZVertexSoAHeterogeneousLayout<>::ConstView; - - namespace utilities { - - static constexpr uint32_t MAXTRACKS = 128 * 1024; - static constexpr uint32_t MAXVTX = 1024; - - __host__ __device__ inline void init(ZVertexSoAView &vertices) { vertices.nvFinal() = 0; } - - } // namespace utilities -} // namespace zVertex - -#endif diff --git a/CUDADataFormats/Vertex/src/classes.h b/CUDADataFormats/Vertex/src/classes.h deleted file mode 100644 index 0340affffa06c..0000000000000 --- a/CUDADataFormats/Vertex/src/classes.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef CUDADataFormats_Vertex_src_classes_h -#define CUDADataFormats_Vertex_src_classes_h - -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" -#include "CUDADataFormats/Common/interface/Product.h" -#include "DataFormats/Common/interface/Wrapper.h" - -#endif // CUDADataFormats_Vertex_src_classes_h diff --git a/CUDADataFormats/Vertex/src/classes_def.xml b/CUDADataFormats/Vertex/src/classes_def.xml deleted file mode 100644 index 63bd5a1cc94a7..0000000000000 --- a/CUDADataFormats/Vertex/src/classes_def.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h b/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h deleted file mode 100644 index f7555a75d9bec..0000000000000 --- a/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef CalibTracker_SiPixelESProducers_interface_SiPixelROCsStatusAndMappingWrapper_h -#define CalibTracker_SiPixelESProducers_interface_SiPixelROCsStatusAndMappingWrapper_h - -#include - -#include - -#include 
"CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -class SiPixelFedCablingMap; -class TrackerGeometry; -class SiPixelQuality; - -class SiPixelROCsStatusAndMappingWrapper { -public: - SiPixelROCsStatusAndMappingWrapper(SiPixelFedCablingMap const &cablingMap, - TrackerGeometry const &trackerGeom, - SiPixelQuality const *badPixelInfo); - ~SiPixelROCsStatusAndMappingWrapper(); - - bool hasQuality() const { return hasQuality_; } - - // returns pointer to GPU memory - const SiPixelROCsStatusAndMapping *getGPUProductAsync(cudaStream_t cudaStream) const; - - // returns pointer to GPU memory - const unsigned char *getModToUnpAllAsync(cudaStream_t cudaStream) const; - cms::cuda::device::unique_ptr getModToUnpRegionalAsync(std::set const &modules, - cudaStream_t cudaStream) const; - -private: - const SiPixelFedCablingMap *cablingMap_; - std::vector> modToUnpDefault; - unsigned int size; - bool hasQuality_; - - SiPixelROCsStatusAndMapping *cablingMapHost = nullptr; // pointer to struct in CPU - - struct GPUData { - ~GPUData(); - SiPixelROCsStatusAndMapping *cablingMapDevice = nullptr; // pointer to struct in GPU - }; - cms::cuda::ESProduct gpuData_; - - struct ModulesToUnpack { - ~ModulesToUnpack(); - unsigned char *modToUnpDefault = nullptr; // pointer to GPU - }; - cms::cuda::ESProduct modToUnp_; -}; - -#endif // CalibTracker_SiPixelESProducers_interface_SiPixelROCsStatusAndMappingWrapper_h diff --git a/CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc deleted file mode 100644 index 9c37860ca9ffe..0000000000000 --- a/CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc +++ /dev/null @@ -1,68 
+0,0 @@ -#include - -#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" -#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" -#include "CondFormats/DataRecord/interface/SiPixelQualityRcd.h" -#include "FWCore/Framework/interface/ESProducer.h" -#include "FWCore/Framework/interface/ESTransientHandle.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/ModuleFactory.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "RecoTracker/Record/interface/CkfComponentsRecord.h" // TODO: eventually use something more limited - -class SiPixelROCsStatusAndMappingWrapperESProducer : public edm::ESProducer { -public: - explicit SiPixelROCsStatusAndMappingWrapperESProducer(const edm::ParameterSet& iConfig); - std::unique_ptr produce(const CkfComponentsRecord& iRecord); - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - edm::ESGetToken cablingMapToken_; - edm::ESGetToken qualityToken_; - edm::ESGetToken geometryToken_; - bool useQuality_; -}; - -SiPixelROCsStatusAndMappingWrapperESProducer::SiPixelROCsStatusAndMappingWrapperESProducer( - const edm::ParameterSet& iConfig) - : useQuality_(iConfig.getParameter("UseQualityInfo")) { - auto const& component = iConfig.getParameter("ComponentName"); - auto cc = setWhatProduced(this, component); - cablingMapToken_ = cc.consumes(edm::ESInputTag{"", iConfig.getParameter("CablingMapLabel")}); - if (useQuality_) { - qualityToken_ = cc.consumes(); - } - geometryToken_ = cc.consumes(); -} - -void SiPixelROCsStatusAndMappingWrapperESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.add("ComponentName", ""); - desc.add("CablingMapLabel", "")->setComment("CablingMap label"); - 
desc.add("UseQualityInfo", false); - descriptions.addWithDefaultLabel(desc); -} - -std::unique_ptr SiPixelROCsStatusAndMappingWrapperESProducer::produce( - const CkfComponentsRecord& iRecord) { - auto cablingMap = iRecord.getTransientHandle(cablingMapToken_); - - const SiPixelQuality* quality = nullptr; - if (useQuality_) { - auto qualityInfo = iRecord.getTransientHandle(qualityToken_); - quality = qualityInfo.product(); - } - - auto geom = iRecord.getTransientHandle(geometryToken_); - - return std::make_unique(*cablingMap, *geom, quality); -} - -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" - -DEFINE_FWK_EVENTSETUP_MODULE(SiPixelROCsStatusAndMappingWrapperESProducer); diff --git a/CalibTracker/SiPixelESProducers/src/ES_SiPixelROCsStatusAndMappingWrapper.cc b/CalibTracker/SiPixelESProducers/src/ES_SiPixelROCsStatusAndMappingWrapper.cc deleted file mode 100644 index 45767102b5958..0000000000000 --- a/CalibTracker/SiPixelESProducers/src/ES_SiPixelROCsStatusAndMappingWrapper.cc +++ /dev/null @@ -1,4 +0,0 @@ -#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" -#include "FWCore/Utilities/interface/typelookup.h" - -TYPELOOKUP_DATA_REG(SiPixelROCsStatusAndMappingWrapper); diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc b/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc deleted file mode 100644 index 665d31b97ead2..0000000000000 --- a/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc +++ /dev/null @@ -1,171 +0,0 @@ -// C++ includes -#include -#include -#include -#include - -// CUDA includes -#include - -// CMSSW includes -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" -#include 
"CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelQuality.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "Geometry/CommonDetUnit/interface/GeomDetType.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" - -SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelFedCablingMap const& cablingMap, - TrackerGeometry const& trackerGeom, - SiPixelQuality const* badPixelInfo) - : cablingMap_(&cablingMap), modToUnpDefault(pixelgpudetails::MAX_SIZE), hasQuality_(badPixelInfo != nullptr) { - cudaCheck(cudaMallocHost(&cablingMapHost, sizeof(SiPixelROCsStatusAndMapping))); - - std::vector const& fedIds = cablingMap.fedIds(); - std::unique_ptr const& cabling = cablingMap.cablingTree(); - - unsigned int startFed = *(fedIds.begin()); - unsigned int endFed = *(fedIds.end() - 1); - - sipixelobjects::CablingPathToDetUnit path; - int index = 1; - - for (unsigned int fed = startFed; fed <= endFed; fed++) { - for (unsigned int link = 1; link <= pixelgpudetails::MAX_LINK; link++) { - for (unsigned int roc = 1; roc <= pixelgpudetails::MAX_ROC; roc++) { - path = {fed, link, roc}; - const sipixelobjects::PixelROC* pixelRoc = cabling->findItem(path); - cablingMapHost->fed[index] = fed; - cablingMapHost->link[index] = link; - cablingMapHost->roc[index] = roc; - if (pixelRoc != nullptr) { - cablingMapHost->rawId[index] = pixelRoc->rawId(); - cablingMapHost->rocInDet[index] = pixelRoc->idInDetUnit(); - modToUnpDefault[index] = false; - if (badPixelInfo != nullptr) - cablingMapHost->badRocs[index] = badPixelInfo->IsRocBad(pixelRoc->rawId(), pixelRoc->idInDetUnit()); - else - 
cablingMapHost->badRocs[index] = false; - } else { // store some dummy number - cablingMapHost->rawId[index] = gpuClustering::invalidModuleId; - cablingMapHost->rocInDet[index] = gpuClustering::invalidModuleId; - cablingMapHost->badRocs[index] = true; - modToUnpDefault[index] = true; - } - index++; - } - } - } // end of FED loop - - // Given FedId, Link and idinLnk; use the following formula - // to get the rawId and idinDU - // index = (FedID-1200) * MAX_LINK* MAX_ROC + (Link-1)* MAX_ROC + idinLnk; - // where, MAX_LINK = 48, MAX_ROC = 8 for Phase1 as mentioned Danek's email - // FedID varies between 1200 to 1338 (In total 108 FED's) - // Link varies between 1 to 48 - // idinLnk varies between 1 to 8 - - for (int i = 1; i < index; i++) { - if (cablingMapHost->rawId[i] == gpuClustering::invalidModuleId) { - cablingMapHost->moduleId[i] = gpuClustering::invalidModuleId; - } else { - /* - std::cout << cablingMapHost->rawId[i] << std::endl; - */ - auto gdet = trackerGeom.idToDetUnit(cablingMapHost->rawId[i]); - if (!gdet) { - LogDebug("SiPixelROCsStatusAndMapping") << " Not found: " << cablingMapHost->rawId[i] << std::endl; - continue; - } - cablingMapHost->moduleId[i] = gdet->index(); - } - LogDebug("SiPixelROCsStatusAndMapping") - << "----------------------------------------------------------------------------" << std::endl; - LogDebug("SiPixelROCsStatusAndMapping") - << i << std::setw(20) << cablingMapHost->fed[i] << std::setw(20) << cablingMapHost->link[i] << std::setw(20) - << cablingMapHost->roc[i] << std::endl; - LogDebug("SiPixelROCsStatusAndMapping") - << i << std::setw(20) << cablingMapHost->rawId[i] << std::setw(20) << cablingMapHost->rocInDet[i] - << std::setw(20) << cablingMapHost->moduleId[i] << std::endl; - LogDebug("SiPixelROCsStatusAndMapping") - << i << std::setw(20) << (bool)cablingMapHost->badRocs[i] << std::setw(20) << std::endl; - LogDebug("SiPixelROCsStatusAndMapping") - << 
"----------------------------------------------------------------------------" << std::endl; - } - - cablingMapHost->size = index - 1; -} - -SiPixelROCsStatusAndMappingWrapper::~SiPixelROCsStatusAndMappingWrapper() { cudaCheck(cudaFreeHost(cablingMapHost)); } - -const SiPixelROCsStatusAndMapping* SiPixelROCsStatusAndMappingWrapper::getGPUProductAsync( - cudaStream_t cudaStream) const { - const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { - // allocate - cudaCheck(cudaMalloc(&data.cablingMapDevice, sizeof(SiPixelROCsStatusAndMapping))); - - // transfer - cudaCheck(cudaMemcpyAsync( - data.cablingMapDevice, this->cablingMapHost, sizeof(SiPixelROCsStatusAndMapping), cudaMemcpyDefault, stream)); - }); - return data.cablingMapDevice; -} - -const unsigned char* SiPixelROCsStatusAndMappingWrapper::getModToUnpAllAsync(cudaStream_t cudaStream) const { - const auto& data = - modToUnp_.dataForCurrentDeviceAsync(cudaStream, [this](ModulesToUnpack& data, cudaStream_t stream) { - cudaCheck(cudaMalloc((void**)&data.modToUnpDefault, pixelgpudetails::MAX_SIZE_BYTE_BOOL)); - cudaCheck(cudaMemcpyAsync(data.modToUnpDefault, - this->modToUnpDefault.data(), - this->modToUnpDefault.size() * sizeof(unsigned char), - cudaMemcpyDefault, - stream)); - }); - return data.modToUnpDefault; -} - -cms::cuda::device::unique_ptr SiPixelROCsStatusAndMappingWrapper::getModToUnpRegionalAsync( - std::set const& modules, cudaStream_t cudaStream) const { - auto modToUnpDevice = cms::cuda::make_device_unique(pixelgpudetails::MAX_SIZE, cudaStream); - auto modToUnpHost = cms::cuda::make_host_unique(pixelgpudetails::MAX_SIZE, cudaStream); - - std::vector const& fedIds = cablingMap_->fedIds(); - std::unique_ptr const& cabling = cablingMap_->cablingTree(); - - unsigned int startFed = *(fedIds.begin()); - unsigned int endFed = *(fedIds.end() - 1); - - sipixelobjects::CablingPathToDetUnit path; - int index = 1; - - for (unsigned int fed = startFed; fed <= 
endFed; fed++) { - for (unsigned int link = 1; link <= pixelgpudetails::MAX_LINK; link++) { - for (unsigned int roc = 1; roc <= pixelgpudetails::MAX_ROC; roc++) { - path = {fed, link, roc}; - const sipixelobjects::PixelROC* pixelRoc = cabling->findItem(path); - if (pixelRoc != nullptr) { - modToUnpHost[index] = (not modules.empty()) and (modules.find(pixelRoc->rawId()) == modules.end()); - } else { // store some dummy number - modToUnpHost[index] = true; - } - index++; - } - } - } - - cudaCheck(cudaMemcpyAsync(modToUnpDevice.get(), - modToUnpHost.get(), - pixelgpudetails::MAX_SIZE * sizeof(unsigned char), - cudaMemcpyHostToDevice, - cudaStream)); - return modToUnpDevice; -} - -SiPixelROCsStatusAndMappingWrapper::GPUData::~GPUData() { cudaCheck(cudaFree(cablingMapDevice)); } - -SiPixelROCsStatusAndMappingWrapper::ModulesToUnpack::~ModulesToUnpack() { cudaCheck(cudaFree(modToUnpDefault)); } diff --git a/DQM/SiPixelHeterogeneous/plugins/BuildFile.xml b/DQM/SiPixelHeterogeneous/plugins/BuildFile.xml index 79925fdcb6cf8..7606931353d8d 100644 --- a/DQM/SiPixelHeterogeneous/plugins/BuildFile.xml +++ b/DQM/SiPixelHeterogeneous/plugins/BuildFile.xml @@ -9,7 +9,4 @@ - - - diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHitsSoA.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHitsSoA.cc deleted file mode 100644 index 6e2a908b59b38..0000000000000 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHitsSoA.cc +++ /dev/null @@ -1,254 +0,0 @@ -// -*- C++ -*- -// Package: SiPixelCompareRecHitsSoA -// Class: SiPixelCompareRecHitsSoA -// -/**\class SiPixelCompareRecHitsSoA SiPixelCompareRecHitsSoA.cc -*/ -// -// Author: Suvankar Roy Chowdhury, Alessandro Rossi -// -#include "DataFormats/Math/interface/approx_atan2.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/Frameworkfwd.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include 
"FWCore/MessageLogger/interface/MessageLogger.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -// DQM Histograming -#include "DQMServices/Core/interface/MonitorElement.h" -#include "DQMServices/Core/interface/DQMEDAnalyzer.h" -#include "DQMServices/Core/interface/DQMStore.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoAHost.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -// Geometry -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" -#include "Geometry/CommonTopologies/interface/PixelTopology.h" -#include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h" -#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" - -template -class SiPixelCompareRecHitsSoA : public DQMEDAnalyzer { -public: - using HitSoA = TrackingRecHitSoAView; - using HitsOnHost = TrackingRecHitSoAHost; - - explicit SiPixelCompareRecHitsSoA(const edm::ParameterSet&); - ~SiPixelCompareRecHitsSoA() override = default; - void dqmBeginRun(const edm::Run&, const edm::EventSetup&) override; - void bookHistograms(DQMStore::IBooker& ibooker, edm::Run const& iRun, edm::EventSetup const& iSetup) override; - void analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) override; - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - const edm::ESGetToken geomToken_; - const edm::ESGetToken topoToken_; - const edm::EDGetTokenT tokenSoAHitsCPU_; //these two are both on CPU but originally they have been - const edm::EDGetTokenT tokenSoAHitsGPU_; //produced on CPU or on GPU - const std::string topFolderName_; - const float mind2cut_; - static constexpr uint32_t invalidHit_ = std::numeric_limits::max(); - static constexpr float micron_ = 10000.; - const TrackerGeometry* tkGeom_ = nullptr; - const TrackerTopology* tTopo_ = nullptr; - 
MonitorElement* hnHits_; - MonitorElement* hBchargeL_[4]; // max 4 barrel hits - MonitorElement* hBsizexL_[4]; - MonitorElement* hBsizeyL_[4]; - MonitorElement* hBposxL_[4]; - MonitorElement* hBposyL_[4]; - MonitorElement* hFchargeD_[2][12]; // max 12 endcap disks - MonitorElement* hFsizexD_[2][12]; - MonitorElement* hFsizeyD_[2][12]; - MonitorElement* hFposxD_[2][12]; - MonitorElement* hFposyD_[2][12]; - //differences - MonitorElement* hBchargeDiff_; - MonitorElement* hFchargeDiff_; - MonitorElement* hBsizeXDiff_; - MonitorElement* hFsizeXDiff_; - MonitorElement* hBsizeYDiff_; - MonitorElement* hFsizeYDiff_; - MonitorElement* hBposXDiff_; - MonitorElement* hFposXDiff_; - MonitorElement* hBposYDiff_; - MonitorElement* hFposYDiff_; -}; -// -// constructors -// - -template -SiPixelCompareRecHitsSoA::SiPixelCompareRecHitsSoA(const edm::ParameterSet& iConfig) - : geomToken_(esConsumes()), - topoToken_(esConsumes()), - tokenSoAHitsCPU_(consumes(iConfig.getParameter("pixelHitsSrcCPU"))), - tokenSoAHitsGPU_(consumes(iConfig.getParameter("pixelHitsSrcGPU"))), - topFolderName_(iConfig.getParameter("topFolderName")), - mind2cut_(iConfig.getParameter("minD2cut")) {} -// -// Begin Run -// -template -void SiPixelCompareRecHitsSoA::dqmBeginRun(const edm::Run& iRun, const edm::EventSetup& iSetup) { - tkGeom_ = &iSetup.getData(geomToken_); - tTopo_ = &iSetup.getData(topoToken_); -} - -// -// -- Analyze -// -template -void SiPixelCompareRecHitsSoA::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { - const auto& rhsoaHandleCPU = iEvent.getHandle(tokenSoAHitsCPU_); - const auto& rhsoaHandleGPU = iEvent.getHandle(tokenSoAHitsGPU_); - if (not rhsoaHandleCPU or not rhsoaHandleGPU) { - edm::LogWarning out("SiPixelCompareRecHitSoA"); - if (not rhsoaHandleCPU) { - out << "reference (CPU) rechits not found; "; - } - if (not rhsoaHandleGPU) { - out << "target (GPU) rechits not found; "; - } - out << "the comparison will not run."; - return; - } - - auto const& rhsoaCPU = 
*rhsoaHandleCPU; - auto const& rhsoaGPU = *rhsoaHandleGPU; - - auto const& soa2dCPU = rhsoaCPU.const_view(); - auto const& soa2dGPU = rhsoaGPU.const_view(); - - uint32_t nHitsCPU = soa2dCPU.nHits(); - uint32_t nHitsGPU = soa2dGPU.nHits(); - - hnHits_->Fill(nHitsCPU, nHitsGPU); - auto detIds = tkGeom_->detUnitIds(); - for (uint32_t i = 0; i < nHitsCPU; i++) { - float minD = mind2cut_; - uint32_t matchedHit = invalidHit_; - uint16_t indCPU = soa2dCPU[i].detectorIndex(); - float xLocalCPU = soa2dCPU[i].xLocal(); - float yLocalCPU = soa2dCPU[i].yLocal(); - for (uint32_t j = 0; j < nHitsGPU; j++) { - if (soa2dGPU.detectorIndex(j) == indCPU) { - float dx = xLocalCPU - soa2dGPU[j].xLocal(); - float dy = yLocalCPU - soa2dGPU[j].yLocal(); - float distance = dx * dx + dy * dy; - if (distance < minD) { - minD = distance; - matchedHit = j; - } - } - } - DetId id = detIds[indCPU]; - uint32_t chargeCPU = soa2dCPU[i].chargeAndStatus().charge; - int16_t sizeXCPU = std::ceil(float(std::abs(soa2dCPU[i].clusterSizeX()) / 8.)); - int16_t sizeYCPU = std::ceil(float(std::abs(soa2dCPU[i].clusterSizeY()) / 8.)); - uint32_t chargeGPU = 0; - int16_t sizeXGPU = -99; - int16_t sizeYGPU = -99; - float xLocalGPU = -999.; - float yLocalGPU = -999.; - if (matchedHit != invalidHit_) { - chargeGPU = soa2dGPU[matchedHit].chargeAndStatus().charge; - sizeXGPU = std::ceil(float(std::abs(soa2dGPU[matchedHit].clusterSizeX()) / 8.)); - sizeYGPU = std::ceil(float(std::abs(soa2dGPU[matchedHit].clusterSizeY()) / 8.)); - xLocalGPU = soa2dGPU[matchedHit].xLocal(); - yLocalGPU = soa2dGPU[matchedHit].yLocal(); - } - switch (id.subdetId()) { - case PixelSubdetector::PixelBarrel: - hBchargeL_[tTopo_->pxbLayer(id) - 1]->Fill(chargeCPU, chargeGPU); - hBsizexL_[tTopo_->pxbLayer(id) - 1]->Fill(sizeXCPU, sizeXGPU); - hBsizeyL_[tTopo_->pxbLayer(id) - 1]->Fill(sizeYCPU, sizeYGPU); - hBposxL_[tTopo_->pxbLayer(id) - 1]->Fill(xLocalCPU, xLocalGPU); - hBposyL_[tTopo_->pxbLayer(id) - 1]->Fill(yLocalCPU, yLocalGPU); - 
hBchargeDiff_->Fill(chargeCPU - chargeGPU); - hBsizeXDiff_->Fill(sizeXCPU - sizeXGPU); - hBsizeYDiff_->Fill(sizeYCPU - sizeYGPU); - hBposXDiff_->Fill(micron_ * (xLocalCPU - xLocalGPU)); - hBposYDiff_->Fill(micron_ * (yLocalCPU - yLocalGPU)); - break; - case PixelSubdetector::PixelEndcap: - hFchargeD_[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(chargeCPU, chargeGPU); - hFsizexD_[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(sizeXCPU, sizeXGPU); - hFsizeyD_[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(sizeYCPU, sizeYGPU); - hFposxD_[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(xLocalCPU, xLocalGPU); - hFposyD_[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(yLocalCPU, yLocalGPU); - hFchargeDiff_->Fill(chargeCPU - chargeGPU); - hFsizeXDiff_->Fill(sizeXCPU - sizeXGPU); - hFsizeYDiff_->Fill(sizeYCPU - sizeYGPU); - hFposXDiff_->Fill(micron_ * (xLocalCPU - xLocalGPU)); - hFposYDiff_->Fill(micron_ * (yLocalCPU - yLocalGPU)); - break; - } - } -} - -// -// -- Book Histograms -// -template -void SiPixelCompareRecHitsSoA::bookHistograms(DQMStore::IBooker& iBook, - edm::Run const& iRun, - edm::EventSetup const& iSetup) { - iBook.cd(); - iBook.setCurrentFolder(topFolderName_); - - // clang-format off - //Global - hnHits_ = iBook.book2I("nHits", "CPUvsGPU RecHits per event;#CPU RecHits;#GPU RecHits", 200, 0, 5000,200, 0, 5000); - //Barrel Layer - for(unsigned int il=0;ilnumberOfLayers(PixelSubdetector::PixelBarrel);il++){ - hBchargeL_[il] = iBook.book2I(Form("recHitsBLay%dCharge",il+1), Form("CPUvsGPU RecHits Charge Barrel Layer%d;CPU Charge;GPU Charge",il+1), 250, 0, 100000, 250, 0, 100000); - hBsizexL_[il] = iBook.book2I(Form("recHitsBLay%dSizex",il+1), Form("CPUvsGPU RecHits SizeX Barrel Layer%d;CPU SizeX;GPU SizeX",il+1), 30, 0, 30, 30, 0, 30); - hBsizeyL_[il] = iBook.book2I(Form("recHitsBLay%dSizey",il+1), Form("CPUvsGPU RecHits SizeY Barrel Layer%d;CPU SizeY;GPU SizeY",il+1), 30, 0, 30, 30, 0, 30); - hBposxL_[il] = 
iBook.book2D(Form("recHitsBLay%dPosx",il+1), Form("CPUvsGPU RecHits x-pos in Barrel Layer%d;CPU pos x;GPU pos x",il+1), 200, -5, 5, 200,-5,5); - hBposyL_[il] = iBook.book2D(Form("recHitsBLay%dPosy",il+1), Form("CPUvsGPU RecHits y-pos in Barrel Layer%d;CPU pos y;GPU pos y",il+1), 200, -5, 5, 200,-5,5); - } - //Endcaps - //Endcaps Disk - for(int is=0;is<2;is++){ - int sign=is==0? -1:1; - for(unsigned int id=0;idnumberOfLayers(PixelSubdetector::PixelEndcap);id++){ - hFchargeD_[is][id] = iBook.book2I(Form("recHitsFDisk%+dCharge",id*sign+sign), Form("CPUvsGPU RecHits Charge Endcaps Disk%+d;CPU Charge;GPU Charge",id*sign+sign), 250, 0, 100000, 250, 0, 100000); - hFsizexD_[is][id] = iBook.book2I(Form("recHitsFDisk%+dSizex",id*sign+sign), Form("CPUvsGPU RecHits SizeX Endcaps Disk%+d;CPU SizeX;GPU SizeX",id*sign+sign), 30, 0, 30, 30, 0, 30); - hFsizeyD_[is][id] = iBook.book2I(Form("recHitsFDisk%+dSizey",id*sign+sign), Form("CPUvsGPU RecHits SizeY Endcaps Disk%+d;CPU SizeY;GPU SizeY",id*sign+sign), 30, 0, 30, 30, 0, 30); - hFposxD_[is][id] = iBook.book2D(Form("recHitsFDisk%+dPosx",id*sign+sign), Form("CPUvsGPU RecHits x-pos Endcaps Disk%+d;CPU pos x;GPU pos x",id*sign+sign), 200, -5, 5, 200, -5, 5); - hFposyD_[is][id] = iBook.book2D(Form("recHitsFDisk%+dPosy",id*sign+sign), Form("CPUvsGPU RecHits y-pos Endcaps Disk%+d;CPU pos y;GPU pos y",id*sign+sign), 200, -5, 5, 200, -5, 5); - } - } - //1D differences - hBchargeDiff_ = iBook.book1D("rechitChargeDiffBpix","Charge differnce of rechits in BPix; rechit charge difference (CPU - GPU)", 101, -50.5, 50.5); - hFchargeDiff_ = iBook.book1D("rechitChargeDiffFpix","Charge differnce of rechits in FPix; rechit charge difference (CPU - GPU)", 101, -50.5, 50.5); - hBsizeXDiff_ = iBook.book1D("rechitsizeXDiffBpix","SizeX difference of rechits in BPix; rechit sizex difference (CPU - GPU)", 21, -10.5, 10.5); - hFsizeXDiff_ = iBook.book1D("rechitsizeXDiffFpix","SizeX difference of rechits in FPix; rechit sizex difference (CPU - GPU)", 21, 
-10.5, 10.5); - hBsizeYDiff_ = iBook.book1D("rechitsizeYDiffBpix","SizeY difference of rechits in BPix; rechit sizey difference (CPU - GPU)", 21, -10.5, 10.5); - hFsizeYDiff_ = iBook.book1D("rechitsizeYDiffFpix","SizeY difference of rechits in FPix; rechit sizey difference (CPU - GPU)", 21, -10.5, 10.5); - hBposXDiff_ = iBook.book1D("rechitsposXDiffBpix","x-position difference of rechits in BPix; rechit x-pos difference (CPU - GPU)", 1000, -10, 10); - hFposXDiff_ = iBook.book1D("rechitsposXDiffFpix","x-position difference of rechits in FPix; rechit x-pos difference (CPU - GPU)", 1000, -10, 10); - hBposYDiff_ = iBook.book1D("rechitsposYDiffBpix","y-position difference of rechits in BPix; rechit y-pos difference (CPU - GPU)", 1000, -10, 10); - hFposYDiff_ = iBook.book1D("rechitsposYDiffFpix","y-position difference of rechits in FPix; rechit y-pos difference (CPU - GPU)", 1000, -10, 10); -} - -template -void SiPixelCompareRecHitsSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - // monitorpixelRecHitsSoA - edm::ParameterSetDescription desc; - desc.add("pixelHitsSrcCPU", edm::InputTag("siPixelRecHitsPreSplittingSoA@cpu")); - desc.add("pixelHitsSrcGPU", edm::InputTag("siPixelRecHitsPreSplittingSoA@cuda")); - desc.add("topFolderName", "SiPixelHeterogeneous/PixelRecHitsCompareGPUvsCPU"); - desc.add("minD2cut", 0.0001); - descriptions.addWithDefaultLabel(desc); -} - -using SiPixelPhase1CompareRecHitsSoA = SiPixelCompareRecHitsSoA; -using SiPixelPhase2CompareRecHitsSoA = SiPixelCompareRecHitsSoA; -using SiPixelHIonPhase1CompareRecHitsSoA = SiPixelCompareRecHitsSoA; - -DEFINE_FWK_MODULE(SiPixelPhase1CompareRecHitsSoA); -DEFINE_FWK_MODULE(SiPixelPhase2CompareRecHitsSoA); -DEFINE_FWK_MODULE(SiPixelHIonPhase1CompareRecHitsSoA); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoA.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoA.cc deleted file mode 100644 index f3635d6df45da..0000000000000 --- 
a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoA.cc +++ /dev/null @@ -1,368 +0,0 @@ -// -*- C++ -*- -// Package: SiPixelCompareTrackSoA -// Class: SiPixelCompareTrackSoA -// -/**\class SiPixelCompareTrackSoA SiPixelCompareTrackSoA.cc -*/ -// -// Author: Suvankar Roy Chowdhury -// -#include "DataFormats/Common/interface/Handle.h" -#include "DataFormats/Math/interface/deltaR.h" -#include "DataFormats/Math/interface/deltaPhi.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/Frameworkfwd.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/Utilities/interface/InputTag.h" -// DQM Histograming -#include "DQMServices/Core/interface/MonitorElement.h" -#include "DQMServices/Core/interface/DQMEDAnalyzer.h" -#include "DQMServices/Core/interface/DQMStore.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -// for string manipulations -#include - -namespace { - // same logic used for the MTV: - // cf https://github.com/cms-sw/cmssw/blob/master/Validation/RecoTrack/src/MTVHistoProducerAlgoForTracker.cc - typedef dqm::reco::DQMStore DQMStore; - - void setBinLog(TAxis* axis) { - int bins = axis->GetNbins(); - float from = axis->GetXmin(); - float to = axis->GetXmax(); - float width = (to - from) / bins; - std::vector new_bins(bins + 1, 0); - for (int i = 0; i <= bins; i++) { - new_bins[i] = TMath::Power(10, from + i * width); - } - axis->Set(bins, new_bins.data()); - } - - void setBinLogX(TH1* h) { - TAxis* axis = h->GetXaxis(); - setBinLog(axis); - } - void setBinLogY(TH1* h) { - TAxis* axis = h->GetYaxis(); - setBinLog(axis); - } - - template - dqm::reco::MonitorElement* make2DIfLog(DQMStore::IBooker& ibook, bool logx, bool logy, Args&&... 
args) { - auto h = std::make_unique(std::forward(args)...); - if (logx) - setBinLogX(h.get()); - if (logy) - setBinLogY(h.get()); - const auto& name = h->GetName(); - return ibook.book2I(name, h.release()); - } -} // namespace - -template -class SiPixelCompareTrackSoA : public DQMEDAnalyzer { -public: - using PixelTrackSoA = TrackSoAHeterogeneousHost; - - explicit SiPixelCompareTrackSoA(const edm::ParameterSet&); - ~SiPixelCompareTrackSoA() override = default; - void bookHistograms(DQMStore::IBooker& ibooker, edm::Run const& iRun, edm::EventSetup const& iSetup) override; - void analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) override; - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - const edm::EDGetTokenT tokenSoATrackCPU_; - const edm::EDGetTokenT tokenSoATrackGPU_; - const std::string topFolderName_; - const bool useQualityCut_; - const pixelTrack::Quality minQuality_; - const float dr2cut_; - MonitorElement* hnTracks_; - MonitorElement* hnLooseAndAboveTracks_; - MonitorElement* hnLooseAndAboveTracks_matched_; - MonitorElement* hDeltaNTracks_; - MonitorElement* hDeltaNLooseAndAboveTracks_; - MonitorElement* hDeltaNLooseAndAboveTracks_matched_; - MonitorElement* hnHits_; - MonitorElement* hnHitsVsPhi_; - MonitorElement* hnHitsVsEta_; - MonitorElement* hnLayers_; - MonitorElement* hnLayersVsPhi_; - MonitorElement* hnLayersVsEta_; - MonitorElement* hCharge_; - MonitorElement* hchi2_; - MonitorElement* hChi2VsPhi_; - MonitorElement* hChi2VsEta_; - MonitorElement* hpt_; - MonitorElement* hCurvature_; - MonitorElement* hptLogLog_; - MonitorElement* heta_; - MonitorElement* hphi_; - MonitorElement* hz_; - MonitorElement* htip_; - MonitorElement* hquality_; - //1D differences - MonitorElement* hptdiffMatched_; - MonitorElement* hCurvdiffMatched_; - MonitorElement* hetadiffMatched_; - MonitorElement* hphidiffMatched_; - MonitorElement* hzdiffMatched_; - MonitorElement* htipdiffMatched_; - - //for matching eff vs 
region: derive the ratio at harvesting - MonitorElement* hpt_eta_tkAllRef_; - MonitorElement* hpt_eta_tkAllRefMatched_; - MonitorElement* hphi_z_tkAllRef_; - MonitorElement* hphi_z_tkAllRefMatched_; -}; - -// -// constructors -// - -template -SiPixelCompareTrackSoA::SiPixelCompareTrackSoA(const edm::ParameterSet& iConfig) - : tokenSoATrackCPU_(consumes(iConfig.getParameter("pixelTrackSrcCPU"))), - tokenSoATrackGPU_(consumes(iConfig.getParameter("pixelTrackSrcGPU"))), - topFolderName_(iConfig.getParameter("topFolderName")), - useQualityCut_(iConfig.getParameter("useQualityCut")), - minQuality_(pixelTrack::qualityByName(iConfig.getParameter("minQuality"))), - dr2cut_(iConfig.getParameter("deltaR2cut")) {} - -// -// -- Analyze -// -template -void SiPixelCompareTrackSoA::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { - using helper = TracksUtilities; - const auto& tsoaHandleCPU = iEvent.getHandle(tokenSoATrackCPU_); - const auto& tsoaHandleGPU = iEvent.getHandle(tokenSoATrackGPU_); - if (not tsoaHandleCPU or not tsoaHandleGPU) { - edm::LogWarning out("SiPixelCompareTrackSoA"); - if (not tsoaHandleCPU) { - out << "reference (cpu) tracks not found; "; - } - if (not tsoaHandleGPU) { - out << "target (gpu) tracks not found; "; - } - out << "the comparison will not run."; - return; - } - - auto const& tsoaCPU = *tsoaHandleCPU; - auto const& tsoaGPU = *tsoaHandleGPU; - auto maxTracksCPU = tsoaCPU.view().metadata().size(); //this should be same for both? - auto maxTracksGPU = tsoaGPU.view().metadata().size(); //this should be same for both? - auto const* qualityCPU = tsoaCPU.view().quality(); - auto const* qualityGPU = tsoaGPU.view().quality(); - int32_t nTracksCPU = 0; - int32_t nTracksGPU = 0; - int32_t nLooseAndAboveTracksCPU = 0; - int32_t nLooseAndAboveTracksCPU_matchedGPU = 0; - int32_t nLooseAndAboveTracksGPU = 0; - - //Loop over GPU tracks and store the indices of the loose tracks. Whats happens if useQualityCut_ is false? 
- std::vector looseTrkidxGPU; - for (int32_t jt = 0; jt < maxTracksGPU; ++jt) { - if (helper::nHits(tsoaGPU.view(), jt) == 0) - break; // this is a guard - if (!(tsoaGPU.view()[jt].pt() > 0.)) - continue; - nTracksGPU++; - if (useQualityCut_ && qualityGPU[jt] < minQuality_) - continue; - nLooseAndAboveTracksGPU++; - looseTrkidxGPU.emplace_back(jt); - } - - //Now loop over CPU tracks//nested loop for loose gPU tracks - for (int32_t it = 0; it < maxTracksCPU; ++it) { - int nHitsCPU = helper::nHits(tsoaCPU.view(), it); - - if (nHitsCPU == 0) - break; // this is a guard - - float ptCPU = tsoaCPU.view()[it].pt(); - float etaCPU = tsoaCPU.view()[it].eta(); - float phiCPU = helper::phi(tsoaCPU.view(), it); - float zipCPU = helper::zip(tsoaCPU.view(), it); - float tipCPU = helper::tip(tsoaCPU.view(), it); - auto qCPU = helper::charge(tsoaCPU.view(), it); - - if (!(ptCPU > 0.)) - continue; - nTracksCPU++; - if (useQualityCut_ && qualityCPU[it] < minQuality_) - continue; - nLooseAndAboveTracksCPU++; - //Now loop over loose GPU trk and find the closest in DeltaR//do we need pt cut? 
- const int32_t notFound = -1; - int32_t closestTkidx = notFound; - float mindr2 = dr2cut_; - - for (auto gid : looseTrkidxGPU) { - float etaGPU = tsoaGPU.view()[gid].eta(); - float phiGPU = helper::phi(tsoaGPU.view(), gid); - float dr2 = reco::deltaR2(etaCPU, phiCPU, etaGPU, phiGPU); - if (dr2 > dr2cut_) - continue; // this is arbitrary - if (mindr2 > dr2) { - mindr2 = dr2; - closestTkidx = gid; - } - } - - hpt_eta_tkAllRef_->Fill(etaCPU, ptCPU); //all CPU tk - hphi_z_tkAllRef_->Fill(phiCPU, zipCPU); - if (closestTkidx == notFound) - continue; - nLooseAndAboveTracksCPU_matchedGPU++; - - hchi2_->Fill(tsoaCPU.view()[it].chi2(), tsoaGPU.view()[closestTkidx].chi2()); - hCharge_->Fill(qCPU, helper::charge(tsoaGPU.view(), closestTkidx)); - hnHits_->Fill(helper::nHits(tsoaCPU.view(), it), helper::nHits(tsoaGPU.view(), closestTkidx)); - hnLayers_->Fill(tsoaCPU.view()[it].nLayers(), tsoaGPU.view()[closestTkidx].nLayers()); - hpt_->Fill(ptCPU, tsoaGPU.view()[closestTkidx].pt()); - hCurvature_->Fill(qCPU / ptCPU, helper::charge(tsoaGPU.view(), closestTkidx) / tsoaGPU.view()[closestTkidx].pt()); - hptLogLog_->Fill(ptCPU, tsoaGPU.view()[closestTkidx].pt()); - heta_->Fill(etaCPU, tsoaGPU.view()[closestTkidx].eta()); - hphi_->Fill(phiCPU, helper::phi(tsoaGPU.view(), closestTkidx)); - hz_->Fill(zipCPU, helper::zip(tsoaGPU.view(), closestTkidx)); - htip_->Fill(tipCPU, helper::tip(tsoaGPU.view(), closestTkidx)); - hptdiffMatched_->Fill(ptCPU - tsoaGPU.view()[closestTkidx].pt()); - hCurvdiffMatched_->Fill((helper::charge(tsoaCPU.view(), it) / tsoaCPU.view()[it].pt()) - - (helper::charge(tsoaGPU.view(), closestTkidx) / tsoaGPU.view()[closestTkidx].pt())); - hetadiffMatched_->Fill(etaCPU - tsoaGPU.view()[closestTkidx].eta()); - hphidiffMatched_->Fill(reco::deltaPhi(phiCPU, helper::phi(tsoaGPU.view(), closestTkidx))); - hzdiffMatched_->Fill(zipCPU - helper::zip(tsoaGPU.view(), closestTkidx)); - htipdiffMatched_->Fill(tipCPU - helper::tip(tsoaGPU.view(), closestTkidx)); - 
hpt_eta_tkAllRefMatched_->Fill(etaCPU, tsoaCPU.view()[it].pt()); //matched to gpu - hphi_z_tkAllRefMatched_->Fill(etaCPU, zipCPU); - } - - // Define a lambda function for filling the histograms - auto fillHistogram = [](auto& histogram, auto xValue, auto yValue) { histogram->Fill(xValue, yValue); }; - - // Define a lambda for filling delta histograms - auto fillDeltaHistogram = [](auto& histogram, int cpuValue, int gpuValue) { - histogram->Fill(std::min(cpuValue, 1000), std::clamp(gpuValue - cpuValue, -100, 100)); - }; - - // Fill the histograms - fillHistogram(hnTracks_, nTracksCPU, nTracksGPU); - fillHistogram(hnLooseAndAboveTracks_, nLooseAndAboveTracksCPU, nLooseAndAboveTracksGPU); - fillHistogram(hnLooseAndAboveTracks_matched_, nLooseAndAboveTracksCPU, nLooseAndAboveTracksCPU_matchedGPU); - - fillDeltaHistogram(hDeltaNTracks_, nTracksCPU, nTracksGPU); - fillDeltaHistogram(hDeltaNLooseAndAboveTracks_, nLooseAndAboveTracksCPU, nLooseAndAboveTracksGPU); - fillDeltaHistogram(hDeltaNLooseAndAboveTracks_matched_, nLooseAndAboveTracksCPU, nLooseAndAboveTracksCPU_matchedGPU); -} - -// -// -- Book Histograms -// -template -void SiPixelCompareTrackSoA::bookHistograms(DQMStore::IBooker& iBook, - edm::Run const& iRun, - edm::EventSetup const& iSetup) { - iBook.cd(); - iBook.setCurrentFolder(topFolderName_); - - // Define a helper function for booking histograms - std::string toRep = "Number of tracks"; - auto bookTracksTH2I = [&](const std::string& name, - const std::string& title, - int xBins, - double xMin, - double xMax, - int yBins, - double yMin, - double yMax) { - return iBook.book2I(name, fmt::sprintf(title, toRep), xBins, xMin, xMax, yBins, yMin, yMax); - }; - - // Define common parameters for different histogram types - constexpr int xBins = 501; - constexpr double xMin = -0.5; - constexpr double xMax = 1001.5; - - constexpr int dXBins = 1001; - constexpr double dXMin = -0.5; - constexpr double dXMax = 1000.5; - - constexpr int dYBins = 201; - constexpr double 
dYMin = -100.5; - constexpr double dYMax = 100.5; - - // FIXME: all the 2D correlation plots are quite heavy in terms of memory consumption, so a as soon as DQM supports THnSparse - // these should be moved to a less resource consuming format - - // Book histograms using the helper function - // clang-format off - hnTracks_ = bookTracksTH2I("nTracks", "%s per event; Reference; Target", xBins, xMin, xMax, xBins, xMin, xMax); - hnLooseAndAboveTracks_ = bookTracksTH2I("nLooseAndAboveTracks", "%s (quality #geq loose) per event; Reference; Target", xBins, xMin, xMax, xBins, xMin, xMax); - hnLooseAndAboveTracks_matched_ = bookTracksTH2I("nLooseAndAboveTracks_matched", "%s (quality #geq loose) per event; Reference; Target", xBins, xMin, xMax, xBins, xMin, xMax); - - hDeltaNTracks_ = bookTracksTH2I("deltaNTracks", "%s per event; Reference; Target - Reference", dXBins, dXMin, dXMax, dYBins, dYMin, dYMax); - hDeltaNLooseAndAboveTracks_ = bookTracksTH2I("deltaNLooseAndAboveTracks", "%s (quality #geq loose) per event; Reference; Target - Reference", dXBins, dXMin, dXMax, dYBins, dYMin, dYMax); - hDeltaNLooseAndAboveTracks_matched_ = bookTracksTH2I("deltaNLooseAndAboveTracks_matched", "%s (quality #geq loose) per event; Reference; Target - Reference", dXBins, dXMin, dXMax, dYBins, dYMin, dYMax); - - toRep = "Number of all RecHits per track (quality #geq loose)"; - hnHits_ = iBook.book2I("nRecHits", fmt::sprintf("%s;CPU;GPU",toRep), 15, -0.5, 14.5, 15, -0.5, 14.5); - - toRep = "Number of all layers per track (quality #geq loose)"; - hnLayers_ = iBook.book2I("nLayers", fmt::sprintf("%s;CPU;GPU",toRep), 15, -0.5, 14.5, 15, -0.5, 14.5); - - toRep = "Track (quality #geq loose) #chi^{2}/ndof"; - hchi2_ = iBook.book2I("nChi2ndof", fmt::sprintf("%s;CPU;GPU",toRep), 40, 0., 20., 40, 0., 20.); - - toRep = "Track (quality #geq loose) charge"; - hCharge_ = iBook.book2I("charge",fmt::sprintf("%s;CPU;GPU",toRep),3, -1.5, 1.5, 3, -1.5, 1.5); - - hpt_ = iBook.book2I("pt", "Track (quality #geq 
loose) p_{T} [GeV];CPU;GPU", 200, 0., 200., 200, 0., 200.); - hCurvature_ = iBook.book2I("curvature", "Track (quality #geq loose) q/p_{T} [GeV^{-1}];CPU;GPU", 60,- 3., 3., 60, -3., 3. ); - hptLogLog_ = make2DIfLog(iBook, true, true, "ptLogLog", "Track (quality #geq loose) p_{T} [GeV];CPU;GPU", 200, log10(0.5), log10(200.), 200, log10(0.5), log10(200.)); - heta_ = iBook.book2I("eta", "Track (quality #geq loose) #eta;CPU;GPU", 30, -3., 3., 30, -3., 3.); - hphi_ = iBook.book2I("phi", "Track (quality #geq loose) #phi;CPU;GPU", 30, -M_PI, M_PI, 30, -M_PI, M_PI); - hz_ = iBook.book2I("z", "Track (quality #geq loose) z [cm];CPU;GPU", 30, -30., 30., 30, -30., 30.); - htip_ = iBook.book2I("tip", "Track (quality #geq loose) TIP [cm];CPU;GPU", 100, -0.5, 0.5, 100, -0.5, 0.5); - //1D difference plots - hptdiffMatched_ = iBook.book1D("ptdiffmatched", " p_{T} diff [GeV] between matched tracks; #Delta p_{T} [GeV]", 61, -30.5, 30.5); - hCurvdiffMatched_ = iBook.book1D("curvdiffmatched", "q/p_{T} diff [GeV^{-1}] between matched tracks; #Delta q/p_{T} [GeV^{-1}]", 61, -3.05, 3.05); - hetadiffMatched_ = iBook.book1D("etadiffmatched", " #eta diff between matched tracks; #Delta #eta", 161, -0.045 ,0.045); - hphidiffMatched_ = iBook.book1D("phidiffmatched", " #phi diff between matched tracks; #Delta #phi", 161, -0.045 ,0.045); - hzdiffMatched_ = iBook.book1D("zdiffmatched", " z diff between matched tracks; #Delta z [cm]", 301, -1.55, 1.55); - htipdiffMatched_ = iBook.book1D("tipdiffmatched", " TIP diff between matched tracks; #Delta TIP [cm]", 301, -1.55, 1.55); - //2D plots for eff - hpt_eta_tkAllRef_ = iBook.book2I("ptetatrkAllReference", "Track (quality #geq loose) on CPU; #eta; p_{T} [GeV];", 30, -M_PI, M_PI, 200, 0., 200.); - hpt_eta_tkAllRefMatched_ = iBook.book2I("ptetatrkAllReferencematched", "Track (quality #geq loose) on CPU matched to GPU track; #eta; p_{T} [GeV];", 30, -M_PI, M_PI, 200, 0., 200.); - - hphi_z_tkAllRef_ = iBook.book2I("phiztrkAllReference", "Track (quality 
#geq loose) on CPU; #phi; z [cm];", 30, -M_PI, M_PI, 30, -30., 30.); - hphi_z_tkAllRefMatched_ = iBook.book2I("phiztrkAllReferencematched", "Track (quality #geq loose) on CPU; #phi; z [cm];", 30, -M_PI, M_PI, 30, -30., 30.); - -} - -template -void SiPixelCompareTrackSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - // monitorpixelTrackSoA - edm::ParameterSetDescription desc; - desc.add("pixelTrackSrcCPU", edm::InputTag("pixelTracksSoA@cpu")); - desc.add("pixelTrackSrcGPU", edm::InputTag("pixelTracksSoA@cuda")); - desc.add("topFolderName", "SiPixelHeterogeneous/PixelTrackCompareGPUvsCPU"); - desc.add("useQualityCut", true); - desc.add("minQuality", "loose"); - desc.add("deltaR2cut", 0.02 * 0.02)->setComment("deltaR2 cut between track on CPU and GPU"); - descriptions.addWithDefaultLabel(desc); -} - -using SiPixelPhase1CompareTrackSoA = SiPixelCompareTrackSoA; -using SiPixelPhase2CompareTrackSoA = SiPixelCompareTrackSoA; -using SiPixelHIonPhase1CompareTrackSoA = SiPixelCompareTrackSoA; - -DEFINE_FWK_MODULE(SiPixelPhase1CompareTrackSoA); -DEFINE_FWK_MODULE(SiPixelPhase2CompareTrackSoA); -DEFINE_FWK_MODULE(SiPixelHIonPhase1CompareTrackSoA); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertexSoA.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertexSoA.cc deleted file mode 100644 index 7961a17817d98..0000000000000 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertexSoA.cc +++ /dev/null @@ -1,186 +0,0 @@ -// -*- C++ -*- -// Package: SiPixelCompareVertexSoA -// Class: SiPixelCompareVertexSoA -// -/**\class SiPixelCompareVertexSoA SiPixelCompareVertexSoA.cc -*/ -// -// Author: Suvankar Roy Chowdhury -// -#include "FWCore/Framework/interface/Frameworkfwd.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include 
"DataFormats/Common/interface/Handle.h" -// DQM Histograming -#include "DQMServices/Core/interface/MonitorElement.h" -#include "DQMServices/Core/interface/DQMEDAnalyzer.h" -#include "DQMServices/Core/interface/DQMStore.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" -#include "DataFormats/BeamSpot/interface/BeamSpot.h" - -class SiPixelCompareVertexSoA : public DQMEDAnalyzer { -public: - using IndToEdm = std::vector; - explicit SiPixelCompareVertexSoA(const edm::ParameterSet&); - ~SiPixelCompareVertexSoA() override = default; - void bookHistograms(DQMStore::IBooker& ibooker, edm::Run const& iRun, edm::EventSetup const& iSetup) override; - void analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) override; - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - const edm::EDGetTokenT tokenSoAVertexCPU_; - const edm::EDGetTokenT tokenSoAVertexGPU_; - const edm::EDGetTokenT tokenBeamSpot_; - const std::string topFolderName_; - const float dzCut_; - MonitorElement* hnVertex_; - MonitorElement* hx_; - MonitorElement* hy_; - MonitorElement* hz_; - MonitorElement* hchi2_; - MonitorElement* hchi2oNdof_; - MonitorElement* hptv2_; - MonitorElement* hntrks_; - MonitorElement* hxdiff_; - MonitorElement* hydiff_; - MonitorElement* hzdiff_; -}; - -// -// constructors -// - -// Note tokenSoAVertexGPU_ contains data copied from device to host, hence is a HostCollection -SiPixelCompareVertexSoA::SiPixelCompareVertexSoA(const edm::ParameterSet& iConfig) - : tokenSoAVertexCPU_(consumes(iConfig.getParameter("pixelVertexSrcCPU"))), - tokenSoAVertexGPU_(consumes(iConfig.getParameter("pixelVertexSrcGPU"))), - tokenBeamSpot_(consumes(iConfig.getParameter("beamSpotSrc"))), - topFolderName_(iConfig.getParameter("topFolderName")), - dzCut_(iConfig.getParameter("dzCut")) {} - -// -// -- Analyze -// -void SiPixelCompareVertexSoA::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { - const auto& 
vsoaHandleCPU = iEvent.getHandle(tokenSoAVertexCPU_); - const auto& vsoaHandleGPU = iEvent.getHandle(tokenSoAVertexGPU_); - if (not vsoaHandleCPU or not vsoaHandleGPU) { - edm::LogWarning out("SiPixelCompareVertexSoA"); - if (not vsoaHandleCPU) { - out << "reference (cpu) tracks not found; "; - } - if (not vsoaHandleGPU) { - out << "target (gpu) tracks not found; "; - } - out << "the comparison will not run."; - return; - } - - auto const& vsoaCPU = *vsoaHandleCPU; - int nVerticesCPU = vsoaCPU.view().nvFinal(); - auto const& vsoaGPU = *vsoaHandleGPU; - int nVerticesGPU = vsoaGPU.view().nvFinal(); - - auto bsHandle = iEvent.getHandle(tokenBeamSpot_); - float x0 = 0., y0 = 0., z0 = 0., dxdz = 0., dydz = 0.; - if (!bsHandle.isValid()) { - edm::LogWarning("SiPixelCompareVertexSoA") << "No beamspot found. returning vertexes with (0,0,Z) "; - } else { - const reco::BeamSpot& bs = *bsHandle; - x0 = bs.x0(); - y0 = bs.y0(); - z0 = bs.z0(); - dxdz = bs.dxdz(); - dydz = bs.dydz(); - } - - for (int ivc = 0; ivc < nVerticesCPU; ivc++) { - auto sic = vsoaCPU.view()[ivc].sortInd(); - auto zc = vsoaCPU.view()[sic].zv(); - auto xc = x0 + dxdz * zc; - auto yc = y0 + dydz * zc; - zc += z0; - - auto ndofCPU = vsoaCPU.view()[sic].ndof(); - auto chi2CPU = vsoaCPU.view()[sic].chi2(); - - const int32_t notFound = -1; - int32_t closestVtxidx = notFound; - float mindz = dzCut_; - - for (int ivg = 0; ivg < nVerticesGPU; ivg++) { - auto sig = vsoaGPU.view()[ivg].sortInd(); - auto zgc = vsoaGPU.view()[sig].zv() + z0; - auto zDist = std::abs(zc - zgc); - //insert some matching condition - if (zDist > dzCut_) - continue; - if (mindz > zDist) { - mindz = zDist; - closestVtxidx = sig; - } - } - if (closestVtxidx == notFound) - continue; - - auto zg = vsoaGPU.view()[closestVtxidx].zv(); - auto xg = x0 + dxdz * zg; - auto yg = y0 + dydz * zg; - zg += z0; - auto ndofGPU = vsoaGPU.view()[closestVtxidx].ndof(); - auto chi2GPU = vsoaGPU.view()[closestVtxidx].chi2(); - - hx_->Fill(xc - x0, xg - x0); - 
hy_->Fill(yc - y0, yg - y0); - hz_->Fill(zc, zg); - hxdiff_->Fill(xc - xg); - hydiff_->Fill(yc - yg); - hzdiff_->Fill(zc - zg); - hchi2_->Fill(chi2CPU, chi2GPU); - hchi2oNdof_->Fill(chi2CPU / ndofCPU, chi2GPU / ndofGPU); - hptv2_->Fill(vsoaCPU.view()[sic].ptv2(), vsoaGPU.view()[closestVtxidx].ptv2()); - hntrks_->Fill(ndofCPU + 1, ndofGPU + 1); - } - hnVertex_->Fill(nVerticesCPU, nVerticesGPU); -} - -// -// -- Book Histograms -// -void SiPixelCompareVertexSoA::bookHistograms(DQMStore::IBooker& ibooker, - edm::Run const& iRun, - edm::EventSetup const& iSetup) { - ibooker.cd(); - ibooker.setCurrentFolder(topFolderName_); - - // FIXME: all the 2D correlation plots are quite heavy in terms of memory consumption, so a as soon as DQM supports either TH2I or THnSparse - // these should be moved to a less resource consuming format - hnVertex_ = ibooker.book2I("nVertex", "# of Vertices;CPU;GPU", 101, -0.5, 100.5, 101, -0.5, 100.5); - hx_ = ibooker.book2I("vx", "Vertez x - Beamspot x;CPU;GPU", 50, -0.1, 0.1, 50, -0.1, 0.1); - hy_ = ibooker.book2I("vy", "Vertez y - Beamspot y;CPU;GPU", 50, -0.1, 0.1, 50, -0.1, 0.1); - hz_ = ibooker.book2I("vz", "Vertez z;CPU;GPU", 30, -30., 30., 30, -30., 30.); - hchi2_ = ibooker.book2I("chi2", "Vertex chi-squared;CPU;GPU", 40, 0., 20., 40, 0., 20.); - hchi2oNdof_ = ibooker.book2I("chi2oNdof", "Vertex chi-squared/Ndof;CPU;GPU", 40, 0., 20., 40, 0., 20.); - hptv2_ = ibooker.book2I("ptsq", "Vertex #sum (p_{T})^{2};CPU;GPU", 200, 0., 200., 200, 0., 200.); - hntrks_ = ibooker.book2I("ntrk", "#tracks associated;CPU;GPU", 100, -0.5, 99.5, 100, -0.5, 99.5); - hntrks_ = ibooker.book2I("ntrk", "#tracks associated;CPU;GPU", 100, -0.5, 99.5, 100, -0.5, 99.5); - hxdiff_ = ibooker.book1D("vxdiff", ";Vertex x difference (CPU - GPU);#entries", 100, -0.001, 0.001); - hydiff_ = ibooker.book1D("vydiff", ";Vertex y difference (CPU - GPU);#entries", 100, -0.001, 0.001); - hzdiff_ = ibooker.book1D("vzdiff", ";Vertex z difference (CPU - GPU);#entries", 100, -2.5, 
2.5); -} - -void SiPixelCompareVertexSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - // monitorpixelVertexSoA - edm::ParameterSetDescription desc; - desc.add("pixelVertexSrcCPU", edm::InputTag("pixelVerticesSoA@cpu")); - desc.add("pixelVertexSrcGPU", edm::InputTag("pixelVerticesSoA@cuda")); - desc.add("beamSpotSrc", edm::InputTag("offlineBeamSpot")); - desc.add("topFolderName", "SiPixelHeterogeneous/PixelVertexCompareSoAGPUvsCPU"); - desc.add("dzCut", 1.); - descriptions.addWithDefaultLabel(desc); -} - -DEFINE_FWK_MODULE(SiPixelCompareVertexSoA); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoA.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoA.cc deleted file mode 100644 index a1feefe53ba58..0000000000000 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoA.cc +++ /dev/null @@ -1,211 +0,0 @@ -// -*- C++ -*- -///bookLayer -// Package: SiPixelMonitorRecHitsSoA -// Class: SiPixelMonitorRecHitsSoA -// -/**\class SiPixelMonitorRecHitsSoA SiPixelMonitorRecHitsSoA.cc -*/ -// -// Author: Suvankar Roy Chowdhury, Alessandro Rossi -// -#include "DataFormats/Math/interface/approx_atan2.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/Frameworkfwd.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -// DQM Histograming -#include "DQMServices/Core/interface/MonitorElement.h" -#include "DQMServices/Core/interface/DQMEDAnalyzer.h" -#include "DQMServices/Core/interface/DQMStore.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoAHost.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -// Geometry -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include 
"Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" -#include "Geometry/CommonTopologies/interface/PixelTopology.h" -#include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h" -#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" - -template -class SiPixelMonitorRecHitsSoA : public DQMEDAnalyzer { -public: - using HitSoA = TrackingRecHitSoAView; - using HitsOnHost = TrackingRecHitSoAHost; - - explicit SiPixelMonitorRecHitsSoA(const edm::ParameterSet&); - ~SiPixelMonitorRecHitsSoA() override = default; - void dqmBeginRun(const edm::Run&, const edm::EventSetup&) override; - void bookHistograms(DQMStore::IBooker& ibooker, edm::Run const& iRun, edm::EventSetup const& iSetup) override; - void analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) override; - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - const edm::ESGetToken geomToken_; - const edm::ESGetToken topoToken_; - const edm::EDGetTokenT tokenSoAHitsCPU_; - const std::string topFolderName_; - const TrackerGeometry* tkGeom_ = nullptr; - const TrackerTopology* tTopo_ = nullptr; - MonitorElement* hnHits; - MonitorElement* hBFposZP; - MonitorElement* hBFposZR; - MonitorElement* hBposXY; - MonitorElement* hBposZP; - MonitorElement* hBcharge; - MonitorElement* hBsizex; - MonitorElement* hBsizey; - MonitorElement* hBposZPL[4]; // max 4 barrel hits - MonitorElement* hBchargeL[4]; - MonitorElement* hBsizexL[4]; - MonitorElement* hBsizeyL[4]; - MonitorElement* hFposXY; - MonitorElement* hFposZP; - MonitorElement* hFcharge; - MonitorElement* hFsizex; - MonitorElement* hFsizey; - MonitorElement* hFposXYD[2][12]; // max 12 endcap disks - MonitorElement* hFchargeD[2][12]; - MonitorElement* hFsizexD[2][12]; - MonitorElement* hFsizeyD[2][12]; -}; - -// -// constructors -// - -template -SiPixelMonitorRecHitsSoA::SiPixelMonitorRecHitsSoA(const edm::ParameterSet& iConfig) - : geomToken_(esConsumes()), - topoToken_(esConsumes()), - 
tokenSoAHitsCPU_(consumes(iConfig.getParameter("pixelHitsSrc"))), - topFolderName_(iConfig.getParameter("TopFolderName")) {} -// -// Begin Run -// -template -void SiPixelMonitorRecHitsSoA::dqmBeginRun(const edm::Run& iRun, const edm::EventSetup& iSetup) { - tkGeom_ = &iSetup.getData(geomToken_); - tTopo_ = &iSetup.getData(topoToken_); -} - -// -// -- Analyze -// -template -void SiPixelMonitorRecHitsSoA::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { - const auto& rhsoaHandle = iEvent.getHandle(tokenSoAHitsCPU_); - if (!rhsoaHandle.isValid()) { - edm::LogWarning("SiPixelMonitorRecHitsSoA") << "No RecHits SoA found \n returning!" << std::endl; - return; - } - auto const& rhsoa = *rhsoaHandle; - auto const& soa2d = rhsoa.const_view(); - - uint32_t nHits_ = soa2d.nHits(); - hnHits->Fill(nHits_); - auto detIds = tkGeom_->detUnitIds(); - for (uint32_t i = 0; i < nHits_; i++) { - DetId id = detIds[soa2d[i].detectorIndex()]; - float xG = soa2d[i].xGlobal(); - float yG = soa2d[i].yGlobal(); - float zG = soa2d[i].zGlobal(); - float rG = soa2d[i].rGlobal(); - float fphi = short2phi(soa2d[i].iphi()); - uint32_t charge = soa2d[i].chargeAndStatus().charge; - int16_t sizeX = std::ceil(float(std::abs(soa2d[i].clusterSizeX()) / 8.)); - int16_t sizeY = std::ceil(float(std::abs(soa2d[i].clusterSizeY()) / 8.)); - hBFposZP->Fill(zG, fphi); - int16_t ysign = yG >= 0 ? 
1 : -1; - hBFposZR->Fill(zG, rG * ysign); - switch (id.subdetId()) { - case PixelSubdetector::PixelBarrel: - hBposXY->Fill(xG, yG); - hBposZP->Fill(zG, fphi); - hBcharge->Fill(charge); - hBsizex->Fill(sizeX); - hBsizey->Fill(sizeY); - hBposZPL[tTopo_->pxbLayer(id) - 1]->Fill(zG, fphi); - hBchargeL[tTopo_->pxbLayer(id) - 1]->Fill(charge); - hBsizexL[tTopo_->pxbLayer(id) - 1]->Fill(sizeX); - hBsizeyL[tTopo_->pxbLayer(id) - 1]->Fill(sizeY); - break; - case PixelSubdetector::PixelEndcap: - hFposXY->Fill(xG, yG); - hFposZP->Fill(zG, fphi); - hFcharge->Fill(charge); - hFsizex->Fill(sizeX); - hFsizey->Fill(sizeY); - hFposXYD[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(xG, yG); - hFchargeD[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(charge); - hFsizexD[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(sizeX); - hFsizeyD[tTopo_->pxfSide(id) - 1][tTopo_->pxfDisk(id) - 1]->Fill(sizeY); - break; - } - } -} - -// -// -- Book Histograms -// -template -void SiPixelMonitorRecHitsSoA::bookHistograms(DQMStore::IBooker& iBook, - edm::Run const& iRun, - edm::EventSetup const& iSetup) { - iBook.cd(); - iBook.setCurrentFolder(topFolderName_); - - // clang-format off - //Global - hnHits = iBook.book1D("nHits", "RecHits per event;RecHits;#events", 200, 0, 5000); - hBFposZP = iBook.book2D("recHitsGlobalPosZP", "RecHits position Global;Z;#phi", 1000, -60, 60, 200,-3.2,3.2); - hBFposZR = iBook.book2D("recHitsGlobalPosZR", "RecHits position Global;Z;R", 1000, -60, 60, 200,-20,20); - //Barrel - hBposXY = iBook.book2D("recHitsBarrelPosXY", "RecHits position Barrel;X;Y", 200, -20, 20, 200,-20,20); - hBposZP = iBook.book2D("recHitsBarrelPosZP", "RecHits position Barrel;Z;#phi", 300, -30, 30, 200,-3.2,3.2); - hBcharge = iBook.book1D("recHitsBarrelCharge", "RecHits Charge Barrel;Charge;#events", 250, 0, 100000); - hBsizex = iBook.book1D("recHitsBarrelSizex", "RecHits SizeX Barrel;SizeX;#events", 50, 0, 50); - hBsizey = iBook.book1D("recHitsBarrelSizey", "RecHits SizeY 
Barrel;SizeY;#events", 50, 0, 50); - //Barrel Layer - for(unsigned int il=0;ilnumberOfLayers(PixelSubdetector::PixelBarrel);il++){ - hBposZPL[il] = iBook.book2D(Form("recHitsBLay%dPosZP",il+1), Form("RecHits position Barrel Layer%d;Z;#phi",il+1), 300, -30, 30, 200,-3.2,3.2); - hBchargeL[il] = iBook.book1D(Form("recHitsBLay%dCharge",il+1), Form("RecHits Charge Barrel Layer%d;Charge;#events",il+1), 250, 0, 100000); - hBsizexL[il] = iBook.book1D(Form("recHitsBLay%dSizex",il+1), Form("RecHits SizeX Barrel Layer%d;SizeX;#events",il+1), 50, 0, 50); - hBsizeyL[il] = iBook.book1D(Form("recHitsBLay%dSizey",il+1), Form("RecHits SizeY Barrel Layer%d;SizeY;#events",il+1), 50, 0, 50); - } - //Endcaps - hFposXY = iBook.book2D("recHitsEndcapsPosXY", "RecHits position Endcaps;X;Y", 200, -20, 20, 200,-20, 20); - hFposZP = iBook.book2D("recHitsEndcapsPosZP", "RecHits position Endcaps;Z;#phi", 600, -60, 60, 200,-3.2,3.2); - hFcharge = iBook.book1D("recHitsEndcapsCharge", "RecHits Charge Endcaps;Charge;#events", 250, 0, 100000); - hFsizex = iBook.book1D("recHitsEndcapsSizex", "RecHits SizeX Endcaps;SizeX;#events", 50, 0, 50); - hFsizey = iBook.book1D("recHitsEndcapsSizey", "RecHits SizeY Endcaps;SizeY;#events", 50, 0, 50); - //Endcaps Disk - for(int is=0;is<2;is++){ - int sign=is==0? 
-1:1; - for(unsigned int id=0;idnumberOfLayers(PixelSubdetector::PixelEndcap);id++){ - hFposXYD[is][id] = iBook.book2D(Form("recHitsFDisk%+dPosXY",id*sign+sign), Form("RecHits position Endcaps Disk%+d;X;Y",id*sign+sign), 200, -20, 20, 200,-20,20); - hFchargeD[is][id] = iBook.book1D(Form("recHitsFDisk%+dCharge",id*sign+sign), Form("RecHits Charge Endcaps Disk%+d;Charge;#events",id*sign+sign), 250, 0, 100000); - hFsizexD[is][id] = iBook.book1D(Form("recHitsFDisk%+dSizex",id*sign+sign), Form("RecHits SizeX Endcaps Disk%+d;SizeX;#events",id*sign+sign), 50, 0, 50); - hFsizeyD[is][id] = iBook.book1D(Form("recHitsFDisk%+dSizey",id*sign+sign), Form("RecHits SizeY Endcaps Disk%+d;SizeY;#events",id*sign+sign), 50, 0, 50); - } - } -} - -template -void SiPixelMonitorRecHitsSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - // monitorpixelRecHitsSoA - edm::ParameterSetDescription desc; - desc.add("pixelHitsSrc", edm::InputTag("siPixelRecHitsPreSplittingSoA")); - desc.add("TopFolderName", "SiPixelHeterogeneous/PixelRecHitsSoA"); - descriptions.addWithDefaultLabel(desc); -} - -using SiPixelPhase1MonitorRecHitsSoA = SiPixelMonitorRecHitsSoA; -using SiPixelPhase2MonitorRecHitsSoA = SiPixelMonitorRecHitsSoA; -using SiPixelHIonPhase1MonitorRecHitsSoA = SiPixelMonitorRecHitsSoA; - -DEFINE_FWK_MODULE(SiPixelPhase1MonitorRecHitsSoA); -DEFINE_FWK_MODULE(SiPixelPhase2MonitorRecHitsSoA); -DEFINE_FWK_MODULE(SiPixelHIonPhase1MonitorRecHitsSoA); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoA.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoA.cc deleted file mode 100644 index f3ccb74bc3fea..0000000000000 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoA.cc +++ /dev/null @@ -1,201 +0,0 @@ -// -*- C++ -*- -// Package: SiPixelMonitorTrackSoA -// Class: SiPixelMonitorTrackSoA -// -/**\class SiPixelMonitorTrackSoA SiPixelMonitorTrackSoA.cc -*/ -// -// Author: Suvankar Roy Chowdhury -// -#include "DataFormats/Common/interface/Handle.h" 
-#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/Frameworkfwd.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "FWCore/Utilities/interface/InputTag.h" -// DQM Histograming -#include "DQMServices/Core/interface/MonitorElement.h" -#include "DQMServices/Core/interface/DQMEDAnalyzer.h" -#include "DQMServices/Core/interface/DQMStore.h" -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -// for string manipulations -#include - -template -class SiPixelMonitorTrackSoA : public DQMEDAnalyzer { -public: - using PixelTrackHeterogeneous = TrackSoAHeterogeneousHost; - explicit SiPixelMonitorTrackSoA(const edm::ParameterSet&); - ~SiPixelMonitorTrackSoA() override = default; - void bookHistograms(DQMStore::IBooker& ibooker, edm::Run const& iRun, edm::EventSetup const& iSetup) override; - void analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) override; - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - edm::EDGetTokenT tokenSoATrack_; - std::string topFolderName_; - bool useQualityCut_; - pixelTrack::Quality minQuality_; - MonitorElement* hnTracks; - MonitorElement* hnLooseAndAboveTracks; - MonitorElement* hnHits; - MonitorElement* hnHitsVsPhi; - MonitorElement* hnHitsVsEta; - MonitorElement* hnLayers; - MonitorElement* hnLayersVsPhi; - MonitorElement* hnLayersVsEta; - MonitorElement* hchi2; - MonitorElement* hChi2VsPhi; - MonitorElement* hChi2VsEta; - MonitorElement* hpt; - MonitorElement* hCurvature; - MonitorElement* heta; - MonitorElement* hphi; - MonitorElement* hz; - MonitorElement* htip; - MonitorElement* hquality; -}; - -// -// constructors -// - -template 
-SiPixelMonitorTrackSoA::SiPixelMonitorTrackSoA(const edm::ParameterSet& iConfig) { - tokenSoATrack_ = consumes(iConfig.getParameter("pixelTrackSrc")); - topFolderName_ = iConfig.getParameter("topFolderName"); //"SiPixelHeterogeneous/PixelTrackSoA"; - useQualityCut_ = iConfig.getParameter("useQualityCut"); - minQuality_ = pixelTrack::qualityByName(iConfig.getParameter("minQuality")); -} - -// -// -- Analyze -// -template -void SiPixelMonitorTrackSoA::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { - const auto& tsoaHandle = iEvent.getHandle(tokenSoATrack_); - if (!tsoaHandle.isValid()) { - edm::LogWarning("SiPixelMonitorTrackSoA") << "No Track SoA found \n returning!" << std::endl; - return; - } - - using helper = TracksUtilities; - auto const& tsoa = *tsoaHandle.product(); - auto maxTracks = tsoa.view().metadata().size(); - auto const* quality = tsoa.view().quality(); - int32_t nTracks = 0; - int32_t nLooseAndAboveTracks = 0; - - for (int32_t it = 0; it < maxTracks; ++it) { - auto nHits = helper::nHits(tsoa.const_view(), it); - auto nLayers = tsoa.view()[it].nLayers(); - if (nHits == 0) - break; // this is a guard - float pt = tsoa.view()[it].pt(); - if (!(pt > 0.)) - continue; - - // fill the quality for all tracks - pixelTrack::Quality qual = quality[it]; - hquality->Fill(int(qual)); - nTracks++; - - if (useQualityCut_ && quality[it] < minQuality_) - continue; - - // fill parameters only for quality >= loose - float chi2 = tsoa.view()[it].chi2(); - float phi = helper::phi(tsoa.const_view(), it); - float zip = helper::zip(tsoa.const_view(), it); - float eta = tsoa.view()[it].eta(); - float tip = helper::tip(tsoa.const_view(), it); - auto charge = helper::charge(tsoa.const_view(), it); - - hchi2->Fill(chi2); - hChi2VsPhi->Fill(phi, chi2); - hChi2VsEta->Fill(eta, chi2); - hnHits->Fill(nHits); - hnLayers->Fill(nLayers); - hnHitsVsPhi->Fill(phi, nHits); - hnHitsVsEta->Fill(eta, nHits); - hnLayersVsPhi->Fill(phi, nLayers); - hnLayersVsEta->Fill(eta, 
nLayers); - hpt->Fill(pt); - hCurvature->Fill(charge / pt); - heta->Fill(eta); - hphi->Fill(phi); - hz->Fill(zip); - htip->Fill(tip); - nLooseAndAboveTracks++; - } - hnTracks->Fill(nTracks); - hnLooseAndAboveTracks->Fill(nLooseAndAboveTracks); -} - -// -// -- Book Histograms -// -template -void SiPixelMonitorTrackSoA::bookHistograms(DQMStore::IBooker& iBook, - edm::Run const& iRun, - edm::EventSetup const& iSetup) { - iBook.cd(); - iBook.setCurrentFolder(topFolderName_); - - // clang-format off - std::string toRep = "Number of tracks"; - hnTracks = iBook.book1D("nTracks", fmt::sprintf(";%s per event;#events",toRep), 1001, -0.5, 2001.5); - hnLooseAndAboveTracks = iBook.book1D("nLooseAndAboveTracks", fmt::sprintf(";%s (quality #geq loose) per event;#events",toRep), 1001, -0.5, 2001.5); - - toRep = "Number of all RecHits per track (quality #geq loose)"; - hnHits = iBook.book1D("nRecHits", fmt::sprintf(";%s;#tracks",toRep), 15, -0.5, 14.5); - hnHitsVsPhi = iBook.bookProfile("nHitsPerTrackVsPhi", fmt::sprintf("%s vs track #phi;Track #phi;%s",toRep,toRep), 30, -M_PI, M_PI,0., 15.); - hnHitsVsEta = iBook.bookProfile("nHitsPerTrackVsEta", fmt::sprintf("%s vs track #eta;Track #eta;%s",toRep,toRep), 30, -3., 3., 0., 15.); - - toRep = "Number of all layers per track (quality #geq loose)"; - hnLayers = iBook.book1D("nLayers", fmt::sprintf(";%s;#tracks",toRep), 15, -0.5, 14.5); - hnLayersVsPhi = iBook.bookProfile("nLayersPerTrackVsPhi", fmt::sprintf("%s vs track #phi;Track #phi;%s",toRep,toRep), 30, -M_PI, M_PI,0., 15.); - hnLayersVsEta = iBook.bookProfile("nLayersPerTrackVsEta", fmt::sprintf("%s vs track #eta;Track #eta;%s",toRep,toRep), 30, -3., 3., 0., 15.); - - toRep = "Track (quality #geq loose) #chi^{2}/ndof"; - hchi2 = iBook.book1D("nChi2ndof", fmt::sprintf(";%s;#tracks",toRep), 40, 0., 20.); - hChi2VsPhi = iBook.bookProfile("nChi2ndofVsPhi", fmt::sprintf("%s vs track #phi;Track #phi;%s",toRep,toRep), 30, -M_PI, M_PI, 0., 20.); - hChi2VsEta = 
iBook.bookProfile("nChi2ndofVsEta", fmt::sprintf("%s vs track #eta;Track #eta;%s",toRep,toRep), 30, -3., 3., 0., 20.); - // clang-format on - - hpt = iBook.book1D("pt", ";Track (quality #geq loose) p_{T} [GeV];#tracks", 200, 0., 200.); - hCurvature = iBook.book1D("curvature", ";Track (quality #geq loose) q/p_{T} [GeV^{-1}];#tracks", 100, -3., 3.); - heta = iBook.book1D("eta", ";Track (quality #geq loose) #eta;#tracks", 30, -3., 3.); - hphi = iBook.book1D("phi", ";Track (quality #geq loose) #phi;#tracks", 30, -M_PI, M_PI); - hz = iBook.book1D("z", ";Track (quality #geq loose) z [cm];#tracks", 30, -30., 30.); - htip = iBook.book1D("tip", ";Track (quality #geq loose) TIP [cm];#tracks", 100, -0.5, 0.5); - hquality = iBook.book1D("quality", ";Track Quality;#tracks", 7, -0.5, 6.5); - uint i = 1; - for (const auto& q : pixelTrack::qualityName) { - hquality->setBinLabel(i, q, 1); - i++; - } -} - -template -void SiPixelMonitorTrackSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - // monitorpixelTrackSoA - edm::ParameterSetDescription desc; - desc.add("pixelTrackSrc", edm::InputTag("pixelTracksSoA")); - desc.add("topFolderName", "SiPixelHeterogeneous/PixelTrackSoA"); - desc.add("useQualityCut", true); - desc.add("minQuality", "loose"); - descriptions.addWithDefaultLabel(desc); -} - -using SiPixelPhase1MonitorTrackSoA = SiPixelMonitorTrackSoA; -using SiPixelPhase2MonitorTrackSoA = SiPixelMonitorTrackSoA; -using SiPixelHIonPhase1MonitorTrackSoA = SiPixelMonitorTrackSoA; - -DEFINE_FWK_MODULE(SiPixelPhase1MonitorTrackSoA); -DEFINE_FWK_MODULE(SiPixelPhase2MonitorTrackSoA); -DEFINE_FWK_MODULE(SiPixelHIonPhase1MonitorTrackSoA); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoA.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoA.cc deleted file mode 100644 index 3d90da5c01c5a..0000000000000 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoA.cc +++ /dev/null @@ -1,132 +0,0 @@ -// -*- C++ -*- -///bookLayer -// Package: 
SiPixelMonitorVertexSoA -// Class: SiPixelMonitorVertexSoA -// -/**\class SiPixelMonitorVertexSoA SiPixelMonitorVertexSoA.cc -*/ -// -// Author: Suvankar Roy Chowdhury -// -#include "FWCore/Framework/interface/Frameworkfwd.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "DataFormats/Common/interface/Handle.h" -// DQM Histograming -#include "DQMServices/Core/interface/MonitorElement.h" -#include "DQMServices/Core/interface/DQMEDAnalyzer.h" -#include "DQMServices/Core/interface/DQMStore.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" -#include "DataFormats/BeamSpot/interface/BeamSpot.h" - -class SiPixelMonitorVertexSoA : public DQMEDAnalyzer { -public: - using IndToEdm = std::vector; - explicit SiPixelMonitorVertexSoA(const edm::ParameterSet&); - ~SiPixelMonitorVertexSoA() override = default; - void bookHistograms(DQMStore::IBooker& ibooker, edm::Run const& iRun, edm::EventSetup const& iSetup) override; - void analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) override; - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - edm::EDGetTokenT tokenSoAVertex_; - edm::EDGetTokenT tokenBeamSpot_; - std::string topFolderName_; - MonitorElement* hnVertex; - MonitorElement* hx; - MonitorElement* hy; - MonitorElement* hz; - MonitorElement* hchi2; - MonitorElement* hchi2oNdof; - MonitorElement* hptv2; - MonitorElement* hntrks; -}; - -// -// constructors -// - -SiPixelMonitorVertexSoA::SiPixelMonitorVertexSoA(const edm::ParameterSet& iConfig) { - tokenSoAVertex_ = consumes(iConfig.getParameter("pixelVertexSrc")); - tokenBeamSpot_ = 
consumes(iConfig.getParameter("beamSpotSrc")); - topFolderName_ = iConfig.getParameter("topFolderName"); -} - -// -// -- Analyze -// -void SiPixelMonitorVertexSoA::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { - const auto& vsoaHandle = iEvent.getHandle(tokenSoAVertex_); - if (!vsoaHandle.isValid()) { - edm::LogWarning("SiPixelMonitorVertexSoA") << "No Vertex SoA found \n returning!" << std::endl; - return; - } - - auto const& vsoa = *vsoaHandle; - int nVertices = vsoa.view().nvFinal(); - auto bsHandle = iEvent.getHandle(tokenBeamSpot_); - float x0 = 0., y0 = 0., z0 = 0., dxdz = 0., dydz = 0.; - if (!bsHandle.isValid()) { - edm::LogWarning("SiPixelMonitorVertexSoA") << "No beamspot found. returning vertexes with (0,0,Z) "; - } else { - const reco::BeamSpot& bs = *bsHandle; - x0 = bs.x0(); - y0 = bs.y0(); - z0 = bs.z0(); - dxdz = bs.dxdz(); - dydz = bs.dydz(); - } - - for (int iv = 0; iv < nVertices; iv++) { - auto si = vsoa.view()[iv].sortInd(); - auto z = vsoa.view()[si].zv(); - auto x = x0 + dxdz * z; - auto y = y0 + dydz * z; - - z += z0; - hx->Fill(x); - hy->Fill(y); - hz->Fill(z); - auto ndof = vsoa.view()[si].ndof(); - hchi2->Fill(vsoa.view()[si].chi2()); - hchi2oNdof->Fill(vsoa.view()[si].chi2() / ndof); - hptv2->Fill(vsoa.view()[si].ptv2()); - hntrks->Fill(ndof + 1); - } - hnVertex->Fill(nVertices); -} - -// -// -- Book Histograms -// -void SiPixelMonitorVertexSoA::bookHistograms(DQMStore::IBooker& ibooker, - edm::Run const& iRun, - edm::EventSetup const& iSetup) { - //std::string top_folder = ""// - ibooker.cd(); - ibooker.setCurrentFolder(topFolderName_); - hnVertex = ibooker.book1D("nVertex", ";# of Vertices;#entries", 101, -0.5, 100.5); - hx = ibooker.book1D("vx", ";Vertex x;#entries", 10, -5., 5.); - hy = ibooker.book1D("vy", ";Vertex y;#entries", 10, -5., 5.); - hz = ibooker.book1D("vz", ";Vertex z;#entries", 30, -30., 30); - hchi2 = ibooker.book1D("chi2", ";Vertex chi-squared;#entries", 40, 0., 20.); - hchi2oNdof = 
ibooker.book1D("chi2oNdof", ";Vertex chi-squared/Ndof;#entries", 40, 0., 20.); - hptv2 = ibooker.book1D("ptsq", ";Vertex #sum (p_{T})^{2};#entries", 200, 0., 200.); - hntrks = ibooker.book1D("ntrk", ";#tracks associated;#entries", 100, -0.5, 99.5); -} - -void SiPixelMonitorVertexSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - // monitorpixelVertexSoA - edm::ParameterSetDescription desc; - desc.add("pixelVertexSrc", edm::InputTag("pixelVerticesSoA")); - desc.add("beamSpotSrc", edm::InputTag("offlineBeamSpot")); - desc.add("topFolderName", "SiPixelHeterogeneous/PixelVertexSoA"); - descriptions.addWithDefaultLabel(desc); -} - -DEFINE_FWK_MODULE(SiPixelMonitorVertexSoA); diff --git a/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h b/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h deleted file mode 100644 index a97dfadea52c4..0000000000000 --- a/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef DataFormats_SiPixelDigi_interface_SiPixelDigisSoA_h -#define DataFormats_SiPixelDigi_interface_SiPixelDigisSoA_h - -#include -#include -#include - -namespace legacy { - - // The main purpose of this class is to deliver digi and cluster data - // from an EDProducer that transfers the data from GPU to host to an - // EDProducer that converts the SoA to legacy data products. The class - // is independent of any GPU technology, and in prunciple could be - // produced by host code, and be used for other purposes than - // conversion-to-legacy as well. 
- - class SiPixelDigisSoA { - public: - SiPixelDigisSoA() = default; - explicit SiPixelDigisSoA( - size_t nDigis, const uint32_t* pdigi, const uint32_t* rawIdArr, const uint16_t* adc, const int32_t* clus) - : pdigi_(pdigi, pdigi + nDigis), - rawIdArr_(rawIdArr, rawIdArr + nDigis), - adc_(adc, adc + nDigis), - clus_(clus, clus + nDigis) {} - - ~SiPixelDigisSoA() = default; - - auto size() const { return pdigi_.size(); } - - uint32_t pdigi(size_t i) const { return pdigi_[i]; } - uint32_t rawIdArr(size_t i) const { return rawIdArr_[i]; } - uint16_t adc(size_t i) const { return adc_[i]; } - int32_t clus(size_t i) const { return clus_[i]; } - - const std::vector& pdigiVector() const { return pdigi_; } - const std::vector& rawIdArrVector() const { return rawIdArr_; } - const std::vector& adcVector() const { return adc_; } - const std::vector& clusVector() const { return clus_; } - - private: - std::vector pdigi_; // packed digi (row, col, adc) of each pixel - std::vector rawIdArr_; // DetId of each pixel - std::vector adc_; // ADC of each pixel - std::vector clus_; // cluster id of each pixel - }; - -} // namespace legacy - -#endif // DataFormats_SiPixelDigi_interface_SiPixelDigisSoA_h diff --git a/DataFormats/SiPixelDigi/src/classes.h b/DataFormats/SiPixelDigi/src/classes.h index be707668d0dfc..308b8a70d1a1c 100644 --- a/DataFormats/SiPixelDigi/src/classes.h +++ b/DataFormats/SiPixelDigi/src/classes.h @@ -1,15 +1,14 @@ -#ifndef SIPIXELDIGI_CLASSES_H -#define SIPIXELDIGI_CLASSES_H +#ifndef DataFormats_SiPixelDigi_src_classes_h +#define DataFormats_SiPixelDigi_src_classes_h #include +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/SiPixelDigi/interface/PixelDigi.h" #include "DataFormats/SiPixelDigi/interface/PixelDigiCollection.h" #include "DataFormats/SiPixelDigi/interface/SiPixelCalibDigi.h" #include 
"DataFormats/SiPixelDigi/interface/SiPixelCalibDigiError.h" -#include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" -#include "DataFormats/Common/interface/Wrapper.h" -#include "DataFormats/Common/interface/DetSetVector.h" -#include "DataFormats/Common/interface/DetSetVectorNew.h" -#endif // SIPIXELDIGI_CLASSES_H +#endif // DataFormats_SiPixelDigi_src_classes_h diff --git a/DataFormats/SiPixelDigi/src/classes_def.xml b/DataFormats/SiPixelDigi/src/classes_def.xml index 697b6c467d799..de7779a5c00ea 100755 --- a/DataFormats/SiPixelDigi/src/classes_def.xml +++ b/DataFormats/SiPixelDigi/src/classes_def.xml @@ -49,7 +49,4 @@ - - - diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index 87123219d44e4..e9d203e8c0455 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -1,7 +1,4 @@ - - - diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc deleted file mode 100644 index 9e2bf3d5820e9..0000000000000 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc +++ /dev/null @@ -1,126 +0,0 @@ -#include - -#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" -#include "DataFormats/Common/interface/DetSetVector.h" -#include "DataFormats/Common/interface/Handle.h" -#include "DataFormats/DetId/interface/DetIdCollection.h" -#include "DataFormats/FEDRawData/interface/FEDNumbering.h" -#include "DataFormats/SiPixelDetId/interface/PixelFEDChannel.h" -#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" -#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" -#include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" -#include 
"FWCore/Framework/interface/ESTransientHandle.h" -#include "FWCore/Framework/interface/ESWatcher.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" - -class SiPixelDigiErrorsFromSoA : public edm::stream::EDProducer<> { -public: - explicit SiPixelDigiErrorsFromSoA(const edm::ParameterSet& iConfig); - ~SiPixelDigiErrorsFromSoA() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; - - const edm::ESGetToken cablingToken_; - const edm::EDGetTokenT digiErrorSoAGetToken_; - const edm::EDPutTokenT> errorPutToken_; - const edm::EDPutTokenT tkErrorPutToken_; - const edm::EDPutTokenT userErrorPutToken_; - const edm::EDPutTokenT> disabledChannelPutToken_; - - edm::ESWatcher cablingWatcher_; - std::unique_ptr cabling_; - - const std::vector tkerrorlist_; - const std::vector usererrorlist_; - - const bool usePhase1_; -}; - -SiPixelDigiErrorsFromSoA::SiPixelDigiErrorsFromSoA(const edm::ParameterSet& iConfig) - : cablingToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), - digiErrorSoAGetToken_{consumes(iConfig.getParameter("digiErrorSoASrc"))}, - errorPutToken_{produces>()}, - tkErrorPutToken_{produces()}, - userErrorPutToken_{produces("UserErrorModules")}, - disabledChannelPutToken_{produces>()}, - tkerrorlist_(iConfig.getParameter>("ErrorList")), - usererrorlist_(iConfig.getParameter>("UserErrorList")), - usePhase1_(iConfig.getParameter("UsePhase1")) {} - -void SiPixelDigiErrorsFromSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - 
edm::ParameterSetDescription desc; - desc.add("digiErrorSoASrc", edm::InputTag("siPixelDigiErrorsSoA")); - // the configuration parameters here are named following those in SiPixelRawToDigi - desc.add("CablingMapLabel", "")->setComment("CablingMap label"); - desc.add("UsePhase1", false)->setComment("## Use phase1"); - desc.add>("ErrorList", std::vector{29}) - ->setComment("## ErrorList: list of error codes used by tracking to invalidate modules"); - desc.add>("UserErrorList", std::vector{40}) - ->setComment("## UserErrorList: list of error codes used by Pixel experts for investigation"); - descriptions.addWithDefaultLabel(desc); -} - -void SiPixelDigiErrorsFromSoA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { - // pack errors into collection - - // initialize cabling map or update if necessary - if (cablingWatcher_.check(iSetup)) { - // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) - const SiPixelFedCablingMap* cablingMap = &iSetup.getData(cablingToken_); - cabling_ = cablingMap->cablingTree(); - LogDebug("map version:") << cabling_->version(); - } - - const auto& digiErrors = iEvent.get(digiErrorSoAGetToken_); - - edm::DetSetVector errorcollection{}; - DetIdCollection tkerror_detidcollection{}; - DetIdCollection usererror_detidcollection{}; - edmNew::DetSetVector disabled_channelcollection{}; - - PixelDataFormatter formatter(cabling_.get(), usePhase1_); // for phase 1 & 0 - const PixelDataFormatter::Errors* formatterErrors = digiErrors.formatterErrors(); - assert(formatterErrors != nullptr); - auto errors = *formatterErrors; // make a copy - PixelDataFormatter::DetErrors nodeterrors; - - auto size = digiErrors.size(); - for (auto i = 0U; i < size; i++) { - SiPixelErrorCompact err = digiErrors.error(i); - if (err.errorType != 0) { - SiPixelRawDataError error(err.word, err.errorType, err.fedId + FEDNumbering::MINSiPixeluTCAFEDID); - errors[err.rawId].push_back(error); - } - } - - 
formatter.unpackFEDErrors(errors, - tkerrorlist_, - usererrorlist_, - errorcollection, - tkerror_detidcollection, - usererror_detidcollection, - disabled_channelcollection, - nodeterrors); - - const uint32_t dummydetid = 0xffffffff; - edm::DetSet& errorDetSet = errorcollection.find_or_insert(dummydetid); - errorDetSet.data = nodeterrors; - - iEvent.emplace(errorPutToken_, std::move(errorcollection)); - iEvent.emplace(tkErrorPutToken_, std::move(tkerror_detidcollection)); - iEvent.emplace(userErrorPutToken_, std::move(usererror_detidcollection)); - iEvent.emplace(disabledChannelPutToken_, std::move(disabled_channelcollection)); -} - -DEFINE_FWK_MODULE(SiPixelDigiErrorsFromSoA); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc deleted file mode 100644 index 554f1425cef59..0000000000000 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc +++ /dev/null @@ -1,80 +0,0 @@ -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" -#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" - -class SiPixelDigiErrorsSoAFromCUDA : public edm::stream::EDProducer { -public: - explicit SiPixelDigiErrorsSoAFromCUDA(const edm::ParameterSet& iConfig); - ~SiPixelDigiErrorsSoAFromCUDA() override = default; - - static void 
fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void acquire(const edm::Event& iEvent, - const edm::EventSetup& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; - void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; - - edm::EDGetTokenT> digiErrorGetToken_; - edm::EDPutTokenT digiErrorPutToken_; - - cms::cuda::host::unique_ptr data_; - cms::cuda::SimpleVector error_ = cms::cuda::make_SimpleVector(0, nullptr); - const SiPixelFormatterErrors* formatterErrors_ = nullptr; -}; - -SiPixelDigiErrorsSoAFromCUDA::SiPixelDigiErrorsSoAFromCUDA(const edm::ParameterSet& iConfig) - : digiErrorGetToken_( - consumes>(iConfig.getParameter("src"))), - digiErrorPutToken_(produces()) {} - -void SiPixelDigiErrorsSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.add("src", edm::InputTag("siPixelClustersCUDA")); - descriptions.addWithDefaultLabel(desc); -} - -void SiPixelDigiErrorsSoAFromCUDA::acquire(const edm::Event& iEvent, - const edm::EventSetup& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - // Do the transfer in a CUDA stream parallel to the computation CUDA stream - cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; - const auto& gpuDigiErrors = ctx.get(iEvent, digiErrorGetToken_); - formatterErrors_ = &(gpuDigiErrors.formatterErrors()); - - if (gpuDigiErrors.nErrorWords() == 0) - return; - - auto tmp = gpuDigiErrors.dataErrorToHostAsync(ctx.stream()); - error_ = tmp.first; - data_ = std::move(tmp.second); -} - -void SiPixelDigiErrorsSoAFromCUDA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { - // The following line copies the data from the pinned host memory to - // regular host memory. In principle that feels unnecessary (why not - // just use the pinned host memory?). 
There are a few arguments for - // doing it though - // - Now can release the pinned host memory back to the (caching) allocator - // * if we'd like to keep the pinned memory, we'd need to also - // keep the CUDA stream around as long as that, or allow pinned - // host memory to be allocated without a CUDA stream - // - What if a CPU algorithm would produce the same SoA? We can't - // use cudaMallocHost without a GPU... - iEvent.emplace(digiErrorPutToken_, error_.size(), error_.data(), formatterErrors_); - error_ = cms::cuda::make_SimpleVector(0, nullptr); - data_.reset(); - formatterErrors_ = nullptr; -} - -// define as framework plugin -DEFINE_FWK_MODULE(SiPixelDigiErrorsSoAFromCUDA); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc deleted file mode 100644 index 67b1b519d4089..0000000000000 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc +++ /dev/null @@ -1,73 +0,0 @@ -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "CUDADataFormats/Common/interface/PortableHostCollection.h" - -class SiPixelDigisSoAFromCUDA : public edm::stream::EDProducer { -public: - explicit SiPixelDigisSoAFromCUDA(const edm::ParameterSet& iConfig); - ~SiPixelDigisSoAFromCUDA() override = default; - 
- static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void acquire(const edm::Event& iEvent, - const edm::EventSetup& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; - void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; - - edm::EDGetTokenT> digiGetToken_; - edm::EDPutTokenT digiPutToken_; - - cms::cuda::PortableHostCollection digis_h_; - - int nDigis_; -}; - -SiPixelDigisSoAFromCUDA::SiPixelDigisSoAFromCUDA(const edm::ParameterSet& iConfig) - : digiGetToken_(consumes>(iConfig.getParameter("src"))), - digiPutToken_(produces()) {} - -void SiPixelDigisSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.add("src", edm::InputTag("siPixelClustersCUDA")); - descriptions.addWithDefaultLabel(desc); -} - -void SiPixelDigisSoAFromCUDA::acquire(const edm::Event& iEvent, - const edm::EventSetup& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - // Do the transfer in a CUDA stream parallel to the computation CUDA stream - cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; - - const auto& digis_d = ctx.get(iEvent, digiGetToken_); - - nDigis_ = digis_d.nDigis(); - digis_h_ = cms::cuda::PortableHostCollection(digis_d.view().metadata().size(), ctx.stream()); - cudaCheck(cudaMemcpyAsync(digis_h_.buffer().get(), - digis_d.const_buffer().get(), - digis_d.bufferSize(), - cudaMemcpyDeviceToHost, - ctx.stream())); -} - -void SiPixelDigisSoAFromCUDA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { - iEvent.emplace(digiPutToken_, - nDigis_, - digis_h_.view().pdigi(), - digis_h_.view().rawIdArr(), - digis_h_.view().adc(), - digis_h_.view().clus()); -} - -// define as framework plugin -DEFINE_FWK_MODULE(SiPixelDigisSoAFromCUDA); diff --git a/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py index 
05263e3e8bdf9..47b545fec5e37 100644 --- a/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py +++ b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py @@ -3,11 +3,9 @@ from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import siPixelDigis siPixelDigisTask = cms.Task( - # SwitchProducer wrapping the legacy pixel digis producer or an alias combining the pixel digis information converted from SoA siPixelDigis ) -# Phase 2 Tracker Modifier +# FIXME remove siPixelDigis until we have Phase 2 pixel digis from Configuration.Eras.Modifier_phase2_tracker_cff import phase2_tracker -# Remove siPixelDigis until we have phase2 pixel digis -phase2_tracker.toReplaceWith(siPixelDigisTask, cms.Task()) #FIXME +phase2_tracker.toReplaceWith(siPixelDigisTask, cms.Task()) diff --git a/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h index c224483bda40a..7dd7e63b29385 100644 --- a/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h +++ b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h @@ -2,7 +2,7 @@ #define RecoLocalTracker_SiPixelClusterizer_interface_SiPixelClusterThresholds_h /* This struct is an implementation detail of this package. - * It's in the interface directory because it needs to be shared by the legacy, CUDA, and Alpaka plugins. + * It's in the interface directory because it needs to be shared by the legacy and Alpaka plugins. 
*/ struct SiPixelClusterThresholds { diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index 83bdae62636e0..57f19af2724dc 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -1,33 +1,34 @@ - - - + + - + + - + + + - - - - - - - - - - + + + + + + - - + + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc deleted file mode 100644 index 0bfa989c92969..0000000000000 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ /dev/null @@ -1,228 +0,0 @@ -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "DataFormats/Common/interface/DetSetVector.h" -#include "DataFormats/Common/interface/Handle.h" -#include "DataFormats/DetId/interface/DetId.h" -#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" -#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" -#include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" -#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "Geometry/Records/interface/TrackerTopologyRcd.h" -#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" - -// local include(s) -#include "PixelClusterizerBase.h" - 
-//#define GPU_DEBUG - -template -class SiPixelDigisClustersFromSoAT : public edm::global::EDProducer<> { -public: - explicit SiPixelDigisClustersFromSoAT(const edm::ParameterSet& iConfig); - ~SiPixelDigisClustersFromSoAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; - - const edm::ESGetToken topoToken_; - - edm::EDGetTokenT digiGetToken_; - - edm::EDPutTokenT> digiPutToken_; - edm::EDPutTokenT clusterPutToken_; - - const SiPixelClusterThresholds clusterThresholds_; // Cluster threshold in electrons - - const bool produceDigis_; - const bool storeDigis_; -}; - -template -SiPixelDigisClustersFromSoAT::SiPixelDigisClustersFromSoAT(const edm::ParameterSet& iConfig) - : topoToken_(esConsumes()), - digiGetToken_(consumes(iConfig.getParameter("src"))), - clusterPutToken_(produces()), - clusterThresholds_(iConfig.getParameter("clusterThreshold_layer1"), - iConfig.getParameter("clusterThreshold_otherLayers")), - produceDigis_(iConfig.getParameter("produceDigis")), - storeDigis_(iConfig.getParameter("produceDigis") && iConfig.getParameter("storeDigis")) { - if (produceDigis_) - digiPutToken_ = produces>(); -} - -template -void SiPixelDigisClustersFromSoAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.add("src", edm::InputTag("siPixelDigisSoA")); - desc.add("clusterThreshold_layer1", gpuClustering::clusterThresholdLayerOne); - desc.add("clusterThreshold_otherLayers", gpuClustering::clusterThresholdOtherLayers); - desc.add("produceDigis", true); - desc.add("storeDigis", true); - - descriptions.addWithDefaultLabel(desc); -} - -template -void SiPixelDigisClustersFromSoAT::produce(edm::StreamID, - edm::Event& iEvent, - const edm::EventSetup& iSetup) const { - const auto& digis = iEvent.get(digiGetToken_); - const uint32_t nDigis = digis.size(); - const 
auto& ttopo = iSetup.getData(topoToken_); - constexpr auto maxModules = TrackerTraits::numberOfModules; - - std::unique_ptr> collection; - if (produceDigis_) - collection = std::make_unique>(); - if (storeDigis_) - collection->reserve(maxModules); - auto outputClusters = std::make_unique(); - outputClusters->reserve(maxModules, nDigis / 2); - - edm::DetSet* detDigis = nullptr; - uint32_t detId = 0; - for (uint32_t i = 0; i < nDigis; i++) { - // check for uninitialized digis - // this is set in RawToDigi_kernel in SiPixelRawToClusterGPUKernel.cu - if (digis.rawIdArr(i) == 0) - continue; - // check for noisy/dead pixels (electrons set to 0) - if (digis.adc(i) == 0) - continue; - - detId = digis.rawIdArr(i); - if (storeDigis_) { - detDigis = &collection->find_or_insert(detId); - if ((*detDigis).empty()) - (*detDigis).data.reserve(64); // avoid the first relocations - } - break; - } - - int32_t nclus = -1; - PixelClusterizerBase::AccretionCluster aclusters[TrackerTraits::maxNumClustersPerModules]; -#ifdef EDM_ML_DEBUG - auto totClustersFilled = 0; -#endif - - auto fillClusters = [&](uint32_t detId) { - if (nclus < 0) - return; // this in reality should never happen - edmNew::DetSetVector::FastFiller spc(*outputClusters, detId); - auto layer = (DetId(detId).subdetId() == 1) ? ttopo.pxbLayer(detId) : 0; - auto clusterThreshold = clusterThresholds_.getThresholdForLayerOnCondition(layer == 1); - for (int32_t ic = 0; ic < nclus + 1; ++ic) { - auto const& acluster = aclusters[ic]; - // in any case we cannot go out of sync with gpu... 
- if (acluster.charge < clusterThreshold) - edm::LogWarning("SiPixelDigisClustersFromSoA") << "cluster below charge Threshold " - << "Layer/DetId/clusId " << layer << '/' << detId << '/' << ic - << " size/charge " << acluster.isize << '/' << acluster.charge; - // sort by row (x) - spc.emplace_back(acluster.isize, acluster.adc, acluster.x, acluster.y, acluster.xmin, acluster.ymin, ic); - aclusters[ic].clear(); -#ifdef EDM_ML_DEBUG - ++totClustersFilled; - const auto& cluster{spc.back()}; - LogDebug("SiPixelDigisClustersFromSoA") - << "putting in this cluster " << ic << " " << cluster.charge() << " " << cluster.pixelADC().size(); -#endif - std::push_heap(spc.begin(), spc.end(), [](SiPixelCluster const& cl1, SiPixelCluster const& cl2) { - return cl1.minPixelRow() < cl2.minPixelRow(); - }); - } - nclus = -1; - // sort by row (x) - std::sort_heap(spc.begin(), spc.end(), [](SiPixelCluster const& cl1, SiPixelCluster const& cl2) { - return cl1.minPixelRow() < cl2.minPixelRow(); - }); - if (spc.empty()) - spc.abort(); - }; - -#ifdef GPU_DEBUG - std::cout << "Dumping all digis. 
nDigis = " << nDigis << std::endl; -#endif - - for (uint32_t i = 0; i < nDigis; i++) { - // check for uninitialized digis - if (digis.rawIdArr(i) == 0) - continue; - // check for noisy/dead pixels (electrons set to 0) - if (digis.adc(i) == 0) - continue; - if (digis.clus(i) > 9000) - continue; // not in cluster; TODO add an assert for the size -#ifdef EDM_ML_DEBUG - assert(digis.rawIdArr(i) > 109999); -#endif - if (detId != digis.rawIdArr(i)) { -#ifdef GPU_DEBUG - std::cout << ">> Closed module --" << detId << "; nclus = " << nclus << std::endl; -#endif - // new module - fillClusters(detId); -#ifdef EDM_ML_DEBUG - assert(nclus == -1); -#endif - detId = digis.rawIdArr(i); - if (storeDigis_) { - detDigis = &collection->find_or_insert(detId); - if ((*detDigis).empty()) - (*detDigis).data.reserve(64); // avoid the first relocations - else { - edm::LogWarning("SiPixelDigisClustersFromSoA") - << "Problem det present twice in input! " << (*detDigis).detId(); - } - } - } - PixelDigi dig(digis.pdigi(i)); - -#ifdef GPU_DEBUG - std::cout << i << ";" << digis.rawIdArr(i) << ";" << digis.clus(i) << ";" << digis.pdigi(i) << ";" << digis.adc(i) - << ";" << dig.row() << ";" << dig.column() << std::endl; -#endif - - if (storeDigis_) - (*detDigis).data.emplace_back(dig); - // fill clusters -#ifdef EDM_ML_DEBUG - assert(digis.clus(i) >= 0); - assert(digis.clus(i) < static_cast(TrackerTraits::maxNumClustersPerModules)); -#endif - nclus = std::max(digis.clus(i), nclus); - auto row = dig.row(); - auto col = dig.column(); - SiPixelCluster::PixelPos pix(row, col); - aclusters[digis.clus(i)].add(pix, digis.adc(i)); - } - - // fill final clusters - if (detId > 0) - fillClusters(detId); - -#ifdef EDM_ML_DEBUG - LogDebug("SiPixelDigisClustersFromSoA") << "filled " << totClustersFilled << " clusters"; -#endif - - if (produceDigis_) - iEvent.put(digiPutToken_, std::move(collection)); - iEvent.put(clusterPutToken_, std::move(outputClusters)); -} - -using SiPixelDigisClustersFromSoAPhase1 = 
SiPixelDigisClustersFromSoAT; -DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoAPhase1); -using SiPixelDigisClustersFromSoAPhase2 = SiPixelDigisClustersFromSoAT; -DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoAPhase2); -using SiPixelDigisClustersFromSoAHIonPhase1 = SiPixelDigisClustersFromSoAT; -DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoAHIonPhase1); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc deleted file mode 100644 index e270d31515842..0000000000000 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc +++ /dev/null @@ -1,163 +0,0 @@ -// C++ includes -#include -#include -#include - -// CMSSW includes -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "DataFormats/FEDRawData/interface/FEDNumbering.h" -#include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" -#include "EventFilter/SiPixelRawToDigi/interface/PixelUnpackingRegions.h" -#include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" - -// local includes -#include "SiPixelRawToClusterGPUKernel.h" - -class SiPixelPhase2DigiToClusterCUDA : public 
edm::stream::EDProducer { -public: - explicit SiPixelPhase2DigiToClusterCUDA(const edm::ParameterSet& iConfig); - ~SiPixelPhase2DigiToClusterCUDA() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - using GPUAlgo = pixelgpudetails::SiPixelRawToClusterGPUKernel; - -private: - void acquire(const edm::Event& iEvent, - const edm::EventSetup& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; - void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; - - const edm::ESGetToken geomToken_; - const edm::EDGetTokenT> pixelDigiToken_; - - edm::EDPutTokenT> digiPutToken_; - edm::EDPutTokenT> digiErrorPutToken_; - edm::EDPutTokenT> clusterPutToken_; - - cms::cuda::ContextState ctxState_; - - GPUAlgo gpuAlgo_; - - const bool includeErrors_; - const SiPixelClusterThresholds clusterThresholds_; - uint32_t nDigis_; -}; - -SiPixelPhase2DigiToClusterCUDA::SiPixelPhase2DigiToClusterCUDA(const edm::ParameterSet& iConfig) - : geomToken_(esConsumes()), - pixelDigiToken_(consumes>(iConfig.getParameter("InputDigis"))), - digiPutToken_(produces>()), - clusterPutToken_(produces>()), - includeErrors_(iConfig.getParameter("IncludeErrors")), - clusterThresholds_{iConfig.getParameter("clusterThreshold_layer1"), - iConfig.getParameter("clusterThreshold_otherLayers"), - static_cast(iConfig.getParameter("ElectronPerADCGain")), - static_cast(iConfig.getParameter("Phase2ReadoutMode")), - static_cast(iConfig.getParameter("Phase2DigiBaseline")), - static_cast(iConfig.getParameter("Phase2KinkADC"))} { - if (includeErrors_) { - digiErrorPutToken_ = produces>(); - } -} - -void SiPixelPhase2DigiToClusterCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - desc.add("IncludeErrors", true); - desc.add("clusterThreshold_layer1", 4000); - desc.add("clusterThreshold_otherLayers", 4000); - desc.add("ElectronPerADCGain", 1500); - desc.add("Phase2ReadoutMode", 3); - 
desc.add("Phase2DigiBaseline", 1000); - desc.add("Phase2KinkADC", 8); - desc.add("InputDigis", edm::InputTag("simSiPixelDigis:Pixel")); - descriptions.addWithDefaultLabel(desc); -} - -void SiPixelPhase2DigiToClusterCUDA::acquire(const edm::Event& iEvent, - const edm::EventSetup& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder), ctxState_}; - - auto const& input = iEvent.get(pixelDigiToken_); - - const TrackerGeometry* geom_ = &iSetup.getData(geomToken_); - - nDigis_ = 0; - - auto xDigis = cms::cuda::make_host_unique(gpuClustering::maxNumDigis, ctx.stream()); - auto yDigis = cms::cuda::make_host_unique(gpuClustering::maxNumDigis, ctx.stream()); - auto adcDigis = cms::cuda::make_host_unique(gpuClustering::maxNumDigis, ctx.stream()); - auto moduleIds = cms::cuda::make_host_unique(gpuClustering::maxNumDigis, ctx.stream()); - auto packedData = cms::cuda::make_host_unique(gpuClustering::maxNumDigis, ctx.stream()); - auto rawIds = cms::cuda::make_host_unique(gpuClustering::maxNumDigis, ctx.stream()); - - for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { - unsigned int detid = DSViter->detId(); - DetId detIdObject(detid); - const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); - auto const gind = genericDet->index(); - for (auto const& px : *DSViter) { - moduleIds[nDigis_] = uint16_t(gind); - - xDigis[nDigis_] = uint16_t(px.row()); - yDigis[nDigis_] = uint16_t(px.column()); - adcDigis[nDigis_] = uint16_t(px.adc()); - - packedData[nDigis_] = uint32_t(px.packedData()); - - rawIds[nDigis_] = uint32_t(detid); - - nDigis_++; - } - } - - if (nDigis_ == 0) - return; - - gpuAlgo_.makePhase2ClustersAsync(clusterThresholds_, - moduleIds.get(), - xDigis.get(), - yDigis.get(), - adcDigis.get(), - packedData.get(), - rawIds.get(), - nDigis_, - ctx.stream()); -} - -void SiPixelPhase2DigiToClusterCUDA::produce(edm::Event& iEvent, const edm::EventSetup& 
iSetup) { - cms::cuda::ScopedContextProduce ctx{ctxState_}; - - if (nDigis_ == 0) { - ctx.emplace(iEvent, digiPutToken_, nDigis_, ctx.stream()); - ctx.emplace(iEvent, clusterPutToken_, pixelTopology::Phase2::numberOfModules, ctx.stream()); - if (includeErrors_) { - ctx.emplace(iEvent, digiErrorPutToken_, SiPixelDigiErrorsCUDA{}); - } - return; - } - - auto tmp = gpuAlgo_.getResults(); - ctx.emplace(iEvent, digiPutToken_, std::move(tmp.first)); - ctx.emplace(iEvent, clusterPutToken_, std::move(tmp.second)); - if (includeErrors_) { - ctx.emplace(iEvent, digiErrorPutToken_, gpuAlgo_.getErrors()); - } -} - -// define as framework plugin -#include "FWCore/Framework/interface/MakerMacros.h" -DEFINE_FWK_MODULE(SiPixelPhase2DigiToClusterCUDA); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc deleted file mode 100644 index 0a763793d35fd..0000000000000 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ /dev/null @@ -1,315 +0,0 @@ -// C++ includes -#include -#include -#include - -// CMSSW includes -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h" -#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" -#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" -#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" -#include 
"DataFormats/FEDRawData/interface/FEDNumbering.h" -#include "DataFormats/FEDRawData/interface/FEDRawData.h" -#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" -#include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" -#include "EventFilter/SiPixelRawToDigi/interface/PixelUnpackingRegions.h" -#include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/ESTransientHandle.h" -#include "FWCore/Framework/interface/ESWatcher.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" -#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" -#include "RecoTracker/Record/interface/CkfComponentsRecord.h" - -// local includes -#include "SiPixelRawToClusterGPUKernel.h" - -template -class SiPixelRawToClusterCUDAT : public edm::stream::EDProducer { -public: - explicit SiPixelRawToClusterCUDAT(const edm::ParameterSet& iConfig); - ~SiPixelRawToClusterCUDAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - using GPUAlgo = pixelgpudetails::SiPixelRawToClusterGPUKernel; - -private: - void acquire(const edm::Event& iEvent, - const edm::EventSetup& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; - void 
produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; - - edm::EDGetTokenT rawGetToken_; - - edm::EDPutTokenT> digiPutToken_; - edm::EDPutTokenT> digiErrorPutToken_; - edm::EDPutTokenT> clusterPutToken_; - - cms::cuda::ContextState ctxState_; - - edm::ESWatcher recordWatcher_; - edm::ESGetToken gpuMapToken_; - edm::ESGetToken gainsToken_; - edm::ESGetToken cablingMapToken_; - - std::unique_ptr cabling_; - std::vector fedIds_; - const SiPixelFedCablingMap* cablingMap_ = nullptr; - std::unique_ptr regions_; - - GPUAlgo gpuAlgo_; - PixelDataFormatter::Errors errors_; - - const bool includeErrors_; - const bool useQuality_; - uint32_t nDigis_; - const SiPixelClusterThresholds clusterThresholds_; -}; - -template -SiPixelRawToClusterCUDAT::SiPixelRawToClusterCUDAT(const edm::ParameterSet& iConfig) - : rawGetToken_(consumes(iConfig.getParameter("InputLabel"))), - digiPutToken_(produces>()), - clusterPutToken_(produces>()), - gpuMapToken_(esConsumes()), - gainsToken_(esConsumes()), - cablingMapToken_(esConsumes( - edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), - includeErrors_(iConfig.getParameter("IncludeErrors")), - useQuality_(iConfig.getParameter("UseQualityInfo")), - clusterThresholds_{iConfig.getParameter("clusterThreshold_layer1"), - iConfig.getParameter("clusterThreshold_otherLayers"), - static_cast(iConfig.getParameter("VCaltoElectronGain")), - static_cast(iConfig.getParameter("VCaltoElectronGain_L1")), - static_cast(iConfig.getParameter("VCaltoElectronOffset")), - static_cast(iConfig.getParameter("VCaltoElectronOffset_L1"))} { - if (includeErrors_) { - digiErrorPutToken_ = produces>(); - } - - // regions - if (!iConfig.getParameter("Regions").getParameterNames().empty()) { - regions_ = std::make_unique(iConfig, consumesCollector()); - } -} - -template -void SiPixelRawToClusterCUDAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.add("IncludeErrors", true); - 
desc.add("UseQualityInfo", false); - // Note: this parameter is obsolete: it is ignored and will have no effect. - // It is kept to avoid breaking older configurations, and will not be printed in the generated cfi.py file. - desc.addOptionalNode(edm::ParameterDescription("MaxFEDWords", 0, true), false) - ->setComment("This parameter is obsolete and will be ignored."); - //Clustering Thresholds - desc.add("clusterThreshold_layer1", gpuClustering::clusterThresholdLayerOne); - desc.add("clusterThreshold_otherLayers", gpuClustering::clusterThresholdOtherLayers); - desc.add("VCaltoElectronGain", 47.f); - desc.add("VCaltoElectronGain_L1", 50.f); - desc.add("VCaltoElectronOffset", -60.f); - desc.add("VCaltoElectronOffset_L1", -670.f); - desc.add("InputLabel", edm::InputTag("rawDataCollector")); - { - edm::ParameterSetDescription psd0; - psd0.addOptional>("inputs"); - psd0.addOptional>("deltaPhi"); - psd0.addOptional>("maxZ"); - psd0.addOptional("beamSpot"); - desc.add("Regions", psd0) - ->setComment("## Empty Regions PSet means complete unpacking"); - } - desc.add("CablingMapLabel", "")->setComment("CablingMap label"); //Tav - descriptions.addWithDefaultLabel(desc); -} - -template -void SiPixelRawToClusterCUDAT::acquire(const edm::Event& iEvent, - const edm::EventSetup& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder), ctxState_}; - - auto hgpuMap = iSetup.getHandle(gpuMapToken_); - if (hgpuMap->hasQuality() != useQuality_) { - throw cms::Exception("LogicError") - << "UseQuality of the module (" << useQuality_ - << ") differs the one from SiPixelROCsStatusAndMappingWrapper. 
Please fix your configuration."; - } - // get the GPU product already here so that the async transfer can begin - const auto* gpuMap = hgpuMap->getGPUProductAsync(ctx.stream()); - - auto hgains = iSetup.getHandle(gainsToken_); - // get the GPU product already here so that the async transfer can begin - const auto* gpuGains = hgains->getGPUProductAsync(ctx.stream()); - - cms::cuda::device::unique_ptr modulesToUnpackRegional; - const unsigned char* gpuModulesToUnpack; - - if (regions_) { - regions_->run(iEvent, iSetup); - LogDebug("SiPixelRawToCluster") << "region2unpack #feds: " << regions_->nFEDs(); - LogDebug("SiPixelRawToCluster") << "region2unpack #modules (BPIX,EPIX,total): " << regions_->nBarrelModules() << " " - << regions_->nForwardModules() << " " << regions_->nModules(); - modulesToUnpackRegional = hgpuMap->getModToUnpRegionalAsync(*(regions_->modulesToUnpack()), ctx.stream()); - gpuModulesToUnpack = modulesToUnpackRegional.get(); - } else { - gpuModulesToUnpack = hgpuMap->getModToUnpAllAsync(ctx.stream()); - } - - // initialize cabling map or update if necessary - if (recordWatcher_.check(iSetup)) { - // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) - auto cablingMap = iSetup.getTransientHandle(cablingMapToken_); - cablingMap_ = cablingMap.product(); - fedIds_ = cablingMap->fedIds(); - cabling_ = cablingMap->cablingTree(); - LogDebug("map version:") << cabling_->version(); - } - - const auto& buffers = iEvent.get(rawGetToken_); - - errors_.clear(); - - // GPU specific: Data extraction for RawToDigi GPU - unsigned int wordCounter = 0; - unsigned int fedCounter = 0; - bool errorsInEvent = false; - - std::vector index(fedIds_.size(), 0); - std::vector start(fedIds_.size(), nullptr); - std::vector words(fedIds_.size(), 0); - - // In CPU algorithm this loop is part of PixelDataFormatter::interpretRawData() - ErrorChecker errorcheck; - for (uint32_t i = 0; i < fedIds_.size(); ++i) { - const int fedId = 
fedIds_[i]; - if (regions_ && !regions_->mayUnpackFED(fedId)) - continue; - - // for GPU - // first 150 index stores the fedId and next 150 will store the - // start index of word in that fed - assert(fedId >= FEDNumbering::MINSiPixeluTCAFEDID); - fedCounter++; - - // get event data for this fed - const FEDRawData& rawData = buffers.FEDData(fedId); - - // GPU specific - int nWords = rawData.size() / sizeof(cms_uint64_t); - if (nWords == 0) { - continue; - } - - // check CRC bit - const cms_uint64_t* trailer = reinterpret_cast(rawData.data()) + (nWords - 1); - if (not errorcheck.checkCRC(errorsInEvent, fedId, trailer, errors_)) { - continue; - } - - // check headers - const cms_uint64_t* header = reinterpret_cast(rawData.data()); - header--; - bool moreHeaders = true; - while (moreHeaders) { - header++; - bool headerStatus = errorcheck.checkHeader(errorsInEvent, fedId, header, errors_); - moreHeaders = headerStatus; - } - - // check trailers - bool moreTrailers = true; - trailer++; - while (moreTrailers) { - trailer--; - bool trailerStatus = errorcheck.checkTrailer(errorsInEvent, fedId, nWords, trailer, errors_); - moreTrailers = trailerStatus; - } - - const cms_uint32_t* bw = (const cms_uint32_t*)(header + 1); - const cms_uint32_t* ew = (const cms_uint32_t*)(trailer); - - assert(0 == (ew - bw) % 2); - index[i] = wordCounter; - start[i] = bw; - words[i] = (ew - bw); - wordCounter += (ew - bw); - - } // end of for loop - - nDigis_ = wordCounter; - - if (nDigis_ == 0) - return; - - // copy the FED data to a single cpu buffer - typename GPUAlgo::WordFedAppender wordFedAppender(nDigis_, ctx.stream()); - for (uint32_t i = 0; i < fedIds_.size(); ++i) { - wordFedAppender.initializeWordFed(fedIds_[i], index[i], start[i], words[i]); - } - - gpuAlgo_.makePhase1ClustersAsync(clusterThresholds_, - gpuMap, - gpuModulesToUnpack, - gpuGains, - wordFedAppender, - std::move(errors_), - wordCounter, - fedCounter, - useQuality_, - includeErrors_, - 
edm::MessageDrop::instance()->debugEnabled, - ctx.stream()); -} - -template -void SiPixelRawToClusterCUDAT::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { - cms::cuda::ScopedContextProduce ctx{ctxState_}; - - if (nDigis_ == 0) { - // Cannot use the default constructor here, as it would not allocate memory. - // In the case of no digis, clusters_d are not being instantiated, but are - // still used downstream to initialize TrackingRecHitSoADevice. If there - // are no valid pointers to clusters' Collection columns, instantiation - // of TrackingRecHits fail. Example: workflow 11604.0 - SiPixelDigisCUDA digis_d = SiPixelDigisCUDA(nDigis_, ctx.stream()); - SiPixelClustersCUDA clusters_d = SiPixelClustersCUDA(pixelTopology::Phase1::numberOfModules, ctx.stream()); - ctx.emplace(iEvent, digiPutToken_, std::move(digis_d)); - ctx.emplace(iEvent, clusterPutToken_, std::move(clusters_d)); - if (includeErrors_) { - ctx.emplace(iEvent, digiErrorPutToken_, SiPixelDigiErrorsCUDA{}); - } - return; - } - - auto tmp = gpuAlgo_.getResults(); - ctx.emplace(iEvent, digiPutToken_, std::move(tmp.first)); - ctx.emplace(iEvent, clusterPutToken_, std::move(tmp.second)); - if (includeErrors_) { - ctx.emplace(iEvent, digiErrorPutToken_, gpuAlgo_.getErrors()); - } -} - -// define as framework plugin -using SiPixelRawToClusterCUDAPhase1 = SiPixelRawToClusterCUDAT; -DEFINE_FWK_MODULE(SiPixelRawToClusterCUDAPhase1); -using SiPixelRawToClusterCUDAHIonPhase1 = SiPixelRawToClusterCUDAT; -DEFINE_FWK_MODULE(SiPixelRawToClusterCUDAHIonPhase1); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu deleted file mode 100644 index 1e2e3ad235b79..0000000000000 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ /dev/null @@ -1,779 +0,0 @@ -/* Sushil Dubey, Shashi Dugad, TIFR, July 2017 - * - * File Name: RawToClusterGPU.cu - * Description: It 
converts Raw data into Digi Format on GPU - * Finaly the Output of RawToDigi data is given to pixelClusterizer -**/ - -// C++ includes -#include -#include -#include -#include -#include -#include -#include - -// CUDA includes -#include - -// CMSSW includes -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h" -#include "DataFormats/FEDRawData/interface/FEDNumbering.h" -#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "DataFormats/SiPixelDigi/interface/SiPixelDigiConstants.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" - -// local includes -#include "SiPixelRawToClusterGPUKernel.h" -#include "gpuCalibPixel.h" -#include "gpuClusterChargeCut.h" -#include "gpuClustering.h" - -//#define GPU_DEBUG - -namespace pixelgpudetails { - - __device__ bool isBarrel(uint32_t rawId) { - return (PixelSubdetector::PixelBarrel == ((rawId >> DetId::kSubdetOffset) & DetId::kSubdetMask)); - } - - __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelROCsStatusAndMapping *cablingMap, - uint8_t fed, - uint32_t link, - uint32_t roc) { - uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; - pixelgpudetails::DetIdGPU detId = { - cablingMap->rawId[index], cablingMap->rocInDet[index], cablingMap->moduleId[index]}; - return detId; - } - - //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html - //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071 - // Convert local pixel to pixelgpudetails::global pixel - __device__ pixelgpudetails::Pixel frameConversion( - bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, pixelgpudetails::Pixel local) { - int slopeRow = 0, slopeCol = 0; 
- int rowOffset = 0, colOffset = 0; - - if (bpix) { - if (side == -1 && layer != 1) { // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1 - if (rocIdInDetUnit < 8) { - slopeRow = 1; - slopeCol = -1; - rowOffset = 0; - colOffset = (8 - rocIdInDetUnit) * pixelgpudetails::numColsInRoc - 1; - } else { - slopeRow = -1; - slopeCol = 1; - rowOffset = 2 * pixelgpudetails::numRowsInRoc - 1; - colOffset = (rocIdInDetUnit - 8) * pixelgpudetails::numColsInRoc; - } // if roc - } else { // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1 - if (rocIdInDetUnit < 8) { - slopeRow = -1; - slopeCol = 1; - rowOffset = 2 * pixelgpudetails::numRowsInRoc - 1; - colOffset = rocIdInDetUnit * pixelgpudetails::numColsInRoc; - } else { - slopeRow = 1; - slopeCol = -1; - rowOffset = 0; - colOffset = (16 - rocIdInDetUnit) * pixelgpudetails::numColsInRoc - 1; - } - } - - } else { // fpix - if (side == -1) { // pannel 1 - if (rocIdInDetUnit < 8) { - slopeRow = 1; - slopeCol = -1; - rowOffset = 0; - colOffset = (8 - rocIdInDetUnit) * pixelgpudetails::numColsInRoc - 1; - } else { - slopeRow = -1; - slopeCol = 1; - rowOffset = 2 * pixelgpudetails::numRowsInRoc - 1; - colOffset = (rocIdInDetUnit - 8) * pixelgpudetails::numColsInRoc; - } - } else { // pannel 2 - if (rocIdInDetUnit < 8) { - slopeRow = 1; - slopeCol = -1; - rowOffset = 0; - colOffset = (8 - rocIdInDetUnit) * pixelgpudetails::numColsInRoc - 1; - } else { - slopeRow = -1; - slopeCol = 1; - rowOffset = 2 * pixelgpudetails::numRowsInRoc - 1; - colOffset = (rocIdInDetUnit - 8) * pixelgpudetails::numColsInRoc; - } - - } // side - } - - uint32_t gRow = rowOffset + slopeRow * local.row; - uint32_t gCol = colOffset + slopeCol * local.col; - // inside frameConversion row: gRow, column: gCol - pixelgpudetails::Pixel global = {gRow, gCol}; - return global; - } - - // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc - template - __device__ uint8_t 
conversionError(uint8_t fedId, uint8_t status) { - uint8_t errorType = 0; - - switch (status) { - case (1): { - if constexpr (debug) - printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId); - errorType = 35; - break; - } - case (2): { - if constexpr (debug) - printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId); - errorType = 36; - break; - } - case (3): { - if constexpr (debug) - printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId); - errorType = 37; - break; - } - case (4): { - if constexpr (debug) - printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId); - errorType = 38; - break; - } - default: - if constexpr (debug) - printf("Cabling check returned unexpected result, status = %i\n", status); - }; - - return errorType; - } - - __device__ bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) { - /// row and column in ROC representation - return ((rocRow < pixelgpudetails::numRowsInRoc) & (rocCol < pixelgpudetails::numColsInRoc)); - } - - __device__ bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); } - - // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc - template - __device__ uint8_t - checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelROCsStatusAndMapping *cablingMap) { - uint8_t errorType = (errorWord >> sipixelconstants::ROC_shift) & sipixelconstants::ERROR_mask; - if (errorType < 25) - return 0; - bool errorFound = false; - - switch (errorType) { - case (25): { - errorFound = true; - uint32_t index = fedId * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + 1; - if (index > 1 && index <= cablingMap->size) { - if (!(link == cablingMap->link[index] && 1 == cablingMap->roc[index])) - errorFound = false; - } - if constexpr (debug) - if (errorFound) - printf("Invalid ROC = 25 found (errorType = 25)\n"); - break; - } - case (26): { - if constexpr (debug) - printf("Gap 
word found (errorType = 26)\n"); - break; - } - case (27): { - if constexpr (debug) - printf("Dummy word found (errorType = 27)\n"); - break; - } - case (28): { - if constexpr (debug) - printf("Error fifo nearly full (errorType = 28)\n"); - errorFound = true; - break; - } - case (29): { - if constexpr (debug) - printf("Timeout on a channel (errorType = 29)\n"); - if (!((errorWord >> sipixelconstants::OMIT_ERR_shift) & sipixelconstants::OMIT_ERR_mask)) { - if constexpr (debug) - printf("...2nd errorType=29 error, skip\n"); - break; - } - errorFound = true; - break; - } - case (30): { - if constexpr (debug) - printf("TBM error trailer (errorType = 30)\n"); - int stateMatch_bits = 4; - int stateMatch_shift = 8; - uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits); - int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask; - if (stateMatch != 1 && stateMatch != 8) { - if constexpr (debug) - printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); - break; - } - if (stateMatch == 1) - errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 - errorFound = true; - break; - } - case (31): { - if constexpr (debug) - printf("Event number error (errorType = 31)\n"); - errorFound = true; - break; - } - default: - errorFound = false; - }; - - return errorFound ? 
errorType : 0; - } - - // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc - template - __device__ uint32_t - getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelROCsStatusAndMapping *cablingMap) { - uint32_t rID = 0xffffffff; - - switch (errorType) { - case 25: - case 29: - case 30: - case 31: - case 36: - case 40: { - uint32_t roc = 1; - uint32_t link = sipixelconstants::getLink(errWord); - uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId; - if (rID_temp != gpuClustering::invalidModuleId) - rID = rID_temp; - break; - } - case 37: - case 38: { - uint32_t roc = sipixelconstants::getROC(errWord); - uint32_t link = sipixelconstants::getLink(errWord); - uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId; - if (rID_temp != gpuClustering::invalidModuleId) - rID = rID_temp; - break; - } - default: - break; - }; - - return rID; - } - - // Kernel to perform Raw to Digi conversion - template - __global__ void RawToDigi_kernel(const SiPixelROCsStatusAndMapping *cablingMap, - const unsigned char *modToUnp, - const uint32_t wordCounter, - const uint32_t *word, - const uint8_t *fedIds, - SiPixelDigisSoA::View digisView, - cms::cuda::SimpleVector *err, - bool useQualityInfo, - bool includeErrors) { - //if (threadIdx.x==0) printf("Event: %u blockIdx.x: %u start: %u end: %u\n", eventno, blockIdx.x, begin, end); - - int32_t first = threadIdx.x + blockIdx.x * blockDim.x; - for (int32_t iloop = first, nend = wordCounter; iloop < nend; iloop += blockDim.x * gridDim.x) { - auto gIndex = iloop; - auto dvgi = digisView[gIndex]; - dvgi.xx() = 0; - dvgi.yy() = 0; - dvgi.adc() = 0; - bool skipROC = false; - - uint8_t fedId = fedIds[gIndex / 2]; // +1200; - - // initialize (too many coninue below) - dvgi.pdigi() = 0; - dvgi.rawIdArr() = 0; - dvgi.moduleId() = gpuClustering::invalidModuleId; - - uint32_t ww = word[gIndex]; // Array containing 32 bit raw data - if (ww == 0) { - // 0 is an indicator 
of a noise/dead channel, skip these pixels during clusterization - continue; - } - - uint32_t link = sipixelconstants::getLink(ww); // Extract link - uint32_t roc = sipixelconstants::getROC(ww); // Extract ROC in link - - uint8_t errorType = checkROC(ww, fedId, link, cablingMap); - skipROC = (roc < pixelgpudetails::maxROCIndex) ? false : (errorType != 0); - if (includeErrors and skipROC) { - uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap); - if (rID != 0xffffffff) // store errors only for valid DetIds - err->push_back(SiPixelErrorCompact{rID, ww, errorType, fedId}); - continue; - } - - // check for spurious channels - if (roc > MAX_ROC or link > MAX_LINK) { - uint32_t rawId = getRawId(cablingMap, fedId, link, 1).rawId; - if constexpr (debug) { - printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex); - } - if (roc > MAX_ROC and roc < 25) { - uint8_t error = conversionError(fedId, 2); - err->push_back(SiPixelErrorCompact{rawId, ww, error, fedId}); - } - continue; - } - - uint32_t index = fedId * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; - if (useQualityInfo) { - skipROC = cablingMap->badRocs[index]; - if (skipROC) - continue; - } - skipROC = modToUnp[index]; - if (skipROC) - continue; - - pixelgpudetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc); - uint32_t rawId = detId.rawId; - uint32_t layer = 0; - int side = 0, panel = 0, module = 0; - bool barrel = isBarrel(rawId); - if (barrel) { - layer = (rawId >> pixelgpudetails::layerStartBit) & pixelgpudetails::layerMask; - module = (rawId >> pixelgpudetails::moduleStartBit) & pixelgpudetails::moduleMask; - side = (module < 5) ? -1 : 1; - } else { - // endcap ids - layer = 0; - panel = (rawId >> pixelgpudetails::panelStartBit) & pixelgpudetails::panelMask; - side = (panel == 1) ? 
-1 : 1; - } - - // ***special case of layer to 1 be handled here - pixelgpudetails::Pixel localPix; - if (layer == 1) { - uint32_t col = sipixelconstants::getCol(ww); - uint32_t row = sipixelconstants::getRow(ww); - localPix.row = row; - localPix.col = col; - if (includeErrors) { - if (not rocRowColIsValid(row, col)) { - uint8_t error = conversionError(fedId, 3); //use the device function and fill the arrays - err->push_back(SiPixelErrorCompact{rawId, ww, error, fedId}); - if constexpr (debug) - printf("BPIX1 Error status: %i\n", error); - continue; - } - } - } else { - // ***conversion rules for dcol and pxid - uint32_t dcol = sipixelconstants::getDCol(ww); - uint32_t pxid = sipixelconstants::getPxId(ww); - uint32_t row = pixelgpudetails::numRowsInRoc - pxid / 2; - uint32_t col = dcol * 2 + pxid % 2; - localPix.row = row; - localPix.col = col; - if (includeErrors and not dcolIsValid(dcol, pxid)) { - uint8_t error = conversionError(fedId, 3); - err->push_back(SiPixelErrorCompact{rawId, ww, error, fedId}); - if constexpr (debug) - printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); - continue; - } - } - - pixelgpudetails::Pixel globalPix = frameConversion(barrel, side, layer, detId.rocInDet, localPix); - dvgi.xx() = globalPix.row; // origin shifting by 1 0-159 - dvgi.yy() = globalPix.col; // origin shifting by 1 0-415 - dvgi.adc() = sipixelconstants::getADC(ww); - dvgi.pdigi() = pixelgpudetails::pack(globalPix.row, globalPix.col, dvgi.adc()); - dvgi.moduleId() = detId.moduleId; - dvgi.rawIdArr() = rawId; - } // end of loop (gIndex < end) - - } // end of Raw to Digi kernel - - template - __global__ void fillHitsModuleStart(uint32_t const *__restrict__ clusInModule, - uint32_t *__restrict__ moduleStart, - uint32_t const *__restrict__ nModules, - uint32_t *__restrict__ nModules_Clusters) { - constexpr int nMaxModules = TrackerTraits::numberOfModules; - constexpr int startBPIX2 = TrackerTraits::layerStart[1]; - - constexpr uint32_t maxHitsInModule = 
TrackerTraits::maxHitsInModule; - - assert(startBPIX2 < nMaxModules); - assert(nMaxModules < 4096); // easy to extend at least till 32*1024 - assert(nMaxModules > 1024); - - assert(1 == gridDim.x); - assert(0 == blockIdx.x); - - int first = threadIdx.x; - - // limit to MaxHitsInModule; - for (int i = first, iend = nMaxModules; i < iend; i += blockDim.x) { - moduleStart[i + 1] = std::min(maxHitsInModule, clusInModule[i]); - } - - constexpr bool isPhase2 = std::is_base_of::value; - __shared__ uint32_t ws[32]; - cms::cuda::blockPrefixScan(moduleStart + 1, moduleStart + 1, 1024, ws); - constexpr int lastModules = isPhase2 ? 1024 : nMaxModules - 1024; - cms::cuda::blockPrefixScan(moduleStart + 1024 + 1, moduleStart + 1024 + 1, lastModules, ws); - - if constexpr (isPhase2) { - cms::cuda::blockPrefixScan(moduleStart + 2048 + 1, moduleStart + 2048 + 1, 1024, ws); - cms::cuda::blockPrefixScan(moduleStart + 3072 + 1, moduleStart + 3072 + 1, nMaxModules - 3072, ws); - } - - for (int i = first + 1025, iend = isPhase2 ? 2049 : nMaxModules + 1; i < iend; i += blockDim.x) { - moduleStart[i] += moduleStart[1024]; - } - __syncthreads(); - - if constexpr (isPhase2) { - for (int i = first + 2049, iend = 3073; i < iend; i += blockDim.x) { - moduleStart[i] += moduleStart[2048]; - } - __syncthreads(); - for (int i = first + 3073, iend = nMaxModules + 1; i < iend; i += blockDim.x) { - moduleStart[i] += moduleStart[3072]; - } - __syncthreads(); - } - - if (threadIdx.x == 0) { - // copy the number of modules - nModules_Clusters[0] = *nModules; - // last element holds the number of all clusters - nModules_Clusters[1] = moduleStart[nMaxModules]; - // element 96 is the start of BPIX2 (i.e. the number of clusters in BPIX1) - nModules_Clusters[2] = moduleStart[startBPIX2]; - } - -#ifdef GPU_DEBUG - uint16_t maxH = isPhase2 ? 
3027 : 1024; - assert(0 == moduleStart[0]); - auto c0 = std::min(maxHitsInModule, clusInModule[0]); - assert(c0 == moduleStart[1]); - assert(moduleStart[maxH] >= moduleStart[maxH - 1]); - assert(moduleStart[maxH + 1] >= moduleStart[maxH]); - assert(moduleStart[nMaxModules] >= moduleStart[maxH + 1]); - - constexpr int startFP1 = TrackerTraits::numberOfModulesInBarrel; - constexpr int startLastFwd = TrackerTraits::layerStart[TrackerTraits::numberOfLayers]; - for (int i = first, iend = nMaxModules + 1; i < iend; i += blockDim.x) { - if (0 != i) - assert(moduleStart[i] >= moduleStart[i - i]); - // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID] - // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856] - if (i == startBPIX2 || i == startFP1 || i == startLastFwd || i == nMaxModules) - printf("moduleStart %d %d\n", i, moduleStart[i]); - } - -#endif - } - - // Interface to outside - template - void SiPixelRawToClusterGPUKernel::makePhase1ClustersAsync( - const SiPixelClusterThresholds clusterThresholds, - const SiPixelROCsStatusAndMapping *cablingMap, - const unsigned char *modToUnp, - const SiPixelGainForHLTonGPU *gains, - const WordFedAppender &wordFed, - SiPixelFormatterErrors &&errors, - const uint32_t wordCounter, - const uint32_t fedCounter, - bool useQualityInfo, - bool includeErrors, - bool debug, - cudaStream_t stream) { - // we're not opting for calling this function in case of early events - assert(wordCounter != 0); - nDigis = wordCounter; - -#ifdef GPU_DEBUG - std::cout << "decoding " << wordCounter << " digis." 
<< std::endl; -#endif - - // since wordCounter != 0 we're not allocating 0 bytes, - // digis_d = SiPixelDigisCUDA(wordCounter, stream); - digis_d = SiPixelDigisCUDA(size_t(wordCounter), stream); - if (includeErrors) { - digiErrors_d = SiPixelDigiErrorsCUDA(wordCounter, std::move(errors), stream); - } - clusters_d = SiPixelClustersCUDA(TrackerTraits::numberOfModules, stream); - - // Begin Raw2Digi block - { - const int threadsPerBlock = 512; - const int blocks = (wordCounter + threadsPerBlock - 1) / threadsPerBlock; // fill it all - - assert(0 == wordCounter % 2); - // wordCounter is the total no of words in each event to be trasfered on device - auto word_d = cms::cuda::make_device_unique(wordCounter, stream); - auto fedId_d = cms::cuda::make_device_unique(wordCounter, stream); - - cudaCheck( - cudaMemcpyAsync(word_d.get(), wordFed.word(), wordCounter * sizeof(uint32_t), cudaMemcpyDefault, stream)); - cudaCheck(cudaMemcpyAsync( - fedId_d.get(), wordFed.fedId(), wordCounter * sizeof(uint8_t) / 2, cudaMemcpyDefault, stream)); - - // Launch rawToDigi kernel - if (debug) - RawToDigi_kernel<<>>( // - cablingMap, - modToUnp, - wordCounter, - word_d.get(), - fedId_d.get(), - digis_d.view(), - digiErrors_d.error(), // returns nullptr if default-constructed - useQualityInfo, - includeErrors); - else - RawToDigi_kernel<<>>( // - cablingMap, - modToUnp, - wordCounter, - word_d.get(), - fedId_d.get(), - digis_d.view(), - digiErrors_d.error(), // returns nullptr if default-constructed - useQualityInfo, - includeErrors); - cudaCheck(cudaGetLastError()); -#ifdef GPU_DEBUG - cudaCheck(cudaStreamSynchronize(stream)); -#endif - - if (includeErrors) { - digiErrors_d.copyErrorToHostAsync(stream); - } - } - // End of Raw2Digi and passing data for clustering - - { - // clusterizer ... 
- using namespace gpuClustering; - int threadsPerBlock = 256; - int blocks = - (std::max(int(wordCounter), int(TrackerTraits::numberOfModules)) + threadsPerBlock - 1) / threadsPerBlock; - - gpuCalibPixel::calibDigis<<>>(clusterThresholds, - digis_d.view().moduleId(), - digis_d.view().xx(), - digis_d.view().yy(), - digis_d.view().adc(), - gains, - wordCounter, - clusters_d->moduleStart(), - clusters_d->clusInModule(), - clusters_d->clusModuleStart()); - - cudaCheck(cudaGetLastError()); -#ifdef GPU_DEBUG - cudaCheck(cudaStreamSynchronize(stream)); -#endif - -#ifdef GPU_DEBUG - std::cout << "CUDA countModules kernel launch with " << blocks << " blocks of " << threadsPerBlock - << " threads\n"; -#endif - - countModules<<>>( - digis_d->moduleId(), clusters_d->moduleStart(), digis_d->clus(), wordCounter); - cudaCheck(cudaGetLastError()); - - // should be larger than maxPixInModule/16 aka (maxPixInModule/maxiter in the kernel) - threadsPerBlock = ((TrackerTraits::maxPixInModule / 16 + 128 - 1) / 128) * 128; - blocks = TrackerTraits::numberOfModules; -#ifdef GPU_DEBUG - std::cout << "CUDA findClus kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n"; -#endif - - findClus<<>>(digis_d->rawIdArr(), - digis_d->moduleId(), - digis_d->xx(), - digis_d->yy(), - clusters_d->moduleStart(), - clusters_d->clusInModule(), - clusters_d->moduleId(), - digis_d->clus(), - wordCounter); - - cudaCheck(cudaGetLastError()); -#ifdef GPU_DEBUG - cudaCheck(cudaStreamSynchronize(stream)); -#endif - - // apply charge cut - clusterChargeCut<<>>(clusterThresholds, - digis_d->moduleId(), - digis_d->adc(), - clusters_d->moduleStart(), - clusters_d->clusInModule(), - clusters_d->moduleId(), - digis_d->clus(), - wordCounter); - - cudaCheck(cudaGetLastError()); - - // count the module start indices already here (instead of - // rechits) so that the number of clusters/hits can be made - // available in the rechit producer without additional points of - // 
synchronization/ExternalWork - auto nModules_Clusters_d = cms::cuda::make_device_unique(3, stream); - // MUST be ONE block - fillHitsModuleStart<<<1, 1024, 0, stream>>>(clusters_d->clusInModule(), - clusters_d->clusModuleStart(), - clusters_d->moduleStart(), - nModules_Clusters_d.get()); - - // copy to host - nModules_Clusters_h = cms::cuda::make_host_unique(3, stream); - cudaCheck(cudaMemcpyAsync( - nModules_Clusters_h.get(), nModules_Clusters_d.get(), 3 * sizeof(uint32_t), cudaMemcpyDefault, stream)); - -#ifdef GPU_DEBUG - cudaCheck(cudaStreamSynchronize(stream)); -#endif - - } // end clusterizer scope - } - - template - void SiPixelRawToClusterGPUKernel::makePhase2ClustersAsync( - const SiPixelClusterThresholds clusterThresholds, - const uint16_t *moduleIds, - const uint16_t *xDigis, - const uint16_t *yDigis, - const uint16_t *adcDigis, - const uint32_t *packedData, - const uint32_t *rawIds, - const uint32_t numDigis, - cudaStream_t stream) { - using namespace gpuClustering; - nDigis = numDigis; - digis_d = SiPixelDigisCUDA(numDigis, stream); - - cudaCheck(cudaMemcpyAsync(digis_d->moduleId(), moduleIds, sizeof(uint16_t) * numDigis, cudaMemcpyDefault, stream)); - cudaCheck(cudaMemcpyAsync(digis_d->xx(), xDigis, sizeof(uint16_t) * numDigis, cudaMemcpyDefault, stream)); - cudaCheck(cudaMemcpyAsync(digis_d->yy(), yDigis, sizeof(uint16_t) * numDigis, cudaMemcpyDefault, stream)); - cudaCheck(cudaMemcpyAsync(digis_d->adc(), adcDigis, sizeof(uint16_t) * numDigis, cudaMemcpyDefault, stream)); - cudaCheck(cudaMemcpyAsync(digis_d->pdigi(), packedData, sizeof(uint32_t) * numDigis, cudaMemcpyDefault, stream)); - cudaCheck(cudaMemcpyAsync(digis_d->rawIdArr(), rawIds, sizeof(uint32_t) * numDigis, cudaMemcpyDefault, stream)); - - clusters_d = SiPixelClustersCUDA(TrackerTraits::numberOfModules, stream); - - nModules_Clusters_h = cms::cuda::make_host_unique(2, stream); - - int threadsPerBlock = 512; - int blocks = (int(numDigis) + threadsPerBlock - 1) / threadsPerBlock; - - 
gpuCalibPixel::calibDigisPhase2<<>>(clusterThresholds, - digis_d->moduleId(), - digis_d->adc(), - numDigis, - clusters_d->moduleStart(), - clusters_d->clusInModule(), - clusters_d->clusModuleStart()); - - cudaCheck(cudaGetLastError()); - -#ifdef GPU_DEBUG - cudaCheck(cudaStreamSynchronize(stream)); - std::cout << "CUDA countModules kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n"; -#endif - - countModules<<>>( - digis_d->moduleId(), clusters_d->moduleStart(), digis_d->clus(), numDigis); - cudaCheck(cudaGetLastError()); - - // read the number of modules into a data member, used by getProduct()) - cudaCheck(cudaMemcpyAsync( - &(nModules_Clusters_h[0]), clusters_d->moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream)); - - threadsPerBlock = 256; - blocks = TrackerTraits::numberOfModules; - -#ifdef GPU_DEBUG - cudaCheck(cudaStreamSynchronize(stream)); - std::cout << "CUDA findClus kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n"; -#endif - findClus<<>>(digis_d->rawIdArr(), - digis_d->moduleId(), - digis_d->xx(), - digis_d->yy(), - clusters_d->moduleStart(), - clusters_d->clusInModule(), - clusters_d->moduleId(), - digis_d->clus(), - numDigis); - - cudaCheck(cudaGetLastError()); -#ifdef GPU_DEBUG - cudaCheck(cudaStreamSynchronize(stream)); - std::cout << "CUDA clusterChargeCut kernel launch with " << blocks << " blocks of " << threadsPerBlock - << " threads\n"; -#endif - - // apply charge cut - clusterChargeCut<<>>(clusterThresholds, - digis_d->moduleId(), - digis_d->adc(), - clusters_d->moduleStart(), - clusters_d->clusInModule(), - clusters_d->moduleId(), - digis_d->clus(), - numDigis); - cudaCheck(cudaGetLastError()); - - auto nModules_Clusters_d = cms::cuda::make_device_unique(3, stream); - -#ifdef GPU_DEBUG - cudaCheck(cudaStreamSynchronize(stream)); - std::cout << "CUDA fillHitsModuleStart kernel launch \n"; -#endif - - // MUST be ONE block - fillHitsModuleStart<<<1, 1024, 0, 
stream>>>(clusters_d->clusInModule(), - clusters_d->clusModuleStart(), - clusters_d->moduleStart(), - nModules_Clusters_d.get()); - - nModules_Clusters_h = cms::cuda::make_host_unique(3, stream); - cudaCheck(cudaMemcpyAsync( - nModules_Clusters_h.get(), nModules_Clusters_d.get(), 3 * sizeof(uint32_t), cudaMemcpyDefault, stream)); - -#ifdef GPU_DEBUG - cudaCheck(cudaStreamSynchronize(stream)); -#endif - } // - - template class SiPixelRawToClusterGPUKernel; - template class SiPixelRawToClusterGPUKernel; - template class SiPixelRawToClusterGPUKernel; -} // namespace pixelgpudetails diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h deleted file mode 100644 index fe9cc260a5853..0000000000000 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ /dev/null @@ -1,165 +0,0 @@ -#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h -#define RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h - -#include - -#include - -#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "DataFormats/SiPixelDetId/interface/PixelChannelIdentifier.h" -#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" -#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" -#include "FWCore/Utilities/interface/typedefs.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" - -//#define 
GPU_DEBUG - -struct SiPixelROCsStatusAndMapping; -class SiPixelGainForHLTonGPU; - -namespace pixelgpudetails { - - inline namespace phase1geometry { - const uint32_t layerStartBit = 20; - const uint32_t ladderStartBit = 12; - const uint32_t moduleStartBit = 2; - - const uint32_t panelStartBit = 10; - const uint32_t diskStartBit = 18; - const uint32_t bladeStartBit = 12; - - const uint32_t layerMask = 0xF; - const uint32_t ladderMask = 0xFF; - const uint32_t moduleMask = 0x3FF; - const uint32_t panelMask = 0x3; - const uint32_t diskMask = 0xF; - const uint32_t bladeMask = 0x3F; - } // namespace phase1geometry - - const uint32_t maxROCIndex = 8; - const uint32_t numRowsInRoc = 80; - const uint32_t numColsInRoc = 52; - - const uint32_t MAX_WORD = 2000; - - struct DetIdGPU { - uint32_t rawId; - uint32_t rocInDet; - uint32_t moduleId; - }; - - struct Pixel { - uint32_t row; - uint32_t col; - }; - - inline constexpr pixelchannelidentifierimpl::Packing packing() { return PixelChannelIdentifier::thePacking; } - - inline constexpr uint32_t pack(uint32_t row, uint32_t col, uint32_t adc, uint32_t flag = 0) { - constexpr pixelchannelidentifierimpl::Packing thePacking = packing(); - adc = std::min(adc, uint32_t(thePacking.max_adc)); - - return (row << thePacking.row_shift) | (col << thePacking.column_shift) | (adc << thePacking.adc_shift); - } - - constexpr uint32_t pixelToChannel(int row, int col) { - constexpr pixelchannelidentifierimpl::Packing thePacking = packing(); - return (row << thePacking.column_width) | col; - } - - template - class SiPixelRawToClusterGPUKernel { - public: - class WordFedAppender { - public: - WordFedAppender(uint32_t words, cudaStream_t stream) - : word_{cms::cuda::make_host_unique(words, stream)}, - fedId_{cms::cuda::make_host_unique(words, stream)} {} - - void initializeWordFed(int fedId, unsigned int index, cms_uint32_t const* src, unsigned int length) { - std::memcpy(word_.get() + index, src, sizeof(cms_uint32_t) * length); - 
std::memset(fedId_.get() + index / 2, fedId - FEDNumbering::MINSiPixeluTCAFEDID, length / 2); - } - - const unsigned int* word() const { return word_.get(); } - const unsigned char* fedId() const { return fedId_.get(); } - - private: - cms::cuda::host::unique_ptr word_; - cms::cuda::host::unique_ptr fedId_; - }; - - SiPixelRawToClusterGPUKernel() = default; - ~SiPixelRawToClusterGPUKernel() = default; - - SiPixelRawToClusterGPUKernel(const SiPixelRawToClusterGPUKernel&) = delete; - SiPixelRawToClusterGPUKernel(SiPixelRawToClusterGPUKernel&&) = delete; - SiPixelRawToClusterGPUKernel& operator=(const SiPixelRawToClusterGPUKernel&) = delete; - SiPixelRawToClusterGPUKernel& operator=(SiPixelRawToClusterGPUKernel&&) = delete; - - void makePhase1ClustersAsync(const SiPixelClusterThresholds clusterThresholds, - const SiPixelROCsStatusAndMapping* cablingMap, - const unsigned char* modToUnp, - const SiPixelGainForHLTonGPU* gains, - const WordFedAppender& wordFed, - SiPixelFormatterErrors&& errors, - const uint32_t wordCounter, - const uint32_t fedCounter, - bool useQualityInfo, - bool includeErrors, - bool debug, - cudaStream_t stream); - - void makePhase2ClustersAsync(const SiPixelClusterThresholds clusterThresholds, - const uint16_t* moduleIds, - const uint16_t* xDigis, - const uint16_t* yDigis, - const uint16_t* adcDigis, - const uint32_t* packedData, - const uint32_t* rawIds, - const uint32_t numDigis, - cudaStream_t stream); - - std::pair getResults() { - digis_d.setNModulesDigis(nModules_Clusters_h[0], nDigis); - assert(nModules_Clusters_h[2] <= nModules_Clusters_h[1]); - clusters_d.setNClusters(nModules_Clusters_h[1], nModules_Clusters_h[2]); - -#ifdef GPU_DEBUG - std::cout << "SiPixelClusterizerCUDA results:" << std::endl - << " > no. of digis: " << nDigis << std::endl - << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl - << " > no. 
of clusters: " << nModules_Clusters_h[1] << std::endl - << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl; -#endif - // need to explicitly deallocate while the associated CUDA - // stream is still alive - // - // technically the statement above is not true anymore now that - // the CUDA streams are cached within the cms::cuda::StreamCache, but it is - // still better to release as early as possible - nModules_Clusters_h.reset(); - return std::make_pair(std::move(digis_d), std::move(clusters_d)); - } - - SiPixelDigiErrorsCUDA&& getErrors() { return std::move(digiErrors_d); } - - private: - uint32_t nDigis; - - // Data to be put in the event - cms::cuda::host::unique_ptr nModules_Clusters_h; - SiPixelDigisCUDA digis_d; - SiPixelClustersCUDA clusters_d; - SiPixelDigiErrorsCUDA digiErrors_d; - }; - -} // namespace pixelgpudetails - -#endif // RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h deleted file mode 100644 index 869beb74564b8..0000000000000 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h +++ /dev/null @@ -1,128 +0,0 @@ -#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuCalibPixel_h -#define RecoLocalTracker_SiPixelClusterizer_plugins_gpuCalibPixel_h - -#include -#include -#include -#include - -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" -#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" - -namespace gpuCalibPixel { - - using gpuClustering::invalidModuleId; - - // template - __global__ void calibDigis(SiPixelClusterThresholds clusterThresholds, - uint16_t* id, - uint16_t const* __restrict__ x, - 
uint16_t const* __restrict__ y, - uint16_t* adc, - SiPixelGainForHLTonGPU const* __restrict__ ped, - int numElements, - uint32_t* __restrict__ moduleStart, // just to zero first - uint32_t* __restrict__ nClustersInModule, // just to zero them - uint32_t* __restrict__ clusModuleStart // just to zero first - ) { - int first = blockDim.x * blockIdx.x + threadIdx.x; - - const float VCaltoElectronGain = clusterThresholds.vCaltoElectronGain; - const float VCaltoElectronGain_L1 = clusterThresholds.vCaltoElectronGain_L1; - const float VCaltoElectronOffset = clusterThresholds.vCaltoElectronOffset; - const float VCaltoElectronOffset_L1 = clusterThresholds.vCaltoElectronOffset_L1; - - // zero for next kernels... - if (0 == first) - clusModuleStart[0] = moduleStart[0] = 0; - for (int i = first; i < phase1PixelTopology::numberOfModules; i += gridDim.x * blockDim.x) { - nClustersInModule[i] = 0; - } - - for (int i = first; i < numElements; i += gridDim.x * blockDim.x) { - if (invalidModuleId == id[i]) - continue; - - bool isDeadColumn = false, isNoisyColumn = false; - - int row = x[i]; - int col = y[i]; - - auto ret = ped->getPedAndGain(id[i], col, row, isDeadColumn, isNoisyColumn); - float pedestal = ret.first; - float gain = ret.second; - // float pedestal = 0; float gain = 1.; - if (isDeadColumn | isNoisyColumn) { - printf("bad pixel at %d in %d\n", i, id[i]); - id[i] = invalidModuleId; - adc[i] = 0; - } else { - float vcal = float(adc[i]) * gain - pedestal * gain; - - float conversionFactor = id[i] < 96 ? VCaltoElectronGain_L1 : VCaltoElectronGain; - float offset = id[i] < 96 ? 
VCaltoElectronOffset_L1 : VCaltoElectronOffset; - vcal = vcal * conversionFactor + offset; - - adc[i] = std::clamp(int(vcal), 100, int(std::numeric_limits::max())); - } - } - } - - __global__ void calibDigisPhase2(SiPixelClusterThresholds clusterThresholds, - uint16_t* id, - uint16_t* adc, - int numElements, - uint32_t* __restrict__ moduleStart, // just to zero first - uint32_t* __restrict__ nClustersInModule, // just to zero them - uint32_t* __restrict__ clusModuleStart // just to zero first - ) { - int first = blockDim.x * blockIdx.x + threadIdx.x; - // zero for next kernels... - - const float ElectronPerADCGain = clusterThresholds.electronPerADCGain; - const int8_t Phase2ReadoutMode = clusterThresholds.phase2ReadoutMode; - const uint16_t Phase2DigiBaseline = clusterThresholds.phase2DigiBaseline; - const uint8_t Phase2KinkADC = clusterThresholds.phase2KinkADC; - - if (0 == first) - clusModuleStart[0] = moduleStart[0] = 0; - for (int i = first; i < phase2PixelTopology::numberOfModules; i += gridDim.x * blockDim.x) { - nClustersInModule[i] = 0; - } - - for (int i = first; i < numElements; i += gridDim.x * blockDim.x) { - if (invalidModuleId == id[i]) - continue; - - const int mode = (Phase2ReadoutMode < -1 ? -1 : Phase2ReadoutMode); - - int adc_int = adc[i]; - - if (mode < 0) - adc_int = int(adc_int * ElectronPerADCGain); - else { - if (adc_int < Phase2KinkADC) - adc_int = int((adc_int + 0.5) * ElectronPerADCGain); - else { - const int8_t dspp = (Phase2ReadoutMode < 10 ? Phase2ReadoutMode : 10); - const int8_t ds = int8_t(dspp <= 1 ? 
1 : (dspp - 1) * (dspp - 1)); - - adc_int -= Phase2KinkADC; - adc_int *= ds; - adc_int += Phase2KinkADC; - - adc_int = ((adc_int + 0.5 * ds) * ElectronPerADCGain); - } - - adc_int += int(Phase2DigiBaseline); - } - adc[i] = std::min(adc_int, int(std::numeric_limits::max())); - } - } - -} // namespace gpuCalibPixel - -#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuCalibPixel_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h deleted file mode 100644 index f8554e341ff9c..0000000000000 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h +++ /dev/null @@ -1,159 +0,0 @@ -#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h -#define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h - -#include -#include - -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" -#include "HeterogeneousCore/CUDAUtilities/interface/prefixScan.h" -#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" - -namespace gpuClustering { - - template - __global__ void clusterChargeCut( - SiPixelClusterThresholds - clusterThresholds, // charge cut on cluster in electrons (for layer 1 and for other layers) - uint16_t* __restrict__ id, // module id of each pixel (modified if bad cluster) - uint16_t const* __restrict__ adc, // charge of each pixel - uint32_t const* __restrict__ moduleStart, // index of the first pixel of each module - uint32_t* __restrict__ nClustersInModule, // modified: number of clusters found in each module - uint32_t const* __restrict__ moduleId, // module id of each module - int32_t* __restrict__ clusterId, // modified: cluster id of each pixel - uint32_t numElements) { - constexpr int32_t maxNumClustersPerModules = 
TrackerTraits::maxNumClustersPerModules; - - __shared__ int32_t charge[maxNumClustersPerModules]; - __shared__ uint8_t ok[maxNumClustersPerModules]; - __shared__ uint16_t newclusId[maxNumClustersPerModules]; - - constexpr int startBPIX2 = TrackerTraits::layerStart[1]; - - assert(TrackerTraits::numberOfModules < maxNumModules); - assert(startBPIX2 < TrackerTraits::numberOfModules); - - auto firstModule = blockIdx.x; - auto endModule = moduleStart[0]; - for (auto module = firstModule; module < endModule; module += gridDim.x) { - auto firstPixel = moduleStart[1 + module]; - auto thisModuleId = id[firstPixel]; - while (thisModuleId == invalidModuleId and firstPixel < numElements) { - // skip invalid or duplicate pixels - ++firstPixel; - thisModuleId = id[firstPixel]; - } - if (firstPixel >= numElements) { - // reached the end of the input while skipping the invalid pixels, nothing left to do - break; - } - if (thisModuleId != moduleId[module]) { - // reached the end of the module while skipping the invalid pixels, skip this module - continue; - } - assert(thisModuleId < TrackerTraits::numberOfModules); - - auto nclus = nClustersInModule[thisModuleId]; - if (nclus == 0) - continue; - - if (threadIdx.x == 0 && nclus > maxNumClustersPerModules) - printf("Warning too many clusters in module %d in block %d: %d > %d\n", - thisModuleId, - blockIdx.x, - nclus, - maxNumClustersPerModules); - - auto first = firstPixel + threadIdx.x; - - if (nclus > maxNumClustersPerModules) { - // remove excess FIXME find a way to cut charge first.... 
- for (auto i = first; i < numElements; i += blockDim.x) { - if (id[i] == invalidModuleId) - continue; // not valid - if (id[i] != thisModuleId) - break; // end of module - if (clusterId[i] >= maxNumClustersPerModules) { - id[i] = invalidModuleId; - clusterId[i] = invalidModuleId; - } - } - nclus = maxNumClustersPerModules; - } - -#ifdef GPU_DEBUG - if (thisModuleId % 100 == 1) - if (threadIdx.x == 0) - printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x); -#endif - - assert(nclus <= maxNumClustersPerModules); - for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { - charge[i] = 0; - } - __syncthreads(); - - for (auto i = first; i < numElements; i += blockDim.x) { - if (id[i] == invalidModuleId) - continue; // not valid - if (id[i] != thisModuleId) - break; // end of module - atomicAdd(&charge[clusterId[i]], adc[i]); - } - __syncthreads(); - - auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2); - - bool good = true; - for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { - newclusId[i] = ok[i] = charge[i] >= chargeCut ? 1 : 0; - if (0 == ok[i]) - good = false; - } - - // if all clusters above threshold do nothing - if (__syncthreads_and(good)) - continue; - - // renumber - __shared__ uint16_t ws[32]; - constexpr auto maxThreads = 1024; - auto minClust = nclus > maxThreads ? 
maxThreads : nclus; - - cms::cuda::blockPrefixScan(newclusId, newclusId, minClust, ws); - if constexpr (maxNumClustersPerModules > maxThreads) //only if needed - { - for (uint32_t offset = maxThreads; offset < nclus; offset += maxThreads) { - cms::cuda::blockPrefixScan(newclusId + offset, newclusId + offset, nclus - offset, ws); - for (uint32_t i = threadIdx.x + offset; i < nclus; i += blockDim.x) { - uint32_t prevBlockEnd = ((i / maxThreads) * maxThreads) - 1; - newclusId[i] += newclusId[prevBlockEnd]; - } - __syncthreads(); - } - } - assert(nclus > newclusId[nclus - 1]); - - nClustersInModule[thisModuleId] = newclusId[nclus - 1]; - - // reassign id - for (auto i = first; i < numElements; i += blockDim.x) { - if (id[i] == invalidModuleId) - continue; // not valid - if (id[i] != thisModuleId) - break; // end of module - if (0 == ok[clusterId[i]]) - clusterId[i] = id[i] = invalidModuleId; - else - clusterId[i] = newclusId[clusterId[i]] - 1; - } - - // done - __syncthreads(); - } // loop on modules - } - -} // namespace gpuClustering - -#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h deleted file mode 100644 index 1a9395b8e7229..0000000000000 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ /dev/null @@ -1,410 +0,0 @@ -#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h -#define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h - -#include -#include - -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - -//#define GPU_DEBUG - -namespace gpuClustering { - - // 
Phase-1 pixel modules - constexpr uint32_t pixelSizeX = 160; - constexpr uint32_t pixelSizeY = 416; - - namespace pixelStatus { - // Use 0x00, 0x01, 0x03 so each can be OR'ed on top of the previous ones - enum Status : uint32_t { kEmpty = 0x00, kFound = 0x01, kDuplicate = 0x03 }; - - constexpr uint32_t bits = 2; - constexpr uint32_t mask = (0x01 << bits) - 1; - constexpr uint32_t valuesPerWord = sizeof(uint32_t) * 8 / bits; - constexpr uint32_t size = pixelSizeX * pixelSizeY / valuesPerWord; - - __device__ static constexpr inline uint32_t getIndex(uint16_t x, uint16_t y) { - return (pixelSizeX * y + x) / valuesPerWord; - } - - __device__ constexpr inline uint32_t getShift(uint16_t x, uint16_t y) { return (x % valuesPerWord) * 2; } - - __device__ constexpr inline Status getStatus(uint32_t const* __restrict__ status, uint16_t x, uint16_t y) { - uint32_t index = getIndex(x, y); - uint32_t shift = getShift(x, y); - return Status{(status[index] >> shift) & mask}; - } - - __device__ constexpr inline bool isDuplicate(uint32_t const* __restrict__ status, uint16_t x, uint16_t y) { - return getStatus(status, x, y) == kDuplicate; - } - - __device__ constexpr inline void promote(uint32_t* __restrict__ status, const uint16_t x, const uint16_t y) { - uint32_t index = getIndex(x, y); - uint32_t shift = getShift(x, y); - uint32_t old_word = status[index]; - uint32_t expected = old_word; - do { - expected = old_word; - Status old_status{(old_word >> shift) & mask}; - if (kDuplicate == old_status) { - // nothing to do - return; - } - Status new_status = (kEmpty == old_status) ? 
kFound : kDuplicate; - uint32_t new_word = old_word | (static_cast(new_status) << shift); - old_word = atomicCAS(&status[index], expected, new_word); - } while (expected != old_word); - } - - } // namespace pixelStatus - -#ifdef GPU_DEBUG - __device__ uint32_t gMaxHit = 0; -#endif - - template - __global__ void countModules(uint16_t const* __restrict__ id, - uint32_t* __restrict__ moduleStart, - int32_t* __restrict__ clusterId, - int numElements) { - int first = blockDim.x * blockIdx.x + threadIdx.x; - - [[maybe_unused]] constexpr int nMaxModules = TrackerTraits::numberOfModules; - - assert(nMaxModules < maxNumModules); - for (int i = first; i < numElements; i += gridDim.x * blockDim.x) { - clusterId[i] = i; - if (invalidModuleId == id[i]) - continue; - auto j = i - 1; - while (j >= 0 and id[j] == invalidModuleId) - --j; - if (j < 0 or id[j] != id[i]) { - // boundary... - auto loc = atomicInc(moduleStart, nMaxModules); - moduleStart[loc + 1] = i; - } - } - } - - template - __global__ void findClus(uint32_t* __restrict__ rawIdArr, - uint16_t* __restrict__ id, // module id of each pixel - uint16_t const* __restrict__ x, // local coordinates of each pixel - uint16_t const* __restrict__ y, // - uint32_t const* __restrict__ moduleStart, // index of the first pixel of each module - uint32_t* __restrict__ nClustersInModule, // output: number of clusters found in each module - uint32_t* __restrict__ moduleId, // output: module id of each module - int32_t* __restrict__ clusterId, // output: cluster id of each pixel - int numElements) { - // status is only used for Phase-1, but it cannot be declared conditionally only if isPhase2 is false; - // to minimize the impact on Phase-2 reconstruction it is declared with a very small size. - constexpr bool isPhase2 = std::is_base_of::value; - constexpr const uint32_t pixelStatusSize = isPhase2 ? 
1 : pixelStatus::size; - __shared__ uint32_t status[pixelStatusSize]; // packed words array used to store the PixelStatus of each pixel - __shared__ int msize; - - auto firstModule = blockIdx.x; - auto endModule = moduleStart[0]; - - assert(TrackerTraits::numberOfModules < maxNumModules); - - for (auto module = firstModule; module < endModule; module += gridDim.x) { - auto firstPixel = moduleStart[1 + module]; - auto thisModuleId = id[firstPixel]; - assert(thisModuleId < TrackerTraits::numberOfModules); - -#ifdef GPU_DEBUG - if (thisModuleId % 100 == 1) - if (threadIdx.x == 0) - printf("start clusterizer for module %d in block %d\n", thisModuleId, blockIdx.x); -#endif - - auto first = firstPixel + threadIdx.x; - - // find the index of the first pixel not belonging to this module (or invalid) - msize = numElements; - __syncthreads(); - - // skip threads not associated to an existing pixel - for (int i = first; i < numElements; i += blockDim.x) { - if (id[i] == invalidModuleId) // skip invalid pixels - continue; - if (id[i] != thisModuleId) { // find the first pixel in a different module - atomicMin(&msize, i); - break; - } - } - - //init hist (ymax=416 < 512 : 9bits) - //6000 max pixels required for HI operations with no measurable impact on pp performance - constexpr uint32_t maxPixInModule = TrackerTraits::maxPixInModule; - constexpr auto nbins = TrackerTraits::clusterBinning; - constexpr auto nbits = TrackerTraits::clusterBits; - - using Hist = cms::cuda::HistoContainer; - __shared__ Hist hist; - __shared__ typename Hist::Counter ws[32]; - for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) { - hist.off[j] = 0; - } - __syncthreads(); - - assert((msize == numElements) or ((msize < numElements) and (id[msize] != thisModuleId))); - - // limit to maxPixInModule (FIXME if recurrent (and not limited to simulation with low threshold) one will need to implement something cleverer) - if (0 == threadIdx.x) { - if (msize - firstPixel > maxPixInModule) { - 
printf("too many pixels in module %d: %d > %d\n", thisModuleId, msize - firstPixel, maxPixInModule); - msize = maxPixInModule + firstPixel; - } -#ifdef GPU_DEBUG - printf("pixelInModule > %d\n", msize - firstPixel); -#endif - } - - __syncthreads(); - assert(msize - firstPixel <= maxPixInModule); - -#ifdef GPU_DEBUG - __shared__ uint32_t totGood; - totGood = 0; - __syncthreads(); -#endif - - // remove duplicate pixels - if constexpr (not isPhase2) { - if (msize > 1) { - for (uint32_t i = threadIdx.x; i < pixelStatus::size; i += blockDim.x) { - status[i] = 0; - } - __syncthreads(); - for (int i = first; i < msize - 1; i += blockDim.x) { - // skip invalid pixels - if (id[i] == invalidModuleId) - continue; - pixelStatus::promote(status, x[i], y[i]); - } - __syncthreads(); - for (int i = first; i < msize - 1; i += blockDim.x) { - // skip invalid pixels - if (id[i] == invalidModuleId) - continue; - if (pixelStatus::isDuplicate(status, x[i], y[i])) { - // printf("found dup %d %d %d %d\n", i, id[i], x[i], y[i]); - id[i] = invalidModuleId; - rawIdArr[i] = 0; - } - } - __syncthreads(); - } - } - - // fill histo - for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == invalidModuleId) // skip invalid pixels - continue; - hist.count(y[i]); -#ifdef GPU_DEBUG - atomicAdd(&totGood, 1); -#endif - } - __syncthreads(); - if (threadIdx.x < 32) - ws[threadIdx.x] = 0; // used by prefix scan... 
- __syncthreads(); - hist.finalize(ws); - __syncthreads(); -#ifdef GPU_DEBUG - assert(hist.size() == totGood); - if (thisModuleId % 100 == 1) - if (threadIdx.x == 0) - printf("histo size %d\n", hist.size()); -#endif - for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == invalidModuleId) // skip invalid pixels - continue; - hist.fill(y[i], i - firstPixel); - } - -#ifdef __CUDA_ARCH__ - // assume that we can cover the whole module with up to 16 blockDim.x-wide iterations - constexpr int maxiter = 16; - if (threadIdx.x == 0 && (hist.size() / blockDim.x) >= maxiter) - printf("THIS IS NOT SUPPOSED TO HAPPEN too many hits in module %d: %d for block size %d\n", - thisModuleId, - hist.size(), - blockDim.x); -#else - auto maxiter = hist.size(); -#endif - // allocate space for duplicate pixels: a pixel can appear more than once with different charge in the same event - constexpr int maxNeighbours = 10; - assert((hist.size() / blockDim.x) <= maxiter); - // nearest neighbour - uint16_t nn[maxiter][maxNeighbours]; - uint8_t nnn[maxiter]; // number of nn - for (uint32_t k = 0; k < maxiter; ++k) - nnn[k] = 0; - - __syncthreads(); // for hit filling! 
- -#ifdef GPU_DEBUG - // look for anomalous high occupancy - __shared__ uint32_t n40, n60; - n40 = n60 = 0; - __syncthreads(); - for (auto j = threadIdx.x; j < Hist::nbins(); j += blockDim.x) { - if (hist.size(j) > 60) - atomicAdd(&n60, 1); - if (hist.size(j) > 40) - atomicAdd(&n40, 1); - } - __syncthreads(); - if (0 == threadIdx.x) { - if (n60 > 0) - printf("columns with more than 60 px %d in %d\n", n60, thisModuleId); - else if (n40 > 0) - printf("columns with more than 40 px %d in %d\n", n40, thisModuleId); - } - __syncthreads(); -#endif - - // fill NN - for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { - assert(k < maxiter); - auto p = hist.begin() + j; - auto i = *p + firstPixel; - assert(id[i] != invalidModuleId); - assert(id[i] == thisModuleId); // same module - int be = Hist::bin(y[i] + 1); - auto e = hist.end(be); - ++p; - assert(0 == nnn[k]); - for (; p < e; ++p) { - auto m = (*p) + firstPixel; - assert(m != i); - assert(int(y[m]) - int(y[i]) >= 0); - assert(int(y[m]) - int(y[i]) <= 1); - if (std::abs(int(x[m]) - int(x[i])) > 1) - continue; - auto l = nnn[k]++; - assert(l < maxNeighbours); - nn[k][l] = *p; - } - } - - // for each pixel, look at all the pixels until the end of the module; - // when two valid pixels within +/- 1 in x or y are found, set their id to the minimum; - // after the loop, all the pixel in each cluster should have the id equeal to the lowest - // pixel in the cluster ( clus[i] == i ). 
- bool more = true; - int nloops = 0; - while (__syncthreads_or(more)) { - if (1 == nloops % 2) { - for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { - auto p = hist.begin() + j; - auto i = *p + firstPixel; - auto m = clusterId[i]; - while (m != clusterId[m]) - m = clusterId[m]; - clusterId[i] = m; - } - } else { - more = false; - for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { - auto p = hist.begin() + j; - auto i = *p + firstPixel; - for (int kk = 0; kk < nnn[k]; ++kk) { - auto l = nn[k][kk]; - auto m = l + firstPixel; - assert(m != i); - auto old = atomicMin_block(&clusterId[m], clusterId[i]); - // do we need memory fence? - if (old != clusterId[i]) { - // end the loop only if no changes were applied - more = true; - } - atomicMin_block(&clusterId[i], old); - } // nnloop - } // pixel loop - } - ++nloops; - } // end while - -#ifdef GPU_DEBUG - { - __shared__ int n0; - if (threadIdx.x == 0) - n0 = nloops; - __syncthreads(); - auto ok = n0 == nloops; - assert(__syncthreads_and(ok)); - if (thisModuleId % 100 == 1) - if (threadIdx.x == 0) - printf("# loops %d\n", nloops); - } -#endif - - __shared__ unsigned int foundClusters; - foundClusters = 0; - __syncthreads(); - - // find the number of different clusters, identified by a pixels with clus[i] == i; - // mark these pixels with a negative id. - for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == invalidModuleId) // skip invalid pixels - continue; - if (clusterId[i] == i) { - auto old = atomicInc(&foundClusters, 0xffffffff); - clusterId[i] = -(old + 1); - } - } - __syncthreads(); - - // propagate the negative id to all the pixels in the cluster. 
- for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == invalidModuleId) // skip invalid pixels - continue; - if (clusterId[i] >= 0) { - // mark each pixel in a cluster with the same id as the first one - clusterId[i] = clusterId[clusterId[i]]; - } - } - __syncthreads(); - - // adjust the cluster id to be a positive value starting from 0 - for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == invalidModuleId) { // skip invalid pixels - clusterId[i] = invalidClusterId; - continue; - } - clusterId[i] = -clusterId[i] - 1; - } - __syncthreads(); - - if (threadIdx.x == 0) { - nClustersInModule[thisModuleId] = foundClusters; - moduleId[module] = thisModuleId; -#ifdef GPU_DEBUG - if (foundClusters > gMaxHit) { - gMaxHit = foundClusters; - if (foundClusters > 8) - printf("max hit %d in %d\n", foundClusters, thisModuleId); - } -#endif -#ifdef GPU_DEBUG - if (thisModuleId % 100 == 1) - printf("%d clusters in module %d\n", foundClusters, thisModuleId); -#endif - } - } // module loop - } -} // namespace gpuClustering - -#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h diff --git a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml index 7bb795f3bab02..b9b3a3676b4ae 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml @@ -33,28 +33,3 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/RecoLocalTracker/SiPixelClusterizer/test/cpuClustering_t.cpp b/RecoLocalTracker/SiPixelClusterizer/test/cpuClustering_t.cpp deleted file mode 100644 index 19a3b8d014c9c..0000000000000 --- a/RecoLocalTracker/SiPixelClusterizer/test/cpuClustering_t.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "gpuClustering_t.h" diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.cu b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.cu deleted file mode 100644 index 19a3b8d014c9c..0000000000000 --- 
a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.cu +++ /dev/null @@ -1 +0,0 @@ -#include "gpuClustering_t.h" diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h deleted file mode 100644 index 5b70ded261ddf..0000000000000 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ /dev/null @@ -1,409 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/launch.h" -#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" -#endif // __CUDACC__ - -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" - -// local includes, for testing only -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" - -int main(void) { -#ifdef __CUDACC__ - cms::cudatest::requireDevices(); -#endif // __CUDACC__ - - using namespace gpuClustering; - using pixelTopology::Phase1; - - constexpr int numElements = 256 * maxNumModules; - const SiPixelClusterThresholds clusterThresholds( - clusterThresholdLayerOne, clusterThresholdOtherLayers, 0.f, 0.f, 0.f, 0.f); - - // these in reality are already on GPU - auto h_raw = std::make_unique(numElements); - auto h_id = std::make_unique(numElements); - auto h_x = std::make_unique(numElements); - auto h_y = std::make_unique(numElements); - auto h_adc = std::make_unique(numElements); - auto h_clus = std::make_unique(numElements); - -#ifdef __CUDACC__ - auto d_raw = cms::cuda::make_device_unique(numElements, nullptr); 
- auto d_id = cms::cuda::make_device_unique(numElements, nullptr); - auto d_x = cms::cuda::make_device_unique(numElements, nullptr); - auto d_y = cms::cuda::make_device_unique(numElements, nullptr); - auto d_adc = cms::cuda::make_device_unique(numElements, nullptr); - auto d_clus = cms::cuda::make_device_unique(numElements, nullptr); - auto d_moduleStart = cms::cuda::make_device_unique(maxNumModules + 1, nullptr); - auto d_clusInModule = cms::cuda::make_device_unique(maxNumModules, nullptr); - auto d_moduleId = cms::cuda::make_device_unique(maxNumModules, nullptr); -#else // __CUDACC__ - auto h_moduleStart = std::make_unique(maxNumModules + 1); - auto h_clusInModule = std::make_unique(maxNumModules); - auto h_moduleId = std::make_unique(maxNumModules); -#endif // __CUDACC__ - - // later random number - int n = 0; - int ncl = 0; - int y[10] = {5, 7, 9, 1, 3, 0, 4, 8, 2, 6}; - - auto generateClusters = [&](int kn) { - auto addBigNoise = 1 == kn % 2; - if (addBigNoise) { - constexpr int MaxPixels = 1000; - int id = 666; - for (int x = 0; x < 140; x += 3) { - for (int yy = 0; yy < 400; yy += 3) { - h_id[n] = id; - h_x[n] = x; - h_y[n] = yy; - h_adc[n] = 1000; - ++n; - ++ncl; - if (MaxPixels <= ncl) - break; - } - if (MaxPixels <= ncl) - break; - } - } - - { - // isolated - int id = 42; - int x = 10; - ++ncl; - h_id[n] = id; - h_x[n] = x; - h_y[n] = x; - h_adc[n] = kn == 0 ? 
100 : 5000; - ++n; - - // first column - ++ncl; - h_id[n] = id; - h_x[n] = x; - h_y[n] = 0; - h_adc[n] = 5000; - ++n; - // first columns - ++ncl; - h_id[n] = id; - h_x[n] = x + 80; - h_y[n] = 2; - h_adc[n] = 5000; - ++n; - h_id[n] = id; - h_x[n] = x + 80; - h_y[n] = 1; - h_adc[n] = 5000; - ++n; - - // last column - ++ncl; - h_id[n] = id; - h_x[n] = x; - h_y[n] = 415; - h_adc[n] = 5000; - ++n; - // last columns - ++ncl; - h_id[n] = id; - h_x[n] = x + 80; - h_y[n] = 415; - h_adc[n] = 2500; - ++n; - h_id[n] = id; - h_x[n] = x + 80; - h_y[n] = 414; - h_adc[n] = 2500; - ++n; - - // diagonal - ++ncl; - for (int x = 20; x < 25; ++x) { - h_id[n] = id; - h_x[n] = x; - h_y[n] = x; - h_adc[n] = 1000; - ++n; - } - ++ncl; - // reversed - for (int x = 45; x > 40; --x) { - h_id[n] = id; - h_x[n] = x; - h_y[n] = x; - h_adc[n] = 1000; - ++n; - } - ++ncl; - h_id[n++] = invalidModuleId; // error - // messy - int xx[5] = {21, 25, 23, 24, 22}; - for (int k = 0; k < 5; ++k) { - h_id[n] = id; - h_x[n] = xx[k]; - h_y[n] = 20 + xx[k]; - h_adc[n] = 1000; - ++n; - } - // holes - ++ncl; - for (int k = 0; k < 5; ++k) { - h_id[n] = id; - h_x[n] = xx[k]; - h_y[n] = 100; - h_adc[n] = kn == 2 ? 100 : 1000; - ++n; - if (xx[k] % 2 == 0) { - h_id[n] = id; - h_x[n] = xx[k]; - h_y[n] = 101; - h_adc[n] = 1000; - ++n; - } - } - } - { - // id == 0 (make sure it works! - int id = 0; - int x = 10; - ++ncl; - h_id[n] = id; - h_x[n] = x; - h_y[n] = x; - h_adc[n] = 5000; - ++n; - } - // all odd id - for (int id = 11; id <= 1800; id += 2) { - if ((id / 20) % 2) - h_id[n++] = invalidModuleId; // error - for (int x = 0; x < 40; x += 4) { - ++ncl; - if ((id / 10) % 2) { - for (int k = 0; k < 10; ++k) { - h_id[n] = id; - h_x[n] = x; - h_y[n] = x + y[k]; - h_adc[n] = 100; - ++n; - h_id[n] = id; - h_x[n] = x + 1; - h_y[n] = x + y[k] + 2; - h_adc[n] = 1000; - ++n; - } - } else { - for (int k = 0; k < 10; ++k) { - h_id[n] = id; - h_x[n] = x; - h_y[n] = x + y[9 - k]; - h_adc[n] = kn == 2 ? 
10 : 1000; - ++n; - if (y[k] == 3) - continue; // hole - if (id == 51) { - h_id[n++] = invalidModuleId; - h_id[n++] = invalidModuleId; - } // error - h_id[n] = id; - h_x[n] = x + 1; - h_y[n] = x + y[k] + 2; - h_adc[n] = kn == 2 ? 10 : 1000; - ++n; - } - } - } - } - }; // end lambda - for (auto kkk = 0; kkk < 5; ++kkk) { - n = 0; - ncl = 0; - generateClusters(kkk); - - std::cout << "created " << n << " digis in " << ncl << " clusters" << std::endl; - assert(n <= numElements); - - uint32_t nModules = 0; -#ifdef __CUDACC__ - size_t size32 = n * sizeof(unsigned int); - size_t size16 = n * sizeof(unsigned short); - // size_t size8 = n * sizeof(uint8_t); - - cudaCheck(cudaMemcpy(d_moduleStart.get(), &nModules, sizeof(uint32_t), cudaMemcpyHostToDevice)); - cudaCheck(cudaMemcpy(d_id.get(), h_id.get(), size16, cudaMemcpyHostToDevice)); - cudaCheck(cudaMemcpy(d_x.get(), h_x.get(), size16, cudaMemcpyHostToDevice)); - cudaCheck(cudaMemcpy(d_y.get(), h_y.get(), size16, cudaMemcpyHostToDevice)); - cudaCheck(cudaMemcpy(d_adc.get(), h_adc.get(), size16, cudaMemcpyHostToDevice)); - - // Launch CUDA Kernels - int threadsPerBlock = (kkk == 5) ? 512 : ((kkk == 3) ? 
128 : 256); - int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; - std::cout << "CUDA countModules kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock - << " threads\n"; - - cms::cuda::launch( - countModules, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_moduleStart.get(), d_clus.get(), n); - - blocksPerGrid = maxNumModules; //nModules; - - std::cout << "CUDA findModules kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock - << " threads\n"; - cudaCheck(cudaMemset(d_clusInModule.get(), 0, maxNumModules * sizeof(uint32_t))); - - cms::cuda::launch(findClus, - {blocksPerGrid, threadsPerBlock}, - d_raw.get(), - d_id.get(), - d_x.get(), - d_y.get(), - d_moduleStart.get(), - d_clusInModule.get(), - d_moduleId.get(), - d_clus.get(), - n); - cudaDeviceSynchronize(); - cudaCheck(cudaMemcpy(&nModules, d_moduleStart.get(), sizeof(uint32_t), cudaMemcpyDeviceToHost)); - - uint32_t nclus[maxNumModules], moduleId[nModules]; - cudaCheck(cudaMemcpy(&nclus, d_clusInModule.get(), maxNumModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); - - std::cout << "before charge cut found " << std::accumulate(nclus, nclus + maxNumModules, 0) << " clusters" - << std::endl; - for (auto i = maxNumModules; i > 0; i--) - if (nclus[i - 1] > 0) { - std::cout << "last module is " << i - 1 << ' ' << nclus[i - 1] << std::endl; - break; - } - if (ncl != std::accumulate(nclus, nclus + maxNumModules, 0)) - std::cout << "ERROR!!!!! 
wrong number of cluster found" << std::endl; - - cms::cuda::launch(clusterChargeCut, - {blocksPerGrid, threadsPerBlock}, - clusterThresholds, - d_id.get(), - d_adc.get(), - d_moduleStart.get(), - d_clusInModule.get(), - d_moduleId.get(), - d_clus.get(), - n); - - cudaDeviceSynchronize(); -#else // __CUDACC__ - h_moduleStart[0] = nModules; - countModules(h_id.get(), h_moduleStart.get(), h_clus.get(), n); - memset(h_clusInModule.get(), 0, maxNumModules * sizeof(uint32_t)); - - findClus(h_raw.get(), - h_id.get(), - h_x.get(), - h_y.get(), - h_moduleStart.get(), - h_clusInModule.get(), - h_moduleId.get(), - h_clus.get(), - n); - - nModules = h_moduleStart[0]; - auto nclus = h_clusInModule.get(); - - std::cout << "before charge cut found " << std::accumulate(nclus, nclus + maxNumModules, 0) << " clusters" - << std::endl; - for (auto i = maxNumModules; i > 0; i--) - if (nclus[i - 1] > 0) { - std::cout << "last module is " << i - 1 << ' ' << nclus[i - 1] << std::endl; - break; - } - if (ncl != std::accumulate(nclus, nclus + maxNumModules, 0)) - std::cout << "ERROR!!!!! 
wrong number of cluster found" << std::endl; - - clusterChargeCut(clusterThresholds, - h_id.get(), - h_adc.get(), - h_moduleStart.get(), - h_clusInModule.get(), - h_moduleId.get(), - h_clus.get(), - n); -#endif // __CUDACC__ - - std::cout << "found " << nModules << " Modules active" << std::endl; - -#ifdef __CUDACC__ - cudaCheck(cudaMemcpy(h_id.get(), d_id.get(), size16, cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(h_clus.get(), d_clus.get(), size32, cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(&nclus, d_clusInModule.get(), maxNumModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(&moduleId, d_moduleId.get(), nModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); -#endif // __CUDACC__ - - std::set clids; - for (int i = 0; i < n; ++i) { - assert(h_id[i] != 666); // only noise - if (h_id[i] == invalidModuleId) - continue; - assert(h_clus[i] >= 0); - assert(h_clus[i] < int(nclus[h_id[i]])); - clids.insert(h_id[i] * 1000 + h_clus[i]); - // clids.insert(h_clus[i]); - } - - // verify no hole in numbering - auto p = clids.begin(); - auto cmid = (*p) / 1000; - assert(0 == (*p) % 1000); - auto c = p; - ++c; - std::cout << "first clusters " << *p << ' ' << *c << ' ' << nclus[cmid] << ' ' << nclus[(*c) / 1000] << std::endl; - std::cout << "last cluster " << *clids.rbegin() << ' ' << nclus[(*clids.rbegin()) / 1000] << std::endl; - for (; c != clids.end(); ++c) { - auto cc = *c; - auto pp = *p; - auto mid = cc / 1000; - auto pnc = pp % 1000; - auto nc = cc % 1000; - if (mid != cmid) { - assert(0 == cc % 1000); - assert(nclus[cmid] - 1 == pp % 1000); - // if (nclus[cmid]-1 != pp%1000) std::cout << "error size " << mid << ": " << nclus[mid] << ' ' << pp << std::endl; - cmid = mid; - p = c; - continue; - } - p = c; - // assert(nc==pnc+1); - if (nc != pnc + 1) - std::cout << "error " << mid << ": " << nc << ' ' << pnc << std::endl; - } - - std::cout << "found " << std::accumulate(nclus, nclus + maxNumModules, 0) << ' ' << clids.size() << " clusters" 
- << std::endl; - for (auto i = maxNumModules; i > 0; i--) - if (nclus[i - 1] > 0) { - std::cout << "last module is " << i - 1 << ' ' << nclus[i - 1] << std::endl; - break; - } - // << " and " << seeds.size() << " seeds" << std::endl; - } /// end loop kkk - return 0; -} diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index 62787f4c989c1..99e0635e054c6 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -1,6 +1,5 @@ - @@ -11,8 +10,6 @@ - - diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h deleted file mode 100644 index 15c24dfefb420..0000000000000 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef RecoLocalTracker_SiPixelRecHits_PixelCPEFast_h -#define RecoLocalTracker_SiPixelRecHits_PixelCPEFast_h - -#include - -#include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" -#include "CondFormats/SiPixelTransient/interface/SiPixelTemplate.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEGenericBase.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" - -class MagneticField; -template -class PixelCPEFast final : public PixelCPEGenericBase { -public: - PixelCPEFast(edm::ParameterSet const &conf, - const MagneticField *, - const TrackerGeometry &, - const TrackerTopology &, - const SiPixelLorentzAngle *, - const SiPixelGenErrorDBObject *, - const SiPixelLorentzAngle *); - - ~PixelCPEFast() override = default; - - static void fillPSetDescription(edm::ParameterSetDescription &desc); - - // The return value can only be used safely in kernels launched on - // the same cudaStream, or 
after cudaStreamSynchronize. - using ParamsOnGPU = pixelCPEforGPU::ParamsOnGPUT; - using LayerGeometry = pixelCPEforGPU::LayerGeometryT; - using AverageGeometry = pixelTopology::AverageGeometryT; - - const ParamsOnGPU *getGPUProductAsync(cudaStream_t cudaStream) const; - - ParamsOnGPU const &getCPUProduct() const { return cpuData_; } - -private: - LocalPoint localPosition(DetParam const &theDetParam, ClusterParam &theClusterParam) const override; - LocalError localError(DetParam const &theDetParam, ClusterParam &theClusterParam) const override; - - void errorFromTemplates(DetParam const &theDetParam, ClusterParamGeneric &theClusterParam, float qclus) const; - - //--- DB Error Parametrization object, new light templates - std::vector thePixelGenError_; - - // allocate this with posix malloc to be compatible with the cpu workflow - std::vector detParamsGPU_; - pixelCPEforGPU::CommonParams commonParamsGPU_; - LayerGeometry layerGeometry_; - AverageGeometry averageGeometry_; - ParamsOnGPU cpuData_; - - struct GPUData { - ~GPUData(); - // not needed if not used on CPU... 
- ParamsOnGPU paramsOnGPU_h; - ParamsOnGPU *paramsOnGPU_d = nullptr; // copy of the above on the Device - }; - cms::cuda::ESProduct gpuData_; - - void fillParamsForGpu(); -}; - -#endif // RecoLocalTracker_SiPixelRecHits_PixelCPEFast_h diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h index a97add7edb7b3..f86de181db5d5 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h @@ -7,11 +7,9 @@ #include #include +#include "DataFormats/GeometrySurface/interface/SOARotation.h" #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" -#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" -#include "DataFormats/GeometrySurface/interface/SOARotation.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" namespace pixelCPEforDevice { diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h deleted file mode 100644 index e7c8ad5554f36..0000000000000 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ /dev/null @@ -1,435 +0,0 @@ -#ifndef RecoLocalTracker_SiPixelRecHits_pixelCPEforGPU_h -#define RecoLocalTracker_SiPixelRecHits_pixelCPEforGPU_h - -#include -#include -#include -#include - -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "DataFormats/GeometrySurface/interface/SOARotation.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" -#include "CUDADataFormats/TrackingRecHit/interface/SiPixelHitStatus.h" - -namespace CPEFastParametrisation { - // From 
https://cmssdt.cern.ch/dxr/CMSSW/source/CondFormats/SiPixelTransient/src/SiPixelGenError.cc#485-486 - // qbin: int (0-4) describing the charge of the cluster - // [0: 1.5; - using Rotation = SOARotation; - - // all modules are identical! - struct CommonParams { - float theThicknessB; - float theThicknessE; - float thePitchX; - float thePitchY; - - uint16_t maxModuleStride; - uint8_t numberOfLaddersInBarrel; - }; - - struct DetParams { - bool isBarrel; - bool isPosZ; - uint16_t layer; - uint16_t index; - uint32_t rawId; - - float shiftX; - float shiftY; - float chargeWidthX; - float chargeWidthY; - uint16_t pixmx; // max pix charge - - uint16_t nRowsRoc; //we don't need 2^16 columns, is worth to use 15 + 1 for sign - uint16_t nColsRoc; - uint16_t nRows; - uint16_t nCols; - - uint32_t numPixsInModule; - - float x0, y0, z0; // the vertex in the local coord of the detector - - float apeXX, apeYY; // ape^2 - uint8_t sx2, sy1, sy2; - uint8_t sigmax[CPEFastParametrisation::kNumErrorBins], sigmax1[CPEFastParametrisation::kNumErrorBins], - sigmay[CPEFastParametrisation::kNumErrorBins]; // in micron - float xfact[CPEFastParametrisation::kGenErrorQBins], yfact[CPEFastParametrisation::kGenErrorQBins]; - int minCh[CPEFastParametrisation::kGenErrorQBins]; - - Frame frame; - }; - - template - struct LayerGeometryT { - uint32_t layerStart[TrackerTopology::numberOfLayers + 1]; - uint8_t layer[pixelTopology::layerIndexSize]; - uint16_t maxModuleStride; - }; - - // using LayerGeometry = LayerGeometryT; - // using LayerGeometryPhase2 = LayerGeometryT; - - template - struct ParamsOnGPUT { - using LayerGeometry = LayerGeometryT; - using AverageGeometry = pixelTopology::AverageGeometryT; - - CommonParams const* m_commonParams; - DetParams const* m_detParams; - LayerGeometry const* m_layerGeometry; - AverageGeometry const* m_averageGeometry; - - constexpr CommonParams const& __restrict__ commonParams() const { - CommonParams const* __restrict__ l = m_commonParams; - return *l; - } - 
constexpr DetParams const& __restrict__ detParams(int i) const { - DetParams const* __restrict__ l = m_detParams; - return l[i]; - } - constexpr LayerGeometry const& __restrict__ layerGeometry() const { return *m_layerGeometry; } - constexpr AverageGeometry const& __restrict__ averageGeometry() const { return *m_averageGeometry; } - - __device__ uint8_t layer(uint16_t id) const { - return __ldg(m_layerGeometry->layer + id / m_layerGeometry->maxModuleStride); - }; - }; - - // SOA (on device) - template - struct ClusParamsT { - uint32_t minRow[N]; - uint32_t maxRow[N]; - uint32_t minCol[N]; - uint32_t maxCol[N]; - - int32_t q_f_X[N]; - int32_t q_l_X[N]; - int32_t q_f_Y[N]; - int32_t q_l_Y[N]; - - int32_t charge[N]; - - float xpos[N]; - float ypos[N]; - - float xerr[N]; - float yerr[N]; - - int16_t xsize[N]; // (*8) clipped at 127 if negative is edge.... - int16_t ysize[N]; - - Status status[N]; - }; - - constexpr int32_t MaxHitsInIter = gpuClustering::maxHitsInIter(); - using ClusParams = ClusParamsT; - - constexpr inline void computeAnglesFromDet( - DetParams const& __restrict__ detParams, float const x, float const y, float& cotalpha, float& cotbeta) { - // x,y local position on det - auto gvx = x - detParams.x0; - auto gvy = y - detParams.y0; - auto gvz = -1.f / detParams.z0; - // normalization not required as only ratio used... - // calculate angles - cotalpha = gvx * gvz; - cotbeta = gvy * gvz; - } - - constexpr inline float correction(int sizeM1, - int q_f, //!< Charge in the first pixel. - int q_l, //!< Charge in the last pixel. - uint16_t upper_edge_first_pix, //!< As the name says. - uint16_t lower_edge_last_pix, //!< As the name says. 
- float lorentz_shift, //!< L-shift at half thickness - float theThickness, //detector thickness - float cot_angle, //!< cot of alpha_ or beta_ - float pitch, //!< thePitchX or thePitchY - bool first_is_big, //!< true if the first is big - bool last_is_big) //!< true if the last is big - { - if (0 == sizeM1) // size 1 - return 0; - - float w_eff = 0; - bool simple = true; - if (1 == sizeM1) { // size 2 - //--- Width of the clusters minus the edge (first and last) pixels. - //--- In the note, they are denoted x_F and x_L (and y_F and y_L) - // assert(lower_edge_last_pix >= upper_edge_first_pix); - auto w_inner = pitch * float(lower_edge_last_pix - upper_edge_first_pix); // in cm - - //--- Predicted charge width from geometry - auto w_pred = theThickness * cot_angle // geometric correction (in cm) - - lorentz_shift; // (in cm) &&& check fpix! - - w_eff = std::abs(w_pred) - w_inner; - - //--- If the observed charge width is inconsistent with the expectations - //--- based on the track, do *not* use w_pred-w_inner. Instead, replace - //--- it with an *average* effective charge width, which is the average - //--- length of the edge pixels. - - // this can produce "large" regressions for very small numeric differences - simple = (w_eff < 0.0f) | (w_eff > pitch); - } - - if (simple) { - //--- Total length of the two edge pixels (first+last) - float sum_of_edge = 2.0f; - if (first_is_big) - sum_of_edge += 1.0f; - if (last_is_big) - sum_of_edge += 1.0f; - w_eff = pitch * 0.5f * sum_of_edge; // ave. 
length of edge pixels (first+last) (cm) - } - - //--- Finally, compute the position in this projection - float qdiff = q_l - q_f; - float qsum = q_l + q_f; - - //--- Temporary fix for clusters with both first and last pixel with charge = 0 - if (qsum == 0) - qsum = 1.0f; - - return 0.5f * (qdiff / qsum) * w_eff; - } - - template - constexpr inline void position(CommonParams const& __restrict__ comParams, - DetParams const& __restrict__ detParams, - ClusParams& cp, - uint32_t ic) { - constexpr int maxSize = TrackerTraits::maxSizeCluster; - //--- Upper Right corner of Lower Left pixel -- in measurement frame - uint16_t llx = cp.minRow[ic] + 1; - uint16_t lly = cp.minCol[ic] + 1; - - //--- Lower Left corner of Upper Right pixel -- in measurement frame - uint16_t urx = cp.maxRow[ic]; - uint16_t ury = cp.maxCol[ic]; - - uint16_t llxl = llx, llyl = lly, urxl = urx, uryl = ury; - - llxl = TrackerTraits::localX(llx); - llyl = TrackerTraits::localY(lly); - urxl = TrackerTraits::localX(urx); - uryl = TrackerTraits::localY(ury); - - auto mx = llxl + urxl; - auto my = llyl + uryl; - - int xsize = int(urxl) + 2 - int(llxl); - int ysize = int(uryl) + 2 - int(llyl); - assert(xsize >= 0); // 0 if bixpix... 
- assert(ysize >= 0); - - if (TrackerTraits::isBigPixX(cp.minRow[ic])) - ++xsize; - if (TrackerTraits::isBigPixX(cp.maxRow[ic])) - ++xsize; - if (TrackerTraits::isBigPixY(cp.minCol[ic])) - ++ysize; - if (TrackerTraits::isBigPixY(cp.maxCol[ic])) - ++ysize; - - int unbalanceX = 8.f * std::abs(float(cp.q_f_X[ic] - cp.q_l_X[ic])) / float(cp.q_f_X[ic] + cp.q_l_X[ic]); - int unbalanceY = 8.f * std::abs(float(cp.q_f_Y[ic] - cp.q_l_Y[ic])) / float(cp.q_f_Y[ic] + cp.q_l_Y[ic]); - - xsize = 8 * xsize - unbalanceX; - ysize = 8 * ysize - unbalanceY; - - cp.xsize[ic] = std::min(xsize, maxSize); - cp.ysize[ic] = std::min(ysize, maxSize); - - if (cp.minRow[ic] == 0 || cp.maxRow[ic] == uint32_t(detParams.nRows - 1)) - cp.xsize[ic] = -cp.xsize[ic]; - - if (cp.minCol[ic] == 0 || cp.maxCol[ic] == uint32_t(detParams.nCols - 1)) - cp.ysize[ic] = -cp.ysize[ic]; - - // apply the lorentz offset correction - float xoff = 0.5f * float(detParams.nRows) * comParams.thePitchX; - float yoff = 0.5f * float(detParams.nCols) * comParams.thePitchY; - - //correction for bigpixels for phase1 - xoff = xoff + TrackerTraits::bigPixXCorrection * comParams.thePitchX; - yoff = yoff + TrackerTraits::bigPixYCorrection * comParams.thePitchY; - - // apply the lorentz offset correction - auto xPos = detParams.shiftX + (comParams.thePitchX * 0.5f * float(mx)) - xoff; - auto yPos = detParams.shiftY + (comParams.thePitchY * 0.5f * float(my)) - yoff; - - float cotalpha = 0, cotbeta = 0; - - computeAnglesFromDet(detParams, xPos, yPos, cotalpha, cotbeta); - - auto thickness = detParams.isBarrel ? 
comParams.theThicknessB : comParams.theThicknessE; - - auto xcorr = correction(cp.maxRow[ic] - cp.minRow[ic], - cp.q_f_X[ic], - cp.q_l_X[ic], - llxl, - urxl, - detParams.chargeWidthX, // lorentz shift in cm - thickness, - cotalpha, - comParams.thePitchX, - TrackerTraits::isBigPixX(cp.minRow[ic]), - TrackerTraits::isBigPixX(cp.maxRow[ic])); - - auto ycorr = correction(cp.maxCol[ic] - cp.minCol[ic], - cp.q_f_Y[ic], - cp.q_l_Y[ic], - llyl, - uryl, - detParams.chargeWidthY, // lorentz shift in cm - thickness, - cotbeta, - comParams.thePitchY, - TrackerTraits::isBigPixY(cp.minCol[ic]), - TrackerTraits::isBigPixY(cp.maxCol[ic])); - - cp.xpos[ic] = xPos + xcorr; - cp.ypos[ic] = yPos + ycorr; - } - - template - constexpr inline void errorFromSize(CommonParams const& __restrict__ comParams, - DetParams const& __restrict__ detParams, - ClusParams& cp, - uint32_t ic) { - // Edge cluster errors - cp.xerr[ic] = 0.0050; - cp.yerr[ic] = 0.0085; - - // FIXME these are errors form Run1 - float xerr_barrel_l1_def = TrackerTraits::xerr_barrel_l1_def; - float yerr_barrel_l1_def = TrackerTraits::yerr_barrel_l1_def; - float xerr_barrel_ln_def = TrackerTraits::xerr_barrel_ln_def; - float yerr_barrel_ln_def = TrackerTraits::yerr_barrel_ln_def; - float xerr_endcap_def = TrackerTraits::xerr_endcap_def; - float yerr_endcap_def = TrackerTraits::yerr_endcap_def; - - constexpr float xerr_barrel_l1[] = {0.00115, 0.00120, 0.00088}; //TODO MOVE THESE SOMEWHERE ELSE - constexpr float yerr_barrel_l1[] = { - 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; - constexpr float xerr_barrel_ln[] = {0.00115, 0.00120, 0.00088}; - constexpr float yerr_barrel_ln[] = { - 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; - constexpr float xerr_endcap[] = {0.0020, 0.0020}; - constexpr float yerr_endcap[] = {0.00210}; - - auto sx = cp.maxRow[ic] - cp.minRow[ic]; - auto sy = cp.maxCol[ic] - cp.minCol[ic]; - - // is edgy ? 
- bool isEdgeX = cp.xsize[ic] < 1; - bool isEdgeY = cp.ysize[ic] < 1; - - // is one and big? - bool isBig1X = ((0 == sx) && TrackerTraits::isBigPixX(cp.minRow[ic])); - bool isBig1Y = ((0 == sy) && TrackerTraits::isBigPixY(cp.minCol[ic])); - - if (!isEdgeX && !isBig1X) { - if (not detParams.isBarrel) { - cp.xerr[ic] = sx < std::size(xerr_endcap) ? xerr_endcap[sx] : xerr_endcap_def; - } else if (detParams.layer == 1) { - cp.xerr[ic] = sx < std::size(xerr_barrel_l1) ? xerr_barrel_l1[sx] : xerr_barrel_l1_def; - } else { - cp.xerr[ic] = sx < std::size(xerr_barrel_ln) ? xerr_barrel_ln[sx] : xerr_barrel_ln_def; - } - } - - if (!isEdgeY && !isBig1Y) { - if (not detParams.isBarrel) { - cp.yerr[ic] = sy < std::size(yerr_endcap) ? yerr_endcap[sy] : yerr_endcap_def; - } else if (detParams.layer == 1) { - cp.yerr[ic] = sy < std::size(yerr_barrel_l1) ? yerr_barrel_l1[sy] : yerr_barrel_l1_def; - } else { - cp.yerr[ic] = sy < std::size(yerr_barrel_ln) ? yerr_barrel_ln[sy] : yerr_barrel_ln_def; - } - } - } - - template - constexpr inline void errorFromDB(CommonParams const& __restrict__ comParams, - DetParams const& __restrict__ detParams, - ClusParams& cp, - uint32_t ic) { - // Edge cluster errors - cp.xerr[ic] = 0.0050f; - cp.yerr[ic] = 0.0085f; - - auto sx = cp.maxRow[ic] - cp.minRow[ic]; - auto sy = cp.maxCol[ic] - cp.minCol[ic]; - - // is edgy ? (size is set negative: see above) - bool isEdgeX = cp.xsize[ic] < 1; - bool isEdgeY = cp.ysize[ic] < 1; - // is one and big? 
- bool isOneX = (0 == sx); - bool isOneY = (0 == sy); - bool isBigX = TrackerTraits::isBigPixX(cp.minRow[ic]); - bool isBigY = TrackerTraits::isBigPixY(cp.minCol[ic]); - - auto ch = cp.charge[ic]; - auto bin = 0; - for (; bin < CPEFastParametrisation::kGenErrorQBins - 1; ++bin) - // find first bin which minimum charge exceeds cluster charge - if (ch < detParams.minCh[bin + 1]) - break; - - // in detParams qBins are reversed bin0 -> smallest charge, bin4-> largest charge - // whereas in CondFormats/SiPixelTransient/src/SiPixelGenError.cc it is the opposite - // so we reverse the bin here -> kGenErrorQBins - 1 - bin - cp.status[ic].qBin = CPEFastParametrisation::kGenErrorQBins - 1 - bin; - cp.status[ic].isOneX = isOneX; - cp.status[ic].isBigX = (isOneX & isBigX) | isEdgeX; - cp.status[ic].isOneY = isOneY; - cp.status[ic].isBigY = (isOneY & isBigY) | isEdgeY; - - auto xoff = -float(TrackerTraits::xOffset) * comParams.thePitchX; - int low_value = 0; - int high_value = CPEFastParametrisation::kNumErrorBins - 1; - int bin_value = float(CPEFastParametrisation::kNumErrorBins) * (cp.xpos[ic] + xoff) / (2 * xoff); - // return estimated bin value truncated to [0, 15] - int jx = std::clamp(bin_value, low_value, high_value); - - auto toCM = [](uint8_t x) { return float(x) * 1.e-4f; }; - - if (not isEdgeX) { - cp.xerr[ic] = isOneX ? toCM(isBigX ? detParams.sx2 : detParams.sigmax1[jx]) - : detParams.xfact[bin] * toCM(detParams.sigmax[jx]); - } - - auto ey = cp.ysize[ic] > 8 ? detParams.sigmay[std::min(cp.ysize[ic] - 9, 15)] : detParams.sy1; - if (not isEdgeY) { - cp.yerr[ic] = isOneY ? toCM(isBigY ? 
detParams.sy2 : detParams.sy1) : detParams.yfact[bin] * toCM(ey); - } - } - - //for Phase2 -> fallback to error from size - template <> - constexpr inline void errorFromDB(CommonParams const& __restrict__ comParams, - DetParams const& __restrict__ detParams, - ClusParams& cp, - uint32_t ic) { - errorFromSize(comParams, detParams, cp, ic); - } - -} // namespace pixelCPEforGPU - -#endif // RecoLocalTracker_SiPixelRecHits_pixelCPEforGPU_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index 35a973120e9fd..847cc4d53c4de 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -5,16 +5,7 @@ - - - - - - - - - - + diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc deleted file mode 100644 index 171cfd1baad55..0000000000000 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc +++ /dev/null @@ -1,110 +0,0 @@ -#include -#include - -#include "CondFormats/DataRecord/interface/SiPixelGenErrorDBObjectRcd.h" -#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/ESProducer.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/ModuleFactory.h" -#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" -#include "Geometry/Records/interface/TrackerTopologyRcd.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "MagneticField/Engine/interface/MagneticField.h" -#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" -#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" -#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" -#include 
"RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" - -template -class PixelCPEFastESProducerT : public edm::ESProducer { -public: - PixelCPEFastESProducerT(const edm::ParameterSet& p); - std::unique_ptr produce(const TkPixelCPERecord&); - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - edm::ESGetToken magfieldToken_; - edm::ESGetToken pDDToken_; - edm::ESGetToken hTTToken_; - edm::ESGetToken lorentzAngleToken_; - edm::ESGetToken lorentzAngleWidthToken_; - edm::ESGetToken genErrorDBObjectToken_; - - edm::ParameterSet pset_; - bool useErrorsFromTemplates_; -}; - -using namespace edm; - -template -PixelCPEFastESProducerT::PixelCPEFastESProducerT(const edm::ParameterSet& p) : pset_(p) { - auto const& myname = p.getParameter("ComponentName"); - auto const& magname = p.getParameter("MagneticFieldRecord"); - useErrorsFromTemplates_ = p.getParameter("UseErrorsFromTemplates"); - - auto cc = setWhatProduced(this, myname); - magfieldToken_ = cc.consumes(magname); - pDDToken_ = cc.consumes(); - hTTToken_ = cc.consumes(); - lorentzAngleToken_ = cc.consumes(edm::ESInputTag("")); - lorentzAngleWidthToken_ = cc.consumes(edm::ESInputTag("", "forWidth")); - if (useErrorsFromTemplates_) { - genErrorDBObjectToken_ = cc.consumes(); - } -} - -template -std::unique_ptr PixelCPEFastESProducerT::produce( - const TkPixelCPERecord& iRecord) { - // add the new la width object - const SiPixelLorentzAngle* lorentzAngleWidthProduct = nullptr; - lorentzAngleWidthProduct = &iRecord.get(lorentzAngleWidthToken_); - - const SiPixelGenErrorDBObject* genErrorDBObjectProduct = nullptr; - - // Errors take only from new GenError - if (useErrorsFromTemplates_) { // do only when generrors are needed - genErrorDBObjectProduct = &iRecord.get(genErrorDBObjectToken_); - //} else { - //std::cout<<" pass an empty GenError pointer"<>(pset_, - &iRecord.get(magfieldToken_), - iRecord.get(pDDToken_), - iRecord.get(hTTToken_), - &iRecord.get(lorentzAngleToken_), - 
genErrorDBObjectProduct, - lorentzAngleWidthProduct); -} - -template -void PixelCPEFastESProducerT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - // from PixelCPEBase - PixelCPEBase::fillPSetDescription(desc); - - // from PixelCPEFast - PixelCPEFast::fillPSetDescription(desc); - - // used by PixelCPEFast - desc.add("EdgeClusterErrorX", 50.0); - desc.add("EdgeClusterErrorY", 85.0); - desc.add("UseErrorsFromTemplates", true); - desc.add("TruncatePixelCharge", true); - - std::string name = "PixelCPEFast"; - name += TrackerTraits::nameModifier; - desc.add("ComponentName", name); - desc.add("MagneticFieldRecord", edm::ESInputTag()); - - descriptions.addWithDefaultLabel(desc); -} - -using PixelCPEFastESProducerPhase1 = PixelCPEFastESProducerT; -DEFINE_FWK_EVENTSETUP_MODULE(PixelCPEFastESProducerPhase1); -using PixelCPEFastESProducerPhase2 = PixelCPEFastESProducerT; -DEFINE_FWK_EVENTSETUP_MODULE(PixelCPEFastESProducerPhase2); -using PixelCPEFastESProducerHIonPhase1 = PixelCPEFastESProducerT; -DEFINE_FWK_EVENTSETUP_MODULE(PixelCPEFastESProducerHIonPhase1); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu deleted file mode 100644 index b1e5e1c3c90e9..0000000000000 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu +++ /dev/null @@ -1,107 +0,0 @@ -// C++ headers -#include -#include - -// CUDA runtime -#include - -// CMSSW headers -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#include "PixelRecHitGPUKernel.h" -#include "gpuPixelRecHits.h" - -//#define GPU_DEBUG - -namespace { - template - __global__ void setHitsLayerStart(uint32_t const* __restrict__ hitsModuleStart, - pixelCPEforGPU::ParamsOnGPUT const* cpeParams, - uint32_t* 
hitsLayerStart) { - auto i = blockIdx.x * blockDim.x + threadIdx.x; - constexpr auto m = TrackerTraits::numberOfLayers; - - assert(0 == hitsModuleStart[0]); - - if (i <= m) { - hitsLayerStart[i] = hitsModuleStart[cpeParams->layerGeometry().layerStart[i]]; -#ifdef GPU_DEBUG - int old = i == 0 ? 0 : hitsModuleStart[cpeParams->layerGeometry().layerStart[i - 1]]; - printf("LayerStart %d/%d at module %d: %d - %d\n", - i, - m, - cpeParams->layerGeometry().layerStart[i], - hitsLayerStart[i], - hitsLayerStart[i] - old); -#endif - } - } -} // namespace - -namespace pixelgpudetails { - - template - TrackingRecHitSoADevice PixelRecHitGPUKernel::makeHitsAsync( - SiPixelDigisCUDA const& digis_d, - SiPixelClustersCUDA const& clusters_d, - BeamSpotCUDA const& bs_d, - pixelCPEforGPU::ParamsOnGPUT const* cpeParams, - cudaStream_t stream) const { - using namespace gpuPixelRecHits; - auto nHits = clusters_d.nClusters(); - - TrackingRecHitSoADevice hits_d( - nHits, clusters_d.offsetBPIX2(), cpeParams, clusters_d->clusModuleStart(), stream); - - int activeModulesWithDigis = digis_d.nModules(); - // protect from empty events - if (activeModulesWithDigis) { - int threadsPerBlock = 128; - int blocks = activeModulesWithDigis; - -#ifdef GPU_DEBUG - std::cout << "launching getHits kernel for " << blocks << " blocks" << std::endl; -#endif - getHits<<>>( - cpeParams, bs_d.data(), digis_d.view(), digis_d.nDigis(), clusters_d.const_view(), hits_d.view()); - cudaCheck(cudaGetLastError()); -#ifdef GPU_DEBUG - cudaCheck(cudaDeviceSynchronize()); -#endif - - // assuming full warp of threads is better than a smaller number... 
- if (nHits) { - setHitsLayerStart - <<<1, 32, 0, stream>>>(clusters_d->clusModuleStart(), cpeParams, hits_d.view().hitsLayerStart().data()); - cudaCheck(cudaGetLastError()); - constexpr auto nLayers = TrackerTraits::numberOfLayers; - cms::cuda::fillManyFromVector(&(hits_d.view().phiBinner()), - nLayers, - hits_d.view().iphi(), - hits_d.view().hitsLayerStart().data(), - nHits, - 256, - hits_d.view().phiBinnerStorage(), - stream); - cudaCheck(cudaGetLastError()); - -#ifdef GPU_DEBUG - cudaCheck(cudaDeviceSynchronize()); -#endif - } - } - -#ifdef GPU_DEBUG - cudaCheck(cudaDeviceSynchronize()); - std::cout << "PixelRecHitGPUKernel -> DONE!" << std::endl; -#endif - - return hits_d; - } - - template class PixelRecHitGPUKernel; - template class PixelRecHitGPUKernel; - template class PixelRecHitGPUKernel; -} // namespace pixelgpudetails diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h deleted file mode 100644 index 407a18be04fa9..0000000000000 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef RecoLocalTracker_SiPixelRecHits_plugins_PixelRecHitGPUKernel_h -#define RecoLocalTracker_SiPixelRecHits_plugins_PixelRecHitGPUKernel_h - -#include - -#include - -#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" -#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" - -//#define GPU_DEBUG - -namespace pixelgpudetails { - - template - class PixelRecHitGPUKernel { - public: - PixelRecHitGPUKernel() = default; - ~PixelRecHitGPUKernel() = default; - - PixelRecHitGPUKernel(const PixelRecHitGPUKernel&) = delete; - PixelRecHitGPUKernel(PixelRecHitGPUKernel&&) = delete; - PixelRecHitGPUKernel& 
operator=(const PixelRecHitGPUKernel&) = delete; - PixelRecHitGPUKernel& operator=(PixelRecHitGPUKernel&&) = delete; - - using ParamsOnGPU = pixelCPEforGPU::ParamsOnGPUT; - - TrackingRecHitSoADevice makeHitsAsync(SiPixelDigisCUDA const& digis_d, - SiPixelClustersCUDA const& clusters_d, - BeamSpotCUDA const& bs_d, - ParamsOnGPU const* cpeParams, - cudaStream_t stream) const; - }; - -} // namespace pixelgpudetails - -#endif // RecoLocalTracker_SiPixelRecHits_plugins_PixelRecHitGPUKernel_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc deleted file mode 100644 index 6a5364beed69a..0000000000000 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc +++ /dev/null @@ -1,104 +0,0 @@ -#include - -#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" -#include 
"RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" - -#include "PixelRecHitGPUKernel.h" - -template -class SiPixelRecHitCUDAT : public edm::global::EDProducer<> { -public: - explicit SiPixelRecHitCUDAT(const edm::ParameterSet& iConfig); - ~SiPixelRecHitCUDAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; - - const edm::ESGetToken cpeToken_; - const edm::EDGetTokenT> tBeamSpot; - const edm::EDGetTokenT> token_; - const edm::EDGetTokenT> tokenDigi_; - const edm::EDPutTokenT>> tokenHit_; - - const pixelgpudetails::PixelRecHitGPUKernel gpuAlgo_; -}; - -template -SiPixelRecHitCUDAT::SiPixelRecHitCUDAT(const edm::ParameterSet& iConfig) - : cpeToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CPE")))), - tBeamSpot(consumes>(iConfig.getParameter("beamSpot"))), - token_(consumes>(iConfig.getParameter("src"))), - tokenDigi_(consumes>(iConfig.getParameter("src"))), - tokenHit_(produces>>()) {} - -template -void SiPixelRecHitCUDAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - desc.add("beamSpot", edm::InputTag("offlineBeamSpotCUDA")); - desc.add("src", edm::InputTag("siPixelClustersPreSplittingCUDA")); - - std::string cpe = "PixelCPEFast"; - cpe += TrackerTraits::nameModifier; - desc.add("CPE", cpe); - - descriptions.addWithDefaultLabel(desc); -} - -template -void SiPixelRecHitCUDAT::produce(edm::StreamID streamID, - edm::Event& iEvent, - const edm::EventSetup& es) const { - PixelCPEFast const* fcpe = dynamic_cast*>(&es.getData(cpeToken_)); - if (not fcpe) { - throw cms::Exception("Configuration") << "SiPixelRecHitCUDA can only use a CPE of type PixelCPEFast"; - } - - edm::Handle> hclusters; - iEvent.getByToken(token_, hclusters); - - 
cms::cuda::ScopedContextProduce ctx{*hclusters}; - auto const& clusters = ctx.get(*hclusters); - - edm::Handle> hdigis; - iEvent.getByToken(tokenDigi_, hdigis); - auto const& digis = ctx.get(*hdigis); - - edm::Handle> hbs; - iEvent.getByToken(tBeamSpot, hbs); - auto const& bs = ctx.get(*hbs); - - ctx.emplace(iEvent, - tokenHit_, - gpuAlgo_.makeHitsAsync(digis, clusters, bs, fcpe->getGPUProductAsync(ctx.stream()), ctx.stream())); -} - -using SiPixelRecHitCUDAPhase1 = SiPixelRecHitCUDAT; -DEFINE_FWK_MODULE(SiPixelRecHitCUDAPhase1); - -using SiPixelRecHitCUDAPhase2 = SiPixelRecHitCUDAT; -DEFINE_FWK_MODULE(SiPixelRecHitCUDAPhase2); - -using SiPixelRecHitCUDAHIonPhase1 = SiPixelRecHitCUDAT; -DEFINE_FWK_MODULE(SiPixelRecHitCUDAHIonPhase1); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc index e63d44763b9e7..0c3f6dd2dbb74 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc @@ -83,10 +83,6 @@ #include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" -// Make heterogeneous framework happy -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "CUDADataFormats/Common/interface/HostProduct.h" - using namespace std; namespace cms { @@ -111,21 +107,17 @@ namespace cms { void produce(edm::Event& e, const edm::EventSetup& c) override; //--- Execute the position estimator algorithm(s). - void run(edm::Event& e, - edm::Handle inputhandle, + void run(edm::Handle inputhandle, SiPixelRecHitCollectionNew& output, TrackerGeometry const& geom); private: - using HMSstorage = HostProduct; - // TO DO: maybe allow a map of pointers? 
PixelCPEBase const* cpe_ = nullptr; // What we got (for now, one ptr to base class) edm::InputTag const src_; std::string const cpeName_; edm::EDGetTokenT const tPixelCluster_; edm::EDPutTokenT const tPut_; - edm::EDPutTokenT const tHost_; edm::ESGetToken const tTrackerGeom_; edm::ESGetToken const tCPE_; bool m_newCont; // save also in emdNew::DetSetVector @@ -139,7 +131,6 @@ namespace cms { cpeName_(conf.getParameter("CPE")), tPixelCluster_(consumes(src_)), tPut_(produces()), - tHost_(produces()), tTrackerGeom_(esConsumes()), tCPE_(esConsumes(edm::ESInputTag("", cpeName_))) {} @@ -165,8 +156,7 @@ namespace cms { // Step C: Iterate over DetIds and invoke the strip CPE algorithm // on each DetUnit - - run(e, input, output, geom); + run(input, output, geom); output.shrink_to_fit(); e.emplace(tPut_, std::move(output)); @@ -177,8 +167,7 @@ namespace cms { //! and make a RecHit to store the result. //! New interface reading DetSetVector by V.Chiochia (May 30th, 2006) //--------------------------------------------------------------------------- - void SiPixelRecHitConverter::run(edm::Event& iEvent, - edm::Handle inputhandle, + void SiPixelRecHitConverter::run(edm::Handle inputhandle, SiPixelRecHitCollectionNew& output, TrackerGeometry const& geom) { if (!cpe_) { @@ -191,37 +180,7 @@ namespace cms { int numberOfDetUnits = 0; int numberOfClusters = 0; - const SiPixelClusterCollectionNew& input = *inputhandle; - - // allocate a buffer for the indices of the clusters - auto hmsp = std::make_unique(gpuClustering::maxNumModules + 1); - // hitsModuleStart is a non-owning pointer to the buffer - auto hitsModuleStart = hmsp.get(); - // fill cluster arrays - std::array clusInModule{}; - for (auto const& dsv : input) { - unsigned int detid = dsv.detId(); - DetId detIdObject(detid); - const GeomDetUnit* genericDet = geom.idToDetUnit(detIdObject); - auto gind = genericDet->index(); - // FIXME to be changed to support Phase2 - if (gind >= int(gpuClustering::maxNumModules)) - continue; 
- auto const nclus = dsv.size(); - assert(nclus > 0); - clusInModule[gind] = nclus; - numberOfClusters += nclus; - } - hitsModuleStart[0] = 0; - assert(clusInModule.size() > gpuClustering::maxNumModules); - for (int i = 1, n = clusInModule.size(); i < n; ++i) - hitsModuleStart[i] = hitsModuleStart[i - 1] + clusInModule[i - 1]; - assert(numberOfClusters == int(hitsModuleStart[gpuClustering::maxNumModules])); - - // wrap the buffer in a HostProduct, and move it to the Event, without reallocating the buffer or affecting hitsModuleStart - iEvent.emplace(tHost_, std::move(hmsp)); - - numberOfClusters = 0; + SiPixelClusterCollectionNew const& input = *inputhandle; for (auto const& dsv : input) { numberOfDetUnits++; unsigned int detid = dsv.detId(); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromCUDA.cc deleted file mode 100644 index 8ef3f74da5751..0000000000000 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromCUDA.cc +++ /dev/null @@ -1,203 +0,0 @@ -#include - -#include - -#include "CUDADataFormats/Common/interface/HostProduct.h" -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h" -#include "DataFormats/Common/interface/DetSetVectorNew.h" -#include "DataFormats/Common/interface/Handle.h" -#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" -#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" 
-#include "FWCore/Utilities/interface/InputTag.h" -#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" -#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" - -template -class SiPixelRecHitFromCUDAT : public edm::stream::EDProducer { -public: - explicit SiPixelRecHitFromCUDAT(const edm::ParameterSet& iConfig); - ~SiPixelRecHitFromCUDAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - - using HMSstorage = HostProduct; - using HitsOnDevice = TrackingRecHitSoADevice; - -private: - void acquire(edm::Event const& iEvent, - edm::EventSetup const& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; - void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override; - - const edm::ESGetToken geomToken_; - const edm::EDGetTokenT> hitsToken_; // CUDA hits - const edm::EDGetTokenT clusterToken_; // legacy clusters - const edm::EDPutTokenT rechitsPutToken_; // legacy rechits - const edm::EDPutTokenT hostPutToken_; - - uint32_t nHits_; - cms::cuda::host::unique_ptr store32_; - cms::cuda::host::unique_ptr hitsModuleStart_; -}; - -template -SiPixelRecHitFromCUDAT::SiPixelRecHitFromCUDAT(const edm::ParameterSet& iConfig) - : geomToken_(esConsumes()), - hitsToken_(consumes>(iConfig.getParameter("pixelRecHitSrc"))), - clusterToken_(consumes(iConfig.getParameter("src"))), - rechitsPutToken_(produces()), - hostPutToken_(produces()) {} - -template -void SiPixelRecHitFromCUDAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.add("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingCUDA")); - desc.add("src", edm::InputTag("siPixelClustersPreSplitting")); - - descriptions.addWithDefaultLabel(desc); -} - -template 
-void SiPixelRecHitFromCUDAT::acquire(edm::Event const& iEvent, - edm::EventSetup const& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - cms::cuda::Product const& inputDataWrapped = iEvent.get(hitsToken_); - - cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; - - auto const& inputData = ctx.get(inputDataWrapped); - - nHits_ = inputData.nHits(); - LogDebug("SiPixelRecHitFromCUDA") << "converting " << nHits_ << " Hits"; - - if (0 == nHits_) - return; - store32_ = inputData.localCoordToHostAsync(ctx.stream()); - - hitsModuleStart_ = inputData.hitsModuleStartToHostAsync(ctx.stream()); -} - -template -void SiPixelRecHitFromCUDAT::produce(edm::Event& iEvent, edm::EventSetup const& es) { - // allocate a buffer for the indices of the clusters - constexpr auto nMaxModules = TrackerTraits::numberOfModules; - auto hmsp = std::make_unique(nMaxModules + 1); - - SiPixelRecHitCollection output; - output.reserve(nMaxModules, nHits_); - - if (0 == nHits_) { - iEvent.emplace(rechitsPutToken_, std::move(output)); - iEvent.emplace(hostPutToken_, std::move(hmsp)); - return; - } - output.reserve(nMaxModules, nHits_); - - std::copy(hitsModuleStart_.get(), hitsModuleStart_.get() + nMaxModules + 1, hmsp.get()); - // wrap the buffer in a HostProduct, and move it to the Event, without reallocating the buffer or affecting hitsModuleStart - iEvent.emplace(hostPutToken_, std::move(hmsp)); - - auto xl = store32_.get(); - auto yl = xl + nHits_; - auto xe = yl + nHits_; - auto ye = xe + nHits_; - - const TrackerGeometry* geom = &es.getData(geomToken_); - - edm::Handle hclusters = iEvent.getHandle(clusterToken_); - auto const& input = *hclusters; - - constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule; - - int numberOfDetUnits = 0; - int numberOfClusters = 0; - for (auto const& dsv : input) { - numberOfDetUnits++; - unsigned int detid = dsv.detId(); - DetId detIdObject(detid); - const GeomDetUnit* genericDet = 
geom->idToDetUnit(detIdObject); - auto gind = genericDet->index(); - const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); - assert(pixDet); - SiPixelRecHitCollection::FastFiller recHitsOnDetUnit(output, detid); - auto fc = hitsModuleStart_[gind]; - auto lc = hitsModuleStart_[gind + 1]; - auto nhits = lc - fc; - - assert(lc > fc); - LogDebug("SiPixelRecHitFromCUDA") << "in det " << gind << ": conv " << nhits << " hits from " << dsv.size() - << " legacy clusters" << ' ' << fc << ',' << lc << "\n"; - if (nhits > maxHitsInModule) - edm::LogWarning("SiPixelRecHitFromCUDA") << fmt::sprintf( - "Too many clusters %d in module %d. Only the first %d hits will be converted", nhits, gind, maxHitsInModule); - nhits = std::min(nhits, maxHitsInModule); - - LogDebug("SiPixelRecHitFromCUDA") << "in det " << gind << "conv " << nhits << " hits from " << dsv.size() - << " legacy clusters" << ' ' << lc << ',' << fc; - - if (0 == nhits) - continue; - auto jnd = [&](int k) { return fc + k; }; - assert(nhits <= dsv.size()); - if (nhits != dsv.size()) { - edm::LogWarning("GPUHits2CPU") << "nhits!= nclus " << nhits << ' ' << dsv.size(); - } - for (auto const& clust : dsv) { - assert(clust.originalId() >= 0); - assert(clust.originalId() < dsv.size()); - if (clust.originalId() >= nhits) - continue; - auto ij = jnd(clust.originalId()); - LocalPoint lp(xl[ij], yl[ij]); - LocalError le(xe[ij], 0, ye[ij]); - SiPixelRecHitQuality::QualWordType rqw = 0; - - numberOfClusters++; - - /* cpu version.... 
(for reference) - std::tuple tuple = cpe_->getParameters( clust, *genericDet ); - LocalPoint lp( std::get<0>(tuple) ); - LocalError le( std::get<1>(tuple) ); - SiPixelRecHitQuality::QualWordType rqw( std::get<2>(tuple) ); - */ - - // Create a persistent edm::Ref to the cluster - edm::Ref, SiPixelCluster> cluster = edmNew::makeRefTo(hclusters, &clust); - // Make a RecHit and add it to the DetSet - recHitsOnDetUnit.emplace_back(lp, le, rqw, *genericDet, cluster); - // ============================= - - LogDebug("SiPixelRecHitFromCUDA") << "cluster " << numberOfClusters << " at " << lp << ' ' << le; - - } // <-- End loop on Clusters - - // LogDebug("SiPixelRecHitGPU") - LogDebug("SiPixelRecHitFromCUDA") << "found " << recHitsOnDetUnit.size() << " RecHits on " << detid; - - } // <-- End loop on DetUnits - - LogDebug("SiPixelRecHitFromCUDA") << "found " << numberOfDetUnits << " dets, " << numberOfClusters << " clusters"; - - iEvent.emplace(rechitsPutToken_, std::move(output)); -} - -using SiPixelRecHitFromCUDAPhase1 = SiPixelRecHitFromCUDAT; -DEFINE_FWK_MODULE(SiPixelRecHitFromCUDAPhase1); - -using SiPixelRecHitFromCUDAPhase2 = SiPixelRecHitFromCUDAT; -DEFINE_FWK_MODULE(SiPixelRecHitFromCUDAPhase2); - -using SiPixelRecHitFromCUDAHIonPhase1 = SiPixelRecHitFromCUDAT; -DEFINE_FWK_MODULE(SiPixelRecHitFromCUDAHIonPhase1); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromCUDA.cc deleted file mode 100644 index c9ba2728243a6..0000000000000 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromCUDA.cc +++ /dev/null @@ -1,103 +0,0 @@ -#include - -#include - -#include "CUDADataFormats/Common/interface/HostProduct.h" -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoAHost.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h" -#include 
"DataFormats/Common/interface/DetSetVectorNew.h" -#include "DataFormats/Common/interface/Handle.h" -#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" -#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" -#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" - -template -class SiPixelRecHitSoAFromCUDAT : public edm::stream::EDProducer { -public: - explicit SiPixelRecHitSoAFromCUDAT(const edm::ParameterSet& iConfig); - ~SiPixelRecHitSoAFromCUDAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - using HMSstorage = HostProduct; - using HitsOnHost = TrackingRecHitSoAHost; - using HitsOnDevice = TrackingRecHitSoADevice; - -private: - void acquire(edm::Event const& iEvent, - edm::EventSetup const& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; - void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override; - - const edm::EDGetTokenT> hitsTokenGPU_; // CUDA hits - const edm::EDPutTokenT hitsPutTokenCPU_; - const edm::EDPutTokenT hostPutToken_; - - uint32_t nHits_; - HitsOnHost hits_h_; -}; - -template -SiPixelRecHitSoAFromCUDAT::SiPixelRecHitSoAFromCUDAT(const 
edm::ParameterSet& iConfig) - : hitsTokenGPU_(consumes(iConfig.getParameter("pixelRecHitSrc"))), - hitsPutTokenCPU_(produces()), - hostPutToken_(produces()) {} - -template -void SiPixelRecHitSoAFromCUDAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.add("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingCUDA")); - - descriptions.addWithDefaultLabel(desc); -} - -template -void SiPixelRecHitSoAFromCUDAT::acquire(edm::Event const& iEvent, - edm::EventSetup const& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - cms::cuda::Product const& inputDataWrapped = iEvent.get(hitsTokenGPU_); - cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; - auto const& inputData = ctx.get(inputDataWrapped); - - nHits_ = inputData.nHits(); - hits_h_ = HitsOnHost(nHits_, ctx.stream()); - cudaCheck(cudaMemcpyAsync(hits_h_.buffer().get(), - inputData.const_buffer().get(), - inputData.bufferSize(), - cudaMemcpyDeviceToHost, - ctx.stream())); // Copy data from Device to Host - LogDebug("SiPixelRecHitSoAFromCUDA") << "copying to cpu SoA" << inputData.nHits() << " Hits"; -} - -template -void SiPixelRecHitSoAFromCUDAT::produce(edm::Event& iEvent, edm::EventSetup const& es) { - auto hmsp = std::make_unique(TrackerTraits::numberOfModules + 1); - - if (nHits_ > 0) - std::copy(hits_h_.view().hitsModuleStart().begin(), hits_h_.view().hitsModuleStart().end(), hmsp.get()); - - iEvent.emplace(hostPutToken_, std::move(hmsp)); - iEvent.emplace(hitsPutTokenCPU_, std::move(hits_h_)); -} - -using SiPixelRecHitSoAFromCUDAPhase1 = SiPixelRecHitSoAFromCUDAT; -DEFINE_FWK_MODULE(SiPixelRecHitSoAFromCUDAPhase1); - -using SiPixelRecHitSoAFromCUDAPhase2 = SiPixelRecHitSoAFromCUDAT; -DEFINE_FWK_MODULE(SiPixelRecHitSoAFromCUDAPhase2); - -using SiPixelRecHitSoAFromCUDAHIonPhase1 = SiPixelRecHitSoAFromCUDAT; -DEFINE_FWK_MODULE(SiPixelRecHitSoAFromCUDAHIonPhase1); diff --git 
a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc deleted file mode 100644 index 21da864c1c348..0000000000000 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ /dev/null @@ -1,296 +0,0 @@ -#include - -#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" -#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoAHost.h" -#include "CUDADataFormats/Common/interface/PortableHostCollection.h" -#include "CUDADataFormats/Common/interface/HostProduct.h" -#include "DataFormats/BeamSpot/interface/BeamSpot.h" -#include "DataFormats/Common/interface/DetSetVectorNew.h" -#include "DataFormats/Common/interface/Handle.h" -#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" -#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" - -#include "gpuPixelRecHits.h" - -template -class SiPixelRecHitSoAFromLegacyT : public 
edm::global::EDProducer<> { -public: - explicit SiPixelRecHitSoAFromLegacyT(const edm::ParameterSet& iConfig); - ~SiPixelRecHitSoAFromLegacyT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - - using HitModuleStart = std::array; - using HMSstorage = HostProduct; - using HitsOnHost = TrackingRecHitSoAHost; - -private: - void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; - - const edm::ESGetToken geomToken_; - const edm::ESGetToken cpeToken_; - const edm::EDGetTokenT bsGetToken_; - const edm::EDGetTokenT clusterToken_; // Legacy Clusters - const edm::EDPutTokenT tokenHit_; - const edm::EDPutTokenT tokenModuleStart_; - const bool convert2Legacy_; -}; - -template -SiPixelRecHitSoAFromLegacyT::SiPixelRecHitSoAFromLegacyT(const edm::ParameterSet& iConfig) - : geomToken_(esConsumes()), - cpeToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CPE")))), - bsGetToken_{consumes(iConfig.getParameter("beamSpot"))}, - clusterToken_{consumes(iConfig.getParameter("src"))}, - tokenHit_{produces()}, - tokenModuleStart_{produces()}, - convert2Legacy_(iConfig.getParameter("convertToLegacy")) { - if (convert2Legacy_) - produces(); -} - -template -void SiPixelRecHitSoAFromLegacyT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); - desc.add("src", edm::InputTag("siPixelClustersPreSplitting")); - std::string cpeName = "PixelCPEFast"; - cpeName += TrackerTraits::nameModifier; - desc.add("CPE", cpeName); - desc.add("convertToLegacy", false); - - descriptions.addWithDefaultLabel(desc); -} - -template -void SiPixelRecHitSoAFromLegacyT::produce(edm::StreamID streamID, - edm::Event& iEvent, - const edm::EventSetup& es) const { - const TrackerGeometry* geom_ = &es.getData(geomToken_); - PixelCPEFast const* fcpe = dynamic_cast*>(&es.getData(cpeToken_)); - if (not fcpe) { - 
throw cms::Exception("Configuration") << "SiPixelRecHitSoAFromLegacy can only use a CPE of type PixelCPEFast"; - } - auto const& cpeView = fcpe->getCPUProduct(); - - const reco::BeamSpot& bs = iEvent.get(bsGetToken_); - - BeamSpotPOD bsHost; - bsHost.x = bs.x0(); - bsHost.y = bs.y0(); - bsHost.z = bs.z0(); - - edm::Handle hclusters; - iEvent.getByToken(clusterToken_, hclusters); - auto const& input = *hclusters; - - constexpr int nModules = TrackerTraits::numberOfModules; - constexpr int startBPIX2 = pixelTopology::layerStart(1); - - // allocate a buffer for the indices of the clusters - auto hmsp = std::make_unique(nModules + 1); - auto hitsModuleStart = hmsp.get(); - // wrap the buffer in a HostProduct - auto hms = std::make_unique(std::move(hmsp)); - // move the HostProduct to the Event, without reallocating the buffer or affecting hitsModuleStart - iEvent.put(tokenModuleStart_, std::move(hms)); - - // legacy output - auto legacyOutput = std::make_unique(); - - std::vector, SiPixelCluster>> clusterRef; - - constexpr uint32_t maxHitsInModule = TrackerTraits::maxNumClustersPerModules; - - cms::cuda::PortableHostCollection> clusters_h(nModules + 1); - - memset(clusters_h.view().clusInModule(), 0, (nModules + 1) * sizeof(uint32_t)); // needed?? 
- memset(clusters_h.view().moduleStart(), 0, (nModules + 1) * sizeof(uint32_t)); - memset(clusters_h.view().moduleId(), 0, (nModules + 1) * sizeof(uint32_t)); - memset(clusters_h.view().clusModuleStart(), 0, (nModules + 1) * sizeof(uint32_t)); - - assert(0 == clusters_h.view()[nModules].clusInModule()); - clusters_h.view()[1].moduleStart() = 0; - - // fill cluster arrays - int numberOfClusters = 0; - for (auto const& dsv : input) { - unsigned int detid = dsv.detId(); - DetId detIdObject(detid); - const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); - auto gind = genericDet->index(); - assert(gind < nModules); - auto const nclus = dsv.size(); - clusters_h.view()[gind].clusInModule() = nclus; - numberOfClusters += nclus; - } - clusters_h.view()[0].clusModuleStart() = 0; - - for (int i = 1; i < nModules + 1; ++i) { - clusters_h.view()[i].clusModuleStart() = - clusters_h.view()[i - 1].clusModuleStart() + clusters_h.view()[i - 1].clusInModule(); - } - - assert((uint32_t)numberOfClusters == clusters_h.view()[nModules].clusModuleStart()); - // output SoA - // element 96 is the start of BPIX2 (i.e. 
the number of clusters in BPIX1) - HitsOnHost output( - numberOfClusters, clusters_h.view()[startBPIX2].clusModuleStart(), &cpeView, clusters_h.view().clusModuleStart()); - - if (0 == numberOfClusters) { - iEvent.emplace(tokenHit_, std::move(output)); - if (convert2Legacy_) - iEvent.put(std::move(legacyOutput)); - return; - } - - if (convert2Legacy_) - legacyOutput->reserve(nModules, numberOfClusters); - - int numberOfDetUnits = 0; - int numberOfHits = 0; - for (auto const& dsv : input) { - numberOfDetUnits++; - unsigned int detid = dsv.detId(); - DetId detIdObject(detid); - const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); - auto const gind = genericDet->index(); - assert(gind < nModules); - const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); - assert(pixDet); - auto const nclus = dsv.size(); - - assert(clusters_h.view()[gind].clusInModule() == nclus); - if (0 == nclus) - continue; // is this really possible? - - auto const fc = clusters_h.view()[gind].clusModuleStart(); - auto const lc = clusters_h.view()[gind + 1].clusModuleStart(); - assert(lc > fc); - LogDebug("SiPixelRecHitSoAFromLegacy") << "in det " << gind << ": conv " << nclus << " hits from " << dsv.size() - << " legacy clusters" << ' ' << fc << ',' << lc; - assert((lc - fc) == nclus); - if (nclus > maxHitsInModule) - printf( - "WARNING: too many clusters %d in Module %d. 
Only first %d Hits converted\n", nclus, gind, maxHitsInModule); - - // count digis - uint32_t ndigi = 0; - for (auto const& clust : dsv) { - assert(clust.size() > 0); - ndigi += clust.size(); - } - - cms::cuda::PortableHostCollection digis_h(ndigi); - - clusterRef.clear(); - clusters_h.view()[0].moduleId() = gind; - - uint32_t ic = 0; - ndigi = 0; - //filling digis - for (auto const& clust : dsv) { - assert(clust.size() > 0); - for (int i = 0, nd = clust.size(); i < nd; ++i) { - auto px = clust.pixel(i); - digis_h.view()[ndigi].xx() = px.x; - digis_h.view()[ndigi].yy() = px.y; - digis_h.view()[ndigi].adc() = px.adc; - digis_h.view()[ndigi].moduleId() = gind; - digis_h.view()[ndigi].clus() = ic; - ++ndigi; - } - - if (convert2Legacy_) - clusterRef.emplace_back(edmNew::makeRefTo(hclusters, &clust)); - ic++; - } - assert(nclus == ic); - - numberOfHits += nclus; - // filled creates view - assert(digis_h.view()[0].adc() != 0); - // we run on blockId.x==0 - - gpuPixelRecHits::getHits(&cpeView, &bsHost, digis_h.view(), ndigi, clusters_h.view(), output.view()); - for (auto h = fc; h < lc; ++h) - if (h - fc < maxHitsInModule) - assert(gind == output.view()[h].detectorIndex()); - else - assert(gpuClustering::invalidModuleId == output.view()[h].detectorIndex()); - - if (convert2Legacy_) { - SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(*legacyOutput, detid); - for (auto h = fc; h < lc; ++h) { - auto ih = h - fc; - - if (ih >= maxHitsInModule) - break; - - assert(ih < clusterRef.size()); - LocalPoint lp(output.view()[h].xLocal(), output.view()[h].yLocal()); - LocalError le(output.view()[h].xerrLocal(), 0, output.view()[h].yerrLocal()); - - SiPixelRecHitQuality::QualWordType rqw = 0; - SiPixelRecHit hit(lp, le, rqw, *genericDet, clusterRef[ih]); - recHitsOnDetUnit.push_back(hit); - } - } - } - - assert(numberOfHits == numberOfClusters); - - // fill data structure to support CA - constexpr auto nLayers = TrackerTraits::numberOfLayers; - for (auto i = 0U; i < nLayers + 
1; ++i) { - output.view().hitsLayerStart()[i] = clusters_h.view()[cpeView.layerGeometry().layerStart[i]].clusModuleStart(); - LogDebug("SiPixelRecHitSoAFromLegacy") - << "Layer n." << i << " - starting at module: " << cpeView.layerGeometry().layerStart[i] - << " - starts ad cluster: " << output->hitsLayerStart()[i] << "\n"; - } - - cms::cuda::fillManyFromVector(&(output.view().phiBinner()), - nLayers, - output.view().iphi(), - output.view().hitsLayerStart().data(), - output.view().nHits(), - 256, - output.view().phiBinnerStorage()); - - LogDebug("SiPixelRecHitSoAFromLegacy") << "created HitSoa for " << numberOfClusters << " clusters in " - << numberOfDetUnits << " Dets" - << "\n"; - - // copy pointer to data (SoA view) to allocated buffer - memcpy(hitsModuleStart, clusters_h.view().clusModuleStart(), nModules * sizeof(uint32_t)); - - iEvent.emplace(tokenHit_, std::move(output)); - if (convert2Legacy_) - iEvent.put(std::move(legacyOutput)); -} - -using SiPixelRecHitSoAFromLegacyPhase1 = SiPixelRecHitSoAFromLegacyT; -DEFINE_FWK_MODULE(SiPixelRecHitSoAFromLegacyPhase1); - -using SiPixelRecHitSoAFromLegacyPhase2 = SiPixelRecHitSoAFromLegacyT; -DEFINE_FWK_MODULE(SiPixelRecHitSoAFromLegacyPhase2); - -using SiPixelRecHitSoAFromLegacyHIonPhase1 = SiPixelRecHitSoAFromLegacyT; -DEFINE_FWK_MODULE(SiPixelRecHitSoAFromLegacyHIonPhase1); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h deleted file mode 100644 index 55c556bd63048..0000000000000 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ /dev/null @@ -1,210 +0,0 @@ -#ifndef RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h -#define RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h - -#include -#include -#include - -#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include 
"CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -#include "DataFormats/Math/interface/approx_atan2.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" - -//#define GPU_DEBUG - -namespace gpuPixelRecHits { - - template - __global__ void getHits(pixelCPEforGPU::ParamsOnGPUT const* __restrict__ cpeParams, - BeamSpotPOD const* __restrict__ bs, - SiPixelDigisSoA::ConstView digis, - int numElements, - SiPixelClustersCUDASOAConstView clusters, - TrackingRecHitSoAView hits) { - // FIXME - // the compiler seems NOT to optimize loads from views (even in a simple test case) - // The whole gimnastic here of copying or not is a pure heuristic exercise that seems to produce the fastest code with the above signature - // not using views (passing a gazzilion of array pointers) seems to produce the fastest code (but it is harder to mantain) - - assert(cpeParams); - - // copy average geometry corrected by beamspot . FIXME (move it somewhere else???) - if (0 == blockIdx.x) { - auto& agc = hits.averageGeometry(); - auto const& ag = cpeParams->averageGeometry(); - auto nLadders = TrackerTraits::numberOfLaddersInBarrel; - - for (int il = threadIdx.x, nl = nLadders; il < nl; il += blockDim.x) { - agc.ladderZ[il] = ag.ladderZ[il] - bs->z; - agc.ladderX[il] = ag.ladderX[il] - bs->x; - agc.ladderY[il] = ag.ladderY[il] - bs->y; - agc.ladderR[il] = sqrt(agc.ladderX[il] * agc.ladderX[il] + agc.ladderY[il] * agc.ladderY[il]); - agc.ladderMinZ[il] = ag.ladderMinZ[il] - bs->z; - agc.ladderMaxZ[il] = ag.ladderMaxZ[il] - bs->z; - } - - if (0 == threadIdx.x) { - agc.endCapZ[0] = ag.endCapZ[0] - bs->z; - agc.endCapZ[1] = ag.endCapZ[1] - bs->z; - } - } - - // to be moved in common namespace... 
- using gpuClustering::invalidModuleId; - constexpr int32_t MaxHitsInIter = pixelCPEforGPU::MaxHitsInIter; - - using ClusParams = pixelCPEforGPU::ClusParams; - - // as usual one block per module - __shared__ ClusParams clusParams; - - auto me = clusters[blockIdx.x].moduleId(); - int nclus = clusters[me].clusInModule(); - - if (0 == nclus) - return; -#ifdef GPU_DEBUG - if (threadIdx.x == 0) { - auto k = clusters[1 + blockIdx.x].moduleStart(); - while (digis[k].moduleId() == invalidModuleId) - ++k; - assert(digis[k].moduleId() == me); - } - - if (me % 100 == 1) - if (threadIdx.x == 0) - printf("hitbuilder: %d clusters in module %d. will write at %d\n", nclus, me, clusters[me].clusModuleStart()); -#endif - - for (int startClus = 0, endClus = nclus; startClus < endClus; startClus += MaxHitsInIter) { - int nClusInIter = std::min(MaxHitsInIter, endClus - startClus); - int lastClus = startClus + nClusInIter; - assert(nClusInIter <= nclus); - assert(nClusInIter > 0); - assert(lastClus <= nclus); - - assert(nclus > MaxHitsInIter || (0 == startClus && nClusInIter == nclus && lastClus == nclus)); - - // init - for (int ic = threadIdx.x; ic < nClusInIter; ic += blockDim.x) { - clusParams.minRow[ic] = std::numeric_limits::max(); - clusParams.maxRow[ic] = 0; - clusParams.minCol[ic] = std::numeric_limits::max(); - clusParams.maxCol[ic] = 0; - clusParams.charge[ic] = 0; - clusParams.q_f_X[ic] = 0; - clusParams.q_l_X[ic] = 0; - clusParams.q_f_Y[ic] = 0; - clusParams.q_l_Y[ic] = 0; - } - - __syncthreads(); - - // one thread per "digi" - auto first = clusters[1 + blockIdx.x].moduleStart() + threadIdx.x; - for (int i = first; i < numElements; i += blockDim.x) { - auto id = digis[i].moduleId(); - if (id == invalidModuleId) - continue; // not valid - if (id != me) - break; // end of module - auto cl = digis[i].clus(); - if (cl < startClus || cl >= lastClus) - continue; - cl -= startClus; - assert(cl >= 0); - assert(cl < MaxHitsInIter); - auto x = digis[i].xx(); - auto y = digis[i].yy(); 
- atomicMin(&clusParams.minRow[cl], x); - atomicMax(&clusParams.maxRow[cl], x); - atomicMin(&clusParams.minCol[cl], y); - atomicMax(&clusParams.maxCol[cl], y); - } - - __syncthreads(); - - auto pixmx = cpeParams->detParams(me).pixmx; - for (int i = first; i < numElements; i += blockDim.x) { - auto id = digis[i].moduleId(); - if (id == invalidModuleId) - continue; // not valid - if (id != me) - break; // end of module - auto cl = digis[i].clus(); - if (cl < startClus || cl >= lastClus) - continue; - cl -= startClus; - assert(cl >= 0); - assert(cl < MaxHitsInIter); - auto x = digis[i].xx(); - auto y = digis[i].yy(); - auto ch = digis[i].adc(); - atomicAdd(&clusParams.charge[cl], ch); - ch = std::min(ch, pixmx); - if (clusParams.minRow[cl] == x) - atomicAdd(&clusParams.q_f_X[cl], ch); - if (clusParams.maxRow[cl] == x) - atomicAdd(&clusParams.q_l_X[cl], ch); - if (clusParams.minCol[cl] == y) - atomicAdd(&clusParams.q_f_Y[cl], ch); - if (clusParams.maxCol[cl] == y) - atomicAdd(&clusParams.q_l_Y[cl], ch); - } - - __syncthreads(); - - // next one cluster per thread... 
- - first = clusters[me].clusModuleStart() + startClus; - for (int ic = threadIdx.x; ic < nClusInIter; ic += blockDim.x) { - auto h = first + ic; // output index in global memory - - assert(h < hits.nHits()); - assert(h < clusters[me + 1].clusModuleStart()); - - pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); - - pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); - - // store it - hits[h].chargeAndStatus().charge = clusParams.charge[ic]; - hits[h].chargeAndStatus().status = clusParams.status[ic]; - hits[h].detectorIndex() = me; - - float xl, yl; - hits[h].xLocal() = xl = clusParams.xpos[ic]; - hits[h].yLocal() = yl = clusParams.ypos[ic]; - - hits[h].clusterSizeX() = clusParams.xsize[ic]; - hits[h].clusterSizeY() = clusParams.ysize[ic]; - - hits[h].xerrLocal() = clusParams.xerr[ic] * clusParams.xerr[ic] + cpeParams->detParams(me).apeXX; - hits[h].yerrLocal() = clusParams.yerr[ic] * clusParams.yerr[ic] + cpeParams->detParams(me).apeYY; - - // keep it local for computations - float xg, yg, zg; - // to global and compute phi... - cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg); - // here correct for the beamspot... 
- xg -= bs->x; - yg -= bs->y; - zg -= bs->z; - - hits[h].xGlobal() = xg; - hits[h].yGlobal() = yg; - hits[h].zGlobal() = zg; - - hits[h].rGlobal() = std::sqrt(xg * xg + yg * yg); - hits[h].iphi() = unsafe_atan2s<7>(yg, xg); - } - __syncthreads(); - } // end loop on batches - } - -} // namespace gpuPixelRecHits - -#endif // RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc deleted file mode 100644 index 8ab0c265a866f..0000000000000 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ /dev/null @@ -1,531 +0,0 @@ -#include - -#include "CondFormats/SiPixelTransient/interface/SiPixelTemplate.h" -#include "DataFormats/DetId/interface/DetId.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "MagneticField/Engine/interface/MagneticField.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" - -// Services -// this is needed to get errors from templates - -namespace { - constexpr float micronsToCm = 1.0e-4; -} - -//----------------------------------------------------------------------------- -//! The constructor. 
-//----------------------------------------------------------------------------- -template -PixelCPEFast::PixelCPEFast(edm::ParameterSet const& conf, - const MagneticField* mag, - const TrackerGeometry& geom, - const TrackerTopology& ttopo, - const SiPixelLorentzAngle* lorentzAngle, - const SiPixelGenErrorDBObject* genErrorDBObject, - const SiPixelLorentzAngle* lorentzAngleWidth) - : PixelCPEGenericBase(conf, mag, geom, ttopo, lorentzAngle, genErrorDBObject, lorentzAngleWidth) { - // Use errors from templates or from GenError - if (useErrorsFromTemplates_) { - if (!SiPixelGenError::pushfile(*genErrorDBObject_, thePixelGenError_)) - throw cms::Exception("InvalidCalibrationLoaded") - << "ERROR: GenErrors not filled correctly. Check the sqlite file. Using SiPixelTemplateDBObject version " - << (*genErrorDBObject_).version(); - } - - fillParamsForGpu(); - - cpuData_ = { - &commonParamsGPU_, - detParamsGPU_.data(), - &layerGeometry_, - &averageGeometry_, - }; -} - -template -const pixelCPEforGPU::ParamsOnGPUT* PixelCPEFast::getGPUProductAsync( - cudaStream_t cudaStream) const { - using ParamsOnGPU = pixelCPEforGPU::ParamsOnGPUT; - using LayerGeometry = pixelCPEforGPU::LayerGeometryT; - using AverageGeometry = pixelTopology::AverageGeometryT; - - const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { - // and now copy to device... 
- - cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); - cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_detParams, - this->detParamsGPU_.size() * sizeof(pixelCPEforGPU::DetParams))); - cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_averageGeometry, sizeof(AverageGeometry))); - cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_layerGeometry, sizeof(LayerGeometry))); - cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_d, sizeof(ParamsOnGPU))); - cudaCheck(cudaMemcpyAsync(data.paramsOnGPU_d, &data.paramsOnGPU_h, sizeof(ParamsOnGPU), cudaMemcpyDefault, stream)); - cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_commonParams, - &this->commonParamsGPU_, - sizeof(pixelCPEforGPU::CommonParams), - cudaMemcpyDefault, - stream)); - cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_averageGeometry, - &this->averageGeometry_, - sizeof(AverageGeometry), - cudaMemcpyDefault, - stream)); - cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_layerGeometry, - &this->layerGeometry_, - sizeof(LayerGeometry), - cudaMemcpyDefault, - stream)); - cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_detParams, - this->detParamsGPU_.data(), - this->detParamsGPU_.size() * sizeof(pixelCPEforGPU::DetParams), - cudaMemcpyDefault, - stream)); - }); - return data.paramsOnGPU_d; -} - -template -void PixelCPEFast::fillParamsForGpu() { - // - // this code executes only once per job, computation inefficiency is not an issue - // many code blocks are repeated: better keep the computation local and self oconsistent as blocks may in future move around, be deleted ... 
- // It is valid only for Phase1 and the version of GenError in DB used in late 2018 and in 2021 - - commonParamsGPU_.theThicknessB = m_DetParams.front().theThickness; - commonParamsGPU_.theThicknessE = m_DetParams.back().theThickness; - commonParamsGPU_.thePitchX = m_DetParams[0].thePitchX; - commonParamsGPU_.thePitchY = m_DetParams[0].thePitchY; - - commonParamsGPU_.numberOfLaddersInBarrel = TrackerTraits::numberOfLaddersInBarrel; - - LogDebug("PixelCPEFast") << "pitch & thickness " << commonParamsGPU_.thePitchX << ' ' << commonParamsGPU_.thePitchY - << " " << commonParamsGPU_.theThicknessB << ' ' << commonParamsGPU_.theThicknessE; - - // zero average geometry - memset(&averageGeometry_, 0, sizeof(pixelTopology::AverageGeometryT)); - - uint32_t oldLayer = 0; - uint32_t oldLadder = 0; - float rl = 0; - float zl = 0; - float miz = 500, mxz = 0; - float pl = 0; - int nl = 0; - detParamsGPU_.resize(m_DetParams.size()); - - for (auto i = 0U; i < m_DetParams.size(); ++i) { - auto& p = m_DetParams[i]; - auto& g = detParamsGPU_[i]; - - g.nRowsRoc = p.theDet->specificTopology().rowsperroc(); - g.nColsRoc = p.theDet->specificTopology().colsperroc(); - g.nRows = p.theDet->specificTopology().rocsX() * g.nRowsRoc; - g.nCols = p.theDet->specificTopology().rocsY() * g.nColsRoc; - - g.numPixsInModule = g.nRows * g.nCols; - - assert(p.theDet->index() == int(i)); - assert(commonParamsGPU_.thePitchY == p.thePitchY); - assert(commonParamsGPU_.thePitchX == p.thePitchX); - - g.isBarrel = GeomDetEnumerators::isBarrel(p.thePart); - g.isPosZ = p.theDet->surface().position().z() > 0; - g.layer = ttopo_.layer(p.theDet->geographicalId()); - g.index = i; // better be! - g.rawId = p.theDet->geographicalId(); - auto thickness = g.isBarrel ? 
commonParamsGPU_.theThicknessB : commonParamsGPU_.theThicknessE; - assert(thickness == p.theThickness); - - auto ladder = ttopo_.pxbLadder(p.theDet->geographicalId()); - if (oldLayer != g.layer) { - oldLayer = g.layer; - LogDebug("PixelCPEFast") << "new layer at " << i << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) - << g.layer << " starting at " << g.rawId << '\n' - << "old layer had " << nl << " ladders"; - nl = 0; - } - if (oldLadder != ladder) { - oldLadder = ladder; - LogDebug("PixelCPEFast") << "new ladder at " << i << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) - << ladder << " starting at " << g.rawId << '\n' - << "old ladder ave z,r,p mz " << zl / 8.f << " " << rl / 8.f << " " << pl / 8.f << ' ' - << miz << ' ' << mxz; - rl = 0; - zl = 0; - pl = 0; - miz = 500; - mxz = 0; - nl++; - } - - g.shiftX = 0.5f * p.lorentzShiftInCmX; - g.shiftY = 0.5f * p.lorentzShiftInCmY; - g.chargeWidthX = p.lorentzShiftInCmX * p.widthLAFractionX; - g.chargeWidthY = p.lorentzShiftInCmY * p.widthLAFractionY; - - g.x0 = p.theOrigin.x(); - g.y0 = p.theOrigin.y(); - g.z0 = p.theOrigin.z(); - - auto vv = p.theDet->surface().position(); - auto rr = pixelCPEforGPU::Rotation(p.theDet->surface().rotation()); - g.frame = pixelCPEforGPU::Frame(vv.x(), vv.y(), vv.z(), rr); - - zl += vv.z(); - miz = std::min(miz, std::abs(vv.z())); - mxz = std::max(mxz, std::abs(vv.z())); - rl += vv.perp(); - pl += vv.phi(); // (not obvious) - - // errors ..... - ClusterParamGeneric cp; - - cp.with_track_angle = false; - - auto lape = p.theDet->localAlignmentError(); - if (lape.invalid()) - lape = LocalError(); // zero.... 
- - g.apeXX = lape.xx(); - g.apeYY = lape.yy(); - - auto toMicron = [&](float x) { return std::min(511, int(x * 1.e4f + 0.5f)); }; - - // average angle - auto gvx = p.theOrigin.x() + 40.f * commonParamsGPU_.thePitchX; - auto gvy = p.theOrigin.y(); - auto gvz = 1.f / p.theOrigin.z(); - //--- Note that the normalization is not required as only the ratio used - - { - // calculate angles (fed into errorFromTemplates) - cp.cotalpha = gvx * gvz; - cp.cotbeta = gvy * gvz; - - errorFromTemplates(p, cp, 20000.); - } - -#ifdef EDM_ML_DEBUG - auto m = 10000.f; - for (float qclus = 15000; qclus < 35000; qclus += 15000) { - errorFromTemplates(p, cp, qclus); - LogDebug("PixelCPEFast") << i << ' ' << qclus << ' ' << cp.pixmx << ' ' << m * cp.sigmax << ' ' << m * cp.sx1 - << ' ' << m * cp.sx2 << ' ' << m * cp.sigmay << ' ' << m * cp.sy1 << ' ' << m * cp.sy2; - } - LogDebug("PixelCPEFast") << i << ' ' << m * std::sqrt(lape.xx()) << ' ' << m * std::sqrt(lape.yy()); -#endif // EDM_ML_DEBUG - - g.pixmx = std::max(0, cp.pixmx); - g.sx2 = toMicron(cp.sx2); - g.sy1 = std::max(21, toMicron(cp.sy1)); // for some angles sy1 is very small - g.sy2 = std::max(55, toMicron(cp.sy2)); // sometimes sy2 is smaller than others (due to angle?) 
- - //sample xerr as function of position - // moduleOffsetX is the definition of TrackerTraits::xOffset, - // needs to be calculated because for Phase2 the modules are not uniform - float moduleOffsetX = -(0.5f * float(g.nRows) + TrackerTraits::bigPixXCorrection); - auto const xoff = moduleOffsetX * commonParamsGPU_.thePitchX; - - for (int ix = 0; ix < CPEFastParametrisation::kNumErrorBins; ++ix) { - auto x = xoff * (1.f - (0.5f + float(ix)) / 8.f); - auto gvx = p.theOrigin.x() - x; - auto gvy = p.theOrigin.y(); - auto gvz = 1.f / p.theOrigin.z(); - cp.cotbeta = gvy * gvz; - cp.cotalpha = gvx * gvz; - errorFromTemplates(p, cp, 20000.f); - g.sigmax[ix] = toMicron(cp.sigmax); - g.sigmax1[ix] = toMicron(cp.sx1); - LogDebug("PixelCPEFast") << "sigmax vs x " << i << ' ' << x << ' ' << cp.cotalpha << ' ' << int(g.sigmax[ix]) - << ' ' << int(g.sigmax1[ix]) << ' ' << 10000.f * cp.sigmay << std::endl; - } -#ifdef EDM_ML_DEBUG - // sample yerr as function of position - // moduleOffsetY is the definition of TrackerTraits::yOffset (removed) - float moduleOffsetY = 0.5f * float(g.nCols) + TrackerTraits::bigPixYCorrection; - auto const yoff = -moduleOffsetY * commonParamsGPU_.thePitchY; - - for (int ix = 0; ix < CPEFastParametrisation::kNumErrorBins; ++ix) { - auto y = yoff * (1.f - (0.5f + float(ix)) / 8.f); - auto gvx = p.theOrigin.x() + 40.f * commonParamsGPU_.thePitchY; - auto gvy = p.theOrigin.y() - y; - auto gvz = 1.f / p.theOrigin.z(); - cp.cotbeta = gvy * gvz; - cp.cotalpha = gvx * gvz; - errorFromTemplates(p, cp, 20000.f); - LogDebug("PixelCPEFast") << "sigmay vs y " << i << ' ' << y << ' ' << cp.cotbeta << ' ' << 10000.f * cp.sigmay - << std::endl; - } -#endif // EDM_ML_DEBUG - - // calculate angles (repeated) - cp.cotalpha = gvx * gvz; - cp.cotbeta = gvy * gvz; - auto aveCB = cp.cotbeta; - - // sample x by charge - int qbin = CPEFastParametrisation::kGenErrorQBins; // low charge - int k = 0; - for (int qclus = 1000; qclus < 200000; qclus += 1000) { - 
errorFromTemplates(p, cp, qclus); - if (cp.qBin_ == qbin) - continue; - qbin = cp.qBin_; - g.xfact[k] = cp.sigmax; - g.yfact[k] = cp.sigmay; - g.minCh[k++] = qclus; -#ifdef EDM_ML_DEBUG - LogDebug("PixelCPEFast") << i << ' ' << g.rawId << ' ' << cp.cotalpha << ' ' << qclus << ' ' << cp.qBin_ << ' ' - << cp.pixmx << ' ' << m * cp.sigmax << ' ' << m * cp.sx1 << ' ' << m * cp.sx2 << ' ' - << m * cp.sigmay << ' ' << m * cp.sy1 << ' ' << m * cp.sy2 << std::endl; -#endif // EDM_ML_DEBUG - } - - assert(k <= CPEFastParametrisation::kGenErrorQBins); - - // fill the rest (sometimes bin 4 is missing) - for (int kk = k; kk < CPEFastParametrisation::kGenErrorQBins; ++kk) { - g.xfact[kk] = g.xfact[k - 1]; - g.yfact[kk] = g.yfact[k - 1]; - g.minCh[kk] = g.minCh[k - 1]; - } - auto detx = 1.f / g.xfact[0]; - auto dety = 1.f / g.yfact[0]; - for (int kk = 0; kk < CPEFastParametrisation::kGenErrorQBins; ++kk) { - g.xfact[kk] *= detx; - g.yfact[kk] *= dety; - } - // sample y in "angle" (estimated from cluster size) - float ys = 8.f - 4.f; // apperent bias of half pixel (see plot) - // plot: https://indico.cern.ch/event/934821/contributions/3974619/attachments/2091853/3515041/DigilessReco.pdf page 25 - // sample yerr as function of "size" - for (int iy = 0; iy < CPEFastParametrisation::kNumErrorBins; ++iy) { - ys += 1.f; // first bin 0 is for size 9 (and size is in fixed point 2^3) - if (CPEFastParametrisation::kNumErrorBins - 1 == iy) - ys += 8.f; // last bin for "overflow" - // cp.cotalpha = ys*(commonParamsGPU_.thePitchX/(8.f*thickness)); // use this to print sampling in "x" (and comment the line below) - cp.cotbeta = std::copysign(ys * (commonParamsGPU_.thePitchY / (8.f * thickness)), aveCB); - errorFromTemplates(p, cp, 20000.f); - g.sigmay[iy] = toMicron(cp.sigmay); - LogDebug("PixelCPEFast") << "sigmax/sigmay " << i << ' ' << (ys + 4.f) / 8.f << ' ' << cp.cotalpha << '/' - << cp.cotbeta << ' ' << 10000.f * cp.sigmax << '/' << int(g.sigmay[iy]) << std::endl; - } - } // loop over 
det - - constexpr int numberOfModulesInLadder = TrackerTraits::numberOfModulesInLadder; - constexpr int numberOfLaddersInBarrel = TrackerTraits::numberOfLaddersInBarrel; - constexpr int numberOfModulesInBarrel = TrackerTraits::numberOfModulesInBarrel; - - constexpr float ladderFactor = 1.f / float(numberOfModulesInLadder); - - constexpr int firstEndcapPos = TrackerTraits::firstEndcapPos; - constexpr int firstEndcapNeg = TrackerTraits::firstEndcapNeg; - - // compute ladder baricenter (only in global z) for the barrel - // - auto& aveGeom = averageGeometry_; - int il = 0; - for (int im = 0, nm = numberOfModulesInBarrel; im < nm; ++im) { - auto const& g = detParamsGPU_[im]; - il = im / numberOfModulesInLadder; - assert(il < int(numberOfLaddersInBarrel)); - auto z = g.frame.z(); - aveGeom.ladderZ[il] += ladderFactor * z; - aveGeom.ladderMinZ[il] = std::min(aveGeom.ladderMinZ[il], z); - aveGeom.ladderMaxZ[il] = std::max(aveGeom.ladderMaxZ[il], z); - aveGeom.ladderX[il] += ladderFactor * g.frame.x(); - aveGeom.ladderY[il] += ladderFactor * g.frame.y(); - aveGeom.ladderR[il] += ladderFactor * sqrt(g.frame.x() * g.frame.x() + g.frame.y() * g.frame.y()); - } - assert(il + 1 == int(numberOfLaddersInBarrel)); - // add half_module and tollerance - constexpr float moduleLength = TrackerTraits::moduleLength; - constexpr float module_tolerance = 0.2f; - for (int il = 0, nl = numberOfLaddersInBarrel; il < nl; ++il) { - aveGeom.ladderMinZ[il] -= (0.5f * moduleLength - module_tolerance); - aveGeom.ladderMaxZ[il] += (0.5f * moduleLength - module_tolerance); - } - - // compute "max z" for first layer in endcap (should we restrict to the outermost ring?) 
- for (auto im = TrackerTraits::layerStart[firstEndcapPos]; im < TrackerTraits::layerStart[firstEndcapPos + 1]; ++im) { - auto const& g = detParamsGPU_[im]; - aveGeom.endCapZ[0] = std::max(aveGeom.endCapZ[0], g.frame.z()); - } - for (auto im = TrackerTraits::layerStart[firstEndcapNeg]; im < TrackerTraits::layerStart[firstEndcapNeg + 1]; ++im) { - auto const& g = detParamsGPU_[im]; - aveGeom.endCapZ[1] = std::min(aveGeom.endCapZ[1], g.frame.z()); - } - // correct for outer ring being closer - aveGeom.endCapZ[0] -= TrackerTraits::endcapCorrection; - aveGeom.endCapZ[1] += TrackerTraits::endcapCorrection; -#ifdef EDM_ML_DEBUG - for (int jl = 0, nl = numberOfLaddersInBarrel; jl < nl; ++jl) { - LogDebug("PixelCPEFast") << jl << ':' << aveGeom.ladderR[jl] << '/' - << std::sqrt(aveGeom.ladderX[jl] * aveGeom.ladderX[jl] + - aveGeom.ladderY[jl] * aveGeom.ladderY[jl]) - << ',' << aveGeom.ladderZ[jl] << ',' << aveGeom.ladderMinZ[jl] << ',' - << aveGeom.ladderMaxZ[jl] << '\n'; - } - LogDebug("PixelCPEFast") << aveGeom.endCapZ[0] << ' ' << aveGeom.endCapZ[1]; -#endif // EDM_ML_DEBUG - - // fill Layer and ladders geometry - memset(&layerGeometry_, 0, sizeof(pixelCPEforGPU::LayerGeometryT)); - memcpy(layerGeometry_.layerStart, - TrackerTraits::layerStart, - sizeof(pixelCPEforGPU::LayerGeometryT::layerStart)); - memcpy(layerGeometry_.layer, pixelTopology::layer.data(), pixelTopology::layer.size()); - layerGeometry_.maxModuleStride = pixelTopology::maxModuleStride; -} - -template -PixelCPEFast::GPUData::~GPUData() { - if (paramsOnGPU_d != nullptr) { - cudaFree((void*)paramsOnGPU_h.m_commonParams); - cudaFree((void*)paramsOnGPU_h.m_detParams); - cudaFree((void*)paramsOnGPU_h.m_averageGeometry); - cudaFree((void*)paramsOnGPU_h.m_layerGeometry); - cudaFree(paramsOnGPU_d); - } -} - -template -void PixelCPEFast::errorFromTemplates(DetParam const& theDetParam, - ClusterParamGeneric& theClusterParam, - float qclus) const { - float locBz = theDetParam.bz; - float locBx = theDetParam.bx; - 
LogDebug("PixelCPEFast") << "PixelCPEFast::localPosition(...) : locBz = " << locBz; - - theClusterParam.pixmx = std::numeric_limits::max(); // max pixel charge for truncation of 2-D cluster - - theClusterParam.sigmay = -999.9; // CPE Generic y-error for multi-pixel cluster - theClusterParam.sigmax = -999.9; // CPE Generic x-error for multi-pixel cluster - theClusterParam.sy1 = -999.9; // CPE Generic y-error for single single-pixel - theClusterParam.sy2 = -999.9; // CPE Generic y-error for single double-pixel cluster - theClusterParam.sx1 = -999.9; // CPE Generic x-error for single single-pixel cluster - theClusterParam.sx2 = -999.9; // CPE Generic x-error for single double-pixel cluster - - float dummy; - - SiPixelGenError gtempl(thePixelGenError_); - int gtemplID = theDetParam.detTemplateId; - - theClusterParam.qBin_ = gtempl.qbin(gtemplID, - theClusterParam.cotalpha, - theClusterParam.cotbeta, - locBz, - locBx, - qclus, - false, - theClusterParam.pixmx, - theClusterParam.sigmay, - dummy, - theClusterParam.sigmax, - dummy, - theClusterParam.sy1, - dummy, - theClusterParam.sy2, - dummy, - theClusterParam.sx1, - dummy, - theClusterParam.sx2, - dummy); - - theClusterParam.sigmax = theClusterParam.sigmax * micronsToCm; - theClusterParam.sx1 = theClusterParam.sx1 * micronsToCm; - theClusterParam.sx2 = theClusterParam.sx2 * micronsToCm; - - theClusterParam.sigmay = theClusterParam.sigmay * micronsToCm; - theClusterParam.sy1 = theClusterParam.sy1 * micronsToCm; - theClusterParam.sy2 = theClusterParam.sy2 * micronsToCm; -} - -//----------------------------------------------------------------------------- -//! Hit position in the local frame (in cm). Unlike other CPE's, this -//! one converts everything from the measurement frame (in channel numbers) -//! into the local frame (in centimeters). 
-//----------------------------------------------------------------------------- -template -LocalPoint PixelCPEFast::localPosition(DetParam const& theDetParam, - ClusterParam& theClusterParamBase) const { - ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase); - - if (useErrorsFromTemplates_) { - errorFromTemplates(theDetParam, theClusterParam, theClusterParam.theCluster->charge()); - } else { - theClusterParam.qBin_ = 0; - } - - int q_f_X; //!< Q of the first pixel in X - int q_l_X; //!< Q of the last pixel in X - int q_f_Y; //!< Q of the first pixel in Y - int q_l_Y; //!< Q of the last pixel in Y - collect_edge_charges(theClusterParam, q_f_X, q_l_X, q_f_Y, q_l_Y, useErrorsFromTemplates_ && truncatePixelCharge_); - - // do GPU like ... - pixelCPEforGPU::ClusParams cp; - - cp.minRow[0] = theClusterParam.theCluster->minPixelRow(); - cp.maxRow[0] = theClusterParam.theCluster->maxPixelRow(); - cp.minCol[0] = theClusterParam.theCluster->minPixelCol(); - cp.maxCol[0] = theClusterParam.theCluster->maxPixelCol(); - - cp.q_f_X[0] = q_f_X; - cp.q_l_X[0] = q_l_X; - cp.q_f_Y[0] = q_f_Y; - cp.q_l_Y[0] = q_l_Y; - - cp.charge[0] = theClusterParam.theCluster->charge(); - - auto ind = theDetParam.theDet->index(); - pixelCPEforGPU::position(commonParamsGPU_, detParamsGPU_[ind], cp, 0); - auto xPos = cp.xpos[0]; - auto yPos = cp.ypos[0]; - - // set the error (mind ape....) 
- pixelCPEforGPU::errorFromDB(commonParamsGPU_, detParamsGPU_[ind], cp, 0); - theClusterParam.sigmax = cp.xerr[0]; - theClusterParam.sigmay = cp.yerr[0]; - - LogDebug("PixelCPEFast") << " in PixelCPEFast:localPosition - pos = " << xPos << " " << yPos << " size " - << cp.maxRow[0] - cp.minRow[0] << ' ' << cp.maxCol[0] - cp.minCol[0]; - - //--- Now put the two together - LocalPoint pos_in_local(xPos, yPos); - return pos_in_local; -} - -//============== INFLATED ERROR AND ERRORS FROM DB BELOW ================ - -//------------------------------------------------------------------------- -// Hit error in the local frame -//------------------------------------------------------------------------- -template -LocalError PixelCPEFast::localError(DetParam const& theDetParam, - ClusterParam& theClusterParamBase) const { - ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase); - - auto xerr = theClusterParam.sigmax; - auto yerr = theClusterParam.sigmay; - - LogDebug("PixelCPEFast") << " errors " << xerr << " " << yerr; - - auto xerr_sq = xerr * xerr; - auto yerr_sq = yerr * yerr; - - return LocalError(xerr_sq, 0, yerr_sq); -} - -template -void PixelCPEFast::fillPSetDescription(edm::ParameterSetDescription& desc) { - // call PixelCPEGenericBase fillPSetDescription to add common rechit errors - PixelCPEGenericBase::fillPSetDescription(desc); -} - -template class PixelCPEFast; -template class PixelCPEFast; -template class PixelCPEFast; diff --git a/RecoTauTag/HLTProducers/BuildFile.xml b/RecoTauTag/HLTProducers/BuildFile.xml index 6f4aa24552400..d7f729479c7b9 100644 --- a/RecoTauTag/HLTProducers/BuildFile.xml +++ b/RecoTauTag/HLTProducers/BuildFile.xml @@ -13,9 +13,6 @@ - - - diff --git a/RecoTauTag/HLTProducers/src/L2TauTagNNProducer.cc b/RecoTauTag/HLTProducers/src/L2TauTagNNProducer.cc deleted file mode 100644 index 7778fb7b9bd71..0000000000000 --- a/RecoTauTag/HLTProducers/src/L2TauTagNNProducer.cc +++ /dev/null @@ -1,822 +0,0 @@ -/* - * \class 
L2TauTagProducer - * - * L2Tau identification using Convolutional NN. - * - * \author Valeria D'Amante, Università di Siena and INFN Pisa - * Konstantin Androsov, EPFL and ETHZ -*/ -#include -#include -#include -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/Frameworkfwd.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "DataFormats/Math/interface/deltaR.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "FWCore/Utilities/interface/isFinite.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "PhysicsTools/TensorFlow/interface/TensorFlow.h" -#include "Geometry/CaloGeometry/interface/CaloCellGeometry.h" -#include "Geometry/CaloGeometry/interface/CaloGeometry.h" -#include "Geometry/CaloTopology/interface/HcalTopology.h" -#include "Geometry/Records/interface/CaloGeometryRecord.h" -#include "DataFormats/CaloRecHit/interface/CaloRecHit.h" -#include "DataFormats/EcalRecHit/interface/EcalRecHit.h" -#include "DataFormats/EcalRecHit/interface/EcalRecHitCollections.h" -#include "DataFormats/EcalDetId/interface/EcalDetIdCollections.h" -#include "DataFormats/HcalDetId/interface/HcalDetId.h" -#include "DataFormats/HcalRecHit/interface/HBHERecHit.h" -#include "DataFormats/HcalRecHit/interface/HcalRecHitDefs.h" -#include "DataFormats/HcalRecHit/interface/HFRecHit.h" -#include "DataFormats/HcalRecHit/interface/HORecHit.h" -#include "DataFormats/HLTReco/interface/TriggerTypeDefs.h" -#include "DataFormats/HLTReco/interface/TriggerFilterObjectWithRefs.h" -#include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h" -#include "RecoTracker/PixelTrackFitting/interface/FitUtils.h" -#include 
"TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h" -#include "DataFormats/TrackReco/interface/HitPattern.h" -#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h" -#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" -#include "DataFormats/GeometrySurface/interface/Plane.h" -#include "DataFormats/BeamSpot/interface/BeamSpot.h" -#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" - -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" - -namespace L2TauTagNNv1 { - constexpr int nCellEta = 5; - constexpr int nCellPhi = 5; - constexpr int nVars = 31; - constexpr float dR_max = 0.5; - enum class NNInputs { - nVertices = 0, - l1Tau_pt, - l1Tau_eta, - l1Tau_hwIso, - EcalEnergySum, - EcalSize, - EcalEnergyStdDev, - EcalDeltaEta, - EcalDeltaPhi, - EcalChi2, - EcalEnergySumForPositiveChi2, - EcalSizeForPositiveChi2, - HcalEnergySum, - HcalSize, - HcalEnergyStdDev, - HcalDeltaEta, - HcalDeltaPhi, - HcalChi2, - HcalEnergySumForPositiveChi2, - HcalSizeForPositiveChi2, - PatatrackPtSum, - PatatrackSize, - PatatrackSizeWithVertex, - PatatrackPtSumWithVertex, - PatatrackChargeSum, - PatatrackDeltaEta, - PatatrackDeltaPhi, - PatatrackChi2OverNdof, - PatatrackNdof, - PatatrackDxy, - PatatrackDz - }; - - const std::map varNameMap = { - {NNInputs::nVertices, "nVertices"}, - {NNInputs::l1Tau_pt, "l1Tau_pt"}, - {NNInputs::l1Tau_eta, "l1Tau_eta"}, - {NNInputs::l1Tau_hwIso, "l1Tau_hwIso"}, - {NNInputs::EcalEnergySum, "EcalEnergySum"}, - {NNInputs::EcalSize, "EcalSize"}, - {NNInputs::EcalEnergyStdDev, "EcalEnergyStdDev"}, - {NNInputs::EcalDeltaEta, "EcalDeltaEta"}, - {NNInputs::EcalDeltaPhi, 
"EcalDeltaPhi"}, - {NNInputs::EcalChi2, "EcalChi2"}, - {NNInputs::EcalEnergySumForPositiveChi2, "EcalEnergySumForPositiveChi2"}, - {NNInputs::EcalSizeForPositiveChi2, "EcalSizeForPositiveChi2"}, - {NNInputs::HcalEnergySum, "HcalEnergySum"}, - {NNInputs::HcalSize, "HcalSize"}, - {NNInputs::HcalEnergyStdDev, "HcalEnergyStdDev"}, - {NNInputs::HcalDeltaEta, "HcalDeltaEta"}, - {NNInputs::HcalDeltaPhi, "HcalDeltaPhi"}, - {NNInputs::HcalChi2, "HcalChi2"}, - {NNInputs::HcalEnergySumForPositiveChi2, "HcalEnergySumForPositiveChi2"}, - {NNInputs::HcalSizeForPositiveChi2, "HcalSizeForPositiveChi2"}, - {NNInputs::PatatrackPtSum, "PatatrackPtSum"}, - {NNInputs::PatatrackSize, "PatatrackSize"}, - {NNInputs::PatatrackSizeWithVertex, "PatatrackSizeWithVertex"}, - {NNInputs::PatatrackPtSumWithVertex, "PatatrackPtSumWithVertex"}, - {NNInputs::PatatrackChargeSum, "PatatrackChargeSum"}, - {NNInputs::PatatrackDeltaEta, "PatatrackDeltaEta"}, - {NNInputs::PatatrackDeltaPhi, "PatatrackDeltaPhi"}, - {NNInputs::PatatrackChi2OverNdof, "PatatrackChi2OverNdof"}, - {NNInputs::PatatrackNdof, "PatatrackNdof"}, - {NNInputs::PatatrackDxy, "PatatrackDxy"}, - {NNInputs::PatatrackDz, "PatatrackDz"}}; -} // namespace L2TauTagNNv1 -namespace { - inline float& getCellImpl( - tensorflow::Tensor& cellGridMatrix, int tau_idx, int phi_idx, int eta_idx, L2TauTagNNv1::NNInputs NNInput_idx) { - return cellGridMatrix.tensor()(tau_idx, phi_idx, eta_idx, static_cast(NNInput_idx)); - } -} // namespace -struct normDictElement { - float mean; - float std; - float min; - float max; -}; - -struct L2TauNNProducerCacheData { - L2TauNNProducerCacheData() : graphDef(nullptr), session(nullptr) {} - tensorflow::GraphDef* graphDef; - tensorflow::Session* session; - std::vector normVec; -}; - -class L2TauNNProducer : public edm::stream::EDProducer> { -public: - using TrackSoAHost = pixelTrack::TrackSoAHostPhase1; - - struct caloRecHitCollections { - const HBHERecHitCollection* hbhe; - const HORecHitCollection* ho; - const 
EcalRecHitCollection* eb; - const EcalRecHitCollection* ee; - const CaloGeometry* geometry; - }; - - struct InputDescTau { - std::string CollectionName; - edm::EDGetTokenT inputToken_; - }; - - static constexpr float dR2_max = L2TauTagNNv1::dR_max * L2TauTagNNv1::dR_max; - static constexpr float dEta_width = 2 * L2TauTagNNv1::dR_max / static_cast(L2TauTagNNv1::nCellEta); - static constexpr float dPhi_width = 2 * L2TauTagNNv1::dR_max / static_cast(L2TauTagNNv1::nCellPhi); - - explicit L2TauNNProducer(const edm::ParameterSet&, const L2TauNNProducerCacheData*); - static void fillDescriptions(edm::ConfigurationDescriptions&); - static std::unique_ptr initializeGlobalCache(const edm::ParameterSet&); - static void globalEndJob(L2TauNNProducerCacheData*); - -private: - void checknan(tensorflow::Tensor& tensor, int debugLevel); - void standardizeTensor(tensorflow::Tensor& tensor); - std::vector getTauScore(const tensorflow::Tensor& cellGridMatrix); - void produce(edm::Event& event, const edm::EventSetup& eventsetup) override; - void fillL1TauVars(tensorflow::Tensor& cellGridMatrix, const std::vector& allTaus); - void fillCaloRecHits(tensorflow::Tensor& cellGridMatrix, - const std::vector& allTaus, - const caloRecHitCollections& caloRecHits); - void fillPatatracks(tensorflow::Tensor& cellGridMatrix, - const std::vector& allTaus, - const TrackSoAHost& patatracks_tsoa, - const ZVertexSoAHost& patavtx_soa, - const reco::BeamSpot& beamspot, - const MagneticField* magfi); - void selectGoodTracksAndVertices(const ZVertexSoAHost& patavtx_soa, - const TrackSoAHost& patatracks_tsoa, - std::vector& trkGood, - std::vector& vtxGood); - - std::pair impactParameter(int it, - const TrackSoAHost& patatracks_tsoa, - float patatrackPhi, - const reco::BeamSpot& beamspot, - const MagneticField* magfi); - template - std::tuple getEtaPhiIndices(const VPos& position, const LVec& tau_p4); - template - std::tuple getEtaPhiIndices(float eta, float phi, const LVec& tau_p4); - -private: - const int 
debugLevel_; - const edm::EDGetTokenT tauTriggerToken_; - std::vector L1TauDesc_; - const edm::EDGetTokenT hbheToken_; - const edm::EDGetTokenT hoToken_; - const edm::EDGetTokenT ebToken_; - const edm::EDGetTokenT eeToken_; - const edm::ESGetToken geometryToken_; - const edm::ESGetToken bFieldToken_; - const edm::EDGetTokenT pataVerticesToken_; - const edm::EDGetTokenT pataTracksToken_; - const edm::EDGetTokenT beamSpotToken_; - const unsigned int maxVtx_; - const float fractionSumPt2_; - const float minSumPt2_; - const float trackPtMin_; - const float trackPtMax_; - const float trackChi2Max_; - std::string inputTensorName_; - std::string outputTensorName_; - const L2TauNNProducerCacheData* L2cacheData_; -}; - -std::unique_ptr L2TauNNProducer::initializeGlobalCache(const edm::ParameterSet& cfg) { - std::unique_ptr cacheData = std::make_unique(); - cacheData->normVec.reserve(L2TauTagNNv1::nVars); - - auto const graphPath = edm::FileInPath(cfg.getParameter("graphPath")).fullPath(); - - cacheData->graphDef = tensorflow::loadGraphDef(graphPath); - cacheData->session = tensorflow::createSession(cacheData->graphDef); - - boost::property_tree::ptree loadPtreeRoot; - auto const normalizationDict = edm::FileInPath(cfg.getParameter("normalizationDict")).fullPath(); - boost::property_tree::read_json(normalizationDict, loadPtreeRoot); - for (const auto& [key, val] : L2TauTagNNv1::varNameMap) { - boost::property_tree::ptree var = loadPtreeRoot.get_child(val); - normDictElement current_element; - current_element.mean = var.get_child("mean").get_value(); - current_element.std = var.get_child("std").get_value(); - current_element.min = var.get_child("min").get_value(); - current_element.max = var.get_child("max").get_value(); - cacheData->normVec.push_back(current_element); - } - return cacheData; -} -void L2TauNNProducer::globalEndJob(L2TauNNProducerCacheData* cacheData) { - if (cacheData->graphDef != nullptr) { - delete cacheData->graphDef; - } - 
tensorflow::closeSession(cacheData->session); -} -void L2TauNNProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.add("debugLevel", 0)->setComment("set debug level for printing out info"); - edm::ParameterSetDescription l1TausPset; - l1TausPset.add("L1CollectionName", "DoubleTau")->setComment("Name of collections"); - l1TausPset.add("L1TauTrigger", edm::InputTag("hltL1sDoubleTauBigOR")) - ->setComment("Which trigger should the L1 Taus collection pass"); - edm::ParameterSet l1TausPSetDefault; - l1TausPSetDefault.addParameter("L1CollectionName", "DoubleTau"); - l1TausPSetDefault.addParameter("L1TauTrigger", edm::InputTag("hltL1sDoubleTauBigOR")); - desc.addVPSet("L1Taus", l1TausPset, {l1TausPSetDefault}); - desc.add("hbheInput", edm::InputTag("hltHbhereco"))->setComment("HBHE recHit collection"); - desc.add("hoInput", edm::InputTag("hltHoreco"))->setComment("HO recHit Collection"); - desc.add("ebInput", edm::InputTag("hltEcalRecHit:EcalRecHitsEB"))->setComment("EB recHit Collection"); - desc.add("eeInput", edm::InputTag("hltEcalRecHit:EcalRecHitsEE"))->setComment("EE recHit Collection"); - desc.add("pataVertices", edm::InputTag("hltPixelVerticesSoA")) - ->setComment("patatrack vertices collection"); - desc.add("pataTracks", edm::InputTag("hltPixelTracksSoA"))->setComment("patatrack collection"); - desc.add("BeamSpot", edm::InputTag("hltOnlineBeamSpot"))->setComment("BeamSpot Collection"); - desc.add("maxVtx", 100)->setComment("max output collection size (number of accepted vertices)"); - desc.add("fractionSumPt2", 0.3)->setComment("threshold on sumPt2 fraction of the leading vertex"); - desc.add("minSumPt2", 0.)->setComment("min sumPt2"); - desc.add("track_pt_min", 1.0)->setComment("min track p_T"); - desc.add("track_pt_max", 10.0)->setComment("max track p_T"); - desc.add("track_chi2_max", 99999.)->setComment("max track chi2"); - desc.add("graphPath", 
"RecoTauTag/TrainingFiles/data/L2TauNNTag/L2TauTag_Run3v1.pb") - ->setComment("path to the saved CNN"); - desc.add("normalizationDict", "RecoTauTag/TrainingFiles/data/L2TauNNTag/NormalizationDict.json") - ->setComment("path to the dictionary for variable standardization"); - descriptions.addWithDefaultLabel(desc); -} - -L2TauNNProducer::L2TauNNProducer(const edm::ParameterSet& cfg, const L2TauNNProducerCacheData* cacheData) - : debugLevel_(cfg.getParameter("debugLevel")), - hbheToken_(consumes(cfg.getParameter("hbheInput"))), - hoToken_(consumes(cfg.getParameter("hoInput"))), - ebToken_(consumes(cfg.getParameter("ebInput"))), - eeToken_(consumes(cfg.getParameter("eeInput"))), - geometryToken_(esConsumes()), - bFieldToken_(esConsumes()), - pataVerticesToken_(consumes(cfg.getParameter("pataVertices"))), - pataTracksToken_(consumes(cfg.getParameter("pataTracks"))), - beamSpotToken_(consumes(cfg.getParameter("BeamSpot"))), - maxVtx_(cfg.getParameter("maxVtx")), - fractionSumPt2_(cfg.getParameter("fractionSumPt2")), - minSumPt2_(cfg.getParameter("minSumPt2")), - trackPtMin_(cfg.getParameter("track_pt_min")), - trackPtMax_(cfg.getParameter("track_pt_max")), - trackChi2Max_(cfg.getParameter("track_chi2_max")) { - if (cacheData->graphDef == nullptr) { - throw cms::Exception("InvalidCacheData") << "Invalid Cache Data."; - } - inputTensorName_ = cacheData->graphDef->node(0).name(); - outputTensorName_ = cacheData->graphDef->node(cacheData->graphDef->node_size() - 1).name(); - L2cacheData_ = cacheData; - std::vector L1TauCollections = cfg.getParameter>("L1Taus"); - L1TauDesc_.reserve(L1TauCollections.size()); - for (const auto& l1TauInput : L1TauCollections) { - InputDescTau toInsert; - toInsert.CollectionName = l1TauInput.getParameter("L1CollectionName"); - toInsert.inputToken_ = - consumes(l1TauInput.getParameter("L1TauTrigger")); - L1TauDesc_.push_back(toInsert); - } - for (const auto& desc : L1TauDesc_) - produces>(desc.CollectionName); -} - -void 
L2TauNNProducer::checknan(tensorflow::Tensor& tensor, int debugLevel) { - using NNInputs = L2TauTagNNv1::NNInputs; - std::vector tensor_shape(tensor.shape().dims()); - for (int d = 0; d < tensor.shape().dims(); d++) { - tensor_shape.at(d) = tensor.shape().dim_size(d); - } - if (tensor_shape.size() != 4) { - throw cms::Exception("InvalidTensor") << "Tensor shape does not have 4 dimensions!"; - } - for (int tau_idx = 0; tau_idx < tensor_shape.at(0); tau_idx++) { - for (int phi_idx = 0; phi_idx < tensor_shape.at(1); phi_idx++) { - for (int eta_idx = 0; eta_idx < tensor_shape.at(2); eta_idx++) { - for (int var_idx = 0; var_idx < tensor_shape.at(3); var_idx++) { - auto getCell = [&](NNInputs input) -> float& { - return getCellImpl(tensor, tau_idx, phi_idx, eta_idx, input); - }; - auto nonstd_var = getCell(static_cast(var_idx)); - if (edm::isNotFinite(nonstd_var)) { - edm::LogWarning("InputVar") << "var is nan \nvar name= " - << L2TauTagNNv1::varNameMap.at(static_cast(var_idx)) - << "\t var_idx = " << var_idx << "\t eta_idx = " << eta_idx - << "\t phi_idx = " << phi_idx << "\t tau_idx = " << tau_idx; - if (debugLevel > 2) { - edm::LogWarning("InputVar") << "other vars in same cell \n"; - if (var_idx + 1 < tensor_shape.at(3)) - edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 1)) - << "\t = " << getCell(static_cast(var_idx + 1)); - if (var_idx + 2 < tensor_shape.at(3)) - edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 2)) - << "\t = " << getCell(static_cast(var_idx + 2)); - if (var_idx + 3 < tensor_shape.at(3)) - edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 3)) - << "\t = " << getCell(static_cast(var_idx + 3)); - if (var_idx + 4 < tensor_shape.at(3)) - edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 4)) - << "\t = " << getCell(static_cast(var_idx + 4)); - } - } - } - } - } - } -} - -void 
L2TauNNProducer::standardizeTensor(tensorflow::Tensor& tensor) { - using NNInputs = L2TauTagNNv1::NNInputs; - std::vector tensor_shape(tensor.shape().dims()); - for (int d = 0; d < tensor.shape().dims(); d++) { - tensor_shape.at(d) = tensor.shape().dim_size(d); - } - if (tensor_shape.size() != 4) { - throw cms::Exception("InvalidTensor") << "Tensor shape does not have 4 dimensions!"; - } - for (int tau_idx = 0; tau_idx < tensor_shape.at(0); tau_idx++) { - for (int phi_idx = 0; phi_idx < tensor_shape.at(1); phi_idx++) { - for (int eta_idx = 0; eta_idx < tensor_shape.at(2); eta_idx++) { - for (int var_idx = 0; var_idx < tensor_shape.at(3); var_idx++) { - auto getCell = [&](NNInputs input) -> float& { - return getCellImpl(tensor, tau_idx, phi_idx, eta_idx, input); - }; - float mean = L2cacheData_->normVec.at(var_idx).mean; - float std = L2cacheData_->normVec.at(var_idx).std; - float min = L2cacheData_->normVec.at(var_idx).min; - float max = L2cacheData_->normVec.at(var_idx).max; - float nonstd_var = getCell(static_cast(var_idx)); - float std_var = static_cast((nonstd_var - mean) / std); - if (std_var > max) { - std_var = static_cast(max); - } else if (std_var < min) { - std_var = static_cast(min); - } - getCell(static_cast(var_idx)) = std_var; - } - } - } - } -} - -void L2TauNNProducer::fillL1TauVars(tensorflow::Tensor& cellGridMatrix, const std::vector& allTaus) { - using NNInputs = L2TauTagNNv1::NNInputs; - - const int nTaus = allTaus.size(); - for (int tau_idx = 0; tau_idx < nTaus; tau_idx++) { - for (int eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) { - for (int phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) { - auto getCell = [&](NNInputs input) -> float& { - return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input); - }; - getCell(NNInputs::l1Tau_pt) = allTaus[tau_idx]->pt(); - getCell(NNInputs::l1Tau_eta) = allTaus[tau_idx]->eta(); - getCell(NNInputs::l1Tau_hwIso) = allTaus[tau_idx]->hwIso(); - } - } - } -} - -template 
-std::tuple L2TauNNProducer::getEtaPhiIndices(float eta, float phi, const LVec& tau_p4) { - const float deta = eta - tau_p4.eta(); - const float dphi = reco::deltaPhi(phi, tau_p4.phi()); - const int eta_idx = static_cast(floor((deta + L2TauTagNNv1::dR_max) / dEta_width)); - const int phi_idx = static_cast(floor((dphi + L2TauTagNNv1::dR_max) / dPhi_width)); - return std::make_tuple(deta, dphi, eta_idx, phi_idx); -} - -template -std::tuple L2TauNNProducer::getEtaPhiIndices(const VPos& position, const LVec& tau_p4) { - return getEtaPhiIndices(position.eta(), position.phi(), tau_p4); -} - -void L2TauNNProducer::fillCaloRecHits(tensorflow::Tensor& cellGridMatrix, - const std::vector& allTaus, - const caloRecHitCollections& caloRecHits) { - using NNInputs = L2TauTagNNv1::NNInputs; - - const int nTaus = allTaus.size(); - float deta, dphi; - int eta_idx = 0; - int phi_idx = 0; - int tau_idx = 0; - - auto getCell = [&](NNInputs input) -> float& { - return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input); - }; - for (tau_idx = 0; tau_idx < nTaus; tau_idx++) { - // calorechit_EE - for (const auto& caloRecHit_ee : *caloRecHits.ee) { - if (caloRecHit_ee.energy() <= 0) - continue; - const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_ee.id())->getPosition(); - const float eeCalEn = caloRecHit_ee.energy(); - const float eeCalChi2 = caloRecHit_ee.chi2(); - if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { - std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); - getCell(NNInputs::EcalEnergySum) += eeCalEn; - getCell(NNInputs::EcalSize) += 1.; - getCell(NNInputs::EcalEnergyStdDev) += eeCalEn * eeCalEn; - getCell(NNInputs::EcalDeltaEta) += deta * eeCalEn; - getCell(NNInputs::EcalDeltaPhi) += dphi * eeCalEn; - if (eeCalChi2 >= 0) { - getCell(NNInputs::EcalChi2) += eeCalChi2 * eeCalEn; - getCell(NNInputs::EcalEnergySumForPositiveChi2) += eeCalEn; - getCell(NNInputs::EcalSizeForPositiveChi2) += 
1.; - } - } - } - - // calorechit_EB - for (const auto& caloRecHit_eb : *caloRecHits.eb) { - if (caloRecHit_eb.energy() <= 0) - continue; - const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_eb.id())->getPosition(); - const float ebCalEn = caloRecHit_eb.energy(); - const float ebCalChi2 = caloRecHit_eb.chi2(); - if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { - std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); - getCell(NNInputs::EcalEnergySum) += ebCalEn; - getCell(NNInputs::EcalSize) += 1.; - getCell(NNInputs::EcalEnergyStdDev) += ebCalEn * ebCalEn; - getCell(NNInputs::EcalDeltaEta) += deta * ebCalEn; - getCell(NNInputs::EcalDeltaPhi) += dphi * ebCalEn; - if (ebCalChi2 >= 0) { - getCell(NNInputs::EcalChi2) += ebCalChi2 * ebCalEn; - getCell(NNInputs::EcalEnergySumForPositiveChi2) += ebCalEn; - getCell(NNInputs::EcalSizeForPositiveChi2) += 1.; - } - } - } - - // calorechit_HBHE - for (const auto& caloRecHit_hbhe : *caloRecHits.hbhe) { - if (caloRecHit_hbhe.energy() <= 0) - continue; - const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_hbhe.id())->getPosition(); - const float hbheCalEn = caloRecHit_hbhe.energy(); - const float hbheCalChi2 = caloRecHit_hbhe.chi2(); - if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { - std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); - getCell(NNInputs::HcalEnergySum) += hbheCalEn; - getCell(NNInputs::HcalEnergyStdDev) += hbheCalEn * hbheCalEn; - getCell(NNInputs::HcalSize) += 1.; - getCell(NNInputs::HcalDeltaEta) += deta * hbheCalEn; - getCell(NNInputs::HcalDeltaPhi) += dphi * hbheCalEn; - if (hbheCalChi2 >= 0) { - getCell(NNInputs::HcalChi2) += hbheCalChi2 * hbheCalEn; - getCell(NNInputs::HcalEnergySumForPositiveChi2) += hbheCalEn; - getCell(NNInputs::HcalSizeForPositiveChi2) += 1.; - } - } - } - - // calorechit_HO - for (const auto& caloRecHit_ho : 
*caloRecHits.ho) { - if (caloRecHit_ho.energy() <= 0) - continue; - const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_ho.id())->getPosition(); - const float hoCalEn = caloRecHit_ho.energy(); - if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { - std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); - getCell(NNInputs::HcalEnergySum) += hoCalEn; - getCell(NNInputs::HcalEnergyStdDev) += hoCalEn * hoCalEn; - getCell(NNInputs::HcalSize) += 1.; - getCell(NNInputs::HcalDeltaEta) += deta * hoCalEn; - getCell(NNInputs::HcalDeltaPhi) += dphi * hoCalEn; - } - } - - // normalize to sum and define stdDev - for (eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) { - for (phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) { - /* normalize eCal vars*/ - if (getCell(NNInputs::EcalEnergySum) > 0.) { - getCell(NNInputs::EcalDeltaEta) /= getCell(NNInputs::EcalEnergySum); - getCell(NNInputs::EcalDeltaPhi) /= getCell(NNInputs::EcalEnergySum); - } - if (getCell(NNInputs::EcalEnergySumForPositiveChi2) > 0.) { - getCell(NNInputs::EcalChi2) /= getCell(NNInputs::EcalEnergySumForPositiveChi2); - } - if (getCell(NNInputs::EcalSize) > 1.) { - // (stdDev - (enSum*enSum)/size) / (size-1) - getCell(NNInputs::EcalEnergyStdDev) = - (getCell(NNInputs::EcalEnergyStdDev) - - (getCell(NNInputs::EcalEnergySum) * getCell(NNInputs::EcalEnergySum)) / getCell(NNInputs::EcalSize)) / - (getCell(NNInputs::EcalSize) - 1); - } else { - getCell(NNInputs::EcalEnergyStdDev) = 0.; - } - /* normalize hCal Vars */ - if (getCell(NNInputs::HcalEnergySum) > 0.) { - getCell(NNInputs::HcalDeltaEta) /= getCell(NNInputs::HcalEnergySum); - getCell(NNInputs::HcalDeltaPhi) /= getCell(NNInputs::HcalEnergySum); - } - if (getCell(NNInputs::HcalEnergySumForPositiveChi2) > 0.) { - getCell(NNInputs::HcalChi2) /= getCell(NNInputs::HcalEnergySumForPositiveChi2); - } - if (getCell(NNInputs::HcalSize) > 1.) 
{ - // (stdDev - (enSum*enSum)/size) / (size-1) - getCell(NNInputs::HcalEnergyStdDev) = - (getCell(NNInputs::HcalEnergyStdDev) - - (getCell(NNInputs::HcalEnergySum) * getCell(NNInputs::HcalEnergySum)) / getCell(NNInputs::HcalSize)) / - (getCell(NNInputs::HcalSize) - 1); - } else { - getCell(NNInputs::HcalEnergyStdDev) = 0.; - } - } - } - } -} - -void L2TauNNProducer::selectGoodTracksAndVertices(const ZVertexSoAHost& patavtx_soa, - const TrackSoAHost& patatracks_tsoa, - std::vector& trkGood, - std::vector& vtxGood) { - using patatrackHelpers = TracksUtilities; - const auto maxTracks = patatracks_tsoa.view().metadata().size(); - const int nv = patavtx_soa.view().nvFinal(); - trkGood.clear(); - trkGood.reserve(maxTracks); - vtxGood.clear(); - vtxGood.reserve(nv); - auto const* quality = patatracks_tsoa.view().quality(); - - // No need to sort either as the algorithms is just using the max (not even the location, just the max value of pt2sum). - std::vector pTSquaredSum(nv, 0); - std::vector nTrkAssociated(nv, 0); - - for (int32_t trk_idx = 0; trk_idx < maxTracks; ++trk_idx) { - auto nHits = patatrackHelpers::nHits(patatracks_tsoa.view(), trk_idx); - if (nHits == 0) { - break; - } - int vtx_ass_to_track = patavtx_soa.view()[trk_idx].idv(); - if (vtx_ass_to_track >= 0 && vtx_ass_to_track < nv) { - auto patatrackPt = patatracks_tsoa.view()[trk_idx].pt(); - ++nTrkAssociated[vtx_ass_to_track]; - if (patatrackPt >= trackPtMin_ && patatracks_tsoa.const_view()[trk_idx].chi2() <= trackChi2Max_) { - patatrackPt = std::min(patatrackPt, trackPtMax_); - pTSquaredSum[vtx_ass_to_track] += patatrackPt * patatrackPt; - } - } - if (nHits > 0 and quality[trk_idx] >= pixelTrack::Quality::loose) { - trkGood.push_back(trk_idx); - } - } - if (nv > 0) { - const auto minFOM_fromFrac = (*std::max_element(pTSquaredSum.begin(), pTSquaredSum.end())) * fractionSumPt2_; - for (int j = nv - 1; j >= 0 && vtxGood.size() < maxVtx_; --j) { - auto vtx_idx = patavtx_soa.view()[j].sortInd(); - 
assert(vtx_idx < nv); - if (nTrkAssociated[vtx_idx] >= 2 && pTSquaredSum[vtx_idx] >= minFOM_fromFrac && - pTSquaredSum[vtx_idx] > minSumPt2_) { - vtxGood.push_back(vtx_idx); - } - } - } -} - -std::pair L2TauNNProducer::impactParameter(int it, - const TrackSoAHost& patatracks_tsoa, - float patatrackPhi, - const reco::BeamSpot& beamspot, - const MagneticField* magfi) { - /* dxy and dz */ - riemannFit::Vector5d ipar, opar; - riemannFit::Matrix5d icov, ocov; - TracksUtilities::copyToDense(patatracks_tsoa.view(), ipar, icov, it); - riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); - LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); - float sp = std::sin(patatrackPhi); - float cp = std::cos(patatrackPhi); - Surface::RotationType Rotation(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0); - GlobalPoint BeamSpotPoint(beamspot.x0(), beamspot.y0(), beamspot.z0()); - Plane impPointPlane(BeamSpotPoint, Rotation); - GlobalTrajectoryParameters gp( - impPointPlane.toGlobal(lpar.position()), impPointPlane.toGlobal(lpar.momentum()), lpar.charge(), magfi); - GlobalPoint vv = gp.position(); - math::XYZPoint pos(vv.x(), vv.y(), vv.z()); - GlobalVector pp = gp.momentum(); - math::XYZVector mom(pp.x(), pp.y(), pp.z()); - auto lambda = M_PI_2 - pp.theta(); - auto phi = pp.phi(); - float patatrackDxy = -vv.x() * std::sin(phi) + vv.y() * std::cos(phi); - float patatrackDz = - (vv.z() * std::cos(lambda) - (vv.x() * std::cos(phi) + vv.y() * std::sin(phi)) * std::sin(lambda)) / - std::cos(lambda); - return std::make_pair(patatrackDxy, patatrackDz); -} - -void L2TauNNProducer::fillPatatracks(tensorflow::Tensor& cellGridMatrix, - const std::vector& allTaus, - const TrackSoAHost& patatracks_tsoa, - const ZVertexSoAHost& patavtx_soa, - const reco::BeamSpot& beamspot, - const MagneticField* magfi) { - using NNInputs = L2TauTagNNv1::NNInputs; - using patatrackHelpers = TracksUtilities; - float deta, dphi; - int eta_idx = 0; - int phi_idx = 0; - int tau_idx = 0; - - auto 
getCell = [&](NNInputs input) -> float& { - return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input); - }; - - std::vector trkGood; - std::vector vtxGood; - - selectGoodTracksAndVertices(patavtx_soa, patatracks_tsoa, trkGood, vtxGood); - - const int nTaus = allTaus.size(); - for (tau_idx = 0; tau_idx < nTaus; tau_idx++) { - const float tauEta = allTaus[tau_idx]->eta(); - const float tauPhi = allTaus[tau_idx]->phi(); - - for (const auto it : trkGood) { - const float patatrackPt = patatracks_tsoa.const_view()[it].pt(); - if (patatrackPt <= 0) - continue; - const float patatrackPhi = patatrackHelpers::phi(patatracks_tsoa.const_view(), it); - const float patatrackEta = patatracks_tsoa.const_view()[it].eta(); - const float patatrackCharge = patatrackHelpers::charge(patatracks_tsoa.const_view(), it); - const float patatrackChi2OverNdof = patatracks_tsoa.view()[it].chi2(); - const auto nHits = patatrackHelpers::nHits(patatracks_tsoa.const_view(), it); - if (nHits <= 0) - continue; - const int patatrackNdof = 2 * std::min(6, nHits) - 5; - - const int vtx_idx_assTrk = patavtx_soa.view()[it].idv(); - if (reco::deltaR2(patatrackEta, patatrackPhi, tauEta, tauPhi) < dR2_max) { - std::tie(deta, dphi, eta_idx, phi_idx) = - getEtaPhiIndices(patatrackEta, patatrackPhi, allTaus[tau_idx]->polarP4()); - getCell(NNInputs::PatatrackPtSum) += patatrackPt; - getCell(NNInputs::PatatrackSize) += 1.; - getCell(NNInputs::PatatrackChargeSum) += patatrackCharge; - getCell(NNInputs::PatatrackDeltaEta) += deta * patatrackPt; - getCell(NNInputs::PatatrackDeltaPhi) += dphi * patatrackPt; - getCell(NNInputs::PatatrackChi2OverNdof) += patatrackChi2OverNdof * patatrackPt; - getCell(NNInputs::PatatrackNdof) += patatrackNdof * patatrackPt; - std::pair impactParameters = impactParameter(it, patatracks_tsoa, patatrackPhi, beamspot, magfi); - getCell(NNInputs::PatatrackDxy) += impactParameters.first * patatrackPt; - getCell(NNInputs::PatatrackDz) += impactParameters.second * patatrackPt; - if 
((std::find(vtxGood.begin(), vtxGood.end(), vtx_idx_assTrk) != vtxGood.end())) { - getCell(NNInputs::PatatrackPtSumWithVertex) += patatrackPt; - getCell(NNInputs::PatatrackSizeWithVertex) += 1.; - } - } - } - - // normalize to sum and define stdDev - for (eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) { - for (phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) { - getCell(NNInputs::nVertices) = vtxGood.size(); - if (getCell(NNInputs::PatatrackPtSum) > 0.) { - getCell(NNInputs::PatatrackDeltaEta) /= getCell(NNInputs::PatatrackPtSum); - getCell(NNInputs::PatatrackDeltaPhi) /= getCell(NNInputs::PatatrackPtSum); - getCell(NNInputs::PatatrackChi2OverNdof) /= getCell(NNInputs::PatatrackPtSum); - getCell(NNInputs::PatatrackNdof) /= getCell(NNInputs::PatatrackPtSum); - getCell(NNInputs::PatatrackDxy) /= getCell(NNInputs::PatatrackPtSum); - getCell(NNInputs::PatatrackDz) /= getCell(NNInputs::PatatrackPtSum); - } - } - } - } -} - -std::vector L2TauNNProducer::getTauScore(const tensorflow::Tensor& cellGridMatrix) { - const int nTau = cellGridMatrix.shape().dim_size(0); - std::vector pred_vector(nTau); - if (nTau > 0) { - // Only run the inference if there are taus to process - std::vector pred_tensor; - tensorflow::run(L2cacheData_->session, {{inputTensorName_, cellGridMatrix}}, {outputTensorName_}, &pred_tensor); - for (int tau_idx = 0; tau_idx < nTau; ++tau_idx) { - pred_vector[tau_idx] = pred_tensor[0].matrix()(tau_idx, 0); - } - } - return pred_vector; -} - -void L2TauNNProducer::produce(edm::Event& event, const edm::EventSetup& eventsetup) { - std::vector> TauCollectionMap(L1TauDesc_.size()); - l1t::TauVectorRef allTaus; - - for (size_t inp_idx = 0; inp_idx < L1TauDesc_.size(); inp_idx++) { - l1t::TauVectorRef l1Taus; - auto const& l1TriggeredTaus = event.get(L1TauDesc_[inp_idx].inputToken_); - l1TriggeredTaus.getObjects(trigger::TriggerL1Tau, l1Taus); - TauCollectionMap.at(inp_idx).resize(l1Taus.size()); - - for (size_t l1_idx = 0; l1_idx < 
l1Taus.size(); l1_idx++) { - size_t tau_idx; - const auto iter = std::find(allTaus.begin(), allTaus.end(), l1Taus[l1_idx]); - if (iter != allTaus.end()) { - tau_idx = std::distance(allTaus.begin(), iter); - } else { - allTaus.push_back(l1Taus[l1_idx]); - tau_idx = allTaus.size() - 1; - } - TauCollectionMap.at(inp_idx).at(l1_idx) = tau_idx; - } - } - const auto ebCal = event.getHandle(ebToken_); - const auto eeCal = event.getHandle(eeToken_); - const auto hbhe = event.getHandle(hbheToken_); - const auto ho = event.getHandle(hoToken_); - auto const& patatracks_SoA = event.get(pataTracksToken_); - auto const& vertices_SoA = event.get(pataVerticesToken_); - const auto bsHandle = event.getHandle(beamSpotToken_); - - auto const fieldESH = eventsetup.getHandle(bFieldToken_); - auto const geometry = eventsetup.getHandle(geometryToken_); - - caloRecHitCollections caloRecHits; - caloRecHits.hbhe = &*hbhe; - caloRecHits.ho = &*ho; - caloRecHits.eb = &*ebCal; - caloRecHits.ee = &*eeCal; - caloRecHits.geometry = &*geometry; - - const int nTaus = allTaus.size(); - tensorflow::Tensor cellGridMatrix(tensorflow::DT_FLOAT, - {nTaus, L2TauTagNNv1::nCellEta, L2TauTagNNv1::nCellPhi, L2TauTagNNv1::nVars}); - const int n_inputs = nTaus * L2TauTagNNv1::nCellEta * L2TauTagNNv1::nCellPhi * L2TauTagNNv1::nVars; - for (int input_idx = 0; input_idx < n_inputs; ++input_idx) { - cellGridMatrix.flat()(input_idx) = 0; - } - fillL1TauVars(cellGridMatrix, allTaus); - - fillCaloRecHits(cellGridMatrix, allTaus, caloRecHits); - - fillPatatracks(cellGridMatrix, allTaus, patatracks_SoA, vertices_SoA, *bsHandle, fieldESH.product()); - - standardizeTensor(cellGridMatrix); - - if (debugLevel_ > 0) { - checknan(cellGridMatrix, debugLevel_); - } - - std::vector tau_score = getTauScore(cellGridMatrix); - - for (size_t inp_idx = 0; inp_idx < L1TauDesc_.size(); inp_idx++) { - const size_t nTau = TauCollectionMap[inp_idx].size(); - auto tau_tags = std::make_unique>(nTau); - for (size_t tau_pos = 0; tau_pos < 
nTau; ++tau_pos) { - const auto tau_idx = TauCollectionMap[inp_idx][tau_pos]; - if (debugLevel_ > 0) { - edm::LogInfo("DebugInfo") << event.id().event() << " \t " << (allTaus[tau_idx])->pt() << " \t " - << tau_score.at(tau_idx) << std::endl; - } - (*tau_tags)[tau_pos] = tau_score.at(tau_idx); - } - event.put(std::move(tau_tags), L1TauDesc_[inp_idx].CollectionName); - } -} -//define this as a plug-in -#include "FWCore/Framework/interface/MakerMacros.h" -DEFINE_FWK_MODULE(L2TauNNProducer); diff --git a/RecoTracker/PixelSeeding/plugins/BrokenLineFitOnGPU.cc b/RecoTracker/PixelSeeding/plugins/BrokenLineFitOnGPU.cc deleted file mode 100644 index ad3de7be225db..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/BrokenLineFitOnGPU.cc +++ /dev/null @@ -1,118 +0,0 @@ -#include "BrokenLineFitOnGPU.h" - -template -void HelixFitOnGPU::launchBrokenLineKernelsOnCPU(const TrackingRecHitSoAConstView &hv, - uint32_t hitsInFit, - uint32_t maxNumberOfTuples) { - assert(tuples_); - -#ifdef BROKENLINE_DEBUG - setlinebuf(stdout); -#endif - - // Fit internals - auto tkidGPU = std::make_unique(maxNumberOfConcurrentFits_); - auto hitsGPU = - std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double)); - auto hits_geGPU = - std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float)); - auto fast_fit_resultsGPU = - std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double)); - - for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) { - // fit triplets - kernel_BLFastFit<3, TrackerTraits>(tuples_, - tupleMultiplicity_, - hv, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - 3, - 3, - offset); - - kernel_BLFit<3, TrackerTraits>(tupleMultiplicity_, - bField_, - outputSoa_, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get()); - if (fitNas4_) { - riemannFit::rolling_fits<4, 
TrackerTraits::maxHitsOnTrack, 1>( - [this, &hv, &tkidGPU, &hitsGPU, &hits_geGPU, &fast_fit_resultsGPU, &offset](auto i) { - kernel_BLFastFit<4, TrackerTraits>(tuples_, - tupleMultiplicity_, - hv, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - 4, - i, - offset); - - kernel_BLFit<4, TrackerTraits>(tupleMultiplicity_, - bField_, - outputSoa_, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get()); - }); - - } else { - //Fit these using all the hits they have - riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrackForFullFit, 1>( - [this, &hv, &tkidGPU, &hitsGPU, &hits_geGPU, &fast_fit_resultsGPU, &offset](auto i) { - kernel_BLFastFit(tuples_, - tupleMultiplicity_, - hv, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - i, - i, - offset); - - kernel_BLFit(tupleMultiplicity_, - bField_, - outputSoa_, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get()); - }); - - static_assert(TrackerTraits::maxHitsOnTrackForFullFit < TrackerTraits::maxHitsOnTrack); - - //Fit all the rest using the maximum from previous call - - kernel_BLFastFit(tuples_, - tupleMultiplicity_, - hv, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - TrackerTraits::maxHitsOnTrackForFullFit, - TrackerTraits::maxHitsOnTrack - 1, - offset); - - kernel_BLFit(tupleMultiplicity_, - bField_, - outputSoa_, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get()); - } - - } // loop on concurrent fits -} - -template class HelixFitOnGPU; -template class HelixFitOnGPU; -template class HelixFitOnGPU; diff --git a/RecoTracker/PixelSeeding/plugins/BrokenLineFitOnGPU.cu b/RecoTracker/PixelSeeding/plugins/BrokenLineFitOnGPU.cu deleted file mode 100644 index 1558fa9ae6176..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/BrokenLineFitOnGPU.cu +++ /dev/null @@ -1,145 +0,0 @@ -#include "BrokenLineFitOnGPU.h" -#include 
"HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -template -void HelixFitOnGPU::launchBrokenLineKernels(const TrackingRecHitSoAConstView& hv, - uint32_t hitsInFit, - uint32_t maxNumberOfTuples, - cudaStream_t stream) { - assert(tuples_); - - auto blockSize = 64; - auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize; - - // Fit internals - auto tkidGPU = - cms::cuda::make_device_unique(maxNumberOfConcurrentFits_, stream); - auto hitsGPU = cms::cuda::make_device_unique( - maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double), stream); - auto hits_geGPU = cms::cuda::make_device_unique( - maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float), stream); - auto fast_fit_resultsGPU = cms::cuda::make_device_unique( - maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double), stream); - - for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) { - // fit triplets - - kernel_BLFastFit<3, TrackerTraits><<>>(tuples_, - tupleMultiplicity_, - hv, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - 3, - 3, - offset); - cudaCheck(cudaGetLastError()); - - kernel_BLFit<3, TrackerTraits><<>>(tupleMultiplicity_, - bField_, - outputSoa_, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get()); - cudaCheck(cudaGetLastError()); - - if (fitNas4_) { - // fit all as 4 - riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrack, 1>([this, - &hv, - &tkidGPU, - &hitsGPU, - &hits_geGPU, - &fast_fit_resultsGPU, - &offset, - &numberOfBlocks, - &blockSize, - &stream](auto i) { - kernel_BLFastFit<4, TrackerTraits><<>>(tuples_, - tupleMultiplicity_, - hv, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - 4, - 4, - offset); - - cudaCheck(cudaGetLastError()); - - kernel_BLFit<4, TrackerTraits><<>>(tupleMultiplicity_, - bField_, - outputSoa_, - tkidGPU.get(), - 
hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get()); - - cudaCheck(cudaGetLastError()); - }); - - } else { - riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrackForFullFit, 1>([this, - &hv, - &tkidGPU, - &hitsGPU, - &hits_geGPU, - &fast_fit_resultsGPU, - &offset, - &numberOfBlocks, - &blockSize, - &stream](auto i) { - kernel_BLFastFit<<>>(tuples_, - tupleMultiplicity_, - hv, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - i, - i, - offset); - - kernel_BLFit<<<8, blockSize, 0, stream>>>(tupleMultiplicity_, - bField_, - outputSoa_, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get()); - }); - - static_assert(TrackerTraits::maxHitsOnTrackForFullFit < TrackerTraits::maxHitsOnTrack); - - //Fit all the rest using the maximum from previous call - kernel_BLFastFit - <<>>(tuples_, - tupleMultiplicity_, - hv, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - TrackerTraits::maxHitsOnTrackForFullFit, - TrackerTraits::maxHitsOnTrack - 1, - offset); - - kernel_BLFit - <<<8, blockSize, 0, stream>>>(tupleMultiplicity_, - bField_, - outputSoa_, - tkidGPU.get(), - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get()); - } - - } // loop on concurrent fits -} - -template class HelixFitOnGPU; -template class HelixFitOnGPU; -template class HelixFitOnGPU; diff --git a/RecoTracker/PixelSeeding/plugins/BrokenLineFitOnGPU.h b/RecoTracker/PixelSeeding/plugins/BrokenLineFitOnGPU.h deleted file mode 100644 index 7a3e938fae3ec..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/BrokenLineFitOnGPU.h +++ /dev/null @@ -1,233 +0,0 @@ -// -// Author: Felice Pantaleo, CERN -// - -//#define BROKENLINE_DEBUG -//#define BL_DUMP_HITS -#include - -#include - -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" 
-#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" -#include "RecoTracker/PixelTrackFitting/interface/BrokenLine.h" - -#include "HelixFitOnGPU.h" - -template -using Tuples = typename TrackSoA::HitContainer; -template -using OutputSoAView = TrackSoAView; -template -using TupleMultiplicity = caStructures::TupleMultiplicityT; - -// #define BL_DUMP_HITS - -template -__global__ void kernel_BLFastFit(Tuples const *__restrict__ foundNtuplets, - TupleMultiplicity const *__restrict__ tupleMultiplicity, - TrackingRecHitSoAConstView hh, - typename TrackerTraits::tindex_type *__restrict__ ptkids, - double *__restrict__ phits, - float *__restrict__ phits_ge, - double *__restrict__ pfast_fit, - uint32_t nHitsL, - uint32_t nHitsH, - int32_t offset) { - constexpr uint32_t hitsInFit = N; - constexpr auto invalidTkId = std::numeric_limits::max(); - - assert(hitsInFit <= nHitsL); - assert(nHitsL <= nHitsH); - assert(phits); - assert(pfast_fit); - assert(foundNtuplets); - assert(tupleMultiplicity); - - // look in bin for this hit multiplicity - auto local_start = blockIdx.x * blockDim.x + threadIdx.x; - int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL); - assert(totTK <= int(tupleMultiplicity->size())); - assert(totTK >= 0); - -#ifdef BROKENLINE_DEBUG - if (0 == local_start) { - printf("%d total Ntuple\n", tupleMultiplicity->size()); - printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit); - } -#endif - - for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt; - local_idx += gridDim.x * blockDim.x) { - int tuple_idx = local_idx + offset; - if (tuple_idx >= totTK) { - ptkids[local_idx] = invalidTkId; - break; - } - // get it from the ntuple container (one to one to helix) - auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx); - assert(int(tkid) < foundNtuplets->nOnes()); - - ptkids[local_idx] = tkid; - - auto nHits = foundNtuplets->size(tkid); - - 
assert(nHits >= nHitsL); - assert(nHits <= nHitsH); - - riemannFit::Map3xNd hits(phits + local_idx); - riemannFit::Map4d fast_fit(pfast_fit + local_idx); - riemannFit::Map6xNf hits_ge(phits_ge + local_idx); - -#ifdef BL_DUMP_HITS - __shared__ int done; - done = 0; - __syncthreads(); - bool dump = (foundNtuplets->size(tkid) == 5 && 0 == atomicAdd(&done, 1)); -#endif - - // Prepare data structure - auto const *hitId = foundNtuplets->begin(tkid); - - // #define YERR_FROM_DC -#ifdef YERR_FROM_DC - // try to compute more precise error in y - auto dx = hh[hitId[hitsInFit - 1]].xGlobal() - hh[hitId[0]].xGlobal(); - auto dy = hh[hitId[hitsInFit - 1]].yGlobal() - hh[hitId[0]].yGlobal(); - auto dz = hh[hitId[hitsInFit - 1]].zGlobal() - hh[hitId[0]].zGlobal(); - float ux, uy, uz; -#endif - - float incr = std::max(1.f, float(nHits) / float(hitsInFit)); - float n = 0; - for (uint32_t i = 0; i < hitsInFit; ++i) { - int j = int(n + 0.5f); // round - if (hitsInFit - 1 == i) - j = nHits - 1; // force last hit to ensure max lever arm. - assert(j < int(nHits)); - n += incr; - auto hit = hitId[j]; - float ge[6]; - -#ifdef YERR_FROM_DC - auto const &dp = hh.cpeParams().detParams(hh.detectorIndex(hit)); - auto status = hh[hit].chargeAndStatus().status; - int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin; - assert(qbin >= 0 && qbin < 5); - bool nok = (status.isBigY | status.isOneY); - // compute cotanbeta and use it to recompute error - dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz); - auto cb = std::abs(uy / uz); - int bin = - int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4; - int low_value = 0; - int high_value = CPEFastParametrisation::kNumErrorBins - 1; - // return estimated bin value truncated to [0, 15] - bin = std::clamp(bin, low_value, high_value); - float yerr = dp.sigmay[bin] * 1.e-4f; // toCM - yerr *= dp.yfact[qbin]; // inflate - yerr *= yerr; - yerr += dp.apeYY; - yerr = nok ? 
hh[hit].yerrLocal() : yerr; - dp.frame.toGlobal(hh[hit].xerrLocal(), 0, yerr, ge); -#else - hh.cpeParams().detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge); -#endif - -#ifdef BL_DUMP_HITS - bool dump = foundNtuplets->size(tkid) == 5; - if (dump) { - printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n", - local_idx, - tkid, - hit, - hh[hit].detectorIndex(), - i, - hh[hit].xGlobal(), - hh[hit].yGlobal(), - hh[hit].zGlobal()); - printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]); - } -#endif - - hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal(); - hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]; - } - brokenline::fastFit(hits, fast_fit); - - // no NaN here.... - assert(fast_fit(0) == fast_fit(0)); - assert(fast_fit(1) == fast_fit(1)); - assert(fast_fit(2) == fast_fit(2)); - assert(fast_fit(3) == fast_fit(3)); - } -} - -template -__global__ void kernel_BLFit(TupleMultiplicity const *__restrict__ tupleMultiplicity, - double bField, - OutputSoAView results_view, - typename TrackerTraits::tindex_type const *__restrict__ ptkids, - double *__restrict__ phits, - float *__restrict__ phits_ge, - double *__restrict__ pfast_fit) { - assert(results_view.pt()); - assert(results_view.eta()); - assert(results_view.chi2()); - assert(pfast_fit); - constexpr auto invalidTkId = std::numeric_limits::max(); - - // same as above... 
- // look in bin for this hit multiplicity - auto local_start = blockIdx.x * blockDim.x + threadIdx.x; - for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt; - local_idx += gridDim.x * blockDim.x) { - if (invalidTkId == ptkids[local_idx]) - break; - auto tkid = ptkids[local_idx]; - - assert(tkid < TrackerTraits::maxNumberOfTuples); - - riemannFit::Map3xNd hits(phits + local_idx); - riemannFit::Map4d fast_fit(pfast_fit + local_idx); - riemannFit::Map6xNf hits_ge(phits_ge + local_idx); - - brokenline::PreparedBrokenLineData data; - - brokenline::karimaki_circle_fit circle; - riemannFit::LineFit line; - - brokenline::prepareBrokenLineData(hits, fast_fit, bField, data); - brokenline::lineFit(hits_ge, fast_fit, bField, data, line); - brokenline::circleFit(hits, hits_ge, fast_fit, bField, data, circle); - - TracksUtilities::copyFromCircle( - results_view, circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid); - results_view[tkid].pt() = float(bField) / float(std::abs(circle.par(2))); - results_view[tkid].eta() = asinhf(line.par(0)); - results_view[tkid].chi2() = (circle.chi2 + line.chi2) / (2 * N - 5); - -#ifdef BROKENLINE_DEBUG - if (!(circle.chi2 >= 0) || !(line.chi2 >= 0)) - printf("kernelBLFit failed! 
%f/%f\n", circle.chi2, line.chi2); - printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n", - N, - N, - tkid, - circle.par(0), - circle.par(1), - circle.par(2)); - printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1)); - printf("kernelBLHits chi2 cov %f/%f %e,%e,%e,%e,%e\n", - circle.chi2, - line.chi2, - circle.cov(0, 0), - circle.cov(1, 1), - circle.cov(2, 2), - line.cov(0, 0), - line.cov(1, 1)); -#endif - } -} diff --git a/RecoTracker/PixelSeeding/plugins/BuildFile.xml b/RecoTracker/PixelSeeding/plugins/BuildFile.xml index a387c35caa691..1e02488807438 100644 --- a/RecoTracker/PixelSeeding/plugins/BuildFile.xml +++ b/RecoTracker/PixelSeeding/plugins/BuildFile.xml @@ -8,16 +8,7 @@ - - - - - - - - - - + diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletCUDA.cc b/RecoTracker/PixelSeeding/plugins/CAHitNtupletCUDA.cc deleted file mode 100644 index 06ca6476bc4fa..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletCUDA.cc +++ /dev/null @@ -1,118 +0,0 @@ -#include - -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include 
"FWCore/Utilities/interface/RunningAverage.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" -#include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h" - -#include "CAHitNtupletGeneratorOnGPU.h" - -template -class CAHitNtupletCUDAT : public edm::global::EDProducer<> { - using HitsConstView = TrackingRecHitSoAConstView; - using HitsOnDevice = TrackingRecHitSoADevice; - using HitsOnHost = TrackingRecHitSoAHost; - - using TrackSoAHost = TrackSoAHeterogeneousHost; - using TrackSoADevice = TrackSoAHeterogeneousDevice; - - using GPUAlgo = CAHitNtupletGeneratorOnGPU; - -public: - explicit CAHitNtupletCUDAT(const edm::ParameterSet& iConfig); - ~CAHitNtupletCUDAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void beginJob() override; - void endJob() override; - - void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; - - bool onGPU_; - - edm::ESGetToken tokenField_; - edm::EDGetTokenT> tokenHitGPU_; - edm::EDPutTokenT> tokenTrackGPU_; - edm::EDGetTokenT tokenHitCPU_; - edm::EDPutTokenT tokenTrackCPU_; - - GPUAlgo gpuAlgo_; -}; - -template -CAHitNtupletCUDAT::CAHitNtupletCUDAT(const edm::ParameterSet& iConfig) - : onGPU_(iConfig.getParameter("onGPU")), tokenField_(esConsumes()), gpuAlgo_(iConfig, consumesCollector()) { - if (onGPU_) { - tokenHitGPU_ = consumes(iConfig.getParameter("pixelRecHitSrc")); - tokenTrackGPU_ = produces>(); - } else { - tokenHitCPU_ = consumes(iConfig.getParameter("pixelRecHitSrc")); - tokenTrackCPU_ = produces(); - } -} - -template -void CAHitNtupletCUDAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - desc.add("onGPU", true); - desc.add("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingCUDA")); - - GPUAlgo::fillDescriptions(desc); - 
descriptions.addWithDefaultLabel(desc); -} - -template -void CAHitNtupletCUDAT::beginJob() { - gpuAlgo_.beginJob(); -} - -template -void CAHitNtupletCUDAT::endJob() { - gpuAlgo_.endJob(); -} - -template -void CAHitNtupletCUDAT::produce(edm::StreamID streamID, - edm::Event& iEvent, - const edm::EventSetup& es) const { - auto bf = 1. / es.getData(tokenField_).inverseBzAtOriginInGeV(); - - if (onGPU_) { - auto const& hits = iEvent.get(tokenHitGPU_); - - cms::cuda::ScopedContextProduce ctx{hits}; - auto& hits_d = ctx.get(hits); - ctx.emplace(iEvent, tokenTrackGPU_, gpuAlgo_.makeTuplesAsync(hits_d, bf, ctx.stream())); - } else { - auto& hits_h = iEvent.get(tokenHitCPU_); - iEvent.emplace(tokenTrackCPU_, gpuAlgo_.makeTuples(hits_h, bf)); - } -} - -using CAHitNtupletCUDAPhase1 = CAHitNtupletCUDAT; -DEFINE_FWK_MODULE(CAHitNtupletCUDAPhase1); - -using CAHitNtupletCUDAPhase2 = CAHitNtupletCUDAT; -DEFINE_FWK_MODULE(CAHitNtupletCUDAPhase2); - -using CAHitNtupletCUDAHIonPhase1 = CAHitNtupletCUDAT; -DEFINE_FWK_MODULE(CAHitNtupletCUDAHIonPhase1); diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cc b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cc deleted file mode 100644 index 7646da18faf17..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cc +++ /dev/null @@ -1,230 +0,0 @@ -#include - -#include "CAHitNtupletGeneratorKernelsImpl.h" - -namespace { - // cuda atomics are NOT atomics on CPU so protect stat update with a mutex - // waiting for a more general solution (incuding multiple devices) to be proposed and implemented - std::mutex lock_stat; -} // namespace - -template -void CAHitNtupletGeneratorKernelsCPU::printCounters(Counters const *counters) { - caHitNtupletGeneratorKernels::kernel_printCounters(counters); -} - -template -void CAHitNtupletGeneratorKernelsCPU::buildDoublets(const HitsConstView &hh, - int32_t offsetBPIX2, - cudaStream_t stream) { - using namespace gpuPixelDoublets; - - using 
GPUCACell = GPUCACellT; - using OuterHitOfCell = typename GPUCACell::OuterHitOfCell; - using CellNeighbors = typename GPUCACell::CellNeighbors; - using CellTracks = typename GPUCACell::CellTracks; - using OuterHitOfCellContainer = typename GPUCACell::OuterHitOfCellContainer; - - auto nhits = hh.nHits(); - -#ifdef NTUPLE_DEBUG - std::cout << "building Doublets out of " << nhits << " Hits. BPIX2 offset is " << offsetBPIX2 << std::endl; -#endif - - // use "nhits" to heuristically dimension the workspace - - // no need to use the Traits allocations, since we know this is being compiled for the CPU - //this->device_isOuterHitOfCell_ = Traits::template make_unique(std::max(1U, nhits), stream); - this->device_isOuterHitOfCell_ = std::make_unique(std::max(1U, nhits)); - assert(this->device_isOuterHitOfCell_.get()); - this->isOuterHitOfCell_ = OuterHitOfCell{this->device_isOuterHitOfCell_.get(), offsetBPIX2}; - - auto cellStorageSize = TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors) + - TrackerTraits::maxNumOfActiveDoublets * sizeof(CellTracks); - // no need to use the Traits allocations, since we know this is being compiled for the CPU - //cellStorage_ = Traits::template make_unique(cellStorageSize, stream); - this->cellStorage_ = std::make_unique(cellStorageSize); - this->device_theCellNeighborsContainer_ = (CellNeighbors *)this->cellStorage_.get(); - this->device_theCellTracksContainer_ = - (CellTracks *)(this->cellStorage_.get() + TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors)); - - initDoublets(this->isOuterHitOfCell_, - nhits, - this->device_theCellNeighbors_.get(), - this->device_theCellNeighborsContainer_, - this->device_theCellTracks_.get(), - this->device_theCellTracksContainer_); - - // no need to use the Traits allocations, since we know this is being compiled for the CPU - this->device_theCells_ = std::make_unique(this->params_.caParams_.maxNumberOfDoublets_); - if (0 == nhits) - return; // protect against empty events - - // take 
all layer pairs into account - auto nActualPairs = this->params_.nPairs(); - - assert(nActualPairs <= TrackerTraits::nPairs); - - getDoubletsFromHisto(this->device_theCells_.get(), - this->device_nCells_, - this->device_theCellNeighbors_.get(), - this->device_theCellTracks_.get(), - hh, - this->isOuterHitOfCell_, - nActualPairs, - this->params_.caParams_.maxNumberOfDoublets_, - this->device_cellCuts_.get()); -} - -template -void CAHitNtupletGeneratorKernelsCPU::launchKernels(const HitsConstView &hh, - TkSoAView &tracks_view, - cudaStream_t cudaStream) { - using namespace caHitNtupletGeneratorKernels; - - // zero tuples - cms::cuda::launchZero(&tracks_view.hitIndices(), cudaStream); - - uint32_t nhits = hh.metadata().size(); - -#ifdef NTUPLE_DEBUG - std::cout << "start tuple building. N hits " << nhits << std::endl; - if (nhits < 2) - std::cout << "too few hits " << nhits << std::endl; -#endif - - // - // applying conbinatoric cleaning such as fishbone at this stage is too expensive - // - - kernel_connect(this->device_hitTuple_apc_, - this->device_hitToTuple_apc_, // needed only to be reset, ready for next kernel - hh, - this->device_theCells_.get(), - this->device_nCells_, - this->device_theCellNeighbors_.get(), - this->isOuterHitOfCell_, - this->params_.caParams_); - - if (nhits > 1 && this->params_.earlyFishbone_) { - gpuPixelDoublets::fishbone( - hh, this->device_theCells_.get(), this->device_nCells_, this->isOuterHitOfCell_, nhits, false); - } - - kernel_find_ntuplets(hh, - tracks_view, - this->device_theCells_.get(), - this->device_nCells_, - this->device_theCellTracks_.get(), - this->device_hitTuple_apc_, - this->params_.caParams_); - if (this->params_.doStats_) - kernel_mark_used(this->device_theCells_.get(), this->device_nCells_); - - cms::cuda::finalizeBulk(this->device_hitTuple_apc_, &tracks_view.hitIndices()); - - kernel_fillHitDetIndices(tracks_view, hh); - kernel_fillNLayers(tracks_view, this->device_hitTuple_apc_); - - // remove duplicates (tracks 
that share a doublet) - kernel_earlyDuplicateRemover( - this->device_theCells_.get(), this->device_nCells_, tracks_view, this->params_.dupPassThrough_); - - kernel_countMultiplicity(tracks_view, this->device_tupleMultiplicity_.get()); - cms::cuda::launchFinalize(this->device_tupleMultiplicity_.get(), cudaStream); - kernel_fillMultiplicity(tracks_view, this->device_tupleMultiplicity_.get()); - - if (nhits > 1 && this->params_.lateFishbone_) { - gpuPixelDoublets::fishbone( - hh, this->device_theCells_.get(), this->device_nCells_, this->isOuterHitOfCell_, nhits, true); - } -} - -template -void CAHitNtupletGeneratorKernelsCPU::classifyTuples(const HitsConstView &hh, - TkSoAView &tracks_view, - cudaStream_t cudaStream) { - using namespace caHitNtupletGeneratorKernels; - - int32_t nhits = hh.metadata().size(); - - // classify tracks based on kinematics - kernel_classifyTracks(tracks_view, this->params_.qualityCuts_); - if (this->params_.lateFishbone_) { - // apply fishbone cleaning to good tracks - kernel_fishboneCleaner(this->device_theCells_.get(), this->device_nCells_, tracks_view); - } - - // remove duplicates (tracks that share a doublet) - kernel_fastDuplicateRemover( - this->device_theCells_.get(), this->device_nCells_, tracks_view, this->params_.dupPassThrough_); - - // fill hit->track "map" - if (this->params_.doSharedHitCut_ || this->params_.doStats_) { - kernel_countHitInTracks(tracks_view, this->device_hitToTuple_.get()); - cms::cuda::launchFinalize(this->hitToTupleView_, cudaStream); - kernel_fillHitInTracks(tracks_view, this->device_hitToTuple_.get()); - } - - // remove duplicates (tracks that share at least one hit) - if (this->params_.doSharedHitCut_) { - kernel_rejectDuplicate(tracks_view, - this->params_.minHitsForSharingCut_, - this->params_.dupPassThrough_, - this->device_hitToTuple_.get()); - - kernel_sharedHitCleaner(hh, - tracks_view, - this->params_.minHitsForSharingCut_, - this->params_.dupPassThrough_, - this->device_hitToTuple_.get()); - if 
(this->params_.useSimpleTripletCleaner_) { - kernel_simpleTripletCleaner(tracks_view, - this->params_.minHitsForSharingCut_, - this->params_.dupPassThrough_, - this->device_hitToTuple_.get()); - } else { - kernel_tripletCleaner(tracks_view, - this->params_.minHitsForSharingCut_, - this->params_.dupPassThrough_, - this->device_hitToTuple_.get()); - } - } - - if (this->params_.doStats_) { - std::lock_guard guard(lock_stat); - kernel_checkOverflows(tracks_view, - this->device_tupleMultiplicity_.get(), - this->device_hitToTuple_.get(), - this->device_hitTuple_apc_, - this->device_theCells_.get(), - this->device_nCells_, - this->device_theCellNeighbors_.get(), - this->device_theCellTracks_.get(), - this->isOuterHitOfCell_, - nhits, - this->params_.caParams_.maxNumberOfDoublets_, - this->counters_); - } - - if (this->params_.doStats_) { - // counters (add flag???) - std::lock_guard guard(lock_stat); - kernel_doStatsForHitInTracks(this->device_hitToTuple_.get(), this->counters_); - kernel_doStatsForTracks(tracks_view, this->counters_); - } - -#ifdef DUMP_GPU_TK_TUPLES - static std::atomic iev(0); - static std::mutex lock; - { - std::lock_guard guard(lock); - ++iev; - kernel_print_found_ntuplets(hh, tracks_view, this->device_hitToTuple_.get(), 0, 1000000, iev); - } -#endif -} - -template class CAHitNtupletGeneratorKernelsCPU; -template class CAHitNtupletGeneratorKernelsCPU; -template class CAHitNtupletGeneratorKernelsCPU; diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cu b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cu deleted file mode 100644 index e846622b951a8..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cu +++ /dev/null @@ -1,393 +0,0 @@ -#include - -#include "CAHitNtupletGeneratorKernelsImpl.h" - -//#define GPU_DEBUG -//#define NTUPLE_DEBUG - -template -void CAHitNtupletGeneratorKernelsGPU::launchKernels(const HitsConstView &hh, - TkSoAView &tracks_view, - cudaStream_t cudaStream) { - 
using namespace gpuPixelDoublets; - using namespace caHitNtupletGeneratorKernels; - - // zero tuples - cms::cuda::launchZero(&(tracks_view.hitIndices()), cudaStream); //TODO test .data() - - int32_t nhits = hh.metadata().size(); - -#ifdef NTUPLE_DEBUG - std::cout << "start tuple building. N hits " << nhits << std::endl; - if (nhits < 2) - std::cout << "too few hits " << nhits << std::endl; -#endif - - // - // applying conbinatoric cleaning such as fishbone at this stage is too expensive - // - - auto nthTot = 64; - auto stride = 4; - auto blockSize = nthTot / stride; - auto numberOfBlocks = this->nDoubletBlocks(blockSize); - auto rescale = numberOfBlocks / 65536; - blockSize *= (rescale + 1); - numberOfBlocks = this->nDoubletBlocks(blockSize); - assert(numberOfBlocks < 65536); - assert(blockSize > 0 && 0 == blockSize % 16); - dim3 blks(1, numberOfBlocks, 1); - dim3 thrs(stride, blockSize, 1); - - kernel_connect - <<>>(this->device_hitTuple_apc_, - this->device_hitToTuple_apc_, // needed only to be reset, ready for next kernel - hh, - this->device_theCells_.get(), - this->device_nCells_, - this->device_theCellNeighbors_.get(), - this->isOuterHitOfCell_, - this->params_.caParams_); - - cudaCheck(cudaGetLastError()); - - // do not run the fishbone if there are hits only in BPIX1 - if (nhits > this->isOuterHitOfCell_.offset && this->params_.earlyFishbone_) { - auto nthTot = 128; - auto stride = 16; - auto blockSize = nthTot / stride; - auto numberOfBlocks = (nhits - this->isOuterHitOfCell_.offset + blockSize - 1) / blockSize; - dim3 blks(1, numberOfBlocks, 1); - dim3 thrs(stride, blockSize, 1); - fishbone<<>>( - hh, this->device_theCells_.get(), this->device_nCells_, this->isOuterHitOfCell_, nhits, false); - cudaCheck(cudaGetLastError()); - } - - blockSize = 64; - numberOfBlocks = (3 * this->params_.caParams_.maxNumberOfDoublets_ / 4 + blockSize - 1) / blockSize; - kernel_find_ntuplets<<>>(hh, - tracks_view, - this->device_theCells_.get(), - this->device_nCells_, - 
this->device_theCellTracks_.get(), - this->device_hitTuple_apc_, - this->params_.caParams_); -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); -#endif - if (this->params_.doStats_) - kernel_mark_used - <<>>(this->device_theCells_.get(), this->device_nCells_); - cudaCheck(cudaGetLastError()); - -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); -#endif - - blockSize = 128; - numberOfBlocks = (HitContainer::ctNOnes() + blockSize - 1) / blockSize; - - cms::cuda::finalizeBulk<<>>(this->device_hitTuple_apc_, - &tracks_view.hitIndices()); //TODO test .data() - -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); -#endif - - kernel_fillHitDetIndices<<>>(tracks_view, hh); - cudaCheck(cudaGetLastError()); - -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); -#endif - kernel_fillNLayers - <<>>(tracks_view, this->device_hitTuple_apc_); - cudaCheck(cudaGetLastError()); - -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); -#endif - - // remove duplicates (tracks that share a doublet) - numberOfBlocks = this->nDoubletBlocks(blockSize); - - kernel_earlyDuplicateRemover<<>>( - this->device_theCells_.get(), this->device_nCells_, tracks_view, this->params_.dupPassThrough_); - cudaCheck(cudaGetLastError()); -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); -#endif - - blockSize = 128; - numberOfBlocks = (3 * TrackerTraits::maxNumberOfTuples / 4 + blockSize - 1) / blockSize; - kernel_countMultiplicity - <<>>(tracks_view, this->device_tupleMultiplicity_.get()); - cms::cuda::launchFinalize(this->device_tupleMultiplicity_.get(), cudaStream); - kernel_fillMultiplicity - <<>>(tracks_view, this->device_tupleMultiplicity_.get()); - cudaCheck(cudaGetLastError()); -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); -#endif - - // do not run the fishbone if there are hits only in BPIX1 - if (nhits > 
this->isOuterHitOfCell_.offset && this->params_.lateFishbone_) { - auto nthTot = 128; - auto stride = 16; - auto blockSize = nthTot / stride; - auto numberOfBlocks = (nhits - this->isOuterHitOfCell_.offset + blockSize - 1) / blockSize; - dim3 blks(1, numberOfBlocks, 1); - dim3 thrs(stride, blockSize, 1); - fishbone<<>>( - hh, this->device_theCells_.get(), this->device_nCells_, this->isOuterHitOfCell_, nhits, true); - cudaCheck(cudaGetLastError()); - } - -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); -#endif -} - -template -void CAHitNtupletGeneratorKernelsGPU::buildDoublets(const HitsConstView &hh, - int32_t offsetBPIX2, - cudaStream_t stream) { - int32_t nhits = hh.metadata().size(); - using namespace gpuPixelDoublets; - - using GPUCACell = GPUCACellT; - using OuterHitOfCell = typename GPUCACell::OuterHitOfCell; - using CellNeighbors = typename GPUCACell::CellNeighbors; - using CellTracks = typename GPUCACell::CellTracks; - using OuterHitOfCellContainer = typename GPUCACell::OuterHitOfCellContainer; - - this->isOuterHitOfCell_ = OuterHitOfCell{this->device_isOuterHitOfCell_.get(), offsetBPIX2}; - -#ifdef NTUPLE_DEBUG - std::cout << "building Doublets out of " << nhits << " Hits" << std::endl; -#endif - -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); -#endif - - // in principle we can use "nhits" to heuristically dimension the workspace... 
- this->device_isOuterHitOfCell_ = - cms::cuda::make_device_unique(std::max(1, nhits - offsetBPIX2), stream); - assert(this->device_isOuterHitOfCell_.get()); - - this->isOuterHitOfCell_ = OuterHitOfCell{this->device_isOuterHitOfCell_.get(), offsetBPIX2}; - - this->cellStorage_ = - cms::cuda::make_device_unique(TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors) + - TrackerTraits::maxNumOfActiveDoublets * sizeof(CellTracks), - stream); - this->device_theCellNeighborsContainer_ = (CellNeighbors *)this->cellStorage_.get(); - this->device_theCellTracksContainer_ = - (CellTracks *)(this->cellStorage_.get() + TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors)); - - { - int threadsPerBlock = 128; - // at least one block! - int blocks = (std::max(1, nhits - offsetBPIX2) + threadsPerBlock - 1) / threadsPerBlock; - initDoublets<<>>(this->isOuterHitOfCell_, - nhits, - this->device_theCellNeighbors_.get(), - this->device_theCellNeighborsContainer_, - this->device_theCellTracks_.get(), - this->device_theCellTracksContainer_); - cudaCheck(cudaGetLastError()); - } - - this->device_theCells_ = - cms::cuda::make_device_unique(this->params_.caParams_.maxNumberOfDoublets_, stream); - -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); -#endif - - if (0 == nhits) - return; // protect against empty events - - // take all layer pairs into account - auto nActualPairs = this->params_.nPairs(); - - int stride = 4; - int threadsPerBlock = TrackerTraits::getDoubletsFromHistoMaxBlockSize / stride; - int blocks = (4 * nhits + threadsPerBlock - 1) / threadsPerBlock; - dim3 blks(1, blocks, 1); - dim3 thrs(stride, threadsPerBlock, 1); - - getDoubletsFromHisto<<>>(this->device_theCells_.get(), - this->device_nCells_, - this->device_theCellNeighbors_.get(), - this->device_theCellTracks_.get(), - hh, - this->isOuterHitOfCell_, - nActualPairs, - this->params_.caParams_.maxNumberOfDoublets_, - this->device_cellCuts_.get()); - 
cudaCheck(cudaGetLastError()); - -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); -#endif -} - -template -void CAHitNtupletGeneratorKernelsGPU::classifyTuples(const HitsConstView &hh, - TkSoAView &tracks_view, - cudaStream_t cudaStream) { - using namespace caHitNtupletGeneratorKernels; - - int32_t nhits = hh.metadata().size(); - - auto blockSize = 64; - - // classify tracks based on kinematics - auto numberOfBlocks = this->nQuadrupletBlocks(blockSize); - kernel_classifyTracks - <<>>(tracks_view, this->params_.qualityCuts_); - - if (this->params_.lateFishbone_) { - // apply fishbone cleaning to good tracks - numberOfBlocks = this->nDoubletBlocks(blockSize); - kernel_fishboneCleaner - <<>>(this->device_theCells_.get(), this->device_nCells_, tracks_view); - cudaCheck(cudaGetLastError()); - } - - // mark duplicates (tracks that share a doublet) - numberOfBlocks = this->nDoubletBlocks(blockSize); - kernel_fastDuplicateRemover<<>>( - this->device_theCells_.get(), this->device_nCells_, tracks_view, this->params_.dupPassThrough_); - cudaCheck(cudaGetLastError()); -#ifdef GPU_DEBUG - cudaCheck(cudaDeviceSynchronize()); -#endif - - if (this->params_.doSharedHitCut_ || this->params_.doStats_) { - // fill hit->track "map" - assert(this->hitToTupleView_.offSize > nhits); - numberOfBlocks = this->nQuadrupletBlocks(blockSize); - kernel_countHitInTracks - <<>>(tracks_view, this->device_hitToTuple_.get()); //CHECK - cudaCheck(cudaGetLastError()); - assert((this->hitToTupleView_.assoc == this->device_hitToTuple_.get()) && - (this->hitToTupleView_.offStorage == this->device_hitToTupleStorage_.get()) && - (this->hitToTupleView_.offSize > 0)); - cms::cuda::launchFinalize(this->hitToTupleView_, cudaStream); - cudaCheck(cudaGetLastError()); - kernel_fillHitInTracks - <<>>(tracks_view, this->device_hitToTuple_.get()); - cudaCheck(cudaGetLastError()); -#ifdef GPU_DEBUG - cudaCheck(cudaDeviceSynchronize()); -#endif - } - - if (this->params_.doSharedHitCut_) { - 
// mark duplicates (tracks that share at least one hit) - numberOfBlocks = (this->hitToTupleView_.offSize + blockSize - 1) / blockSize; - - kernel_rejectDuplicate<<>>( - tracks_view, this->params_.minHitsForSharingCut_, this->params_.dupPassThrough_, this->device_hitToTuple_.get()); - - kernel_sharedHitCleaner - <<>>(hh, - tracks_view, - this->params_.minHitsForSharingCut_, - this->params_.dupPassThrough_, - this->device_hitToTuple_.get()); - - if (this->params_.useSimpleTripletCleaner_) { - kernel_simpleTripletCleaner - <<>>(tracks_view, - this->params_.minHitsForSharingCut_, - this->params_.dupPassThrough_, - this->device_hitToTuple_.get()); - } else { - kernel_tripletCleaner - <<>>(tracks_view, - this->params_.minHitsForSharingCut_, - this->params_.dupPassThrough_, - this->device_hitToTuple_.get()); - } - cudaCheck(cudaGetLastError()); -#ifdef GPU_DEBUG - cudaCheck(cudaDeviceSynchronize()); -#endif - } - - if (this->params_.doStats_) { - numberOfBlocks = (std::max(nhits, int(this->params_.caParams_.maxNumberOfDoublets_)) + blockSize - 1) / blockSize; - kernel_checkOverflows - <<>>(tracks_view, - this->device_tupleMultiplicity_.get(), - this->device_hitToTuple_.get(), - this->device_hitTuple_apc_, - this->device_theCells_.get(), - this->device_nCells_, - this->device_theCellNeighbors_.get(), - this->device_theCellTracks_.get(), - this->isOuterHitOfCell_, - nhits, - this->params_.caParams_.maxNumberOfDoublets_, - this->counters_); - cudaCheck(cudaGetLastError()); - } - - if (this->params_.doStats_) { - // counters (add flag???) - numberOfBlocks = (this->hitToTupleView_.offSize + blockSize - 1) / blockSize; - kernel_doStatsForHitInTracks - <<>>(this->device_hitToTuple_.get(), this->counters_); - cudaCheck(cudaGetLastError()); - numberOfBlocks = (3 * TrackerTraits::maxNumberOfQuadruplets / 4 + blockSize - 1) / blockSize; - kernel_doStatsForTracks - <<>>(tracks_view, this->counters_); //why sometimes yes and some no? 
- cudaCheck(cudaGetLastError()); - } -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); -#endif - -#ifdef DUMP_GPU_TK_TUPLES - static std::atomic iev(0); - static std::mutex lock; - { - std::lock_guard guard(lock); - ++iev; - for (int k = 0; k < 20000; k += 500) { - kernel_print_found_ntuplets - <<<1, 32, 0, cudaStream>>>(hh, tracks_view, this->device_hitToTuple_.get(), k, k + 500, iev); - cudaCheck(cudaStreamSynchronize(cudaStream)); - } - kernel_print_found_ntuplets - <<<1, 32, 0, cudaStream>>>(hh, tracks_view, this->device_hitToTuple_.get(), 20000, 1000000, iev); - cudaCheck(cudaStreamSynchronize(cudaStream)); - } -#endif -} - -template -void CAHitNtupletGeneratorKernelsGPU::printCounters(Counters const *counters) { - caHitNtupletGeneratorKernels::kernel_printCounters<<<1, 1>>>(counters); -} - -template class CAHitNtupletGeneratorKernelsGPU; -template class CAHitNtupletGeneratorKernelsGPU; -template class CAHitNtupletGeneratorKernelsGPU; diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.h b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.h deleted file mode 100644 index 250aef21c1d6a..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.h +++ /dev/null @@ -1,347 +0,0 @@ -#ifndef RecoTracker_PixelSeeding_plugins_CAHitNtupletGeneratorKernels_h -#define RecoTracker_PixelSeeding_plugins_CAHitNtupletGeneratorKernels_h - -//#define GPU_DEBUG -//#define DUMP_GPU_TK_TUPLES - -#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h" -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" - -#include "GPUCACell.h" -#include "gpuPixelDoublets.h" - -namespace caHitNtupletGenerator { - - //Configuration params common to all topologies, for 
the algorithms - struct AlgoParams { - const bool onGPU_; - const uint32_t minHitsForSharingCut_; - const bool useRiemannFit_; - const bool fitNas4_; - const bool includeJumpingForwardDoublets_; - const bool earlyFishbone_; - const bool lateFishbone_; - const bool doStats_; - const bool doSharedHitCut_; - const bool dupPassThrough_; - const bool useSimpleTripletCleaner_; - }; - - //CAParams - struct CACommon { - const uint32_t maxNumberOfDoublets_; - const uint32_t minHitsPerNtuplet_; - const float ptmin_; - const float CAThetaCutBarrel_; - const float CAThetaCutForward_; - const float hardCurvCut_; - const float dcaCutInnerTriplet_; - const float dcaCutOuterTriplet_; - }; - - template - struct CAParamsT : public CACommon { - __device__ __forceinline__ bool startingLayerPair(int16_t pid) const { return false; }; - __device__ __forceinline__ bool startAt0(int16_t pid) const { return false; }; - }; - - template - struct CAParamsT> : public CACommon { - /// Is is a starting layer pair? - __device__ __forceinline__ bool startingLayerPair(int16_t pid) const { - return minHitsPerNtuplet_ > 3 ? pid < 3 : pid < 8 || pid > 12; - } - - /// Is this a pair with inner == 0? - __device__ __forceinline__ bool startAt0(int16_t pid) const { - assert((pixelTopology::Phase1::layerPairs[pid * 2] == 0) == - (pid < 3 || pid == 13 || pid == 15 || pid == 16)); // to be 100% sure it's working, may be removed - return pixelTopology::Phase1::layerPairs[pid * 2] == 0; - } - }; - - template - struct CAParamsT> : public CACommon { - const bool includeFarForwards_; - /// Is is a starting layer pair? 
- __device__ __forceinline__ bool startingLayerPair(int16_t pid) const { - return pid < 33; // in principle one could remove 5,6,7 23, 28 and 29 - } - - /// Is this a pair with inner == 0 - __device__ __forceinline__ bool startAt0(int16_t pid) const { - assert((pixelTopology::Phase2::layerPairs[pid * 2] == 0) == ((pid < 3) | (pid >= 23 && pid < 28))); - return pixelTopology::Phase2::layerPairs[pid * 2] == 0; - } - }; - - //Full list of params = algo params + ca params + cell params + quality cuts - //Generic template - template - struct ParamsT : public AlgoParams { - // one should define the params for its own pixelTopology - // not defining anything here - inline uint32_t nPairs() const { return 0; } - }; - - template - struct ParamsT> : public AlgoParams { - using TT = TrackerTraits; - using QualityCuts = pixelTrack::QualityCutsT; //track quality cuts - using CellCuts = gpuPixelDoublets::CellCutsT; //cell building cuts - using CAParams = CAParamsT; //params to be used on device - - ParamsT(AlgoParams const& commonCuts, - CellCuts const& cellCuts, - QualityCuts const& cutsCuts, - CAParams const& caParams) - : AlgoParams(commonCuts), cellCuts_(cellCuts), qualityCuts_(cutsCuts), caParams_(caParams) {} - - const CellCuts cellCuts_; - const QualityCuts qualityCuts_{// polynomial coefficients for the pT-dependent chi2 cut - {0.68177776, 0.74609577, -0.08035491, 0.00315399}, - // max pT used to determine the chi2 cut - 10., - // chi2 scale factor: 30 for broken line fit, 45 for Riemann fit - 30., - // regional cuts for triplets - { - 0.3, // |Tip| < 0.3 cm - 0.5, // pT > 0.5 GeV - 12.0 // |Zip| < 12.0 cm - }, - // regional cuts for quadruplets - { - 0.5, // |Tip| < 0.5 cm - 0.3, // pT > 0.3 GeV - 12.0 // |Zip| < 12.0 cm - }}; - const CAParams caParams_; - /// Compute the number of pairs - inline uint32_t nPairs() const { - // take all layer pairs into account - uint32_t nActualPairs = TT::nPairs; - if (not includeJumpingForwardDoublets_) { - // exclude forward 
"jumping" layer pairs - nActualPairs = TT::nPairsForTriplets; - } - if (caParams_.minHitsPerNtuplet_ > 3) { - // for quadruplets, exclude all "jumping" layer pairs - nActualPairs = TT::nPairsForQuadruplets; - } - - return nActualPairs; - } - - }; // Params Phase1 - - template - struct ParamsT> : public AlgoParams { - using TT = TrackerTraits; - using QualityCuts = pixelTrack::QualityCutsT; - using CellCuts = gpuPixelDoublets::CellCutsT; - using CAParams = CAParamsT; - - ParamsT(AlgoParams const& commonCuts, - CellCuts const& cellCuts, - QualityCuts const& qualityCuts, - CAParams const& caParams) - : AlgoParams(commonCuts), cellCuts_(cellCuts), qualityCuts_(qualityCuts), caParams_(caParams) {} - - // quality cuts - const CellCuts cellCuts_; - const QualityCuts qualityCuts_{5.0f, /*chi2*/ 0.9f, /* pT in Gev*/ 0.4f, /*zip in cm*/ 12.0f /*tip in cm*/}; - const CAParams caParams_; - - inline uint32_t nPairs() const { - // take all layer pairs into account - uint32_t nActualPairs = TT::nPairsMinimal; - if (caParams_.includeFarForwards_) { - // considera far forwards (> 11 & > 23) - nActualPairs = TT::nPairsFarForwards; - } - if (includeJumpingForwardDoublets_) { - // include jumping forwards - nActualPairs = TT::nPairs; - } - - return nActualPairs; - } - - }; // Params Phase1 - - // counters - struct Counters { - unsigned long long nEvents; - unsigned long long nHits; - unsigned long long nCells; - unsigned long long nTuples; - unsigned long long nFitTracks; - unsigned long long nLooseTracks; - unsigned long long nGoodTracks; - unsigned long long nUsedHits; - unsigned long long nDupHits; - unsigned long long nFishCells; - unsigned long long nKilledCells; - unsigned long long nEmptyCells; - unsigned long long nZeroTrackCells; - }; - - using Quality = pixelTrack::Quality; - -} // namespace caHitNtupletGenerator - -template -class CAHitNtupletGeneratorKernels { -public: - using Traits = TTraits; - using TrackerTraits = TTTraits; - using QualityCuts = 
pixelTrack::QualityCutsT; - using Params = caHitNtupletGenerator::ParamsT; - using CAParams = caHitNtupletGenerator::CAParamsT; - using CellCuts = gpuPixelDoublets::CellCutsT; - using Counters = caHitNtupletGenerator::Counters; - - template - using unique_ptr = typename Traits::template unique_ptr; - - using HitsView = TrackingRecHitSoAView; - using HitsConstView = TrackingRecHitSoAConstView; - using TkSoAView = TrackSoAView; - - using HitToTuple = caStructures::HitToTupleT; - using TupleMultiplicity = caStructures::TupleMultiplicityT; - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - using CellNeighbors = caStructures::CellNeighborsT; - using CellTracksVector = caStructures::CellTracksVectorT; - using CellTracks = caStructures::CellTracksT; - using OuterHitOfCellContainer = caStructures::OuterHitOfCellContainerT; - using OuterHitOfCell = caStructures::OuterHitOfCellT; - - using CACell = GPUCACellT; - - using Quality = pixelTrack::Quality; - using HitContainer = typename TrackSoA::HitContainer; - - CAHitNtupletGeneratorKernels(Params const& params) - : params_(params), paramsMaxDoubletes3Quarters_(3 * params.caParams_.maxNumberOfDoublets_ / 4) {} - - ~CAHitNtupletGeneratorKernels() = default; - - TupleMultiplicity const* tupleMultiplicity() const { return device_tupleMultiplicity_.get(); } - - void launchKernels(const HitsConstView& hh, TkSoAView& track_view, cudaStream_t cudaStream); - - void classifyTuples(const HitsConstView& hh, TkSoAView& track_view, cudaStream_t cudaStream); - - void buildDoublets(const HitsConstView& hh, cudaStream_t stream); - void allocateOnGPU(int32_t nHits, cudaStream_t stream); - void cleanup(cudaStream_t cudaStream); - - static void printCounters(Counters const* counters); - void setCounters(Counters* counters) { counters_ = counters; } - -protected: - Counters* counters_ = nullptr; - - // workspace - unique_ptr cellStorage_; - unique_ptr device_theCellNeighbors_; - CellNeighbors* device_theCellNeighborsContainer_; - 
unique_ptr device_theCellTracks_; - CellTracks* device_theCellTracksContainer_; - - unique_ptr device_theCells_; - unique_ptr device_isOuterHitOfCell_; - OuterHitOfCell isOuterHitOfCell_; - uint32_t* device_nCells_ = nullptr; - - unique_ptr device_hitToTuple_; - unique_ptr device_hitToTupleStorage_; - typename HitToTuple::View hitToTupleView_; - - unique_ptr device_cellCuts_; - - cms::cuda::AtomicPairCounter* device_hitToTuple_apc_ = nullptr; - - cms::cuda::AtomicPairCounter* device_hitTuple_apc_ = nullptr; - - unique_ptr device_tupleMultiplicity_; - - unique_ptr device_storage_; - - // params - Params params_; - /// Intermediate result avoiding repeated computations. - const uint32_t paramsMaxDoubletes3Quarters_; - /// Compute the number of doublet blocks for block size - inline uint32_t nDoubletBlocks(uint32_t blockSize) { - // We want (3 * params_.maxNumberOfDoublets_ / 4 + blockSize - 1) / blockSize, but first part is pre-computed. - return (paramsMaxDoubletes3Quarters_ + blockSize - 1) / blockSize; - } - - /// Compute the number of quadruplet blocks for block size - inline uint32_t nQuadrupletBlocks(uint32_t blockSize) { - // pixelTopology::maxNumberOfQuadruplets is a constexpr, so the compiler will pre compute the 3*max/4 - return (3 * TrackerTraits::maxNumberOfQuadruplets / 4 + blockSize - 1) / blockSize; - } -}; - -template -class CAHitNtupletGeneratorKernelsGPU : public CAHitNtupletGeneratorKernels { - using CAHitNtupletGeneratorKernels::CAHitNtupletGeneratorKernels; - - using Counters = caHitNtupletGenerator::Counters; - using CAParams = caHitNtupletGenerator::CAParamsT; - - using HitContainer = typename TrackSoA::HitContainer; - - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - using HitToTuple = caStructures::HitToTupleT; - using CellTracksVector = caStructures::CellTracksVectorT; - using TupleMultiplicity = caStructures::TupleMultiplicityT; - - using HitsConstView = TrackingRecHitSoAConstView; - using TkSoAView = TrackSoAView; - - 
using Params = caHitNtupletGenerator::ParamsT; - -public: - void launchKernels(const HitsConstView& hh, TkSoAView& track_view, cudaStream_t cudaStream); - void classifyTuples(const HitsConstView& hh, TkSoAView& track_view, cudaStream_t cudaStream); - void buildDoublets(const HitsConstView& hh, int32_t offsetBPIX2, cudaStream_t stream); - void allocateOnGPU(int32_t nHits, cudaStream_t stream); - static void printCounters(Counters const* counters); -}; - -template -class CAHitNtupletGeneratorKernelsCPU : public CAHitNtupletGeneratorKernels { - using CAHitNtupletGeneratorKernels::CAHitNtupletGeneratorKernels; - - using Counters = caHitNtupletGenerator::Counters; - using CAParams = caHitNtupletGenerator::CAParamsT; - - using HitContainer = typename TrackSoA::HitContainer; - - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - using HitToTuple = caStructures::HitToTupleT; - using CellTracksVector = caStructures::CellTracksVectorT; - using TupleMultiplicity = caStructures::TupleMultiplicityT; - - using HitsConstView = TrackingRecHitSoAConstView; - using TkSoAView = TrackSoAView; - - using Params = caHitNtupletGenerator::ParamsT; - -public: - void launchKernels(const HitsConstView& hh, TkSoAView& track_view, cudaStream_t cudaStream); - void classifyTuples(const HitsConstView& hh, TkSoAView& track_view, cudaStream_t cudaStream); - void buildDoublets(const HitsConstView& hh, int32_t offsetBPIX2, cudaStream_t stream); - void allocateOnGPU(int32_t nHits, cudaStream_t stream); - static void printCounters(Counters const* counters); -}; - -#endif // RecoTracker_PixelSeeding_plugins_CAHitNtupletGeneratorKernels_h diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cc b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cc deleted file mode 100644 index 64148d5f5ba81..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cc +++ /dev/null @@ -1,69 +0,0 @@ -#include 
"HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -#include "CAHitNtupletGeneratorKernels.h" - -//#define GPU_DEBUG - -template -#ifdef __CUDACC__ -void CAHitNtupletGeneratorKernelsGPU::allocateOnGPU(int32_t nHits, cudaStream_t stream) { - using Traits = cms::cudacompat::GPUTraits; -#else -void CAHitNtupletGeneratorKernelsCPU::allocateOnGPU(int32_t nHits, cudaStream_t stream) { - using Traits = cms::cudacompat::CPUTraits; -#endif - - using CellCuts = gpuPixelDoublets::CellCutsT; - - ////////////////////////////////////////////////////////// - // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER) - ////////////////////////////////////////////////////////// - - this->device_theCellNeighbors_ = Traits::template make_unique(stream); - this->device_theCellTracks_ = Traits::template make_unique(stream); - -#ifdef GPU_DEBUG - std::cout << "Allocation for tuple building. N hits " << nHits << std::endl; -#endif - - nHits++; // storage requires one more counter; - assert(nHits > 0); - this->device_hitToTuple_ = Traits::template make_unique(stream); - this->device_hitToTupleStorage_ = Traits::template make_unique(nHits, stream); - this->hitToTupleView_.assoc = this->device_hitToTuple_.get(); - this->hitToTupleView_.offStorage = this->device_hitToTupleStorage_.get(); - this->hitToTupleView_.offSize = nHits; - - this->device_tupleMultiplicity_ = Traits::template make_unique(stream); - - this->device_storage_ = Traits::template make_unique(3, stream); - - this->device_hitTuple_apc_ = (cms::cuda::AtomicPairCounter*)this->device_storage_.get(); - this->device_hitToTuple_apc_ = (cms::cuda::AtomicPairCounter*)this->device_storage_.get() + 1; - this->device_nCells_ = (uint32_t*)(this->device_storage_.get() + 2); - - this->device_cellCuts_ = Traits::template make_unique(stream); - // FIXME: consider collapsing these 3 in one adhoc kernel - if constexpr (std::is_same::value) { - cudaCheck(cudaMemsetAsync(this->device_nCells_, 0, sizeof(uint32_t), stream)); - 
cudaCheck(cudaMemcpyAsync( - this->device_cellCuts_.get(), &(this->params_.cellCuts_), sizeof(CellCuts), cudaMemcpyDefault, stream)); - } else { - *(this->device_nCells_) = 0; - *(this->device_cellCuts_.get()) = this->params_.cellCuts_; - } - cms::cuda::launchZero(this->device_tupleMultiplicity_.get(), stream); - cms::cuda::launchZero(this->hitToTupleView_, stream); // we may wish to keep it in the edm -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); -#endif -} - -template class CAHitNtupletGeneratorKernelsGPU; -template class CAHitNtupletGeneratorKernelsGPU; -template class CAHitNtupletGeneratorKernelsGPU; - -template class CAHitNtupletGeneratorKernelsCPU; -template class CAHitNtupletGeneratorKernelsCPU; -template class CAHitNtupletGeneratorKernelsCPU; diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cu b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cu deleted file mode 100644 index 68ee08d657e81..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cu +++ /dev/null @@ -1 +0,0 @@ -#include "CAHitNtupletGeneratorKernelsAlloc.cc" diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h deleted file mode 100644 index 4642e794ac4f9..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h +++ /dev/null @@ -1,932 +0,0 @@ -// -// Original Author: Felice Pantaleo, CERN -// - -//#define NTUPLE_DEBUG -//#define GPU_DEBUG - -#include -#include -#include - -#include - -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" - -#include 
"FWCore/Utilities/interface/isFinite.h" - -#include "CAHitNtupletGeneratorKernels.h" -#include "CAStructures.h" -#include "GPUCACell.h" -#include "gpuFishbone.h" -#include "gpuPixelDoublets.h" - -namespace caHitNtupletGeneratorKernels { - - constexpr uint32_t tkNotFound = std::numeric_limits::max(); - constexpr float maxScore = std::numeric_limits::max(); - constexpr float nSigma2 = 25.f; - - //all of these below are mostly to avoid brining around the relative namespace - - template - using HitToTuple = caStructures::HitToTupleT; - - template - using TupleMultiplicity = caStructures::TupleMultiplicityT; - - template - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - - template - using CellTracksVector = caStructures::CellTracksVectorT; - - template - using OuterHitOfCell = caStructures::OuterHitOfCellT; - - using Quality = pixelTrack::Quality; - - template - using TkSoAView = TrackSoAView; - - template - using HitContainer = typename TrackSoA::HitContainer; - - template - using HitsConstView = typename GPUCACellT::HitsConstView; - - template - using QualityCuts = pixelTrack::QualityCutsT; - - template - using CAParams = caHitNtupletGenerator::CAParamsT; - - using Counters = caHitNtupletGenerator::Counters; - - template - __global__ void kernel_checkOverflows(TkSoAView tracks_view, - TupleMultiplicity const *tupleMultiplicity, - HitToTuple const *hitToTuple, - cms::cuda::AtomicPairCounter *apc, - GPUCACellT const *__restrict__ cells, - uint32_t const *__restrict__ nCells, - CellNeighborsVector const *cellNeighbors, - CellTracksVector const *cellTracks, - OuterHitOfCell const isOuterHitOfCell, - int32_t nHits, - uint32_t maxNumberOfDoublets, - Counters *counters) { - auto first = threadIdx.x + blockIdx.x * blockDim.x; - - auto &c = *counters; - // counters once per event - if (0 == first) { - atomicAdd(&c.nEvents, 1); - atomicAdd(&c.nHits, nHits); - atomicAdd(&c.nCells, *nCells); - atomicAdd(&c.nTuples, apc->get().m); - atomicAdd(&c.nFitTracks, 
tupleMultiplicity->size()); - } - -#ifdef NTUPLE_DEBUG - if (0 == first) { - printf("number of found cells %d \n found tuples %d with total hits %d out of %d %d\n", - *nCells, - apc->get().m, - apc->get().n, - nHits, - hitToTuple->totOnes()); - if (apc->get().m < TrackerTraits::maxNumberOfQuadruplets) { - assert(tracks_view.hitIndices().size(apc->get().m) == 0); - assert(tracks_view.hitIndices().size() == apc->get().n); - } - } - - for (int idx = first, nt = tracks_view.hitIndices().nOnes(); idx < nt; idx += gridDim.x * blockDim.x) { - if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack) // current real limit - printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx)); - assert(tracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack); - for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih) - assert(int(*ih) < nHits); - } -#endif - - if (0 == first) { - if (apc->get().m >= TrackerTraits::maxNumberOfQuadruplets) - printf("Tuples overflow\n"); - if (*nCells >= maxNumberOfDoublets) - printf("Cells overflow\n"); - if (cellNeighbors && cellNeighbors->full()) - printf("cellNeighbors overflow %d %d \n", cellNeighbors->capacity(), cellNeighbors->size()); - if (cellTracks && cellTracks->full()) - printf("cellTracks overflow\n"); - if (int(hitToTuple->nOnes()) < nHits) - printf("ERROR hitToTuple overflow %d %d\n", hitToTuple->nOnes(), nHits); -#ifdef GPU_DEBUG - printf("size of cellNeighbors %d \n cellTracks %d \n hitToTuple %d \n", - cellNeighbors->size(), - cellTracks->size(), - hitToTuple->size()); - -#endif - } - - for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) { - auto const &thisCell = cells[idx]; - if (thisCell.hasFishbone() && !thisCell.isKilled()) - atomicAdd(&c.nFishCells, 1); - if (thisCell.outerNeighbors().full()) //++tooManyNeighbors[thisCell.theLayerPairId]; - printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId()); - if 
(thisCell.tracks().full()) //++tooManyTracks[thisCell.theLayerPairId]; - printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId()); - if (thisCell.isKilled()) - atomicAdd(&c.nKilledCells, 1); - if (!thisCell.unused()) - atomicAdd(&c.nEmptyCells, 1); - if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id()))) - atomicAdd(&c.nZeroTrackCells, 1); - } - - for (int idx = first, nt = nHits - isOuterHitOfCell.offset; idx < nt; idx += gridDim.x * blockDim.x) { - if (isOuterHitOfCell.container[idx].full()) // ++tooManyOuterHitOfCell; - printf("OuterHitOfCell overflow %d\n", idx); - } - } - - template - __global__ void kernel_fishboneCleaner(GPUCACellT const *cells, - uint32_t const *__restrict__ nCells, - TkSoAView tracks_view) { - constexpr auto reject = pixelTrack::Quality::dup; - - auto first = threadIdx.x + blockIdx.x * blockDim.x; - for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) { - auto const &thisCell = cells[idx]; - if (!thisCell.isKilled()) - continue; - - for (auto it : thisCell.tracks()) - tracks_view[it].quality() = reject; - } - } - - // remove shorter tracks if sharing a cell - // It does not seem to affect efficiency in any way! 
- template - __global__ void kernel_earlyDuplicateRemover(GPUCACellT const *cells, - uint32_t const *__restrict__ nCells, - TkSoAView tracks_view, - bool dupPassThrough) { - // quality to mark rejected - constexpr auto reject = pixelTrack::Quality::edup; /// cannot be loose - - assert(nCells); - auto first = threadIdx.x + blockIdx.x * blockDim.x; - for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) { - auto const &thisCell = cells[idx]; - - if (thisCell.tracks().size() < 2) - continue; - - int8_t maxNl = 0; - - // find maxNl - for (auto it : thisCell.tracks()) { - auto nl = tracks_view[it].nLayers(); - maxNl = std::max(nl, maxNl); - } - - // if (maxNl<4) continue; - // quad pass through (leave it her for tests) - // maxNl = std::min(4, maxNl); - - for (auto it : thisCell.tracks()) { - if (tracks_view[it].nLayers() < maxNl) - tracks_view[it].quality() = reject; //no race: simple assignment of the same constant - } - } - } - - // assume the above (so, short tracks already removed) - template - __global__ void kernel_fastDuplicateRemover(GPUCACellT const *__restrict__ cells, - uint32_t const *__restrict__ nCells, - TkSoAView tracks_view, - bool dupPassThrough) { - // quality to mark rejected - auto const reject = dupPassThrough ? 
pixelTrack::Quality::loose : pixelTrack::Quality::dup; - constexpr auto loose = pixelTrack::Quality::loose; - - assert(nCells); - - auto first = threadIdx.x + blockIdx.x * blockDim.x; - for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) { - auto const &thisCell = cells[idx]; - if (thisCell.tracks().size() < 2) - continue; - - float mc = maxScore; - uint16_t im = tkNotFound; - - auto score = [&](auto it) { return std::abs(TracksUtilities::tip(tracks_view, it)); }; - - // full crazy combinatorics - // full crazy combinatorics - int ntr = thisCell.tracks().size(); - for (int i = 0; i < ntr - 1; ++i) { - auto it = thisCell.tracks()[i]; - auto qi = tracks_view[it].quality(); - if (qi <= reject) - continue; - auto opi = tracks_view[it].state()(2); - auto e2opi = tracks_view[it].covariance()(9); - auto cti = tracks_view[it].state()(3); - auto e2cti = tracks_view[it].covariance()(12); - for (auto j = i + 1; j < ntr; ++j) { - auto jt = thisCell.tracks()[j]; - auto qj = tracks_view[jt].quality(); - if (qj <= reject) - continue; - auto opj = tracks_view[jt].state()(2); - auto ctj = tracks_view[jt].state()(3); - auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti); - if ((cti - ctj) * (cti - ctj) > dct) - continue; - auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi); - if ((opi - opj) * (opi - opj) > dop) - continue; - if ((qj < qi) || (qj == qi && score(it) < score(jt))) - tracks_view[jt].quality() = reject; - else { - tracks_view[it].quality() = reject; - break; - } - } - } - - // find maxQual - auto maxQual = reject; // no duplicate! 
- for (auto it : thisCell.tracks()) { - if (tracks_view[it].quality() > maxQual) - maxQual = tracks_view[it].quality(); - } - - if (maxQual <= loose) - continue; - - // find min score - for (auto it : thisCell.tracks()) { - if (tracks_view[it].quality() == maxQual && score(it) < mc) { - mc = score(it); - im = it; - } - } - - if (tkNotFound == im) - continue; - - // mark all other duplicates (not yet, keep it loose) - for (auto it : thisCell.tracks()) { - if (tracks_view[it].quality() > loose && it != im) - tracks_view[it].quality() = loose; //no race: simple assignment of the same constant - } - } - } - - template - __global__ void kernel_connect(cms::cuda::AtomicPairCounter *apc1, - cms::cuda::AtomicPairCounter *apc2, // just to zero them, - HitsConstView hh, - GPUCACellT *cells, - uint32_t const *__restrict__ nCells, - CellNeighborsVector *cellNeighbors, - OuterHitOfCell const isOuterHitOfCell, - CAParams params) { - using Cell = GPUCACellT; - - auto firstCellIndex = threadIdx.y + blockIdx.y * blockDim.y; - auto first = threadIdx.x; - auto stride = blockDim.x; - - if (0 == (firstCellIndex + first)) { - (*apc1) = 0; - (*apc2) = 0; - } // ready for next kernel - - constexpr uint32_t last_bpix1_detIndex = TrackerTraits::last_bpix1_detIndex; - constexpr uint32_t last_barrel_detIndex = TrackerTraits::last_barrel_detIndex; - for (int idx = firstCellIndex, nt = (*nCells); idx < nt; idx += gridDim.y * blockDim.y) { - auto cellIndex = idx; - auto &thisCell = cells[idx]; - auto innerHitId = thisCell.inner_hit_id(); - if (int(innerHitId) < isOuterHitOfCell.offset) - continue; - int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size(); - auto vi = isOuterHitOfCell[innerHitId].data(); - - auto ri = thisCell.inner_r(hh); - auto zi = thisCell.inner_z(hh); - - auto ro = thisCell.outer_r(hh); - auto zo = thisCell.outer_z(hh); - auto isBarrel = thisCell.inner_detIndex(hh) < last_barrel_detIndex; - - for (int j = first; j < numberOfPossibleNeighbors; j += stride) { - 
auto otherCell = __ldg(vi + j); - auto &oc = cells[otherCell]; - auto r1 = oc.inner_r(hh); - auto z1 = oc.inner_z(hh); - bool aligned = Cell::areAlignedRZ( - r1, - z1, - ri, - zi, - ro, - zo, - params.ptmin_, - isBarrel ? params.CAThetaCutBarrel_ : params.CAThetaCutForward_); // 2.f*thetaCut); // FIXME tune cuts - if (aligned && thisCell.dcaCut(hh, - oc, - oc.inner_detIndex(hh) < last_bpix1_detIndex ? params.dcaCutInnerTriplet_ - : params.dcaCutOuterTriplet_, - params.hardCurvCut_)) { // FIXME tune cuts - oc.addOuterNeighbor(cellIndex, *cellNeighbors); - thisCell.setStatusBits(Cell::StatusBit::kUsed); - oc.setStatusBits(Cell::StatusBit::kUsed); - } - } // loop on inner cells - } // loop on outer cells - } - - template - __global__ void kernel_find_ntuplets(HitsConstView hh, - TkSoAView tracks_view, - GPUCACellT *__restrict__ cells, - uint32_t const *nCells, - CellTracksVector *cellTracks, - cms::cuda::AtomicPairCounter *apc, - CAParams params) { - // recursive: not obvious to widen - - using Cell = GPUCACellT; - - auto first = threadIdx.x + blockIdx.x * blockDim.x; - -#ifdef GPU_DEBUG - if (first == 0) - printf("starting producing ntuplets from %d cells \n", *nCells); -#endif - for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) { - auto const &thisCell = cells[idx]; - - if (thisCell.isKilled()) - continue; // cut by earlyFishbone - - // we require at least three hits... 
- if (thisCell.outerNeighbors().empty()) - continue; - - auto pid = thisCell.layerPairId(); - bool doit = params.startingLayerPair(pid); - - constexpr uint32_t maxDepth = TrackerTraits::maxDepth; - if (doit) { - typename Cell::TmpTuple stack; - stack.reset(); - - bool bpix1Start = params.startAt0(pid); - - thisCell.template find_ntuplets(hh, - cells, - *cellTracks, - tracks_view.hitIndices(), - *apc, - tracks_view.quality(), - stack, - params.minHitsPerNtuplet_, - bpix1Start); - - assert(stack.empty()); - } - } - } - template - __global__ void kernel_mark_used(GPUCACellT *__restrict__ cells, uint32_t const *nCells) { - auto first = threadIdx.x + blockIdx.x * blockDim.x; - using Cell = GPUCACellT; - for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) { - auto &thisCell = cells[idx]; - if (!thisCell.tracks().empty()) - thisCell.setStatusBits(Cell::StatusBit::kInTrack); - } - } - - template - __global__ void kernel_countMultiplicity(TkSoAView tracks_view, - TupleMultiplicity *tupleMultiplicity) { - auto first = blockIdx.x * blockDim.x + threadIdx.x; - for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) { - auto nhits = tracks_view.hitIndices().size(it); - if (nhits < 3) - continue; - if (tracks_view[it].quality() == pixelTrack::Quality::edup) - continue; - assert(tracks_view[it].quality() == pixelTrack::Quality::bad); - if (nhits > TrackerTraits::maxHitsOnTrack) // current limit - printf("wrong mult %d %d\n", it, nhits); - assert(nhits <= TrackerTraits::maxHitsOnTrack); - tupleMultiplicity->count(nhits); - } - } - - template - __global__ void kernel_fillMultiplicity(TkSoAView tracks_view, - TupleMultiplicity *tupleMultiplicity) { - auto first = blockIdx.x * blockDim.x + threadIdx.x; - for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) { - auto nhits = tracks_view.hitIndices().size(it); - if (nhits < 3) - continue; - if (tracks_view[it].quality() == 
pixelTrack::Quality::edup) - continue; - assert(tracks_view[it].quality() == pixelTrack::Quality::bad); - if (nhits > TrackerTraits::maxHitsOnTrack) - printf("wrong mult %d %d\n", it, nhits); - assert(nhits <= TrackerTraits::maxHitsOnTrack); - tupleMultiplicity->fill(nhits, it); - } - } - - ///TODO : why there was quality here? - template - __global__ void kernel_classifyTracks(TkSoAView tracks_view, QualityCuts cuts) { - // Quality *__restrict__ quality) { - int first = blockDim.x * blockIdx.x + threadIdx.x; - for (int it = first, nt = tracks_view.hitIndices().nOnes(); it < nt; it += gridDim.x * blockDim.x) { - auto nhits = tracks_view.hitIndices().size(it); - if (nhits == 0) - break; // guard - - // if duplicate: not even fit - if (tracks_view[it].quality() == pixelTrack::Quality::edup) - continue; - - assert(tracks_view[it].quality() == pixelTrack::Quality::bad); - - // mark doublets as bad - if (nhits < 3) - continue; - - // if the fit has any invalid parameters, mark it as bad - bool isNaN = false; - for (int i = 0; i < 5; ++i) { - isNaN |= edm::isNotFinite(tracks_view[it].state()(i)); - } - if (isNaN) { -#ifdef NTUPLE_DEBUG - printf("NaN in fit %d size %d chi2 %f\n", it, tracks_view.hitIndices().size(it), tracks_view[it].chi2()); -#endif - continue; - } - - tracks_view[it].quality() = pixelTrack::Quality::strict; - - if (cuts.strictCut(tracks_view, it)) - continue; - - tracks_view[it].quality() = pixelTrack::Quality::tight; - - if (cuts.isHP(tracks_view, nhits, it)) - tracks_view[it].quality() = pixelTrack::Quality::highPurity; - } - } - - template - __global__ void kernel_doStatsForTracks(TkSoAView tracks_view, Counters *counters) { - int first = blockDim.x * blockIdx.x + threadIdx.x; - for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) { - if (tracks_view.hitIndices().size(idx) == 0) - break; //guard - if (tracks_view[idx].quality() < pixelTrack::Quality::loose) - continue; - 
atomicAdd(&(counters->nLooseTracks), 1); - if (tracks_view[idx].quality() < pixelTrack::Quality::strict) - continue; - atomicAdd(&(counters->nGoodTracks), 1); - } - } - - template - __global__ void kernel_countHitInTracks(TkSoAView tracks_view, HitToTuple *hitToTuple) { - int first = blockDim.x * blockIdx.x + threadIdx.x; - for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) { - if (tracks_view.hitIndices().size(idx) == 0) - break; // guard - for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h) - hitToTuple->count(*h); - } - } - - template - __global__ void kernel_fillHitInTracks(TkSoAView tracks_view, HitToTuple *hitToTuple) { - int first = blockDim.x * blockIdx.x + threadIdx.x; - for (int idx = first, ntot = tracks_view.hitIndices().nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) { - if (tracks_view.hitIndices().size(idx) == 0) - break; // guard - for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h) - hitToTuple->fill(*h, idx); - } - } - - template - __global__ void kernel_fillHitDetIndices(TkSoAView tracks_view, HitsConstView hh) { - int first = blockDim.x * blockIdx.x + threadIdx.x; - // copy offsets - for (int idx = first, ntot = tracks_view.hitIndices().totOnes(); idx < ntot; idx += gridDim.x * blockDim.x) { - tracks_view.detIndices().off[idx] = tracks_view.hitIndices().off[idx]; - } - // fill hit indices - auto nhits = hh.nHits(); - - for (int idx = first, ntot = tracks_view.hitIndices().size(); idx < ntot; idx += gridDim.x * blockDim.x) { - assert(tracks_view.hitIndices().content[idx] < nhits); - tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex(); - } - } - - template - __global__ void kernel_fillNLayers(TkSoAView tracks_view, cms::cuda::AtomicPairCounter *apc) { - auto first = blockIdx.x * blockDim.x + threadIdx.x; - // clamp the number of tracks to the capacity of the 
SoA - auto ntracks = std::min(apc->get().m, tracks_view.metadata().size() - 1); - if (0 == first) - tracks_view.nTracks() = ntracks; - for (int idx = first, nt = ntracks; idx < nt; idx += gridDim.x * blockDim.x) { - auto nHits = TracksUtilities::nHits(tracks_view, idx); - assert(nHits >= 3); - tracks_view[idx].nLayers() = TracksUtilities::computeNumberOfLayers(tracks_view, idx); - } - } - - template - __global__ void kernel_doStatsForHitInTracks(HitToTuple const *__restrict__ hitToTuple, - Counters *counters) { - auto &c = *counters; - int first = blockDim.x * blockIdx.x + threadIdx.x; - for (int idx = first, ntot = hitToTuple->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) { - if (hitToTuple->size(idx) == 0) - continue; // SHALL NOT BE break - atomicAdd(&c.nUsedHits, 1); - if (hitToTuple->size(idx) > 1) - atomicAdd(&c.nDupHits, 1); - } - } - - template - __global__ void kernel_countSharedHit(int *__restrict__ nshared, - HitContainer const *__restrict__ ptuples, - Quality const *__restrict__ quality, - HitToTuple const *__restrict__ phitToTuple) { - constexpr auto loose = pixelTrack::Quality::loose; - - auto &hitToTuple = *phitToTuple; - auto const &foundNtuplets = *ptuples; - - int first = blockDim.x * blockIdx.x + threadIdx.x; - for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) { - if (hitToTuple.size(idx) < 2) - continue; - - int nt = 0; - - // count "good" tracks - for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { - if (quality[*it] < loose) - continue; - ++nt; - } - - if (nt < 2) - continue; - - // now mark each track triplet as sharing a hit - for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { - if (foundNtuplets.size(*it) > 3) - continue; - atomicAdd(&nshared[*it], 1); - } - - } // hit loop - } - - template - __global__ void kernel_markSharedHit(int const *__restrict__ nshared, - HitContainer const *__restrict__ tuples, - Quality *__restrict__ quality, - bool 
dupPassThrough) { - // constexpr auto bad = pixelTrack::Quality::bad; - constexpr auto dup = pixelTrack::Quality::dup; - constexpr auto loose = pixelTrack::Quality::loose; - // constexpr auto strict = pixelTrack::Quality::strict; - - // quality to mark rejected - auto const reject = dupPassThrough ? loose : dup; - - int first = blockDim.x * blockIdx.x + threadIdx.x; - for (int idx = first, ntot = tuples->nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) { - if (tuples->size(idx) == 0) - break; //guard - if (quality[idx] <= reject) - continue; - if (nshared[idx] > 2) - quality[idx] = reject; - } - } - - // mostly for very forward triplets..... - template - __global__ void kernel_rejectDuplicate(TkSoAView tracks_view, - uint16_t nmin, - bool dupPassThrough, - HitToTuple const *__restrict__ phitToTuple) { - // quality to mark rejected - auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup; - - auto &hitToTuple = *phitToTuple; - - int first = blockDim.x * blockIdx.x + threadIdx.x; - for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) { - if (hitToTuple.size(idx) < 2) - continue; - - auto score = [&](auto it, auto nl) { return std::abs(TracksUtilities::tip(tracks_view, it)); }; - - // full combinatorics - for (auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) { - auto const it = *ip; - auto qi = tracks_view[it].quality(); - if (qi <= reject) - continue; - auto opi = tracks_view[it].state()(2); - auto e2opi = tracks_view[it].covariance()(9); - auto cti = tracks_view[it].state()(3); - auto e2cti = tracks_view[it].covariance()(12); - auto nli = tracks_view[it].nLayers(); - for (auto jp = ip + 1; jp < hitToTuple.end(idx); ++jp) { - auto const jt = *jp; - auto qj = tracks_view[jt].quality(); - if (qj <= reject) - continue; - auto opj = tracks_view[jt].state()(2); - auto ctj = tracks_view[jt].state()(3); - auto dct = nSigma2 * (tracks_view[jt].covariance()(12) + e2cti); - if 
((cti - ctj) * (cti - ctj) > dct) - continue; - auto dop = nSigma2 * (tracks_view[jt].covariance()(9) + e2opi); - if ((opi - opj) * (opi - opj) > dop) - continue; - auto nlj = tracks_view[jt].nLayers(); - if (nlj < nli || (nlj == nli && (qj < qi || (qj == qi && score(it, nli) < score(jt, nlj))))) - tracks_view[jt].quality() = reject; - else { - tracks_view[it].quality() = reject; - break; - } - } - } - } - } - - template - __global__ void kernel_sharedHitCleaner(HitsConstView hh, - TkSoAView tracks_view, - int nmin, - bool dupPassThrough, - HitToTuple const *__restrict__ phitToTuple) { - // quality to mark rejected - auto const reject = dupPassThrough ? pixelTrack::Quality::loose : pixelTrack::Quality::dup; - // quality of longest track - auto const longTqual = pixelTrack::Quality::highPurity; - - auto &hitToTuple = *phitToTuple; - - int l1end = hh.hitsLayerStart()[1]; - - int first = blockDim.x * blockIdx.x + threadIdx.x; - for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) { - if (hitToTuple.size(idx) < 2) - continue; - - int8_t maxNl = 0; - - // find maxNl - for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { - if (tracks_view[*it].quality() < longTqual) - continue; - // if (tracks_view[*it].nHits()==3) continue; - auto nl = tracks_view[*it].nLayers(); - maxNl = std::max(nl, maxNl); - } - - if (maxNl < 4) - continue; - - // quad pass through (leave for tests) - // maxNl = std::min(4, maxNl); - - // kill all tracks shorter than maxHl (only triplets??? 
- for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { - auto nl = tracks_view[*it].nLayers(); - - //checking if shared hit is on bpix1 and if the tuple is short enough - if (idx < l1end and nl > nmin) - continue; - - if (nl < maxNl && tracks_view[*it].quality() > reject) - tracks_view[*it].quality() = reject; - } - } - } - - template - __global__ void kernel_tripletCleaner(TkSoAView tracks_view, - uint16_t nmin, - bool dupPassThrough, - HitToTuple const *__restrict__ phitToTuple) { - // quality to mark rejected - auto const reject = pixelTrack::Quality::loose; - /// min quality of good - auto const good = pixelTrack::Quality::strict; - - auto &hitToTuple = *phitToTuple; - - int first = blockDim.x * blockIdx.x + threadIdx.x; - for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) { - if (hitToTuple.size(idx) < 2) - continue; - - float mc = maxScore; - uint16_t im = tkNotFound; - bool onlyTriplets = true; - - // check if only triplets - for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { - if (tracks_view[*it].quality() <= good) - continue; - onlyTriplets &= TracksUtilities::isTriplet(tracks_view, *it); - if (!onlyTriplets) - break; - } - - // only triplets - if (!onlyTriplets) - continue; - - // for triplets choose best tip! (should we first find best quality???) 
- for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { - auto const it = *ip; - if (tracks_view[it].quality() >= good && std::abs(TracksUtilities::tip(tracks_view, it)) < mc) { - mc = std::abs(TracksUtilities::tip(tracks_view, it)); - im = it; - } - } - - if (tkNotFound == im) - continue; - - // mark worse ambiguities - for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { - auto const it = *ip; - if (tracks_view[it].quality() > reject && it != im) - tracks_view[it].quality() = reject; //no race: simple assignment of the same constant - } - - } // loop over hits - } - - template - __global__ void kernel_simpleTripletCleaner(TkSoAView tracks_view, - uint16_t nmin, - bool dupPassThrough, - HitToTuple const *__restrict__ phitToTuple) { - // quality to mark rejected - auto const reject = pixelTrack::Quality::loose; - /// min quality of good - auto const good = pixelTrack::Quality::loose; - - auto &hitToTuple = *phitToTuple; - - int first = blockDim.x * blockIdx.x + threadIdx.x; - for (int idx = first, ntot = hitToTuple.nOnes(); idx < ntot; idx += gridDim.x * blockDim.x) { - if (hitToTuple.size(idx) < 2) - continue; - - float mc = maxScore; - uint16_t im = tkNotFound; - - // choose best tip! (should we first find best quality???) 
- for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { - auto const it = *ip; - if (tracks_view[it].quality() >= good && std::abs(TracksUtilities::tip(tracks_view, it)) < mc) { - mc = std::abs(TracksUtilities::tip(tracks_view, it)); - im = it; - } - } - - if (tkNotFound == im) - continue; - - // mark worse ambiguities - for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { - auto const it = *ip; - if (tracks_view[it].quality() > reject && TracksUtilities::isTriplet(tracks_view, it) && - it != im) - tracks_view[it].quality() = reject; //no race: simple assignment of the same constant - } - - } // loop over hits - } - - template - __global__ void kernel_print_found_ntuplets(HitsConstView hh, - TkSoAView tracks_view, - HitToTuple const *__restrict__ phitToTuple, - int32_t firstPrint, - int32_t lastPrint, - int iev) { - constexpr auto loose = pixelTrack::Quality::loose; - - int first = firstPrint + blockDim.x * blockIdx.x + threadIdx.x; - for (int i = first, np = std::min(lastPrint, tracks_view.hitIndices().nOnes()); i < np; - i += blockDim.x * gridDim.x) { - auto nh = tracks_view.hitIndices().size(i); - if (nh < 3) - continue; - if (tracks_view[i].quality() < loose) - continue; - printf("TK: %d %d %d %d %f %f %f %f %f %f %f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n", - 10000 * iev + i, - int(tracks_view[i].quality()), - nh, - tracks_view[i].nLayers(), - TracksUtilities::charge(tracks_view, i), - tracks_view[i].pt(), - tracks_view[i].eta(), - TracksUtilities::phi(tracks_view, i), - TracksUtilities::tip(tracks_view, i), - TracksUtilities::zip(tracks_view, i), - tracks_view[i].chi2(), - hh[*tracks_view.hitIndices().begin(i)].zGlobal(), - hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(), - hh[*(tracks_view.hitIndices().begin(i) + 2)].zGlobal(), - nh > 3 ? hh[int(*(tracks_view.hitIndices().begin(i) + 3))].zGlobal() : 0, - nh > 4 ? hh[int(*(tracks_view.hitIndices().begin(i) + 4))].zGlobal() : 0, - nh > 5 ? 
hh[int(*(tracks_view.hitIndices().begin(i) + 5))].zGlobal() : 0, - nh > 6 ? hh[int(*(tracks_view.hitIndices().begin(i) + nh - 1))].zGlobal() : 0); - } - } - - __global__ void kernel_printCounters(Counters const *counters) { - auto const &c = *counters; - printf( - "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks | nLooseTracks | nGoodTracks | nUsedHits | " - "nDupHits | " - "nFishCells | " - "nKilledCells | " - "nUsedCells | nZeroTrackCells ||\n"); - printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n", - c.nEvents, - c.nHits, - c.nCells, - c.nTuples, - c.nFitTracks, - c.nLooseTracks, - c.nGoodTracks, - c.nUsedHits, - c.nDupHits, - c.nFishCells, - c.nKilledCells, - c.nEmptyCells, - c.nZeroTrackCells); - printf( - "Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f| %.3f| %.3f||\n", - c.nEvents, - c.nHits / double(c.nEvents), - c.nCells / double(c.nEvents), - c.nTuples / double(c.nEvents), - c.nFitTracks / double(c.nEvents), - c.nLooseTracks / double(c.nEvents), - c.nGoodTracks / double(c.nEvents), - c.nUsedHits / double(c.nEvents), - c.nDupHits / double(c.nEvents), - c.nFishCells / double(c.nCells), - c.nKilledCells / double(c.nCells), - c.nEmptyCells / double(c.nCells), - c.nZeroTrackCells / double(c.nCells)); - } - -} // namespace caHitNtupletGeneratorKernels diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.cc b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.cc deleted file mode 100644 index c632d5f5a4fd7..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.cc +++ /dev/null @@ -1,427 +0,0 @@ -// -// Original Author: Felice Pantaleo, CERN -// - -//#define GPU_DEBUG -//#define DUMP_GPU_TK_TUPLES - -#include -#include -#include -#include - -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include 
"CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoAHost.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "FWCore/Utilities/interface/EDMException.h" -#include "FWCore/Utilities/interface/isFinite.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" -#include "TrackingTools/DetLayers/interface/BarrelDetLayer.h" - -#include "CAHitNtupletGeneratorOnGPU.h" - -namespace { - - using namespace caHitNtupletGenerator; - using namespace gpuPixelDoublets; - using namespace pixelTopology; - using namespace pixelTrack; - - template - T sqr(T x) { - return x * x; - } - - //Common Params - AlgoParams makeCommonParams(edm::ParameterSet const& cfg) { - return AlgoParams({cfg.getParameter("onGPU"), - cfg.getParameter("minHitsForSharingCut"), - cfg.getParameter("useRiemannFit"), - cfg.getParameter("fitNas4"), - cfg.getParameter("includeJumpingForwardDoublets"), - cfg.getParameter("earlyFishbone"), - cfg.getParameter("lateFishbone"), - cfg.getParameter("fillStatistics"), - cfg.getParameter("doSharedHitCut"), - cfg.getParameter("dupPassThrough"), - cfg.getParameter("useSimpleTripletCleaner")}); - } - - //This is needed to have the partial specialization for isPhase1Topology/isPhase2Topology - template - struct topologyCuts {}; - - template - struct topologyCuts> { - static constexpr CAParamsT makeCACuts(edm::ParameterSet const& cfg) { - return CAParamsT{{ - cfg.getParameter("maxNumberOfDoublets"), - cfg.getParameter("minHitsPerNtuplet"), - static_cast(cfg.getParameter("ptmin")), - static_cast(cfg.getParameter("CAThetaCutBarrel")), - static_cast(cfg.getParameter("CAThetaCutForward")), - 
static_cast(cfg.getParameter("hardCurvCut")), - static_cast(cfg.getParameter("dcaCutInnerTriplet")), - static_cast(cfg.getParameter("dcaCutOuterTriplet")), - }}; - }; - - static constexpr pixelTrack::QualityCutsT makeQualityCuts(edm::ParameterSet const& pset) { - auto coeff = pset.getParameter>("chi2Coeff"); - auto ptMax = pset.getParameter("chi2MaxPt"); - - coeff[1] = (coeff[1] - coeff[0]) / log2(ptMax); - return pixelTrack::QualityCutsT{// polynomial coefficients for the pT-dependent chi2 cut - {(float)coeff[0], (float)coeff[1], 0.f, 0.f}, - // max pT used to determine the chi2 cut - (float)ptMax, - // chi2 scale factor: 8 for broken line fit, ?? for Riemann fit - (float)pset.getParameter("chi2Scale"), - // regional cuts for triplets - {(float)pset.getParameter("tripletMaxTip"), - (float)pset.getParameter("tripletMinPt"), - (float)pset.getParameter("tripletMaxZip")}, - // regional cuts for quadruplets - {(float)pset.getParameter("quadrupletMaxTip"), - (float)pset.getParameter("quadrupletMinPt"), - (float)pset.getParameter("quadrupletMaxZip")}}; - } - }; - - template - struct topologyCuts> { - static constexpr CAParamsT makeCACuts(edm::ParameterSet const& cfg) { - return CAParamsT{{cfg.getParameter("maxNumberOfDoublets"), - cfg.getParameter("minHitsPerNtuplet"), - static_cast(cfg.getParameter("ptmin")), - static_cast(cfg.getParameter("CAThetaCutBarrel")), - static_cast(cfg.getParameter("CAThetaCutForward")), - static_cast(cfg.getParameter("hardCurvCut")), - static_cast(cfg.getParameter("dcaCutInnerTriplet")), - static_cast(cfg.getParameter("dcaCutOuterTriplet"))}, - {(bool)cfg.getParameter("includeFarForwards")}}; - } - - static constexpr pixelTrack::QualityCutsT makeQualityCuts(edm::ParameterSet const& pset) { - return pixelTrack::QualityCutsT{ - static_cast(pset.getParameter("maxChi2")), - static_cast(pset.getParameter("minPt")), - static_cast(pset.getParameter("maxTip")), - static_cast(pset.getParameter("maxZip")), - }; - } - }; - - //Cell Cuts, as they are the 
cuts have the same logic for Phase2 and Phase1 - //keeping them separate would allow further differentiation in the future - //moving them to topologyCuts and using the same syntax - template - CellCutsT makeCellCuts(edm::ParameterSet const& cfg) { - return CellCutsT{cfg.getParameter("doClusterCut"), - cfg.getParameter("doZ0Cut"), - cfg.getParameter("doPtCut"), - cfg.getParameter("idealConditions"), - (float)cfg.getParameter("z0Cut"), - (float)cfg.getParameter("ptCut"), - cfg.getParameter("minYsizeB1"), - cfg.getParameter("minYsizeB2"), - cfg.getParameter>("phiCuts")}; - } - -} // namespace - -using namespace std; - -template -CAHitNtupletGeneratorOnGPU::CAHitNtupletGeneratorOnGPU(const edm::ParameterSet& cfg, - edm::ConsumesCollector& iC) - : m_params(makeCommonParams(cfg), - makeCellCuts(cfg), - topologyCuts::makeQualityCuts(cfg.getParameterSet("trackQualityCuts")), - topologyCuts::makeCACuts(cfg)) { -#ifdef DUMP_GPU_TK_TUPLES - printf("TK: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n", - "tid", - "qual", - "nh", - "nl", - "charge", - "pt", - "eta", - "phi", - "tip", - "zip", - "chi2", - "h1", - "h2", - "h3", - "h4", - "h5", - "hn"); -#endif -} - -template -void CAHitNtupletGeneratorOnGPU::fillDescriptions(edm::ParameterSetDescription& desc) { - fillDescriptionsCommon(desc); - edm::LogWarning("CAHitNtupletGeneratorOnGPU::fillDescriptions") - << "Note: this fillDescriptions is a dummy one. Most probably you are missing some parameters. \n" - "please implement your TrackerTraits descriptions in CAHitNtupletGeneratorOnGPU. 
\n"; -} - -template <> -void CAHitNtupletGeneratorOnGPU::fillDescriptions(edm::ParameterSetDescription& desc) { - fillDescriptionsCommon(desc); - - desc.add("idealConditions", true); - desc.add("includeJumpingForwardDoublets", false); - desc.add("z0Cut", 12.0); - desc.add("ptCut", 0.5); - - edm::ParameterSetDescription trackQualityCuts; - trackQualityCuts.add("chi2MaxPt", 10.)->setComment("max pT used to determine the pT-dependent chi2 cut"); - trackQualityCuts.add>("chi2Coeff", {0.9, 1.8})->setComment("chi2 at 1GeV and at ptMax above"); - trackQualityCuts.add("chi2Scale", 8.) - ->setComment( - "Factor to multiply the pT-dependent chi2 cut (currently: 8 for the broken line fit, ?? for the Riemann " - "fit)"); - trackQualityCuts.add("tripletMinPt", 0.5)->setComment("Min pT for triplets, in GeV"); - trackQualityCuts.add("tripletMaxTip", 0.3)->setComment("Max |Tip| for triplets, in cm"); - trackQualityCuts.add("tripletMaxZip", 12.)->setComment("Max |Zip| for triplets, in cm"); - trackQualityCuts.add("quadrupletMinPt", 0.3)->setComment("Min pT for quadruplets, in GeV"); - trackQualityCuts.add("quadrupletMaxTip", 0.5)->setComment("Max |Tip| for quadruplets, in cm"); - trackQualityCuts.add("quadrupletMaxZip", 12.)->setComment("Max |Zip| for quadruplets, in cm"); - - desc.add("minYsizeB1", 1)->setComment("Min Y cluster size in pixel B1"); - desc.add("minYsizeB2", 1)->setComment("Min Y cluster size in pixel B2"); - - desc.add>( - "phiCuts", std::vector(std::begin(phase1PixelTopology::phicuts), std::end(phase1PixelTopology::phicuts))) - ->setComment("Cuts in phi for cells"); - - desc.add("trackQualityCuts", trackQualityCuts) - ->setComment( - "Quality cuts based on the results of the track fit:\n - apply a pT-dependent chi2 cut;\n - apply \"region " - "cuts\" based on the fit results (pT, Tip, Zip)."); -} - -template <> -void CAHitNtupletGeneratorOnGPU::fillDescriptions(edm::ParameterSetDescription& desc) { - fillDescriptionsCommon(desc); - - desc.add("idealConditions", 
false); - desc.add("includeJumpingForwardDoublets", false); - desc.add("z0Cut", 10.0); - desc.add("ptCut", 0.0); - - edm::ParameterSetDescription trackQualityCuts; - trackQualityCuts.add("chi2MaxPt", 10.)->setComment("max pT used to determine the pT-dependent chi2 cut"); - trackQualityCuts.add>("chi2Coeff", {0.9, 1.8})->setComment("chi2 at 1GeV and at ptMax above"); - trackQualityCuts.add("chi2Scale", 8.) - ->setComment( - "Factor to multiply the pT-dependent chi2 cut (currently: 8 for the broken line fit, ?? for the Riemann " - "fit)"); - trackQualityCuts.add("tripletMinPt", 0.0)->setComment("Min pT for triplets, in GeV"); - trackQualityCuts.add("tripletMaxTip", 0.1)->setComment("Max |Tip| for triplets, in cm"); - trackQualityCuts.add("tripletMaxZip", 6.)->setComment("Max |Zip| for triplets, in cm"); - trackQualityCuts.add("quadrupletMinPt", 0.0)->setComment("Min pT for quadruplets, in GeV"); - trackQualityCuts.add("quadrupletMaxTip", 0.5)->setComment("Max |Tip| for quadruplets, in cm"); - trackQualityCuts.add("quadrupletMaxZip", 6.)->setComment("Max |Zip| for quadruplets, in cm"); - - desc.add("minYsizeB1", 36)->setComment("Min Y cluster size in pixel B1"); - desc.add("minYsizeB2", 28)->setComment("Min Y cluster size in pixel B2"); - - desc.add>( - "phiCuts", std::vector(std::begin(phase1PixelTopology::phicuts), std::end(phase1PixelTopology::phicuts))) - ->setComment("Cuts in phi for cells"); - - desc.add("trackQualityCuts", trackQualityCuts) - ->setComment( - "Quality cuts based on the results of the track fit:\n - apply a pT-dependent chi2 cut;\n - apply \"region " - "cuts\" based on the fit results (pT, Tip, Zip)."); -} - -template <> -void CAHitNtupletGeneratorOnGPU::fillDescriptions(edm::ParameterSetDescription& desc) { - fillDescriptionsCommon(desc); - - desc.add("idealConditions", false); - desc.add("includeFarForwards", true); - desc.add("includeJumpingForwardDoublets", true); - desc.add("z0Cut", 7.5); - desc.add("ptCut", 0.85); - - 
edm::ParameterSetDescription trackQualityCuts; - trackQualityCuts.add("maxChi2", 5.)->setComment("Max normalized chi2"); - trackQualityCuts.add("minPt", 0.5)->setComment("Min pT in GeV"); - trackQualityCuts.add("maxTip", 0.3)->setComment("Max |Tip| in cm"); - trackQualityCuts.add("maxZip", 12.)->setComment("Max |Zip|, in cm"); - - desc.add("minYsizeB1", 25)->setComment("Min Y cluster size in pixel B1"); - desc.add("minYsizeB2", 15)->setComment("Min Y cluster size in pixel B2"); - - desc.add>( - "phiCuts", std::vector(std::begin(phase2PixelTopology::phicuts), std::end(phase2PixelTopology::phicuts))) - ->setComment("Cuts in phi for cells"); - - desc.add("trackQualityCuts", trackQualityCuts) - ->setComment( - "Quality cuts based on the results of the track fit:\n - apply cuts based on the fit results (pT, Tip, " - "Zip)."); -} - -template -void CAHitNtupletGeneratorOnGPU::fillDescriptionsCommon(edm::ParameterSetDescription& desc) { - // 87 cm/GeV = 1/(3.8T * 0.3) - // take less than radius given by the hardPtCut and reject everything below - // auto hardCurvCut = 1.f/(0.35 * 87.f); - desc.add("ptmin", 0.9)->setComment("Cut on minimum pt"); - desc.add("CAThetaCutBarrel", 0.002)->setComment("Cut on RZ alignement for Barrel"); - desc.add("CAThetaCutForward", 0.003)->setComment("Cut on RZ alignment for Forward"); - desc.add("hardCurvCut", 1. 
/ (0.35 * 87.))->setComment("Cut on minimum curvature"); - desc.add("dcaCutInnerTriplet", 0.15)->setComment("Cut on origin radius when the inner hit is on BPix1"); - desc.add("dcaCutOuterTriplet", 0.25)->setComment("Cut on origin radius when the outer hit is on BPix1"); - desc.add("earlyFishbone", true); - desc.add("lateFishbone", false); - desc.add("fillStatistics", false); - desc.add("minHitsPerNtuplet", 4); - desc.add("maxNumberOfDoublets", TrackerTraits::maxNumberOfDoublets); - desc.add("minHitsForSharingCut", 10) - ->setComment("Maximum number of hits in a tuple to clean also if the shared hit is on bpx1"); - - desc.add("fitNas4", false)->setComment("fit only 4 hits out of N"); - desc.add("doClusterCut", true); - desc.add("doZ0Cut", true); - desc.add("doPtCut", true); - desc.add("useRiemannFit", false)->setComment("true for Riemann, false for BrokenLine"); - desc.add("doSharedHitCut", true)->setComment("Sharing hit nTuples cleaning"); - desc.add("dupPassThrough", false)->setComment("Do not reject duplicate"); - desc.add("useSimpleTripletCleaner", true)->setComment("use alternate implementation"); -} - -template -void CAHitNtupletGeneratorOnGPU::beginJob() { - if (m_params.onGPU_) { - // allocate pinned host memory only if CUDA is available - edm::Service cuda; - if (cuda and cuda->enabled()) { - cudaCheck(cudaMalloc(&m_counters, sizeof(Counters))); - cudaCheck(cudaMemset(m_counters, 0, sizeof(Counters))); - } - } else { - m_counters = new Counters(); - memset(m_counters, 0, sizeof(Counters)); - } -} - -template -void CAHitNtupletGeneratorOnGPU::endJob() { - if (m_params.onGPU_) { - // print the gpu statistics and free pinned host memory only if CUDA is available - edm::Service cuda; - if (cuda and cuda->enabled()) { - if (m_params.doStats_) { - // crash on multi-gpu processes - CAHitNtupletGeneratorKernelsGPU::printCounters(m_counters); - } - cudaFree(m_counters); - } - } else { - if (m_params.doStats_) { - 
CAHitNtupletGeneratorKernelsCPU::printCounters(m_counters); - } - delete m_counters; - } -} - -template -TrackSoAHeterogeneousDevice CAHitNtupletGeneratorOnGPU::makeTuplesAsync( - HitsOnDevice const& hits_d, float bfield, cudaStream_t stream) const { - using HelixFitOnGPU = HelixFitOnGPU; - using TrackSoA = TrackSoAHeterogeneousDevice; - using GPUKernels = CAHitNtupletGeneratorKernelsGPU; - - TrackSoA tracks(stream); - - GPUKernels kernels(m_params); - kernels.setCounters(m_counters); - kernels.allocateOnGPU(hits_d.nHits(), stream); - - kernels.buildDoublets(hits_d.view(), hits_d.offsetBPIX2(), stream); - - kernels.launchKernels(hits_d.view(), tracks.view(), stream); - - HelixFitOnGPU fitter(bfield, m_params.fitNas4_); - fitter.allocateOnGPU(kernels.tupleMultiplicity(), tracks.view()); - if (m_params.useRiemannFit_) { - fitter.launchRiemannKernels(hits_d.view(), hits_d.nHits(), TrackerTraits::maxNumberOfQuadruplets, stream); - } else { - fitter.launchBrokenLineKernels(hits_d.view(), hits_d.nHits(), TrackerTraits::maxNumberOfQuadruplets, stream); - } - kernels.classifyTuples(hits_d.view(), tracks.view(), stream); -#ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); - std::cout << "finished building pixel tracks on GPU" << std::endl; -#endif - - return tracks; -} - -template -TrackSoAHeterogeneousHost CAHitNtupletGeneratorOnGPU::makeTuples(HitsOnHost const& hits_h, - float bfield) const { - using HelixFitOnGPU = HelixFitOnGPU; - using TrackSoA = TrackSoAHeterogeneousHost; - using CPUKernels = CAHitNtupletGeneratorKernelsCPU; - - TrackSoA tracks; - - CPUKernels kernels(m_params); - kernels.setCounters(m_counters); - kernels.allocateOnGPU(hits_h.nHits(), nullptr); - - kernels.buildDoublets(hits_h.view(), hits_h.offsetBPIX2(), nullptr); - kernels.launchKernels(hits_h.view(), tracks.view(), nullptr); - - if (0 == hits_h.nHits()) - return tracks; - - // now fit - HelixFitOnGPU fitter(bfield, m_params.fitNas4_); - 
fitter.allocateOnGPU(kernels.tupleMultiplicity(), tracks.view()); - - if (m_params.useRiemannFit_) { - fitter.launchRiemannKernelsOnCPU(hits_h.view(), hits_h.nHits(), TrackerTraits::maxNumberOfQuadruplets); - } else { - fitter.launchBrokenLineKernelsOnCPU(hits_h.view(), hits_h.nHits(), TrackerTraits::maxNumberOfQuadruplets); - } - - kernels.classifyTuples(hits_h.view(), tracks.view(), nullptr); - -#ifdef GPU_DEBUG - std::cout << "finished building pixel tracks on CPU" << std::endl; -#endif - - // check that the fixed-size SoA does not overflow - auto maxTracks = tracks.view().metadata().size(); - auto nTracks = tracks.view().nTracks(); - assert(nTracks < maxTracks); - if (nTracks == maxTracks - 1) { - edm::LogWarning("PixelTracks") << "Unsorted reconstructed pixel tracks truncated to " << maxTracks - 1 - << " candidates"; - } - - return tracks; -} - -template class CAHitNtupletGeneratorOnGPU; -template class CAHitNtupletGeneratorOnGPU; -template class CAHitNtupletGeneratorOnGPU; diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.h b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.h deleted file mode 100644 index 0f8d8f392c600..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.h +++ /dev/null @@ -1,82 +0,0 @@ -#ifndef RecoTracker_PixelSeeding_plugins_CAHitNtupletGeneratorOnGPU_h -#define RecoTracker_PixelSeeding_plugins_CAHitNtupletGeneratorOnGPU_h - -#include - -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" - -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoAHost.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -#include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include 
"FWCore/Utilities/interface/EDGetToken.h" -#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" - -#include "CAHitNtupletGeneratorKernels.h" -#include "GPUCACell.h" -#include "HelixFitOnGPU.h" - -namespace edm { - class Event; - class EventSetup; - class ParameterSetDescription; -} // namespace edm - -template -class CAHitNtupletGeneratorOnGPU { -public: - using HitsView = TrackingRecHitSoAView; - using HitsConstView = TrackingRecHitSoAConstView; - using HitsOnDevice = TrackingRecHitSoADevice; - using HitsOnHost = TrackingRecHitSoAHost; - using hindex_type = typename TrackingRecHitSoA::hindex_type; - - using HitToTuple = caStructures::HitToTupleT; - using TupleMultiplicity = caStructures::TupleMultiplicityT; - using OuterHitOfCell = caStructures::OuterHitOfCellT; - - using GPUCACell = GPUCACellT; - using TrackSoAHost = TrackSoAHeterogeneousHost; - using TrackSoADevice = TrackSoAHeterogeneousDevice; - using HitContainer = typename TrackSoA::HitContainer; - using Tuple = HitContainer; - - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - using CellTracksVector = caStructures::CellTracksVectorT; - - using Quality = pixelTrack::Quality; - - using QualityCuts = pixelTrack::QualityCutsT; - using Params = caHitNtupletGenerator::ParamsT; - using Counters = caHitNtupletGenerator::Counters; - -public: - CAHitNtupletGeneratorOnGPU(const edm::ParameterSet& cfg, edm::ConsumesCollector&& iC) - : CAHitNtupletGeneratorOnGPU(cfg, iC) {} - CAHitNtupletGeneratorOnGPU(const edm::ParameterSet& cfg, edm::ConsumesCollector& iC); - - static void fillDescriptions(edm::ParameterSetDescription& desc); - static void fillDescriptionsCommon(edm::ParameterSetDescription& desc); - - void beginJob(); - void endJob(); - - TrackSoADevice makeTuplesAsync(HitsOnDevice const& hits_d, float bfield, cudaStream_t stream) const; - - TrackSoAHost makeTuples(HitsOnHost const& hits_d, float bfield) const; - -private: - void buildDoublets(const HitsConstView& hh, cudaStream_t 
stream) const; - - void hitNtuplets(const HitsConstView& hh, const edm::EventSetup& es, bool useRiemannFit, cudaStream_t cudaStream); - - void launchKernels(const HitsConstView& hh, bool useRiemannFit, cudaStream_t cudaStream) const; - - Params m_params; - - Counters* m_counters = nullptr; -}; - -#endif // RecoTracker_PixelSeeding_plugins_CAHitNtupletGeneratorOnGPU_h diff --git a/RecoTracker/PixelSeeding/plugins/CAStructures.h b/RecoTracker/PixelSeeding/plugins/CAStructures.h deleted file mode 100644 index a193f9333d255..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/CAStructures.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef RecoTracker_PixelSeeding_plugins_CAStructures_h -#define RecoTracker_PixelSeeding_plugins_CAStructures_h - -#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" -#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h" -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" - -namespace caStructures { - - // types - // using typename TrackerTraits::hindex_type = uint32_t; // FIXME from siPixelRecHitsHeterogeneousProduct - // using typename TrackerTraits::tindex_type = uint32_t; // for tuples - // using typename TrackerTraits::cindex_type = uint32_t; // for cells - - template - using CellNeighborsT = cms::cuda::VecArray; - - template - using CellTracksT = cms::cuda::VecArray; - - template - using CellNeighborsVectorT = cms::cuda::SimpleVector>; - - template - using CellTracksVectorT = cms::cuda::SimpleVector>; - - template - using OuterHitOfCellContainerT = cms::cuda::VecArray; - - template - using TupleMultiplicityT = cms::cuda::OneToManyAssoc; - - template - using HitToTupleT = cms::cuda::OneToManyAssoc; // 3.5 should be enough - - template - using TuplesContainerT = cms::cuda::OneToManyAssoc; - - template - struct OuterHitOfCellT { - OuterHitOfCellContainerT* container; - int32_t offset; - constexpr auto& operator[](int i) { return container[i - offset]; } - constexpr auto const& operator[](int i) const 
{ return container[i - offset]; } - }; - -} // namespace caStructures - -#endif diff --git a/RecoTracker/PixelSeeding/plugins/GPUCACell.h b/RecoTracker/PixelSeeding/plugins/GPUCACell.h deleted file mode 100644 index 3839179277133..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/GPUCACell.h +++ /dev/null @@ -1,390 +0,0 @@ -#ifndef RecoTracker_PixelSeeding_plugins_GPUCACell_h -#define RecoTracker_PixelSeeding_plugins_GPUCACell_h - -// -// Author: Felice Pantaleo, CERN -// - -// #define ONLY_TRIPLETS_IN_HOLE - -#include - -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" -#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" -#include "RecoTracker/PixelSeeding/interface/CircleEq.h" -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "CAStructures.h" - -template -class GPUCACellT { -public: - using PtrAsInt = unsigned long long; - - static constexpr auto maxCellsPerHit = TrackerTraits::maxCellsPerHit; - using OuterHitOfCellContainer = caStructures::OuterHitOfCellContainerT; - using OuterHitOfCell = caStructures::OuterHitOfCellT; - using CellNeighbors = caStructures::CellNeighborsT; - using CellTracks = caStructures::CellTracksT; - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - using CellTracksVector = caStructures::CellTracksVectorT; - - using HitsConstView = TrackingRecHitSoAConstView; - using hindex_type = typename TrackerTraits::hindex_type; - using tindex_type = typename TrackerTraits::tindex_type; - static constexpr auto invalidHitId = std::numeric_limits::max(); - - using TmpTuple = cms::cuda::VecArray; - - using HitContainer = typename TrackSoA::HitContainer; - using Quality = pixelTrack::Quality; - static constexpr auto bad = pixelTrack::Quality::bad; - - enum class 
StatusBit : uint16_t { kUsed = 1, kInTrack = 2, kKilled = 1 << 15 }; - - GPUCACellT() = default; - - __device__ __forceinline__ void init(CellNeighborsVector& cellNeighbors, - CellTracksVector& cellTracks, - const HitsConstView& hh, - int layerPairId, - hindex_type innerHitId, - hindex_type outerHitId) { - theInnerHitId = innerHitId; - theOuterHitId = outerHitId; - theLayerPairId_ = layerPairId; - theStatus_ = 0; - theFishboneId = invalidHitId; - - // optimization that depends on access pattern - theInnerZ = hh[innerHitId].zGlobal(); - theInnerR = hh[innerHitId].rGlobal(); - - // link to default empty - theOuterNeighbors = &cellNeighbors[0]; - theTracks = &cellTracks[0]; - assert(outerNeighbors().empty()); - assert(tracks().empty()); - } - - __device__ __forceinline__ int addOuterNeighbor(typename TrackerTraits::cindex_type t, - CellNeighborsVector& cellNeighbors) { - // use smart cache - if (outerNeighbors().empty()) { - auto i = cellNeighbors.extend(); // maybe wasted.... - if (i > 0) { - cellNeighbors[i].reset(); - __threadfence(); -#ifdef __CUDACC__ - auto zero = (PtrAsInt)(&cellNeighbors[0]); - atomicCAS((PtrAsInt*)(&theOuterNeighbors), - zero, - (PtrAsInt)(&cellNeighbors[i])); // if fails we cannot give "i" back... -#else - theOuterNeighbors = &cellNeighbors[i]; -#endif - } else - return -1; - } - __threadfence(); - return outerNeighbors().push_back(t); - } - - __device__ __forceinline__ int addTrack(tindex_type t, CellTracksVector& cellTracks) { - if (tracks().empty()) { - auto i = cellTracks.extend(); // maybe wasted.... - if (i > 0) { - cellTracks[i].reset(); - __threadfence(); -#ifdef __CUDACC__ - auto zero = (PtrAsInt)(&cellTracks[0]); - atomicCAS((PtrAsInt*)(&theTracks), zero, (PtrAsInt)(&cellTracks[i])); // if fails we cannot give "i" back... 
-#else - theTracks = &cellTracks[i]; -#endif - } else - return -1; - } - __threadfence(); - return tracks().push_back(t); - } - - __device__ __forceinline__ CellTracks& tracks() { return *theTracks; } - __device__ __forceinline__ CellTracks const& tracks() const { return *theTracks; } - __device__ __forceinline__ CellNeighbors& outerNeighbors() { return *theOuterNeighbors; } - __device__ __forceinline__ CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; } - __device__ __forceinline__ float inner_x(const HitsConstView& hh) const { return hh[theInnerHitId].xGlobal(); } - __device__ __forceinline__ float outer_x(const HitsConstView& hh) const { return hh[theOuterHitId].xGlobal(); } - __device__ __forceinline__ float inner_y(const HitsConstView& hh) const { return hh[theInnerHitId].yGlobal(); } - __device__ __forceinline__ float outer_y(const HitsConstView& hh) const { return hh[theOuterHitId].yGlobal(); } - __device__ __forceinline__ float inner_z(const HitsConstView& hh) const { return theInnerZ; } - // { return hh.zGlobal(theInnerHitId); } // { return theInnerZ; } - __device__ __forceinline__ float outer_z(const HitsConstView& hh) const { return hh[theOuterHitId].zGlobal(); } - __device__ __forceinline__ float inner_r(const HitsConstView& hh) const { return theInnerR; } - // { return hh.rGlobal(theInnerHitId); } // { return theInnerR; } - __device__ __forceinline__ float outer_r(const HitsConstView& hh) const { return hh[theOuterHitId].rGlobal(); } - - __device__ __forceinline__ auto inner_iphi(const HitsConstView& hh) const { return hh[theInnerHitId].iphi(); } - __device__ __forceinline__ auto outer_iphi(const HitsConstView& hh) const { return hh[theOuterHitId].iphi(); } - - __device__ __forceinline__ float inner_detIndex(const HitsConstView& hh) const { - return hh[theInnerHitId].detectorIndex(); - } - __device__ __forceinline__ float outer_detIndex(const HitsConstView& hh) const { - return hh[theOuterHitId].detectorIndex(); - } - - constexpr 
unsigned int inner_hit_id() const { return theInnerHitId; } - constexpr unsigned int outer_hit_id() const { return theOuterHitId; } - - __device__ void print_cell() const { - printf("printing cell: on layerPair: %d, innerHitId: %d, outerHitId: %d \n", - theLayerPairId_, - theInnerHitId, - theOuterHitId); - } - - __device__ bool check_alignment(const HitsConstView& hh, - GPUCACellT const& otherCell, - const float ptmin, - const float hardCurvCut, - const float caThetaCutBarrel, - const float caThetaCutForward, - const float dcaCutInnerTriplet, - const float dcaCutOuterTriplet) const { - // detIndex of the layerStart for the Phase1 Pixel Detector: - // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID] - // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856] - auto ri = inner_r(hh); - auto zi = inner_z(hh); - - auto ro = outer_r(hh); - auto zo = outer_z(hh); - - auto r1 = otherCell.inner_r(hh); - auto z1 = otherCell.inner_z(hh); - auto isBarrel = otherCell.outer_detIndex(hh) < TrackerTraits::last_barrel_detIndex; - bool aligned = areAlignedRZ(r1, - z1, - ri, - zi, - ro, - zo, - ptmin, - isBarrel ? caThetaCutBarrel : caThetaCutForward); // 2.f*thetaCut); // FIXME tune cuts - return (aligned && dcaCut(hh, - otherCell, - otherCell.inner_detIndex(hh) < TrackerTraits::last_bpix1_detIndex ? 
dcaCutInnerTriplet - : dcaCutOuterTriplet, - hardCurvCut)); // FIXME tune cuts - } - - __device__ __forceinline__ static bool areAlignedRZ( - float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) { - float radius_diff = std::abs(r1 - ro); - float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo); - - float pMin = ptmin * std::sqrt(distance_13_squared); // this needs to be divided by - // radius_diff later - - float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri)); - return tan_12_13_half_mul_distance_13_squared * pMin <= thetaCut * distance_13_squared * radius_diff; - } - - __device__ inline bool dcaCut(const HitsConstView& hh, - GPUCACellT const& otherCell, - const float region_origin_radius_plus_tolerance, - const float maxCurv) const { - auto x1 = otherCell.inner_x(hh); - auto y1 = otherCell.inner_y(hh); - - auto x2 = inner_x(hh); - auto y2 = inner_y(hh); - - auto x3 = outer_x(hh); - auto y3 = outer_y(hh); - - CircleEq eq(x1, y1, x2, y2, x3, y3); - - if (eq.curvature() > maxCurv) - return false; - - return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature()); - } - - __device__ __forceinline__ static bool dcaCutH(float x1, - float y1, - float x2, - float y2, - float x3, - float y3, - const float region_origin_radius_plus_tolerance, - const float maxCurv) { - CircleEq eq(x1, y1, x2, y2, x3, y3); - - if (eq.curvature() > maxCurv) - return false; - - return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature()); - } - - __device__ inline bool hole0(const HitsConstView& hh, GPUCACellT const& innerCell) const { - using namespace phase1PixelTopology; - - int p = innerCell.inner_iphi(hh); - if (p < 0) - p += std::numeric_limits::max(); - p = (max_ladder_bpx0 * p) / std::numeric_limits::max(); - p %= max_ladder_bpx0; - auto il = first_ladder_bpx0 + p; - auto r0 = 
hh.averageGeometry().ladderR[il]; - auto ri = innerCell.inner_r(hh); - auto zi = innerCell.inner_z(hh); - auto ro = outer_r(hh); - auto zo = outer_z(hh); - auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri); - auto z_in_ladder = std::abs(z0 - hh.averageGeometry().ladderZ[il]); - auto z_in_module = z_in_ladder - module_length_bpx0 * int(z_in_ladder / module_length_bpx0); - auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0); - return gap; - } - - __device__ inline bool hole4(const HitsConstView& hh, GPUCACellT const& innerCell) const { - using namespace phase1PixelTopology; - - int p = outer_iphi(hh); - if (p < 0) - p += std::numeric_limits::max(); - p = (max_ladder_bpx4 * p) / std::numeric_limits::max(); - p %= max_ladder_bpx4; - auto il = first_ladder_bpx4 + p; - auto r4 = hh.averageGeometry().ladderR[il]; - auto ri = innerCell.inner_r(hh); - auto zi = innerCell.inner_z(hh); - auto ro = outer_r(hh); - auto zo = outer_z(hh); - auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri); - auto z_in_ladder = std::abs(z4 - hh.averageGeometry().ladderZ[il]); - auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4); - auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4); - auto holeP = z4 > hh.averageGeometry().ladderMaxZ[il] && z4 < hh.averageGeometry().endCapZ[0]; - auto holeN = z4 < hh.averageGeometry().ladderMinZ[il] && z4 > hh.averageGeometry().endCapZ[1]; - return gap || holeP || holeN; - } - - // trying to free the track building process from hardcoded layers, leaving - // the visit of the graph based on the neighborhood connections between cells. 
- - template - __device__ inline void find_ntuplets(const HitsConstView& hh, - GPUCACellT* __restrict__ cells, - CellTracksVector& cellTracks, - HitContainer& foundNtuplets, - cms::cuda::AtomicPairCounter& apc, - Quality* __restrict__ quality, - TmpTuple& tmpNtuplet, - const unsigned int minHitsPerNtuplet, - bool startAt0) const { - // the building process for a track ends if: - // it has no right neighbor - // it has no compatible neighbor - // the ntuplets is then saved if the number of hits it contains is greater - // than a threshold - - if constexpr (DEPTH <= 0) { - printf("ERROR: GPUCACellT::find_ntuplets reached full depth!\n"); -#ifdef __CUDA_ARCH__ - __trap(); -#else - abort(); -#endif - } else { - auto doubletId = this - cells; - tmpNtuplet.push_back_unsafe(doubletId); - assert(tmpNtuplet.size() <= - int(TrackerTraits::maxHitsOnTrack - - 3)); //1 for the container, 1 because these are doublets, 1 because we may push another - - bool last = true; - for (unsigned int otherCell : outerNeighbors()) { - if (cells[otherCell].isKilled()) - continue; // killed by earlyFishbone - last = false; - cells[otherCell].template find_ntuplets( - hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0); - } - - if (last) { // if long enough save... 
- if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) { -#ifdef ONLY_TRIPLETS_IN_HOLE - // triplets accepted only pointing to the hole - if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) || - ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]]))) -#endif - { - hindex_type hits[TrackerTraits::maxDepth + 2]; - auto nh = 0U; - constexpr int maxFB = 2; // for the time being let's limit this - int nfb = 0; - for (auto c : tmpNtuplet) { - hits[nh++] = cells[c].theInnerHitId; - if (nfb < maxFB && cells[c].hasFishbone()) { - ++nfb; - hits[nh++] = cells[c].theFishboneId; // fishbone hit is always outer than inner hit - } - } - assert(nh < TrackerTraits::maxHitsOnTrack); - hits[nh] = theOuterHitId; - auto it = foundNtuplets.bulkFill(apc, hits, nh + 1); - if (it >= 0) { // if negative is overflow.... - for (auto c : tmpNtuplet) - cells[c].addTrack(it, cellTracks); - quality[it] = bad; // initialize to bad - } - } - } - } - tmpNtuplet.pop_back(); - assert(tmpNtuplet.size() < int(TrackerTraits::maxHitsOnTrack - 1)); - } - } - - // Cell status management - __device__ __forceinline__ void kill() { theStatus_ |= uint16_t(StatusBit::kKilled); } - __device__ __forceinline__ bool isKilled() const { return theStatus_ & uint16_t(StatusBit::kKilled); } - - __device__ __forceinline__ int16_t layerPairId() const { return theLayerPairId_; } - - __device__ __forceinline__ bool unused() const { return 0 == (uint16_t(StatusBit::kUsed) & theStatus_); } - __device__ __forceinline__ void setStatusBits(StatusBit mask) { theStatus_ |= uint16_t(mask); } - - __device__ __forceinline__ void setFishbone(hindex_type id, float z, const HitsConstView& hh) { - // make it deterministic: use the farther apart (in z) - auto old = theFishboneId; - while (old != - atomicCAS( - &theFishboneId, - old, - (invalidHitId == old || std::abs(z - theInnerZ) > std::abs(hh[old].zGlobal() - theInnerZ)) ? 
id : old)) - old = theFishboneId; - } - __device__ __forceinline__ auto fishboneId() const { return theFishboneId; } - __device__ __forceinline__ bool hasFishbone() const { return theFishboneId != invalidHitId; } - -private: - CellNeighbors* theOuterNeighbors; - CellTracks* theTracks; - - int16_t theLayerPairId_; - uint16_t theStatus_; // tbd - - float theInnerZ; - float theInnerR; - hindex_type theInnerHitId; - hindex_type theOuterHitId; - hindex_type theFishboneId; -}; - -#endif // RecoTracker_PixelSeeding_plugins_GPUCACell_h diff --git a/RecoTracker/PixelSeeding/plugins/HelixFitOnGPU.cc b/RecoTracker/PixelSeeding/plugins/HelixFitOnGPU.cc deleted file mode 100644 index c36ed924911f0..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/HelixFitOnGPU.cc +++ /dev/null @@ -1,22 +0,0 @@ -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HelixFitOnGPU.h" - -template -void HelixFitOnGPU::allocateOnGPU(TupleMultiplicity const *tupleMultiplicity, - OutputSoAView &helix_fit_results) { - tuples_ = &helix_fit_results.hitIndices(); - tupleMultiplicity_ = tupleMultiplicity; - outputSoa_ = helix_fit_results; - - assert(tuples_); - assert(tupleMultiplicity_); - assert(outputSoa_.chi2()); - assert(outputSoa_.pt()); -} - -template -void HelixFitOnGPU::deallocateOnGPU() {} - -template class HelixFitOnGPU; -template class HelixFitOnGPU; -template class HelixFitOnGPU; diff --git a/RecoTracker/PixelSeeding/plugins/HelixFitOnGPU.h b/RecoTracker/PixelSeeding/plugins/HelixFitOnGPU.h deleted file mode 100644 index 29916ab816d30..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/HelixFitOnGPU.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef RecoTracker_PixelSeeding_plugins_HelixFitOnGPU_h -#define RecoTracker_PixelSeeding_plugins_HelixFitOnGPU_h - -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -#include "RecoTracker/PixelTrackFitting/interface/FitResult.h" -#include 
"Geometry/CommonTopologies/interface/SimplePixelTopology.h" - -#include "CAStructures.h" - -namespace riemannFit { - // in case of memory issue can be made smaller - constexpr uint32_t maxNumberOfConcurrentFits = 32 * 1024; - constexpr uint32_t stride = maxNumberOfConcurrentFits; - using Matrix3x4d = Eigen::Matrix; - using Map3x4d = Eigen::Map >; - using Matrix6x4f = Eigen::Matrix; - using Map6x4f = Eigen::Map >; - - // hits - template - using Matrix3xNd = Eigen::Matrix; - template - using Map3xNd = Eigen::Map, 0, Eigen::Stride<3 * stride, stride> >; - // errors - template - using Matrix6xNf = Eigen::Matrix; - template - using Map6xNf = Eigen::Map, 0, Eigen::Stride<6 * stride, stride> >; - // fast fit - using Map4d = Eigen::Map >; - - template //a compile-time bounded for loop - constexpr void rolling_fits(F &&f) { - if constexpr (Start < End) { - f(std::integral_constant()); - rolling_fits(f); - } - } - -} // namespace riemannFit - -template -class HelixFitOnGPU { -public: - using TrackingRecHitSoAs = TrackingRecHitSoA; - - using HitView = TrackingRecHitSoAView; - using HitConstView = TrackingRecHitSoAConstView; - - using Tuples = typename TrackSoA::HitContainer; - using OutputSoAView = TrackSoAView; - - using TupleMultiplicity = caStructures::TupleMultiplicityT; - - explicit HelixFitOnGPU(float bf, bool fitNas4) : bField_(bf), fitNas4_(fitNas4) {} - ~HelixFitOnGPU() { deallocateOnGPU(); } - - void setBField(double bField) { bField_ = bField; } - void launchRiemannKernels(const HitConstView &hv, uint32_t nhits, uint32_t maxNumberOfTuples, cudaStream_t cudaStream); - void launchBrokenLineKernels(const HitConstView &hv, - uint32_t nhits, - uint32_t maxNumberOfTuples, - cudaStream_t cudaStream); - - void launchRiemannKernelsOnCPU(const HitConstView &hv, uint32_t nhits, uint32_t maxNumberOfTuples); - void launchBrokenLineKernelsOnCPU(const HitConstView &hv, uint32_t nhits, uint32_t maxNumberOfTuples); - - void allocateOnGPU(TupleMultiplicity const *tupleMultiplicity, 
OutputSoAView &helix_fit_results); - void deallocateOnGPU(); - -private: - static constexpr uint32_t maxNumberOfConcurrentFits_ = riemannFit::maxNumberOfConcurrentFits; - - // fowarded - Tuples const *tuples_ = nullptr; - TupleMultiplicity const *tupleMultiplicity_ = nullptr; - OutputSoAView outputSoa_; - float bField_; - - const bool fitNas4_; -}; - -#endif // RecoTracker_PixelSeeding_plugins_HelixFitOnGPU_h diff --git a/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU.cc b/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU.cc deleted file mode 100644 index 4f9037da0ceb7..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU.cc +++ /dev/null @@ -1,120 +0,0 @@ -#include "RiemannFitOnGPU.h" - -template -void HelixFitOnGPU::launchRiemannKernelsOnCPU(const TrackingRecHitSoAConstView &hv, - uint32_t nhits, - uint32_t maxNumberOfTuples) { - assert(tuples_); - - // Fit internals - auto hitsGPU = - std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double)); - auto hits_geGPU = - std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float)); - auto fast_fit_resultsGPU = - std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double)); - auto circle_fit_resultsGPU_holder = - std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::CircleFit)); - riemannFit::CircleFit *circle_fit_resultsGPU = (riemannFit::CircleFit *)(circle_fit_resultsGPU_holder.get()); - - for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) { - // triplets - kernel_FastFit<3, TrackerTraits>( - tuples_, tupleMultiplicity_, 3, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); - - kernel_CircleFit<3, TrackerTraits>(tupleMultiplicity_, - 3, - bField_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU, - offset); - - kernel_LineFit<3, TrackerTraits>(tupleMultiplicity_, - 3, - bField_, - 
outputSoa_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU, - offset); - - // quads - kernel_FastFit<4, TrackerTraits>( - tuples_, tupleMultiplicity_, 4, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); - - kernel_CircleFit<4, TrackerTraits>(tupleMultiplicity_, - 4, - bField_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU, - offset); - - kernel_LineFit<4, TrackerTraits>(tupleMultiplicity_, - 4, - bField_, - outputSoa_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU, - offset); - - if (fitNas4_) { - // penta - kernel_FastFit<4, TrackerTraits>( - tuples_, tupleMultiplicity_, 5, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); - - kernel_CircleFit<4, TrackerTraits>(tupleMultiplicity_, - 5, - bField_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU, - offset); - - kernel_LineFit<4, TrackerTraits>(tupleMultiplicity_, - 5, - bField_, - outputSoa_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU, - offset); - - } else { - // penta all 5 - kernel_FastFit<5, TrackerTraits>( - tuples_, tupleMultiplicity_, 5, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); - - kernel_CircleFit<5, TrackerTraits>(tupleMultiplicity_, - 5, - bField_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU, - offset); - - kernel_LineFit<5, TrackerTraits>(tupleMultiplicity_, - 5, - bField_, - outputSoa_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU, - offset); - } - } -} - -template class HelixFitOnGPU; -template class HelixFitOnGPU; -template class HelixFitOnGPU; diff --git a/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU.h b/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU.h deleted file mode 100644 index d54103a75e6ac..0000000000000 
--- a/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU.h +++ /dev/null @@ -1,191 +0,0 @@ -// -// Author: Felice Pantaleo, CERN -// - -#include - -#include - -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" -#include "RecoTracker/PixelTrackFitting/interface/RiemannFit.h" - -#include "HelixFitOnGPU.h" - -template -using Tuples = typename TrackSoA::HitContainer; -template -using OutputSoAView = TrackSoAView; -template -using TupleMultiplicity = caStructures::TupleMultiplicityT; - -template -__global__ void kernel_FastFit(Tuples const *__restrict__ foundNtuplets, - TupleMultiplicity const *__restrict__ tupleMultiplicity, - uint32_t nHits, - TrackingRecHitSoAConstView hh, - double *__restrict__ phits, - float *__restrict__ phits_ge, - double *__restrict__ pfast_fit, - uint32_t offset) { - constexpr uint32_t hitsInFit = N; - - assert(hitsInFit <= nHits); - - assert(pfast_fit); - assert(foundNtuplets); - assert(tupleMultiplicity); - - // look in bin for this hit multiplicity - auto local_start = blockIdx.x * blockDim.x + threadIdx.x; - -#ifdef RIEMANN_DEBUG - if (0 == local_start) - printf("%d Ntuple of size %d for %d hits to fit\n", tupleMultiplicity->size(nHits), nHits, hitsInFit); -#endif - - for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt; - local_idx += gridDim.x * blockDim.x) { - auto tuple_idx = local_idx + offset; - if (tuple_idx >= tupleMultiplicity->size(nHits)) - break; - - // get it from the ntuple container (one to one to helix) - auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx); - assert(int(tkid) < foundNtuplets->nOnes()); - - assert(foundNtuplets->size(tkid) == nHits); - - riemannFit::Map3xNd hits(phits + 
local_idx); - riemannFit::Map4d fast_fit(pfast_fit + local_idx); - riemannFit::Map6xNf hits_ge(phits_ge + local_idx); - - // Prepare data structure - auto const *hitId = foundNtuplets->begin(tkid); - for (unsigned int i = 0; i < hitsInFit; ++i) { - auto hit = hitId[i]; - float ge[6]; - hh.cpeParams().detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge); - - hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal(); - hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]; - } - riemannFit::fastFit(hits, fast_fit); - - // no NaN here.... - assert(fast_fit(0) == fast_fit(0)); - assert(fast_fit(1) == fast_fit(1)); - assert(fast_fit(2) == fast_fit(2)); - assert(fast_fit(3) == fast_fit(3)); - } -} - -template -__global__ void kernel_CircleFit(TupleMultiplicity const *__restrict__ tupleMultiplicity, - uint32_t nHits, - double bField, - double *__restrict__ phits, - float *__restrict__ phits_ge, - double *__restrict__ pfast_fit_input, - riemannFit::CircleFit *circle_fit, - uint32_t offset) { - assert(circle_fit); - assert(N <= nHits); - - // same as above... 
- - // look in bin for this hit multiplicity - auto local_start = blockIdx.x * blockDim.x + threadIdx.x; - for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt; - local_idx += gridDim.x * blockDim.x) { - auto tuple_idx = local_idx + offset; - if (tuple_idx >= tupleMultiplicity->size(nHits)) - break; - - riemannFit::Map3xNd hits(phits + local_idx); - riemannFit::Map4d fast_fit(pfast_fit_input + local_idx); - riemannFit::Map6xNf hits_ge(phits_ge + local_idx); - - riemannFit::VectorNd rad = (hits.block(0, 0, 2, N).colwise().norm()); - - riemannFit::Matrix2Nd hits_cov = riemannFit::Matrix2Nd::Zero(); - riemannFit::loadCovariance2D(hits_ge, hits_cov); - - circle_fit[local_idx] = riemannFit::circleFit(hits.block(0, 0, 2, N), hits_cov, fast_fit, rad, bField, true); - -#ifdef RIEMANN_DEBUG -// auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx); -// printf("kernelCircleFit circle.par(0,1,2): %d %f,%f,%f\n", tkid, -// circle_fit[local_idx].par(0), circle_fit[local_idx].par(1), circle_fit[local_idx].par(2)); -#endif - } -} - -template -__global__ void kernel_LineFit(TupleMultiplicity const *__restrict__ tupleMultiplicity, - uint32_t nHits, - double bField, - OutputSoAView results_view, - double *__restrict__ phits, - float *__restrict__ phits_ge, - double *__restrict__ pfast_fit_input, - riemannFit::CircleFit *__restrict__ circle_fit, - uint32_t offset) { - assert(circle_fit); - assert(N <= nHits); - - // same as above... 
- - // look in bin for this hit multiplicity - auto local_start = (blockIdx.x * blockDim.x + threadIdx.x); - for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt; - local_idx += gridDim.x * blockDim.x) { - auto tuple_idx = local_idx + offset; - if (tuple_idx >= tupleMultiplicity->size(nHits)) - break; - - // get it for the ntuple container (one to one to helix) - int32_t tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx); - - riemannFit::Map3xNd hits(phits + local_idx); - riemannFit::Map4d fast_fit(pfast_fit_input + local_idx); - riemannFit::Map6xNf hits_ge(phits_ge + local_idx); - - auto const &line_fit = riemannFit::lineFit(hits, hits_ge, circle_fit[local_idx], fast_fit, bField, true); - - riemannFit::fromCircleToPerigee(circle_fit[local_idx]); - - TracksUtilities::copyFromCircle(results_view, - circle_fit[local_idx].par, - circle_fit[local_idx].cov, - line_fit.par, - line_fit.cov, - 1.f / float(bField), - tkid); - results_view[tkid].pt() = bField / std::abs(circle_fit[local_idx].par(2)); - results_view[tkid].eta() = asinhf(line_fit.par(0)); - results_view[tkid].chi2() = (circle_fit[local_idx].chi2 + line_fit.chi2) / (2 * N - 5); - -#ifdef RIEMANN_DEBUG - printf("kernelLineFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n", - N, - nHits, - tkid, - circle_fit[local_idx].par(0), - circle_fit[local_idx].par(1), - circle_fit[local_idx].par(2)); - printf("kernelLineFit line.par(0,1): %d %f,%f\n", tkid, line_fit.par(0), line_fit.par(1)); - printf("kernelLineFit chi2 cov %f/%f %e,%e,%e,%e,%e\n", - circle_fit[local_idx].chi2, - line_fit.chi2, - circle_fit[local_idx].cov(0, 0), - circle_fit[local_idx].cov(1, 1), - circle_fit[local_idx].cov(2, 2), - line_fit.cov(0, 0), - line_fit.cov(1, 1)); -#endif - } -} diff --git a/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU.icc b/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU.icc deleted file mode 100644 index 44176f4bf9a25..0000000000000 --- 
a/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU.icc +++ /dev/null @@ -1,132 +0,0 @@ -#include "RiemannFitOnGPU.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -template -void HelixFitOnGPU::launchRiemannKernels(const TrackingRecHitSoAConstView &hv, - uint32_t nhits, - uint32_t maxNumberOfTuples, - cudaStream_t stream) { - assert(tuples_); - - auto blockSize = 64; - auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize; - - // Fit internals - auto hitsGPU = cms::cuda::make_device_unique( - maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double), stream); - auto hits_geGPU = cms::cuda::make_device_unique( - maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float), stream); - auto fast_fit_resultsGPU = cms::cuda::make_device_unique( - maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double), stream); - auto circle_fit_resultsGPU_holder = - cms::cuda::make_device_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::CircleFit), stream); - riemannFit::CircleFit *circle_fit_resultsGPU_ = (riemannFit::CircleFit *)(circle_fit_resultsGPU_holder.get()); - - for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) { - // triplets - kernel_FastFit<3, TrackerTraits><<>>( - tuples_, tupleMultiplicity_, 3, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); - cudaCheck(cudaGetLastError()); - - kernel_CircleFit<3, TrackerTraits><<>>(tupleMultiplicity_, - 3, - bField_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU_, - offset); - cudaCheck(cudaGetLastError()); - - kernel_LineFit<3, TrackerTraits><<>>(tupleMultiplicity_, - 3, - bField_, - outputSoa_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU_, - offset); - cudaCheck(cudaGetLastError()); - - // quads - kernel_FastFit<4, TrackerTraits><<>>( - tuples_, tupleMultiplicity_, 
4, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); - cudaCheck(cudaGetLastError()); - - kernel_CircleFit<4, TrackerTraits><<>>(tupleMultiplicity_, - 4, - bField_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU_, - offset); - cudaCheck(cudaGetLastError()); - - kernel_LineFit<4, TrackerTraits><<>>(tupleMultiplicity_, - 4, - bField_, - outputSoa_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU_, - offset); - cudaCheck(cudaGetLastError()); - - if (fitNas4_) { - // penta - kernel_FastFit<4, TrackerTraits><<>>( - tuples_, tupleMultiplicity_, 5, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); - cudaCheck(cudaGetLastError()); - - kernel_CircleFit<4, TrackerTraits><<>>(tupleMultiplicity_, - 5, - bField_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU_, - offset); - cudaCheck(cudaGetLastError()); - - kernel_LineFit<4, TrackerTraits><<>>(tupleMultiplicity_, - 5, - bField_, - outputSoa_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU_, - offset); - cudaCheck(cudaGetLastError()); - } else { - // penta all 5 - kernel_FastFit<5, TrackerTraits><<>>( - tuples_, tupleMultiplicity_, 5, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); - cudaCheck(cudaGetLastError()); - - kernel_CircleFit<5, TrackerTraits><<>>(tupleMultiplicity_, - 5, - bField_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU_, - offset); - cudaCheck(cudaGetLastError()); - - kernel_LineFit<5, TrackerTraits><<>>(tupleMultiplicity_, - 5, - bField_, - outputSoa_, - hitsGPU.get(), - hits_geGPU.get(), - fast_fit_resultsGPU.get(), - circle_fit_resultsGPU_, - offset); - cudaCheck(cudaGetLastError()); - } - } -} diff --git a/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU_HIonPhase1.cu 
b/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU_HIonPhase1.cu deleted file mode 100644 index f965f36de84ef..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU_HIonPhase1.cu +++ /dev/null @@ -1,3 +0,0 @@ -#include "RiemannFitOnGPU.icc" - -template class HelixFitOnGPU; diff --git a/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU_Phase1.cu b/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU_Phase1.cu deleted file mode 100644 index e67ea7c9b2b0a..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU_Phase1.cu +++ /dev/null @@ -1,3 +0,0 @@ -#include "RiemannFitOnGPU.icc" - -template class HelixFitOnGPU; diff --git a/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU_Phase2.cu b/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU_Phase2.cu deleted file mode 100644 index 04480eeb5da17..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/RiemannFitOnGPU_Phase2.cu +++ /dev/null @@ -1,3 +0,0 @@ -#include "RiemannFitOnGPU.icc" - -template class HelixFitOnGPU; diff --git a/RecoTracker/PixelSeeding/plugins/gpuFishbone.h b/RecoTracker/PixelSeeding/plugins/gpuFishbone.h deleted file mode 100644 index 76b5213e84f5b..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/gpuFishbone.h +++ /dev/null @@ -1,119 +0,0 @@ -#ifndef RecoTracker_PixelSeeding_plugins_gpuFishbone_h -#define RecoTracker_PixelSeeding_plugins_gpuFishbone_h - -#include -#include -#include -#include -#include - -#include "DataFormats/Math/interface/approx_atan2.h" -#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - -#include "GPUCACell.h" -#include "CAStructures.h" - -namespace gpuPixelDoublets { - - template - using CellNeighbors = caStructures::CellNeighborsT; - template - using CellTracks = caStructures::CellTracksT; - template - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - template - using CellTracksVector = caStructures::CellTracksVectorT; - template - using OuterHitOfCell = 
caStructures::OuterHitOfCellT; - template - using HitsConstView = typename GPUCACellT::HitsConstView; - - template - __global__ void fishbone(HitsConstView hh, - GPUCACellT* cells, - uint32_t const* __restrict__ nCells, - OuterHitOfCell const isOuterHitOfCellWrap, - int32_t nHits, - bool checkTrack) { - constexpr auto maxCellsPerHit = GPUCACellT::maxCellsPerHit; - - auto const isOuterHitOfCell = isOuterHitOfCellWrap.container; - int32_t offset = isOuterHitOfCellWrap.offset; - - // x runs faster... - auto firstY = threadIdx.y + blockIdx.y * blockDim.y; - auto firstX = threadIdx.x; - - float x[maxCellsPerHit], y[maxCellsPerHit], z[maxCellsPerHit], n[maxCellsPerHit]; - uint32_t cc[maxCellsPerHit]; - uint16_t d[maxCellsPerHit]; - uint8_t l[maxCellsPerHit]; - - for (int idy = firstY, nt = nHits - offset; idy < nt; idy += gridDim.y * blockDim.y) { - auto const& vc = isOuterHitOfCell[idy]; - auto size = vc.size(); - if (size < 2) - continue; - // if alligned kill one of the two. - // in principle one could try to relax the cut (only in r-z?) 
for jumping-doublets - auto const& c0 = cells[vc[0]]; - auto xo = c0.outer_x(hh); - auto yo = c0.outer_y(hh); - auto zo = c0.outer_z(hh); - auto sg = 0; - for (int32_t ic = 0; ic < size; ++ic) { - auto& ci = cells[vc[ic]]; - if (ci.unused()) - continue; // for triplets equivalent to next - if (checkTrack && ci.tracks().empty()) - continue; - cc[sg] = vc[ic]; - l[sg] = ci.layerPairId(); - d[sg] = ci.inner_detIndex(hh); - x[sg] = ci.inner_x(hh) - xo; - y[sg] = ci.inner_y(hh) - yo; - z[sg] = ci.inner_z(hh) - zo; - n[sg] = x[sg] * x[sg] + y[sg] * y[sg] + z[sg] * z[sg]; - ++sg; - } - if (sg < 2) - continue; - // here we parallelize - for (int32_t ic = firstX; ic < sg - 1; ic += blockDim.x) { - auto& ci = cells[cc[ic]]; - for (auto jc = ic + 1; jc < sg; ++jc) { - auto& cj = cells[cc[jc]]; - // must be different detectors - // if (d[ic]==d[jc]) continue; - auto cos12 = x[ic] * x[jc] + y[ic] * y[jc] + z[ic] * z[jc]; - if (d[ic] != d[jc] && cos12 * cos12 >= 0.99999f * (n[ic] * n[jc])) { - // alligned: kill farthest (prefer consecutive layers) - // if same layer prefer farthest (longer level arm) and make space for intermediate hit - bool sameLayer = l[ic] == l[jc]; - if (n[ic] > n[jc]) { - if (sameLayer) { - cj.kill(); // closest - ci.setFishbone(cj.inner_hit_id(), cj.inner_z(hh), hh); - } else { - ci.kill(); // farthest - // break; // removed to improve reproducibility. keep it for reference and tests - } - } else { - if (!sameLayer) { - cj.kill(); // farthest - } else { - ci.kill(); // closest - cj.setFishbone(ci.inner_hit_id(), ci.inner_z(hh), hh); - // break; // removed to improve reproducibility. 
keep it for reference and tests - } - } - } - } // cj - } // ci - } // hits - } - -} // namespace gpuPixelDoublets - -#endif // RecoTracker_PixelSeeding_plugins_gpuFishbone_h diff --git a/RecoTracker/PixelSeeding/plugins/gpuPixelDoublets.h b/RecoTracker/PixelSeeding/plugins/gpuPixelDoublets.h deleted file mode 100644 index 472fa7117fd77..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/gpuPixelDoublets.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef RecoTracker_PixelSeeding_plugins_gpuPixelDoublets_h -#define RecoTracker_PixelSeeding_plugins_gpuPixelDoublets_h - -#include "RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h" - -#define CONSTANT_VAR __constant__ - -namespace gpuPixelDoublets { - - template - __global__ void initDoublets(OuterHitOfCell isOuterHitOfCell, - int nHits, - CellNeighborsVector* cellNeighbors, - CellNeighbors* cellNeighborsContainer, - CellTracksVector* cellTracks, - CellTracks* cellTracksContainer) { - assert(isOuterHitOfCell.container); - int first = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = first; i < nHits - isOuterHitOfCell.offset; i += gridDim.x * blockDim.x) - isOuterHitOfCell.container[i].reset(); - - if (0 == first) { - cellNeighbors->construct(TrackerTraits::maxNumOfActiveDoublets, cellNeighborsContainer); - cellTracks->construct(TrackerTraits::maxNumOfActiveDoublets, cellTracksContainer); - auto i = cellNeighbors->extend(); - assert(0 == i); - (*cellNeighbors)[0].reset(); - i = cellTracks->extend(); - assert(0 == i); - (*cellTracks)[0].reset(); - } - } - - constexpr auto getDoubletsFromHistoMaxBlockSize = 64; // for both x and y - constexpr auto getDoubletsFromHistoMinBlocksPerMP = 16; - - template - __global__ -#ifdef __CUDACC__ - __launch_bounds__(getDoubletsFromHistoMaxBlockSize, getDoubletsFromHistoMinBlocksPerMP) -#endif - void getDoubletsFromHisto(GPUCACellT* cells, - uint32_t* nCells, - CellNeighborsVector* cellNeighbors, - CellTracksVector* cellTracks, - HitsConstView hh, - OuterHitOfCell isOuterHitOfCell, 
- int nActualPairs, - const int maxNumOfDoublets, - CellCutsT* const cuts) { - - doubletsFromHisto( - nActualPairs, maxNumOfDoublets, cells, nCells, cellNeighbors, cellTracks, hh, isOuterHitOfCell, *cuts); - } - -} // namespace gpuPixelDoublets - -#endif // RecoTracker_PixelSeeding_plugins_gpuPixelDoublets_h diff --git a/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h b/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h deleted file mode 100644 index a15095487b097..0000000000000 --- a/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h +++ /dev/null @@ -1,315 +0,0 @@ -#ifndef RecoTracker_PixelSeeding_plugins_gpuPixelDoubletsAlgos_h -#define RecoTracker_PixelSeeding_plugins_gpuPixelDoubletsAlgos_h - -#include -#include -#include -#include -#include - -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -#include "DataFormats/Math/interface/approx_atan2.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - -#include "CAStructures.h" -#include "GPUCACell.h" - -//#define GPU_DEBUG -//#define NTUPLE_DEBUG - -namespace gpuPixelDoublets { - - template - using CellNeighbors = caStructures::CellNeighborsT; - template - using CellTracks = caStructures::CellTracksT; - template - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - template - using CellTracksVector = caStructures::CellTracksVectorT; - template - using OuterHitOfCell = caStructures::OuterHitOfCellT; - template - using HitsConstView = typename GPUCACellT::HitsConstView; - - template - struct CellCutsT { - using H = HitsConstView; - using T = TrackerTraits; - - CellCutsT() = default; - - CellCutsT(const bool doClusterCut, - const bool doZ0Cut, - const bool doPtCut, - const bool idealConditions, - const float z0Cut, - const float ptCut, - const int minYsizeB1, - const int minYsizeB2, - const std::vector& 
phiCutsV) - : doClusterCut_(doClusterCut), - doZ0Cut_(doZ0Cut), - doPtCut_(doPtCut), - idealConditions_(idealConditions), - z0Cut_(z0Cut), - ptCut_(ptCut), - minYsizeB1_(minYsizeB1), - minYsizeB2_(minYsizeB2) { - assert(phiCutsV.size() == TrackerTraits::nPairs); - std::copy(phiCutsV.begin(), phiCutsV.end(), &phiCuts[0]); - } - - bool doClusterCut_; - bool doZ0Cut_; - bool doPtCut_; - bool idealConditions_; //this is actually not used by phase2 - - float z0Cut_; - float ptCut_; - - int minYsizeB1_; - int minYsizeB2_; - - int phiCuts[T::nPairs]; - - __device__ __forceinline__ bool zSizeCut(H hh, int i, int o) const { - const uint32_t mi = hh[i].detectorIndex(); - - bool innerB1 = mi < T::last_bpix1_detIndex; - bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2; - auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1; - - if (mes < 0) - return false; - - const uint32_t mo = hh[o].detectorIndex(); - auto so = hh[o].clusterSizeY(); - - auto dz = hh[i].zGlobal() - hh[o].zGlobal(); - auto dr = hh[i].rGlobal() - hh[o].rGlobal(); - - auto innerBarrel = mi < T::last_barrel_detIndex; - auto onlyBarrel = mo < T::last_barrel_detIndex; - - if (not innerBarrel and not onlyBarrel) - return false; - auto dy = innerB1 ? T::maxDYsize12 : T::maxDYsize; - - return onlyBarrel ? so > 0 && std::abs(so - mes) > dy - : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * T::dzdrFact + 0.5f)) > T::maxDYPred; - } - - __device__ __forceinline__ bool clusterCut(H hh, int i) const { - const uint32_t mi = hh[i].detectorIndex(); - bool innerB1orB2 = mi < T::last_bpix2_detIndex; - - if (!innerB1orB2) - return false; - - bool innerB1 = mi < T::last_bpix1_detIndex; - bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2; - auto mes = (!innerB1) || isOuterLadder ? 
hh[i].clusterSizeY() : -1; - - if (innerB1) // B1 - if (mes > 0 && mes < minYsizeB1_) - return true; // only long cluster (5*8) - bool innerB2 = (mi >= T::last_bpix1_detIndex) && (mi < T::last_bpix2_detIndex); //FIXME number - if (innerB2) // B2 and F1 - if (mes > 0 && mes < minYsizeB2_) - return true; - - return false; - } - }; - - template - __device__ __forceinline__ void doubletsFromHisto(uint32_t nPairs, - uint32_t maxNumOfDoublets, - GPUCACellT* cells, - uint32_t* nCells, - CellNeighborsVector* cellNeighbors, - CellTracksVector* cellTracks, - HitsConstView hh, - OuterHitOfCell isOuterHitOfCell, - CellCutsT const& cuts) { - // ysize cuts (z in the barrel) times 8 - // these are used if doClusterCut is true - - const bool doClusterCut = cuts.doClusterCut_; - const bool doZ0Cut = cuts.doZ0Cut_; - const bool doPtCut = cuts.doPtCut_; - - const float z0cut = cuts.z0Cut_; // cm - const float hardPtCut = cuts.ptCut_; // GeV - // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field) - const float minRadius = hardPtCut * 87.78f; - const float minRadius2T4 = 4.f * minRadius * minRadius; - - using PhiBinner = typename TrackingRecHitSoA::PhiBinner; - - auto const& __restrict__ phiBinner = hh.phiBinner(); - uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data(); - assert(offsets); - - auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; }; - - // nPairsMax to be optimized later (originally was 64). - // If it should be much bigger, consider using a block-wide parallel prefix scan, - // e.g. 
see https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html - - __shared__ uint32_t innerLayerCumulativeSize[TrackerTraits::nPairs]; - __shared__ uint32_t ntot; - if (threadIdx.y == 0 && threadIdx.x == 0) { - innerLayerCumulativeSize[0] = layerSize(TrackerTraits::layerPairs[0]); - for (uint32_t i = 1; i < nPairs; ++i) { - innerLayerCumulativeSize[i] = innerLayerCumulativeSize[i - 1] + layerSize(TrackerTraits::layerPairs[2 * i]); - } - ntot = innerLayerCumulativeSize[nPairs - 1]; - } - __syncthreads(); - - // x runs faster - auto idy = blockIdx.y * blockDim.y + threadIdx.y; - auto first = threadIdx.x; - auto stride = blockDim.x; - - uint32_t pairLayerId = 0; // cannot go backward - - for (auto j = idy; j < ntot; j += blockDim.y * gridDim.y) { - while (j >= innerLayerCumulativeSize[pairLayerId++]) - ; - --pairLayerId; // move to lower_bound ?? - - assert(pairLayerId < nPairs); - assert(j < innerLayerCumulativeSize[pairLayerId]); - assert(0 == pairLayerId || j >= innerLayerCumulativeSize[pairLayerId - 1]); - - uint8_t inner = TrackerTraits::layerPairs[2 * pairLayerId]; - uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1]; - assert(outer > inner); - - auto hoff = PhiBinner::histOff(outer); - auto i = (0 == pairLayerId) ? 
j : j - innerLayerCumulativeSize[pairLayerId - 1]; - i += offsets[inner]; - - assert(i >= offsets[inner]); - assert(i < offsets[inner + 1]); - - // found hit corresponding to our cuda thread, now do the job - - if (hh[i].detectorIndex() > gpuClustering::maxNumModules) - continue; // invalid - - /* maybe clever, not effective when zoCut is on - auto bpos = (mi%8)/4; // if barrel is 1 for z>0 - auto fpos = (outer>3) & (outer<7); - if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue; - */ - - auto mez = hh[i].zGlobal(); - - if (mez < TrackerTraits::minz[pairLayerId] || mez > TrackerTraits::maxz[pairLayerId]) - continue; - - if (doClusterCut && outer > pixelTopology::last_barrel_layer && cuts.clusterCut(hh, i)) - continue; - - auto mep = hh[i].iphi(); - auto mer = hh[i].rGlobal(); - - auto ptcut = [&](int j, int16_t idphi) { - auto r2t4 = minRadius2T4; - auto ri = mer; - auto ro = hh[j].rGlobal(); - auto dphi = short2phi(idphi); - return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri); - }; - auto z0cutoff = [&](int j) { - auto zo = hh[j].zGlobal(); - auto ro = hh[j].rGlobal(); - auto dr = ro - mer; - return dr > TrackerTraits::maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr; - }; - - auto iphicut = cuts.phiCuts[pairLayerId]; - - auto kl = PhiBinner::bin(int16_t(mep - iphicut)); - auto kh = PhiBinner::bin(int16_t(mep + iphicut)); - auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); }; - -#ifdef GPU_DEBUG - int tot = 0; - int nmin = 0; - int tooMany = 0; -#endif - - auto khh = kh; - incr(khh); - for (auto kk = kl; kk != khh; incr(kk)) { -#ifdef GPU_DEBUG - if (kk != kl && kk != kh) - nmin += phiBinner.size(kk + hoff); -#endif - auto const* __restrict__ p = phiBinner.begin(kk + hoff); - auto const* __restrict__ e = phiBinner.end(kk + hoff); - p += first; - for (; p < e; p += stride) { - auto oi = __ldg(p); - assert(oi >= offsets[outer]); - assert(oi < offsets[outer + 1]); - auto mo = hh[oi].detectorIndex(); - - if (mo > 
gpuClustering::maxNumModules) - continue; // invalid - - if (doZ0Cut && z0cutoff(oi)) - continue; - auto mop = hh[oi].iphi(); - uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop))); - if (idphi > iphicut) - continue; - - if (doClusterCut && cuts.zSizeCut(hh, i, oi)) - continue; - if (doPtCut && ptcut(oi, idphi)) - continue; - - auto ind = atomicAdd(nCells, 1); - if (ind >= maxNumOfDoublets) { - atomicSub(nCells, 1); - break; - } // move to SimpleVector?? - // int layerPairId, int doubletId, int innerHitId, int outerHitId) - cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi); - isOuterHitOfCell[oi].push_back(ind); -#ifdef GPU_DEBUG - if (isOuterHitOfCell[oi].full()) - ++tooMany; - ++tot; -#endif - } - } -// #endif -#ifdef GPU_DEBUG - if (tooMany > 0 || tot > 0) - printf("OuterHitOfCell for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f %s\n", - i, - inner, - outer, - nmin, - tot, - tooMany, - iphicut, - TrackerTraits::minz[pairLayerId], - TrackerTraits::maxz[pairLayerId], - tooMany > 0 ? "FULL!!" : "not full."); -#endif - } // loop in block... 
- } - -} // namespace gpuPixelDoublets - -#endif // RecoTracker_PixelSeeding_plugins_gpuPixelDoubletsAlgos_h diff --git a/RecoTracker/PixelSeeding/test/BuildFile.xml b/RecoTracker/PixelSeeding/test/BuildFile.xml index e130cbbb8ae59..41777c0752c7c 100644 --- a/RecoTracker/PixelSeeding/test/BuildFile.xml +++ b/RecoTracker/PixelSeeding/test/BuildFile.xml @@ -23,16 +23,10 @@ - - - - - - - + diff --git a/RecoTracker/PixelSeeding/test/CAsizes_t.cpp b/RecoTracker/PixelSeeding/test/CAsizes_t.cpp deleted file mode 100644 index 97eae610c8670..0000000000000 --- a/RecoTracker/PixelSeeding/test/CAsizes_t.cpp +++ /dev/null @@ -1,38 +0,0 @@ -#include "RecoTracker/PixelSeeding/plugins/GPUCACell.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include -#include - -template -void print() { - std::cout << "size of " << typeid(T).name() << ' ' << sizeof(T) << std::endl; -} - -int main() { - using namespace pixelTopology; - using namespace caStructures; - //for Phase-I - print>(); - print>(); - print>(); - print>(); - print>(); - print>(); - print>(); - - print>(); - - //for Phase-II - - print>(); - print>(); - print>(); - print>(); - print>(); - print>(); - print>(); - - print>(); - - return 0; -} diff --git a/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml b/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml index 6c8c102293651..eae412ec800cc 100644 --- a/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml +++ b/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml @@ -1,10 +1,7 @@ - - - diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpCUDA.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpCUDA.cc deleted file mode 100644 index 6bff9a7c42292..0000000000000 --- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpCUDA.cc +++ /dev/null @@ -1,104 +0,0 @@ -#include -#include // needed here by soa layout - -#include "CUDADataFormats/Common/interface/Product.h" -#include "DataFormats/Common/interface/Handle.h" -#include 
"FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDAnalyzer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "FWCore/Utilities/interface/RunningAverage.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h" - -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h" - -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" - -template -class PixelTrackDumpCUDAT : public edm::global::EDAnalyzer<> { -public: - using TrackSoAHost = TrackSoAHeterogeneousHost; - using TrackSoADevice = TrackSoAHeterogeneousDevice; - - using VertexSoAHost = ZVertexSoAHost; - using VertexSoADevice = ZVertexSoADevice; - - explicit PixelTrackDumpCUDAT(const edm::ParameterSet& iConfig); - ~PixelTrackDumpCUDAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void analyze(edm::StreamID streamID, edm::Event const& iEvent, const edm::EventSetup& iSetup) const override; - const bool m_onGPU; - edm::EDGetTokenT> tokenGPUTrack_; - edm::EDGetTokenT> tokenGPUVertex_; - edm::EDGetTokenT tokenSoATrack_; - edm::EDGetTokenT tokenSoAVertex_; -}; - -template -PixelTrackDumpCUDAT::PixelTrackDumpCUDAT(const edm::ParameterSet& iConfig) - : 
m_onGPU(iConfig.getParameter("onGPU")) { - if (m_onGPU) { - tokenGPUTrack_ = consumes(iConfig.getParameter("pixelTrackSrc")); - tokenGPUVertex_ = consumes(iConfig.getParameter("pixelVertexSrc")); - } else { - tokenSoATrack_ = consumes(iConfig.getParameter("pixelTrackSrc")); - tokenSoAVertex_ = consumes(iConfig.getParameter("pixelVertexSrc")); - } -} - -template -void PixelTrackDumpCUDAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - desc.add("onGPU", true); - desc.add("pixelTrackSrc", edm::InputTag("pixelTracksCUDA")); - desc.add("pixelVertexSrc", edm::InputTag("pixelVerticesCUDA")); - descriptions.addWithDefaultLabel(desc); -} - -template -void PixelTrackDumpCUDAT::analyze(edm::StreamID streamID, - edm::Event const& iEvent, - const edm::EventSetup& iSetup) const { - if (m_onGPU) { - auto const& hTracks = iEvent.get(tokenGPUTrack_); - cms::cuda::ScopedContextProduce ctx{hTracks}; - - auto const& tracks = ctx.get(hTracks); - auto const* tsoa = &tracks; - assert(tsoa->buffer()); - - auto const& vertices = ctx.get(iEvent.get(tokenGPUVertex_)); - auto const* vsoa = &vertices; - assert(vsoa->buffer()); - - } else { - auto const& tsoa = iEvent.get(tokenSoATrack_); - assert(tsoa.buffer()); - - auto const& vsoa = iEvent.get(tokenSoAVertex_); - assert(vsoa.buffer()); - } -} - -using PixelTrackDumpCUDAPhase1 = PixelTrackDumpCUDAT; -DEFINE_FWK_MODULE(PixelTrackDumpCUDAPhase1); - -using PixelTrackDumpCUDAPhase2 = PixelTrackDumpCUDAT; -DEFINE_FWK_MODULE(PixelTrackDumpCUDAPhase2); - -using PixelTrackDumpCUDAHIonPhase1 = PixelTrackDumpCUDAT; -DEFINE_FWK_MODULE(PixelTrackDumpCUDAHIonPhase1); diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoA.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoA.cc deleted file mode 100644 index ef65feb4f8b28..0000000000000 --- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoA.cc +++ /dev/null @@ -1,271 +0,0 @@ -#include -#include 
-//#include -#include -#include -#include -#include -#include - -#include "CUDADataFormats/Common/interface/HostProduct.h" -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "DataFormats/BeamSpot/interface/BeamSpot.h" -#include "DataFormats/Common/interface/OrphanHandle.h" -#include "DataFormats/GeometrySurface/interface/Plane.h" -#include "DataFormats/TrackReco/interface/Track.h" -#include "DataFormats/TrackReco/interface/TrackExtra.h" -#include "DataFormats/TrackReco/interface/TrackFwd.h" -#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" -#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" -#include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "Geometry/Records/interface/TrackerTopologyRcd.h" -#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" -#include "RecoTracker/PixelTrackFitting/interface/FitUtils.h" -#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h" -#include 
"TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h" -#include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h" - -#include "storeTracks.h" - -/** - * This class creates "legacy" reco::Track - * objects from the output of SoA CA. - */ -template -class PixelTrackProducerFromSoAT : public edm::global::EDProducer<> { - using TrackSoAHost = TrackSoAHeterogeneousHost; - using TracksHelpers = TracksUtilities; - using HMSstorage = HostProduct; - using IndToEdm = std::vector; - -public: - explicit PixelTrackProducerFromSoAT(const edm::ParameterSet &iConfig); - ~PixelTrackProducerFromSoAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions &descriptions); - -private: - void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override; - - // Event Data tokens - const edm::EDGetTokenT tBeamSpot_; - const edm::EDGetTokenT tokenTrack_; - const edm::EDGetTokenT cpuHits_; - const edm::EDGetTokenT hmsToken_; - // Event Setup tokens - const edm::ESGetToken idealMagneticFieldToken_; - const edm::ESGetToken ttTopoToken_; - - int32_t const minNumberOfHits_; - pixelTrack::Quality const minQuality_; -}; - -template -PixelTrackProducerFromSoAT::PixelTrackProducerFromSoAT(const edm::ParameterSet &iConfig) - : tBeamSpot_(consumes(iConfig.getParameter("beamSpot"))), - tokenTrack_(consumes(iConfig.getParameter("trackSrc"))), - cpuHits_(consumes(iConfig.getParameter("pixelRecHitLegacySrc"))), - hmsToken_(consumes(iConfig.getParameter("pixelRecHitLegacySrc"))), - idealMagneticFieldToken_(esConsumes()), - ttTopoToken_(esConsumes()), - minNumberOfHits_(iConfig.getParameter("minNumberOfHits")), - minQuality_(pixelTrack::qualityByName(iConfig.getParameter("minQuality"))) { - if (minQuality_ == pixelTrack::Quality::notQuality) { - throw cms::Exception("PixelTrackConfiguration") - << iConfig.getParameter("minQuality") + " is not a pixelTrack::Quality"; - } - if 
(minQuality_ < pixelTrack::Quality::dup) { - throw cms::Exception("PixelTrackConfiguration") - << iConfig.getParameter("minQuality") + " not supported"; - } - produces(); - produces(); - // TrackCollection refers to TrackingRechit and TrackExtra - // collections, need to declare its production after them to work - // around a rare race condition in framework scheduling - produces(); - produces(); -} - -template -void PixelTrackProducerFromSoAT::fillDescriptions(edm::ConfigurationDescriptions &descriptions) { - edm::ParameterSetDescription desc; - desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); - desc.add("trackSrc", edm::InputTag("pixelTracksSoA")); - desc.add("pixelRecHitLegacySrc", edm::InputTag("siPixelRecHitsPreSplittingLegacy")); - desc.add("minNumberOfHits", 0); - desc.add("minQuality", "loose"); - descriptions.addWithDefaultLabel(desc); -} - -template -void PixelTrackProducerFromSoAT::produce(edm::StreamID streamID, - edm::Event &iEvent, - const edm::EventSetup &iSetup) const { - // enum class Quality : uint8_t { bad = 0, edup, dup, loose, strict, tight, highPurity }; - reco::TrackBase::TrackQuality recoQuality[] = {reco::TrackBase::undefQuality, - reco::TrackBase::undefQuality, - reco::TrackBase::discarded, - reco::TrackBase::loose, - reco::TrackBase::tight, - reco::TrackBase::tight, - reco::TrackBase::highPurity}; - assert(reco::TrackBase::highPurity == recoQuality[int(pixelTrack::Quality::highPurity)]); - - // std::cout << "Converting gpu helix in reco tracks" << std::endl; - - auto indToEdmP = std::make_unique(); - auto &indToEdm = *indToEdmP; - - auto const &idealField = iSetup.getData(idealMagneticFieldToken_); - - pixeltrackfitting::TracksWithRecHits tracks; - - auto const &httopo = iSetup.getData(ttTopoToken_); - - const auto &bsh = iEvent.get(tBeamSpot_); - GlobalPoint bs(bsh.x0(), bsh.y0(), bsh.z0()); - - auto const &rechits = iEvent.get(cpuHits_); - std::vector hitmap; - auto const &rcs = rechits.data(); - auto const nhits = rcs.size(); - 
- hitmap.resize(nhits, nullptr); - - auto const *hitsModuleStart = iEvent.get(hmsToken_).get(); - - for (auto const &hit : rcs) { - auto const &thit = static_cast(hit); - auto const detI = thit.det()->index(); - auto const &clus = thit.firstClusterRef(); - assert(clus.isPixel()); - auto const idx = hitsModuleStart[detI] + clus.pixelCluster().originalId(); - if (idx >= hitmap.size()) - hitmap.resize(idx + 256, nullptr); // only in case of hit overflow in one module - - assert(nullptr == hitmap[idx]); - hitmap[idx] = &hit; - } - - std::vector hits; - hits.reserve(5); - - auto const &tsoa = iEvent.get(tokenTrack_); - auto const *quality = tsoa.view().quality(); - auto const &hitIndices = tsoa.view().hitIndices(); - auto nTracks = tsoa.view().nTracks(); - - tracks.reserve(nTracks); - - int32_t nt = 0; - - // sort index by pt - std::vector sortIdxs(nTracks); - std::iota(sortIdxs.begin(), sortIdxs.end(), 0); - // sort good-quality tracks by pt, keep bad-quality tracks in the bottom - std::sort(sortIdxs.begin(), sortIdxs.end(), [&](int32_t const i1, int32_t const i2) { - if (quality[i1] >= minQuality_ && quality[i2] >= minQuality_) - return tsoa.view()[i1].pt() > tsoa.view()[i2].pt(); - else - return quality[i1] > quality[i2]; - }); - - // store the index of the SoA: indToEdm[index_SoAtrack] -> index_edmTrack (if it exists) - indToEdm.resize(sortIdxs.size(), -1); - for (const auto &it : sortIdxs) { - auto nHits = TracksHelpers::nHits(tsoa.view(), it); - assert(nHits >= 3); - auto q = quality[it]; - - if (q < minQuality_) - continue; - if (nHits < minNumberOfHits_) // move to nLayers? - continue; - indToEdm[it] = nt; - ++nt; - - hits.resize(nHits); - auto b = hitIndices.begin(it); - for (int iHit = 0; iHit < nHits; ++iHit) - hits[iHit] = hitmap[*(b + iHit)]; - - // mind: this values are respect the beamspot! 
- float chi2 = tsoa.view()[it].chi2(); - float phi = TracksHelpers::phi(tsoa.view(), it); - - riemannFit::Vector5d ipar, opar; - riemannFit::Matrix5d icov, ocov; - TracksHelpers::template copyToDense(tsoa.view(), ipar, icov, it); - riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); - - LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); - AlgebraicSymMatrix55 m; - for (int i = 0; i < 5; ++i) - for (int j = i; j < 5; ++j) - m(i, j) = ocov(i, j); - - float sp = std::sin(phi); - float cp = std::cos(phi); - Surface::RotationType rot(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0); - - Plane impPointPlane(bs, rot); - GlobalTrajectoryParameters gp( - impPointPlane.toGlobal(lpar.position()), impPointPlane.toGlobal(lpar.momentum()), lpar.charge(), &idealField); - JacobianLocalToCurvilinear jl2c(impPointPlane, lpar, idealField); - - AlgebraicSymMatrix55 mo = ROOT::Math::Similarity(jl2c.jacobian(), m); - - int ndof = 2 * hits.size() - 5; - chi2 = chi2 * ndof; - GlobalPoint vv = gp.position(); - math::XYZPoint pos(vv.x(), vv.y(), vv.z()); - GlobalVector pp = gp.momentum(); - math::XYZVector mom(pp.x(), pp.y(), pp.z()); - - auto track = std::make_unique(chi2, ndof, pos, mom, gp.charge(), CurvilinearTrajectoryError(mo)); - - // bad and edup not supported as fit not present or not reliable - auto tkq = recoQuality[int(q)]; - track->setQuality(tkq); - // loose,tight and HP are inclusive - if (reco::TrackBase::highPurity == tkq) { - track->setQuality(reco::TrackBase::tight); - track->setQuality(reco::TrackBase::loose); - } else if (reco::TrackBase::tight == tkq) { - track->setQuality(reco::TrackBase::loose); - } - track->setQuality(tkq); - // filter??? 
- tracks.emplace_back(track.release(), hits); - } - // std::cout << "processed " << nt << " good tuples " << tracks.size() << "out of " << indToEdm.size() << std::endl; - - // store tracks - storeTracks(iEvent, tracks, httopo); - iEvent.put(std::move(indToEdmP)); -} - -using PixelTrackProducerFromSoAPhase1 = PixelTrackProducerFromSoAT; -DEFINE_FWK_MODULE(PixelTrackProducerFromSoAPhase1); - -using PixelTrackProducerFromSoAPhase2 = PixelTrackProducerFromSoAT; -DEFINE_FWK_MODULE(PixelTrackProducerFromSoAPhase2); - -using PixelTrackProducerFromSoAHIonPhase1 = PixelTrackProducerFromSoAT; -DEFINE_FWK_MODULE(PixelTrackProducerFromSoAHIonPhase1); diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc deleted file mode 100644 index fc2c76ff00155..0000000000000 --- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc +++ /dev/null @@ -1,113 +0,0 @@ -#include -#include // needed here by soa layout - -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/Common/interface/HostProduct.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include 
"HeterogeneousCore/CUDACore/interface/ScopedContext.h" - -// Switch on to enable checks and printout for found tracks -// #define PIXEL_DEBUG_PRODUCE - -template -class PixelTrackSoAFromCUDAT : public edm::stream::EDProducer { - using TrackSoAHost = TrackSoAHeterogeneousHost; - using TrackSoADevice = TrackSoAHeterogeneousDevice; - -public: - explicit PixelTrackSoAFromCUDAT(const edm::ParameterSet& iConfig); - ~PixelTrackSoAFromCUDAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void acquire(edm::Event const& iEvent, - edm::EventSetup const& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; - void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override; - - edm::EDGetTokenT> tokenCUDA_; - edm::EDPutTokenT tokenSOA_; - - TrackSoAHost tracks_h_; -}; - -template -PixelTrackSoAFromCUDAT::PixelTrackSoAFromCUDAT(const edm::ParameterSet& iConfig) - : tokenCUDA_(consumes(iConfig.getParameter("src"))), tokenSOA_(produces()) {} - -template -void PixelTrackSoAFromCUDAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - desc.add("src", edm::InputTag("pixelTracksCUDA")); - descriptions.addWithDefaultLabel(desc); -} - -template -void PixelTrackSoAFromCUDAT::acquire(edm::Event const& iEvent, - edm::EventSetup const& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - cms::cuda::Product const& inputDataWrapped = iEvent.get(tokenCUDA_); - cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; - auto const& tracks_d = ctx.get(inputDataWrapped); // Tracks on device - tracks_h_ = TrackSoAHost(ctx.stream()); // Create an instance of Tracks on Host, using the stream - cudaCheck(cudaMemcpyAsync(tracks_h_.buffer().get(), - tracks_d.const_buffer().get(), - tracks_d.bufferSize(), - cudaMemcpyDeviceToHost, - ctx.stream())); // Copy data from Device to Host -} - -template -void 
PixelTrackSoAFromCUDAT::produce(edm::Event& iEvent, edm::EventSetup const& iSetup) { - auto maxTracks = tracks_h_.view().metadata().size(); - auto nTracks = tracks_h_.view().nTracks(); - - assert(nTracks < maxTracks); - if (nTracks == maxTracks - 1) { - edm::LogWarning("PixelTracks") << "Unsorted reconstructed pixel tracks truncated to " << maxTracks - 1 - << " candidates"; - } - -#ifdef PIXEL_DEBUG_PRODUCE - std::cout << "size of SoA " << sizeof(tsoa) << " stride " << maxTracks << std::endl; - std::cout << "found " << nTracks << " tracks in cpu SoA at " << &tsoa << std::endl; - - int32_t nt = 0; - for (int32_t it = 0; it < maxTracks; ++it) { - auto nHits = TracksUtilities::nHits(tracks_h_.view(), it); - assert(nHits == int(tracks_h_.view().hitIndices().size(it))); - if (nHits == 0) - break; // this is a guard: maybe we need to move to nTracks... - nt++; - } - assert(nTracks == nt); -#endif - - // DO NOT make a copy (actually TWO....) - iEvent.emplace(tokenSOA_, std::move(tracks_h_)); - assert(!tracks_h_.buffer()); -} - -using PixelTrackSoAFromCUDAPhase1 = PixelTrackSoAFromCUDAT; -DEFINE_FWK_MODULE(PixelTrackSoAFromCUDAPhase1); - -using PixelTrackSoAFromCUDAPhase2 = PixelTrackSoAFromCUDAT; -DEFINE_FWK_MODULE(PixelTrackSoAFromCUDAPhase2); - -using PixelTrackSoAFromCUDAHIonPhase1 = PixelTrackSoAFromCUDAT; -DEFINE_FWK_MODULE(PixelTrackSoAFromCUDAHIonPhase1); diff --git a/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml b/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml index 5999fef50b346..ada2991b1cdb2 100644 --- a/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml +++ b/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml @@ -1,4 +1,3 @@ - diff --git a/RecoTracker/TkSeedGenerator/plugins/SeedProducerFromSoA.cc b/RecoTracker/TkSeedGenerator/plugins/SeedProducerFromSoA.cc deleted file mode 100644 index 620d9973396d7..0000000000000 --- a/RecoTracker/TkSeedGenerator/plugins/SeedProducerFromSoA.cc +++ /dev/null @@ -1,179 +0,0 @@ -#include 
"CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "DataFormats/BeamSpot/interface/BeamSpot.h" -#include "DataFormats/GeometrySurface/interface/Plane.h" -#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "DataFormats/TrackingRecHit/interface/InvalidTrackingRecHit.h" -#include "DataFormats/TrajectorySeed/interface/TrajectorySeedCollection.h" -#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" -#include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "Geometry/CommonDetUnit/interface/GeomDet.h" -#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" -#include "Geometry/Records/interface/TrackerTopologyRcd.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" -#include "RecoTracker/PixelTrackFitting/interface/FitUtils.h" -#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h" -#include "TrackingTools/MaterialEffects/interface/PropagatorWithMaterial.h" -#include "TrackingTools/Records/interface/TrackingComponentsRecord.h" -#include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h" -#include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h" -#include 
"TrackingTools/TrajectoryState/interface/TrajectoryStateTransform.h" - -/* - produces seeds directly from cuda produced tuples -*/ -template -class SeedProducerFromSoAT : public edm::global::EDProducer<> { -public: - explicit SeedProducerFromSoAT(const edm::ParameterSet& iConfig); - ~SeedProducerFromSoAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; - - // Event data tokens - const edm::EDGetTokenT tBeamSpot_; - const edm::EDGetTokenT> tokenTrack_; - // Event setup tokens - const edm::ESGetToken idealMagneticFieldToken_; - const edm::ESGetToken trackerDigiGeometryToken_; - const edm::ESGetToken trackerPropagatorToken_; - int32_t minNumberOfHits_; -}; - -template -SeedProducerFromSoAT::SeedProducerFromSoAT(const edm::ParameterSet& iConfig) - : tBeamSpot_(consumes(iConfig.getParameter("beamSpot"))), - tokenTrack_(consumes(iConfig.getParameter("src"))), - idealMagneticFieldToken_(esConsumes()), - trackerDigiGeometryToken_(esConsumes()), - trackerPropagatorToken_(esConsumes(edm::ESInputTag("PropagatorWithMaterial"))), - minNumberOfHits_(iConfig.getParameter("minNumberOfHits")) - -{ - produces(); -} - -template -void SeedProducerFromSoAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); - desc.add("src", edm::InputTag("pixelTrackSoA")); - desc.add("minNumberOfHits", 0); - - descriptions.addWithDefaultLabel(desc); -} - -template -void SeedProducerFromSoAT::produce(edm::StreamID streamID, - edm::Event& iEvent, - const edm::EventSetup& iSetup) const { - // std::cout << "Converting gpu helix to trajectory seed" << std::endl; - auto result = std::make_unique(); - - using trackHelper = TracksUtilities; - - auto const& fieldESH = iSetup.getHandle(idealMagneticFieldToken_); - auto const& tracker = 
iSetup.getHandle(trackerDigiGeometryToken_); - auto const& dus = tracker->detUnits(); - - auto const& propagatorHandle = iSetup.getHandle(trackerPropagatorToken_); - const Propagator* propagator = &(*propagatorHandle); - - const auto& bsh = iEvent.get(tBeamSpot_); - // std::cout << "beamspot " << bsh.x0() << ' ' << bsh.y0() << ' ' << bsh.z0() << std::endl; - GlobalPoint bs(bsh.x0(), bsh.y0(), bsh.z0()); - - auto const& tsoa = iEvent.get(tokenTrack_); - - auto const* quality = tsoa.view().quality(); - auto const& detIndices = tsoa.view().detIndices(); - auto maxTracks = tsoa.view().metadata().size(); - - for (int32_t it = 0; it < maxTracks; ++it) { - auto nHits = trackHelper::nHits(tsoa.view(), it); - if (nHits == 0) - break; // this is a guard: maybe we need to move to nTracks... - - auto q = quality[it]; - if (q != pixelTrack::Quality::loose) - continue; // FIXME - if (nHits < minNumberOfHits_) - continue; - - // fill hits with invalid just to hold the detId - auto b = detIndices.begin(it); - edm::OwnVector hits; - for (int iHit = 0; iHit < nHits; ++iHit) { - auto const* det = dus[*(b + iHit)]; - // FIXME at some point get a proper type ... - hits.push_back(new InvalidTrackingRecHit(*det, TrackingRecHit::bad)); - } - - // mind: this values are respect the beamspot! 
- - float phi = trackHelper::phi(tsoa.view(), it); // track phi (not the hit count): orients the impact-point plane built below - - riemannFit::Vector5d ipar, opar; - riemannFit::Matrix5d icov, ocov; - trackHelper::copyToDense(tsoa.view(), ipar, icov, it); - riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); - - LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); - AlgebraicSymMatrix55 m; - for (int i = 0; i < 5; ++i) - for (int j = i; j < 5; ++j) - m(i, j) = ocov(i, j); - - float sp = std::sin(phi); - float cp = std::cos(phi); - Surface::RotationType rot(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0); - - Plane impPointPlane(bs, rot); - GlobalTrajectoryParameters gp(impPointPlane.toGlobal(lpar.position()), - impPointPlane.toGlobal(lpar.momentum()), - lpar.charge(), - fieldESH.product()); - - JacobianLocalToCurvilinear jl2c(impPointPlane, lpar, *fieldESH.product()); - - AlgebraicSymMatrix55 mo = ROOT::Math::Similarity(jl2c.jacobian(), m); - - FreeTrajectoryState fts(gp, CurvilinearTrajectoryError(mo)); - - auto const& lastHit = hits.back(); - - TrajectoryStateOnSurface outerState = propagator->propagate(fts, *lastHit.surface()); - - if (!outerState.isValid()) { - edm::LogError("SeedFromGPU") << " was trying to create a seed from:\n" - << fts << "\n propagating to: " << lastHit.geographicalId().rawId(); - continue; - } - - auto const& pTraj = trajectoryStateTransform::persistentState(outerState, lastHit.geographicalId().rawId()); - - result->emplace_back(pTraj, hits, alongMomentum); - } - - iEvent.put(std::move(result)); -} - -using SeedProducerFromSoAPhase1 = SeedProducerFromSoAT; -DEFINE_FWK_MODULE(SeedProducerFromSoAPhase1); - -using SeedProducerFromSoAPhase2 = SeedProducerFromSoAT; -DEFINE_FWK_MODULE(SeedProducerFromSoAPhase2); diff --git a/RecoTracker/TransientTrackingRecHit/python/TTRHBuilders_cff.py b/RecoTracker/TransientTrackingRecHit/python/TTRHBuilders_cff.py index 3022edc925406..024acc8c40e2e 100644 --- a/RecoTracker/TransientTrackingRecHit/python/TTRHBuilders_cff.py +++ 
b/RecoTracker/TransientTrackingRecHit/python/TTRHBuilders_cff.py @@ -7,8 +7,6 @@ from RecoTracker.TkSeedingLayers.TTRHBuilderWithoutAngle4PixelPairs_cfi import * from RecoTracker.TkSeedingLayers.TTRHBuilderWithoutAngle4PixelTriplets_cfi import * #TransientTRH builder with template -from RecoLocalTracker.SiPixelRecHits.pixelCPEFastESProducerPhase1_cfi import * -from RecoLocalTracker.SiPixelRecHits.pixelCPEFastESProducerPhase2_cfi import * from RecoLocalTracker.SiPixelRecHits.PixelCPETemplateReco_cfi import * from RecoLocalTracker.SiPixelRecHits.PixelCPEClusterRepair_cfi import * from RecoLocalTracker.Phase2TrackerRecHits.Phase2StripCPEESProducer_cfi import * diff --git a/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc b/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc deleted file mode 100644 index a62c6efb5abdb..0000000000000 --- a/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc +++ /dev/null @@ -1,101 +0,0 @@ -#include - -#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" -#include "CUDADataFormats/Common/interface/Product.h" -#include "DataFormats/BeamSpot/interface/BeamSpot.h" -#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" - -namespace { - - class BeamSpotHost { - public: - BeamSpotHost() : 
data_h_{cms::cuda::make_host_noncached_unique(cudaHostAllocWriteCombined)} {} - - BeamSpotHost(BeamSpotHost const&) = delete; - BeamSpotHost(BeamSpotHost&&) = default; - - BeamSpotHost& operator=(BeamSpotHost const&) = delete; - BeamSpotHost& operator=(BeamSpotHost&&) = default; - - BeamSpotPOD* data() { return data_h_.get(); } - BeamSpotPOD const* data() const { return data_h_.get(); } - - cms::cuda::host::noncached::unique_ptr& ptr() { return data_h_; } - cms::cuda::host::noncached::unique_ptr const& ptr() const { return data_h_; } - - private: - cms::cuda::host::noncached::unique_ptr data_h_; - }; - -} // namespace - -class BeamSpotToCUDA : public edm::global::EDProducer> { -public: - explicit BeamSpotToCUDA(const edm::ParameterSet& iConfig); - ~BeamSpotToCUDA() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - - std::unique_ptr beginStream(edm::StreamID) const override { - edm::Service cuda; - if (cuda and cuda->enabled()) { - return std::make_unique(); - } else { - return nullptr; - } - } - void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; - -private: - const edm::EDGetTokenT bsGetToken_; - const edm::EDPutTokenT> bsPutToken_; -}; - -BeamSpotToCUDA::BeamSpotToCUDA(const edm::ParameterSet& iConfig) - : bsGetToken_{consumes(iConfig.getParameter("src"))}, - bsPutToken_{produces>()} {} - -void BeamSpotToCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.add("src", edm::InputTag("offlineBeamSpot")); - descriptions.add("offlineBeamSpotToCUDA", desc); -} - -void BeamSpotToCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { - cms::cuda::ScopedContextProduce ctx{streamID}; - - const reco::BeamSpot& bs = iEvent.get(bsGetToken_); - - auto& bsHost = streamCache(streamID)->ptr(); - - bsHost->x = bs.x0(); - bsHost->y = bs.y0(); - bsHost->z = bs.z0(); - - 
bsHost->sigmaZ = bs.sigmaZ(); - bsHost->beamWidthX = bs.BeamWidthX(); - bsHost->beamWidthY = bs.BeamWidthY(); - bsHost->dxdz = bs.dxdz(); - bsHost->dydz = bs.dydz(); - bsHost->emittanceX = bs.emittanceX(); - bsHost->emittanceY = bs.emittanceY(); - bsHost->betaStar = bs.betaStar(); - - BeamSpotCUDA bsDevice(ctx.stream()); - cms::cuda::copyAsync(bsDevice.ptr(), bsHost, ctx.stream()); - - ctx.emplace(iEvent, bsPutToken_, std::move(bsDevice)); -} - -DEFINE_FWK_MODULE(BeamSpotToCUDA); diff --git a/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml b/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml index 5de80f16f24e5..d0f61ab465620 100644 --- a/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml +++ b/RecoVertex/BeamSpotProducer/plugins/BuildFile.xml @@ -53,16 +53,6 @@ - - - - - - - - - - diff --git a/RecoVertex/PixelVertexFinding/plugins/BuildFile.xml b/RecoVertex/PixelVertexFinding/plugins/BuildFile.xml index b7674b3e91c12..81b7b2cacc65e 100644 --- a/RecoVertex/PixelVertexFinding/plugins/BuildFile.xml +++ b/RecoVertex/PixelVertexFinding/plugins/BuildFile.xml @@ -20,19 +20,8 @@ - - - - - - - - - - - - - + + diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerCUDA.cc b/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerCUDA.cc deleted file mode 100644 index a1f4101252319..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerCUDA.cc +++ /dev/null @@ -1,166 +0,0 @@ -#include - -#include "CUDADataFormats/Common/interface/Product.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/Framework/interface/ConsumesCollector.h" -#include 
"FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/RunningAverage.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" - -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" - -#include "gpuVertexFinder.h" - -#undef PIXVERTEX_DEBUG_PRODUCE - -template -class PixelVertexProducerCUDAT : public edm::global::EDProducer<> { - using TracksSoADevice = TrackSoAHeterogeneousDevice; - using TracksSoAHost = TrackSoAHeterogeneousHost; - using GPUAlgo = gpuVertexFinder::Producer; - -public: - explicit PixelVertexProducerCUDAT(const edm::ParameterSet& iConfig); - ~PixelVertexProducerCUDAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void produceOnGPU(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const; - void produceOnCPU(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const; - void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; - - bool onGPU_; - - edm::EDGetTokenT> tokenGPUTrack_; - edm::EDPutTokenT> tokenGPUVertex_; - edm::EDGetTokenT tokenCPUTrack_; - edm::EDPutTokenT tokenCPUVertex_; - - const GPUAlgo gpuAlgo_; - - // Tracking cuts before sending tracks to vertex algo - const float ptMin_; - const float ptMax_; -}; - -template -PixelVertexProducerCUDAT::PixelVertexProducerCUDAT(const edm::ParameterSet& conf) - : 
onGPU_(conf.getParameter("onGPU")), - gpuAlgo_(conf.getParameter("oneKernel"), - conf.getParameter("useDensity"), - conf.getParameter("useDBSCAN"), - conf.getParameter("useIterative"), - conf.getParameter("doSplitting"), - conf.getParameter("minT"), - conf.getParameter("eps"), - conf.getParameter("errmax"), - conf.getParameter("chi2max")), - ptMin_(conf.getParameter("PtMin")), // 0.5 GeV - ptMax_(conf.getParameter("PtMax")) // 75. GeV -{ - if (onGPU_) { - tokenGPUTrack_ = consumes(conf.getParameter("pixelTrackSrc")); - tokenGPUVertex_ = produces(); - } else { - tokenCPUTrack_ = consumes(conf.getParameter("pixelTrackSrc")); - tokenCPUVertex_ = produces(); - } -} - -template -void PixelVertexProducerCUDAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - // Only one of these three algos can be used at once. - // Maybe this should become a Plugin Factory - desc.add("onGPU", true); - desc.add("oneKernel", true); - desc.add("useDensity", true); - desc.add("useDBSCAN", false); - desc.add("useIterative", false); - desc.add("doSplitting", true); - - desc.add("minT", 2); // min number of neighbours to be "core" - desc.add("eps", 0.07); // max absolute distance to cluster - desc.add("errmax", 0.01); // max error to be "seed" - desc.add("chi2max", 9.); // max normalized distance to cluster - - desc.add("PtMin", 0.5); - desc.add("PtMax", 75.); - desc.add("pixelTrackSrc", edm::InputTag("pixelTracksCUDA")); - - descriptions.addWithDefaultLabel(desc); -} - -template -void PixelVertexProducerCUDAT::produceOnGPU(edm::StreamID streamID, - edm::Event& iEvent, - const edm::EventSetup& iSetup) const { - using TracksSoA = TrackSoAHeterogeneousDevice; - auto hTracks = iEvent.getHandle(tokenGPUTrack_); - - cms::cuda::ScopedContextProduce ctx{*hTracks}; - auto& tracks = ctx.get(*hTracks); - - ctx.emplace(iEvent, tokenGPUVertex_, gpuAlgo_.makeAsync(ctx.stream(), tracks.view(), ptMin_, ptMax_)); -} - -template -void 
PixelVertexProducerCUDAT::produceOnCPU(edm::StreamID streamID, - edm::Event& iEvent, - const edm::EventSetup& iSetup) const { - auto& tracks = iEvent.get(tokenCPUTrack_); - -#ifdef PIXVERTEX_DEBUG_PRODUCE - auto const& tsoa = *tracks; - auto maxTracks = tsoa.stride(); - std::cout << "size of SoA " << sizeof(tsoa) << " stride " << maxTracks << std::endl; - - int32_t nt = 0; - for (int32_t it = 0; it < maxTracks; ++it) { - auto nHits = TracksUtilities::nHits(tracks.view(), it); - assert(nHits == int(tracks.view().hitIndices().size(it))); - if (nHits == 0) - break; // this is a guard: maybe we need to move to nTracks... - nt++; - } - std::cout << "found " << nt << " tracks in cpu SoA for Vertexing at " << tracks << std::endl; -#endif // PIXVERTEX_DEBUG_PRODUCE - - iEvent.emplace(tokenCPUVertex_, gpuAlgo_.make(tracks.view(), ptMin_, ptMax_)); -} - -template -void PixelVertexProducerCUDAT::produce(edm::StreamID streamID, - edm::Event& iEvent, - const edm::EventSetup& iSetup) const { - if (onGPU_) { - produceOnGPU(streamID, iEvent, iSetup); - } else { - produceOnCPU(streamID, iEvent, iSetup); - } -} - -using PixelVertexProducerCUDAPhase1 = PixelVertexProducerCUDAT; -DEFINE_FWK_MODULE(PixelVertexProducerCUDAPhase1); - -using PixelVertexProducerCUDAPhase2 = PixelVertexProducerCUDAT; -DEFINE_FWK_MODULE(PixelVertexProducerCUDAPhase2); - -using PixelVertexProducerCUDAHIonPhase1 = PixelVertexProducerCUDAT; -DEFINE_FWK_MODULE(PixelVertexProducerCUDAHIonPhase1); diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerFromSoA.cc b/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerFromSoA.cc deleted file mode 100644 index 91de2bdb6992b..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerFromSoA.cc +++ /dev/null @@ -1,178 +0,0 @@ -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h" -#include "DataFormats/BeamSpot/interface/BeamSpot.h" 
-#include "DataFormats/Common/interface/OrphanHandle.h" -#include "DataFormats/TrackReco/interface/Track.h" -#include "DataFormats/TrackReco/interface/TrackExtra.h" -#include "DataFormats/TrackReco/interface/TrackFwd.h" -#include "DataFormats/VertexReco/interface/Vertex.h" -#include "DataFormats/VertexReco/interface/VertexFwd.h" -#include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "Geometry/Records/interface/TrackerTopologyRcd.h" -#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" - -#undef PIXVERTEX_DEBUG_PRODUCE - -class PixelVertexProducerFromSoA : public edm::global::EDProducer<> { -public: - using IndToEdm = std::vector; - - explicit PixelVertexProducerFromSoA(const edm::ParameterSet &iConfig); - ~PixelVertexProducerFromSoA() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions &descriptions); - -private: - void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override; - - edm::EDGetTokenT tokenVertex_; - edm::EDGetTokenT tokenBeamSpot_; - edm::EDGetTokenT tokenTracks_; - edm::EDGetTokenT tokenIndToEdm_; -}; - -PixelVertexProducerFromSoA::PixelVertexProducerFromSoA(const edm::ParameterSet &conf) - : tokenVertex_(consumes(conf.getParameter("src"))), - tokenBeamSpot_(consumes(conf.getParameter("beamSpot"))), - 
tokenTracks_(consumes(conf.getParameter("TrackCollection"))), - tokenIndToEdm_(consumes(conf.getParameter("TrackCollection"))) { - produces(); -} - -void PixelVertexProducerFromSoA::fillDescriptions(edm::ConfigurationDescriptions &descriptions) { - edm::ParameterSetDescription desc; - - desc.add("TrackCollection", edm::InputTag("pixelTracks")); - desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); - desc.add("src", edm::InputTag("pixelVerticesSoA")); - - descriptions.add("pixelVertexFromSoA", desc); -} - -void PixelVertexProducerFromSoA::produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &) const { - auto vertexes = std::make_unique(); - - auto tracksHandle = iEvent.getHandle(tokenTracks_); - auto tracksSize = tracksHandle->size(); - auto const &indToEdm = iEvent.get(tokenIndToEdm_); - auto bsHandle = iEvent.getHandle(tokenBeamSpot_); - - float x0 = 0, y0 = 0, z0 = 0, dxdz = 0, dydz = 0; - std::vector itrk; - itrk.reserve(64); // avoid first relocations - if (!bsHandle.isValid()) { - edm::LogWarning("PixelVertexProducer") << "No beamspot found. returning vertexes with (0,0,Z) "; - } else { - const reco::BeamSpot &bs = *bsHandle; - x0 = bs.x0(); - y0 = bs.y0(); - z0 = bs.z0(); - dxdz = bs.dxdz(); - dydz = bs.dydz(); - } - - auto const &soa = iEvent.get(tokenVertex_); - - int nv = soa.view().nvFinal(); - -#ifdef PIXVERTEX_DEBUG_PRODUCE - std::cout << "converting " << nv << " vertices " - << " from " << indToEdm.size() << " tracks" << std::endl; -#endif // PIXVERTEX_DEBUG_PRODUCE - - std::set uind; // for verifing index consistency - for (int j = nv - 1; j >= 0; --j) { - auto i = soa.view()[j].sortInd(); // on gpu sorted in ascending order.... 
- assert(i < nv); - uind.insert(i); - assert(itrk.empty()); - auto z = soa.view()[i].zv(); - auto x = x0 + dxdz * z; - auto y = y0 + dydz * z; - z += z0; - reco::Vertex::Error err; - err(2, 2) = 1.f / soa.view()[i].wv(); - err(2, 2) *= 2.; // artifically inflate error - //Copy also the tracks (no intention to be efficient....) - for (auto k = 0U; k < indToEdm.size(); ++k) { - if (soa.view()[k].idv() == int16_t(i)) - itrk.push_back(k); - } - auto nt = itrk.size(); - if (nt == 0) { -#ifdef PIXVERTEX_DEBUG_PRODUCE - std::cout << "vertex " << i << " with no tracks..." << std::endl; -#endif // PIXVERTEX_DEBUG_PRODUCE - continue; - } - if (nt < 2) { - itrk.clear(); - continue; - } // remove outliers - (*vertexes).emplace_back(reco::Vertex::Point(x, y, z), err, soa.view()[i].chi2(), soa.view()[i].ndof(), nt); - auto &v = (*vertexes).back(); - v.reserve(itrk.size()); - for (auto it : itrk) { - assert(it < int(indToEdm.size())); - auto k = indToEdm[it]; - if (k > tracksSize) { - edm::LogWarning("PixelVertexProducer") << "oops track " << it << " does not exists on CPU " << k; - continue; - } - auto tk = reco::TrackRef(tracksHandle, k); - v.add(tk); - } - itrk.clear(); - } - - LogDebug("PixelVertexProducer") << ": Found " << vertexes->size() << " vertexes\n"; - for (unsigned int i = 0; i < vertexes->size(); ++i) { - LogDebug("PixelVertexProducer") << "Vertex number " << i << " has " << (*vertexes)[i].tracksSize() - << " tracks with a position of " << (*vertexes)[i].z() << " +- " - << std::sqrt((*vertexes)[i].covariance(2, 2)); - } - - // legacy logic.... - if (vertexes->empty() && bsHandle.isValid()) { - const reco::BeamSpot &bs = *bsHandle; - - GlobalError bse(bs.rotatedCovariance3D()); - if ((bse.cxx() <= 0.) || (bse.cyy() <= 0.) || (bse.czz() <= 0.)) { - AlgebraicSymMatrix33 we; - we(0, 0) = 10000; - we(1, 1) = 10000; - we(2, 2) = 10000; - vertexes->push_back(reco::Vertex(bs.position(), we, 0., 0., 0)); - - edm::LogInfo("PixelVertexProducer") << "No vertices found. 
Beamspot with invalid errors " << bse.matrix() - << "\nWill put Vertex derived from dummy-fake BeamSpot into Event.\n" - << (*vertexes)[0].x() << "\n" - << (*vertexes)[0].y() << "\n" - << (*vertexes)[0].z() << "\n"; - } else { - vertexes->push_back(reco::Vertex(bs.position(), bs.rotatedCovariance3D(), 0., 0., 0)); - - edm::LogInfo("PixelVertexProducer") << "No vertices found. Will put Vertex derived from BeamSpot into Event:\n" - << (*vertexes)[0].x() << "\n" - << (*vertexes)[0].y() << "\n" - << (*vertexes)[0].z() << "\n"; - } - } else if (vertexes->empty() && !bsHandle.isValid()) { - edm::LogWarning("PixelVertexProducer") << "No beamspot and no vertex found. No vertex returned."; - } - - iEvent.put(std::move(vertexes)); -} - -DEFINE_FWK_MODULE(PixelVertexProducerFromSoA); diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexSoAFromCUDA.cc b/RecoVertex/PixelVertexFinding/plugins/PixelVertexSoAFromCUDA.cc deleted file mode 100644 index b13b6c96f0bd3..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexSoAFromCUDA.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include - -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/Common/interface/HostProduct.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include 
"FWCore/Utilities/interface/InputTag.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" - -class PixelVertexSoAFromCUDA : public edm::stream::EDProducer { -public: - explicit PixelVertexSoAFromCUDA(const edm::ParameterSet& iConfig); - ~PixelVertexSoAFromCUDA() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void acquire(edm::Event const& iEvent, - edm::EventSetup const& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; - void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override; - - edm::EDGetTokenT> tokenCUDA_; - edm::EDPutTokenT tokenSOA_; - - ZVertexSoAHost zvertex_h; -}; - -PixelVertexSoAFromCUDA::PixelVertexSoAFromCUDA(const edm::ParameterSet& iConfig) - : tokenCUDA_(consumes>(iConfig.getParameter("src"))), - tokenSOA_(produces()) {} - -void PixelVertexSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - desc.add("src", edm::InputTag("pixelVerticesCUDA")); - descriptions.add("pixelVerticesSoA", desc); -} - -void PixelVertexSoAFromCUDA::acquire(edm::Event const& iEvent, - edm::EventSetup const& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - cms::cuda::Product const& inputDataWrapped = iEvent.get(tokenCUDA_); - cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; - auto const& zvertex_d = ctx.get(inputDataWrapped); // Tracks on device - zvertex_h = ZVertexSoAHost(ctx.stream()); // Create an instance of Tracks on Host, using the stream - cudaCheck(cudaMemcpyAsync(zvertex_h.buffer().get(), - zvertex_d.const_buffer().get(), - zvertex_d.bufferSize(), - cudaMemcpyDeviceToHost, - ctx.stream())); // Copy data from Device to Host -} - -void PixelVertexSoAFromCUDA::produce(edm::Event& iEvent, edm::EventSetup const& iSetup) { - // No copies.... 
- iEvent.emplace(tokenSOA_, std::move(zvertex_h)); -} - -DEFINE_FWK_MODULE(PixelVertexSoAFromCUDA); diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h b/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h deleted file mode 100644 index e7eadd9c61dda..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceSoADevice_h -#define RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceSoADevice_h - -#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h" -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h" - -template -class PixelVertexWorkSpaceSoADevice : public cms::cuda::PortableDeviceCollection> { -public: - explicit PixelVertexWorkSpaceSoADevice() = default; - - // Constructor which specifies the SoA size and CUDA stream - explicit PixelVertexWorkSpaceSoADevice(cudaStream_t stream) - : PortableDeviceCollection>(S, stream) {} -}; - -namespace gpuVertexFinder { - namespace workSpace { - using PixelVertexWorkSpaceSoADevice = PixelVertexWorkSpaceSoADevice; - } -} // namespace gpuVertexFinder -#endif diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h b/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h deleted file mode 100644 index 9a4dc49b87f23..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceSoAHost_h -#define RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceSoAHost_h - -#include "CUDADataFormats/Common/interface/PortableHostCollection.h" -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h" - 
-template -class PixelVertexWorkSpaceSoAHost : public cms::cuda::PortableHostCollection> { -public: - explicit PixelVertexWorkSpaceSoAHost() : PortableHostCollection>(S) {} - - // Constructor which specifies the SoA size and CUDA stream - explicit PixelVertexWorkSpaceSoAHost(cudaStream_t stream) - : PortableHostCollection>(S, stream) {} -}; - -namespace gpuVertexFinder { - namespace workSpace { - using PixelVertexWorkSpaceSoAHost = PixelVertexWorkSpaceSoAHost; - } -} // namespace gpuVertexFinder -#endif diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h b/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h deleted file mode 100644 index cc0828f153330..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceUtilities_h -#define RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceUtilities_h - -#include -#include "DataFormats/SoATemplate/interface/SoALayout.h" - -// Intermediate data used in the vertex reco algos -// For internal use only -GENERATE_SOA_LAYOUT(PixelVertexWSSoALayout, - SOA_COLUMN(uint16_t, itrk), // index of original track - SOA_COLUMN(float, zt), // input track z at bs - SOA_COLUMN(float, ezt2), // input error^2 on the above - SOA_COLUMN(float, ptt2), // input pt^2 on the above - SOA_COLUMN(uint8_t, izt), // interized z-position of input tracks - SOA_COLUMN(int32_t, iv), // vertex index for each associated track - SOA_SCALAR(uint32_t, ntrks), // number of "selected tracks" - SOA_SCALAR(uint32_t, nvIntermediate)) // the number of vertices after splitting pruning etc. - -// Methods that operate on View and ConstView of the WorkSpaceSoALayout. 
-namespace gpuVertexFinder { - namespace workSpace { - using PixelVertexWorkSpaceSoALayout = PixelVertexWSSoALayout<>; - using PixelVertexWorkSpaceSoAView = PixelVertexWSSoALayout<>::View; - using PixelVertexWorkSpaceSoAConstView = PixelVertexWSSoALayout<>::ConstView; - - namespace utilities { - __host__ __device__ inline void init(PixelVertexWorkSpaceSoAView &workspace_view) { - workspace_view.ntrks() = 0; - workspace_view.nvIntermediate() = 0; - } - } // namespace utilities - } // namespace workSpace -} // namespace gpuVertexFinder - -#endif diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h b/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h deleted file mode 100644 index aaca1c5cc268f..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h +++ /dev/null @@ -1,237 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksByDensity_h -#define RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksByDensity_h - -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - -#include "gpuVertexFinder.h" - -namespace gpuVertexFinder { - - // this algo does not really scale as it works in a single block... 
- // enough for <10K tracks we have - // - // based on Rodrighez&Laio algo - // - __device__ __forceinline__ void clusterTracksByDensity(VtxSoAView& pdata, - WsSoAView& pws, - int minT, // min number of neighbours to be "seed" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster - ) { - using namespace gpuVertexFinder; - constexpr bool verbose = false; // in principle the compiler should optmize out if false - - if (verbose && 0 == threadIdx.x) - printf("params %d %f %f %f\n", minT, eps, errmax, chi2max); - - auto er2mx = errmax * errmax; - - auto& __restrict__ data = pdata; - auto& __restrict__ ws = pws; - auto nt = ws.ntrks(); - float const* __restrict__ zt = ws.zt(); - float const* __restrict__ ezt2 = ws.ezt2(); - - uint32_t& nvFinal = data.nvFinal(); - uint32_t& nvIntermediate = ws.nvIntermediate(); - - uint8_t* __restrict__ izt = ws.izt(); - int32_t* __restrict__ nn = data.ndof(); - int32_t* __restrict__ iv = ws.iv(); - - assert(zt); - assert(ezt2); - assert(izt); - assert(nn); - assert(iv); - - using Hist = cms::cuda::HistoContainer; - __shared__ Hist hist; - __shared__ typename Hist::Counter hws[32]; - for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) { - hist.off[j] = 0; - } - __syncthreads(); - - if (verbose && 0 == threadIdx.x) - printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt); - - assert((int)nt <= hist.capacity()); - - // fill hist (bin shall be wider than "eps") - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - assert(i < zVertex::utilities::MAXTRACKS); - int iz = int(zt[i] * 10.); // valid if eps<=0.1 - // iz = std::clamp(iz, INT8_MIN, INT8_MAX); // sorry c++17 only - iz = std::min(std::max(iz, INT8_MIN), INT8_MAX); - izt[i] = iz - INT8_MIN; - assert(iz - INT8_MIN >= 0); - assert(iz - INT8_MIN < 256); - hist.count(izt[i]); - iv[i] = i; - nn[i] = 0; - } - __syncthreads(); - if (threadIdx.x < 
32) - hws[threadIdx.x] = 0; // used by prefix scan... - __syncthreads(); - hist.finalize(hws); - __syncthreads(); - assert(hist.size() == nt); - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - hist.fill(izt[i], uint16_t(i)); - } - __syncthreads(); - - // count neighbours - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (ezt2[i] > er2mx) - continue; - auto loop = [&](uint32_t j) { - if (i == j) - return; - auto dist = std::abs(zt[i] - zt[j]); - if (dist > eps) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; - nn[i]++; - }; - - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __syncthreads(); - - // find closest above me .... (we ignore the possibility of two j at same distance from i) - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - float mdist = eps; - auto loop = [&](uint32_t j) { - if (nn[j] < nn[i]) - return; - if (nn[j] == nn[i] && zt[j] >= zt[i]) - return; // if equal use natural order... - auto dist = std::abs(zt[i] - zt[j]); - if (dist > mdist) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; // (break natural order???) - mdist = dist; - iv[i] = j; // assign to cluster (better be unique??) - }; - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __syncthreads(); - -#ifdef GPU_DEBUG - // mini verification - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] != int(i)) - assert(iv[iv[i]] != int(i)); - } - __syncthreads(); -#endif - - // consolidate graph (percolate index of seed) - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - auto m = iv[i]; - while (m != iv[m]) - m = iv[m]; - iv[i] = m; - } - -#ifdef GPU_DEBUG - __syncthreads(); - // mini verification - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] != int(i)) - assert(iv[iv[i]] != int(i)); - } -#endif - -#ifdef GPU_DEBUG - // and verify that we did not spit any cluster... 
- for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - auto minJ = i; - auto mdist = eps; - auto loop = [&](uint32_t j) { - if (nn[j] < nn[i]) - return; - if (nn[j] == nn[i] && zt[j] >= zt[i]) - return; // if equal use natural order... - auto dist = std::abs(zt[i] - zt[j]); - if (dist > mdist) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; - mdist = dist; - minJ = j; - }; - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - // should belong to the same cluster... - assert(iv[i] == iv[minJ]); - assert(nn[i] <= nn[iv[i]]); - } - __syncthreads(); -#endif - - __shared__ unsigned int foundClusters; - foundClusters = 0; - __syncthreads(); - - // find the number of different clusters, identified by a tracks with clus[i] == i and density larger than threshold; - // mark these tracks with a negative id. - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] == int(i)) { - if (nn[i] >= minT) { - auto old = atomicInc(&foundClusters, 0xffffffff); - iv[i] = -(old + 1); - } else { // noise - iv[i] = -9998; - } - } - } - __syncthreads(); - - assert(foundClusters < zVertex::utilities::MAXVTX); - - // propagate the negative id to all the tracks in the cluster. 
- for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] >= 0) { - // mark each track in a cluster with the same id as the first one - iv[i] = iv[iv[i]]; - } - } - __syncthreads(); - - // adjust the cluster id to be a positive value starting from 0 - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - iv[i] = -iv[i] - 1; - } - - nvIntermediate = nvFinal = foundClusters; - - if (verbose && 0 == threadIdx.x) - printf("found %d proto vertices\n", foundClusters); - } - - __global__ void clusterTracksByDensityKernel(VtxSoAView pdata, - WsSoAView pws, - int minT, // min number of neighbours to be "seed" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster - ) { - clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max); - } - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksByDensity_h diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h b/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h deleted file mode 100644 index d3fc38baf48b8..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h +++ /dev/null @@ -1,243 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksDBSCAN_h -#define RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksDBSCAN_h - -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - -#include "gpuVertexFinder.h" - -namespace gpuVertexFinder { - - // this algo does not really scale as it works in a single block... 
- // enough for <10K tracks we have - __global__ void clusterTracksDBSCAN(VtxSoAView pdata, - WsSoAView pws, - int minT, // min number of neighbours to be "core" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster - ) { - constexpr bool verbose = false; // in principle the compiler should optmize out if false - - if (verbose && 0 == threadIdx.x) - printf("params %d %f %f %f\n", minT, eps, errmax, chi2max); - - auto er2mx = errmax * errmax; - - auto& __restrict__ data = pdata; - auto& __restrict__ ws = pws; - auto nt = ws.ntrks(); - float const* __restrict__ zt = ws.zt(); - float const* __restrict__ ezt2 = ws.ezt2(); - - uint32_t& nvFinal = data.nvFinal(); - uint32_t& nvIntermediate = ws.nvIntermediate(); - - uint8_t* __restrict__ izt = ws.izt(); - int32_t* __restrict__ nn = data.ndof(); - int32_t* __restrict__ iv = ws.iv(); - - assert(zt); - assert(iv); - assert(nn); - assert(ezt2); - - using Hist = cms::cuda::HistoContainer; - __shared__ Hist hist; - __shared__ typename Hist::Counter hws[32]; - for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) { - hist.off[j] = 0; - } - __syncthreads(); - - if (verbose && 0 == threadIdx.x) - printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt); - - assert((int)nt <= hist.capacity()); - - // fill hist (bin shall be wider than "eps") - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - assert(i < zVertex::utilities::MAXTRACKS); - int iz = int(zt[i] * 10.); // valid if eps<=0.1 - iz = std::clamp(iz, INT8_MIN, INT8_MAX); - izt[i] = iz - INT8_MIN; - assert(iz - INT8_MIN >= 0); - assert(iz - INT8_MIN < 256); - hist.count(izt[i]); - iv[i] = i; - nn[i] = 0; - } - __syncthreads(); - if (threadIdx.x < 32) - hws[threadIdx.x] = 0; // used by prefix scan... 
- __syncthreads(); - hist.finalize(hws); - __syncthreads(); - assert(hist.size() == nt); - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - hist.fill(izt[i], uint16_t(i)); - } - __syncthreads(); - - // count neighbours - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (ezt2[i] > er2mx) - continue; - auto loop = [&](uint32_t j) { - if (i == j) - return; - auto dist = std::abs(zt[i] - zt[j]); - if (dist > eps) - return; - // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return; - nn[i]++; - }; - - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __syncthreads(); - - // find NN with smaller z... - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (nn[i] < minT) - continue; // DBSCAN core rule - float mz = zt[i]; - auto loop = [&](uint32_t j) { - if (zt[j] >= mz) - return; - if (nn[j] < minT) - return; // DBSCAN core rule - auto dist = std::abs(zt[i] - zt[j]); - if (dist > eps) - return; - // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return; - mz = zt[j]; - iv[i] = j; // assign to cluster (better be unique??) - }; - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __syncthreads(); - -#ifdef GPU_DEBUG - // mini verification - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] != int(i)) - assert(iv[iv[i]] != int(i)); - } - __syncthreads(); -#endif - - // consolidate graph (percolate index of seed) - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - auto m = iv[i]; - while (m != iv[m]) - m = iv[m]; - iv[i] = m; - } - - __syncthreads(); - -#ifdef GPU_DEBUG - // mini verification - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] != int(i)) - assert(iv[iv[i]] != int(i)); - } - __syncthreads(); -#endif - -#ifdef GPU_DEBUG - // and verify that we did not spit any cluster... 
- for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (nn[i] < minT) - continue; // DBSCAN core rule - assert(zt[iv[i]] <= zt[i]); - auto loop = [&](uint32_t j) { - if (nn[j] < minT) - return; // DBSCAN core rule - auto dist = std::abs(zt[i] - zt[j]); - if (dist > eps) - return; - // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return; - // they should belong to the same cluster, isn't it? - if (iv[i] != iv[j]) { - printf("ERROR %d %d %f %f %d\n", i, iv[i], zt[i], zt[iv[i]], iv[iv[i]]); - printf(" %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]); - ; - } - assert(iv[i] == iv[j]); - }; - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - __syncthreads(); -#endif - - // collect edges (assign to closest cluster of closest point??? here to closest point) - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - // if (nn[i]==0 || nn[i]>=minT) continue; // DBSCAN edge rule - if (nn[i] >= minT) - continue; // DBSCAN edge rule - float mdist = eps; - auto loop = [&](uint32_t j) { - if (nn[j] < minT) - return; // DBSCAN core rule - auto dist = std::abs(zt[i] - zt[j]); - if (dist > mdist) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; // needed? - mdist = dist; - iv[i] = iv[j]; // assign to cluster (better be unique??) - }; - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __shared__ unsigned int foundClusters; - foundClusters = 0; - __syncthreads(); - - // find the number of different clusters, identified by a tracks with clus[i] == i; - // mark these tracks with a negative id. - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] == int(i)) { - if (nn[i] >= minT) { - auto old = atomicInc(&foundClusters, 0xffffffff); - iv[i] = -(old + 1); - } else { // noise - iv[i] = -9998; - } - } - } - __syncthreads(); - - assert(foundClusters < zVertex::utilities::MAXVTX); - - // propagate the negative id to all the tracks in the cluster. 
- for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] >= 0) { - // mark each track in a cluster with the same id as the first one - iv[i] = iv[iv[i]]; - } - } - __syncthreads(); - - // adjust the cluster id to be a positive value starting from 0 - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - iv[i] = -iv[i] - 1; - } - - nvIntermediate = nvFinal = foundClusters; - - if (verbose && 0 == threadIdx.x) - printf("found %d proto vertices\n", foundClusters); - } - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksDBSCAN_h diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h b/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h deleted file mode 100644 index 9a8ef262db767..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h +++ /dev/null @@ -1,214 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksIterative_h -#define RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksIterative_h - -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - -#include "gpuVertexFinder.h" - -namespace gpuVertexFinder { - - // this algo does not really scale as it works in a single block... 
- // enough for <10K tracks we have - __global__ void clusterTracksIterative(VtxSoAView pdata, - WsSoAView pws, - int minT, // min number of neighbours to be "core" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster - ) { - constexpr bool verbose = false; // in principle the compiler should optmize out if false - - if (verbose && 0 == threadIdx.x) - printf("params %d %f %f %f\n", minT, eps, errmax, chi2max); - - auto er2mx = errmax * errmax; - - auto& __restrict__ data = pdata; - auto& __restrict__ ws = pws; - auto nt = ws.ntrks(); - float const* __restrict__ zt = ws.zt(); - float const* __restrict__ ezt2 = ws.ezt2(); - - uint32_t& nvFinal = data.nvFinal(); - uint32_t& nvIntermediate = ws.nvIntermediate(); - - uint8_t* __restrict__ izt = ws.izt(); - int32_t* __restrict__ nn = data.ndof(); - int32_t* __restrict__ iv = ws.iv(); - - assert(zt); - assert(nn); - assert(iv); - assert(ezt2); - - using Hist = cms::cuda::HistoContainer; - __shared__ Hist hist; - __shared__ typename Hist::Counter hws[32]; - for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) { - hist.off[j] = 0; - } - __syncthreads(); - - if (verbose && 0 == threadIdx.x) - printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt); - - assert((int)nt <= hist.capacity()); - - // fill hist (bin shall be wider than "eps") - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - assert(i < zVertex::utilities::MAXTRACKS); - int iz = int(zt[i] * 10.); // valid if eps<=0.1 - iz = std::clamp(iz, INT8_MIN, INT8_MAX); - izt[i] = iz - INT8_MIN; - assert(iz - INT8_MIN >= 0); - assert(iz - INT8_MIN < 256); - hist.count(izt[i]); - iv[i] = i; - nn[i] = 0; - } - __syncthreads(); - if (threadIdx.x < 32) - hws[threadIdx.x] = 0; // used by prefix scan... 
- __syncthreads(); - hist.finalize(hws); - __syncthreads(); - assert(hist.size() == nt); - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - hist.fill(izt[i], uint16_t(i)); - } - __syncthreads(); - - // count neighbours - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (ezt2[i] > er2mx) - continue; - auto loop = [&](uint32_t j) { - if (i == j) - return; - auto dist = std::abs(zt[i] - zt[j]); - if (dist > eps) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; - nn[i]++; - }; - - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __shared__ int nloops; - nloops = 0; - - __syncthreads(); - - // cluster seeds only - bool more = true; - while (__syncthreads_or(more)) { - if (1 == nloops % 2) { - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - auto m = iv[i]; - while (m != iv[m]) - m = iv[m]; - iv[i] = m; - } - } else { - more = false; - for (auto k = threadIdx.x; k < hist.size(); k += blockDim.x) { - auto p = hist.begin() + k; - auto i = (*p); - auto be = std::min(Hist::bin(izt[i]) + 1, int(hist.nbins() - 1)); - if (nn[i] < minT) - continue; // DBSCAN core rule - auto loop = [&](uint32_t j) { - assert(i != j); - if (nn[j] < minT) - return; // DBSCAN core rule - auto dist = std::abs(zt[i] - zt[j]); - if (dist > eps) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; - auto old = atomicMin(&iv[j], iv[i]); - if (old != iv[i]) { - // end the loop only if no changes were applied - more = true; - } - atomicMin(&iv[i], old); - }; - ++p; - for (; p < hist.end(be); ++p) - loop(*p); - } // for i - } - if (threadIdx.x == 0) - ++nloops; - } // while - - // collect edges (assign to closest cluster of closest point??? 
here to closest point) - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - // if (nn[i]==0 || nn[i]>=minT) continue; // DBSCAN edge rule - if (nn[i] >= minT) - continue; // DBSCAN edge rule - float mdist = eps; - auto loop = [&](int j) { - if (nn[j] < minT) - return; // DBSCAN core rule - auto dist = std::abs(zt[i] - zt[j]); - if (dist > mdist) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; // needed? - mdist = dist; - iv[i] = iv[j]; // assign to cluster (better be unique??) - }; - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __shared__ unsigned int foundClusters; - foundClusters = 0; - __syncthreads(); - - // find the number of different clusters, identified by a tracks with clus[i] == i; - // mark these tracks with a negative id. - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] == int(i)) { - if (nn[i] >= minT) { - auto old = atomicInc(&foundClusters, 0xffffffff); - iv[i] = -(old + 1); - } else { // noise - iv[i] = -9998; - } - } - } - __syncthreads(); - - assert(foundClusters < zVertex::utilities::MAXVTX); - - // propagate the negative id to all the tracks in the cluster. 
- for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] >= 0) { - // mark each track in a cluster with the same id as the first one - iv[i] = iv[iv[i]]; - } - } - __syncthreads(); - - // adjust the cluster id to be a positive value starting from 0 - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - iv[i] = -iv[i] - 1; - } - - nvIntermediate = nvFinal = foundClusters; - - if (verbose && 0 == threadIdx.x) - printf("found %d proto vertices\n", foundClusters); - } - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksIterative_h diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h b/RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h deleted file mode 100644 index e2a5a82fd6802..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuFitVertices_h -#define RecoVertex_PixelVertexFinding_plugins_gpuFitVertices_h - -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - -#include "gpuVertexFinder.h" - -namespace gpuVertexFinder { - - __device__ __forceinline__ void fitVertices(VtxSoAView& pdata, - WsSoAView& pws, - float chi2Max // for outlier rejection - ) { - constexpr bool verbose = false; // in principle the compiler should optmize out if false - - auto& __restrict__ data = pdata; - auto& __restrict__ ws = pws; - auto nt = ws.ntrks(); - float const* __restrict__ zt = ws.zt(); - float const* __restrict__ ezt2 = ws.ezt2(); - float* __restrict__ zv = data.zv(); - float* __restrict__ wv = data.wv(); - float* __restrict__ chi2 = data.chi2(); - uint32_t& nvFinal = data.nvFinal(); - uint32_t& nvIntermediate = ws.nvIntermediate(); - - int32_t* __restrict__ nn = data.ndof(); - int32_t* __restrict__ iv = ws.iv(); - - assert(nvFinal <= nvIntermediate); - nvFinal = nvIntermediate; - 
auto foundClusters = nvFinal; - - // zero - for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) { - zv[i] = 0; - wv[i] = 0; - chi2[i] = 0; - } - - // only for test - __shared__ int noise; - if (verbose && 0 == threadIdx.x) - noise = 0; - - __syncthreads(); - - // compute cluster location - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] > 9990) { - if (verbose) - atomicAdd(&noise, 1); - continue; - } - assert(iv[i] >= 0); - assert(iv[i] < int(foundClusters)); - auto w = 1.f / ezt2[i]; - atomicAdd_block(&zv[iv[i]], zt[i] * w); - atomicAdd_block(&wv[iv[i]], w); - } - - __syncthreads(); - // reuse nn - for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) { - assert(wv[i] > 0.f); - zv[i] /= wv[i]; - nn[i] = -1; // ndof - } - __syncthreads(); - - // compute chi2 - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] > 9990) - continue; - - auto c2 = zv[iv[i]] - zt[i]; - c2 *= c2 / ezt2[i]; - if (c2 > chi2Max) { - iv[i] = 9999; - continue; - } - atomicAdd_block(&chi2[iv[i]], c2); - atomicAdd_block(&nn[iv[i]], 1); - } - __syncthreads(); - for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) - if (nn[i] > 0) - wv[i] *= float(nn[i]) / chi2[i]; - - if (verbose && 0 == threadIdx.x) - printf("found %d proto clusters ", foundClusters); - if (verbose && 0 == threadIdx.x) - printf("and %d noise\n", noise); - } - - __global__ void fitVerticesKernel(VtxSoAView pdata, - WsSoAView pws, - float chi2Max // for outlier rejection - ) { - fitVertices(pdata, pws, chi2Max); - } - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuFitVertices_h diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h b/RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h deleted file mode 100644 index c5a0b5cfc1776..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuSortByPt2_h -#define 
RecoVertex_PixelVertexFinding_plugins_gpuSortByPt2_h - -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" -#ifdef __CUDA_ARCH__ -#include "HeterogeneousCore/CUDAUtilities/interface/radixSort.h" -#endif - -#include "gpuVertexFinder.h" - -namespace gpuVertexFinder { - - __device__ __forceinline__ void sortByPt2(VtxSoAView& pdata, WsSoAView& pws) { - auto& __restrict__ data = pdata; - auto& __restrict__ ws = pws; - auto nt = ws.ntrks(); - float const* __restrict__ ptt2 = ws.ptt2(); - uint32_t const& nvFinal = data.nvFinal(); - - int32_t const* __restrict__ iv = ws.iv(); - float* __restrict__ ptv2 = data.ptv2(); - uint16_t* __restrict__ sortInd = data.sortInd(); - - assert(ptv2); - assert(sortInd); - - if (nvFinal < 1) - return; - - // fill indexing - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - data[ws[i].itrk()].idv() = iv[i]; - } - - // can be done asynchronously at the end of previous event - for (auto i = threadIdx.x; i < nvFinal; i += blockDim.x) { - ptv2[i] = 0; - } - __syncthreads(); - - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] > 9990) - continue; - atomicAdd_block(&ptv2[iv[i]], ptt2[i]); - } - __syncthreads(); - - if (1 == nvFinal) { - if (threadIdx.x == 0) - sortInd[0] = 0; - return; - } -#ifdef __CUDA_ARCH__ - __shared__ uint16_t sws[1024]; - // sort using only 16 bits - radixSort(ptv2, sortInd, sws, nvFinal); -#else - for (uint16_t i = 0; i < nvFinal; ++i) - sortInd[i] = i; - std::sort(sortInd, sortInd + nvFinal, [&](auto i, auto j) { return ptv2[i] < ptv2[j]; }); -#endif - } - - __global__ void sortByPt2Kernel(VtxSoAView pdata, WsSoAView pws) { sortByPt2(pdata, pws); } - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuSortByPt2_h diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h b/RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h deleted 
file mode 100644 index 6538bb216737b..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h +++ /dev/null @@ -1,141 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuSplitVertices_h -#define RecoVertex_PixelVertexFinding_plugins_gpuSplitVertices_h - -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - -#include "gpuVertexFinder.h" - -namespace gpuVertexFinder { - - __device__ __forceinline__ void splitVertices(VtxSoAView& pdata, WsSoAView& pws, float maxChi2) { - constexpr bool verbose = false; // in principle the compiler should optmize out if false - - auto& __restrict__ data = pdata; - auto& __restrict__ ws = pws; - auto nt = ws.ntrks(); - float const* __restrict__ zt = ws.zt(); - float const* __restrict__ ezt2 = ws.ezt2(); - float* __restrict__ zv = data.zv(); - float* __restrict__ wv = data.wv(); - float const* __restrict__ chi2 = data.chi2(); - uint32_t& nvFinal = data.nvFinal(); - - int32_t const* __restrict__ nn = data.ndof(); - int32_t* __restrict__ iv = ws.iv(); - - assert(zt); - assert(wv); - assert(chi2); - assert(nn); - - // one vertex per block - for (auto kv = blockIdx.x; kv < nvFinal; kv += gridDim.x) { - if (nn[kv] < 4) - continue; - if (chi2[kv] < maxChi2 * float(nn[kv])) - continue; - - constexpr int MAXTK = 512; - assert(nn[kv] < MAXTK); - if (nn[kv] >= MAXTK) - continue; // too bad FIXME - __shared__ uint32_t it[MAXTK]; // track index - __shared__ float zz[MAXTK]; // z pos - __shared__ uint8_t newV[MAXTK]; // 0 or 1 - __shared__ float ww[MAXTK]; // z weight - - __shared__ uint32_t nq; // number of track for this vertex - nq = 0; - __syncthreads(); - - // copy to local - for (auto k = threadIdx.x; k < nt; k += blockDim.x) { - if (iv[k] == int(kv)) { - auto old = atomicInc(&nq, MAXTK); - zz[old] = zt[k] - zv[kv]; - newV[old] = zz[old] < 0 ? 
0 : 1; - ww[old] = 1.f / ezt2[k]; - it[old] = k; - } - } - - __shared__ float znew[2], wnew[2]; // the new vertices - - __syncthreads(); - assert(int(nq) == nn[kv] + 1); - - int maxiter = 20; - // kt-min.... - bool more = true; - while (__syncthreads_or(more)) { - more = false; - if (0 == threadIdx.x) { - znew[0] = 0; - znew[1] = 0; - wnew[0] = 0; - wnew[1] = 0; - } - __syncthreads(); - for (auto k = threadIdx.x; k < nq; k += blockDim.x) { - auto i = newV[k]; - atomicAdd(&znew[i], zz[k] * ww[k]); - atomicAdd(&wnew[i], ww[k]); - } - __syncthreads(); - if (0 == threadIdx.x) { - znew[0] /= wnew[0]; - znew[1] /= wnew[1]; - } - __syncthreads(); - for (auto k = threadIdx.x; k < nq; k += blockDim.x) { - auto d0 = fabs(zz[k] - znew[0]); - auto d1 = fabs(zz[k] - znew[1]); - auto newer = d0 < d1 ? 0 : 1; - more |= newer != newV[k]; - newV[k] = newer; - } - --maxiter; - if (maxiter <= 0) - more = false; - } - - // avoid empty vertices - if (0 == wnew[0] || 0 == wnew[1]) - continue; - - // quality cut - auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]); - - auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]); - - if (verbose && 0 == threadIdx.x) - printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * wv[kv]); - - if (chi2Dist < 4) - continue; - - // get a new global vertex - __shared__ uint32_t igv; - if (0 == threadIdx.x) - igv = atomicAdd(&ws.nvIntermediate(), 1); - __syncthreads(); - for (auto k = threadIdx.x; k < nq; k += blockDim.x) { - if (1 == newV[k]) - iv[it[k]] = igv; - } - - } // loop on vertices - } - - __global__ void splitVerticesKernel(VtxSoAView pdata, WsSoAView pws, float maxChi2) { - splitVertices(pdata, pws, maxChi2); - } - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuSplitVertices_h diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cc b/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cc deleted file mode 100644 index 73fa1408a0aab..0000000000000 --- 
a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cc +++ /dev/null @@ -1,208 +0,0 @@ -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" - -#include "PixelVertexWorkSpaceUtilities.h" -#include "PixelVertexWorkSpaceSoAHost.h" -#include "PixelVertexWorkSpaceSoADevice.h" - -#include "gpuClusterTracksByDensity.h" -#include "gpuClusterTracksDBSCAN.h" -#include "gpuClusterTracksIterative.h" -#include "gpuFitVertices.h" -#include "gpuSortByPt2.h" -#include "gpuSplitVertices.h" - -#undef PIXVERTEX_DEBUG_PRODUCE - -namespace gpuVertexFinder { - - // reject outlier tracks that contribute more than this to the chi2 of the vertex fit - constexpr float maxChi2ForFirstFit = 50.f; - constexpr float maxChi2ForFinalFit = 5000.f; - - // split vertices with a chi2/NDoF greater than this - constexpr float maxChi2ForSplit = 9.f; - - template - __global__ void loadTracks( - TrackSoAConstView tracks_view, VtxSoAView soa, WsSoAView pws, float ptMin, float ptMax) { - auto const* quality = tracks_view.quality(); - using helper = TracksUtilities; - auto first = blockIdx.x * blockDim.x + threadIdx.x; - for (int idx = first, nt = tracks_view.nTracks(); idx < nt; idx += gridDim.x * blockDim.x) { - auto nHits = helper::nHits(tracks_view, idx); - assert(nHits >= 3); - - // initialize soa... 
- soa[idx].idv() = -1; - - if (helper::isTriplet(tracks_view, idx)) - continue; // no triplets - if (quality[idx] < pixelTrack::Quality::highPurity) - continue; - - auto pt = tracks_view[idx].pt(); - - if (pt < ptMin) - continue; - - // clamp pt - pt = std::min(pt, ptMax); - - auto& data = pws; - auto it = atomicAdd(&data.ntrks(), 1); - data[it].itrk() = idx; - data[it].zt() = helper::zip(tracks_view, idx); - data[it].ezt2() = tracks_view[idx].covariance()(14); - data[it].ptt2() = pt * pt; - } - } - -// #define THREE_KERNELS -#ifndef THREE_KERNELS - __global__ void vertexFinderOneKernel(VtxSoAView pdata, - WsSoAView pws, - int minT, // min number of neighbours to be "seed" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster, - ) { - clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max); - __syncthreads(); - fitVertices(pdata, pws, maxChi2ForFirstFit); - __syncthreads(); - splitVertices(pdata, pws, maxChi2ForSplit); - __syncthreads(); - fitVertices(pdata, pws, maxChi2ForFinalFit); - __syncthreads(); - sortByPt2(pdata, pws); - } -#else - __global__ void vertexFinderKernel1(VtxSoAView pdata, - WsSoAView pws, - int minT, // min number of neighbours to be "seed" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster, - ) { - clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max); - __syncthreads(); - fitVertices(pdata, pws, maxChi2ForFirstFit); - } - - __global__ void vertexFinderKernel2(VtxSoAView pdata, WsSoAView pws) { - fitVertices(pdata, pws, maxChi2ForFinalFit); - __syncthreads(); - sortByPt2(pdata, pws); - } -#endif - - template -#ifdef __CUDACC__ - ZVertexSoADevice Producer::makeAsync(cudaStream_t stream, - const TrackSoAConstView& tracks_view, - float ptMin, - float ptMax) const { -#ifdef PIXVERTEX_DEBUG_PRODUCE - std::cout << "producing Vertices on GPU" << 
std::endl; -#endif // PIXVERTEX_DEBUG_PRODUCE - ZVertexSoADevice vertices(stream); -#else - ZVertexSoAHost Producer::make(const TrackSoAConstView& tracks_view, - float ptMin, - float ptMax) const { -#ifdef PIXVERTEX_DEBUG_PRODUCE - std::cout << "producing Vertices on CPU" << std::endl; -#endif // PIXVERTEX_DEBUG_PRODUCE - ZVertexSoAHost vertices; -#endif - auto soa = vertices.view(); - - assert(vertices.buffer()); - -#ifdef __CUDACC__ - auto ws_d = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoADevice(stream); -#else - auto ws_d = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAHost(); -#endif - -#ifdef __CUDACC__ - init<<<1, 1, 0, stream>>>(soa, ws_d.view()); - auto blockSize = 128; - auto numberOfBlocks = (tracks_view.metadata().size() + blockSize - 1) / blockSize; - loadTracks<<>>(tracks_view, soa, ws_d.view(), ptMin, ptMax); - cudaCheck(cudaGetLastError()); -#else - init(soa, ws_d.view()); - loadTracks(tracks_view, soa, ws_d.view(), ptMin, ptMax); -#endif - -#ifdef __CUDACC__ - // Running too many thread lead to problems when printf is enabled. - constexpr int maxThreadsForPrint = 1024 - 128; - constexpr int numBlocks = 1024; - constexpr int threadsPerBlock = 128; - - if (oneKernel_) { - // implemented only for density clustesrs -#ifndef THREE_KERNELS - vertexFinderOneKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max); -#else - vertexFinderKernel1<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max); - cudaCheck(cudaGetLastError()); - // one block per vertex... 
- splitVerticesKernel<<>>(soa, ws_d.view(), maxChi2ForSplit); - cudaCheck(cudaGetLastError()); - vertexFinderKernel2<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view()); -#endif - } else { // five kernels - if (useDensity_) { - clusterTracksByDensityKernel<<<1, maxThreadsForPrint, 0, stream>>>( - soa, ws_d.view(), minT, eps, errmax, chi2max); - } else if (useDBSCAN_) { - clusterTracksDBSCAN<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max); - } else if (useIterative_) { - clusterTracksIterative<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max); - } - cudaCheck(cudaGetLastError()); - fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFirstFit); - cudaCheck(cudaGetLastError()); - if (doSplitting_) { - // one block per vertex... - splitVerticesKernel<<>>(soa, ws_d.view(), maxChi2ForSplit); - cudaCheck(cudaGetLastError()); - fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFinalFit); - cudaCheck(cudaGetLastError()); - } - sortByPt2Kernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view()); - } - cudaCheck(cudaGetLastError()); -#else // __CUDACC__ - if (useDensity_) { - clusterTracksByDensity(soa, ws_d.view(), minT, eps, errmax, chi2max); - } else if (useDBSCAN_) { - clusterTracksDBSCAN(soa, ws_d.view(), minT, eps, errmax, chi2max); - } else if (useIterative_) { - clusterTracksIterative(soa, ws_d.view(), minT, eps, errmax, chi2max); - } -#ifdef PIXVERTEX_DEBUG_PRODUCE - std::cout << "found " << ws_d.view().nvIntermediate() << " vertices " << std::endl; -#endif // PIXVERTEX_DEBUG_PRODUCE - fitVertices(soa, ws_d.view(), maxChi2ForFirstFit); - // one block per vertex! 
- if (doSplitting_) { - splitVertices(soa, ws_d.view(), maxChi2ForSplit); - fitVertices(soa, ws_d.view(), maxChi2ForFinalFit); - } - sortByPt2(soa, ws_d.view()); -#endif - - return vertices; - } - - template class Producer; - template class Producer; - template class Producer; -} // namespace gpuVertexFinder diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cu b/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cu deleted file mode 100644 index 9674eac7d8784..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cu +++ /dev/null @@ -1 +0,0 @@ -#include "gpuVertexFinder.cc" diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.h b/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.h deleted file mode 100644 index 640f137bf76f4..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.h +++ /dev/null @@ -1,70 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuVertexFinder_h -#define RecoVertex_PixelVertexFinding_plugins_gpuVertexFinder_h - -#include -#include - -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "PixelVertexWorkSpaceUtilities.h" -#include "PixelVertexWorkSpaceSoAHost.h" -#include "PixelVertexWorkSpaceSoADevice.h" - -namespace gpuVertexFinder { - - using VtxSoAView = zVertex::ZVertexSoAView; - using WsSoAView = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAView; - - __global__ void init(VtxSoAView pdata, WsSoAView pws) { - zVertex::utilities::init(pdata); - gpuVertexFinder::workSpace::utilities::init(pws); - } - - template - class Producer { - using TkSoAConstView = TrackSoAConstView; - - public: - Producer(bool oneKernel, - bool useDensity, - bool useDBSCAN, - bool useIterative, - bool doSplitting, - int iminT, // min number of 
neighbours to be "core" - float ieps, // max absolute distance to cluster - float ierrmax, // max error to be "seed" - float ichi2max // max normalized distance to cluster - ) - : oneKernel_(oneKernel && !(useDBSCAN || useIterative)), - useDensity_(useDensity), - useDBSCAN_(useDBSCAN), - useIterative_(useIterative), - doSplitting_(doSplitting), - minT(iminT), - eps(ieps), - errmax(ierrmax), - chi2max(ichi2max) {} - - ~Producer() = default; - - ZVertexSoADevice makeAsync(cudaStream_t stream, const TkSoAConstView &tracks_view, float ptMin, float ptMax) const; - ZVertexSoAHost make(const TkSoAConstView &tracks_view, float ptMin, float ptMax) const; - - private: - const bool oneKernel_; - const bool useDensity_; - const bool useDBSCAN_; - const bool useIterative_; - const bool doSplitting_; - - int minT; // min number of neighbours to be "core" - float eps; // max absolute distance to cluster - float errmax; // max error to be "seed" - float chi2max; // max normalized distance to cluster - }; - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuVertexFinder_h diff --git a/RecoVertex/PixelVertexFinding/test/BuildFile.xml b/RecoVertex/PixelVertexFinding/test/BuildFile.xml index d5d0142eca659..52b8eabf3587d 100644 --- a/RecoVertex/PixelVertexFinding/test/BuildFile.xml +++ b/RecoVertex/PixelVertexFinding/test/BuildFile.xml @@ -1,74 +1,28 @@ + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - diff --git a/RecoVertex/PixelVertexFinding/test/VertexFinder_t.h b/RecoVertex/PixelVertexFinding/test/VertexFinder_t.h deleted file mode 100644 index 93534f82e06c8..0000000000000 --- a/RecoVertex/PixelVertexFinding/test/VertexFinder_t.h +++ /dev/null @@ -1,359 +0,0 @@ -#include -#include -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" -#include 
"HeterogeneousCore/CUDAUtilities/interface/launch.h" -#include "HeterogeneousCore/CUDAUtilities/interface/allocate_device.h" -#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h" -// PixelTrackUtilities only included in order to compile SoALayout with Eigen columns -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h" - -#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h" -#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h" -#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h" -#ifdef USE_DBSCAN -#include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h" -#define CLUSTERIZE gpuVertexFinder::clusterTracksDBSCAN -#elif USE_ITERATIVE -#include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h" -#define CLUSTERIZE gpuVertexFinder::clusterTracksIterative -#else -#include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h" -#define CLUSTERIZE gpuVertexFinder::clusterTracksByDensityKernel -#endif -#include "RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h" -#include "RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h" -#include "RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h" - -#ifdef ONE_KERNEL -#ifdef __CUDACC__ -__global__ void vertexFinderOneKernel(gpuVertexFinder::VtxSoAView pdata, - gpuVertexFinder::WsSoAView pws, - int minT, // min number of neighbours to be "seed" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster, -) { - gpuVertexFinder::clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max); - __syncthreads(); - gpuVertexFinder::fitVertices(pdata, pws, 50.); - 
__syncthreads(); - gpuVertexFinder::splitVertices(pdata, pws, 9.f); - __syncthreads(); - gpuVertexFinder::fitVertices(pdata, pws, 5000.); - __syncthreads(); - gpuVertexFinder::sortByPt2(pdata, pws); -} -#endif -#endif - -struct Event { - std::vector zvert; - std::vector itrack; - std::vector ztrack; - std::vector eztrack; - std::vector pttrack; - std::vector ivert; -}; - -struct ClusterGenerator { - explicit ClusterGenerator(float nvert, float ntrack) - : rgen(-13., 13), errgen(0.005, 0.025), clusGen(nvert), trackGen(ntrack), gauss(0., 1.), ptGen(1.) {} - - void operator()(Event& ev) { - int nclus = clusGen(reng); - ev.zvert.resize(nclus); - ev.itrack.resize(nclus); - for (auto& z : ev.zvert) { - z = 3.5f * gauss(reng); - } - - ev.ztrack.clear(); - ev.eztrack.clear(); - ev.ivert.clear(); - ev.pttrack.clear(); - for (int iv = 0; iv < nclus; ++iv) { - auto nt = trackGen(reng); - ev.itrack[iv] = nt; - for (int it = 0; it < nt; ++it) { - auto err = errgen(reng); // reality is not flat.... - ev.ztrack.push_back(ev.zvert[iv] + err * gauss(reng)); - ev.eztrack.push_back(err * err); - ev.ivert.push_back(iv); - ev.pttrack.push_back((iv == 5 ? 
1.f : 0.5f) + ptGen(reng)); - ev.pttrack.back() *= ev.pttrack.back(); - } - } - // add noise - auto nt = 2 * trackGen(reng); - for (int it = 0; it < nt; ++it) { - auto err = 0.03f; - ev.ztrack.push_back(rgen(reng)); - ev.eztrack.push_back(err * err); - ev.ivert.push_back(9999); - ev.pttrack.push_back(0.5f + ptGen(reng)); - ev.pttrack.back() *= ev.pttrack.back(); - } - } - - std::mt19937 reng; - std::uniform_real_distribution rgen; - std::uniform_real_distribution errgen; - std::poisson_distribution clusGen; - std::poisson_distribution trackGen; - std::normal_distribution gauss; - std::exponential_distribution ptGen; -}; - -__global__ void print(gpuVertexFinder::VtxSoAView pdata, gpuVertexFinder::WsSoAView pws) { - auto& __restrict__ ws = pws; - printf("nt,nv %d %d,%d\n", ws.ntrks(), pdata.nvFinal(), ws.nvIntermediate()); -} - -int main() { -#ifdef __CUDACC__ - cudaStream_t stream; - cms::cudatest::requireDevices(); - cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); - - ZVertexSoADevice onGPU_d(stream); - gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoADevice ws_d(stream); -#else - - ZVertexSoAHost onGPU_d; - gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAHost ws_d; -#endif - - Event ev; - - float eps = 0.1f; - std::array par{{eps, 0.01f, 9.0f}}; - for (int nav = 30; nav < 80; nav += 20) { - ClusterGenerator gen(nav, 10); - - for (int i = 8; i < 20; ++i) { - auto kk = i / 4; // M param - - gen(ev); - -#ifdef __CUDACC__ - gpuVertexFinder::init<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view()); -#else - gpuVertexFinder::init(onGPU_d.view(), ws_d.view()); -#endif - - std::cout << "v,t size " << ev.zvert.size() << ' ' << ev.ztrack.size() << std::endl; - auto nt = ev.ztrack.size(); -#ifdef __CUDACC__ - cudaCheck(cudaMemcpy(&ws_d.view().ntrks(), &nt, sizeof(uint32_t), cudaMemcpyHostToDevice)); - cudaCheck( - cudaMemcpy(ws_d.view().zt(), ev.ztrack.data(), sizeof(float) * ev.ztrack.size(), cudaMemcpyHostToDevice)); - cudaCheck( - 
cudaMemcpy(ws_d.view().ezt2(), ev.eztrack.data(), sizeof(float) * ev.eztrack.size(), cudaMemcpyHostToDevice)); - cudaCheck( - cudaMemcpy(ws_d.view().ptt2(), ev.pttrack.data(), sizeof(float) * ev.eztrack.size(), cudaMemcpyHostToDevice)); -#else - ::memcpy(&ws_d.view().ntrks(), &nt, sizeof(uint32_t)); - ::memcpy(ws_d.view().zt(), ev.ztrack.data(), sizeof(float) * ev.ztrack.size()); - ::memcpy(ws_d.view().ezt2(), ev.eztrack.data(), sizeof(float) * ev.eztrack.size()); - ::memcpy(ws_d.view().ptt2(), ev.pttrack.data(), sizeof(float) * ev.eztrack.size()); -#endif - - std::cout << "M eps, pset " << kk << ' ' << eps << ' ' << (i % 4) << std::endl; - - if ((i % 4) == 0) - par = {{eps, 0.02f, 12.0f}}; - if ((i % 4) == 1) - par = {{eps, 0.02f, 9.0f}}; - if ((i % 4) == 2) - par = {{eps, 0.01f, 9.0f}}; - if ((i % 4) == 3) - par = {{0.7f * eps, 0.01f, 9.0f}}; - - uint32_t nv = 0; -#ifdef __CUDACC__ - print<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view()); - cudaCheck(cudaGetLastError()); - cudaDeviceSynchronize(); - -#ifdef ONE_KERNEL - cms::cuda::launch(vertexFinderOneKernel, {1, 512 + 256}, onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]); -#else - cms::cuda::launch(CLUSTERIZE, {1, 512 + 256}, onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]); -#endif - print<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view()); - - cudaCheck(cudaGetLastError()); - cudaDeviceSynchronize(); - - cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 50.f); - cudaCheck(cudaGetLastError()); - cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost)); - -#else - print(onGPU_d.view(), ws_d.view()); - CLUSTERIZE(onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]); - print(onGPU_d.view(), ws_d.view()); - gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 50.f); - nv = onGPU_d.view().nvFinal(); -#endif - - if (nv == 0) { - std::cout << "NO VERTICES???" 
<< std::endl; - continue; - } - - float* zv = nullptr; - float* wv = nullptr; - float* ptv2 = nullptr; - int32_t* nn = nullptr; - uint16_t* ind = nullptr; - - // keep chi2 separated... - float chi2[2 * nv]; // make space for splitting... - -#ifdef __CUDACC__ - float hzv[2 * nv]; - float hwv[2 * nv]; - float hptv2[2 * nv]; - int32_t hnn[2 * nv]; - uint16_t hind[2 * nv]; - - zv = hzv; - wv = hwv; - ptv2 = hptv2; - nn = hnn; - ind = hind; -#else - zv = onGPU_d.view().zv(); - wv = onGPU_d.view().wv(); - ptv2 = onGPU_d.view().ptv2(); - nn = onGPU_d.view().ndof(); - ind = onGPU_d.view().sortInd(); -#endif - -#ifdef __CUDACC__ - cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float), cudaMemcpyDeviceToHost)); -#else - memcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float)); -#endif - - for (auto j = 0U; j < nv; ++j) - if (nn[j] > 0) - chi2[j] /= float(nn[j]); - { - auto mx = std::minmax_element(chi2, chi2 + nv); - std::cout << "after fit nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl; - } - -#ifdef __CUDACC__ - cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 50.f); - cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float), cudaMemcpyDeviceToHost)); -#else - gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 50.f); - nv = onGPU_d.view().nvFinal(); - memcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float)); -#endif - - for (auto j = 0U; j < nv; ++j) - if (nn[j] > 0) - chi2[j] /= float(nn[j]); - { - auto mx = std::minmax_element(chi2, chi2 + nv); - std::cout << "before splitting nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl; - } - -#ifdef 
__CUDACC__ - // one vertex per block!!! - cms::cuda::launch(gpuVertexFinder::splitVerticesKernel, {1024, 64}, onGPU_d.view(), ws_d.view(), 9.f); - cudaCheck(cudaMemcpy(&nv, &ws_d.view().nvIntermediate(), sizeof(uint32_t), cudaMemcpyDeviceToHost)); -#else - gpuVertexFinder::splitVertices(onGPU_d.view(), ws_d.view(), 9.f); - nv = ws_d.view().nvIntermediate(); -#endif - std::cout << "after split " << nv << std::endl; - -#ifdef __CUDACC__ - cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 5000.f); - cudaCheck(cudaGetLastError()); - - cms::cuda::launch(gpuVertexFinder::sortByPt2Kernel, {1, 256}, onGPU_d.view(), ws_d.view()); - cudaCheck(cudaGetLastError()); - cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost)); -#else - gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 5000.f); - gpuVertexFinder::sortByPt2(onGPU_d.view(), ws_d.view()); - nv = onGPU_d.view().nvFinal(); - memcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float)); -#endif - - if (nv == 0) { - std::cout << "NO VERTICES???" 
<< std::endl; - continue; - } - -#ifdef __CUDACC__ - cudaCheck(cudaMemcpy(zv, onGPU_d.view().zv(), nv * sizeof(float), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(wv, onGPU_d.view().wv(), nv * sizeof(float), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(ptv2, onGPU_d.view().ptv2(), nv * sizeof(float), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(ind, onGPU_d.view().sortInd(), nv * sizeof(uint16_t), cudaMemcpyDeviceToHost)); -#endif - for (auto j = 0U; j < nv; ++j) - if (nn[j] > 0) - chi2[j] /= float(nn[j]); - { - auto mx = std::minmax_element(chi2, chi2 + nv); - std::cout << "nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl; - } - - { - auto mx = std::minmax_element(wv, wv + nv); - std::cout << "min max error " << 1. / std::sqrt(*mx.first) << ' ' << 1. / std::sqrt(*mx.second) << std::endl; - } - - { - auto mx = std::minmax_element(ptv2, ptv2 + nv); - std::cout << "min max ptv2 " << *mx.first << ' ' << *mx.second << std::endl; - std::cout << "min max ptv2 " << ptv2[ind[0]] << ' ' << ptv2[ind[nv - 1]] << " at " << ind[0] << ' ' - << ind[nv - 1] << std::endl; - } - - float dd[nv]; - for (auto kv = 0U; kv < nv; ++kv) { - auto zr = zv[kv]; - auto md = 500.0f; - for (auto zs : ev.ztrack) { - auto d = std::abs(zr - zs); - md = std::min(d, md); - } - dd[kv] = md; - } - if (i == 6) { - for (auto d : dd) - std::cout << d << ' '; - std::cout << std::endl; - } - auto mx = std::minmax_element(dd, dd + nv); - float rms = 0; - for (auto d : dd) - rms += d * d; - rms = std::sqrt(rms) / (nv - 1); - std::cout << "min max rms " << *mx.first << ' ' << *mx.second << ' ' << rms << std::endl; - - } // loop on events - } // lopp on ave vert - - return 0; -} diff --git a/RecoVertex/PixelVertexFinding/test/cpuVertexFinder_t.cpp 
b/RecoVertex/PixelVertexFinding/test/cpuVertexFinder_t.cpp deleted file mode 100644 index a7906fe0d03f5..0000000000000 --- a/RecoVertex/PixelVertexFinding/test/cpuVertexFinder_t.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "VertexFinder_t.h" diff --git a/RecoVertex/PixelVertexFinding/test/gpuVertexFinder_t.cu b/RecoVertex/PixelVertexFinding/test/gpuVertexFinder_t.cu deleted file mode 100644 index a7906fe0d03f5..0000000000000 --- a/RecoVertex/PixelVertexFinding/test/gpuVertexFinder_t.cu +++ /dev/null @@ -1 +0,0 @@ -#include "VertexFinder_t.h"