From 23f6a4f48ee1eb93c21ad3ab374b00a1e1b35de1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 21:10:30 +0000 Subject: [PATCH 01/17] Add IGRAPH support to CUDACPP for MLM --- .../iolibs/template_files/gpu/Bridge.h | 8 +++- .../template_files/gpu/BridgeKernels.cc | 8 ++-- .../gpu/MatrixElementKernels.cc | 4 +- .../template_files/gpu/MatrixElementKernels.h | 6 +++ .../iolibs/template_files/gpu/fbridge.cc | 8 ++-- .../iolibs/template_files/gpu/fbridge.h | 2 + .../iolibs/template_files/gpu/fbridge.inc | 9 +++- .../gpu/process_function_definitions.inc | 44 ++++++++++------- .../iolibs/template_files/gpu/process_h.inc | 2 + .../gpu/process_sigmaKin_function.inc | 47 ++++++++++++------- .../iolibs/template_files/gpu/smatrix_multi.f | 6 +-- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 2 + 12 files changed, 96 insertions(+), 50 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h index 4e3f17e0dd..4d06021a2e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph1, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph1, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph1, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setIgraph1( igraph1 ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph1, 
FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setIgraph1( igraph1 ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/BridgeKernels.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/BridgeKernels.cc index 62e2c3af96..b7225c9a15 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/BridgeKernels.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph1, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph1, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc index b61df224f1..128beadea2 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph1, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph1, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h index 16f8874888..acf8f3f1ea 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setIgraph1( const int* igraph1 ) { m_igraph1 = igraph1; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the 
channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setIgraph1 before computeMatrixElements) + const int* m_igraph1 = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc index 8b3f302975..56e94f6782 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph1, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph1, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph1, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph1, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph1, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h index 7d5014a138..c7f6c498be 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph1, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph1, FORTRANFPTYPE* mes, int* selhel, int* selcol, diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc index 5708dca15c..d6a3621c6c 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH1: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color 
Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH1, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH1(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH1: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH1, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH1(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc index 0665bfb93b..0b2e0eb934 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc @@ -591,38 +591,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph1, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph1 = ( allIgraph1 != nullptr ) ? 
allIgraph1[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph1 != 0 ) // no event-by-event choice of color if both channelId and igraph1 are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d which is greater than nchannels=%%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1 != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%%d (invalid SDE iconfig=%%d\n > nconfig=%%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d which is greater than nchannels=%%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%%d (invalid SDE iconfig=%%d\n > nconfig=%%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -648,7 +660,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph1 are 0 (see #931) } return; } diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_h.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_h.inc index 7de8886b1d..3067cc308d 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_h.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_h.inc @@ -86,6 +86,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph1, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -110,6 +111,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph1, // input: per-event MLM graph (0 = 
no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc index 4372edde52..d6a24d84a0 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc @@ -70,7 +70,7 @@ gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph1, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -94,7 +94,7 @@ // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph1, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -206,25 +206,36 @@ } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if 
channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph1 != nullptr ) ? allIgraph1[ievt00] : 0; // all events in SIMD page share the same igraph1 + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph1 are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d which is greater than nchannels=%%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 - } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d (invalid SDE iconfig=%%d\n > nconfig=%%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%%d which is greater than nchannels=%%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d (invalid SDE iconfig=%%d\n > nconfig=%%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -287,7 +298,7 @@ for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph1 are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/smatrix_multi.f 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/smatrix_multi.f index 1151dc5a6c..0ff1b29766 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/smatrix_multi.f +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/smatrix_multi.f @@ -18,7 +18,7 @@ IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH1, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) @@ -38,7 +38,7 @@ CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH1, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -46,7 +46,7 @@ STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH1, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! 
cudacppMEs=0 diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index f2d7189ddd..ac4a3f168e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -358,6 +358,8 @@ def write_auto_dsig_file(self, writer, matrix_element, proc_id = ""): SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' From 5e0eaecfb58d403c0a3e6e1129be834e875bf8ab Mon Sep 17 00:00:00 2001 From: Daniele Massaro Date: Tue, 14 Apr 2026 11:43:44 +0200 Subject: [PATCH 02/17] Rename igraph1->igraph everywhere --- .../madgraph/iolibs/template_files/gpu/Bridge.h | 12 ++++++------ .../iolibs/template_files/gpu/BridgeKernels.cc | 4 ++-- .../template_files/gpu/MatrixElementKernels.cc | 4 ++-- .../iolibs/template_files/gpu/MatrixElementKernels.h | 6 +++--- .../madgraph/iolibs/template_files/gpu/fbridge.cc | 10 +++++----- .../madgraph/iolibs/template_files/gpu/fbridge.h | 6 +++--- .../madgraph/iolibs/template_files/gpu/fbridge.inc | 12 ++++++------ .../gpu/process_function_definitions.inc | 12 ++++++------ .../madgraph/iolibs/template_files/gpu/process_h.inc | 4 ++-- .../template_files/gpu/process_sigmaKin_function.inc | 10 +++++----- .../iolibs/template_files/gpu/smatrix_multi.f | 6 +++--- 11 files changed, 43 insertions(+), 43 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h index 4d06021a2e..9cdf2f90d1 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h @@ -125,7 +125,7 @@ namespace 
mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph1, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph1, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,7 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, - const int* igraph1, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -395,7 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; - m_pmek->setIgraph1( igraph1 ); + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); 
copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -425,7 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, - const int* igraph1, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -457,7 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; - m_pmek->setIgraph1( igraph1 ); + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/BridgeKernels.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/BridgeKernels.cc index b7225c9a15..2d46db185e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/BridgeKernels.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/BridgeKernels.cc @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph1, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph1, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc index 128beadea2..1e7dcf38fe 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph1, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph1, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h index acf8f3f1ea..9382732d9f 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h @@ -47,7 +47,7 @@ namespace mg5amcCpu virtual int computeGoodHelicities() = 0; // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) - void setIgraph1( const int* igraph1 ) { m_igraph1 = igraph1; } + void setigraph( const int* igraph ) { m_igraph = igraph; } // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -87,8 +87,8 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram 
enhancement const BufferChannelIds& m_channelIds; - // The per-event MLM graph array (nullptr = no MLM; set via setIgraph1 before computeMatrixElements) - const int* m_igraph1 = nullptr; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before computeMatrixElements) + const int* m_igraph = nullptr; // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc index 56e94f6782..fea35823f5 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc @@ -91,7 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, - const int* igraph1, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -103,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph1, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph1, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -130,14 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, - const int* igraph1, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* 
selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph1, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h index c7f6c498be..b3667b03fe 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h @@ -29,7 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, - const int* igraph1, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -40,7 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, - const int* igraph1, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -48,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc index d6a3621c6c..590063408a 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc @@ -37,7 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran 
array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance -C - IGRAPH1: the input per-event MLM graph array (0 = no MLM graph) +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -45,7 +45,7 @@ C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, IGRAPH1, MES, SELHEL, SELCOL, + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) @@ -53,7 +53,7 @@ C DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) - INTEGER*4 IGRAPH1(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -68,7 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection -C - IGRAPH1: the input per-event MLM graph array (0 = no MLM graph) +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -76,13 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, IGRAPH1, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) - INTEGER*4 IGRAPH1(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc index 0b2e0eb934..bcfbd0652f 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc @@ -591,7 +591,7 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) - const int* allIgraph1, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { @@ -599,9 +599,9 @@ namespace mg5amcCpu // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); // Per-event MLM graph (0 = no MLM) - const int igraph1 = ( allIgraph1 != nullptr ) ? allIgraph1[ievt] : 0; + const int igraph = ( allIgraph != nullptr ) ? 
allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 || igraph1 != 0 ) // no event-by-event choice of color if both channelId and igraph1 are 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; @@ -611,9 +611,9 @@ namespace mg5amcCpu jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig int iconfig; - if( igraph1 != 0 ) + if( igraph != 0 ) { - iconfig = igraph1; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } else { @@ -660,7 +660,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph1 are 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_h.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_h.inc index 3067cc308d..de07450c31 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_h.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_h.inc @@ -86,7 +86,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) - const int* allIgraph1, // input: 
per-event MLM graph (0 = no MLM); nullptr if no MLM + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -111,7 +111,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) - const int* allIgraph1, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc index d6a24d84a0..aea1632410 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc @@ -70,7 +70,7 @@ gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph1, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -94,7 +94,7 @@ // - 
firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph1, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -207,8 +207,8 @@ #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph1 != nullptr ) ? allIgraph1[ievt00] : 0; // all events in SIMD page share the same igraph1 - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph1 are 0 (fix FPE #783) + const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig int iconfig; @@ -298,7 +298,7 @@ for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph1 are 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/smatrix_multi.f b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/smatrix_multi.f index 0ff1b29766..858052727f 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/smatrix_multi.f +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/smatrix_multi.f @@ -18,7 +18,7 @@ IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH1, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) @@ -38,7 +38,7 @@ CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! 
cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH1, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -46,7 +46,7 @@ STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, IGRAPH1, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 From 01ce9fd802d5579899d5bfc2507e3ab1866c7299 Mon Sep 17 00:00:00 2001 From: Daniele Massaro Date: Tue, 14 Apr 2026 11:47:45 +0200 Subject: [PATCH 03/17] Point submodule to copilot/fix-mlm-issue-phase-space --- MG5aMC/mg5amcnlo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index bfd34580eb..723f843075 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit bfd34580eb59c2a027a502c89995e682a70a95b9 +Subproject commit 723f8430750707c35d8c64dc73732a1ae94aa5c3 From 5dbd7bf9e01b0a7d980d514fb26103f28e1df2f6 Mon Sep 17 00:00:00 2001 From: Daniele Massaro Date: Tue, 14 Apr 2026 12:10:25 +0200 Subject: [PATCH 04/17] Regenerate processes --- .../gpu/process_function_definitions.inc | 2 +- .../gpu/process_sigmaKin_function.inc | 2 +- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 76 ++++---- .../ee_mumu.mad/Cards/me5_configuration.txt | 4 +- .../ee_mumu.mad/Cards/proc_card_mg5.dat | 2 +- .../Source/DHELAS/aloha_functions.f | 15 -- .../cudacpp/ee_mumu.mad/SubProcesses/Bridge.h | 8 +- .../ee_mumu.mad/SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../SubProcesses/P1_epem_mupmum/CPPProcess.cc | 91 +++++---- 
.../SubProcesses/P1_epem_mupmum/CPPProcess.h | 2 + .../SubProcesses/P1_epem_mupmum/auto_dsig.f | 8 +- .../SubProcesses/P1_epem_mupmum/auto_dsig1.f | 18 +- .../SubProcesses/P1_epem_mupmum/matrix1.f | 2 - .../ee_mumu.mad/SubProcesses/addmothers.f | 2 +- .../ee_mumu.mad/SubProcesses/cluster.inc | 4 +- .../ee_mumu.mad/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../ee_mumu.mad/SubProcesses/fbridge.cc | 8 +- .../ee_mumu.mad/SubProcesses/fbridge.h | 4 +- .../ee_mumu.mad/SubProcesses/fbridge.inc | 9 +- .../SubProcesses/makefile_original.mk | 5 +- .../cudacpp/ee_mumu.mad/SubProcesses/myamp.f | 2 +- .../ee_mumu.mad/SubProcesses/reweight.f | 8 +- .../ee_mumu.mad/bin/internal/banner.py | 49 ++--- .../bin/internal/common_run_interface.py | 6 +- .../ee_mumu.mad/bin/internal/launch_plugin.py | 19 +- .../bin/internal/ufomodel/py3_model_FDG.pkl | Bin 49027 -> 0 bytes .../internal/ufomodel/py3_model_Feynman.pkl | Bin 42837 -> 0 bytes .../cudacpp/ee_mumu.mad/src/mgOnGpuVectors.h | 12 +- .../cudacpp/ee_mumu.mad/test/cudacpp_test.mk | 3 +- .../CODEGEN_cudacpp_ee_mumu_log.txt | 46 ++--- .../cudacpp/ee_mumu.sa/SubProcesses/Bridge.h | 8 +- .../ee_mumu.sa/SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../P1_Sigma_sm_epem_mupmum/CPPProcess.cc | 91 +++++---- .../P1_Sigma_sm_epem_mupmum/CPPProcess.h | 2 + .../ee_mumu.sa/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../ee_mumu.sa/SubProcesses/fbridge.cc | 8 +- .../cudacpp/ee_mumu.sa/SubProcesses/fbridge.h | 4 +- .../ee_mumu.sa/SubProcesses/fbridge.inc | 9 +- .../cudacpp/ee_mumu.sa/src/mgOnGpuVectors.h | 12 +- .../cudacpp/ee_mumu.sa/test/cudacpp_test.mk | 3 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 74 +++---- .../gg_tt.mad/Cards/me5_configuration.txt | 4 +- .../cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat | 2 +- .../gg_tt.mad/Source/DHELAS/aloha_functions.f | 15 -- 
.../cudacpp/gg_tt.mad/SubProcesses/Bridge.h | 8 +- .../gg_tt.mad/SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gg_ttx/CPPProcess.h | 2 + .../SubProcesses/P1_gg_ttx/auto_dsig.f | 8 +- .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttx/matrix1.f | 2 - .../gg_tt.mad/SubProcesses/addmothers.f | 2 +- .../gg_tt.mad/SubProcesses/cluster.inc | 4 +- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 49 ++++- .../gg_tt.mad/SubProcesses/cudacpp_overlay.mk | 3 +- .../cudacpp/gg_tt.mad/SubProcesses/fbridge.cc | 8 +- .../cudacpp/gg_tt.mad/SubProcesses/fbridge.h | 4 +- .../gg_tt.mad/SubProcesses/fbridge.inc | 9 +- .../SubProcesses/makefile_original.mk | 5 +- epochX/cudacpp/gg_tt.mad/SubProcesses/myamp.f | 2 +- .../cudacpp/gg_tt.mad/SubProcesses/reweight.f | 8 +- .../cudacpp/gg_tt.mad/bin/internal/banner.py | 49 ++--- .../bin/internal/common_run_interface.py | 6 +- .../gg_tt.mad/bin/internal/launch_plugin.py | 19 +- .../bin/internal/ufomodel/py3_model_FDG.pkl | Bin 49027 -> 0 bytes .../internal/ufomodel/py3_model_Feynman.pkl | Bin 42837 -> 0 bytes epochX/cudacpp/gg_tt.mad/src/mgOnGpuVectors.h | 12 +- epochX/cudacpp/gg_tt.mad/test/cudacpp_test.mk | 3 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 46 ++--- epochX/cudacpp/gg_tt.sa/SubProcesses/Bridge.h | 8 +- .../gg_tt.sa/SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../P1_Sigma_sm_gg_ttx/CPPProcess.cc | 91 +++++---- .../P1_Sigma_sm_gg_ttx/CPPProcess.h | 2 + .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 49 ++++- .../gg_tt.sa/SubProcesses/cudacpp_overlay.mk | 3 +- .../cudacpp/gg_tt.sa/SubProcesses/fbridge.cc | 8 +- .../cudacpp/gg_tt.sa/SubProcesses/fbridge.h | 4 +- .../cudacpp/gg_tt.sa/SubProcesses/fbridge.inc | 9 +- epochX/cudacpp/gg_tt.sa/src/mgOnGpuVectors.h | 12 +- 
epochX/cudacpp/gg_tt.sa/test/cudacpp_test.mk | 3 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 81 ++++---- .../gg_tt01g.mad/Cards/me5_configuration.txt | 4 +- .../gg_tt01g.mad/Cards/proc_card_mg5.dat | 2 +- .../Source/DHELAS/aloha_functions.f | 15 -- .../gg_tt01g.mad/SubProcesses/Bridge.h | 8 +- .../SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gg_ttx/CPPProcess.h | 2 + .../SubProcesses/P1_gg_ttx/auto_dsig.f | 8 +- .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttx/matrix1.f | 2 - .../SubProcesses/P2_gg_ttxg/CPPProcess.cc | 91 +++++---- .../SubProcesses/P2_gg_ttxg/CPPProcess.h | 2 + .../SubProcesses/P2_gg_ttxg/auto_dsig.f | 8 +- .../SubProcesses/P2_gg_ttxg/auto_dsig1.f | 18 +- .../SubProcesses/P2_gg_ttxg/matrix1.f | 2 - .../gg_tt01g.mad/SubProcesses/addmothers.f | 2 +- .../gg_tt01g.mad/SubProcesses/cluster.inc | 4 +- .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../gg_tt01g.mad/SubProcesses/fbridge.cc | 8 +- .../gg_tt01g.mad/SubProcesses/fbridge.h | 4 +- .../gg_tt01g.mad/SubProcesses/fbridge.inc | 9 +- .../SubProcesses/makefile_original.mk | 5 +- .../cudacpp/gg_tt01g.mad/SubProcesses/myamp.f | 2 +- .../gg_tt01g.mad/SubProcesses/reweight.f | 8 +- .../gg_tt01g.mad/bin/internal/banner.py | 49 ++--- .../bin/internal/common_run_interface.py | 6 +- .../bin/internal/launch_plugin.py | 19 +- .../bin/internal/ufomodel/py3_model_FDG.pkl | Bin 49027 -> 0 bytes .../internal/ufomodel/py3_model_Feynman.pkl | Bin 42837 -> 0 bytes .../cudacpp/gg_tt01g.mad/src/mgOnGpuVectors.h | 12 +- .../cudacpp/gg_tt01g.mad/test/cudacpp_test.mk | 3 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 75 ++++---- .../gg_ttg.mad/Cards/me5_configuration.txt | 4 +- .../gg_ttg.mad/Cards/proc_card_mg5.dat | 2 +- .../Source/DHELAS/aloha_functions.f | 15 -- 
.../cudacpp/gg_ttg.mad/SubProcesses/Bridge.h | 8 +- .../gg_ttg.mad/SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gg_ttxg/CPPProcess.h | 2 + .../SubProcesses/P1_gg_ttxg/auto_dsig.f | 8 +- .../SubProcesses/P1_gg_ttxg/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttxg/matrix1.f | 2 - .../gg_ttg.mad/SubProcesses/addmothers.f | 2 +- .../gg_ttg.mad/SubProcesses/cluster.inc | 4 +- .../gg_ttg.mad/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../gg_ttg.mad/SubProcesses/fbridge.cc | 8 +- .../cudacpp/gg_ttg.mad/SubProcesses/fbridge.h | 4 +- .../gg_ttg.mad/SubProcesses/fbridge.inc | 9 +- .../SubProcesses/makefile_original.mk | 5 +- .../cudacpp/gg_ttg.mad/SubProcesses/myamp.f | 2 +- .../gg_ttg.mad/SubProcesses/reweight.f | 8 +- .../cudacpp/gg_ttg.mad/bin/internal/banner.py | 49 ++--- .../bin/internal/common_run_interface.py | 6 +- .../gg_ttg.mad/bin/internal/launch_plugin.py | 19 +- .../bin/internal/ufomodel/py3_model_FDG.pkl | Bin 49027 -> 0 bytes .../internal/ufomodel/py3_model_Feynman.pkl | Bin 42837 -> 0 bytes .../cudacpp/gg_ttg.mad/src/mgOnGpuVectors.h | 12 +- .../cudacpp/gg_ttg.mad/test/cudacpp_test.mk | 3 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 45 ++--- .../cudacpp/gg_ttg.sa/SubProcesses/Bridge.h | 8 +- .../gg_ttg.sa/SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../P1_Sigma_sm_gg_ttxg/CPPProcess.cc | 91 +++++---- .../P1_Sigma_sm_gg_ttxg/CPPProcess.h | 2 + .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 49 ++++- .../gg_ttg.sa/SubProcesses/cudacpp_overlay.mk | 3 +- .../cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc | 8 +- .../cudacpp/gg_ttg.sa/SubProcesses/fbridge.h | 4 +- .../gg_ttg.sa/SubProcesses/fbridge.inc | 9 +- epochX/cudacpp/gg_ttg.sa/src/mgOnGpuVectors.h | 12 +- 
epochX/cudacpp/gg_ttg.sa/test/cudacpp_test.mk | 3 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 75 ++++---- .../gg_ttgg.mad/Cards/me5_configuration.txt | 4 +- .../gg_ttgg.mad/Cards/proc_card_mg5.dat | 2 +- .../Source/DHELAS/aloha_functions.f | 15 -- .../cudacpp/gg_ttgg.mad/SubProcesses/Bridge.h | 8 +- .../gg_ttgg.mad/SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../SubProcesses/P1_gg_ttxgg/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gg_ttxgg/CPPProcess.h | 2 + .../SubProcesses/P1_gg_ttxgg/auto_dsig.f | 8 +- .../SubProcesses/P1_gg_ttxgg/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttxgg/matrix1.f | 2 - .../gg_ttgg.mad/SubProcesses/addmothers.f | 2 +- .../gg_ttgg.mad/SubProcesses/cluster.inc | 4 +- .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../gg_ttgg.mad/SubProcesses/fbridge.cc | 8 +- .../gg_ttgg.mad/SubProcesses/fbridge.h | 4 +- .../gg_ttgg.mad/SubProcesses/fbridge.inc | 9 +- .../SubProcesses/makefile_original.mk | 5 +- .../cudacpp/gg_ttgg.mad/SubProcesses/myamp.f | 2 +- .../gg_ttgg.mad/SubProcesses/reweight.f | 8 +- .../gg_ttgg.mad/bin/internal/banner.py | 49 ++--- .../bin/internal/common_run_interface.py | 6 +- .../gg_ttgg.mad/bin/internal/launch_plugin.py | 19 +- .../bin/internal/ufomodel/py3_model_FDG.pkl | Bin 49027 -> 0 bytes .../internal/ufomodel/py3_model_Feynman.pkl | Bin 42837 -> 0 bytes .../cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h | 12 +- .../cudacpp/gg_ttgg.mad/test/cudacpp_test.mk | 3 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 45 ++--- .../cudacpp/gg_ttgg.sa/SubProcesses/Bridge.h | 8 +- .../gg_ttgg.sa/SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../P1_Sigma_sm_gg_ttxgg/CPPProcess.cc | 91 +++++---- .../P1_Sigma_sm_gg_ttxgg/CPPProcess.h | 2 + .../gg_ttgg.sa/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 
3 +- .../gg_ttgg.sa/SubProcesses/fbridge.cc | 8 +- .../cudacpp/gg_ttgg.sa/SubProcesses/fbridge.h | 4 +- .../gg_ttgg.sa/SubProcesses/fbridge.inc | 9 +- .../cudacpp/gg_ttgg.sa/src/mgOnGpuVectors.h | 12 +- .../cudacpp/gg_ttgg.sa/test/cudacpp_test.mk | 3 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 77 ++++---- .../gg_ttggg.mad/Cards/me5_configuration.txt | 4 +- .../gg_ttggg.mad/Cards/proc_card_mg5.dat | 2 +- .../Source/DHELAS/aloha_functions.f | 15 -- .../gg_ttggg.mad/SubProcesses/Bridge.h | 8 +- .../SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../SubProcesses/P1_gg_ttxggg/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gg_ttxggg/CPPProcess.h | 2 + .../SubProcesses/P1_gg_ttxggg/auto_dsig.f | 8 +- .../SubProcesses/P1_gg_ttxggg/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttxggg/matrix1.f | 2 - .../gg_ttggg.mad/SubProcesses/addmothers.f | 2 +- .../gg_ttggg.mad/SubProcesses/cluster.inc | 4 +- .../gg_ttggg.mad/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../gg_ttggg.mad/SubProcesses/fbridge.cc | 8 +- .../gg_ttggg.mad/SubProcesses/fbridge.h | 4 +- .../gg_ttggg.mad/SubProcesses/fbridge.inc | 9 +- .../SubProcesses/makefile_original.mk | 5 +- .../cudacpp/gg_ttggg.mad/SubProcesses/myamp.f | 2 +- .../gg_ttggg.mad/SubProcesses/reweight.f | 8 +- .../gg_ttggg.mad/bin/internal/banner.py | 49 ++--- .../bin/internal/common_run_interface.py | 6 +- .../bin/internal/launch_plugin.py | 19 +- .../bin/internal/ufomodel/py3_model_FDG.pkl | Bin 49027 -> 0 bytes .../internal/ufomodel/py3_model_Feynman.pkl | Bin 42837 -> 0 bytes .../cudacpp/gg_ttggg.mad/src/mgOnGpuVectors.h | 12 +- .../cudacpp/gg_ttggg.mad/test/cudacpp_test.mk | 3 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 45 ++--- .../cudacpp/gg_ttggg.sa/SubProcesses/Bridge.h | 8 +- .../gg_ttggg.sa/SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- 
.../SubProcesses/MatrixElementKernels.h | 6 + .../P1_Sigma_sm_gg_ttxggg/CPPProcess.cc | 91 +++++---- .../P1_Sigma_sm_gg_ttxggg/CPPProcess.h | 2 + .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../gg_ttggg.sa/SubProcesses/fbridge.cc | 8 +- .../gg_ttggg.sa/SubProcesses/fbridge.h | 4 +- .../gg_ttggg.sa/SubProcesses/fbridge.inc | 9 +- .../cudacpp/gg_ttggg.sa/src/mgOnGpuVectors.h | 12 +- .../cudacpp/gg_ttggg.sa/test/cudacpp_test.mk | 3 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 81 ++++---- .../gq_ttq.mad/Cards/me5_configuration.txt | 4 +- .../gq_ttq.mad/Cards/proc_card_mg5.dat | 2 +- .../Source/DHELAS/aloha_functions.f | 15 -- .../cudacpp/gq_ttq.mad/SubProcesses/Bridge.h | 8 +- .../gq_ttq.mad/SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gu_ttxu/CPPProcess.h | 2 + .../SubProcesses/P1_gu_ttxu/auto_dsig.f | 8 +- .../SubProcesses/P1_gu_ttxu/auto_dsig1.f | 18 +- .../SubProcesses/P1_gu_ttxu/matrix1.f | 2 - .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gux_ttxux/CPPProcess.h | 2 + .../SubProcesses/P1_gux_ttxux/auto_dsig.f | 8 +- .../SubProcesses/P1_gux_ttxux/auto_dsig1.f | 18 +- .../SubProcesses/P1_gux_ttxux/matrix1.f | 2 - .../gq_ttq.mad/SubProcesses/addmothers.f | 2 +- .../gq_ttq.mad/SubProcesses/cluster.inc | 4 +- .../gq_ttq.mad/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../gq_ttq.mad/SubProcesses/fbridge.cc | 8 +- .../cudacpp/gq_ttq.mad/SubProcesses/fbridge.h | 4 +- .../gq_ttq.mad/SubProcesses/fbridge.inc | 9 +- .../SubProcesses/makefile_original.mk | 5 +- .../cudacpp/gq_ttq.mad/SubProcesses/myamp.f | 2 +- .../gq_ttq.mad/SubProcesses/reweight.f | 8 +- .../cudacpp/gq_ttq.mad/bin/internal/banner.py | 49 ++--- .../bin/internal/common_run_interface.py | 6 +- 
.../gq_ttq.mad/bin/internal/launch_plugin.py | 19 +- .../bin/internal/ufomodel/py3_model_FDG.pkl | Bin 49027 -> 0 bytes .../internal/ufomodel/py3_model_Feynman.pkl | Bin 42837 -> 0 bytes .../cudacpp/gq_ttq.mad/src/mgOnGpuVectors.h | 12 +- .../cudacpp/gq_ttq.mad/test/cudacpp_test.mk | 3 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 53 ++--- .../cudacpp/gq_ttq.sa/SubProcesses/Bridge.h | 8 +- .../gq_ttq.sa/SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../P1_Sigma_sm_gu_ttxu/CPPProcess.cc | 91 +++++---- .../P1_Sigma_sm_gu_ttxu/CPPProcess.h | 2 + .../P1_Sigma_sm_gux_ttxux/CPPProcess.cc | 91 +++++---- .../P1_Sigma_sm_gux_ttxux/CPPProcess.h | 2 + .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 49 ++++- .../gq_ttq.sa/SubProcesses/cudacpp_overlay.mk | 3 +- .../cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc | 8 +- .../cudacpp/gq_ttq.sa/SubProcesses/fbridge.h | 4 +- .../gq_ttq.sa/SubProcesses/fbridge.inc | 9 +- epochX/cudacpp/gq_ttq.sa/src/mgOnGpuVectors.h | 12 +- epochX/cudacpp/gq_ttq.sa/test/cudacpp_test.mk | 3 +- .../CODEGEN_mad_heft_gg_bb_log.txt | 76 ++++---- .../Cards/me5_configuration.txt | 4 +- .../heft_gg_bb.mad/Cards/proc_card_mg5.dat | 2 +- .../Source/DHELAS/aloha_functions.f | 15 -- .../heft_gg_bb.mad/SubProcesses/Bridge.h | 8 +- .../SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../SubProcesses/P1_gg_bbx/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gg_bbx/CPPProcess.h | 2 + .../SubProcesses/P1_gg_bbx/auto_dsig.f | 8 +- .../SubProcesses/P1_gg_bbx/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_bbx/matrix1.f | 2 - .../heft_gg_bb.mad/SubProcesses/addmothers.f | 2 +- .../heft_gg_bb.mad/SubProcesses/cluster.inc | 4 +- .../heft_gg_bb.mad/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../heft_gg_bb.mad/SubProcesses/fbridge.cc | 8 +- 
.../heft_gg_bb.mad/SubProcesses/fbridge.h | 4 +- .../heft_gg_bb.mad/SubProcesses/fbridge.inc | 9 +- .../SubProcesses/makefile_original.mk | 5 +- .../heft_gg_bb.mad/SubProcesses/myamp.f | 2 +- .../heft_gg_bb.mad/SubProcesses/reweight.f | 8 +- .../heft_gg_bb.mad/bin/internal/banner.py | 49 ++--- .../bin/internal/common_run_interface.py | 6 +- .../bin/internal/launch_plugin.py | 19 +- .../heft_gg_bb.mad/src/mgOnGpuVectors.h | 12 +- .../heft_gg_bb.mad/test/cudacpp_test.mk | 3 +- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 57 +++--- .../heft_gg_bb.sa/SubProcesses/Bridge.h | 8 +- .../SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../P1_Sigma_heft_gg_bbx/CPPProcess.cc | 91 +++++---- .../P1_Sigma_heft_gg_bbx/CPPProcess.h | 2 + .../heft_gg_bb.sa/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../heft_gg_bb.sa/SubProcesses/fbridge.cc | 8 +- .../heft_gg_bb.sa/SubProcesses/fbridge.h | 4 +- .../heft_gg_bb.sa/SubProcesses/fbridge.inc | 9 +- .../heft_gg_bb.sa/src/mgOnGpuVectors.h | 12 +- .../heft_gg_bb.sa/test/cudacpp_test.mk | 3 +- .../CODEGEN_mad_nobm_pp_ttW_log.txt | 119 ++++++------ .../Cards/me5_configuration.txt | 4 +- .../nobm_pp_ttW.mad/Cards/proc_card_mg5.dat | 2 +- .../Source/DHELAS/aloha_functions.f | 15 -- .../nobm_pp_ttW.mad/SubProcesses/Bridge.h | 8 +- .../SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../SubProcesses/P0_dux_ttxwm/CPPProcess.cc | 91 +++++---- .../SubProcesses/P0_dux_ttxwm/CPPProcess.h | 2 + .../SubProcesses/P0_dux_ttxwm/auto_dsig.f | 8 +- .../SubProcesses/P0_dux_ttxwm/auto_dsig1.f | 18 +- .../SubProcesses/P0_dux_ttxwm/matrix1.f | 2 - .../SubProcesses/P0_udx_ttxwp/CPPProcess.cc | 91 +++++---- .../SubProcesses/P0_udx_ttxwp/CPPProcess.h | 2 + .../SubProcesses/P0_udx_ttxwp/auto_dsig.f | 8 +- .../SubProcesses/P0_udx_ttxwp/auto_dsig1.f | 18 +- 
.../SubProcesses/P0_udx_ttxwp/matrix1.f | 2 - .../SubProcesses/P1_dux_ttxwmg/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_dux_ttxwmg/CPPProcess.h | 2 + .../SubProcesses/P1_dux_ttxwmg/auto_dsig.f | 8 +- .../SubProcesses/P1_dux_ttxwmg/auto_dsig1.f | 18 +- .../SubProcesses/P1_dux_ttxwmg/matrix1.f | 2 - .../SubProcesses/P1_gd_ttxwmu/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gd_ttxwmu/CPPProcess.h | 2 + .../SubProcesses/P1_gd_ttxwmu/auto_dsig.f | 8 +- .../SubProcesses/P1_gd_ttxwmu/auto_dsig1.f | 18 +- .../SubProcesses/P1_gd_ttxwmu/matrix1.f | 2 - .../SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gdx_ttxwpux/CPPProcess.h | 2 + .../SubProcesses/P1_gdx_ttxwpux/auto_dsig.f | 8 +- .../SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f | 18 +- .../SubProcesses/P1_gdx_ttxwpux/matrix1.f | 2 - .../SubProcesses/P1_gu_ttxwpd/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gu_ttxwpd/CPPProcess.h | 2 + .../SubProcesses/P1_gu_ttxwpd/auto_dsig.f | 8 +- .../SubProcesses/P1_gu_ttxwpd/auto_dsig1.f | 18 +- .../SubProcesses/P1_gu_ttxwpd/matrix1.f | 2 - .../SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gux_ttxwmdx/CPPProcess.h | 2 + .../SubProcesses/P1_gux_ttxwmdx/auto_dsig.f | 8 +- .../SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f | 18 +- .../SubProcesses/P1_gux_ttxwmdx/matrix1.f | 2 - .../SubProcesses/P1_udx_ttxwpg/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_udx_ttxwpg/CPPProcess.h | 2 + .../SubProcesses/P1_udx_ttxwpg/auto_dsig.f | 8 +- .../SubProcesses/P1_udx_ttxwpg/auto_dsig1.f | 18 +- .../SubProcesses/P1_udx_ttxwpg/matrix1.f | 2 - .../nobm_pp_ttW.mad/SubProcesses/addmothers.f | 2 +- .../nobm_pp_ttW.mad/SubProcesses/cluster.inc | 4 +- .../nobm_pp_ttW.mad/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../nobm_pp_ttW.mad/SubProcesses/fbridge.cc | 8 +- .../nobm_pp_ttW.mad/SubProcesses/fbridge.h | 4 +- .../nobm_pp_ttW.mad/SubProcesses/fbridge.inc | 9 +- .../SubProcesses/makefile_original.mk | 5 
+- .../nobm_pp_ttW.mad/SubProcesses/myamp.f | 2 +- .../nobm_pp_ttW.mad/SubProcesses/reweight.f | 8 +- .../nobm_pp_ttW.mad/bin/internal/banner.py | 49 ++--- .../bin/internal/common_run_interface.py | 6 +- .../bin/internal/launch_plugin.py | 19 +- .../bin/internal/ufomodel/py3_model_FDG.pkl | Bin 49027 -> 0 bytes .../internal/ufomodel/py3_model_Feynman.pkl | Bin 42837 -> 0 bytes .../nobm_pp_ttW.mad/src/mgOnGpuVectors.h | 12 +- .../nobm_pp_ttW.mad/test/cudacpp_test.mk | 3 +- .../CODEGEN_mad_pp_tt012j_log.txt | 182 +++++++++--------- .../pp_tt012j.mad/Cards/me5_configuration.txt | 4 +- .../pp_tt012j.mad/Cards/proc_card_mg5.dat | 2 +- .../Source/DHELAS/aloha_functions.f | 15 -- .../pp_tt012j.mad/SubProcesses/Bridge.h | 8 +- .../SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../SubProcesses/P0_gg_ttx/CPPProcess.cc | 91 +++++---- .../SubProcesses/P0_gg_ttx/CPPProcess.h | 2 + .../SubProcesses/P0_gg_ttx/auto_dsig.f | 8 +- .../SubProcesses/P0_gg_ttx/auto_dsig1.f | 18 +- .../SubProcesses/P0_gg_ttx/matrix1.f | 2 - .../SubProcesses/P0_uux_ttx/CPPProcess.cc | 91 +++++---- .../SubProcesses/P0_uux_ttx/CPPProcess.h | 2 + .../SubProcesses/P0_uux_ttx/auto_dsig.f | 8 +- .../SubProcesses/P0_uux_ttx/auto_dsig1.f | 18 +- .../SubProcesses/P0_uux_ttx/matrix1.f | 2 - .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gg_ttxg/CPPProcess.h | 2 + .../SubProcesses/P1_gg_ttxg/auto_dsig.f | 8 +- .../SubProcesses/P1_gg_ttxg/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttxg/matrix1.f | 2 - .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gu_ttxu/CPPProcess.h | 2 + .../SubProcesses/P1_gu_ttxu/auto_dsig.f | 8 +- .../SubProcesses/P1_gu_ttxu/auto_dsig1.f | 18 +- .../SubProcesses/P1_gu_ttxu/matrix1.f | 2 - .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gux_ttxux/CPPProcess.h | 2 + .../SubProcesses/P1_gux_ttxux/auto_dsig.f | 8 +- 
.../SubProcesses/P1_gux_ttxux/auto_dsig1.f | 18 +- .../SubProcesses/P1_gux_ttxux/matrix1.f | 2 - .../SubProcesses/P1_uux_ttxg/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_uux_ttxg/CPPProcess.h | 2 + .../SubProcesses/P1_uux_ttxg/auto_dsig.f | 8 +- .../SubProcesses/P1_uux_ttxg/auto_dsig1.f | 18 +- .../SubProcesses/P1_uux_ttxg/matrix1.f | 2 - .../SubProcesses/P2_gg_ttxgg/CPPProcess.cc | 91 +++++---- .../SubProcesses/P2_gg_ttxgg/CPPProcess.h | 2 + .../SubProcesses/P2_gg_ttxgg/auto_dsig.f | 8 +- .../SubProcesses/P2_gg_ttxgg/auto_dsig1.f | 18 +- .../SubProcesses/P2_gg_ttxgg/matrix1.f | 2 - .../SubProcesses/P2_gg_ttxuux/CPPProcess.cc | 91 +++++---- .../SubProcesses/P2_gg_ttxuux/CPPProcess.h | 2 + .../SubProcesses/P2_gg_ttxuux/auto_dsig.f | 8 +- .../SubProcesses/P2_gg_ttxuux/auto_dsig1.f | 18 +- .../SubProcesses/P2_gg_ttxuux/matrix1.f | 2 - .../SubProcesses/P2_gu_ttxgu/CPPProcess.cc | 91 +++++---- .../SubProcesses/P2_gu_ttxgu/CPPProcess.h | 2 + .../SubProcesses/P2_gu_ttxgu/auto_dsig.f | 8 +- .../SubProcesses/P2_gu_ttxgu/auto_dsig1.f | 18 +- .../SubProcesses/P2_gu_ttxgu/matrix1.f | 2 - .../SubProcesses/P2_gux_ttxgux/CPPProcess.cc | 91 +++++---- .../SubProcesses/P2_gux_ttxgux/CPPProcess.h | 2 + .../SubProcesses/P2_gux_ttxgux/auto_dsig.f | 8 +- .../SubProcesses/P2_gux_ttxgux/auto_dsig1.f | 18 +- .../SubProcesses/P2_gux_ttxgux/matrix1.f | 2 - .../SubProcesses/P2_uc_ttxuc/CPPProcess.cc | 91 +++++---- .../SubProcesses/P2_uc_ttxuc/CPPProcess.h | 2 + .../SubProcesses/P2_uc_ttxuc/auto_dsig.f | 8 +- .../SubProcesses/P2_uc_ttxuc/auto_dsig1.f | 18 +- .../SubProcesses/P2_uc_ttxuc/matrix1.f | 2 - .../SubProcesses/P2_ucx_ttxucx/CPPProcess.cc | 91 +++++---- .../SubProcesses/P2_ucx_ttxucx/CPPProcess.h | 2 + .../SubProcesses/P2_ucx_ttxucx/auto_dsig.f | 8 +- .../SubProcesses/P2_ucx_ttxucx/auto_dsig1.f | 18 +- .../SubProcesses/P2_ucx_ttxucx/matrix1.f | 2 - .../SubProcesses/P2_uu_ttxuu/CPPProcess.cc | 91 +++++---- .../SubProcesses/P2_uu_ttxuu/CPPProcess.h | 2 + 
.../SubProcesses/P2_uu_ttxuu/auto_dsig.f | 8 +- .../SubProcesses/P2_uu_ttxuu/auto_dsig1.f | 18 +- .../SubProcesses/P2_uu_ttxuu/matrix1.f | 2 - .../SubProcesses/P2_uux_ttxccx/CPPProcess.cc | 91 +++++---- .../SubProcesses/P2_uux_ttxccx/CPPProcess.h | 2 + .../SubProcesses/P2_uux_ttxccx/auto_dsig.f | 8 +- .../SubProcesses/P2_uux_ttxccx/auto_dsig1.f | 18 +- .../SubProcesses/P2_uux_ttxccx/matrix1.f | 2 - .../SubProcesses/P2_uux_ttxgg/CPPProcess.cc | 91 +++++---- .../SubProcesses/P2_uux_ttxgg/CPPProcess.h | 2 + .../SubProcesses/P2_uux_ttxgg/auto_dsig.f | 8 +- .../SubProcesses/P2_uux_ttxgg/auto_dsig1.f | 18 +- .../SubProcesses/P2_uux_ttxgg/matrix1.f | 2 - .../SubProcesses/P2_uux_ttxuux/CPPProcess.cc | 91 +++++---- .../SubProcesses/P2_uux_ttxuux/CPPProcess.h | 2 + .../SubProcesses/P2_uux_ttxuux/auto_dsig.f | 8 +- .../SubProcesses/P2_uux_ttxuux/auto_dsig1.f | 18 +- .../SubProcesses/P2_uux_ttxuux/matrix1.f | 2 - .../P2_uxcx_ttxuxcx/CPPProcess.cc | 91 +++++---- .../SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.h | 2 + .../SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f | 8 +- .../SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f | 18 +- .../SubProcesses/P2_uxcx_ttxuxcx/matrix1.f | 2 - .../P2_uxux_ttxuxux/CPPProcess.cc | 91 +++++---- .../SubProcesses/P2_uxux_ttxuxux/CPPProcess.h | 2 + .../SubProcesses/P2_uxux_ttxuxux/auto_dsig.f | 8 +- .../SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f | 18 +- .../SubProcesses/P2_uxux_ttxuxux/matrix1.f | 2 - .../pp_tt012j.mad/SubProcesses/addmothers.f | 2 +- .../pp_tt012j.mad/SubProcesses/cluster.inc | 4 +- .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../pp_tt012j.mad/SubProcesses/fbridge.cc | 8 +- .../pp_tt012j.mad/SubProcesses/fbridge.h | 4 +- .../pp_tt012j.mad/SubProcesses/fbridge.inc | 9 +- .../SubProcesses/makefile_original.mk | 5 +- .../pp_tt012j.mad/SubProcesses/myamp.f | 2 +- .../pp_tt012j.mad/SubProcesses/reweight.f | 8 +- .../pp_tt012j.mad/bin/internal/banner.py | 49 ++--- 
.../bin/internal/common_run_interface.py | 6 +- .../bin/internal/launch_plugin.py | 19 +- .../bin/internal/ufomodel/py3_model_FDG.pkl | Bin 49027 -> 0 bytes .../internal/ufomodel/py3_model_Feynman.pkl | Bin 42837 -> 0 bytes .../pp_tt012j.mad/src/mgOnGpuVectors.h | 12 +- .../pp_tt012j.mad/test/cudacpp_test.mk | 3 +- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 77 ++++---- .../Cards/me5_configuration.txt | 4 +- .../smeft_gg_tttt.mad/Cards/proc_card_mg5.dat | 2 +- .../Source/DHELAS/aloha_functions.f | 15 -- .../smeft_gg_tttt.mad/SubProcesses/Bridge.h | 8 +- .../SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../SubProcesses/P1_gg_ttxttx/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gg_ttxttx/CPPProcess.h | 2 + .../SubProcesses/P1_gg_ttxttx/auto_dsig.f | 8 +- .../SubProcesses/P1_gg_ttxttx/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttxttx/matrix1.f | 2 - .../SubProcesses/addmothers.f | 2 +- .../SubProcesses/cluster.inc | 4 +- .../smeft_gg_tttt.mad/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../smeft_gg_tttt.mad/SubProcesses/fbridge.cc | 8 +- .../smeft_gg_tttt.mad/SubProcesses/fbridge.h | 4 +- .../SubProcesses/fbridge.inc | 9 +- .../SubProcesses/makefile_original.mk | 5 +- .../smeft_gg_tttt.mad/SubProcesses/myamp.f | 2 +- .../smeft_gg_tttt.mad/SubProcesses/reweight.f | 8 +- .../smeft_gg_tttt.mad/bin/internal/banner.py | 49 ++--- .../bin/internal/common_run_interface.py | 6 +- .../bin/internal/launch_plugin.py | 19 +- .../smeft_gg_tttt.mad/src/mgOnGpuVectors.h | 12 +- .../smeft_gg_tttt.mad/test/cudacpp_test.mk | 3 +- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 59 +++--- .../smeft_gg_tttt.sa/SubProcesses/Bridge.h | 8 +- .../SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../CPPProcess.cc | 91 +++++---- .../CPPProcess.h | 2 + .../smeft_gg_tttt.sa/SubProcesses/cudacpp.mk | 49 
++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../smeft_gg_tttt.sa/SubProcesses/fbridge.cc | 8 +- .../smeft_gg_tttt.sa/SubProcesses/fbridge.h | 4 +- .../smeft_gg_tttt.sa/SubProcesses/fbridge.inc | 9 +- .../smeft_gg_tttt.sa/src/mgOnGpuVectors.h | 12 +- .../smeft_gg_tttt.sa/test/cudacpp_test.mk | 3 +- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 73 +++---- .../Cards/me5_configuration.txt | 4 +- .../susy_gg_t1t1.mad/Cards/proc_card_mg5.dat | 2 +- .../Source/DHELAS/aloha_functions.f | 15 -- .../susy_gg_t1t1.mad/SubProcesses/Bridge.h | 8 +- .../SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../SubProcesses/P1_gg_t1t1x/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gg_t1t1x/CPPProcess.h | 2 + .../SubProcesses/P1_gg_t1t1x/auto_dsig.f | 8 +- .../SubProcesses/P1_gg_t1t1x/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_t1t1x/matrix1.f | 2 - .../SubProcesses/addmothers.f | 2 +- .../susy_gg_t1t1.mad/SubProcesses/cluster.inc | 4 +- .../susy_gg_t1t1.mad/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../susy_gg_t1t1.mad/SubProcesses/fbridge.cc | 8 +- .../susy_gg_t1t1.mad/SubProcesses/fbridge.h | 4 +- .../susy_gg_t1t1.mad/SubProcesses/fbridge.inc | 9 +- .../SubProcesses/makefile_original.mk | 5 +- .../susy_gg_t1t1.mad/SubProcesses/myamp.f | 2 +- .../susy_gg_t1t1.mad/SubProcesses/reweight.f | 8 +- .../susy_gg_t1t1.mad/bin/internal/banner.py | 49 ++--- .../bin/internal/common_run_interface.py | 6 +- .../bin/internal/launch_plugin.py | 19 +- .../susy_gg_t1t1.mad/src/mgOnGpuVectors.h | 12 +- .../susy_gg_t1t1.mad/test/cudacpp_test.mk | 3 +- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 43 +++-- .../susy_gg_t1t1.sa/SubProcesses/Bridge.h | 8 +- .../SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../CPPProcess.cc | 91 +++++---- .../P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.h | 2 + 
.../susy_gg_t1t1.sa/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../susy_gg_t1t1.sa/SubProcesses/fbridge.cc | 8 +- .../susy_gg_t1t1.sa/SubProcesses/fbridge.h | 4 +- .../susy_gg_t1t1.sa/SubProcesses/fbridge.inc | 9 +- .../susy_gg_t1t1.sa/src/mgOnGpuVectors.h | 12 +- .../susy_gg_t1t1.sa/test/cudacpp_test.mk | 3 +- .../CODEGEN_mad_susy_gg_tt_log.txt | 73 +++---- .../Cards/me5_configuration.txt | 4 +- .../susy_gg_tt.mad/Cards/proc_card_mg5.dat | 2 +- .../Source/DHELAS/aloha_functions.f | 15 -- .../susy_gg_tt.mad/SubProcesses/Bridge.h | 8 +- .../SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 91 +++++---- .../SubProcesses/P1_gg_ttx/CPPProcess.h | 2 + .../SubProcesses/P1_gg_ttx/auto_dsig.f | 8 +- .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttx/matrix1.f | 2 - .../susy_gg_tt.mad/SubProcesses/addmothers.f | 2 +- .../susy_gg_tt.mad/SubProcesses/cluster.inc | 4 +- .../susy_gg_tt.mad/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../susy_gg_tt.mad/SubProcesses/fbridge.cc | 8 +- .../susy_gg_tt.mad/SubProcesses/fbridge.h | 4 +- .../susy_gg_tt.mad/SubProcesses/fbridge.inc | 9 +- .../SubProcesses/makefile_original.mk | 5 +- .../susy_gg_tt.mad/SubProcesses/myamp.f | 2 +- .../susy_gg_tt.mad/SubProcesses/reweight.f | 8 +- .../susy_gg_tt.mad/bin/internal/banner.py | 49 ++--- .../bin/internal/common_run_interface.py | 6 +- .../bin/internal/launch_plugin.py | 19 +- .../susy_gg_tt.mad/src/mgOnGpuVectors.h | 12 +- .../susy_gg_tt.mad/test/cudacpp_test.mk | 3 +- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 46 ++--- .../susy_gg_tt.sa/SubProcesses/Bridge.h | 8 +- .../SubProcesses/BridgeKernels.cc | 8 +- .../SubProcesses/MatrixElementKernels.cc | 4 +- .../SubProcesses/MatrixElementKernels.h | 6 + .../P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc | 91 +++++---- 
.../P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.h | 2 + .../susy_gg_tt.sa/SubProcesses/cudacpp.mk | 49 ++++- .../SubProcesses/cudacpp_overlay.mk | 3 +- .../susy_gg_tt.sa/SubProcesses/fbridge.cc | 8 +- .../susy_gg_tt.sa/SubProcesses/fbridge.h | 4 +- .../susy_gg_tt.sa/SubProcesses/fbridge.inc | 9 +- .../susy_gg_tt.sa/src/mgOnGpuVectors.h | 12 +- .../susy_gg_tt.sa/test/cudacpp_test.mk | 3 +- 656 files changed, 6822 insertions(+), 4678 deletions(-) delete mode 100644 epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/py3_model_FDG.pkl delete mode 100644 epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/py3_model_Feynman.pkl delete mode 100644 epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/py3_model_FDG.pkl delete mode 100644 epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/py3_model_Feynman.pkl delete mode 100644 epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/py3_model_FDG.pkl delete mode 100644 epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/py3_model_Feynman.pkl delete mode 100644 epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/py3_model_FDG.pkl delete mode 100644 epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/py3_model_Feynman.pkl delete mode 100644 epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/py3_model_FDG.pkl delete mode 100644 epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/py3_model_Feynman.pkl delete mode 100644 epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/py3_model_FDG.pkl delete mode 100644 epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/py3_model_Feynman.pkl delete mode 100644 epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/py3_model_FDG.pkl delete mode 100644 epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/py3_model_Feynman.pkl delete mode 100644 epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/ufomodel/py3_model_FDG.pkl delete mode 100644 epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/ufomodel/py3_model_Feynman.pkl delete mode 100644 epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/py3_model_FDG.pkl delete mode 100644 
epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/py3_model_Feynman.pkl diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc index bcfbd0652f..6140ddc4b0 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc @@ -591,7 +591,7 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) - const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc index aea1632410..57c8d7fab5 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc @@ -208,7 +208,7 @@ // Event-by-event random choice of color #402 // Use per-event MLM graph if provided, otherwise use channel2iconfig const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig int iconfig; diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index f8930a863f..3ce3428de8 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -1,5 +1,5 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode +('WARNING: loading of madgraph too slow!!!', 0.5544004440307617) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. It has been validated for the last time with version: 3.6.5 @@ -16,7 +16,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +29,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. 
Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +39,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -55,7 +57,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0051648616790771484  +DEBUG: model prefixing takes 0.008105278015136719  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -147,7 +149,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.002 s +1 processes with 2 diagrams generated in 0.014 s Total: 1 processes with 2 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -158,10 +160,10 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vecto INFO: initialize a new directory: CODEGEN_mad_ee_mumu INFO: remove old information in CODEGEN_mad_ee_mumu DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 @@ -173,22 +175,22 @@ FileWriter mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum -DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1747]  -DEBUG: 
diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1748]  -Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s -Wrote files for 8 helas calls in 0.285 s +DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1749]  +Generated helas calls for 1 subprocesses (2 diagrams) in 0.009 s +Wrote files for 8 helas calls in 0.185 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.122 s +ALOHA: aloha creates 3 routines in 0.353 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.152 s +ALOHA: aloha creates 7 routines in 0.374 s FFV1 FFV1 FFV2 @@ -197,32 +199,32 @@ ALOHA: aloha creates 7 routines in 0.152 s FFV4 FFV2_4 FFV2_4 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages DEBUG: result.returncode =  0 [output.py at line 273]  -Output to directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. +Output to directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. 
Type "launch" to generate events from this process, or see -/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README +/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README Run "open index.html" to see more information about this process. quit -real 0m4.542s -user 0m1.246s -sys 0m0.587s -Code generation completed in 5 seconds +real 0m5.692s +user 0m4.638s +sys 0m0.843s +Code generation completed in 6 seconds ************************************************************ * * * W E L C O M E to * @@ -243,10 +245,10 @@ Code generation completed in 5 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt treatcards run @@ -273,10 +275,10 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards param diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt index 712b1897aa..db7e3616c4 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt @@ -255,7 +255,7 @@ # pineappl = pineappl -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat index 7aed5df7db..5c3cdba491 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-14-g6dba8f068 3.7.1 * +#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_functions.f index e986b059a9..47699fa614 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_functions.f @@ -2022,21 +2022,6 @@ subroutine orxxxx(p,rmass,nhel,nsr , ro) end - complex*16 function THETA_FUNCTIONR(cond, out_true, out_false) - - double precision cond - double precision out_true, out_false - - if (cond.ge.0d0) then - THETA_FUNCTIONR = out_true - else - THETA_FUNCTIONR = out_false - endif - - return - - - end complex*16 function THETA_FUNCTION(cond, out_true, out_false) double precision cond diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/Bridge.h 
b/epochX/cudacpp/ee_mumu.mad/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for 
helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before 
computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc index 1c6406a546..4a7ad99d1d 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc @@ -926,38 +926,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -983,7 +995,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1118,7 +1130,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1142,7 +1154,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1254,25 +1266,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1335,7 +1358,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.h b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.h index 1469ba9333..b590074a0a 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.h +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* 
allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f index 78c4e66a95..6dbcbc178f 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -893,9 +892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) 
CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f index 03db576967..035ebb9a2e 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f @@ -349,6 +349,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -359,6 +362,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -461,7 +465,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -534,7 +538,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -547,6 +551,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -565,6 +571,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -616,7 +624,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -640,7 +648,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -648,7 +656,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f index 15e4d1a8a2..8d9020151c 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f @@ -342,8 +342,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/addmothers.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/addmothers.f index d6cded9a2d..593c620d9b 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/addmothers.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/addmothers.f @@ -111,7 +111,7 @@ subroutine addmothers(ip,jpart,pb,isym,jsym,rscale,aqcd,aqed,buff, if (btest(mlevel,3)) then write(*,*)'unwgt.f: write out diagram ',igraphs(1) endif - lconfig = vec_igraph1(ivec) + lconfig = vec_igraph(ivec) endif is_LC=.true. 
maxcolor=0 diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cluster.inc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cluster.inc index 8ddf5bee13..940c25eac0 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cluster.inc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cluster.inc @@ -43,5 +43,5 @@ c parameters for sudakovs integer iipdg,iimode common/gamma_args/Q1,iipdg,iimode - integer vec_igraph1(VECSIZE_MEMMAX) - common/vec_igraph/vec_igraph1 + integer vec_igraph(VECSIZE_MEMMAX) + common/vec_igraph/vec_igraph diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See 
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed)
+ ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone 
bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern 
"C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.h b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.h +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile_original.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile_original.mk index 6cb56d0409..348c283be7 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile_original.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile_original.mk @@ -58,10 +58,7 @@ $(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) $(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -libcollier.$(dylibext): - ln -s $(LIBDIR)/collier_lib/libcollier.$(dylibext) || echo 'already done' - -gensym: $(SYMMETRY) configs.inc $(LIBS) libcollier.$(dylibext) +gensym: $(SYMMETRY) configs.inc $(LIBS) $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) $(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/myamp.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/myamp.f index bd02dfe2b4..5360566ef4 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/myamp.f @@ -139,7 +139,7 @@ logical function cut_bw(p) $ gForceBW(i,iconfig).eq.1)) if(onshell)then c Remove on-shell forbidden s-channels (gForceBW=2) (JA 2/10/11) - if(gForceBW(i,iconfig).eq.2.and.sde_strat.eq.1) then + if(gForceBW(i,iconfig).eq.2) then cut_bw = .true. 
return endif diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/reweight.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/reweight.f index 353e025d71..8e4672a421 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/reweight.f @@ -1416,6 +1416,7 @@ double precision function rewgt(p, ivec) rewgt=1.0d0 clustered=.false. + vec_igraph(ivec) = 0 ! default: no MLM graph selected for this event if(ickkw.le.0.and..not.use_syst) return @@ -1467,6 +1468,7 @@ double precision function rewgt(p, ivec) rewgt = 0d0 return endif + vec_igraph(ivec) = igraphs(1) ! save MLM-matched graph for this event c Store pdf information for systematics studies (initial) @@ -1592,10 +1594,6 @@ double precision function rewgt(p, ivec) c alpha_s weight if(ipdgcl(imocl(n),igraphs(1),iproc).ne.fake_id)then - if (q2now.le.4)then - rewgt=0d0 - return - endif rewgt=rewgt*alphas(alpsfact*sqrt(q2now))/asref c Store information for systematics studies if(use_syst)then @@ -1907,7 +1905,7 @@ subroutine update_scale_coupling_vec(all_p, all_wgt,all_q2fact, VECSIZE_USED) else all_q2fact(1,i) = q2fact(1) all_q2fact(2,i) = q2fact(2) - vec_igraph1(i) = igraphs(1) + vec_igraph(i) = igraphs(1) endif c call save_cl_val_to(i) c endif diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py index 74f6b04b68..c248436e7f 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py @@ -1004,8 +1004,6 @@ def __init__(self, finput=None, **opt): self.comments = {} # comment associated to parameters. can be display via help message # store the valid options for a given parameter. 
self.allowed_value = {} - # allow nickname for some parameter to avoid integer mapping for some var - self.shortcut_values = {} self.default_setup() @@ -1134,11 +1132,6 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): scan_targettype = self.scan_set[lower_name] del self.scan_set[lower_name] - # check if the user used a shortcut value (which are always str) - if lower_name in self.shortcut_values: - if isinstance(value,str) and value.strip().lower() in self.shortcut_values[lower_name]: - value = self.shortcut_values[lower_name][value.strip().lower()] - # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] @@ -1317,8 +1310,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): def add_param(self, name, value, system=False, comment=False, typelist=None, - allowed=[], - shortcut={}): + allowed=[]): """add a default parameter to the class""" lower_name = name.lower() @@ -1353,11 +1345,6 @@ def add_param(self, name, value, system=False, comment=False, typelist=None, assert val in allowed or '*' in allowed else: assert value in allowed or '*' in allowed - if shortcut: - if allowed and shortcut and '*' not in allowed: - assert all([val in allowed for val in shortcut.values()]), "Some shortcut value are not in the allowed list" - assert all([isinstance(v, str) for v in shortcut.keys()]), "All shortcut values should be str" - self.shortcut_values[lower_name] = shortcut #elif isinstance(value, bool) and allowed != ['*']: # self.allowed_value[name] = [True, False] @@ -4186,10 +4173,8 @@ def default_setup(self): allowed=['partonshower'], comment="list of check that can be bypassed manually.") self.add_param("python_seed", -2, include=False, hidden=True, comment="controlling python seed [handling in particular the final unweighting].\n -1 means use default from random module.\n -2 means set to same value as iseed") self.add_param("lpp1", 1, 
fortran_name="lpp(1)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='first beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("lpp2", 1, fortran_name="lpp(2)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='second beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("ebeam1", 6500.0, fortran_name="ebeam(1)") self.add_param("ebeam2", 6500.0, fortran_name="ebeam(2)") @@ -4198,24 +4183,18 @@ def default_setup(self): self.add_param("polbeam2", 0.0, fortran_name="pb2", hidden=True, comment="Beam polarization from -100 (left-handed) to 100 (right-handed) --use lpp=0 for this parameter--") self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_proton2', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(2)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (used for beam 2 if group_subprocess was False)') self.add_param('nb_neutron1', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(1)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_neutron2', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(2)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (of beam 2 if group_subprocess was False )') self.add_param('mass_ion1', -1.0, hidden=True, 
fortran_name="mass_ion(1)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 1)') self.add_param('mass_ion2', -1.0, hidden=True, fortran_name="mass_ion(2)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 2)') valid_pdf = ['lhapdf', 'cteq6_m','cteq6_l', 'cteq6l1','nn23lo', 'nn23lo1', 'nn23nlo','iww','eva','edff','chff','none','mixed']+\ sum(self.allowed_lep_densities.values(),[]) @@ -4228,14 +4207,12 @@ def default_setup(self): self.add_param("fixed_fac_scale1", False, hidden=True) self.add_param("fixed_fac_scale2", False, hidden=True) self.add_param("fixed_extra_scale", False, hidden=True) - self.add_param("scale", 91.1880, shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) + self.add_param("scale", 91.1880) + self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1") + self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2") self.add_param("mue_ref_fixed", 91.1880, hidden=True) self.add_param("dynamical_scale_choice", -1, comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2\n '4' is the center of mass energy\n'0' allows to use the user_hook definition (need to be defined via custom_fct entry) ", - allowed=[-1,0,1,2,3,4,10], - shortcut={'ckkw':-1,'ht':2,'ht/2':3,'et':1,'shat':4}, - ) + allowed=[-1,0,1,2,3,4,10]) self.add_param("mue_over_ref", 1.0, 
hidden=True, comment='ratio mu_other/mu for dynamical scale') self.add_param("ievo_eva",0,hidden=True, allowed=[0,1],fortran_name="ievo_eva", comment='eva: 0 for EW pdf muf evolution by q^2; 1 for evo by pT^2') @@ -5598,10 +5575,8 @@ def default_setup(self): self.add_param('niters_fo', 6, include=False) #seed and collider self.add_param('iseed', 0) - self.add_param('lpp1', 1, fortran_name='lpp(1)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) - self.add_param('lpp2', 1, fortran_name='lpp(2)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) + self.add_param('lpp1', 1, fortran_name='lpp(1)') + self.add_param('lpp2', 1, fortran_name='lpp(2)') self.add_param('ebeam1', 6500.0, fortran_name='ebeam(1)') self.add_param('ebeam2', 6500.0, fortran_name='ebeam(2)') self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", @@ -5644,15 +5619,13 @@ def default_setup(self): self.add_param('fixed_ren_scale', False) self.add_param('fixed_fac_scale', False) self.add_param('fixed_extra_scale', True, hidden=True, system=True) # set system since running from Ellis-Sexton scale not implemented - self.add_param('mur_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mur_ref_fixed', 91.118) self.add_param('muf1_ref_fixed', -1.0, hidden=True) - self.add_param('muf_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('muf_ref_fixed', 91.118) self.add_param('muf2_ref_fixed', -1.0, hidden=True) - self.add_param('mue_ref_fixed', 91.118, hidden=True, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mue_ref_fixed', 91.118, hidden=True) self.add_param("dynamical_scale_choice", [-1],fortran_name='dyn_scale', - allowed = [-2,-1,0,1,2,3,10], - shortcut={ 'ht/2':3,'ht':2,'et':1}, - comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n 
'2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") + allowed = [-2,-1,0,1,2,3,10], comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") self.add_param('fixed_qes_scale', False, hidden=True) self.add_param('qes_ref_fixed', -1.0, hidden=True) self.add_param('mur_over_ref', 1.0) diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py index 6f82393c3f..3c5601e27d 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py @@ -5205,12 +5205,12 @@ def init_run(self, cards): if self.run_set: self.special_shortcut.update( {'ebeam':([float],['run_card ebeam1 %(0)s', 'run_card ebeam2 %(0)s']), - 'lpp': ([str],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), + 'lpp': ([int],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), 'lhc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), 'lep': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'ilc': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'lcc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), - 'fixed_scale': ([str],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 %(0)s']), + 'fixed_scale': ([float],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 
%(0)s']), 'no_parton_cut':([],['run_card nocut T']), 'cm_velocity':([float], [lambda self :self.set_CM_velocity]), 'pbp':([],['run_card lpp1 1', 'run_card lpp2 1','run_card nb_proton1 82', 'run_card nb_neutron1 126', 'run_card mass_ion1 195.0820996698','run_card nb_proton2 1', 'run_card nb_neutron2 0', 'run_card mass_ion1 -1']), @@ -5795,8 +5795,6 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed_for_run.remove('*') elif isinstance(self.run_card[args[-1]], bool): allowed_for_run = ['True', 'False'] - if args[-1].lower() in self.run_card.shortcut_values: - allowed_for_run += self.run_card.shortcut_values[args[-1].lower()] opts += [str(i) for i in allowed_for_run] diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py index 262d39a736..3bd0c281fc 100644 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py @@ -38,11 +38,26 @@ def compile(self, *args, **opts): cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py - logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend in ['cpp', 'cppauto']: + backend_log = pjoin(opts["cwd"], ".resolved-backend") + # try to remove old file if present + try: + os.remove(backend_log) + except FileNotFoundError: + pass + misc.compile(["-f", "cudacpp.mk", f"BACKEND=cppauto", f"BACKEND_LOG={backend_log}", "detect-backend"], **opts) + try: + with open(backend_log, "r") as f: + resolved_backend = f.read().strip() + logger.info(f"Backend '{cudacpp_backend}' resolved as '{resolved_backend}'") + cudacpp_backend = resolved_backend + except FileNotFoundError: 
+ raise RuntimeError("Could not resolve cudacpp_backend=cppauto|cpp; ensure Makefile detection runs properly.") + logger.info(f"Building madevent in madevent_interface.py with '{cudacpp_backend}' matrix elements") if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' else: - raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) + raise Exception(f"Invalid cudacpp_backend='{cudacpp_backend}': supported backends are [ '" + "', '".join(cudacpp_supported_backends) + "' ]") return misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/py3_model_FDG.pkl b/epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/py3_model_FDG.pkl deleted file mode 100644 index bf5a732979d683e3642a1177b58851862f165d66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 49027 zcmb__2b>he_BAZ1hyf#DLX7N!gpGg+5fNl1I7>3%I_wVgjvMH^vm|Ydvw%70tWTdg z=bWB7>oaFD>oe!@opZZ7%uMZq|KEr2-L0xyea^k7s=KRudU_5os%uKt&z0mnXGmR( zq=w8+wx()ZW~Wninbw{YTk2DdJ>4!PN4Q;!XLj$>rAu?NDdiT;EJCrh$?Z0CO%&!O zCDTyXm}+&qcLp}ablbb3HPh2w4VbGJuS)-(?i%RZqu42$*%fR_cg>kS__sOJpwaX+ z&1-3Zt@9e1-LRK9G#9gPl>&(?zbCNPQ<*qvpW&NicG$mVG-CkPUZ){?+ zyPj^jprJnFxa&LXDc8LExwUoB!rh>{xAtsoXim8sYH>kw{vvmynLU9xPg>?B=O#1Y z+_<{S%r(%|&{|7*+}_J(u7hf8s&P&&BiF`0%M>BAXkN*(q^12R_x<*WIGJOPfQ5`UMItcDHnVQ5L)X9cbY| z0Vhh^>7{Zs)i>5o+|S+0SwF1K*w5YC*`T_Qv!Sz*v$3;@(|eiIXPMI%C4{k!16C*G z+m?Ks2nzav28(8P0VFnGEN; zI~cwmITaw`L-rX_Aw=S=HDDhv4VT<{Sgc*Y$W zfM=GtJ9ZAfo*jIrT=1QPacFaQ0X7G|ZHc?9f$oNZ7`0-W{&RYT3KiL*N9D@DTL4|> zjyBMBOWfV9(BZV5IK~}ghP%6+Wsh8zu>niQ-P5o@n{g{)nd**@&)li*UYK92+X*M+ z5>^L<_3qw=uzra*=d_Y+LWG}8hEx!)@2%6np@qL&Q9ukMLqp>z&xC}+)md4 zu}ylqg6^%wI%W;^ZU(zwcZW-fwqj?d|6j^kXO=rZF9ux*)FvC}re;gB=NK*vYBJK$ zJh!L209!Bg+(pG^#gLja$6Z`ECrGE~o~G2(Jw)ebse5SF^%Cw9Kl9ewhNk3PtVCpy 
zd)RVSa8h0Eyp{zxNgs}rG|&2tEiLnE!`35k!k(Sf)d9jtHD?yPN6vDOQu-SFD|L@f z>beT6fO%MG%x!hEp4(pCrMgSIyHvZ48&g|e?k;Q9+1?`cDQR_=d+sqhvGy6ehkI;w zQR~cg1L|mjcHAVVudX?`%yW;Qxjwqg!$PA?W+yp%wYnBLVN#nzp-(J!GSzDug-;%G`mUT~0xMw(rEq9Q*<0tAMxighhb2}^PAe*zDFp;W}?m4Xv zqCeL;wz_Mp!w{Y0ogjwu7{l5ohVzS^f&5QnxIkmLu+x#A)6>03N3FrVc$RxfF+}WI zU6g8fFHN|YdG5bvCq4J_cnVi!-G3+CD+^P&N>i9VeR`>TwWe@QltSq+_gYQiI)4Dh z?BQN-N#zEnvW`jRMw5z3=B8rJ=H_DOA_tM)Qas^r{A0KX_SS@Zo9Es>+xOj3TwPS% zP5toQnQ-s&+`DI|Jolb>BKKz9`x5T`If*F61BT+kkb-(VlyD#R+(!(>qs5RZr1($5 zeav$oC&d$SiYK$~QwjI!LW*Y$#j_zrH&8s6aG&?w7YxOVq$moqcq!q&?76Ry;=gf< zSF`SG3HS9JiWFzm8~*N!@xXw*sXGPBq0car%AWT#ub5fBJpMG276L*+<1r4_#`nz;V{Wn%A)}b{(v6oxEl3a;HzY z5d1hg3vH`Y`4b(PPd)duc=DfT-7gaEm$Bsau+Y%d)RsxkZcMpf>ET9?W|MI}@^wkO z`@gLFjeqb+Hn%i4)Fm6;Zyh?m%evot?hozmk2v^H!JmqqtIeFoA?IhmSTnuM{lyf~ z`l|}~TQR0OFaB`Ysc?Viq-=Np2-F;2;r@vxDEU{|ROxc+c59a|IGYEXmP=REsI?TK zVB6cJ8yr-8GhcD(uJskpwmh~|Dp`%2;@~5zqu|LJ+95myNe|SsvLGxyblUUykMS5UpScf<|gq?w62M3+M9Airo9;oo@}n}DD5p!&q_boglq|0 zn09~du@!xO+FNVGHn5&-3)?Q+!J4$UN3q!X+4)rmuVy+jAAlz3EX}-Fz2Omjc$t)- z#m{^oN@3>1WsvVEYL^{QN}DQX#^6}Lsu3~-RaIyx3f8GH3=S%^g;A(f>pieXLM7Fx zjGI!UauhtN&<>GCm8fT>3N|6bVGA`Hp*=>@=WDd1HtYoJ$u?st?b~vAw6VT=qqaFVKG6g!gZ7z^NK8`}+>ep#x%$ z@{uyjcVIRh7;{urNv-eb-Yy5l9OWY<={r_ym)S8#RjJgWs-sYkf_46-;NU21X+~j= z)>m-$nfW)Dn{pf+6g+9r4$(NIQP0Z3unB2|EgXj??a@r1KMpP0Fb~!f7q(pl){H|d zip6QpI_GFrYr5{I88nz`&Gb@f3#v8Kxfjoms`M^ERfl6C3bElRuare-@rPq^unm`; zWpaq`z;GOjQraB!FbH0(3h9@`su29JpsF$-j+%9{9{~qtE-^A6sde4S&2&GCn^NYZ zQScbje! 
zsdSq9s)8r8UvxUC94@Dz#aHlDl!6IeSt+Oa4k&m!N@-Jtf@j3~Rh7$`sH$3LpE1A!Z$v#SH^C<4X4t|MZ_ysN(&wjmn>O4I>&YFk?Q$oqN%1Ze zd2T$*`87QNWgY#ywJ|&;jwqFT(Bh|gFG}eleidVWA4REXlHX%>I77FsD_IQduUy!G@ z;Tc#@o`r3f=U|N>&!gxci2VJ6yM*o>nk5cOI48V-HdCFBlfsK(6^ErVc?nfBUtUHr zHebrCUpP76Ma?=n--CmrG1QF4`&!5Sc=d*Vd$4QjkBmT%cPyj&=i?@;Hpv3yU5hGtyBpGg`jIN2!p3tOq+uPCGgiGCxSk?41n6Y>YFCx60HqQB^m zN%S}BS?Pk?hY9HlTPRTxTGOF2-PmVo+6aMNhIBDb_+cA#1~WvJNbzSeO2o6unT-%6hN~ zSs%7giVe^jNwFdObS}k4=t3#Vj1(KQp5{K~RQvZ#q&HC*dmj|S*!!|0F75rfz6ojx z*%a23&0rb*=IjxRehbub@dS&DC)mR1`=d3Aek=9~5A~&`vNalXj&DPc<<^gE3kS0) zH`#2*dYbdwnQXGzp4&2;0Vsso6tiQPO$llV83^miAXsLz1AD}>8H{>XhQKCdC~RRi z!_XRKQ_4O|(B?@8mRqPl>KOD7$jDYoI zBrK!fkv(G3?}U0*c7{#JF0h5s?~2wa`cdrDdGx!X3!|?x(T`?5h~B@cuscy0{TLL& z==WepGmplioRB?XJsAhf*vHc!i+wNDvoZlTA=R*jvG0vmbIh!%kcp`2F>?|-hZAZ< zxlBfbPN*sL*bbWez`>-3o1_w~2fK%VSy;nunbcGi!lb6Lqe*Hy$_beP>j`hEGpT*) zk0rGq>RH(zHX#SV7A7@|TLfo4(q}0Rx-mhlS`YF(wb4;-qotTGu0zaG8xu``ROOqh9*sB zKEZ+$)&h1k$t*-UA&X!=Sq#f$4xvAm%%P}f#e+@A64+QW&Q~*WEb1~<4nsR3+vRYS z+T{q?tQ^VS-=>WdORuBUd9*sSu%5KTG8k-IUDVbtOHnr~g=KU_5(PUznJkAxC&w|` zBUH*rN2GQ+7QSF|j3|@iP}RwCJPOvyaRR|GIYt{jPt-b3q9V}9Y2Yl zOw#ZqT5T@;Phl(P+Nmgn!*d$h%EP+=(XtXm%G$!GtZXlDkpW340F;)REXLHX%>K zb~GqY!PhQNYwu^+FFq*Gs`EK@J`d~33$XD)c@cFpC@<0FA4$Toc$sbSvA`80n*6c& zFG}H943}3?)vJ~Tu%70D0}D0(HuIaTP`tNL2*rDw9gTSJpq!9* zVLf>dmg2one@wg&P|wPTunGAHHYT1)b_H6az5QeMiS6yB@(EgWZ~v4o+ur^e986}c zN#=9b)7;a;WWHd9$$W`In9Nt~Xp;FF<%IkX){}2wnasEJ$CCLD^{jjkn~)!1W69*~ zo2_)|201<1^t8)QvcQ(v>$8{$g8PqYC*OP5w;if@dzm znJ_}SqN+2Y2nDNn-Qb{jdm8b&v!3Q28H%?Wx21TiqY#R>20I$@dZ3(;HDNvJ2}|+T zqCY0y+Nft`9oU4d3)_)+z2L)VQ`&od_KS*C|JeY0uG8d&WP8Kb$>UiF}u;0+?L|?MYbRIn6pJNjedZI60Z2EZny7`7wvO5kgkf!ccz`^ClEL7juuIRw^|p|ElB zhM{i6E2T@t3r-BBQie7?F_hC8SFubg(B!LFiBhOyxm2O5Dh@}%s^SPZsN#5|;z-v0 z{Wnl?M{Y|McS0dlac6ckD(-@ELUx7qWE3n_+>QR2ilb4_%I>fU83WsqihID|RjfwcsJJ&>{uHEm>F{tqk&d{6l`;uUzJilc3Kgu7DZT@X z#C>9pvSE@yRi&*#!7A-kI4JF2M%rnt`+IjF?R0KSX=k7iO3Noa0%`X}IU)POda^$( 
zr9FWDn6$G{&&q+Y38{tcNZNzoYnP<*C9qR+5I#*43JuSQ)l*q}ovMWWLfw_q!LMo|T2LxI%)B4S|ttF3JkDZ*!3WGWe1y9aqkJw&*4(eGs7d9d1!4?K}K3dZg zv3k&Jw+q-Oy5loaE<}S)wu|VoO}2~SV37NmATMD(&57pJn;ZdR7?Z z4JdeWBYVVxyb1NJ+zgwLTVM-=ycMlckhif<6y!*`9Ss`f9rW0Oyb}%vS!05{i}hf* z{TnWKb6W;^4+@^#t6M~K@IKVDazAWB9)OJnX`V8AP1X^t znl$4NQ1IkK-6G2PBh<690yZHZ!xm=ziT3!EK0o8nwBd7De8vXbE?>f$jK4zBn(^0k zbdd4?;0rSzA>R4p}6k<%TeK)VscWi?cFEwefb)Tt;(S4%hA|gt?4iN0ZAQC?{kr ztS5WIGM91m$8s5udRF#=O~?e;!d$A+8s)M#`$V~v$wV}0E|ci7E(e7y0{e39VF3WAHT{{Y)c1zjOXtxaIge-^k zLb~#CVpUi%7?M_kWsp>op)|1m=?aqgT+BrtM3t0E}*+9Dsxh=K32!&9) zi`mg=cL~Y~xfIru%V4S9zvz!?cRA{~mV(8#6l_P@T?t>iT&2CQX1}<0*QoPabzTSS z$@Q>t?QTHbXm=xBk#^A?vzzFM30NgJqs15S7L-B(E96$+fh&dE;tpIip{mN>fr3@p zJK>tEmK1hE|*@sZi%EPb; zc?7m2WgmqP7g5^#G4_iq`?xxvQ0J4do;(E`SN3Vtjk3?s6)CHG^s{V>i8@@KLyIr! z^C*R)R>}*gs&+4;VAbv=IH+CPX!kPf{vI7@_X@YAcK<~o)b3SwG}^s}azb8*_2dm$ zYWF7nG40+$Ju7d+CgdI1jjeN80@gU%ULKy?%ph>n;|Z3WkcA6 zYy=xq)+n(tTBAF5z1b&xgtSchpsq``zI51@YMa2pRGLgGo3ieIP_mmzWixKeR5nK; zOl1ppG^zAM84m!$deR@3scc1mES0TM$HfFJE+$|LQ`ru!Q7YTB&(gHXAgu}HT`&zm z7slRfVlQUh|0H6piM@m&XyU#y*t(SnR`4&q^t5LdswZ zV=qT*6nh2xbRK&px-j+@6MGfw!4%SO>B?}TF!m8Bgt3og$MC_x9Z^fjPO$iz6)dCQ zg*{@??}~a>M!_azH`v1HN24{0es}ii{K3F6=)&mdndtXmJ(x9q^ka#_==Ve+jD8$D zhS85lEg^fs;u9}eMqkYyvFP_kJu4Go6EX?5F#5@8jiR5zKAlIu54tdV*F>LSJ(xFs z^fg3b^ixp?qo2l(Vf52c!!v-ecm@!b(eKM1vFP_h9nS#5;u%2L!sutAHH!W~_USzO zT6AIb858|MtOwfr4`n8a!sutC5Jq3ej%L=^qnwZwEItr}W$bh5kHzkwo|OjJgrs2$ zV?P+J(f#B`_6aW(M^s1?8uaMXOpop8(*g%`X*0RZV?7uNKNpwVG8aK1%%znbO)eRf z6Ve8Y53pdF%L4jixhzCID~n(gvKY27mqXAR<#H(dgt?5Y5DyKS%MyBQxf})ubD3{) zIh^%ikM?hp9>Hyy%aJIAxg5ogCYPg8PDmCO&jrFVm!byvu7sGmT2`s}19=5y`b#p4cj4ppcMzNw#5dK96_R3tHu&BV=;d1!6^YaUr zBP!(z6tQ+ltA2T>tA8s@rh1`~?n>5!$?i*c6`EAKt5FE0yM`T&bl0Mskn3P^$pcI2 zZlFIV-HoW@Yg@4R+7@g_(%k}IyWFb1Z)3l>bhoSX4t3rM>&acPap~?x-AH#2T{h|N zrK1Ds?t?FoZe*q0Pq+@Gdq80_)rT7C9%SAB+-az}gn9@~D&4~H_ZV{#UD(YEz4Hg$Wu!SkU zp*`ND&rk6!ZFn2jlXqa-vn?mZAJ7t}SS3Gl`wmk4NjJ_^A7)bgne{Y3GjfKSuNeMG& 
zw}?{w9rdjI0h^FNVPh$pp#MUv?rVCh`ET|K&s0@q(gp8v(lb?86s*VaA~<-aI@|== zjrH`Jn6*xY39>u4-_#K{?pGF`VxdOZGwU)n`(zBrp-{#%I2^M*#b5elli#2pZ3_2K7YdY z*M_ZNJ=q$zUABQWW4|qm#m>=r-=a>>#{)O%1u!~IYw*p`QrQk&^@P8@ZnM-`<{y&= zpqe({B*qD>I9A0sKucm(d;@e~tcq`d4vJOr4bUBeDs~6wn7kLj$Y6LWUN0lw5Uu;) zNB7ruL(!t*4MV|`Qtc3lSB834%3%{y0b3|urS_}y2*s3@?vn|?xDl23cwD?Nxic+xOS60a=RFNp20Ym{iR^h8iM#I63w>25>uJsj| zEO-lxPM0y6t*E_t_-ng87cdq!B04WQqVqJYEaXBr=pObZ_0mdXHxK1Q@p{fRi>d6 zKB$2$E}11W_&4txwuSFUot~5izK*A-@VgjLWnTr^j~nAPLF;KOsfF`I_D9o997Vef z8!}7|K*hX_OH#>3nMEi5-=#h^FV$Qx2cle}12;d}*p`x7)F-vgP8*VgP{n6Fuzcqm z-{Zv>w>2i4X4KTyf~~f;OlHHIR#Lt{H1rP~SSEFFIepV4O)FD655FEBPg1bN=i5!W z{g~>Br3*`C4mVTazHpTGAKX6#56Fj>4UxI%h7|dhxw@97d5x)s;&4kVVFNs#q+tsP z%j95gri6UqY)B@JXh#|yqc$mOvq24HFwrzfONdLCX)YeNLurZ3TN$*g&|;u%BTz1_ zIrw4R0|%B%#>ek?1lkmTzQNBKfpS?80*^L=jGj?5FeEUeP(HADa4{QGnWS+Csh9L0 zq=P0-%Z!>5S?EX8v5<>2n#G0Dlwm#`5=O%?2Zv$Ep(}&+6n2RXwoDESVLP2>hp&v} z2qihvMp7&Y}v}fS+3z6 zV+*HDjt#@voCBtF#5RG-O60he5go5YC)kL}<;0K(NA51Aa?;AsPFA#23>sfy$k{Y; zzu>qcr-q2ZpEJffZEBX_(%ovz4dbc9?XXIhafj1L95FvJf98BakjS{d@iihPMd4h~>- zEzRkR5TrLg4`)Tn0VVr&1@S^LKP8>Zclp~kRe8!FsBL7mx%R6K&TUahvtQsS_ zg7M!gW4uxsuj-Jo9E?}58Y8-b@tTz}UaO4Pb;!u$<@Kw^h^}C~VP%XrD&tKZGV=Iy z^QtkTD;RHC8RMs`vm@Y@+1MUWW*JHm*m(P6HDbOwCM73+-?V*G;&D4 zpLbUJMhC0rmb!!GY529JX5wCxrc9e!BhRpf%SQC!d-vs8G|ZCcV5_?(6}lC#woJ+M zXyKyRMDPNtp1cUl2=GS_#N+13ODOvx?E2~aqRMk?&rOpxO`^09{xXWyMags?t}Cn z{rrl9cK2hAVZKMq!qnea%nuA^aB|W0hkO{JKCt4T(aVSLU4Z%#Q41AVp{O6{q3Uu+ zK8a9YdGXsTfB&SZ0QFO%b}z{PGe!N}pa%IbYO0ejBFrE5`6Aozj)bo*q6PkIG<)(t*toolnr6#4+*ak?ZM~=Y?-@Vj80NRcT&*DF?-cX<0?ds3 z5MkDzgfPLNvo(hKBQaMm!2C%ue=fjG$uA-1sNL>6ZQB*6oKb-JD>2t9!2C@ye=opn zl0PEMyVqWJ*^=izFTnhhnClc^{-v0I8_Zy8Eo#an+oTId&rH}qn$J9b^1C|~pms%< z>Rtt?MQHY<8!Sf2*~$#d^rw52ZvWXL1<`BqiKA<3q&r#sF<*^neG1T4SF|+@+87g3 zd=8~!AUz0DI4{>!ke&u)ya_3Fcx+T-AZrn%a8%b;kaY~mbOW+Me7eOz)+I>cbnB%c z>lu*!4M^>7_DI$z$R-7vY@i?;8j#sWlbi_^i)15$6i%p(6{NQTnQK6DW>5^I4?zlN zP+tYv#DFv!kemq<1KE@yg%f5o1=-wyY;82@w)C%)cHL)7f5PxIw#V2pqxY39(9O)4 
zeuOKWF?|2fxZoz%5LED>3Y%hQEvy=37~1?BJ}7i49qO+nN>Pd5wMr9HoWU#FF=)4N?7{AN9AnXjZx6u6XIATiG8xC-3ceL` z3vRbe4B3a~&J7ul{ua``2$M%T0d08p2R5!}-2&O0ot3i>IAaqnphsXOG7-HjOp~~A z9@AvBc`^l-O#F?BXreI5eYlMh+9nFw`bz?REOa&8G>>j7+B}&C8&9ltflOy-@}$XV zL7Tx2d1zdo;vpm0INID&*^iwy>L8<3N{0gy?hl^@{{U{1hd&E#o*W39gTFv(+1ZAV zOb&w2f}i9jdHA!@=1Cna@$pB+pP!m9_3W(#ykS}F?>LydIyehTG#Bh%DRi@N&EeL0 zTyxRpi33Y6-asN2xWb*KfmFS0@YEIiIi2&bJ7CEqsd!k;iuk+B`WFHis`I9y{mrrR;o5 z(AUCu7$Nfb4o90ON5JOrHOZ0eoX^)}=Q|30Eqq54B9AYNHay`4o5PoprRPkMl%KuJLmD}ZQ(qDAbFf8qRo?&V9AL; zx&t<~jhB60?QFDpat>@h8*X2-f9GtxZF?TNS=i3! z)_H6fpv{vDVe{F_D+a=sOkL^;l;d?x=Ic&JMS|0BOm0Ot3)^knI*;vkwBhaR zuz76w34%M>KbNgAw{p1)-7IW(bL%{|d(ejWJiz9&mCJqXpUYO5Te;kiZWgu&xOE=e zgJ|>QA=n%?+?g(whuL4*f=^`d`3h#C9!?h3$s_PvXddNec{Kk)8(!N7n@>|FkF$Rv zjlX08%@gojXrAO|c{ER<4G$N>=F^nRGwfeTQyygVEc_Ol=eSuO>r(TPR@jXmCh; zk^S>&urv8I8F>kQ3(d>iERW_DwBd#?Y(7nyyvqKCG)AiI2Xg-18Lh~s%%cJ=WZFpxrY#vQ{xqQL?`7}6J1TukK=o^;mc^S zagNpnA;*v0D2JnU0Xcp`yM^OtcF*JZ1#O=E3LEFBi#UGcMmZdH%JDneEgXNayWvO$ zpN0I1#;p7WJ4^nC4R7vr!2->buCQbe9=-8zjNmhrA~<~R^{|SG-^)nW;Oev++&LkT zAs`R#A9YuX)eHrnFc>GRhqzpp<=nfdG52orZe6UAPvxi7gFtzTu8B6h$q|-01y`x# z!a-Y$+o+)WU>e73Z8YU1;-jy_o`IHay26{&*6ja;rmPF+Ea?SXT<8B5cWq+>ez{T> z_1x2R>M85NjZfXy#~)8NU|)npEzKi7;fM6HA)Ly9-@k5a!Y@v?%0_IWV2fpA6tmJB z){mT7Eyj<52Osg5NFVs{A)6uWi<&2!z><)kh)GH|<>mxUM?lyM0HhDMZ3a(-usLd; zYyle}G&D&+Zfr&14{XYoISBm~VJm~cN9tNrb+WYuf;(vGkfC)gt;LH=WSbn6Z53rZ zgTfKRuVU88_7)gkTjQh1)?#1*XFv{4vEq~%97a>uS|@o;Vc_fee1_f~1}{6?(RZ<+@2cpde01k`A9_PTGvvI;Wj8zSXbbM{iaW-~ zO^*(v-YkR;jvhJU$R2izu@;Iwm13Ns7!y+P=81m*jLu!TB;)NQds#>(C`q*;so^P3 z$Cr8N6Mqp$cw?w;L5b{brd*$}qz)RN>g5DW!Q0pR-H()1Egq@^`F#6=FXF zF(riHg}DV{e+$F`3Ngz-G=>no+O|L(Xo09zh=U9SUuy1;P58j)UV25HLKWTYsY8l)`7Kp_P zafpH7i(7mMUUpg_4z)md3bDjM@YN+g1TQHq5QkYH4p)dH3z`$ zINCt)eHA_gF9j_SSqntFLM$~9s|9DqG%xck5X&qO%N61n12H#7H8Vw&$7Uut?=g<_(O8xD{{^-A^f>^`136A=PUdLKD@Kcmsdlt zvSVLp$G*sdeX(L+;$!Pi;`#8Ut9+)6t~5*JQakZw7UF*?@#ThiQK;Ea4oPw6CuyEh zE}c1+|CjSgS%^n~1PXS~P4c&{?vXBc-087l)u|GP_O 
zdgSbZ=Ftt@U(p!+=k`nFemnUC7V-y`{2@a=u>t>q`EH zA?M4zY~=q$_BZY1Z&}FSR`PcYdAD$x5g+s5w(qKC{;r+;Jq!8!O8$W%Plnelyh8Qw zpWp`SN)Ol{+6h0h5UxY2VHZvT){B41)73-4FN%Db`pHQI2?9hNty_6<)=+pH{4pG&^6 zQ+;cp`cA37H&nZX7yX^x$lz)A2Rp@&7K)#g;%7s_-}m;lYZm`GeWKBb#kQV3w)HKsZJ@DjXkzOUPV1UrG(vH5XmgeN2cd0b z4{c*hXuUPGJ|?s?L)rE%2(8ndAsR<(w@^UtI(Q7SuRXv`ECFt+0d8gjNpx#XbQ_asZJ6kG9StfTK#St$v2a^E*>)DP?UihR zA=^JB+cucuc|sL%72CN=EL;PXYmng@5_0izzToWQfBD#d@ILl{o$O$z8El~$qBKK& z8s{8;mMDk)ri{)9K4v=1&QWUNC{vDdpCed94Id^I?5k_2!JUs*g@5LX%`&OPww6<7 z6(RGMQp3^a$q3lk5^i{@jAUP>kIpT51DJC+%^yumEIR_o63b4+&x>VewBcQzusnU2 zgrhpLgBfHAAY2cH2MgKN9>6F|0J~`bqx}HFu_%|_*;mEbp@UF5On(eEwWPlX5%ba? zi#B`<0T$^yfBJ;0wySb=HqOp8-omt(GEFc{fBQ_Uw4aiw+D^2$g=nG@P4bCCL91jk z`>LS(uhQhkp(r@@h3D{KgoB+92QJI>n*t__{`)Y1JpB`B^P~ngzAFrusqCx4?zbwV z|9_8l8i*{hPA7j}tTWJtSFpmyV;v#;vhP0>)~tB`p|Jab$r9}T3?MJq1JLHlEZBIk zBjrH${b$1d-(sx=ktNoH$e$N$5^bK$hKDqYaD;D)nXiJa+M1!$ zsT@Fcb`*|I0)ahFNBH<;MfIdun3S=0S-0-@E7I%dBE>V`IqO5X6tu#!u!yTcJL+( zc(a1H81Ts1wP?2gU6zn*o}J6JaEWrY8m^}BURZ}*b$MJFJ6D^9Yrb+V zFkE~^tS{OIX4WjKi%TX8?Z}HP$cq*E5QEIui~7j;c+~%7Wl>5FwZnN9xFrgAn1NfZ zyMar4_K#Z+w?G`B5Jwsa{)}G11o6L^77p4`=xf>ij<$ej6};VmUl@XSIB2Q7L0f9) zT4v!|u3X0$F22?@U)m-))(&@^1@3r-JHfzh8cNIW7g@4D(E@RjLY! 
zQi+^`z83IPE#Rjq_~{0mKUe94cPMRBp0sD!xz4n3ouyo78!j)DwnMH=9@jZ`u5&G1 z=PB3uhKuhQ_VWt&y-Zv(xxkKmp#}LOMZVY|^QEvpG8)Xj$8T3%f~sW*E=6lP% z;pcK-d2$&NP7!`V%K~w^1>y>Y__u-J4;=b2>1XG9zTj8B;u)97mFR7OzsdrCwZdOx z;Q4c*K79PfcKk|{WO+k(t)1;U3)}U|c7tKNAUqxH41qhmV-|fOh_4RvzgV9C>Ew;} z7;dt}aI?m6i;3Zre;xzAZmF^6$8f7XhTAML+^#X)VPdEVRbOQt$~%bRPJ0Y@Sz@?b zW4On}aOXeO0Uu*_5W~Ip81A#gaKFazfQjMne;z|eWAUIphKDRMJghN1Vq%!|Psf6< zUbD}KN9{5E#}dP18pGozhFAWn4t!CWJ%%UjF+6FB;VF&bX%oXH|5OLQNX#C?Gxivs zwZ!n8#_+s}A^p!|=x8inu*dLXEQVCt|1QEyXqzQ3Glt-4?pIL3x1iXBI~UjbM@S4( zm%-!YRkY?DYq@>?dEhk#c-;p`ZykTVFjOY zg^;|6iN2HZjvfBp7<{VDIdYb~N7(%DTkPE;_`<~Ep7K7tc!K@|{J}2+vnxIz#p$if zv}rY^@(~*JZb<9AhUQjTf$A*zn7t&-QfIHfRh&-w2p%NI|sB2DDe#_Sm>=kk7GcK*aY2*s)NDf*Q3TIo0^gwKx)?w zT_WweE7EEP>A)zw)eDgLRgMU0aB0avU&+Cxgk2*Cwui#5X<%z3*q#Nj!#jh;2$c;i zB%M<1ldhFRy0((8V@UOM?mr|Q)G2AN9MbiabbUjrzl`FOZt#yuD>}(^!yM9$ zlyqaC)cMs6es5*$#yu`)&n@Mii>Y2*%00JF4pLu5+QcCJY>+lJNWd~keE00cR7M)= zfPz=bY?gzvxuR@gP_8y8{X&#rG9CgH9zZa6Vqh8@4=)9coUl=2xqtjALly;%3@T`3 zhcMrGqimTIb$^X|D--q6ChDy!t(zO`SIlWpTJ2wl#wDf!k1{iu~L{Pb5Xz4izO*0;Fi?9;j@Ce75u`-jDMG-N8aQ zu@k~U{W~Dk{WJ7MLr~6O2ImAbM1vXX2jjeDA{l0hWEE4-V=K*JD^s>|!}hjet5_LZ zuAIoOqueTU*s7FmxX+e8DH3{wg>YIYLgN;2$e0^T3Ew3acju;0w~WjQYDW!fCll0w zD5#w^sIHu*ImZd^ZMxrZ2a;WKz;;!zQ3fm%f$gSXobeG{&cOmuqjR8kSEw-tsx5-r zBZLB&?YM(EH8uxoPlXz1p!8=x{0Tl@p%{}jDjsZPuNmRS)-~T z)WjU9NeVUDKyAloOZ=#&7$}XaFu@ASzE2KRLZNC5)YK@dsR~8ebH|W}cpXDMN>9rH zo33Cp4A}GtjNhc-LM_sY?}OrU#(?R0(Bsi&d+2H&};>&Ge9#Upn3xoMdatklaLCfb0L)jHb=qc8n724Fvoy( zkf0ud8*-r13U#o7dNG1(451>`WBZM!*pvg+tWYfm>ZJ&3o`LE>Ej<>y3YF4tmd{H{ zrlGDeRa@Usml09eR>kP;^r-IR^sDacY&+poTwORTsyB89RpB$^ zzGIC)iB!FwGh)Iwtns&qs*9YRC;Y|QNm%k#cXxK1@CR#`u(rB0cEWG0-OpNAr&=#H z^p~EiyE&65{7K(WNUM4^r)I+MtgX(Tn>aJ8*K%qn{Kyu;RYLVTPW^=MSv!oiUQWY= zZ&^DQwKn~tdP6f_K%La@y!%OJG|4tteP4j2=4g9ure&DSSKk7QuT&PQZxMa`_WInG zOl?y#-6F~6WUDMj^CZ1GY7x9jy4Gm`snj`(rQeu zpX6*YsZF`-TH59{HZ;$z)gW5saP;w`!jH8in_C-NnthiX0gop~D#rEIb%?62P2UJT zAHPt;ZW(+X{wTO-)*{!Tt%xvHKXh&b?g5(sE9TB?8#-}ZWpd(}p-poulTCHajV(i) 
z@FMNT)}gIU+iY{l0E0DP$NmG#hYTMwY`_lv2h`(*{%j~4R#r85Sn1$ll>*AaGBM4oRsBWgG%qprH#O6|Nc1n`x&Y1prNHhi>6KFtmh^IFZI<*JV?C6x@aKhq z^K+W3c<1Ugm*M#JtN!@oFH5;n#VV7lPJj8w6(QDMT#Gqxr@2V-S1Vk=q;-kFRfGQi zQ<|szw4R=M#`b?iizi(DC9O2imuWq>@pPBgQxnftX+5v-RD+WOPZIu7od^1~9+r8e zPU|t1huO3q5_yzN>k*9yxiBZ!R1TM3noa-g}kpSuOgr}*V+)5 zt6X1Pu0SC!mxd6Ri!onZuB3c%xe$UZT*Uawat-1u%O!-bELQ-&vOInJ%JSUnE6dZK YFB;ETzN9?m_>%H4uR^x9%^vdq08s(jkN^Mx diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/py3_model_Feynman.pkl b/epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/py3_model_Feynman.pkl deleted file mode 100644 index 3e55c479e2cbe319b2de3f58e86119ef116bf180..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42837 zcmb__2bdJa_B|}1h$1K|m=F3n(gx1X)3yl^}}iusdw;xT){VlC&|7fH~)! zv(KD!&N)5x88JSeIj8?Q)zx9TYZv%^f8YBqRkv=Pb5C`3sGjcL!;5O0Qg!u`YPCkx zwn%!!{8Tnw(=tDkuFYk8OmC@6H}RYNsiw4DG^Yr~Y?Iw(&Uz@c zrX<%;+nCPUT{{BXTZir0kj?e5y8&~Z;x&oyVXuqW?!{KooX%iN+3U^e&VQS84Jw-+ z+P%I?Fx%SDY;RE2WllH5)V4IXh~2ZQ^PF{9vr*%}Uyz1fO6JyFf38yD0tbLH&2LLuaqw5IKTbC{C7`JDCfPi;$Eb1p6T zf0Mn1V{bWU1333gx8_=!XS6lVPm8@3@Hra$*;`k2YO^>({k;)dYHwq?gR;~fU|}pQ zjDQu$wtA`IXsTDd0^<820LkVeYXMxr7 z`38}X6JgO%dwUS7;&6M15aX`jq8a~%8E4yr!;G`-VlehAU|i`jmJ~7$@f6nEL$$(s zdzj|yUZBwGZDHd9c~h&8I~Cfbafv9W!l+p)!DvE(9OrgXiqA z9(Zn?3nLRy3_~7fY5$kM*o>2(Bmj|7-_twzaW%fQDLw7MlCkvsgJ?Q!N zz8ZS|GJB>GI@lPy*|YS<=w@b_UC1)WW3lZ0z`~=NWzPjk=h*HRkW97r4{wX9_5s)y zn`0;Bfxa%4wbFI=JbxGT4`BxuQrD20(>FcbK1eG}54BTTArG1l8V+G;d%jk9cg$VX=)p8r@`ZvhuRBPLp0s4*F-(dL{=eDgGZE04YxCzD3=;)A8aP#8A_c7 z_MKoiVmx^SV}16%sW!U_BihSM+w9Y()v2k0XRB0guA!wlYqxZCQr{zpbgu(iaptl+ zybcI;>0t}Pv&CwgHPqQTjB;0tmlAEo)*SbLX{+ZvdtoF8bsR}(E=4PpVXx?^`_A*y_wx*#eRgWtX zSz;fyQjOq@+M3puML06S$m~pAFC$TK9hF0 zkE<%m&e_nT4ldA+pJDY=*Brdevrm|_5kgvVq0uJuQ`~wr>RRN)8EqB^`lMnjhm)Q= z!YAtyR^xk$#bG?umRjDxoSwJONZMywhpn_A-ExIm5O1cfxvD6AjD1H)|bu#+1r!$9gcnHe7Ehc z;;N#mE~*`EcPH(89Q)q+X~(`VEad*Y{Xo)wut12Sct}$`>{D<&9!c7dI`(6l;_+e( zlTYzP(tgshpCZN6VTxz+_OnU*xj4o1n&JhYq6;WqOxiCw_RE^$6;c#=Dqc<6uQ~SX 
zqpeX+I58pMO8;{HK#|LE91g}Hyu+rK33U*p`r>G9=p>UXsx{1NO3 ziE{f-mBwF4BR-gaGmUk18vp1tn1pn~S)X}GXA~SMf(>OOT~Nyn>%gn` z(CSiI7d7T4-5Km3YDSmIdT=m9>A@Dm0Y}zH1M}?e<|!MfdUtGjjG9WeC%ZD)4N-8U zmkRKYg3=rHyle!Ul#OBI$@YQQpQdH9392gFO&RTP(hAuOb(L&iHW)@!`k{fzuBVgT zT-CXG`>Ry9U`M99B?^vgrJ93Ow?;iL{b7@`4QxEs0raZbCfl;;k7Y$d2Ew7z+m0=! zu^fa3rq@HKx4o*bQuEIp%N^L22@XcVkzy4PBv^uaUWULXWhiVs!C~-b{1F_^7(d5E zg^Ykh$qHRKg}@G;BP_G4T4^ zyF$jIsh`@<&X0N8kr^WY68<$;Xxb4*l94IC=RgVp|g+Fk<`DFQ)P8m+a*meH7`J+!)vO2dq_QlFPSG6 z{^i-VQ~DTbKshNHSVs|Ybm#iHs3qk*SVzu>W%d^^B9#4wsORM(*rZ$x8_)g{ zc!TUOWsHB6C@q!C;81h?a<&*26mkU`sHTrrb0zB;&Tnfkt>!9LsOD-Ed^OiF&{uOU zYDu{c){*OBspbYogw))KdR}gVP0GJu<7#e(H&Am6W0q&MhK!OBnU=RAgxPPRv%ih? z45y@3t+T(K6=r`23V!x?GSJWdF4U58H>@M~z%u)L84=36|W>>ox5v)@!_{|M_|_Ug)79wiF1e+&gb`^Oom=g|`=C*?_4N1lRZ?oYEll>0NN z=jB=0q&x>3&;5CL^)a)$TwXv;9W!5Kus@;3B;+MH)P#DOEvAF!D`=po&9tajS@(7i z_loj0_NA!TQSe2*!9XqQO_Y=J7OW$0!&210*&Y)04(fS%7d9#H!Nx`XhdsPAL7BXd zs#ic3}?3`J*r ztv}F*SxJQ%ipyw88Ko$)cm1fMbNFRN-M*2ZSoh|qE9Pgol$c)#R?pfc^W;|sYB9f| zoRr^T9r**6V*X@%NX%cT=jCtMr2GRL7t;xMSfgS(E1$1I%Tn8?2u;TA(}jJoeKyyU zx~e+QW8GA#$ilA?)|&qe5k8NtLsI`dR;5>Z>(a~3-5mvggw`XQ9-$s6CuMzDM>c@v z2=!!pXoNOIJukgrlhPYDK0+J8tG2ePHQ@3=~DCq-_+H*HylWEW06b&4k zE%exIrs^DHvLE%8=KAP<=AY2fzG+rb-*%^=2v#wH=#!=uJ#2R50;W-uB!He2ekDOPok zO^GTMS@;piGXMHx;U^y}@;gs>Y=)3DHa0`)<=6~E!5^F9WYc3a0_CKPgmq*TEXSsl z?V+(LLp?7E*rb%h#>b`t-e7Dh850_tN*N808k;d}GL6kxG;nOT(qpqD>lyA(xP)+* z=sU4OG2>A1#q7*LEoK*#ld>zUBjaHyW&+zoVs=9v>rPm#J7MEuCcztsnar4wm{OSn zj}kMLO(rpWpn+nx)?%iyp5YGQi>YFTV)jJA7c-rKTFeZTld>1ABYVSA%sy-niAkcK zmulFg>^$cgMFJ^yMDCPhZd@=JF zsKp$Ja#Ctw9XSY=Vp41miJ6ajUTR^JQU@CslZH1Cvw$%nF{7m(9wo+NlWEH|pn+nx z(PA>JyVI;%PqTyBmtq=G@WnJSP>X3s8S6$^M_OSi#%6m+jG&&EENoJ8uyHYM@CITQ zGA1OZOcueT#4Ki$Nz4*7P|N@=W-06LH1ot9!oC!9C@F+27 zv&kgp95hhOKrQB6Rp*iZJXI>P@GGxn{(09j{H7Z|(w|RK|7YDAz0$paUXIO$C^&MF zq6uylU5t8OE`d$TrLb|)m%$q>JT7O9f1e{ESD>yWU&#iOz8(dv6ctS%`9{>SQiR1y5jHOQW_Sb1w=iaT zMk~!Ip^?}2CeZzu!lWKS z!I4K95!%5XLp?8#!zSek*mzP;!kh8ehfgskSRam+r{PeO?HRV1Cfl=UV3IrNB%fnF 
z!-;0q=_H?Lg-O1Ef+H_7B9!DysORNn*rdDy8&C38c!MNgV~n5V*o3?ehf4AdwwRK9 z6Aesquuk$V)-#+bR)bFRZC04%zfo}H9Y%zbd>8e+ya$_<|G>tRd>`H*$qyJ4Bso?- zghM6y5nD`2evAetS*(-%gmrIoyUVjx?8_uSMZuBJRF7Z|evW!xzJN{2m$309zhVz{ zK=oG6(n)XMp5HilP|{!)3Y{10tdiW%3H->81MsuG>}x2$_pz)k!+xK!fbqu|I7 zsz;FckErM6C)lL?3>#1U7xqwzXZ(DBr7xUsrlTx>Q_;DqAv(+76{4O4ZkB(*rLz1J z1xNl;J%TL%Mm;b8z$T>=p4W+I*%{twmPPcfah6?FbgpWs&a$hj^SIVcm5MApfoQG% z$wTjX!0>Tx9Uuh9wHj|hOE2eCcN831PtgRTd!U||^1!Pk4iK^oES_Cw}=T z=>>eRT6Div9HB-2{+ zM=`@12aw*^*q}9TOD{DJM8T2m6iuLU5bAl^9yTdEz{WKWhBwey%$SZfmLP;0M`(>h zSa)v~jn5YMH?Hke1z0$08UOi(uHemPzmiS|&3l&{8H-;80qovc;rj4>VBAD6M50>+XF& zPfHd1Qp=tw_*$kjP-~fiGSj3&q~Q&;EMQEarCjRaP+Baun6^s;8mOgAYss+g&M{BR!R$*djVSn9ni#0H zG^30aFf3NUu+(C+J)}iY#|ju0D`42TmNs|;EejbFXsM7za40Q{*<#YN1P#=Z&{~$V z?#?k!%OUJbEr+7uYjGH;wJbv!3t(6*fMKcS2)2i`9Emy>z_3^V!^XAb;SIDL!A-*Kz^_wU!f6#`+f)>t9%EIfd;Z zEvKT6^)D>ezp!yFXTTe1Ig>GgmeFz+97@aCY%ys$2MyFxp|zaLx;w`_E$6W>wVaQF zujK*;YAqL{oRo`T9l02mS}tLGNXwu5Y~~0V5#L{wuiJlf_h#ag-yz1uyHMq!yBwipJ0rCyP-^;L|sXFiVY?yPosfS z#%L+eu+l9r-eAn~j8>3Q0wR}XZz6=bkJY)q#kzZ|Ym(0WZK5#uf1}{%{tg3m?(d?E-z;Gr z`422}f1mB4+&@4)FCW4tam_ok3~9$G#l3UmJ) z1wZ#M80i04{v~Qj`3lyN|H3l+uNe`_{(q?FdW;BV z-vf0lz+kZegNE4H?t%*!MyRv)@@~-tSBb&i8_r7co<=ziStlGDAh@swQYLdnv>c+#|9k=|@NRhD(Xr2L)eDl7U)G zHOg2q!8$S%mSSeHJtSr}>UrVDNXmY&aWQk*BO+#h`qo&?0V+CIwVM_*Pa&!s&Lj1O z?}2bBF*PV)^`v?PcNJ2o=Vd-@Qfgu2S=PZDTsfr~)A3z}1qk6zF;OQ~&$@eev0Nu* z5rxyG0R=}gsz;F2!Kmk@5jH7Ju<@jt;SG{%VNAzKwIYN`?XHuuS$A(JkI_j9QJ7Q~ z1xIqKN03w->UmiRo0LVc@uU{R8zi-aF&!th6d_D%l1}On)-&CB#v7^U;-N%gQVt4u z;#Bnrk~$3ayc`ajlp|o{NgWAqkknC(@sDwpWpXqe>KK=2i|H753>ui^WS!)4)-!#W zWVudq1^Y6|l_)rJtm+XYc^v9_IUW{Ii^9f}JQ3a?$&(nfJfl;~sLUdJ@yQ5bQd4wN zr?8&chDnXlNu5d*CUqJLj-0M~1WBENdS1?iP0CrY@ubd%H%RIn#a7m!>BX%g z7sE9NVmpl*F-k5$MZXa!=~Sa!%0~4HZMrqxTql>IUZQ>(SeR;TOUvb`&uE*U(Ii)( z>d2L_eDRbo6Y}lR##Gbn>Y5s`)zp;9RcOs9Qhvn1Zyz$GOs+!w~OEY7Pv>@->dNpwm?Ge^MNPm0hutndWcV;w?bk_ z@$h0g)47yx_N12#7^*g!SS_=wOXPkxoAw9t0hP^z@odU4A0G0vVVcAJH00sc!9Jp3 
zA2q?2$zwijhturw)sZ}*NS-v2B;+ZdWb+P2vP_;{9m_L{<=Hq(0weUC&$3$wEZAP& z&J^EbdN0!(?D^HD^MXp}MN>Lu@{*sJT<@Q*dPKOj^60r|wo z_XlJu2V~XikUv$(pJ`<809M=5oT;yIkMs%o+z0Qkl6Mc|jA_HycG7C%sFOkhTE1`@ z+dqhWsTjX%m$7VdLjJpEj0gqe*Q;avpJM!`UB(0$zg;s%go5$A)iHjr7=LJ&k;ls) z*NhRNVEk!yj6W;JU)p8l@#ohyV?-zze_I{n?~3t{b{WeSm&u=N#)wcb{eOxEBc>&Q*khu`_}@ymBZ)fFoUC!xvcLy`xmRt{EdPnZVd>b&TsM#&tVj zER*hQ%1Djtwa1uNAInHD#x-vYP&0+1gZNS~i7;7sWb*%}~YNF<556i)kK+r!0_u zaJeb$^7+D|it}pD%aAogqKpf^If_+9sZ0cN3%DKG5|)s--_R*lEnBf0FPNw7`as35 zhkkvE6@%NFa9v_>{T19cE?kC}uNvpMH!ewi^2-8+qdr77yQOhKx;dLmH#Ext^0{el zOWe*e+<^*rI~P|C9hKvhE`!+D&Gn0S2EMfFpxxYDLzvqWGcJ7xg*jMbdM6ik{UOBx z>O-pzny_;8o-x!CqQ*yHh(aA2K~yvFefU^-8H5+wU#vH zQf)FRK>f4%tP}Qnf2SDgWTN(pp-xe#Q(aVRTfJFkJ=eW-hcA{W5VaPcKB2l=_8^P9 z&8HEqZw#$Uq3x;BChC1CJclwNkm&@8&&wGKWG@XeRi~6bJhW9qAbS%ezE$^8AW01} zOM`42o^BzKYJ$Y4+rA29rUuzxgVc;SXEKW*o5#jvwgTY+82f=WUyn(_gbHP{A3@?1 zYOVs=UxU?;-faE6JBdRe3kcFLHpKM` z#L^&pXkot&+}>VxK_&*$K#=&1$taM6HOK)PWa*#-hd%p3uNX)pLE;;`Nr5zLkR}&o z=c9f(N=ff8DI6|O09uL^fV+G{Q!Sv~@5Z?)7x z8bjQ;)&_OAC0Ll2j8qX*7AU`7{FRiH$dYK%?r{ZKb`_5HG7A<0p3*hk$4 z+Z1}D86yH0eajG2Fh(xjVeHEDRA{6QhYwHwz;gJ#%cXQ$j%2Xn83dlPkfLA!)C$%c zixN2sT@18G6DdNQhtH8?U`gw*XlMH?+U4w|=)E(;^x0g|u7Kabv6A5tj$`3-gh*=QALJb^&~jTnHORt1p#{7_74P6s6N@b6^NBMw&eO=^vJpyJD4i0y4LuCBw-YErdk1`u+zA`rP)p{^T?|&N zTXP&Kc}dg!&|#>|9ZQb+-H0{t-9v~7-@WiTavy8~U#;BF;3!|MnePF_8u%V0M1=1l z`0%6{Yyn?d9$|2lFKy;~6tM=r#|RPOdmKJTo`5ajYmz4!9OY{=^F4)F1K-nxi10lF zpCix07VzcdIR;1ha%R5g5o_Rkfe;bC7vaNWYp~?=zL>g%i-wmOt{AroW(*q4oUb6- z!1*dcBAl>pw+IxWeH%V}y91WA z-f3`;(#45Vd57_em`544bGZ9P_~ukX-bEPL{JXIk_d?)3_KvXq2R=MY2ODL>?Q6z& z%*NZcA0o`a_7QtW*gl5OkxyWwYzbM#_>S3l%l1=*8Q4B!?+DxH@Hz4YYylhYt(M7` zj4xz!_xfNYdAs&2gc;cW%ia;Tui?X2M_{9D3HgTcg>3FV5wP)A?Y9Usuzkng5w`E) z!&_dk5w@~2`H}I3Y;kR6@)N=gY(KMigzXpj9QhSC%9fDd7+=U1*H$LKBh0||2YW}@ z{)7+jhQUVI5@qr?;|tm1+7j{)!VGMkaHSh%>kJ=0ehC|8OGp>S7qZ2*C8R6D3~b%l zJECnJ_#9amwtx+Hrc0$e;}x6tiwu5W!7NmVlO?sX9@-5wJ=iTmvp#(IHVSN%rc8P= zK2GCavVdkov>Rx8v0H?uH++t41RJGE$i|G1(8*@WF9G@HVQM<8J%G&m&o 
zWqgzdJCjS3lYVG7&}`0b5t=RF!wp~9C{3Ac#rQal9<-cnjdlY~e|C$|Yy+Po17M>x z3E7tMaT+~nIT?s{1I>2q7NHpgpCjAD7SNP0E=|Y|jE~ab4C4;kd>M>(15GizMQBRk zb7Tl?gr+PZLm3~X!70O|8HRQP&2V;$(2Ri3k&&!ulj`8ptI3_S$bELiBLUw}_FV?}% zligwcn>&+Gabz-G7)9^V8~4TtenXjp2A8`xu42OXGE&u8olZq_K?-E>$i4eVd%)+& zG+0vb34uh0iPp#!IIEhrB3lT+TQG= z$gw8wG=t;?> zOYs+pyr1|>b734>d;)ge4(=5tFpkuscCB0T5~cQV+KsgmQq8Ld}u+upUA~ zlhmSWM&MWerLF)Wtq>My1U^!iP1j1j0fIYd>4=fFE!pCwC1MqzG$@peM&TC2r}=B; zU;_--*7zwhTMR7VG#21ADV%1F!)$7^wbEjM(b;4hnwd>&0gA0qM5Cx|2$MCysB8%0 zW|LDm>XGp_cGriUg(%OHMarX}8eEKuBTMMAiqw}&-EGcCpY)Sr?lRPWOl)Lgge*lo zrf{&5AhI`ky?yZz1%0T7=F31X^cH~DcS3NGN3fWFXY{4dWH^WJUh#Uh?h7a#p z!gB9b$GM3$qvceFDmE%p&j1Ew{;i_gMI~|?A`M)p6ClEM27Hd32^-?V=ZF}pxJtmK z?hgfA1;TJwUf-l5^V#TUU_OU%5$1E@!;`?UA?C4iK0_5V=e0XEyzN&&bpZkmR2Q;; zgz6&r@WLl7sjz0@XESkqb_rw2mEnrt0CuSX>@o#*c?7InuJFOQpf`YBX#l%Qfn6N| ztB`AaFs|1PVAmSJu2W#wN5CrO1`kX?Kfw41_Ji=@eO1^HcG)O-n6WWztg0VDg8}+O4F>F|7$3oY8a_v!femAq$+L_lcBVT@2C(NC5CMB0K1W`F4TB}*MaC+y zo+uf>USdE5>}B{Ic?C8MRxYnHR)KAdk^$^B21LMKhtH8WV8dV)@+M;ySYMP3U~e%X z0`@k1xLpVv2CI~J7^}dxLdgL3E(0Q9@4<&RY+=J-qvd_ZDzI%)GJt)+fC$)!@Hz4k zY#3~ee9Tyah4y;2D*Xfv2JBUgk6?caAHL268^#_hpEEXwjiu-pXfR-Z$@mENSMcFQ zUDyzIVw8N%*cdj}pZ`OH0s9-qN3g$z&ynw7!`P+rJ!2I%eNH1?k^cIg_Q_k$&MiFCsKdxN6Ra7Pst!H35`VWB89!oO@A$t_dd@g)Z@ z3&ONr&9vPNwCgC^bv12`PrH++H(0%^fo=D)r!WWUZcbr6Lkc}q3hTQmc$XuI(Xs(! 
z)uiE7vA)3buX(b#yy}Sv!|-m%j*$t|3qD7B!-i+(7}Nty-YdbA0b80Gw=ytptr+`j#?d}wg~#Z=!lfrq!Op3lV8LxBm4o~GLWyi+CLdrR z-&TzdUTh#QQRG83dA*U` zyNSPMiEgkxO*E3_mNk7=$>dM+}%JpNfA!ggu8GNpa_e-quQGAO)>LLHSq1B z_@=ph-Z`+MOsW{Gn0HW8)pbC7-0mTzMD|1{!}(%5u_EV-8Svr$GAz#*1O3zgcB|9r za>?Fis(lPpNkvudQuz|gWnacBi5=a?;i-2fA`QA`5g?*#Hhhlo@rlr>x1vJ!W2_?M z8PU4~>D^F?o*?v{9CFV^M+5i%#EWnr0G}iCV8h&%av)41^7eFrx|iHCUI>-yPfK3*C%6*v!{x;A>KR%`TreS1U(J3uDzL+|i#t z{@z${(dIqCs}sX!^{-s1y%j))O>7fCvWW#gN3yV?=~P)NImW77_KoM#{!GS$$9geQ z5EE>n(1^8x!jR=cGDosp1RoxPhUG*&+CN}cb0Ttv`NLE|8;lA@Xl+tkVoq(TA+R>ZNbr_3V$fdBMF&&M+AIMlm zADovWo3h}xp1WNQxm*q)LoQbkKa$Ip@Hui7EON=L=kK*++nGy-1pMnO|4<@Vn-jRk zkifMnf$Q7^{B4nt>lv#CV^}+>v@8DxbT!Dok%$rbH^GN5jlzaEMY-I}SjE0(`!X;1 zZb2Ud?X850(B1~0Be%nbX)ELo#wyx1+Ly_CC%PC|?;=u!^=|m^Cl_JEtd(*vV-@TE zYc%a~;POtM{`G*j?Y-R*hdIN>zGmUB4bsq54JaB;|D6duBYF}TeEA84yUpQ z)zR@eI6cInf)q;RC8#kB$jcNN8IV`tbL3T63`nNhKV099Vd&r@%i~|Wi!HBLaLE!V z3SH(G?D_@69)!oC!UOP{c>rEF48R*|0N!*5z&l-xDU-Jtt7ZgWU2|^x4VRgZISk}Me6ewLX-+i^n=X%e~^&bP*`-^lYaeFSWb z{NRDj2~8yTeL{cZ{)iX@%}?wa$@ypa9Qg&76N!JYX*Clm9oe|Qnz?>6aQ&{h{&2ax zjXQRf{K?qJz&FWX@ESz?&ESZLf8cYZ6JDJP4gASa{%@!jrH>)Sf^St++OFx%`b>AsaKca1YLf2T%GSz%X{3uwP{CHiZxGOu&Y>cct`Y ztm5Dksp_{}^%`A2_zlpTGdu#l1$=l<0yYdiTDD@W+K2g0o&juY16Y3rwoL?Vj12I= z=BOtk)SW9P8Xk`l*%na-qJiuj$$C5Z92o@5$;3Yv<_<&nc7Ay6OR~r&-rmf%gMn?Z zVk_2ceSEfY81#1U`UP);@w-v(R|TTK$(ERN7-Gm_sLElO&SA>h=YV%@Rj$z-hMRL3 zVaQ>m%3+kwVbR*>kZ31|QgaSvh8z+qhjN|6_G_C1e!OibhYE8Jm4+Nfs~pDY9A>Y5 z4()A=vF03hG~}?8%3++&VfotU(B8J#*_^{Jh8%WPIgHmij9%Lu_?0^IoSb0JVK+k# z6IBko>m2IWK8Njf@3ZFM~||5D}a4mfXrq7MYs1Vm4C%OMUqiG zH>+v`>b?qfri*HQ;HKsMcIQKK@f5s}Gs_G=I|QF@vyPl6e4RY@vdx|?g12Rs_K^M1 zig#`1;vYx$XQ+Pkedf&SQaJz)bpp?}HZ*5t9;)-?Kt@gb2mdKk)j6$R!O7;Ntv1)j zS7s29;rCGSVvNQ=2rm4IEcW#AXO+r)M!KlAYzv`wNoR@joz3yBh` zLqmX_M$M50?2nClw#KVx9iPX?|6HU#Ths>!odomh<|HC=fX#= z_m-B-^x`spNVyRKK^je{InvBdS|R|;qy-KQ*vdA*1%N#F2Y^_xv$D5~ZX-HC7u4|M z91#Nagyi7R=xuB(K=;zsfC~{204_oezoW4et{3<+^dOtuqB8J3Qd;+B5r6oh$Ast>y*dq&I 
zk5aHlYuFD0*nA9jbVsn*RAocrq8RnGKkC!Z>iFiVj~wAHQvEL0{eIW|E>Zougf~H^aFi}p{Vvn}-qHOo_xqu*9&J9W><>_R zRG8lqj6>P5@)EfMNWsRvQn6j-vSl_2Hty91!s#6l4js@Aq1spdwqX1dOy-(`WUf`o zT<0cZy{n~PZ^&c~rAOFqC}6u$vE8KE-qURVS{+;Aa6(;s+HNjjyG60x>au0V1_OPY zfpBIA1C2Y>K4W1nCH&r8xVcb1o8@+uR9d}a+?tYHLv3TarmmqjCwHg>?^M}rW_7RX zYxS?{XAPS6Ij%IURaKi>L#j5gMo#+?RrNPXsy4LBr+v>F|M*8$Z)?o7Z&~AC-KZ+E z#!dTMMdEN<)zun5?N8SFvbK&jY1;3s@vrPub+)S1WrzCvj8$E%y{7%ewnZ4As%}>G zv_Du|&f4bIoT~M$nrT1L!#`?M)zhk*_5*9zu-40JnD!lO{F~5i>W$@wW_*b_rCxq^ zMc$=E-VLi>9GCP0<*&)LjFNj)+r5UiQn^pH-Oo0@#$4Z$t7%GQS|rt+%E|+9&rk=_ z7Qr`#Ype#4O0Bg-9#q{Pf-Sagg4E-xirmDo7SE<-VY;rS*22rvX?Yki_|#8by`);L z5exCBKU&(dBbpkrwLSJCXu7UCo$DcwAgm-Qk1E6sXINX!u(qDjrnqZc+FBbMn(J#+ z5?Of+F>Y4)s~xH4Y(q=4+a!;p1)ps#AKPM=bdnm%!4Q+-9MskXVXWn>e+sN9$xnQhu`yF&(Rtid}D z7@Qa}dc>%~!v+kl!vE)Ieb*<@KF_m9eIjGxTDjlKGlYScpBAt@(h!!!t6+^ z&m`31{pp%4zFORnYXI=GXkDo`u(UNdQ**j8?Y@8hoa+8OtlGe-#>SRKHLVS`2jip8 wFTlG(32kg>YRIK>(3+JORgah0gP%dm=CD^Rkd~%YW2==KB`>34W!s4V2gj7TGynhq diff --git a/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( 
fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/ee_mumu.mad/test/cudacpp_test.mk b/epochX/cudacpp/ee_mumu.mad/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/ee_mumu.mad/test/cudacpp_test.mk +++ b/epochX/cudacpp/ee_mumu.mad/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt 
b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index bdea67b952..cea3cd6aff 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -1,5 +1,5 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode +('WARNING: loading of madgraph too slow!!!', 0.5546493530273438) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. It has been validated for the last time with version: 3.6.5 @@ -16,7 +16,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +29,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +39,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. 
Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -55,7 +57,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005957365036010742  +DEBUG: model prefixing takes 0.010983943939208984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -147,13 +149,13 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.002 s +1 processes with 2 diagrams generated in 0.009 s Total: 1 processes with 2 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 175]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 @@ -162,17 +164,17 @@ INFO: Processing color information for process: e+ e- > mu+ mu- @1 DEBUG: type(fortran_model)= [output.py at line 224]  DEBUG: type(me)= me=0 [output.py at line 225]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'color_sum.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 
'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.h', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'cudacpp_overlay.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 226]  -INFO: Creating files in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. -Generated helas calls for 1 subprocesses (2 diagrams) in 0.002 s +INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
+Generated helas calls for 1 subprocesses (2 diagrams) in 0.007 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.171 s +ALOHA: aloha creates 4 routines in 0.399 s FFV1 FFV1 FFV2 @@ -181,17 +183,17 @@ ALOHA: aloha creates 4 routines in 0.171 s FFV4 FFV2_4 FFV2_4 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
quit -real 0m1.151s -user 0m0.372s -sys 0m0.155s -Code generation completed in 1 seconds +real 0m1.581s +user 0m1.308s +sys 0m0.223s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/Bridge.h b/epochX/cudacpp/ee_mumu.sa/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity 
filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before computeMatrixElements) 
+ const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc index 22cb8c2604..358c2d341e 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc @@ -924,38 +924,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -981,7 +993,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1116,7 +1128,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1140,7 +1152,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1252,25 +1264,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1333,7 +1356,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.h b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.h index 1469ba9333..b590074a0a 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.h +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color 
selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: 
use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) + ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code
for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: 
bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 
+103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.h b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.h +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef 
unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/ee_mumu.sa/test/cudacpp_test.mk b/epochX/cudacpp/ee_mumu.sa/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/ee_mumu.sa/test/cudacpp_test.mk +++ b/epochX/cudacpp/ee_mumu.sa/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index dbae24afe0..f64815a8e5 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -1,5 +1,5 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. 
Continue at your own risk  Running MG5 in debug mode +('WARNING: loading of madgraph too slow!!!', 0.5098216533660889) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. It has been validated for the last time with version: 3.6.5 @@ -16,7 +16,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +29,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +39,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -55,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005540609359741211  +DEBUG: model prefixing takes 0.007402896881103516  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -148,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.004 s +1 processes with 3 diagrams generated in 0.018 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -159,10 +161,10 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_ INFO: initialize a new directory: CODEGEN_mad_gg_tt INFO: remove old information in CODEGEN_mad_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt 
 +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 @@ -174,48 +176,48 @@ FileWriter t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  -Generated helas calls for 1 subprocesses (3 diagrams) in 0.004 s -Wrote files for 10 helas calls in 0.266 s +DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  +Generated helas calls for 1 subprocesses (3 diagrams) in 0.012 s +Wrote files for 10 helas calls in 0.199 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.089 s +ALOHA: aloha creates 2 routines in 0.264 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.093 s +ALOHA: aloha creates 4 routines in 0.250 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory 
/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages DEBUG: result.returncode =  0 [output.py at line 273]  -Output to directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. 
+Output to directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. Type "launch" to generate events from this process, or see -/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README +/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README Run "open index.html" to see more information about this process. quit -real 0m4.687s -user 0m1.163s -sys 0m0.619s +real 0m5.071s +user 0m4.130s +sys 0m0.804s Code generation completed in 5 seconds ************************************************************ * * @@ -237,10 +239,10 @@ Code generation completed in 5 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt treatcards run @@ -267,10 +269,10 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards param diff --git a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt index 712b1897aa..db7e3616c4 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt @@ -255,7 +255,7 @@ # pineappl = pineappl -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat index 8b331b055f..4ac8928826 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-14-g6dba8f068 3.7.1 * +#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_functions.f index e986b059a9..47699fa614 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_functions.f @@ -2022,21 +2022,6 @@ subroutine orxxxx(p,rmass,nhel,nsr , ro) end - complex*16 function THETA_FUNCTIONR(cond, out_true, out_false) - - double precision cond - double precision out_true, out_false - - if (cond.ge.0d0) then - THETA_FUNCTIONR = out_true - else - THETA_FUNCTIONR = out_false - endif - - return - - - end complex*16 function THETA_FUNCTION(cond, out_true, out_false) double precision cond diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/Bridge.h 
b/epochX/cudacpp/gg_tt.mad/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity 
filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before computeMatrixElements) + 
const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 091fecf10e..8a87b6ffd2 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -939,38 +939,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) 
fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -996,7 +1008,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1131,7 +1143,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1155,7 +1167,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1267,25 +1279,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1348,7 +1371,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h index 3c5f6fe31f..b3c3d0ffb4 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: 
multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index 7f809ad0ff..cbe257bc8a 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -893,9 +892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) 
CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index a68aa6e4c0..b79f45da06 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -337,6 +337,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -347,6 +350,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -443,7 +447,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -516,7 +520,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -529,6 +533,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -547,6 +553,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -598,7 +606,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -622,7 +630,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -630,7 +638,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index b47f79aa45..6ddd16cc10 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -324,8 +324,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/addmothers.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/addmothers.f index d6cded9a2d..593c620d9b 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/addmothers.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/addmothers.f @@ -111,7 +111,7 @@ subroutine addmothers(ip,jpart,pb,isym,jsym,rscale,aqcd,aqed,buff, if (btest(mlevel,3)) then write(*,*)'unwgt.f: write out diagram ',igraphs(1) endif - lconfig = vec_igraph1(ivec) + lconfig = vec_igraph(ivec) endif is_LC=.true. 
maxcolor=0 diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cluster.inc b/epochX/cudacpp/gg_tt.mad/SubProcesses/cluster.inc index 8ddf5bee13..940c25eac0 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cluster.inc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cluster.inc @@ -43,5 +43,5 @@ c parameters for sudakovs integer iipdg,iimode common/gamma_args/Q1,iipdg,iimode - integer vec_igraph1(VECSIZE_MEMMAX) - common/vec_igraph/vec_igraph1 + integer vec_igraph(VECSIZE_MEMMAX) + common/vec_igraph/vec_igraph diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See 
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capabilities (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + # Fallback if detection failed (box has CUDA selected but probe failed)
+ ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone 
bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const 
FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile_original.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile_original.mk index 6cb56d0409..348c283be7 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile_original.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile_original.mk @@ -58,10 +58,7 @@ $(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) $(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -libcollier.$(dylibext): - ln -s $(LIBDIR)/collier_lib/libcollier.$(dylibext) || echo 'already done' - -gensym: $(SYMMETRY) configs.inc $(LIBS) libcollier.$(dylibext) +gensym: $(SYMMETRY) configs.inc $(LIBS) $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) $(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/myamp.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/myamp.f index bd02dfe2b4..5360566ef4 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/myamp.f @@ -139,7 +139,7 @@ logical function cut_bw(p) $ gForceBW(i,iconfig).eq.1)) if(onshell)then c Remove on-shell forbidden s-channels (gForceBW=2) (JA 2/10/11) - if(gForceBW(i,iconfig).eq.2.and.sde_strat.eq.1) then + if(gForceBW(i,iconfig).eq.2) then cut_bw = .true. 
return endif diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/reweight.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/reweight.f index 353e025d71..8e4672a421 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/reweight.f @@ -1416,6 +1416,7 @@ double precision function rewgt(p, ivec) rewgt=1.0d0 clustered=.false. + vec_igraph(ivec) = 0 ! default: no MLM graph selected for this event if(ickkw.le.0.and..not.use_syst) return @@ -1467,6 +1468,7 @@ double precision function rewgt(p, ivec) rewgt = 0d0 return endif + vec_igraph(ivec) = igraphs(1) ! save MLM-matched graph for this event c Store pdf information for systematics studies (initial) @@ -1592,10 +1594,6 @@ double precision function rewgt(p, ivec) c alpha_s weight if(ipdgcl(imocl(n),igraphs(1),iproc).ne.fake_id)then - if (q2now.le.4)then - rewgt=0d0 - return - endif rewgt=rewgt*alphas(alpsfact*sqrt(q2now))/asref c Store information for systematics studies if(use_syst)then @@ -1907,7 +1905,7 @@ subroutine update_scale_coupling_vec(all_p, all_wgt,all_q2fact, VECSIZE_USED) else all_q2fact(1,i) = q2fact(1) all_q2fact(2,i) = q2fact(2) - vec_igraph1(i) = igraphs(1) + vec_igraph(i) = igraphs(1) endif c call save_cl_val_to(i) c endif diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py index 74f6b04b68..c248436e7f 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py @@ -1004,8 +1004,6 @@ def __init__(self, finput=None, **opt): self.comments = {} # comment associated to parameters. can be display via help message # store the valid options for a given parameter. 
self.allowed_value = {} - # allow nickname for some parameter to avoid integer mapping for some var - self.shortcut_values = {} self.default_setup() @@ -1134,11 +1132,6 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): scan_targettype = self.scan_set[lower_name] del self.scan_set[lower_name] - # check if the user used a shortcut value (which are always str) - if lower_name in self.shortcut_values: - if isinstance(value,str) and value.strip().lower() in self.shortcut_values[lower_name]: - value = self.shortcut_values[lower_name][value.strip().lower()] - # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] @@ -1317,8 +1310,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): def add_param(self, name, value, system=False, comment=False, typelist=None, - allowed=[], - shortcut={}): + allowed=[]): """add a default parameter to the class""" lower_name = name.lower() @@ -1353,11 +1345,6 @@ def add_param(self, name, value, system=False, comment=False, typelist=None, assert val in allowed or '*' in allowed else: assert value in allowed or '*' in allowed - if shortcut: - if allowed and shortcut and '*' not in allowed: - assert all([val in allowed for val in shortcut.values()]), "Some shortcut value are not in the allowed list" - assert all([isinstance(v, str) for v in shortcut.keys()]), "All shortcut values should be str" - self.shortcut_values[lower_name] = shortcut #elif isinstance(value, bool) and allowed != ['*']: # self.allowed_value[name] = [True, False] @@ -4186,10 +4173,8 @@ def default_setup(self): allowed=['partonshower'], comment="list of check that can be bypassed manually.") self.add_param("python_seed", -2, include=False, hidden=True, comment="controlling python seed [handling in particular the final unweighting].\n -1 means use default from random module.\n -2 means set to same value as iseed") self.add_param("lpp1", 1, 
fortran_name="lpp(1)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='first beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("lpp2", 1, fortran_name="lpp(2)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='second beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("ebeam1", 6500.0, fortran_name="ebeam(1)") self.add_param("ebeam2", 6500.0, fortran_name="ebeam(2)") @@ -4198,24 +4183,18 @@ def default_setup(self): self.add_param("polbeam2", 0.0, fortran_name="pb2", hidden=True, comment="Beam polarization from -100 (left-handed) to 100 (right-handed) --use lpp=0 for this parameter--") self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_proton2', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(2)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (used for beam 2 if group_subprocess was False)') self.add_param('nb_neutron1', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(1)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_neutron2', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(2)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (of beam 2 if group_subprocess was False )') self.add_param('mass_ion1', -1.0, hidden=True, 
fortran_name="mass_ion(1)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 1)') self.add_param('mass_ion2', -1.0, hidden=True, fortran_name="mass_ion(2)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 2)') valid_pdf = ['lhapdf', 'cteq6_m','cteq6_l', 'cteq6l1','nn23lo', 'nn23lo1', 'nn23nlo','iww','eva','edff','chff','none','mixed']+\ sum(self.allowed_lep_densities.values(),[]) @@ -4228,14 +4207,12 @@ def default_setup(self): self.add_param("fixed_fac_scale1", False, hidden=True) self.add_param("fixed_fac_scale2", False, hidden=True) self.add_param("fixed_extra_scale", False, hidden=True) - self.add_param("scale", 91.1880, shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) + self.add_param("scale", 91.1880) + self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1") + self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2") self.add_param("mue_ref_fixed", 91.1880, hidden=True) self.add_param("dynamical_scale_choice", -1, comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2\n '4' is the center of mass energy\n'0' allows to use the user_hook definition (need to be defined via custom_fct entry) ", - allowed=[-1,0,1,2,3,4,10], - shortcut={'ckkw':-1,'ht':2,'ht/2':3,'et':1,'shat':4}, - ) + allowed=[-1,0,1,2,3,4,10]) self.add_param("mue_over_ref", 1.0, 
hidden=True, comment='ratio mu_other/mu for dynamical scale') self.add_param("ievo_eva",0,hidden=True, allowed=[0,1],fortran_name="ievo_eva", comment='eva: 0 for EW pdf muf evolution by q^2; 1 for evo by pT^2') @@ -5598,10 +5575,8 @@ def default_setup(self): self.add_param('niters_fo', 6, include=False) #seed and collider self.add_param('iseed', 0) - self.add_param('lpp1', 1, fortran_name='lpp(1)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) - self.add_param('lpp2', 1, fortran_name='lpp(2)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) + self.add_param('lpp1', 1, fortran_name='lpp(1)') + self.add_param('lpp2', 1, fortran_name='lpp(2)') self.add_param('ebeam1', 6500.0, fortran_name='ebeam(1)') self.add_param('ebeam2', 6500.0, fortran_name='ebeam(2)') self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", @@ -5644,15 +5619,13 @@ def default_setup(self): self.add_param('fixed_ren_scale', False) self.add_param('fixed_fac_scale', False) self.add_param('fixed_extra_scale', True, hidden=True, system=True) # set system since running from Ellis-Sexton scale not implemented - self.add_param('mur_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mur_ref_fixed', 91.118) self.add_param('muf1_ref_fixed', -1.0, hidden=True) - self.add_param('muf_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('muf_ref_fixed', 91.118) self.add_param('muf2_ref_fixed', -1.0, hidden=True) - self.add_param('mue_ref_fixed', 91.118, hidden=True, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mue_ref_fixed', 91.118, hidden=True) self.add_param("dynamical_scale_choice", [-1],fortran_name='dyn_scale', - allowed = [-2,-1,0,1,2,3,10], - shortcut={ 'ht/2':3,'ht':2,'et':1}, - comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n 
'2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") + allowed = [-2,-1,0,1,2,3,10], comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") self.add_param('fixed_qes_scale', False, hidden=True) self.add_param('qes_ref_fixed', -1.0, hidden=True) self.add_param('mur_over_ref', 1.0) diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_tt.mad/bin/internal/common_run_interface.py index 6f82393c3f..3c5601e27d 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/common_run_interface.py @@ -5205,12 +5205,12 @@ def init_run(self, cards): if self.run_set: self.special_shortcut.update( {'ebeam':([float],['run_card ebeam1 %(0)s', 'run_card ebeam2 %(0)s']), - 'lpp': ([str],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), + 'lpp': ([int],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), 'lhc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), 'lep': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'ilc': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'lcc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), - 'fixed_scale': ([str],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 %(0)s']), + 'fixed_scale': ([float],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 
%(0)s']), 'no_parton_cut':([],['run_card nocut T']), 'cm_velocity':([float], [lambda self :self.set_CM_velocity]), 'pbp':([],['run_card lpp1 1', 'run_card lpp2 1','run_card nb_proton1 82', 'run_card nb_neutron1 126', 'run_card mass_ion1 195.0820996698','run_card nb_proton2 1', 'run_card nb_neutron2 0', 'run_card mass_ion1 -1']), @@ -5795,8 +5795,6 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed_for_run.remove('*') elif isinstance(self.run_card[args[-1]], bool): allowed_for_run = ['True', 'False'] - if args[-1].lower() in self.run_card.shortcut_values: - allowed_for_run += self.run_card.shortcut_values[args[-1].lower()] opts += [str(i) for i in allowed_for_run] diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py index 262d39a736..3bd0c281fc 100644 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py @@ -38,11 +38,26 @@ def compile(self, *args, **opts): cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py - logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend in ['cpp', 'cppauto']: + backend_log = pjoin(opts["cwd"], ".resolved-backend") + # try to remove old file if present + try: + os.remove(backend_log) + except FileNotFoundError: + pass + misc.compile(["-f", "cudacpp.mk", f"BACKEND=cppauto", f"BACKEND_LOG={backend_log}", "detect-backend"], **opts) + try: + with open(backend_log, "r") as f: + resolved_backend = f.read().strip() + logger.info(f"Backend '{cudacpp_backend}' resolved as '{resolved_backend}'") + cudacpp_backend = resolved_backend + except FileNotFoundError: + raise 
RuntimeError("Could not resolve cudacpp_backend=cppauto|cpp; ensure Makefile detection runs properly.") + logger.info(f"Building madevent in madevent_interface.py with '{cudacpp_backend}' matrix elements") if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' else: - raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) + raise Exception(f"Invalid cudacpp_backend='{cudacpp_backend}': supported backends are [ '" + "', '".join(cudacpp_supported_backends) + "' ]") return misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/py3_model_FDG.pkl b/epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/py3_model_FDG.pkl deleted file mode 100644 index bf5a732979d683e3642a1177b58851862f165d66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 49027 zcmb__2b>he_BAZ1hyf#DLX7N!gpGg+5fNl1I7>3%I_wVgjvMH^vm|Ydvw%70tWTdg z=bWB7>oaFD>oe!@opZZ7%uMZq|KEr2-L0xyea^k7s=KRudU_5os%uKt&z0mnXGmR( zq=w8+wx()ZW~Wninbw{YTk2DdJ>4!PN4Q;!XLj$>rAu?NDdiT;EJCrh$?Z0CO%&!O zCDTyXm}+&qcLp}ablbb3HPh2w4VbGJuS)-(?i%RZqu42$*%fR_cg>kS__sOJpwaX+ z&1-3Zt@9e1-LRK9G#9gPl>&(?zbCNPQ<*qvpW&NicG$mVG-CkPUZ){?+ zyPj^jprJnFxa&LXDc8LExwUoB!rh>{xAtsoXim8sYH>kw{vvmynLU9xPg>?B=O#1Y z+_<{S%r(%|&{|7*+}_J(u7hf8s&P&&BiF`0%M>BAXkN*(q^12R_x<*WIGJOPfQ5`UMItcDHnVQ5L)X9cbY| z0Vhh^>7{Zs)i>5o+|S+0SwF1K*w5YC*`T_Qv!Sz*v$3;@(|eiIXPMI%C4{k!16C*G z+m?Ks2nzav28(8P0VFnGEN; zI~cwmITaw`L-rX_Aw=S=HDDhv4VT<{Sgc*Y$W zfM=GtJ9ZAfo*jIrT=1QPacFaQ0X7G|ZHc?9f$oNZ7`0-W{&RYT3KiL*N9D@DTL4|> zjyBMBOWfV9(BZV5IK~}ghP%6+Wsh8zu>niQ-P5o@n{g{)nd**@&)li*UYK92+X*M+ z5>^L<_3qw=uzra*=d_Y+LWG}8hEx!)@2%6np@qL&Q9ukMLqp>z&xC}+)md4 zu}ylqg6^%wI%W;^ZU(zwcZW-fwqj?d|6j^kXO=rZF9ux*)FvC}re;gB=NK*vYBJK$ zJh!L209!Bg+(pG^#gLja$6Z`ECrGE~o~G2(Jw)ebse5SF^%Cw9Kl9ewhNk3PtVCpy 
zd)RVSa8h0Eyp{zxNgs}rG|&2tEiLnE!`35k!k(Sf)d9jtHD?yPN6vDOQu-SFD|L@f z>beT6fO%MG%x!hEp4(pCrMgSIyHvZ48&g|e?k;Q9+1?`cDQR_=d+sqhvGy6ehkI;w zQR~cg1L|mjcHAVVudX?`%yW;Qxjwqg!$PA?W+yp%wYnBLVN#nzp-(J!GSzDug-;%G`mUT~0xMw(rEq9Q*<0tAMxighhb2}^PAe*zDFp;W}?m4Xv zqCeL;wz_Mp!w{Y0ogjwu7{l5ohVzS^f&5QnxIkmLu+x#A)6>03N3FrVc$RxfF+}WI zU6g8fFHN|YdG5bvCq4J_cnVi!-G3+CD+^P&N>i9VeR`>TwWe@QltSq+_gYQiI)4Dh z?BQN-N#zEnvW`jRMw5z3=B8rJ=H_DOA_tM)Qas^r{A0KX_SS@Zo9Es>+xOj3TwPS% zP5toQnQ-s&+`DI|Jolb>BKKz9`x5T`If*F61BT+kkb-(VlyD#R+(!(>qs5RZr1($5 zeav$oC&d$SiYK$~QwjI!LW*Y$#j_zrH&8s6aG&?w7YxOVq$moqcq!q&?76Ry;=gf< zSF`SG3HS9JiWFzm8~*N!@xXw*sXGPBq0car%AWT#ub5fBJpMG276L*+<1r4_#`nz;V{Wn%A)}b{(v6oxEl3a;HzY z5d1hg3vH`Y`4b(PPd)duc=DfT-7gaEm$Bsau+Y%d)RsxkZcMpf>ET9?W|MI}@^wkO z`@gLFjeqb+Hn%i4)Fm6;Zyh?m%evot?hozmk2v^H!JmqqtIeFoA?IhmSTnuM{lyf~ z`l|}~TQR0OFaB`Ysc?Viq-=Np2-F;2;r@vxDEU{|ROxc+c59a|IGYEXmP=REsI?TK zVB6cJ8yr-8GhcD(uJskpwmh~|Dp`%2;@~5zqu|LJ+95myNe|SsvLGxyblUUykMS5UpScf<|gq?w62M3+M9Airo9;oo@}n}DD5p!&q_boglq|0 zn09~du@!xO+FNVGHn5&-3)?Q+!J4$UN3q!X+4)rmuVy+jAAlz3EX}-Fz2Omjc$t)- z#m{^oN@3>1WsvVEYL^{QN}DQX#^6}Lsu3~-RaIyx3f8GH3=S%^g;A(f>pieXLM7Fx zjGI!UauhtN&<>GCm8fT>3N|6bVGA`Hp*=>@=WDd1HtYoJ$u?st?b~vAw6VT=qqaFVKG6g!gZ7z^NK8`}+>ep#x%$ z@{uyjcVIRh7;{urNv-eb-Yy5l9OWY<={r_ym)S8#RjJgWs-sYkf_46-;NU21X+~j= z)>m-$nfW)Dn{pf+6g+9r4$(NIQP0Z3unB2|EgXj??a@r1KMpP0Fb~!f7q(pl){H|d zip6QpI_GFrYr5{I88nz`&Gb@f3#v8Kxfjoms`M^ERfl6C3bElRuare-@rPq^unm`; zWpaq`z;GOjQraB!FbH0(3h9@`su29JpsF$-j+%9{9{~qtE-^A6sde4S&2&GCn^NYZ zQScbje! 
zsdSq9s)8r8UvxUC94@Dz#aHlDl!6IeSt+Oa4k&m!N@-Jtf@j3~Rh7$`sH$3LpE1A!Z$v#SH^C<4X4t|MZ_ysN(&wjmn>O4I>&YFk?Q$oqN%1Ze zd2T$*`87QNWgY#ywJ|&;jwqFT(Bh|gFG}eleidVWA4REXlHX%>I77FsD_IQduUy!G@ z;Tc#@o`r3f=U|N>&!gxci2VJ6yM*o>nk5cOI48V-HdCFBlfsK(6^ErVc?nfBUtUHr zHebrCUpP76Ma?=n--CmrG1QF4`&!5Sc=d*Vd$4QjkBmT%cPyj&=i?@;Hpv3yU5hGtyBpGg`jIN2!p3tOq+uPCGgiGCxSk?41n6Y>YFCx60HqQB^m zN%S}BS?Pk?hY9HlTPRTxTGOF2-PmVo+6aMNhIBDb_+cA#1~WvJNbzSeO2o6unT-%6hN~ zSs%7giVe^jNwFdObS}k4=t3#Vj1(KQp5{K~RQvZ#q&HC*dmj|S*!!|0F75rfz6ojx z*%a23&0rb*=IjxRehbub@dS&DC)mR1`=d3Aek=9~5A~&`vNalXj&DPc<<^gE3kS0) zH`#2*dYbdwnQXGzp4&2;0Vsso6tiQPO$llV83^miAXsLz1AD}>8H{>XhQKCdC~RRi z!_XRKQ_4O|(B?@8mRqPl>KOD7$jDYoI zBrK!fkv(G3?}U0*c7{#JF0h5s?~2wa`cdrDdGx!X3!|?x(T`?5h~B@cuscy0{TLL& z==WepGmplioRB?XJsAhf*vHc!i+wNDvoZlTA=R*jvG0vmbIh!%kcp`2F>?|-hZAZ< zxlBfbPN*sL*bbWez`>-3o1_w~2fK%VSy;nunbcGi!lb6Lqe*Hy$_beP>j`hEGpT*) zk0rGq>RH(zHX#SV7A7@|TLfo4(q}0Rx-mhlS`YF(wb4;-qotTGu0zaG8xu``ROOqh9*sB zKEZ+$)&h1k$t*-UA&X!=Sq#f$4xvAm%%P}f#e+@A64+QW&Q~*WEb1~<4nsR3+vRYS z+T{q?tQ^VS-=>WdORuBUd9*sSu%5KTG8k-IUDVbtOHnr~g=KU_5(PUznJkAxC&w|` zBUH*rN2GQ+7QSF|j3|@iP}RwCJPOvyaRR|GIYt{jPt-b3q9V}9Y2Yl zOw#ZqT5T@;Phl(P+Nmgn!*d$h%EP+=(XtXm%G$!GtZXlDkpW340F;)REXLHX%>K zb~GqY!PhQNYwu^+FFq*Gs`EK@J`d~33$XD)c@cFpC@<0FA4$Toc$sbSvA`80n*6c& zFG}H943}3?)vJ~Tu%70D0}D0(HuIaTP`tNL2*rDw9gTSJpq!9* zVLf>dmg2one@wg&P|wPTunGAHHYT1)b_H6az5QeMiS6yB@(EgWZ~v4o+ur^e986}c zN#=9b)7;a;WWHd9$$W`In9Nt~Xp;FF<%IkX){}2wnasEJ$CCLD^{jjkn~)!1W69*~ zo2_)|201<1^t8)QvcQ(v>$8{$g8PqYC*OP5w;if@dzm znJ_}SqN+2Y2nDNn-Qb{jdm8b&v!3Q28H%?Wx21TiqY#R>20I$@dZ3(;HDNvJ2}|+T zqCY0y+Nft`9oU4d3)_)+z2L)VQ`&od_KS*C|JeY0uG8d&WP8Kb$>UiF}u;0+?L|?MYbRIn6pJNjedZI60Z2EZny7`7wvO5kgkf!ccz`^ClEL7juuIRw^|p|ElB zhM{i6E2T@t3r-BBQie7?F_hC8SFubg(B!LFiBhOyxm2O5Dh@}%s^SPZsN#5|;z-v0 z{Wnl?M{Y|McS0dlac6ckD(-@ELUx7qWE3n_+>QR2ilb4_%I>fU83WsqihID|RjfwcsJJ&>{uHEm>F{tqk&d{6l`;uUzJilc3Kgu7DZT@X z#C>9pvSE@yRi&*#!7A-kI4JF2M%rnt`+IjF?R0KSX=k7iO3Noa0%`X}IU)POda^$( 
zr9FWDn6$G{&&q+Y38{tcNZNzoYnP<*C9qR+5I#*43JuSQ)l*q}ovMWWLfw_q!LMo|T2LxI%)B4S|ttF3JkDZ*!3WGWe1y9aqkJw&*4(eGs7d9d1!4?K}K3dZg zv3k&Jw+q-Oy5loaE<}S)wu|VoO}2~SV37NmATMD(&57pJn;ZdR7?Z z4JdeWBYVVxyb1NJ+zgwLTVM-=ycMlckhif<6y!*`9Ss`f9rW0Oyb}%vS!05{i}hf* z{TnWKb6W;^4+@^#t6M~K@IKVDazAWB9)OJnX`V8AP1X^t znl$4NQ1IkK-6G2PBh<690yZHZ!xm=ziT3!EK0o8nwBd7De8vXbE?>f$jK4zBn(^0k zbdd4?;0rSzA>R4p}6k<%TeK)VscWi?cFEwefb)Tt;(S4%hA|gt?4iN0ZAQC?{kr ztS5WIGM91m$8s5udRF#=O~?e;!d$A+8s)M#`$V~v$wV}0E|ci7E(e7y0{e39VF3WAHT{{Y)c1zjOXtxaIge-^k zLb~#CVpUi%7?M_kWsp>op)|1m=?aqgT+BrtM3t0E}*+9Dsxh=K32!&9) zi`mg=cL~Y~xfIru%V4S9zvz!?cRA{~mV(8#6l_P@T?t>iT&2CQX1}<0*QoPabzTSS z$@Q>t?QTHbXm=xBk#^A?vzzFM30NgJqs15S7L-B(E96$+fh&dE;tpIip{mN>fr3@p zJK>tEmK1hE|*@sZi%EPb; zc?7m2WgmqP7g5^#G4_iq`?xxvQ0J4do;(E`SN3Vtjk3?s6)CHG^s{V>i8@@KLyIr! z^C*R)R>}*gs&+4;VAbv=IH+CPX!kPf{vI7@_X@YAcK<~o)b3SwG}^s}azb8*_2dm$ zYWF7nG40+$Ju7d+CgdI1jjeN80@gU%ULKy?%ph>n;|Z3WkcA6 zYy=xq)+n(tTBAF5z1b&xgtSchpsq``zI51@YMa2pRGLgGo3ieIP_mmzWixKeR5nK; zOl1ppG^zAM84m!$deR@3scc1mES0TM$HfFJE+$|LQ`ru!Q7YTB&(gHXAgu}HT`&zm z7slRfVlQUh|0H6piM@m&XyU#y*t(SnR`4&q^t5LdswZ zV=qT*6nh2xbRK&px-j+@6MGfw!4%SO>B?}TF!m8Bgt3og$MC_x9Z^fjPO$iz6)dCQ zg*{@??}~a>M!_azH`v1HN24{0es}ii{K3F6=)&mdndtXmJ(x9q^ka#_==Ve+jD8$D zhS85lEg^fs;u9}eMqkYyvFP_kJu4Go6EX?5F#5@8jiR5zKAlIu54tdV*F>LSJ(xFs z^fg3b^ixp?qo2l(Vf52c!!v-ecm@!b(eKM1vFP_h9nS#5;u%2L!sutAHH!W~_USzO zT6AIb858|MtOwfr4`n8a!sutC5Jq3ej%L=^qnwZwEItr}W$bh5kHzkwo|OjJgrs2$ zV?P+J(f#B`_6aW(M^s1?8uaMXOpop8(*g%`X*0RZV?7uNKNpwVG8aK1%%znbO)eRf z6Ve8Y53pdF%L4jixhzCID~n(gvKY27mqXAR<#H(dgt?5Y5DyKS%MyBQxf})ubD3{) zIh^%ikM?hp9>Hyy%aJIAxg5ogCYPg8PDmCO&jrFVm!byvu7sGmT2`s}19=5y`b#p4cj4ppcMzNw#5dK96_R3tHu&BV=;d1!6^YaUr zBP!(z6tQ+ltA2T>tA8s@rh1`~?n>5!$?i*c6`EAKt5FE0yM`T&bl0Mskn3P^$pcI2 zZlFIV-HoW@Yg@4R+7@g_(%k}IyWFb1Z)3l>bhoSX4t3rM>&acPap~?x-AH#2T{h|N zrK1Ds?t?FoZe*q0Pq+@Gdq80_)rT7C9%SAB+-az}gn9@~D&4~H_ZV{#UD(YEz4Hg$Wu!SkU zp*`ND&rk6!ZFn2jlXqa-vn?mZAJ7t}SS3Gl`wmk4NjJ_^A7)bgne{Y3GjfKSuNeMG& 
zw}?{w9rdjI0h^FNVPh$pp#MUv?rVCh`ET|K&s0@q(gp8v(lb?86s*VaA~<-aI@|== zjrH`Jn6*xY39>u4-_#K{?pGF`VxdOZGwU)n`(zBrp-{#%I2^M*#b5elli#2pZ3_2K7YdY z*M_ZNJ=q$zUABQWW4|qm#m>=r-=a>>#{)O%1u!~IYw*p`QrQk&^@P8@ZnM-`<{y&= zpqe({B*qD>I9A0sKucm(d;@e~tcq`d4vJOr4bUBeDs~6wn7kLj$Y6LWUN0lw5Uu;) zNB7ruL(!t*4MV|`Qtc3lSB834%3%{y0b3|urS_}y2*s3@?vn|?xDl23cwD?Nxic+xOS60a=RFNp20Ym{iR^h8iM#I63w>25>uJsj| zEO-lxPM0y6t*E_t_-ng87cdq!B04WQqVqJYEaXBr=pObZ_0mdXHxK1Q@p{fRi>d6 zKB$2$E}11W_&4txwuSFUot~5izK*A-@VgjLWnTr^j~nAPLF;KOsfF`I_D9o997Vef z8!}7|K*hX_OH#>3nMEi5-=#h^FV$Qx2cle}12;d}*p`x7)F-vgP8*VgP{n6Fuzcqm z-{Zv>w>2i4X4KTyf~~f;OlHHIR#Lt{H1rP~SSEFFIepV4O)FD655FEBPg1bN=i5!W z{g~>Br3*`C4mVTazHpTGAKX6#56Fj>4UxI%h7|dhxw@97d5x)s;&4kVVFNs#q+tsP z%j95gri6UqY)B@JXh#|yqc$mOvq24HFwrzfONdLCX)YeNLurZ3TN$*g&|;u%BTz1_ zIrw4R0|%B%#>ek?1lkmTzQNBKfpS?80*^L=jGj?5FeEUeP(HADa4{QGnWS+Csh9L0 zq=P0-%Z!>5S?EX8v5<>2n#G0Dlwm#`5=O%?2Zv$Ep(}&+6n2RXwoDESVLP2>hp&v} z2qihvMp7&Y}v}fS+3z6 zV+*HDjt#@voCBtF#5RG-O60he5go5YC)kL}<;0K(NA51Aa?;AsPFA#23>sfy$k{Y; zzu>qcr-q2ZpEJffZEBX_(%ovz4dbc9?XXIhafj1L95FvJf98BakjS{d@iihPMd4h~>- zEzRkR5TrLg4`)Tn0VVr&1@S^LKP8>Zclp~kRe8!FsBL7mx%R6K&TUahvtQsS_ zg7M!gW4uxsuj-Jo9E?}58Y8-b@tTz}UaO4Pb;!u$<@Kw^h^}C~VP%XrD&tKZGV=Iy z^QtkTD;RHC8RMs`vm@Y@+1MUWW*JHm*m(P6HDbOwCM73+-?V*G;&D4 zpLbUJMhC0rmb!!GY529JX5wCxrc9e!BhRpf%SQC!d-vs8G|ZCcV5_?(6}lC#woJ+M zXyKyRMDPNtp1cUl2=GS_#N+13ODOvx?E2~aqRMk?&rOpxO`^09{xXWyMags?t}Cn z{rrl9cK2hAVZKMq!qnea%nuA^aB|W0hkO{JKCt4T(aVSLU4Z%#Q41AVp{O6{q3Uu+ zK8a9YdGXsTfB&SZ0QFO%b}z{PGe!N}pa%IbYO0ejBFrE5`6Aozj)bo*q6PkIG<)(t*toolnr6#4+*ak?ZM~=Y?-@Vj80NRcT&*DF?-cX<0?ds3 z5MkDzgfPLNvo(hKBQaMm!2C%ue=fjG$uA-1sNL>6ZQB*6oKb-JD>2t9!2C@ye=opn zl0PEMyVqWJ*^=izFTnhhnClc^{-v0I8_Zy8Eo#an+oTId&rH}qn$J9b^1C|~pms%< z>Rtt?MQHY<8!Sf2*~$#d^rw52ZvWXL1<`BqiKA<3q&r#sF<*^neG1T4SF|+@+87g3 zd=8~!AUz0DI4{>!ke&u)ya_3Fcx+T-AZrn%a8%b;kaY~mbOW+Me7eOz)+I>cbnB%c z>lu*!4M^>7_DI$z$R-7vY@i?;8j#sWlbi_^i)15$6i%p(6{NQTnQK6DW>5^I4?zlN zP+tYv#DFv!kemq<1KE@yg%f5o1=-wyY;82@w)C%)cHL)7f5PxIw#V2pqxY39(9O)4 
zeuOKWF?|2fxZoz%5LED>3Y%hQEvy=37~1?BJ}7i49qO+nN>Pd5wMr9HoWU#FF=)4N?7{AN9AnXjZx6u6XIATiG8xC-3ceL` z3vRbe4B3a~&J7ul{ua``2$M%T0d08p2R5!}-2&O0ot3i>IAaqnphsXOG7-HjOp~~A z9@AvBc`^l-O#F?BXreI5eYlMh+9nFw`bz?REOa&8G>>j7+B}&C8&9ltflOy-@}$XV zL7Tx2d1zdo;vpm0INID&*^iwy>L8<3N{0gy?hl^@{{U{1hd&E#o*W39gTFv(+1ZAV zOb&w2f}i9jdHA!@=1Cna@$pB+pP!m9_3W(#ykS}F?>LydIyehTG#Bh%DRi@N&EeL0 zTyxRpi33Y6-asN2xWb*KfmFS0@YEIiIi2&bJ7CEqsd!k;iuk+B`WFHis`I9y{mrrR;o5 z(AUCu7$Nfb4o90ON5JOrHOZ0eoX^)}=Q|30Eqq54B9AYNHay`4o5PoprRPkMl%KuJLmD}ZQ(qDAbFf8qRo?&V9AL; zx&t<~jhB60?QFDpat>@h8*X2-f9GtxZF?TNS=i3! z)_H6fpv{vDVe{F_D+a=sOkL^;l;d?x=Ic&JMS|0BOm0Ot3)^knI*;vkwBhaR zuz76w34%M>KbNgAw{p1)-7IW(bL%{|d(ejWJiz9&mCJqXpUYO5Te;kiZWgu&xOE=e zgJ|>QA=n%?+?g(whuL4*f=^`d`3h#C9!?h3$s_PvXddNec{Kk)8(!N7n@>|FkF$Rv zjlX08%@gojXrAO|c{ER<4G$N>=F^nRGwfeTQyygVEc_Ol=eSuO>r(TPR@jXmCh; zk^S>&urv8I8F>kQ3(d>iERW_DwBd#?Y(7nyyvqKCG)AiI2Xg-18Lh~s%%cJ=WZFpxrY#vQ{xqQL?`7}6J1TukK=o^;mc^S zagNpnA;*v0D2JnU0Xcp`yM^OtcF*JZ1#O=E3LEFBi#UGcMmZdH%JDneEgXNayWvO$ zpN0I1#;p7WJ4^nC4R7vr!2->buCQbe9=-8zjNmhrA~<~R^{|SG-^)nW;Oev++&LkT zAs`R#A9YuX)eHrnFc>GRhqzpp<=nfdG52orZe6UAPvxi7gFtzTu8B6h$q|-01y`x# z!a-Y$+o+)WU>e73Z8YU1;-jy_o`IHay26{&*6ja;rmPF+Ea?SXT<8B5cWq+>ez{T> z_1x2R>M85NjZfXy#~)8NU|)npEzKi7;fM6HA)Ly9-@k5a!Y@v?%0_IWV2fpA6tmJB z){mT7Eyj<52Osg5NFVs{A)6uWi<&2!z><)kh)GH|<>mxUM?lyM0HhDMZ3a(-usLd; zYyle}G&D&+Zfr&14{XYoISBm~VJm~cN9tNrb+WYuf;(vGkfC)gt;LH=WSbn6Z53rZ zgTfKRuVU88_7)gkTjQh1)?#1*XFv{4vEq~%97a>uS|@o;Vc_fee1_f~1}{6?(RZ<+@2cpde01k`A9_PTGvvI;Wj8zSXbbM{iaW-~ zO^*(v-YkR;jvhJU$R2izu@;Iwm13Ns7!y+P=81m*jLu!TB;)NQds#>(C`q*;so^P3 z$Cr8N6Mqp$cw?w;L5b{brd*$}qz)RN>g5DW!Q0pR-H()1Egq@^`F#6=FXF zF(riHg}DV{e+$F`3Ngz-G=>no+O|L(Xo09zh=U9SUuy1;P58j)UV25HLKWTYsY8l)`7Kp_P zafpH7i(7mMUUpg_4z)md3bDjM@YN+g1TQHq5QkYH4p)dH3z`$ zINCt)eHA_gF9j_SSqntFLM$~9s|9DqG%xck5X&qO%N61n12H#7H8Vw&$7Uut?=g<_(O8xD{{^-A^f>^`136A=PUdLKD@Kcmsdlt zvSVLp$G*sdeX(L+;$!Pi;`#8Ut9+)6t~5*JQakZw7UF*?@#ThiQK;Ea4oPw6CuyEh zE}c1+|CjSgS%^n~1PXS~P4c&{?vXBc-087l)u|GP_O 
zdgSbZ=Ftt@U(p!+=k`nFemnUC7V-y`{2@a=u>t>q`EH zA?M4zY~=q$_BZY1Z&}FSR`PcYdAD$x5g+s5w(qKC{;r+;Jq!8!O8$W%Plnelyh8Qw zpWp`SN)Ol{+6h0h5UxY2VHZvT){B41)73-4FN%Db`pHQI2?9hNty_6<)=+pH{4pG&^6 zQ+;cp`cA37H&nZX7yX^x$lz)A2Rp@&7K)#g;%7s_-}m;lYZm`GeWKBb#kQV3w)HKsZJ@DjXkzOUPV1UrG(vH5XmgeN2cd0b z4{c*hXuUPGJ|?s?L)rE%2(8ndAsR<(w@^UtI(Q7SuRXv`ECFt+0d8gjNpx#XbQ_asZJ6kG9StfTK#St$v2a^E*>)DP?UihR zA=^JB+cucuc|sL%72CN=EL;PXYmng@5_0izzToWQfBD#d@ILl{o$O$z8El~$qBKK& z8s{8;mMDk)ri{)9K4v=1&QWUNC{vDdpCed94Id^I?5k_2!JUs*g@5LX%`&OPww6<7 z6(RGMQp3^a$q3lk5^i{@jAUP>kIpT51DJC+%^yumEIR_o63b4+&x>VewBcQzusnU2 zgrhpLgBfHAAY2cH2MgKN9>6F|0J~`bqx}HFu_%|_*;mEbp@UF5On(eEwWPlX5%ba? zi#B`<0T$^yfBJ;0wySb=HqOp8-omt(GEFc{fBQ_Uw4aiw+D^2$g=nG@P4bCCL91jk z`>LS(uhQhkp(r@@h3D{KgoB+92QJI>n*t__{`)Y1JpB`B^P~ngzAFrusqCx4?zbwV z|9_8l8i*{hPA7j}tTWJtSFpmyV;v#;vhP0>)~tB`p|Jab$r9}T3?MJq1JLHlEZBIk zBjrH${b$1d-(sx=ktNoH$e$N$5^bK$hKDqYaD;D)nXiJa+M1!$ zsT@Fcb`*|I0)ahFNBH<;MfIdun3S=0S-0-@E7I%dBE>V`IqO5X6tu#!u!yTcJL+( zc(a1H81Ts1wP?2gU6zn*o}J6JaEWrY8m^}BURZ}*b$MJFJ6D^9Yrb+V zFkE~^tS{OIX4WjKi%TX8?Z}HP$cq*E5QEIui~7j;c+~%7Wl>5FwZnN9xFrgAn1NfZ zyMar4_K#Z+w?G`B5Jwsa{)}G11o6L^77p4`=xf>ij<$ej6};VmUl@XSIB2Q7L0f9) zT4v!|u3X0$F22?@U)m-))(&@^1@3r-JHfzh8cNIW7g@4D(E@RjLY! 
zQi+^`z83IPE#Rjq_~{0mKUe94cPMRBp0sD!xz4n3ouyo78!j)DwnMH=9@jZ`u5&G1 z=PB3uhKuhQ_VWt&y-Zv(xxkKmp#}LOMZVY|^QEvpG8)Xj$8T3%f~sW*E=6lP% z;pcK-d2$&NP7!`V%K~w^1>y>Y__u-J4;=b2>1XG9zTj8B;u)97mFR7OzsdrCwZdOx z;Q4c*K79PfcKk|{WO+k(t)1;U3)}U|c7tKNAUqxH41qhmV-|fOh_4RvzgV9C>Ew;} z7;dt}aI?m6i;3Zre;xzAZmF^6$8f7XhTAML+^#X)VPdEVRbOQt$~%bRPJ0Y@Sz@?b zW4On}aOXeO0Uu*_5W~Ip81A#gaKFazfQjMne;z|eWAUIphKDRMJghN1Vq%!|Psf6< zUbD}KN9{5E#}dP18pGozhFAWn4t!CWJ%%UjF+6FB;VF&bX%oXH|5OLQNX#C?Gxivs zwZ!n8#_+s}A^p!|=x8inu*dLXEQVCt|1QEyXqzQ3Glt-4?pIL3x1iXBI~UjbM@S4( zm%-!YRkY?DYq@>?dEhk#c-;p`ZykTVFjOY zg^;|6iN2HZjvfBp7<{VDIdYb~N7(%DTkPE;_`<~Ep7K7tc!K@|{J}2+vnxIz#p$if zv}rY^@(~*JZb<9AhUQjTf$A*zn7t&-QfIHfRh&-w2p%NI|sB2DDe#_Sm>=kk7GcK*aY2*s)NDf*Q3TIo0^gwKx)?w zT_WweE7EEP>A)zw)eDgLRgMU0aB0avU&+Cxgk2*Cwui#5X<%z3*q#Nj!#jh;2$c;i zB%M<1ldhFRy0((8V@UOM?mr|Q)G2AN9MbiabbUjrzl`FOZt#yuD>}(^!yM9$ zlyqaC)cMs6es5*$#yu`)&n@Mii>Y2*%00JF4pLu5+QcCJY>+lJNWd~keE00cR7M)= zfPz=bY?gzvxuR@gP_8y8{X&#rG9CgH9zZa6Vqh8@4=)9coUl=2xqtjALly;%3@T`3 zhcMrGqimTIb$^X|D--q6ChDy!t(zO`SIlWpTJ2wl#wDf!k1{iu~L{Pb5Xz4izO*0;Fi?9;j@Ce75u`-jDMG-N8aQ zu@k~U{W~Dk{WJ7MLr~6O2ImAbM1vXX2jjeDA{l0hWEE4-V=K*JD^s>|!}hjet5_LZ zuAIoOqueTU*s7FmxX+e8DH3{wg>YIYLgN;2$e0^T3Ew3acju;0w~WjQYDW!fCll0w zD5#w^sIHu*ImZd^ZMxrZ2a;WKz;;!zQ3fm%f$gSXobeG{&cOmuqjR8kSEw-tsx5-r zBZLB&?YM(EH8uxoPlXz1p!8=x{0Tl@p%{}jDjsZPuNmRS)-~T z)WjU9NeVUDKyAloOZ=#&7$}XaFu@ASzE2KRLZNC5)YK@dsR~8ebH|W}cpXDMN>9rH zo33Cp4A}GtjNhc-LM_sY?}OrU#(?R0(Bsi&d+2H&};>&Ge9#Upn3xoMdatklaLCfb0L)jHb=qc8n724Fvoy( zkf0ud8*-r13U#o7dNG1(451>`WBZM!*pvg+tWYfm>ZJ&3o`LE>Ej<>y3YF4tmd{H{ zrlGDeRa@Usml09eR>kP;^r-IR^sDacY&+poTwORTsyB89RpB$^ zzGIC)iB!FwGh)Iwtns&qs*9YRC;Y|QNm%k#cXxK1@CR#`u(rB0cEWG0-OpNAr&=#H z^p~EiyE&65{7K(WNUM4^r)I+MtgX(Tn>aJ8*K%qn{Kyu;RYLVTPW^=MSv!oiUQWY= zZ&^DQwKn~tdP6f_K%La@y!%OJG|4tteP4j2=4g9ure&DSSKk7QuT&PQZxMa`_WInG zOl?y#-6F~6WUDMj^CZ1GY7x9jy4Gm`snj`(rQeu zpX6*YsZF`-TH59{HZ;$z)gW5saP;w`!jH8in_C-NnthiX0gop~D#rEIb%?62P2UJT zAHPt;ZW(+X{wTO-)*{!Tt%xvHKXh&b?g5(sE9TB?8#-}ZWpd(}p-poulTCHajV(i) 
z@FMNT)}gIU+iY{l0E0DP$NmG#hYTMwY`_lv2h`(*{%j~4R#r85Sn1$ll>*AaGBM4oRsBWgG%qprH#O6|Nc1n`x&Y1prNHhi>6KFtmh^IFZI<*JV?C6x@aKhq z^K+W3c<1Ugm*M#JtN!@oFH5;n#VV7lPJj8w6(QDMT#Gqxr@2V-S1Vk=q;-kFRfGQi zQ<|szw4R=M#`b?iizi(DC9O2imuWq>@pPBgQxnftX+5v-RD+WOPZIu7od^1~9+r8e zPU|t1huO3q5_yzN>k*9yxiBZ!R1TM3noa-g}kpSuOgr}*V+)5 zt6X1Pu0SC!mxd6Ri!onZuB3c%xe$UZT*Uawat-1u%O!-bELQ-&vOInJ%JSUnE6dZK YFB;ETzN9?m_>%H4uR^x9%^vdq08s(jkN^Mx diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/py3_model_Feynman.pkl b/epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/py3_model_Feynman.pkl deleted file mode 100644 index 3e55c479e2cbe319b2de3f58e86119ef116bf180..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42837 zcmb__2bdJa_B|}1h$1K|m=F3n(gx1X)3yl^}}iusdw;xT){VlC&|7fH~)! zv(KD!&N)5x88JSeIj8?Q)zx9TYZv%^f8YBqRkv=Pb5C`3sGjcL!;5O0Qg!u`YPCkx zwn%!!{8Tnw(=tDkuFYk8OmC@6H}RYNsiw4DG^Yr~Y?Iw(&Uz@c zrX<%;+nCPUT{{BXTZir0kj?e5y8&~Z;x&oyVXuqW?!{KooX%iN+3U^e&VQS84Jw-+ z+P%I?Fx%SDY;RE2WllH5)V4IXh~2ZQ^PF{9vr*%}Uyz1fO6JyFf38yD0tbLH&2LLuaqw5IKTbC{C7`JDCfPi;$Eb1p6T zf0Mn1V{bWU1333gx8_=!XS6lVPm8@3@Hra$*;`k2YO^>({k;)dYHwq?gR;~fU|}pQ zjDQu$wtA`IXsTDd0^<820LkVeYXMxr7 z`38}X6JgO%dwUS7;&6M15aX`jq8a~%8E4yr!;G`-VlehAU|i`jmJ~7$@f6nEL$$(s zdzj|yUZBwGZDHd9c~h&8I~Cfbafv9W!l+p)!DvE(9OrgXiqA z9(Zn?3nLRy3_~7fY5$kM*o>2(Bmj|7-_twzaW%fQDLw7MlCkvsgJ?Q!N zz8ZS|GJB>GI@lPy*|YS<=w@b_UC1)WW3lZ0z`~=NWzPjk=h*HRkW97r4{wX9_5s)y zn`0;Bfxa%4wbFI=JbxGT4`BxuQrD20(>FcbK1eG}54BTTArG1l8V+G;d%jk9cg$VX=)p8r@`ZvhuRBPLp0s4*F-(dL{=eDgGZE04YxCzD3=;)A8aP#8A_c7 z_MKoiVmx^SV}16%sW!U_BihSM+w9Y()v2k0XRB0guA!wlYqxZCQr{zpbgu(iaptl+ zybcI;>0t}Pv&CwgHPqQTjB;0tmlAEo)*SbLX{+ZvdtoF8bsR}(E=4PpVXx?^`_A*y_wx*#eRgWtX zSz;fyQjOq@+M3puML06S$m~pAFC$TK9hF0 zkE<%m&e_nT4ldA+pJDY=*Brdevrm|_5kgvVq0uJuQ`~wr>RRN)8EqB^`lMnjhm)Q= z!YAtyR^xk$#bG?umRjDxoSwJONZMywhpn_A-ExIm5O1cfxvD6AjD1H)|bu#+1r!$9gcnHe7Ehc z;;N#mE~*`EcPH(89Q)q+X~(`VEad*Y{Xo)wut12Sct}$`>{D<&9!c7dI`(6l;_+e( zlTYzP(tgshpCZN6VTxz+_OnU*xj4o1n&JhYq6;WqOxiCw_RE^$6;c#=Dqc<6uQ~SX 
zqpeX+I58pMO8;{HK#|LE91g}Hyu+rK33U*p`r>G9=p>UXsx{1NO3 ziE{f-mBwF4BR-gaGmUk18vp1tn1pn~S)X}GXA~SMf(>OOT~Nyn>%gn` z(CSiI7d7T4-5Km3YDSmIdT=m9>A@Dm0Y}zH1M}?e<|!MfdUtGjjG9WeC%ZD)4N-8U zmkRKYg3=rHyle!Ul#OBI$@YQQpQdH9392gFO&RTP(hAuOb(L&iHW)@!`k{fzuBVgT zT-CXG`>Ry9U`M99B?^vgrJ93Ow?;iL{b7@`4QxEs0raZbCfl;;k7Y$d2Ew7z+m0=! zu^fa3rq@HKx4o*bQuEIp%N^L22@XcVkzy4PBv^uaUWULXWhiVs!C~-b{1F_^7(d5E zg^Ykh$qHRKg}@G;BP_G4T4^ zyF$jIsh`@<&X0N8kr^WY68<$;Xxb4*l94IC=RgVp|g+Fk<`DFQ)P8m+a*meH7`J+!)vO2dq_QlFPSG6 z{^i-VQ~DTbKshNHSVs|Ybm#iHs3qk*SVzu>W%d^^B9#4wsORM(*rZ$x8_)g{ zc!TUOWsHB6C@q!C;81h?a<&*26mkU`sHTrrb0zB;&Tnfkt>!9LsOD-Ed^OiF&{uOU zYDu{c){*OBspbYogw))KdR}gVP0GJu<7#e(H&Am6W0q&MhK!OBnU=RAgxPPRv%ih? z45y@3t+T(K6=r`23V!x?GSJWdF4U58H>@M~z%u)L84=36|W>>ox5v)@!_{|M_|_Ug)79wiF1e+&gb`^Oom=g|`=C*?_4N1lRZ?oYEll>0NN z=jB=0q&x>3&;5CL^)a)$TwXv;9W!5Kus@;3B;+MH)P#DOEvAF!D`=po&9tajS@(7i z_loj0_NA!TQSe2*!9XqQO_Y=J7OW$0!&210*&Y)04(fS%7d9#H!Nx`XhdsPAL7BXd zs#ic3}?3`J*r ztv}F*SxJQ%ipyw88Ko$)cm1fMbNFRN-M*2ZSoh|qE9Pgol$c)#R?pfc^W;|sYB9f| zoRr^T9r**6V*X@%NX%cT=jCtMr2GRL7t;xMSfgS(E1$1I%Tn8?2u;TA(}jJoeKyyU zx~e+QW8GA#$ilA?)|&qe5k8NtLsI`dR;5>Z>(a~3-5mvggw`XQ9-$s6CuMzDM>c@v z2=!!pXoNOIJukgrlhPYDK0+J8tG2ePHQ@3=~DCq-_+H*HylWEW06b&4k zE%exIrs^DHvLE%8=KAP<=AY2fzG+rb-*%^=2v#wH=#!=uJ#2R50;W-uB!He2ekDOPok zO^GTMS@;piGXMHx;U^y}@;gs>Y=)3DHa0`)<=6~E!5^F9WYc3a0_CKPgmq*TEXSsl z?V+(LLp?7E*rb%h#>b`t-e7Dh850_tN*N808k;d}GL6kxG;nOT(qpqD>lyA(xP)+* z=sU4OG2>A1#q7*LEoK*#ld>zUBjaHyW&+zoVs=9v>rPm#J7MEuCcztsnar4wm{OSn zj}kMLO(rpWpn+nx)?%iyp5YGQi>YFTV)jJA7c-rKTFeZTld>1ABYVSA%sy-niAkcK zmulFg>^$cgMFJ^yMDCPhZd@=JF zsKp$Ja#Ctw9XSY=Vp41miJ6ajUTR^JQU@CslZH1Cvw$%nF{7m(9wo+NlWEH|pn+nx z(PA>JyVI;%PqTyBmtq=G@WnJSP>X3s8S6$^M_OSi#%6m+jG&&EENoJ8uyHYM@CITQ zGA1OZOcueT#4Ki$Nz4*7P|N@=W-06LH1ot9!oC!9C@F+27 zv&kgp95hhOKrQB6Rp*iZJXI>P@GGxn{(09j{H7Z|(w|RK|7YDAz0$paUXIO$C^&MF zq6uylU5t8OE`d$TrLb|)m%$q>JT7O9f1e{ESD>yWU&#iOz8(dv6ctS%`9{>SQiR1y5jHOQW_Sb1w=iaT zMk~!Ip^?}2CeZzu!lWKS z!I4K95!%5XLp?8#!zSek*mzP;!kh8ehfgskSRam+r{PeO?HRV1Cfl=UV3IrNB%fnF 
z!-;0q=_H?Lg-O1Ef+H_7B9!DysORNn*rdDy8&C38c!MNgV~n5V*o3?ehf4AdwwRK9 z6Aesquuk$V)-#+bR)bFRZC04%zfo}H9Y%zbd>8e+ya$_<|G>tRd>`H*$qyJ4Bso?- zghM6y5nD`2evAetS*(-%gmrIoyUVjx?8_uSMZuBJRF7Z|evW!xzJN{2m$309zhVz{ zK=oG6(n)XMp5HilP|{!)3Y{10tdiW%3H->81MsuG>}x2$_pz)k!+xK!fbqu|I7 zsz;FckErM6C)lL?3>#1U7xqwzXZ(DBr7xUsrlTx>Q_;DqAv(+76{4O4ZkB(*rLz1J z1xNl;J%TL%Mm;b8z$T>=p4W+I*%{twmPPcfah6?FbgpWs&a$hj^SIVcm5MApfoQG% z$wTjX!0>Tx9Uuh9wHj|hOE2eCcN831PtgRTd!U||^1!Pk4iK^oES_Cw}=T z=>>eRT6Div9HB-2{+ zM=`@12aw*^*q}9TOD{DJM8T2m6iuLU5bAl^9yTdEz{WKWhBwey%$SZfmLP;0M`(>h zSa)v~jn5YMH?Hke1z0$08UOi(uHemPzmiS|&3l&{8H-;80qovc;rj4>VBAD6M50>+XF& zPfHd1Qp=tw_*$kjP-~fiGSj3&q~Q&;EMQEarCjRaP+Baun6^s;8mOgAYss+g&M{BR!R$*djVSn9ni#0H zG^30aFf3NUu+(C+J)}iY#|ju0D`42TmNs|;EejbFXsM7za40Q{*<#YN1P#=Z&{~$V z?#?k!%OUJbEr+7uYjGH;wJbv!3t(6*fMKcS2)2i`9Emy>z_3^V!^XAb;SIDL!A-*Kz^_wU!f6#`+f)>t9%EIfd;Z zEvKT6^)D>ezp!yFXTTe1Ig>GgmeFz+97@aCY%ys$2MyFxp|zaLx;w`_E$6W>wVaQF zujK*;YAqL{oRo`T9l02mS}tLGNXwu5Y~~0V5#L{wuiJlf_h#ag-yz1uyHMq!yBwipJ0rCyP-^;L|sXFiVY?yPosfS z#%L+eu+l9r-eAn~j8>3Q0wR}XZz6=bkJY)q#kzZ|Ym(0WZK5#uf1}{%{tg3m?(d?E-z;Gr z`422}f1mB4+&@4)FCW4tam_ok3~9$G#l3UmJ) z1wZ#M80i04{v~Qj`3lyN|H3l+uNe`_{(q?FdW;BV z-vf0lz+kZegNE4H?t%*!MyRv)@@~-tSBb&i8_r7co<=ziStlGDAh@swQYLdnv>c+#|9k=|@NRhD(Xr2L)eDl7U)G zHOg2q!8$S%mSSeHJtSr}>UrVDNXmY&aWQk*BO+#h`qo&?0V+CIwVM_*Pa&!s&Lj1O z?}2bBF*PV)^`v?PcNJ2o=Vd-@Qfgu2S=PZDTsfr~)A3z}1qk6zF;OQ~&$@eev0Nu* z5rxyG0R=}gsz;F2!Kmk@5jH7Ju<@jt;SG{%VNAzKwIYN`?XHuuS$A(JkI_j9QJ7Q~ z1xIqKN03w->UmiRo0LVc@uU{R8zi-aF&!th6d_D%l1}On)-&CB#v7^U;-N%gQVt4u z;#Bnrk~$3ayc`ajlp|o{NgWAqkknC(@sDwpWpXqe>KK=2i|H753>ui^WS!)4)-!#W zWVudq1^Y6|l_)rJtm+XYc^v9_IUW{Ii^9f}JQ3a?$&(nfJfl;~sLUdJ@yQ5bQd4wN zr?8&chDnXlNu5d*CUqJLj-0M~1WBENdS1?iP0CrY@ubd%H%RIn#a7m!>BX%g z7sE9NVmpl*F-k5$MZXa!=~Sa!%0~4HZMrqxTql>IUZQ>(SeR;TOUvb`&uE*U(Ii)( z>d2L_eDRbo6Y}lR##Gbn>Y5s`)zp;9RcOs9Qhvn1Zyz$GOs+!w~OEY7Pv>@->dNpwm?Ge^MNPm0hutndWcV;w?bk_ z@$h0g)47yx_N12#7^*g!SS_=wOXPkxoAw9t0hP^z@odU4A0G0vVVcAJH00sc!9Jp3 
zA2q?2$zwijhturw)sZ}*NS-v2B;+ZdWb+P2vP_;{9m_L{<=Hq(0weUC&$3$wEZAP& z&J^EbdN0!(?D^HD^MXp}MN>Lu@{*sJT<@Q*dPKOj^60r|wo z_XlJu2V~XikUv$(pJ`<809M=5oT;yIkMs%o+z0Qkl6Mc|jA_HycG7C%sFOkhTE1`@ z+dqhWsTjX%m$7VdLjJpEj0gqe*Q;avpJM!`UB(0$zg;s%go5$A)iHjr7=LJ&k;ls) z*NhRNVEk!yj6W;JU)p8l@#ohyV?-zze_I{n?~3t{b{WeSm&u=N#)wcb{eOxEBc>&Q*khu`_}@ymBZ)fFoUC!xvcLy`xmRt{EdPnZVd>b&TsM#&tVj zER*hQ%1Djtwa1uNAInHD#x-vYP&0+1gZNS~i7;7sWb*%}~YNF<556i)kK+r!0_u zaJeb$^7+D|it}pD%aAogqKpf^If_+9sZ0cN3%DKG5|)s--_R*lEnBf0FPNw7`as35 zhkkvE6@%NFa9v_>{T19cE?kC}uNvpMH!ewi^2-8+qdr77yQOhKx;dLmH#Ext^0{el zOWe*e+<^*rI~P|C9hKvhE`!+D&Gn0S2EMfFpxxYDLzvqWGcJ7xg*jMbdM6ik{UOBx z>O-pzny_;8o-x!CqQ*yHh(aA2K~yvFefU^-8H5+wU#vH zQf)FRK>f4%tP}Qnf2SDgWTN(pp-xe#Q(aVRTfJFkJ=eW-hcA{W5VaPcKB2l=_8^P9 z&8HEqZw#$Uq3x;BChC1CJclwNkm&@8&&wGKWG@XeRi~6bJhW9qAbS%ezE$^8AW01} zOM`42o^BzKYJ$Y4+rA29rUuzxgVc;SXEKW*o5#jvwgTY+82f=WUyn(_gbHP{A3@?1 zYOVs=UxU?;-faE6JBdRe3kcFLHpKM` z#L^&pXkot&+}>VxK_&*$K#=&1$taM6HOK)PWa*#-hd%p3uNX)pLE;;`Nr5zLkR}&o z=c9f(N=ff8DI6|O09uL^fV+G{Q!Sv~@5Z?)7x z8bjQ;)&_OAC0Ll2j8qX*7AU`7{FRiH$dYK%?r{ZKb`_5HG7A<0p3*hk$4 z+Z1}D86yH0eajG2Fh(xjVeHEDRA{6QhYwHwz;gJ#%cXQ$j%2Xn83dlPkfLA!)C$%c zixN2sT@18G6DdNQhtH8?U`gw*XlMH?+U4w|=)E(;^x0g|u7Kabv6A5tj$`3-gh*=QALJb^&~jTnHORt1p#{7_74P6s6N@b6^NBMw&eO=^vJpyJD4i0y4LuCBw-YErdk1`u+zA`rP)p{^T?|&N zTXP&Kc}dg!&|#>|9ZQb+-H0{t-9v~7-@WiTavy8~U#;BF;3!|MnePF_8u%V0M1=1l z`0%6{Yyn?d9$|2lFKy;~6tM=r#|RPOdmKJTo`5ajYmz4!9OY{=^F4)F1K-nxi10lF zpCix07VzcdIR;1ha%R5g5o_Rkfe;bC7vaNWYp~?=zL>g%i-wmOt{AroW(*q4oUb6- z!1*dcBAl>pw+IxWeH%V}y91WA z-f3`;(#45Vd57_em`544bGZ9P_~ukX-bEPL{JXIk_d?)3_KvXq2R=MY2ODL>?Q6z& z%*NZcA0o`a_7QtW*gl5OkxyWwYzbM#_>S3l%l1=*8Q4B!?+DxH@Hz4YYylhYt(M7` zj4xz!_xfNYdAs&2gc;cW%ia;Tui?X2M_{9D3HgTcg>3FV5wP)A?Y9Usuzkng5w`E) z!&_dk5w@~2`H}I3Y;kR6@)N=gY(KMigzXpj9QhSC%9fDd7+=U1*H$LKBh0||2YW}@ z{)7+jhQUVI5@qr?;|tm1+7j{)!VGMkaHSh%>kJ=0ehC|8OGp>S7qZ2*C8R6D3~b%l zJECnJ_#9amwtx+Hrc0$e;}x6tiwu5W!7NmVlO?sX9@-5wJ=iTmvp#(IHVSN%rc8P= zK2GCavVdkov>Rx8v0H?uH++t41RJGE$i|G1(8*@WF9G@HVQM<8J%G&m&o 
zWqgzdJCjS3lYVG7&}`0b5t=RF!wp~9C{3Ac#rQal9<-cnjdlY~e|C$|Yy+Po17M>x z3E7tMaT+~nIT?s{1I>2q7NHpgpCjAD7SNP0E=|Y|jE~ab4C4;kd>M>(15GizMQBRk zb7Tl?gr+PZLm3~X!70O|8HRQP&2V;$(2Ri3k&&!ulj`8ptI3_S$bELiBLUw}_FV?}% zligwcn>&+Gabz-G7)9^V8~4TtenXjp2A8`xu42OXGE&u8olZq_K?-E>$i4eVd%)+& zG+0vb34uh0iPp#!IIEhrB3lT+TQG= z$gw8wG=t;?> zOYs+pyr1|>b734>d;)ge4(=5tFpkuscCB0T5~cQV+KsgmQq8Ld}u+upUA~ zlhmSWM&MWerLF)Wtq>My1U^!iP1j1j0fIYd>4=fFE!pCwC1MqzG$@peM&TC2r}=B; zU;_--*7zwhTMR7VG#21ADV%1F!)$7^wbEjM(b;4hnwd>&0gA0qM5Cx|2$MCysB8%0 zW|LDm>XGp_cGriUg(%OHMarX}8eEKuBTMMAiqw}&-EGcCpY)Sr?lRPWOl)Lgge*lo zrf{&5AhI`ky?yZz1%0T7=F31X^cH~DcS3NGN3fWFXY{4dWH^WJUh#Uh?h7a#p z!gB9b$GM3$qvceFDmE%p&j1Ew{;i_gMI~|?A`M)p6ClEM27Hd32^-?V=ZF}pxJtmK z?hgfA1;TJwUf-l5^V#TUU_OU%5$1E@!;`?UA?C4iK0_5V=e0XEyzN&&bpZkmR2Q;; zgz6&r@WLl7sjz0@XESkqb_rw2mEnrt0CuSX>@o#*c?7InuJFOQpf`YBX#l%Qfn6N| ztB`AaFs|1PVAmSJu2W#wN5CrO1`kX?Kfw41_Ji=@eO1^HcG)O-n6WWztg0VDg8}+O4F>F|7$3oY8a_v!femAq$+L_lcBVT@2C(NC5CMB0K1W`F4TB}*MaC+y zo+uf>USdE5>}B{Ic?C8MRxYnHR)KAdk^$^B21LMKhtH8WV8dV)@+M;ySYMP3U~e%X z0`@k1xLpVv2CI~J7^}dxLdgL3E(0Q9@4<&RY+=J-qvd_ZDzI%)GJt)+fC$)!@Hz4k zY#3~ee9Tyah4y;2D*Xfv2JBUgk6?caAHL268^#_hpEEXwjiu-pXfR-Z$@mENSMcFQ zUDyzIVw8N%*cdj}pZ`OH0s9-qN3g$z&ynw7!`P+rJ!2I%eNH1?k^cIg_Q_k$&MiFCsKdxN6Ra7Pst!H35`VWB89!oO@A$t_dd@g)Z@ z3&ONr&9vPNwCgC^bv12`PrH++H(0%^fo=D)r!WWUZcbr6Lkc}q3hTQmc$XuI(Xs(! 
z)uiE7vA)3buX(b#yy}Sv!|-m%j*$t|3qD7B!-i+(7}Nty-YdbA0b80Gw=ytptr+`j#?d}wg~#Z=!lfrq!Op3lV8LxBm4o~GLWyi+CLdrR z-&TzdUTh#QQRG83dA*U` zyNSPMiEgkxO*E3_mNk7=$>dM+}%JpNfA!ggu8GNpa_e-quQGAO)>LLHSq1B z_@=ph-Z`+MOsW{Gn0HW8)pbC7-0mTzMD|1{!}(%5u_EV-8Svr$GAz#*1O3zgcB|9r za>?Fis(lPpNkvudQuz|gWnacBi5=a?;i-2fA`QA`5g?*#Hhhlo@rlr>x1vJ!W2_?M z8PU4~>D^F?o*?v{9CFV^M+5i%#EWnr0G}iCV8h&%av)41^7eFrx|iHCUI>-yPfK3*C%6*v!{x;A>KR%`TreS1U(J3uDzL+|i#t z{@z${(dIqCs}sX!^{-s1y%j))O>7fCvWW#gN3yV?=~P)NImW77_KoM#{!GS$$9geQ z5EE>n(1^8x!jR=cGDosp1RoxPhUG*&+CN}cb0Ttv`NLE|8;lA@Xl+tkVoq(TA+R>ZNbr_3V$fdBMF&&M+AIMlm zADovWo3h}xp1WNQxm*q)LoQbkKa$Ip@Hui7EON=L=kK*++nGy-1pMnO|4<@Vn-jRk zkifMnf$Q7^{B4nt>lv#CV^}+>v@8DxbT!Dok%$rbH^GN5jlzaEMY-I}SjE0(`!X;1 zZb2Ud?X850(B1~0Be%nbX)ELo#wyx1+Ly_CC%PC|?;=u!^=|m^Cl_JEtd(*vV-@TE zYc%a~;POtM{`G*j?Y-R*hdIN>zGmUB4bsq54JaB;|D6duBYF}TeEA84yUpQ z)zR@eI6cInf)q;RC8#kB$jcNN8IV`tbL3T63`nNhKV099Vd&r@%i~|Wi!HBLaLE!V z3SH(G?D_@69)!oC!UOP{c>rEF48R*|0N!*5z&l-xDU-Jtt7ZgWU2|^x4VRgZISk}Me6ewLX-+i^n=X%e~^&bP*`-^lYaeFSWb z{NRDj2~8yTeL{cZ{)iX@%}?wa$@ypa9Qg&76N!JYX*Clm9oe|Qnz?>6aQ&{h{&2ax zjXQRf{K?qJz&FWX@ESz?&ESZLf8cYZ6JDJP4gASa{%@!jrH>)Sf^St++OFx%`b>AsaKca1YLf2T%GSz%X{3uwP{CHiZxGOu&Y>cct`Y ztm5Dksp_{}^%`A2_zlpTGdu#l1$=l<0yYdiTDD@W+K2g0o&juY16Y3rwoL?Vj12I= z=BOtk)SW9P8Xk`l*%na-qJiuj$$C5Z92o@5$;3Yv<_<&nc7Ay6OR~r&-rmf%gMn?Z zVk_2ceSEfY81#1U`UP);@w-v(R|TTK$(ERN7-Gm_sLElO&SA>h=YV%@Rj$z-hMRL3 zVaQ>m%3+kwVbR*>kZ31|QgaSvh8z+qhjN|6_G_C1e!OibhYE8Jm4+Nfs~pDY9A>Y5 z4()A=vF03hG~}?8%3++&VfotU(B8J#*_^{Jh8%WPIgHmij9%Lu_?0^IoSb0JVK+k# z6IBko>m2IWK8Njf@3ZFM~||5D}a4mfXrq7MYs1Vm4C%OMUqiG zH>+v`>b?qfri*HQ;HKsMcIQKK@f5s}Gs_G=I|QF@vyPl6e4RY@vdx|?g12Rs_K^M1 zig#`1;vYx$XQ+Pkedf&SQaJz)bpp?}HZ*5t9;)-?Kt@gb2mdKk)j6$R!O7;Ntv1)j zS7s29;rCGSVvNQ=2rm4IEcW#AXO+r)M!KlAYzv`wNoR@joz3yBh` zLqmX_M$M50?2nClw#KVx9iPX?|6HU#Ths>!odomh<|HC=fX#= z_m-B-^x`spNVyRKK^je{InvBdS|R|;qy-KQ*vdA*1%N#F2Y^_xv$D5~ZX-HC7u4|M z91#Nagyi7R=xuB(K=;zsfC~{204_oezoW4et{3<+^dOtuqB8J3Qd;+B5r6oh$Ast>y*dq&I 
zk5aHlYuFD0*nA9jbVsn*RAocrq8RnGKkC!Z>iFiVj~wAHQvEL0{eIW|E>Zougf~H^aFi}p{Vvn}-qHOo_xqu*9&J9W><>_R zRG8lqj6>P5@)EfMNWsRvQn6j-vSl_2Hty91!s#6l4js@Aq1spdwqX1dOy-(`WUf`o zT<0cZy{n~PZ^&c~rAOFqC}6u$vE8KE-qURVS{+;Aa6(;s+HNjjyG60x>au0V1_OPY zfpBIA1C2Y>K4W1nCH&r8xVcb1o8@+uR9d}a+?tYHLv3TarmmqjCwHg>?^M}rW_7RX zYxS?{XAPS6Ij%IURaKi>L#j5gMo#+?RrNPXsy4LBr+v>F|M*8$Z)?o7Z&~AC-KZ+E z#!dTMMdEN<)zun5?N8SFvbK&jY1;3s@vrPub+)S1WrzCvj8$E%y{7%ewnZ4As%}>G zv_Du|&f4bIoT~M$nrT1L!#`?M)zhk*_5*9zu-40JnD!lO{F~5i>W$@wW_*b_rCxq^ zMc$=E-VLi>9GCP0<*&)LjFNj)+r5UiQn^pH-Oo0@#$4Z$t7%GQS|rt+%E|+9&rk=_ z7Qr`#Ype#4O0Bg-9#q{Pf-Sagg4E-xirmDo7SE<-VY;rS*22rvX?Yki_|#8by`);L z5exCBKU&(dBbpkrwLSJCXu7UCo$DcwAgm-Qk1E6sXINX!u(qDjrnqZc+FBbMn(J#+ z5?Of+F>Y4)s~xH4Y(q=4+a!;p1)ps#AKPM=bdnm%!4Q+-9MskXVXWn>e+sN9$xnQhu`yF&(Rtid}D z7@Qa}dc>%~!v+kl!vE)Ieb*<@KF_m9eIjGxTDjlKGlYScpBAt@(h!!!t6+^ z&m`31{pp%4zFORnYXI=GXkDo`u(UNdQ**j8?Y@8hoa+8OtlGe-#>SRKHLVS`2jip8 wFTlG(32kg>YRIK>(3+JORgah0gP%dm=CD^Rkd~%YW2==KB`>34W!s4V2gj7TGynhq diff --git a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 
) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/gg_tt.mad/test/cudacpp_test.mk b/epochX/cudacpp/gg_tt.mad/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/gg_tt.mad/test/cudacpp_test.mk +++ b/epochX/cudacpp/gg_tt.mad/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt 
b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 20cc72fd46..6f8f43751d 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -1,5 +1,5 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode +('WARNING: loading of madgraph too slow!!!', 0.5027971267700195) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. It has been validated for the last time with version: 3.6.5 @@ -16,7 +16,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +29,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +39,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. 
Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -55,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00434565544128418  +DEBUG: model prefixing takes 0.010428905487060547  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -148,13 +150,13 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.004 s +1 processes with 3 diagrams generated in 0.029 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 175]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 @@ -163,30 +165,30 @@ INFO: Processing color information for process: g g > t t~ @1 DEBUG: type(fortran_model)= [output.py at line 224]  DEBUG: type(me)= me=0 [output.py at line 225]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'color_sum.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 
'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.h', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'cudacpp_overlay.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 226]  -INFO: Creating files in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. -Generated helas calls for 1 subprocesses (3 diagrams) in 0.004 s +INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. 
+Generated helas calls for 1 subprocesses (3 diagrams) in 0.021 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.091 s +ALOHA: aloha creates 2 routines in 0.362 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
quit -real 0m0.992s -user 0m0.334s -sys 0m0.123s -Code generation completed in 1 seconds +real 0m1.704s +user 0m1.443s +sys 0m0.212s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gg_tt.sa/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity 
filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before computeMatrixElements) + const 
int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc index 61e6f0c54c..2637b5b7b3 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc @@ -936,38 +936,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) 
fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -993,7 +1005,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1128,7 +1140,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1152,7 +1164,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1264,25 +1276,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1345,7 +1368,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.h b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.h index 3c5f6fe31f..b3c3d0ffb4 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* 
allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 
(CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) + ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ +
MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ 
-1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef 
MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.h b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.h +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No 
newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/gg_tt.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_tt.sa/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/gg_tt.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_tt.sa/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int 
uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/gg_tt.sa/test/cudacpp_test.mk b/epochX/cudacpp/gg_tt.sa/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/gg_tt.sa/test/cudacpp_test.mk +++ b/epochX/cudacpp/gg_tt.sa/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 332a0806f1..9923650dee 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. 
@@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +38,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -55,7 +56,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005877494812011719  +DEBUG: model prefixing takes 0.005435466766357422  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -148,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.004 s +1 processes with 3 diagrams generated in 0.013 s Total: 1 processes with 3 diagrams add process g g > t t~ g INFO: Checking for minimal orders which gives processes. @@ -156,7 +157,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.010 s +1 processes with 16 diagrams generated in 0.031 s Total: 2 processes with 19 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -167,10 +168,10 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vect INFO: initialize a new directory: CODEGEN_mad_gg_tt01g INFO: remove old information in CODEGEN_mad_gg_tt01g DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @2 INFO: Processing color information for process: g g > t t~ g @2 @@ -184,9 +185,9 @@ FileWriter t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 
1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1749]  INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . @@ -195,25 +196,25 @@ FileWriter t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  -Generated helas calls for 2 subprocesses (19 diagrams) in 0.023 s -Wrote files for 46 helas calls in 0.502 s +DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  +Generated helas calls for 2 subprocesses (19 diagrams) in 0.077 s +Wrote files for 46 helas calls in 0.331 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 
0.190 s +ALOHA: aloha creates 5 routines in 0.440 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.187 s +ALOHA: aloha creates 10 routines in 0.368 s VVV1 VVV1 FFV1 @@ -223,31 +224,31 @@ ALOHA: aloha creates 10 routines in 0.187 s VVVV1 VVVV3 VVVV4 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. 
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages DEBUG: result.returncode =  0 [output.py at line 273]  -Output to directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. +Output to directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. Type "launch" to generate events from this process, or see -/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README +/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README Run "open index.html" to see more information about this process. quit -real 0m5.233s -user 0m1.496s -sys 0m0.718s +real 0m4.708s +user 0m3.946s +sys 0m0.658s Code generation completed in 5 seconds ************************************************************ * * @@ -269,10 +270,10 @@ Code generation completed in 5 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt treatcards run @@ -299,10 +300,10 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards param diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt index 712b1897aa..db7e3616c4 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt @@ -255,7 +255,7 @@ # pineappl = pineappl -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat index 30bd3794c3..e08e065c5d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-14-g6dba8f068 3.7.1 * +#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_functions.f index e986b059a9..47699fa614 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_functions.f @@ -2022,21 +2022,6 @@ subroutine orxxxx(p,rmass,nhel,nsr , ro) end - complex*16 function THETA_FUNCTIONR(cond, out_true, out_false) - - double precision cond - double precision out_true, out_false - - if (cond.ge.0d0) then - THETA_FUNCTIONR = out_true - else - THETA_FUNCTIONR = out_false - endif - - return - - - end complex*16 function THETA_FUNCTION(cond, out_true, out_false) double precision cond diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/Bridge.h 
b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for 
helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before 
computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 091fecf10e..8a87b6ffd2 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -939,38 +939,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -996,7 +1008,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1131,7 +1143,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1155,7 +1167,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1267,25 +1279,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1348,7 +1371,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.h b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.h index 3c5f6fe31f..b3c3d0ffb4 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: 
multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index 7f809ad0ff..cbe257bc8a 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -893,9 +892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) 
CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index a68aa6e4c0..b79f45da06 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -337,6 +337,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -347,6 +350,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -443,7 +447,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -516,7 +520,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -529,6 +533,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -547,6 +553,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -598,7 +606,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -622,7 +630,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -630,7 +638,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f index b47f79aa45..6ddd16cc10 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -324,8 +324,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc index ce41e289c6..10c348765c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc @@ -1156,38 +1156,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color 
choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1213,7 +1225,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1348,7 +1360,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1372,7 +1384,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1484,25 +1496,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1565,7 +1588,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.h 
b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.h index 44f2636937..d248effd6c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.h +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f index ca0da2991e..b0bbdf17fb 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -893,9 
+892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f index a43968abf6..78dfad938f 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f @@ -337,6 +337,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -347,6 +350,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -443,7 +447,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -516,7 +520,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -529,6 +533,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -547,6 +553,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -598,7 +606,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -622,7 +630,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -630,7 +638,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f index 3ed3e82f91..44078e305d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f @@ -340,8 +340,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/addmothers.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/addmothers.f index d6cded9a2d..593c620d9b 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/addmothers.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/addmothers.f @@ -111,7 +111,7 @@ subroutine addmothers(ip,jpart,pb,isym,jsym,rscale,aqcd,aqed,buff, if (btest(mlevel,3)) then write(*,*)'unwgt.f: write out diagram ',igraphs(1) endif - lconfig = vec_igraph1(ivec) + lconfig = vec_igraph(ivec) endif is_LC=.true. 
maxcolor=0 diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cluster.inc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cluster.inc index 8ddf5bee13..940c25eac0 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cluster.inc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cluster.inc @@ -43,5 +43,5 @@ c parameters for sudakovs integer iipdg,iimode common/gamma_args/Q1,iipdg,iimode - integer vec_igraph1(VECSIZE_MEMMAX) - common/vec_igraph/vec_igraph1 + integer vec_igraph(VECSIZE_MEMMAX) + common/vec_igraph/vec_igraph diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See 
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to higherst + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) 
+ ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone 
bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ 
extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.h b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.h +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile_original.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile_original.mk index 6cb56d0409..348c283be7 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile_original.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile_original.mk @@ -58,10 +58,7 @@ $(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) $(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -libcollier.$(dylibext): - ln -s $(LIBDIR)/collier_lib/libcollier.$(dylibext) || echo 'already done' - -gensym: $(SYMMETRY) configs.inc $(LIBS) libcollier.$(dylibext) +gensym: $(SYMMETRY) configs.inc $(LIBS) $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) $(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/myamp.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/myamp.f index bd02dfe2b4..5360566ef4 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/myamp.f @@ -139,7 +139,7 @@ logical function cut_bw(p) $ gForceBW(i,iconfig).eq.1)) if(onshell)then c Remove on-shell forbidden s-channels (gForceBW=2) (JA 2/10/11) - if(gForceBW(i,iconfig).eq.2.and.sde_strat.eq.1) then + if(gForceBW(i,iconfig).eq.2) then cut_bw = .true. 
return endif diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/reweight.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/reweight.f index 353e025d71..8e4672a421 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/reweight.f @@ -1416,6 +1416,7 @@ double precision function rewgt(p, ivec) rewgt=1.0d0 clustered=.false. + vec_igraph(ivec) = 0 ! default: no MLM graph selected for this event if(ickkw.le.0.and..not.use_syst) return @@ -1467,6 +1468,7 @@ double precision function rewgt(p, ivec) rewgt = 0d0 return endif + vec_igraph(ivec) = igraphs(1) ! save MLM-matched graph for this event c Store pdf information for systematics studies (initial) @@ -1592,10 +1594,6 @@ double precision function rewgt(p, ivec) c alpha_s weight if(ipdgcl(imocl(n),igraphs(1),iproc).ne.fake_id)then - if (q2now.le.4)then - rewgt=0d0 - return - endif rewgt=rewgt*alphas(alpsfact*sqrt(q2now))/asref c Store information for systematics studies if(use_syst)then @@ -1907,7 +1905,7 @@ subroutine update_scale_coupling_vec(all_p, all_wgt,all_q2fact, VECSIZE_USED) else all_q2fact(1,i) = q2fact(1) all_q2fact(2,i) = q2fact(2) - vec_igraph1(i) = igraphs(1) + vec_igraph(i) = igraphs(1) endif c call save_cl_val_to(i) c endif diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py index 74f6b04b68..c248436e7f 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py @@ -1004,8 +1004,6 @@ def __init__(self, finput=None, **opt): self.comments = {} # comment associated to parameters. can be display via help message # store the valid options for a given parameter. 
self.allowed_value = {} - # allow nickname for some parameter to avoid integer mapping for some var - self.shortcut_values = {} self.default_setup() @@ -1134,11 +1132,6 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): scan_targettype = self.scan_set[lower_name] del self.scan_set[lower_name] - # check if the user used a shortcut value (which are always str) - if lower_name in self.shortcut_values: - if isinstance(value,str) and value.strip().lower() in self.shortcut_values[lower_name]: - value = self.shortcut_values[lower_name][value.strip().lower()] - # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] @@ -1317,8 +1310,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): def add_param(self, name, value, system=False, comment=False, typelist=None, - allowed=[], - shortcut={}): + allowed=[]): """add a default parameter to the class""" lower_name = name.lower() @@ -1353,11 +1345,6 @@ def add_param(self, name, value, system=False, comment=False, typelist=None, assert val in allowed or '*' in allowed else: assert value in allowed or '*' in allowed - if shortcut: - if allowed and shortcut and '*' not in allowed: - assert all([val in allowed for val in shortcut.values()]), "Some shortcut value are not in the allowed list" - assert all([isinstance(v, str) for v in shortcut.keys()]), "All shortcut values should be str" - self.shortcut_values[lower_name] = shortcut #elif isinstance(value, bool) and allowed != ['*']: # self.allowed_value[name] = [True, False] @@ -4186,10 +4173,8 @@ def default_setup(self): allowed=['partonshower'], comment="list of check that can be bypassed manually.") self.add_param("python_seed", -2, include=False, hidden=True, comment="controlling python seed [handling in particular the final unweighting].\n -1 means use default from random module.\n -2 means set to same value as iseed") self.add_param("lpp1", 1, 
fortran_name="lpp(1)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='first beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("lpp2", 1, fortran_name="lpp(2)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='second beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("ebeam1", 6500.0, fortran_name="ebeam(1)") self.add_param("ebeam2", 6500.0, fortran_name="ebeam(2)") @@ -4198,24 +4183,18 @@ def default_setup(self): self.add_param("polbeam2", 0.0, fortran_name="pb2", hidden=True, comment="Beam polarization from -100 (left-handed) to 100 (right-handed) --use lpp=0 for this parameter--") self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_proton2', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(2)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (used for beam 2 if group_subprocess was False)') self.add_param('nb_neutron1', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(1)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_neutron2', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(2)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (of beam 2 if group_subprocess was False )') self.add_param('mass_ion1', -1.0, hidden=True, 
fortran_name="mass_ion(1)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 1)') self.add_param('mass_ion2', -1.0, hidden=True, fortran_name="mass_ion(2)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 2)') valid_pdf = ['lhapdf', 'cteq6_m','cteq6_l', 'cteq6l1','nn23lo', 'nn23lo1', 'nn23nlo','iww','eva','edff','chff','none','mixed']+\ sum(self.allowed_lep_densities.values(),[]) @@ -4228,14 +4207,12 @@ def default_setup(self): self.add_param("fixed_fac_scale1", False, hidden=True) self.add_param("fixed_fac_scale2", False, hidden=True) self.add_param("fixed_extra_scale", False, hidden=True) - self.add_param("scale", 91.1880, shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) + self.add_param("scale", 91.1880) + self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1") + self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2") self.add_param("mue_ref_fixed", 91.1880, hidden=True) self.add_param("dynamical_scale_choice", -1, comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2\n '4' is the center of mass energy\n'0' allows to use the user_hook definition (need to be defined via custom_fct entry) ", - allowed=[-1,0,1,2,3,4,10], - shortcut={'ckkw':-1,'ht':2,'ht/2':3,'et':1,'shat':4}, - ) + allowed=[-1,0,1,2,3,4,10]) self.add_param("mue_over_ref", 1.0, 
hidden=True, comment='ratio mu_other/mu for dynamical scale') self.add_param("ievo_eva",0,hidden=True, allowed=[0,1],fortran_name="ievo_eva", comment='eva: 0 for EW pdf muf evolution by q^2; 1 for evo by pT^2') @@ -5598,10 +5575,8 @@ def default_setup(self): self.add_param('niters_fo', 6, include=False) #seed and collider self.add_param('iseed', 0) - self.add_param('lpp1', 1, fortran_name='lpp(1)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) - self.add_param('lpp2', 1, fortran_name='lpp(2)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) + self.add_param('lpp1', 1, fortran_name='lpp(1)') + self.add_param('lpp2', 1, fortran_name='lpp(2)') self.add_param('ebeam1', 6500.0, fortran_name='ebeam(1)') self.add_param('ebeam2', 6500.0, fortran_name='ebeam(2)') self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", @@ -5644,15 +5619,13 @@ def default_setup(self): self.add_param('fixed_ren_scale', False) self.add_param('fixed_fac_scale', False) self.add_param('fixed_extra_scale', True, hidden=True, system=True) # set system since running from Ellis-Sexton scale not implemented - self.add_param('mur_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mur_ref_fixed', 91.118) self.add_param('muf1_ref_fixed', -1.0, hidden=True) - self.add_param('muf_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('muf_ref_fixed', 91.118) self.add_param('muf2_ref_fixed', -1.0, hidden=True) - self.add_param('mue_ref_fixed', 91.118, hidden=True, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mue_ref_fixed', 91.118, hidden=True) self.add_param("dynamical_scale_choice", [-1],fortran_name='dyn_scale', - allowed = [-2,-1,0,1,2,3,10], - shortcut={ 'ht/2':3,'ht':2,'et':1}, - comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n 
'2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") + allowed = [-2,-1,0,1,2,3,10], comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") self.add_param('fixed_qes_scale', False, hidden=True) self.add_param('qes_ref_fixed', -1.0, hidden=True) self.add_param('mur_over_ref', 1.0) diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/common_run_interface.py index 6f82393c3f..3c5601e27d 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/common_run_interface.py @@ -5205,12 +5205,12 @@ def init_run(self, cards): if self.run_set: self.special_shortcut.update( {'ebeam':([float],['run_card ebeam1 %(0)s', 'run_card ebeam2 %(0)s']), - 'lpp': ([str],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), + 'lpp': ([int],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), 'lhc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), 'lep': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'ilc': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'lcc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), - 'fixed_scale': ([str],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 %(0)s']), + 'fixed_scale': ([float],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card 
dsqrt_q2fact2 %(0)s']), 'no_parton_cut':([],['run_card nocut T']), 'cm_velocity':([float], [lambda self :self.set_CM_velocity]), 'pbp':([],['run_card lpp1 1', 'run_card lpp2 1','run_card nb_proton1 82', 'run_card nb_neutron1 126', 'run_card mass_ion1 195.0820996698','run_card nb_proton2 1', 'run_card nb_neutron2 0', 'run_card mass_ion1 -1']), @@ -5795,8 +5795,6 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed_for_run.remove('*') elif isinstance(self.run_card[args[-1]], bool): allowed_for_run = ['True', 'False'] - if args[-1].lower() in self.run_card.shortcut_values: - allowed_for_run += self.run_card.shortcut_values[args[-1].lower()] opts += [str(i) for i in allowed_for_run] diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py index 262d39a736..3bd0c281fc 100644 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py @@ -38,11 +38,26 @@ def compile(self, *args, **opts): cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py - logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend in ['cpp', 'cppauto']: + backend_log = pjoin(opts["cwd"], ".resolved-backend") + # try to remove old file if present + try: + os.remove(backend_log) + except FileNotFoundError: + pass + misc.compile(["-f", "cudacpp.mk", f"BACKEND=cppauto", f"BACKEND_LOG={backend_log}", "detect-backend"], **opts) + try: + with open(backend_log, "r") as f: + resolved_backend = f.read().strip() + logger.info(f"Backend '{cudacpp_backend}' resolved as '{resolved_backend}'") + cudacpp_backend = resolved_backend + except 
FileNotFoundError: + raise RuntimeError("Could not resolve cudacpp_backend=cppauto|cpp; ensure Makefile detection runs properly.") + logger.info(f"Building madevent in madevent_interface.py with '{cudacpp_backend}' matrix elements") if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' else: - raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) + raise Exception(f"Invalid cudacpp_backend='{cudacpp_backend}': supported backends are [ '" + "', '".join(cudacpp_supported_backends) + "' ]") return misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/py3_model_FDG.pkl b/epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/py3_model_FDG.pkl deleted file mode 100644 index bf5a732979d683e3642a1177b58851862f165d66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 49027 zcmb__2b>he_BAZ1hyf#DLX7N!gpGg+5fNl1I7>3%I_wVgjvMH^vm|Ydvw%70tWTdg z=bWB7>oaFD>oe!@opZZ7%uMZq|KEr2-L0xyea^k7s=KRudU_5os%uKt&z0mnXGmR( zq=w8+wx()ZW~Wninbw{YTk2DdJ>4!PN4Q;!XLj$>rAu?NDdiT;EJCrh$?Z0CO%&!O zCDTyXm}+&qcLp}ablbb3HPh2w4VbGJuS)-(?i%RZqu42$*%fR_cg>kS__sOJpwaX+ z&1-3Zt@9e1-LRK9G#9gPl>&(?zbCNPQ<*qvpW&NicG$mVG-CkPUZ){?+ zyPj^jprJnFxa&LXDc8LExwUoB!rh>{xAtsoXim8sYH>kw{vvmynLU9xPg>?B=O#1Y z+_<{S%r(%|&{|7*+}_J(u7hf8s&P&&BiF`0%M>BAXkN*(q^12R_x<*WIGJOPfQ5`UMItcDHnVQ5L)X9cbY| z0Vhh^>7{Zs)i>5o+|S+0SwF1K*w5YC*`T_Qv!Sz*v$3;@(|eiIXPMI%C4{k!16C*G z+m?Ks2nzav28(8P0VFnGEN; zI~cwmITaw`L-rX_Aw=S=HDDhv4VT<{Sgc*Y$W zfM=GtJ9ZAfo*jIrT=1QPacFaQ0X7G|ZHc?9f$oNZ7`0-W{&RYT3KiL*N9D@DTL4|> zjyBMBOWfV9(BZV5IK~}ghP%6+Wsh8zu>niQ-P5o@n{g{)nd**@&)li*UYK92+X*M+ z5>^L<_3qw=uzra*=d_Y+LWG}8hEx!)@2%6np@qL&Q9ukMLqp>z&xC}+)md4 zu}ylqg6^%wI%W;^ZU(zwcZW-fwqj?d|6j^kXO=rZF9ux*)FvC}re;gB=NK*vYBJK$ zJh!L209!Bg+(pG^#gLja$6Z`ECrGE~o~G2(Jw)ebse5SF^%Cw9Kl9ewhNk3PtVCpy 
zd)RVSa8h0Eyp{zxNgs}rG|&2tEiLnE!`35k!k(Sf)d9jtHD?yPN6vDOQu-SFD|L@f z>beT6fO%MG%x!hEp4(pCrMgSIyHvZ48&g|e?k;Q9+1?`cDQR_=d+sqhvGy6ehkI;w zQR~cg1L|mjcHAVVudX?`%yW;Qxjwqg!$PA?W+yp%wYnBLVN#nzp-(J!GSzDug-;%G`mUT~0xMw(rEq9Q*<0tAMxighhb2}^PAe*zDFp;W}?m4Xv zqCeL;wz_Mp!w{Y0ogjwu7{l5ohVzS^f&5QnxIkmLu+x#A)6>03N3FrVc$RxfF+}WI zU6g8fFHN|YdG5bvCq4J_cnVi!-G3+CD+^P&N>i9VeR`>TwWe@QltSq+_gYQiI)4Dh z?BQN-N#zEnvW`jRMw5z3=B8rJ=H_DOA_tM)Qas^r{A0KX_SS@Zo9Es>+xOj3TwPS% zP5toQnQ-s&+`DI|Jolb>BKKz9`x5T`If*F61BT+kkb-(VlyD#R+(!(>qs5RZr1($5 zeav$oC&d$SiYK$~QwjI!LW*Y$#j_zrH&8s6aG&?w7YxOVq$moqcq!q&?76Ry;=gf< zSF`SG3HS9JiWFzm8~*N!@xXw*sXGPBq0car%AWT#ub5fBJpMG276L*+<1r4_#`nz;V{Wn%A)}b{(v6oxEl3a;HzY z5d1hg3vH`Y`4b(PPd)duc=DfT-7gaEm$Bsau+Y%d)RsxkZcMpf>ET9?W|MI}@^wkO z`@gLFjeqb+Hn%i4)Fm6;Zyh?m%evot?hozmk2v^H!JmqqtIeFoA?IhmSTnuM{lyf~ z`l|}~TQR0OFaB`Ysc?Viq-=Np2-F;2;r@vxDEU{|ROxc+c59a|IGYEXmP=REsI?TK zVB6cJ8yr-8GhcD(uJskpwmh~|Dp`%2;@~5zqu|LJ+95myNe|SsvLGxyblUUykMS5UpScf<|gq?w62M3+M9Airo9;oo@}n}DD5p!&q_boglq|0 zn09~du@!xO+FNVGHn5&-3)?Q+!J4$UN3q!X+4)rmuVy+jAAlz3EX}-Fz2Omjc$t)- z#m{^oN@3>1WsvVEYL^{QN}DQX#^6}Lsu3~-RaIyx3f8GH3=S%^g;A(f>pieXLM7Fx zjGI!UauhtN&<>GCm8fT>3N|6bVGA`Hp*=>@=WDd1HtYoJ$u?st?b~vAw6VT=qqaFVKG6g!gZ7z^NK8`}+>ep#x%$ z@{uyjcVIRh7;{urNv-eb-Yy5l9OWY<={r_ym)S8#RjJgWs-sYkf_46-;NU21X+~j= z)>m-$nfW)Dn{pf+6g+9r4$(NIQP0Z3unB2|EgXj??a@r1KMpP0Fb~!f7q(pl){H|d zip6QpI_GFrYr5{I88nz`&Gb@f3#v8Kxfjoms`M^ERfl6C3bElRuare-@rPq^unm`; zWpaq`z;GOjQraB!FbH0(3h9@`su29JpsF$-j+%9{9{~qtE-^A6sde4S&2&GCn^NYZ zQScbje! 
zsdSq9s)8r8UvxUC94@Dz#aHlDl!6IeSt+Oa4k&m!N@-Jtf@j3~Rh7$`sH$3LpE1A!Z$v#SH^C<4X4t|MZ_ysN(&wjmn>O4I>&YFk?Q$oqN%1Ze zd2T$*`87QNWgY#ywJ|&;jwqFT(Bh|gFG}eleidVWA4REXlHX%>I77FsD_IQduUy!G@ z;Tc#@o`r3f=U|N>&!gxci2VJ6yM*o>nk5cOI48V-HdCFBlfsK(6^ErVc?nfBUtUHr zHebrCUpP76Ma?=n--CmrG1QF4`&!5Sc=d*Vd$4QjkBmT%cPyj&=i?@;Hpv3yU5hGtyBpGg`jIN2!p3tOq+uPCGgiGCxSk?41n6Y>YFCx60HqQB^m zN%S}BS?Pk?hY9HlTPRTxTGOF2-PmVo+6aMNhIBDb_+cA#1~WvJNbzSeO2o6unT-%6hN~ zSs%7giVe^jNwFdObS}k4=t3#Vj1(KQp5{K~RQvZ#q&HC*dmj|S*!!|0F75rfz6ojx z*%a23&0rb*=IjxRehbub@dS&DC)mR1`=d3Aek=9~5A~&`vNalXj&DPc<<^gE3kS0) zH`#2*dYbdwnQXGzp4&2;0Vsso6tiQPO$llV83^miAXsLz1AD}>8H{>XhQKCdC~RRi z!_XRKQ_4O|(B?@8mRqPl>KOD7$jDYoI zBrK!fkv(G3?}U0*c7{#JF0h5s?~2wa`cdrDdGx!X3!|?x(T`?5h~B@cuscy0{TLL& z==WepGmplioRB?XJsAhf*vHc!i+wNDvoZlTA=R*jvG0vmbIh!%kcp`2F>?|-hZAZ< zxlBfbPN*sL*bbWez`>-3o1_w~2fK%VSy;nunbcGi!lb6Lqe*Hy$_beP>j`hEGpT*) zk0rGq>RH(zHX#SV7A7@|TLfo4(q}0Rx-mhlS`YF(wb4;-qotTGu0zaG8xu``ROOqh9*sB zKEZ+$)&h1k$t*-UA&X!=Sq#f$4xvAm%%P}f#e+@A64+QW&Q~*WEb1~<4nsR3+vRYS z+T{q?tQ^VS-=>WdORuBUd9*sSu%5KTG8k-IUDVbtOHnr~g=KU_5(PUznJkAxC&w|` zBUH*rN2GQ+7QSF|j3|@iP}RwCJPOvyaRR|GIYt{jPt-b3q9V}9Y2Yl zOw#ZqT5T@;Phl(P+Nmgn!*d$h%EP+=(XtXm%G$!GtZXlDkpW340F;)REXLHX%>K zb~GqY!PhQNYwu^+FFq*Gs`EK@J`d~33$XD)c@cFpC@<0FA4$Toc$sbSvA`80n*6c& zFG}H943}3?)vJ~Tu%70D0}D0(HuIaTP`tNL2*rDw9gTSJpq!9* zVLf>dmg2one@wg&P|wPTunGAHHYT1)b_H6az5QeMiS6yB@(EgWZ~v4o+ur^e986}c zN#=9b)7;a;WWHd9$$W`In9Nt~Xp;FF<%IkX){}2wnasEJ$CCLD^{jjkn~)!1W69*~ zo2_)|201<1^t8)QvcQ(v>$8{$g8PqYC*OP5w;if@dzm znJ_}SqN+2Y2nDNn-Qb{jdm8b&v!3Q28H%?Wx21TiqY#R>20I$@dZ3(;HDNvJ2}|+T zqCY0y+Nft`9oU4d3)_)+z2L)VQ`&od_KS*C|JeY0uG8d&WP8Kb$>UiF}u;0+?L|?MYbRIn6pJNjedZI60Z2EZny7`7wvO5kgkf!ccz`^ClEL7juuIRw^|p|ElB zhM{i6E2T@t3r-BBQie7?F_hC8SFubg(B!LFiBhOyxm2O5Dh@}%s^SPZsN#5|;z-v0 z{Wnl?M{Y|McS0dlac6ckD(-@ELUx7qWE3n_+>QR2ilb4_%I>fU83WsqihID|RjfwcsJJ&>{uHEm>F{tqk&d{6l`;uUzJilc3Kgu7DZT@X z#C>9pvSE@yRi&*#!7A-kI4JF2M%rnt`+IjF?R0KSX=k7iO3Noa0%`X}IU)POda^$( 
zr9FWDn6$G{&&q+Y38{tcNZNzoYnP<*C9qR+5I#*43JuSQ)l*q}ovMWWLfw_q!LMo|T2LxI%)B4S|ttF3JkDZ*!3WGWe1y9aqkJw&*4(eGs7d9d1!4?K}K3dZg zv3k&Jw+q-Oy5loaE<}S)wu|VoO}2~SV37NmATMD(&57pJn;ZdR7?Z z4JdeWBYVVxyb1NJ+zgwLTVM-=ycMlckhif<6y!*`9Ss`f9rW0Oyb}%vS!05{i}hf* z{TnWKb6W;^4+@^#t6M~K@IKVDazAWB9)OJnX`V8AP1X^t znl$4NQ1IkK-6G2PBh<690yZHZ!xm=ziT3!EK0o8nwBd7De8vXbE?>f$jK4zBn(^0k zbdd4?;0rSzA>R4p}6k<%TeK)VscWi?cFEwefb)Tt;(S4%hA|gt?4iN0ZAQC?{kr ztS5WIGM91m$8s5udRF#=O~?e;!d$A+8s)M#`$V~v$wV}0E|ci7E(e7y0{e39VF3WAHT{{Y)c1zjOXtxaIge-^k zLb~#CVpUi%7?M_kWsp>op)|1m=?aqgT+BrtM3t0E}*+9Dsxh=K32!&9) zi`mg=cL~Y~xfIru%V4S9zvz!?cRA{~mV(8#6l_P@T?t>iT&2CQX1}<0*QoPabzTSS z$@Q>t?QTHbXm=xBk#^A?vzzFM30NgJqs15S7L-B(E96$+fh&dE;tpIip{mN>fr3@p zJK>tEmK1hE|*@sZi%EPb; zc?7m2WgmqP7g5^#G4_iq`?xxvQ0J4do;(E`SN3Vtjk3?s6)CHG^s{V>i8@@KLyIr! z^C*R)R>}*gs&+4;VAbv=IH+CPX!kPf{vI7@_X@YAcK<~o)b3SwG}^s}azb8*_2dm$ zYWF7nG40+$Ju7d+CgdI1jjeN80@gU%ULKy?%ph>n;|Z3WkcA6 zYy=xq)+n(tTBAF5z1b&xgtSchpsq``zI51@YMa2pRGLgGo3ieIP_mmzWixKeR5nK; zOl1ppG^zAM84m!$deR@3scc1mES0TM$HfFJE+$|LQ`ru!Q7YTB&(gHXAgu}HT`&zm z7slRfVlQUh|0H6piM@m&XyU#y*t(SnR`4&q^t5LdswZ zV=qT*6nh2xbRK&px-j+@6MGfw!4%SO>B?}TF!m8Bgt3og$MC_x9Z^fjPO$iz6)dCQ zg*{@??}~a>M!_azH`v1HN24{0es}ii{K3F6=)&mdndtXmJ(x9q^ka#_==Ve+jD8$D zhS85lEg^fs;u9}eMqkYyvFP_kJu4Go6EX?5F#5@8jiR5zKAlIu54tdV*F>LSJ(xFs z^fg3b^ixp?qo2l(Vf52c!!v-ecm@!b(eKM1vFP_h9nS#5;u%2L!sutAHH!W~_USzO zT6AIb858|MtOwfr4`n8a!sutC5Jq3ej%L=^qnwZwEItr}W$bh5kHzkwo|OjJgrs2$ zV?P+J(f#B`_6aW(M^s1?8uaMXOpop8(*g%`X*0RZV?7uNKNpwVG8aK1%%znbO)eRf z6Ve8Y53pdF%L4jixhzCID~n(gvKY27mqXAR<#H(dgt?5Y5DyKS%MyBQxf})ubD3{) zIh^%ikM?hp9>Hyy%aJIAxg5ogCYPg8PDmCO&jrFVm!byvu7sGmT2`s}19=5y`b#p4cj4ppcMzNw#5dK96_R3tHu&BV=;d1!6^YaUr zBP!(z6tQ+ltA2T>tA8s@rh1`~?n>5!$?i*c6`EAKt5FE0yM`T&bl0Mskn3P^$pcI2 zZlFIV-HoW@Yg@4R+7@g_(%k}IyWFb1Z)3l>bhoSX4t3rM>&acPap~?x-AH#2T{h|N zrK1Ds?t?FoZe*q0Pq+@Gdq80_)rT7C9%SAB+-az}gn9@~D&4~H_ZV{#UD(YEz4Hg$Wu!SkU zp*`ND&rk6!ZFn2jlXqa-vn?mZAJ7t}SS3Gl`wmk4NjJ_^A7)bgne{Y3GjfKSuNeMG& 
zw}?{w9rdjI0h^FNVPh$pp#MUv?rVCh`ET|K&s0@q(gp8v(lb?86s*VaA~<-aI@|== zjrH`Jn6*xY39>u4-_#K{?pGF`VxdOZGwU)n`(zBrp-{#%I2^M*#b5elli#2pZ3_2K7YdY z*M_ZNJ=q$zUABQWW4|qm#m>=r-=a>>#{)O%1u!~IYw*p`QrQk&^@P8@ZnM-`<{y&= zpqe({B*qD>I9A0sKucm(d;@e~tcq`d4vJOr4bUBeDs~6wn7kLj$Y6LWUN0lw5Uu;) zNB7ruL(!t*4MV|`Qtc3lSB834%3%{y0b3|urS_}y2*s3@?vn|?xDl23cwD?Nxic+xOS60a=RFNp20Ym{iR^h8iM#I63w>25>uJsj| zEO-lxPM0y6t*E_t_-ng87cdq!B04WQqVqJYEaXBr=pObZ_0mdXHxK1Q@p{fRi>d6 zKB$2$E}11W_&4txwuSFUot~5izK*A-@VgjLWnTr^j~nAPLF;KOsfF`I_D9o997Vef z8!}7|K*hX_OH#>3nMEi5-=#h^FV$Qx2cle}12;d}*p`x7)F-vgP8*VgP{n6Fuzcqm z-{Zv>w>2i4X4KTyf~~f;OlHHIR#Lt{H1rP~SSEFFIepV4O)FD655FEBPg1bN=i5!W z{g~>Br3*`C4mVTazHpTGAKX6#56Fj>4UxI%h7|dhxw@97d5x)s;&4kVVFNs#q+tsP z%j95gri6UqY)B@JXh#|yqc$mOvq24HFwrzfONdLCX)YeNLurZ3TN$*g&|;u%BTz1_ zIrw4R0|%B%#>ek?1lkmTzQNBKfpS?80*^L=jGj?5FeEUeP(HADa4{QGnWS+Csh9L0 zq=P0-%Z!>5S?EX8v5<>2n#G0Dlwm#`5=O%?2Zv$Ep(}&+6n2RXwoDESVLP2>hp&v} z2qihvMp7&Y}v}fS+3z6 zV+*HDjt#@voCBtF#5RG-O60he5go5YC)kL}<;0K(NA51Aa?;AsPFA#23>sfy$k{Y; zzu>qcr-q2ZpEJffZEBX_(%ovz4dbc9?XXIhafj1L95FvJf98BakjS{d@iihPMd4h~>- zEzRkR5TrLg4`)Tn0VVr&1@S^LKP8>Zclp~kRe8!FsBL7mx%R6K&TUahvtQsS_ zg7M!gW4uxsuj-Jo9E?}58Y8-b@tTz}UaO4Pb;!u$<@Kw^h^}C~VP%XrD&tKZGV=Iy z^QtkTD;RHC8RMs`vm@Y@+1MUWW*JHm*m(P6HDbOwCM73+-?V*G;&D4 zpLbUJMhC0rmb!!GY529JX5wCxrc9e!BhRpf%SQC!d-vs8G|ZCcV5_?(6}lC#woJ+M zXyKyRMDPNtp1cUl2=GS_#N+13ODOvx?E2~aqRMk?&rOpxO`^09{xXWyMags?t}Cn z{rrl9cK2hAVZKMq!qnea%nuA^aB|W0hkO{JKCt4T(aVSLU4Z%#Q41AVp{O6{q3Uu+ zK8a9YdGXsTfB&SZ0QFO%b}z{PGe!N}pa%IbYO0ejBFrE5`6Aozj)bo*q6PkIG<)(t*toolnr6#4+*ak?ZM~=Y?-@Vj80NRcT&*DF?-cX<0?ds3 z5MkDzgfPLNvo(hKBQaMm!2C%ue=fjG$uA-1sNL>6ZQB*6oKb-JD>2t9!2C@ye=opn zl0PEMyVqWJ*^=izFTnhhnClc^{-v0I8_Zy8Eo#an+oTId&rH}qn$J9b^1C|~pms%< z>Rtt?MQHY<8!Sf2*~$#d^rw52ZvWXL1<`BqiKA<3q&r#sF<*^neG1T4SF|+@+87g3 zd=8~!AUz0DI4{>!ke&u)ya_3Fcx+T-AZrn%a8%b;kaY~mbOW+Me7eOz)+I>cbnB%c z>lu*!4M^>7_DI$z$R-7vY@i?;8j#sWlbi_^i)15$6i%p(6{NQTnQK6DW>5^I4?zlN zP+tYv#DFv!kemq<1KE@yg%f5o1=-wyY;82@w)C%)cHL)7f5PxIw#V2pqxY39(9O)4 
zeuOKWF?|2fxZoz%5LED>3Y%hQEvy=37~1?BJ}7i49qO+nN>Pd5wMr9HoWU#FF=)4N?7{AN9AnXjZx6u6XIATiG8xC-3ceL` z3vRbe4B3a~&J7ul{ua``2$M%T0d08p2R5!}-2&O0ot3i>IAaqnphsXOG7-HjOp~~A z9@AvBc`^l-O#F?BXreI5eYlMh+9nFw`bz?REOa&8G>>j7+B}&C8&9ltflOy-@}$XV zL7Tx2d1zdo;vpm0INID&*^iwy>L8<3N{0gy?hl^@{{U{1hd&E#o*W39gTFv(+1ZAV zOb&w2f}i9jdHA!@=1Cna@$pB+pP!m9_3W(#ykS}F?>LydIyehTG#Bh%DRi@N&EeL0 zTyxRpi33Y6-asN2xWb*KfmFS0@YEIiIi2&bJ7CEqsd!k;iuk+B`WFHis`I9y{mrrR;o5 z(AUCu7$Nfb4o90ON5JOrHOZ0eoX^)}=Q|30Eqq54B9AYNHay`4o5PoprRPkMl%KuJLmD}ZQ(qDAbFf8qRo?&V9AL; zx&t<~jhB60?QFDpat>@h8*X2-f9GtxZF?TNS=i3! z)_H6fpv{vDVe{F_D+a=sOkL^;l;d?x=Ic&JMS|0BOm0Ot3)^knI*;vkwBhaR zuz76w34%M>KbNgAw{p1)-7IW(bL%{|d(ejWJiz9&mCJqXpUYO5Te;kiZWgu&xOE=e zgJ|>QA=n%?+?g(whuL4*f=^`d`3h#C9!?h3$s_PvXddNec{Kk)8(!N7n@>|FkF$Rv zjlX08%@gojXrAO|c{ER<4G$N>=F^nRGwfeTQyygVEc_Ol=eSuO>r(TPR@jXmCh; zk^S>&urv8I8F>kQ3(d>iERW_DwBd#?Y(7nyyvqKCG)AiI2Xg-18Lh~s%%cJ=WZFpxrY#vQ{xqQL?`7}6J1TukK=o^;mc^S zagNpnA;*v0D2JnU0Xcp`yM^OtcF*JZ1#O=E3LEFBi#UGcMmZdH%JDneEgXNayWvO$ zpN0I1#;p7WJ4^nC4R7vr!2->buCQbe9=-8zjNmhrA~<~R^{|SG-^)nW;Oev++&LkT zAs`R#A9YuX)eHrnFc>GRhqzpp<=nfdG52orZe6UAPvxi7gFtzTu8B6h$q|-01y`x# z!a-Y$+o+)WU>e73Z8YU1;-jy_o`IHay26{&*6ja;rmPF+Ea?SXT<8B5cWq+>ez{T> z_1x2R>M85NjZfXy#~)8NU|)npEzKi7;fM6HA)Ly9-@k5a!Y@v?%0_IWV2fpA6tmJB z){mT7Eyj<52Osg5NFVs{A)6uWi<&2!z><)kh)GH|<>mxUM?lyM0HhDMZ3a(-usLd; zYyle}G&D&+Zfr&14{XYoISBm~VJm~cN9tNrb+WYuf;(vGkfC)gt;LH=WSbn6Z53rZ zgTfKRuVU88_7)gkTjQh1)?#1*XFv{4vEq~%97a>uS|@o;Vc_fee1_f~1}{6?(RZ<+@2cpde01k`A9_PTGvvI;Wj8zSXbbM{iaW-~ zO^*(v-YkR;jvhJU$R2izu@;Iwm13Ns7!y+P=81m*jLu!TB;)NQds#>(C`q*;so^P3 z$Cr8N6Mqp$cw?w;L5b{brd*$}qz)RN>g5DW!Q0pR-H()1Egq@^`F#6=FXF zF(riHg}DV{e+$F`3Ngz-G=>no+O|L(Xo09zh=U9SUuy1;P58j)UV25HLKWTYsY8l)`7Kp_P zafpH7i(7mMUUpg_4z)md3bDjM@YN+g1TQHq5QkYH4p)dH3z`$ zINCt)eHA_gF9j_SSqntFLM$~9s|9DqG%xck5X&qO%N61n12H#7H8Vw&$7Uut?=g<_(O8xD{{^-A^f>^`136A=PUdLKD@Kcmsdlt zvSVLp$G*sdeX(L+;$!Pi;`#8Ut9+)6t~5*JQakZw7UF*?@#ThiQK;Ea4oPw6CuyEh zE}c1+|CjSgS%^n~1PXS~P4c&{?vXBc-087l)u|GP_O 
zdgSbZ=Ftt@U(p!+=k`nFemnUC7V-y`{2@a=u>t>q`EH zA?M4zY~=q$_BZY1Z&}FSR`PcYdAD$x5g+s5w(qKC{;r+;Jq!8!O8$W%Plnelyh8Qw zpWp`SN)Ol{+6h0h5UxY2VHZvT){B41)73-4FN%Db`pHQI2?9hNty_6<)=+pH{4pG&^6 zQ+;cp`cA37H&nZX7yX^x$lz)A2Rp@&7K)#g;%7s_-}m;lYZm`GeWKBb#kQV3w)HKsZJ@DjXkzOUPV1UrG(vH5XmgeN2cd0b z4{c*hXuUPGJ|?s?L)rE%2(8ndAsR<(w@^UtI(Q7SuRXv`ECFt+0d8gjNpx#XbQ_asZJ6kG9StfTK#St$v2a^E*>)DP?UihR zA=^JB+cucuc|sL%72CN=EL;PXYmng@5_0izzToWQfBD#d@ILl{o$O$z8El~$qBKK& z8s{8;mMDk)ri{)9K4v=1&QWUNC{vDdpCed94Id^I?5k_2!JUs*g@5LX%`&OPww6<7 z6(RGMQp3^a$q3lk5^i{@jAUP>kIpT51DJC+%^yumEIR_o63b4+&x>VewBcQzusnU2 zgrhpLgBfHAAY2cH2MgKN9>6F|0J~`bqx}HFu_%|_*;mEbp@UF5On(eEwWPlX5%ba? zi#B`<0T$^yfBJ;0wySb=HqOp8-omt(GEFc{fBQ_Uw4aiw+D^2$g=nG@P4bCCL91jk z`>LS(uhQhkp(r@@h3D{KgoB+92QJI>n*t__{`)Y1JpB`B^P~ngzAFrusqCx4?zbwV z|9_8l8i*{hPA7j}tTWJtSFpmyV;v#;vhP0>)~tB`p|Jab$r9}T3?MJq1JLHlEZBIk zBjrH${b$1d-(sx=ktNoH$e$N$5^bK$hKDqYaD;D)nXiJa+M1!$ zsT@Fcb`*|I0)ahFNBH<;MfIdun3S=0S-0-@E7I%dBE>V`IqO5X6tu#!u!yTcJL+( zc(a1H81Ts1wP?2gU6zn*o}J6JaEWrY8m^}BURZ}*b$MJFJ6D^9Yrb+V zFkE~^tS{OIX4WjKi%TX8?Z}HP$cq*E5QEIui~7j;c+~%7Wl>5FwZnN9xFrgAn1NfZ zyMar4_K#Z+w?G`B5Jwsa{)}G11o6L^77p4`=xf>ij<$ej6};VmUl@XSIB2Q7L0f9) zT4v!|u3X0$F22?@U)m-))(&@^1@3r-JHfzh8cNIW7g@4D(E@RjLY! 
zQi+^`z83IPE#Rjq_~{0mKUe94cPMRBp0sD!xz4n3ouyo78!j)DwnMH=9@jZ`u5&G1 z=PB3uhKuhQ_VWt&y-Zv(xxkKmp#}LOMZVY|^QEvpG8)Xj$8T3%f~sW*E=6lP% z;pcK-d2$&NP7!`V%K~w^1>y>Y__u-J4;=b2>1XG9zTj8B;u)97mFR7OzsdrCwZdOx z;Q4c*K79PfcKk|{WO+k(t)1;U3)}U|c7tKNAUqxH41qhmV-|fOh_4RvzgV9C>Ew;} z7;dt}aI?m6i;3Zre;xzAZmF^6$8f7XhTAML+^#X)VPdEVRbOQt$~%bRPJ0Y@Sz@?b zW4On}aOXeO0Uu*_5W~Ip81A#gaKFazfQjMne;z|eWAUIphKDRMJghN1Vq%!|Psf6< zUbD}KN9{5E#}dP18pGozhFAWn4t!CWJ%%UjF+6FB;VF&bX%oXH|5OLQNX#C?Gxivs zwZ!n8#_+s}A^p!|=x8inu*dLXEQVCt|1QEyXqzQ3Glt-4?pIL3x1iXBI~UjbM@S4( zm%-!YRkY?DYq@>?dEhk#c-;p`ZykTVFjOY zg^;|6iN2HZjvfBp7<{VDIdYb~N7(%DTkPE;_`<~Ep7K7tc!K@|{J}2+vnxIz#p$if zv}rY^@(~*JZb<9AhUQjTf$A*zn7t&-QfIHfRh&-w2p%NI|sB2DDe#_Sm>=kk7GcK*aY2*s)NDf*Q3TIo0^gwKx)?w zT_WweE7EEP>A)zw)eDgLRgMU0aB0avU&+Cxgk2*Cwui#5X<%z3*q#Nj!#jh;2$c;i zB%M<1ldhFRy0((8V@UOM?mr|Q)G2AN9MbiabbUjrzl`FOZt#yuD>}(^!yM9$ zlyqaC)cMs6es5*$#yu`)&n@Mii>Y2*%00JF4pLu5+QcCJY>+lJNWd~keE00cR7M)= zfPz=bY?gzvxuR@gP_8y8{X&#rG9CgH9zZa6Vqh8@4=)9coUl=2xqtjALly;%3@T`3 zhcMrGqimTIb$^X|D--q6ChDy!t(zO`SIlWpTJ2wl#wDf!k1{iu~L{Pb5Xz4izO*0;Fi?9;j@Ce75u`-jDMG-N8aQ zu@k~U{W~Dk{WJ7MLr~6O2ImAbM1vXX2jjeDA{l0hWEE4-V=K*JD^s>|!}hjet5_LZ zuAIoOqueTU*s7FmxX+e8DH3{wg>YIYLgN;2$e0^T3Ew3acju;0w~WjQYDW!fCll0w zD5#w^sIHu*ImZd^ZMxrZ2a;WKz;;!zQ3fm%f$gSXobeG{&cOmuqjR8kSEw-tsx5-r zBZLB&?YM(EH8uxoPlXz1p!8=x{0Tl@p%{}jDjsZPuNmRS)-~T z)WjU9NeVUDKyAloOZ=#&7$}XaFu@ASzE2KRLZNC5)YK@dsR~8ebH|W}cpXDMN>9rH zo33Cp4A}GtjNhc-LM_sY?}OrU#(?R0(Bsi&d+2H&};>&Ge9#Upn3xoMdatklaLCfb0L)jHb=qc8n724Fvoy( zkf0ud8*-r13U#o7dNG1(451>`WBZM!*pvg+tWYfm>ZJ&3o`LE>Ej<>y3YF4tmd{H{ zrlGDeRa@Usml09eR>kP;^r-IR^sDacY&+poTwORTsyB89RpB$^ zzGIC)iB!FwGh)Iwtns&qs*9YRC;Y|QNm%k#cXxK1@CR#`u(rB0cEWG0-OpNAr&=#H z^p~EiyE&65{7K(WNUM4^r)I+MtgX(Tn>aJ8*K%qn{Kyu;RYLVTPW^=MSv!oiUQWY= zZ&^DQwKn~tdP6f_K%La@y!%OJG|4tteP4j2=4g9ure&DSSKk7QuT&PQZxMa`_WInG zOl?y#-6F~6WUDMj^CZ1GY7x9jy4Gm`snj`(rQeu zpX6*YsZF`-TH59{HZ;$z)gW5saP;w`!jH8in_C-NnthiX0gop~D#rEIb%?62P2UJT zAHPt;ZW(+X{wTO-)*{!Tt%xvHKXh&b?g5(sE9TB?8#-}ZWpd(}p-poulTCHajV(i) 
z@FMNT)}gIU+iY{l0E0DP$NmG#hYTMwY`_lv2h`(*{%j~4R#r85Sn1$ll>*AaGBM4oRsBWgG%qprH#O6|Nc1n`x&Y1prNHhi>6KFtmh^IFZI<*JV?C6x@aKhq z^K+W3c<1Ugm*M#JtN!@oFH5;n#VV7lPJj8w6(QDMT#Gqxr@2V-S1Vk=q;-kFRfGQi zQ<|szw4R=M#`b?iizi(DC9O2imuWq>@pPBgQxnftX+5v-RD+WOPZIu7od^1~9+r8e zPU|t1huO3q5_yzN>k*9yxiBZ!R1TM3noa-g}kpSuOgr}*V+)5 zt6X1Pu0SC!mxd6Ri!onZuB3c%xe$UZT*Uawat-1u%O!-bELQ-&vOInJ%JSUnE6dZK YFB;ETzN9?m_>%H4uR^x9%^vdq08s(jkN^Mx diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/py3_model_Feynman.pkl b/epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/py3_model_Feynman.pkl deleted file mode 100644 index 3e55c479e2cbe319b2de3f58e86119ef116bf180..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42837 zcmb__2bdJa_B|}1h$1K|m=F3n(gx1X)3yl^}}iusdw;xT){VlC&|7fH~)! zv(KD!&N)5x88JSeIj8?Q)zx9TYZv%^f8YBqRkv=Pb5C`3sGjcL!;5O0Qg!u`YPCkx zwn%!!{8Tnw(=tDkuFYk8OmC@6H}RYNsiw4DG^Yr~Y?Iw(&Uz@c zrX<%;+nCPUT{{BXTZir0kj?e5y8&~Z;x&oyVXuqW?!{KooX%iN+3U^e&VQS84Jw-+ z+P%I?Fx%SDY;RE2WllH5)V4IXh~2ZQ^PF{9vr*%}Uyz1fO6JyFf38yD0tbLH&2LLuaqw5IKTbC{C7`JDCfPi;$Eb1p6T zf0Mn1V{bWU1333gx8_=!XS6lVPm8@3@Hra$*;`k2YO^>({k;)dYHwq?gR;~fU|}pQ zjDQu$wtA`IXsTDd0^<820LkVeYXMxr7 z`38}X6JgO%dwUS7;&6M15aX`jq8a~%8E4yr!;G`-VlehAU|i`jmJ~7$@f6nEL$$(s zdzj|yUZBwGZDHd9c~h&8I~Cfbafv9W!l+p)!DvE(9OrgXiqA z9(Zn?3nLRy3_~7fY5$kM*o>2(Bmj|7-_twzaW%fQDLw7MlCkvsgJ?Q!N zz8ZS|GJB>GI@lPy*|YS<=w@b_UC1)WW3lZ0z`~=NWzPjk=h*HRkW97r4{wX9_5s)y zn`0;Bfxa%4wbFI=JbxGT4`BxuQrD20(>FcbK1eG}54BTTArG1l8V+G;d%jk9cg$VX=)p8r@`ZvhuRBPLp0s4*F-(dL{=eDgGZE04YxCzD3=;)A8aP#8A_c7 z_MKoiVmx^SV}16%sW!U_BihSM+w9Y()v2k0XRB0guA!wlYqxZCQr{zpbgu(iaptl+ zybcI;>0t}Pv&CwgHPqQTjB;0tmlAEo)*SbLX{+ZvdtoF8bsR}(E=4PpVXx?^`_A*y_wx*#eRgWtX zSz;fyQjOq@+M3puML06S$m~pAFC$TK9hF0 zkE<%m&e_nT4ldA+pJDY=*Brdevrm|_5kgvVq0uJuQ`~wr>RRN)8EqB^`lMnjhm)Q= z!YAtyR^xk$#bG?umRjDxoSwJONZMywhpn_A-ExIm5O1cfxvD6AjD1H)|bu#+1r!$9gcnHe7Ehc z;;N#mE~*`EcPH(89Q)q+X~(`VEad*Y{Xo)wut12Sct}$`>{D<&9!c7dI`(6l;_+e( zlTYzP(tgshpCZN6VTxz+_OnU*xj4o1n&JhYq6;WqOxiCw_RE^$6;c#=Dqc<6uQ~SX 
zqpeX+I58pMO8;{HK#|LE91g}Hyu+rK33U*p`r>G9=p>UXsx{1NO3 ziE{f-mBwF4BR-gaGmUk18vp1tn1pn~S)X}GXA~SMf(>OOT~Nyn>%gn` z(CSiI7d7T4-5Km3YDSmIdT=m9>A@Dm0Y}zH1M}?e<|!MfdUtGjjG9WeC%ZD)4N-8U zmkRKYg3=rHyle!Ul#OBI$@YQQpQdH9392gFO&RTP(hAuOb(L&iHW)@!`k{fzuBVgT zT-CXG`>Ry9U`M99B?^vgrJ93Ow?;iL{b7@`4QxEs0raZbCfl;;k7Y$d2Ew7z+m0=! zu^fa3rq@HKx4o*bQuEIp%N^L22@XcVkzy4PBv^uaUWULXWhiVs!C~-b{1F_^7(d5E zg^Ykh$qHRKg}@G;BP_G4T4^ zyF$jIsh`@<&X0N8kr^WY68<$;Xxb4*l94IC=RgVp|g+Fk<`DFQ)P8m+a*meH7`J+!)vO2dq_QlFPSG6 z{^i-VQ~DTbKshNHSVs|Ybm#iHs3qk*SVzu>W%d^^B9#4wsORM(*rZ$x8_)g{ zc!TUOWsHB6C@q!C;81h?a<&*26mkU`sHTrrb0zB;&Tnfkt>!9LsOD-Ed^OiF&{uOU zYDu{c){*OBspbYogw))KdR}gVP0GJu<7#e(H&Am6W0q&MhK!OBnU=RAgxPPRv%ih? z45y@3t+T(K6=r`23V!x?GSJWdF4U58H>@M~z%u)L84=36|W>>ox5v)@!_{|M_|_Ug)79wiF1e+&gb`^Oom=g|`=C*?_4N1lRZ?oYEll>0NN z=jB=0q&x>3&;5CL^)a)$TwXv;9W!5Kus@;3B;+MH)P#DOEvAF!D`=po&9tajS@(7i z_loj0_NA!TQSe2*!9XqQO_Y=J7OW$0!&210*&Y)04(fS%7d9#H!Nx`XhdsPAL7BXd zs#ic3}?3`J*r ztv}F*SxJQ%ipyw88Ko$)cm1fMbNFRN-M*2ZSoh|qE9Pgol$c)#R?pfc^W;|sYB9f| zoRr^T9r**6V*X@%NX%cT=jCtMr2GRL7t;xMSfgS(E1$1I%Tn8?2u;TA(}jJoeKyyU zx~e+QW8GA#$ilA?)|&qe5k8NtLsI`dR;5>Z>(a~3-5mvggw`XQ9-$s6CuMzDM>c@v z2=!!pXoNOIJukgrlhPYDK0+J8tG2ePHQ@3=~DCq-_+H*HylWEW06b&4k zE%exIrs^DHvLE%8=KAP<=AY2fzG+rb-*%^=2v#wH=#!=uJ#2R50;W-uB!He2ekDOPok zO^GTMS@;piGXMHx;U^y}@;gs>Y=)3DHa0`)<=6~E!5^F9WYc3a0_CKPgmq*TEXSsl z?V+(LLp?7E*rb%h#>b`t-e7Dh850_tN*N808k;d}GL6kxG;nOT(qpqD>lyA(xP)+* z=sU4OG2>A1#q7*LEoK*#ld>zUBjaHyW&+zoVs=9v>rPm#J7MEuCcztsnar4wm{OSn zj}kMLO(rpWpn+nx)?%iyp5YGQi>YFTV)jJA7c-rKTFeZTld>1ABYVSA%sy-niAkcK zmulFg>^$cgMFJ^yMDCPhZd@=JF zsKp$Ja#Ctw9XSY=Vp41miJ6ajUTR^JQU@CslZH1Cvw$%nF{7m(9wo+NlWEH|pn+nx z(PA>JyVI;%PqTyBmtq=G@WnJSP>X3s8S6$^M_OSi#%6m+jG&&EENoJ8uyHYM@CITQ zGA1OZOcueT#4Ki$Nz4*7P|N@=W-06LH1ot9!oC!9C@F+27 zv&kgp95hhOKrQB6Rp*iZJXI>P@GGxn{(09j{H7Z|(w|RK|7YDAz0$paUXIO$C^&MF zq6uylU5t8OE`d$TrLb|)m%$q>JT7O9f1e{ESD>yWU&#iOz8(dv6ctS%`9{>SQiR1y5jHOQW_Sb1w=iaT zMk~!Ip^?}2CeZzu!lWKS z!I4K95!%5XLp?8#!zSek*mzP;!kh8ehfgskSRam+r{PeO?HRV1Cfl=UV3IrNB%fnF 
z!-;0q=_H?Lg-O1Ef+H_7B9!DysORNn*rdDy8&C38c!MNgV~n5V*o3?ehf4AdwwRK9 z6Aesquuk$V)-#+bR)bFRZC04%zfo}H9Y%zbd>8e+ya$_<|G>tRd>`H*$qyJ4Bso?- zghM6y5nD`2evAetS*(-%gmrIoyUVjx?8_uSMZuBJRF7Z|evW!xzJN{2m$309zhVz{ zK=oG6(n)XMp5HilP|{!)3Y{10tdiW%3H->81MsuG>}x2$_pz)k!+xK!fbqu|I7 zsz;FckErM6C)lL?3>#1U7xqwzXZ(DBr7xUsrlTx>Q_;DqAv(+76{4O4ZkB(*rLz1J z1xNl;J%TL%Mm;b8z$T>=p4W+I*%{twmPPcfah6?FbgpWs&a$hj^SIVcm5MApfoQG% z$wTjX!0>Tx9Uuh9wHj|hOE2eCcN831PtgRTd!U||^1!Pk4iK^oES_Cw}=T z=>>eRT6Div9HB-2{+ zM=`@12aw*^*q}9TOD{DJM8T2m6iuLU5bAl^9yTdEz{WKWhBwey%$SZfmLP;0M`(>h zSa)v~jn5YMH?Hke1z0$08UOi(uHemPzmiS|&3l&{8H-;80qovc;rj4>VBAD6M50>+XF& zPfHd1Qp=tw_*$kjP-~fiGSj3&q~Q&;EMQEarCjRaP+Baun6^s;8mOgAYss+g&M{BR!R$*djVSn9ni#0H zG^30aFf3NUu+(C+J)}iY#|ju0D`42TmNs|;EejbFXsM7za40Q{*<#YN1P#=Z&{~$V z?#?k!%OUJbEr+7uYjGH;wJbv!3t(6*fMKcS2)2i`9Emy>z_3^V!^XAb;SIDL!A-*Kz^_wU!f6#`+f)>t9%EIfd;Z zEvKT6^)D>ezp!yFXTTe1Ig>GgmeFz+97@aCY%ys$2MyFxp|zaLx;w`_E$6W>wVaQF zujK*;YAqL{oRo`T9l02mS}tLGNXwu5Y~~0V5#L{wuiJlf_h#ag-yz1uyHMq!yBwipJ0rCyP-^;L|sXFiVY?yPosfS z#%L+eu+l9r-eAn~j8>3Q0wR}XZz6=bkJY)q#kzZ|Ym(0WZK5#uf1}{%{tg3m?(d?E-z;Gr z`422}f1mB4+&@4)FCW4tam_ok3~9$G#l3UmJ) z1wZ#M80i04{v~Qj`3lyN|H3l+uNe`_{(q?FdW;BV z-vf0lz+kZegNE4H?t%*!MyRv)@@~-tSBb&i8_r7co<=ziStlGDAh@swQYLdnv>c+#|9k=|@NRhD(Xr2L)eDl7U)G zHOg2q!8$S%mSSeHJtSr}>UrVDNXmY&aWQk*BO+#h`qo&?0V+CIwVM_*Pa&!s&Lj1O z?}2bBF*PV)^`v?PcNJ2o=Vd-@Qfgu2S=PZDTsfr~)A3z}1qk6zF;OQ~&$@eev0Nu* z5rxyG0R=}gsz;F2!Kmk@5jH7Ju<@jt;SG{%VNAzKwIYN`?XHuuS$A(JkI_j9QJ7Q~ z1xIqKN03w->UmiRo0LVc@uU{R8zi-aF&!th6d_D%l1}On)-&CB#v7^U;-N%gQVt4u z;#Bnrk~$3ayc`ajlp|o{NgWAqkknC(@sDwpWpXqe>KK=2i|H753>ui^WS!)4)-!#W zWVudq1^Y6|l_)rJtm+XYc^v9_IUW{Ii^9f}JQ3a?$&(nfJfl;~sLUdJ@yQ5bQd4wN zr?8&chDnXlNu5d*CUqJLj-0M~1WBENdS1?iP0CrY@ubd%H%RIn#a7m!>BX%g z7sE9NVmpl*F-k5$MZXa!=~Sa!%0~4HZMrqxTql>IUZQ>(SeR;TOUvb`&uE*U(Ii)( z>d2L_eDRbo6Y}lR##Gbn>Y5s`)zp;9RcOs9Qhvn1Zyz$GOs+!w~OEY7Pv>@->dNpwm?Ge^MNPm0hutndWcV;w?bk_ z@$h0g)47yx_N12#7^*g!SS_=wOXPkxoAw9t0hP^z@odU4A0G0vVVcAJH00sc!9Jp3 
zA2q?2$zwijhturw)sZ}*NS-v2B;+ZdWb+P2vP_;{9m_L{<=Hq(0weUC&$3$wEZAP& z&J^EbdN0!(?D^HD^MXp}MN>Lu@{*sJT<@Q*dPKOj^60r|wo z_XlJu2V~XikUv$(pJ`<809M=5oT;yIkMs%o+z0Qkl6Mc|jA_HycG7C%sFOkhTE1`@ z+dqhWsTjX%m$7VdLjJpEj0gqe*Q;avpJM!`UB(0$zg;s%go5$A)iHjr7=LJ&k;ls) z*NhRNVEk!yj6W;JU)p8l@#ohyV?-zze_I{n?~3t{b{WeSm&u=N#)wcb{eOxEBc>&Q*khu`_}@ymBZ)fFoUC!xvcLy`xmRt{EdPnZVd>b&TsM#&tVj zER*hQ%1Djtwa1uNAInHD#x-vYP&0+1gZNS~i7;7sWb*%}~YNF<556i)kK+r!0_u zaJeb$^7+D|it}pD%aAogqKpf^If_+9sZ0cN3%DKG5|)s--_R*lEnBf0FPNw7`as35 zhkkvE6@%NFa9v_>{T19cE?kC}uNvpMH!ewi^2-8+qdr77yQOhKx;dLmH#Ext^0{el zOWe*e+<^*rI~P|C9hKvhE`!+D&Gn0S2EMfFpxxYDLzvqWGcJ7xg*jMbdM6ik{UOBx z>O-pzny_;8o-x!CqQ*yHh(aA2K~yvFefU^-8H5+wU#vH zQf)FRK>f4%tP}Qnf2SDgWTN(pp-xe#Q(aVRTfJFkJ=eW-hcA{W5VaPcKB2l=_8^P9 z&8HEqZw#$Uq3x;BChC1CJclwNkm&@8&&wGKWG@XeRi~6bJhW9qAbS%ezE$^8AW01} zOM`42o^BzKYJ$Y4+rA29rUuzxgVc;SXEKW*o5#jvwgTY+82f=WUyn(_gbHP{A3@?1 zYOVs=UxU?;-faE6JBdRe3kcFLHpKM` z#L^&pXkot&+}>VxK_&*$K#=&1$taM6HOK)PWa*#-hd%p3uNX)pLE;;`Nr5zLkR}&o z=c9f(N=ff8DI6|O09uL^fV+G{Q!Sv~@5Z?)7x z8bjQ;)&_OAC0Ll2j8qX*7AU`7{FRiH$dYK%?r{ZKb`_5HG7A<0p3*hk$4 z+Z1}D86yH0eajG2Fh(xjVeHEDRA{6QhYwHwz;gJ#%cXQ$j%2Xn83dlPkfLA!)C$%c zixN2sT@18G6DdNQhtH8?U`gw*XlMH?+U4w|=)E(;^x0g|u7Kabv6A5tj$`3-gh*=QALJb^&~jTnHORt1p#{7_74P6s6N@b6^NBMw&eO=^vJpyJD4i0y4LuCBw-YErdk1`u+zA`rP)p{^T?|&N zTXP&Kc}dg!&|#>|9ZQb+-H0{t-9v~7-@WiTavy8~U#;BF;3!|MnePF_8u%V0M1=1l z`0%6{Yyn?d9$|2lFKy;~6tM=r#|RPOdmKJTo`5ajYmz4!9OY{=^F4)F1K-nxi10lF zpCix07VzcdIR;1ha%R5g5o_Rkfe;bC7vaNWYp~?=zL>g%i-wmOt{AroW(*q4oUb6- z!1*dcBAl>pw+IxWeH%V}y91WA z-f3`;(#45Vd57_em`544bGZ9P_~ukX-bEPL{JXIk_d?)3_KvXq2R=MY2ODL>?Q6z& z%*NZcA0o`a_7QtW*gl5OkxyWwYzbM#_>S3l%l1=*8Q4B!?+DxH@Hz4YYylhYt(M7` zj4xz!_xfNYdAs&2gc;cW%ia;Tui?X2M_{9D3HgTcg>3FV5wP)A?Y9Usuzkng5w`E) z!&_dk5w@~2`H}I3Y;kR6@)N=gY(KMigzXpj9QhSC%9fDd7+=U1*H$LKBh0||2YW}@ z{)7+jhQUVI5@qr?;|tm1+7j{)!VGMkaHSh%>kJ=0ehC|8OGp>S7qZ2*C8R6D3~b%l zJECnJ_#9amwtx+Hrc0$e;}x6tiwu5W!7NmVlO?sX9@-5wJ=iTmvp#(IHVSN%rc8P= zK2GCavVdkov>Rx8v0H?uH++t41RJGE$i|G1(8*@WF9G@HVQM<8J%G&m&o 
zWqgzdJCjS3lYVG7&}`0b5t=RF!wp~9C{3Ac#rQal9<-cnjdlY~e|C$|Yy+Po17M>x z3E7tMaT+~nIT?s{1I>2q7NHpgpCjAD7SNP0E=|Y|jE~ab4C4;kd>M>(15GizMQBRk zb7Tl?gr+PZLm3~X!70O|8HRQP&2V;$(2Ri3k&&!ulj`8ptI3_S$bELiBLUw}_FV?}% zligwcn>&+Gabz-G7)9^V8~4TtenXjp2A8`xu42OXGE&u8olZq_K?-E>$i4eVd%)+& zG+0vb34uh0iPp#!IIEhrB3lT+TQG= z$gw8wG=t;?> zOYs+pyr1|>b734>d;)ge4(=5tFpkuscCB0T5~cQV+KsgmQq8Ld}u+upUA~ zlhmSWM&MWerLF)Wtq>My1U^!iP1j1j0fIYd>4=fFE!pCwC1MqzG$@peM&TC2r}=B; zU;_--*7zwhTMR7VG#21ADV%1F!)$7^wbEjM(b;4hnwd>&0gA0qM5Cx|2$MCysB8%0 zW|LDm>XGp_cGriUg(%OHMarX}8eEKuBTMMAiqw}&-EGcCpY)Sr?lRPWOl)Lgge*lo zrf{&5AhI`ky?yZz1%0T7=F31X^cH~DcS3NGN3fWFXY{4dWH^WJUh#Uh?h7a#p z!gB9b$GM3$qvceFDmE%p&j1Ew{;i_gMI~|?A`M)p6ClEM27Hd32^-?V=ZF}pxJtmK z?hgfA1;TJwUf-l5^V#TUU_OU%5$1E@!;`?UA?C4iK0_5V=e0XEyzN&&bpZkmR2Q;; zgz6&r@WLl7sjz0@XESkqb_rw2mEnrt0CuSX>@o#*c?7InuJFOQpf`YBX#l%Qfn6N| ztB`AaFs|1PVAmSJu2W#wN5CrO1`kX?Kfw41_Ji=@eO1^HcG)O-n6WWztg0VDg8}+O4F>F|7$3oY8a_v!femAq$+L_lcBVT@2C(NC5CMB0K1W`F4TB}*MaC+y zo+uf>USdE5>}B{Ic?C8MRxYnHR)KAdk^$^B21LMKhtH8WV8dV)@+M;ySYMP3U~e%X z0`@k1xLpVv2CI~J7^}dxLdgL3E(0Q9@4<&RY+=J-qvd_ZDzI%)GJt)+fC$)!@Hz4k zY#3~ee9Tyah4y;2D*Xfv2JBUgk6?caAHL268^#_hpEEXwjiu-pXfR-Z$@mENSMcFQ zUDyzIVw8N%*cdj}pZ`OH0s9-qN3g$z&ynw7!`P+rJ!2I%eNH1?k^cIg_Q_k$&MiFCsKdxN6Ra7Pst!H35`VWB89!oO@A$t_dd@g)Z@ z3&ONr&9vPNwCgC^bv12`PrH++H(0%^fo=D)r!WWUZcbr6Lkc}q3hTQmc$XuI(Xs(! 
z)uiE7vA)3buX(b#yy}Sv!|-m%j*$t|3qD7B!-i+(7}Nty-YdbA0b80Gw=ytptr+`j#?d}wg~#Z=!lfrq!Op3lV8LxBm4o~GLWyi+CLdrR z-&TzdUTh#QQRG83dA*U` zyNSPMiEgkxO*E3_mNk7=$>dM+}%JpNfA!ggu8GNpa_e-quQGAO)>LLHSq1B z_@=ph-Z`+MOsW{Gn0HW8)pbC7-0mTzMD|1{!}(%5u_EV-8Svr$GAz#*1O3zgcB|9r za>?Fis(lPpNkvudQuz|gWnacBi5=a?;i-2fA`QA`5g?*#Hhhlo@rlr>x1vJ!W2_?M z8PU4~>D^F?o*?v{9CFV^M+5i%#EWnr0G}iCV8h&%av)41^7eFrx|iHCUI>-yPfK3*C%6*v!{x;A>KR%`TreS1U(J3uDzL+|i#t z{@z${(dIqCs}sX!^{-s1y%j))O>7fCvWW#gN3yV?=~P)NImW77_KoM#{!GS$$9geQ z5EE>n(1^8x!jR=cGDosp1RoxPhUG*&+CN}cb0Ttv`NLE|8;lA@Xl+tkVoq(TA+R>ZNbr_3V$fdBMF&&M+AIMlm zADovWo3h}xp1WNQxm*q)LoQbkKa$Ip@Hui7EON=L=kK*++nGy-1pMnO|4<@Vn-jRk zkifMnf$Q7^{B4nt>lv#CV^}+>v@8DxbT!Dok%$rbH^GN5jlzaEMY-I}SjE0(`!X;1 zZb2Ud?X850(B1~0Be%nbX)ELo#wyx1+Ly_CC%PC|?;=u!^=|m^Cl_JEtd(*vV-@TE zYc%a~;POtM{`G*j?Y-R*hdIN>zGmUB4bsq54JaB;|D6duBYF}TeEA84yUpQ z)zR@eI6cInf)q;RC8#kB$jcNN8IV`tbL3T63`nNhKV099Vd&r@%i~|Wi!HBLaLE!V z3SH(G?D_@69)!oC!UOP{c>rEF48R*|0N!*5z&l-xDU-Jtt7ZgWU2|^x4VRgZISk}Me6ewLX-+i^n=X%e~^&bP*`-^lYaeFSWb z{NRDj2~8yTeL{cZ{)iX@%}?wa$@ypa9Qg&76N!JYX*Clm9oe|Qnz?>6aQ&{h{&2ax zjXQRf{K?qJz&FWX@ESz?&ESZLf8cYZ6JDJP4gASa{%@!jrH>)Sf^St++OFx%`b>AsaKca1YLf2T%GSz%X{3uwP{CHiZxGOu&Y>cct`Y ztm5Dksp_{}^%`A2_zlpTGdu#l1$=l<0yYdiTDD@W+K2g0o&juY16Y3rwoL?Vj12I= z=BOtk)SW9P8Xk`l*%na-qJiuj$$C5Z92o@5$;3Yv<_<&nc7Ay6OR~r&-rmf%gMn?Z zVk_2ceSEfY81#1U`UP);@w-v(R|TTK$(ERN7-Gm_sLElO&SA>h=YV%@Rj$z-hMRL3 zVaQ>m%3+kwVbR*>kZ31|QgaSvh8z+qhjN|6_G_C1e!OibhYE8Jm4+Nfs~pDY9A>Y5 z4()A=vF03hG~}?8%3++&VfotU(B8J#*_^{Jh8%WPIgHmij9%Lu_?0^IoSb0JVK+k# z6IBko>m2IWK8Njf@3ZFM~||5D}a4mfXrq7MYs1Vm4C%OMUqiG zH>+v`>b?qfri*HQ;HKsMcIQKK@f5s}Gs_G=I|QF@vyPl6e4RY@vdx|?g12Rs_K^M1 zig#`1;vYx$XQ+Pkedf&SQaJz)bpp?}HZ*5t9;)-?Kt@gb2mdKk)j6$R!O7;Ntv1)j zS7s29;rCGSVvNQ=2rm4IEcW#AXO+r)M!KlAYzv`wNoR@joz3yBh` zLqmX_M$M50?2nClw#KVx9iPX?|6HU#Ths>!odomh<|HC=fX#= z_m-B-^x`spNVyRKK^je{InvBdS|R|;qy-KQ*vdA*1%N#F2Y^_xv$D5~ZX-HC7u4|M z91#Nagyi7R=xuB(K=;zsfC~{204_oezoW4et{3<+^dOtuqB8J3Qd;+B5r6oh$Ast>y*dq&I 
zk5aHlYuFD0*nA9jbVsn*RAocrq8RnGKkC!Z>iFiVj~wAHQvEL0{eIW|E>Zougf~H^aFi}p{Vvn}-qHOo_xqu*9&J9W><>_R zRG8lqj6>P5@)EfMNWsRvQn6j-vSl_2Hty91!s#6l4js@Aq1spdwqX1dOy-(`WUf`o zT<0cZy{n~PZ^&c~rAOFqC}6u$vE8KE-qURVS{+;Aa6(;s+HNjjyG60x>au0V1_OPY zfpBIA1C2Y>K4W1nCH&r8xVcb1o8@+uR9d}a+?tYHLv3TarmmqjCwHg>?^M}rW_7RX zYxS?{XAPS6Ij%IURaKi>L#j5gMo#+?RrNPXsy4LBr+v>F|M*8$Z)?o7Z&~AC-KZ+E z#!dTMMdEN<)zun5?N8SFvbK&jY1;3s@vrPub+)S1WrzCvj8$E%y{7%ewnZ4As%}>G zv_Du|&f4bIoT~M$nrT1L!#`?M)zhk*_5*9zu-40JnD!lO{F~5i>W$@wW_*b_rCxq^ zMc$=E-VLi>9GCP0<*&)LjFNj)+r5UiQn^pH-Oo0@#$4Z$t7%GQS|rt+%E|+9&rk=_ z7Qr`#Ype#4O0Bg-9#q{Pf-Sagg4E-xirmDo7SE<-VY;rS*22rvX?Yki_|#8by`);L z5exCBKU&(dBbpkrwLSJCXu7UCo$DcwAgm-Qk1E6sXINX!u(qDjrnqZc+FBbMn(J#+ z5?Of+F>Y4)s~xH4Y(q=4+a!;p1)ps#AKPM=bdnm%!4Q+-9MskXVXWn>e+sN9$xnQhu`yF&(Rtid}D z7@Qa}dc>%~!v+kl!vE)Ieb*<@KF_m9eIjGxTDjlKGlYScpBAt@(h!!!t6+^ z&m`31{pp%4zFORnYXI=GXkDo`u(UNdQ**j8?Y@8hoa+8OtlGe-#>SRKHLVS`2jip8 wFTlG(32kg>YRIK>(3+JORgah0gP%dm=CD^Rkd~%YW2==KB`>34W!s4V2gj7TGynhq diff --git a/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * 
sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/gg_tt01g.mad/test/cudacpp_test.mk b/epochX/cudacpp/gg_tt01g.mad/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/gg_tt01g.mad/test/cudacpp_test.mk +++ b/epochX/cudacpp/gg_tt01g.mad/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt 
b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index b836987bc5..3ba90dabe6 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. @@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +38,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). 
Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -55,7 +56,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00551295280456543  +DEBUG: model prefixing takes 0.006871461868286133  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -148,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.012 s +1 processes with 16 diagrams generated in 0.037 s Total: 1 processes with 16 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -159,10 +160,10 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector INFO: initialize a new directory: CODEGEN_mad_gg_ttg INFO: remove old information in CODEGEN_mad_gg_ttg DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 @@ -174,25 +175,25 @@ FileWriter t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 
5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1748]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.046 s -Wrote files for 36 helas calls in 0.368 s +DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1749]  +Generated helas calls for 1 subprocesses (16 diagrams) in 0.074 s +Wrote files for 36 helas calls in 0.281 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.190 s +ALOHA: aloha creates 5 routines in 0.501 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.194 s +ALOHA: aloha creates 10 routines in 0.441 s VVV1 VVV1 FFV1 @@ -202,32 +203,32 @@ ALOHA: aloha creates 10 routines in 0.194 s VVVV1 VVVV3 VVVV4 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. 
+FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages DEBUG: result.returncode =  0 [output.py at line 273]  -Output to directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. 
+Output to directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. Type "launch" to generate events from this process, or see -/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README +/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README Run "open index.html" to see more information about this process. quit -real 0m4.945s -user 0m1.513s -sys 0m0.678s -Code generation completed in 5 seconds +real 0m5.750s +user 0m4.686s +sys 0m0.918s +Code generation completed in 6 seconds ************************************************************ * * * W E L C O M E to * @@ -248,10 +249,10 @@ Code generation completed in 5 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt treatcards run @@ -278,10 +279,10 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards param diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt index 712b1897aa..db7e3616c4 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt @@ -255,7 +255,7 @@ # pineappl = pineappl -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat index 0fe3df08d4..f2554dda03 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-14-g6dba8f068 3.7.1 * +#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_functions.f index e986b059a9..47699fa614 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_functions.f @@ -2022,21 +2022,6 @@ subroutine orxxxx(p,rmass,nhel,nsr , ro) end - complex*16 function THETA_FUNCTIONR(cond, out_true, out_false) - - double precision cond - double precision out_true, out_false - - if (cond.ge.0d0) then - THETA_FUNCTIONR = out_true - else - THETA_FUNCTIONR = out_false - endif - - return - - - end complex*16 function THETA_FUNCTION(cond, out_true, out_false) double precision cond diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/Bridge.h 
b/epochX/cudacpp/gg_ttg.mad/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity 
filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before computeMatrixElements) 
+ const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 0726e0a6ea..2bcaa70441 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -1156,38 +1156,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) 
fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1213,7 +1225,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1348,7 +1360,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1372,7 +1384,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1484,25 +1496,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1565,7 +1588,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h index 5c057176f6..6ad3c7dd1e 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: 
multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f index ebf5273614..5b885a4dac 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -893,9 +892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) 
CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index c32cb4d43c..1621d47cbc 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -337,6 +337,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -347,6 +350,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -443,7 +447,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -516,7 +520,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -529,6 +533,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -547,6 +553,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -598,7 +606,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -622,7 +630,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -630,7 +638,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f index 6724cffa4b..186fa86cc3 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -340,8 +340,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/addmothers.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/addmothers.f index d6cded9a2d..593c620d9b 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/addmothers.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/addmothers.f @@ -111,7 +111,7 @@ subroutine addmothers(ip,jpart,pb,isym,jsym,rscale,aqcd,aqed,buff, if (btest(mlevel,3)) then write(*,*)'unwgt.f: write out diagram ',igraphs(1) endif - lconfig = vec_igraph1(ivec) + lconfig = vec_igraph(ivec) endif is_LC=.true. 
maxcolor=0 diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cluster.inc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cluster.inc index 8ddf5bee13..940c25eac0 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cluster.inc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cluster.inc @@ -43,5 +43,5 @@ c parameters for sudakovs integer iipdg,iimode common/gamma_args/Q1,iipdg,iimode - integer vec_igraph1(VECSIZE_MEMMAX) - common/vec_igraph/vec_igraph1 + integer vec_igraph(VECSIZE_MEMMAX) + common/vec_igraph/vec_igraph diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See 
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capabilities (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed)
+ ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone 
bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const 
FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.h b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.h +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile_original.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile_original.mk index 6cb56d0409..348c283be7 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile_original.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile_original.mk @@ -58,10 +58,7 @@ $(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) $(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -libcollier.$(dylibext): - ln -s $(LIBDIR)/collier_lib/libcollier.$(dylibext) || echo 'already done' - -gensym: $(SYMMETRY) configs.inc $(LIBS) libcollier.$(dylibext) +gensym: $(SYMMETRY) configs.inc $(LIBS) $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) $(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/myamp.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/myamp.f index bd02dfe2b4..5360566ef4 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/myamp.f @@ -139,7 +139,7 @@ logical function cut_bw(p) $ gForceBW(i,iconfig).eq.1)) if(onshell)then c Remove on-shell forbidden s-channels (gForceBW=2) (JA 2/10/11) - if(gForceBW(i,iconfig).eq.2.and.sde_strat.eq.1) then + if(gForceBW(i,iconfig).eq.2) then cut_bw = .true. 
return endif diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/reweight.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/reweight.f index 353e025d71..8e4672a421 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/reweight.f @@ -1416,6 +1416,7 @@ double precision function rewgt(p, ivec) rewgt=1.0d0 clustered=.false. + vec_igraph(ivec) = 0 ! default: no MLM graph selected for this event if(ickkw.le.0.and..not.use_syst) return @@ -1467,6 +1468,7 @@ double precision function rewgt(p, ivec) rewgt = 0d0 return endif + vec_igraph(ivec) = igraphs(1) ! save MLM-matched graph for this event c Store pdf information for systematics studies (initial) @@ -1592,10 +1594,6 @@ double precision function rewgt(p, ivec) c alpha_s weight if(ipdgcl(imocl(n),igraphs(1),iproc).ne.fake_id)then - if (q2now.le.4)then - rewgt=0d0 - return - endif rewgt=rewgt*alphas(alpsfact*sqrt(q2now))/asref c Store information for systematics studies if(use_syst)then @@ -1907,7 +1905,7 @@ subroutine update_scale_coupling_vec(all_p, all_wgt,all_q2fact, VECSIZE_USED) else all_q2fact(1,i) = q2fact(1) all_q2fact(2,i) = q2fact(2) - vec_igraph1(i) = igraphs(1) + vec_igraph(i) = igraphs(1) endif c call save_cl_val_to(i) c endif diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py index 74f6b04b68..c248436e7f 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py @@ -1004,8 +1004,6 @@ def __init__(self, finput=None, **opt): self.comments = {} # comment associated to parameters. can be display via help message # store the valid options for a given parameter. 
self.allowed_value = {} - # allow nickname for some parameter to avoid integer mapping for some var - self.shortcut_values = {} self.default_setup() @@ -1134,11 +1132,6 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): scan_targettype = self.scan_set[lower_name] del self.scan_set[lower_name] - # check if the user used a shortcut value (which are always str) - if lower_name in self.shortcut_values: - if isinstance(value,str) and value.strip().lower() in self.shortcut_values[lower_name]: - value = self.shortcut_values[lower_name][value.strip().lower()] - # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] @@ -1317,8 +1310,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): def add_param(self, name, value, system=False, comment=False, typelist=None, - allowed=[], - shortcut={}): + allowed=[]): """add a default parameter to the class""" lower_name = name.lower() @@ -1353,11 +1345,6 @@ def add_param(self, name, value, system=False, comment=False, typelist=None, assert val in allowed or '*' in allowed else: assert value in allowed or '*' in allowed - if shortcut: - if allowed and shortcut and '*' not in allowed: - assert all([val in allowed for val in shortcut.values()]), "Some shortcut value are not in the allowed list" - assert all([isinstance(v, str) for v in shortcut.keys()]), "All shortcut values should be str" - self.shortcut_values[lower_name] = shortcut #elif isinstance(value, bool) and allowed != ['*']: # self.allowed_value[name] = [True, False] @@ -4186,10 +4173,8 @@ def default_setup(self): allowed=['partonshower'], comment="list of check that can be bypassed manually.") self.add_param("python_seed", -2, include=False, hidden=True, comment="controlling python seed [handling in particular the final unweighting].\n -1 means use default from random module.\n -2 means set to same value as iseed") self.add_param("lpp1", 1, 
fortran_name="lpp(1)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='first beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("lpp2", 1, fortran_name="lpp(2)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='second beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("ebeam1", 6500.0, fortran_name="ebeam(1)") self.add_param("ebeam2", 6500.0, fortran_name="ebeam(2)") @@ -4198,24 +4183,18 @@ def default_setup(self): self.add_param("polbeam2", 0.0, fortran_name="pb2", hidden=True, comment="Beam polarization from -100 (left-handed) to 100 (right-handed) --use lpp=0 for this parameter--") self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_proton2', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(2)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (used for beam 2 if group_subprocess was False)') self.add_param('nb_neutron1', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(1)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_neutron2', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(2)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (of beam 2 if group_subprocess was False )') self.add_param('mass_ion1', -1.0, hidden=True, 
fortran_name="mass_ion(1)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 1)') self.add_param('mass_ion2', -1.0, hidden=True, fortran_name="mass_ion(2)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 2)') valid_pdf = ['lhapdf', 'cteq6_m','cteq6_l', 'cteq6l1','nn23lo', 'nn23lo1', 'nn23nlo','iww','eva','edff','chff','none','mixed']+\ sum(self.allowed_lep_densities.values(),[]) @@ -4228,14 +4207,12 @@ def default_setup(self): self.add_param("fixed_fac_scale1", False, hidden=True) self.add_param("fixed_fac_scale2", False, hidden=True) self.add_param("fixed_extra_scale", False, hidden=True) - self.add_param("scale", 91.1880, shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) + self.add_param("scale", 91.1880) + self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1") + self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2") self.add_param("mue_ref_fixed", 91.1880, hidden=True) self.add_param("dynamical_scale_choice", -1, comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2\n '4' is the center of mass energy\n'0' allows to use the user_hook definition (need to be defined via custom_fct entry) ", - allowed=[-1,0,1,2,3,4,10], - shortcut={'ckkw':-1,'ht':2,'ht/2':3,'et':1,'shat':4}, - ) + allowed=[-1,0,1,2,3,4,10]) self.add_param("mue_over_ref", 1.0, 
hidden=True, comment='ratio mu_other/mu for dynamical scale') self.add_param("ievo_eva",0,hidden=True, allowed=[0,1],fortran_name="ievo_eva", comment='eva: 0 for EW pdf muf evolution by q^2; 1 for evo by pT^2') @@ -5598,10 +5575,8 @@ def default_setup(self): self.add_param('niters_fo', 6, include=False) #seed and collider self.add_param('iseed', 0) - self.add_param('lpp1', 1, fortran_name='lpp(1)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) - self.add_param('lpp2', 1, fortran_name='lpp(2)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) + self.add_param('lpp1', 1, fortran_name='lpp(1)') + self.add_param('lpp2', 1, fortran_name='lpp(2)') self.add_param('ebeam1', 6500.0, fortran_name='ebeam(1)') self.add_param('ebeam2', 6500.0, fortran_name='ebeam(2)') self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", @@ -5644,15 +5619,13 @@ def default_setup(self): self.add_param('fixed_ren_scale', False) self.add_param('fixed_fac_scale', False) self.add_param('fixed_extra_scale', True, hidden=True, system=True) # set system since running from Ellis-Sexton scale not implemented - self.add_param('mur_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mur_ref_fixed', 91.118) self.add_param('muf1_ref_fixed', -1.0, hidden=True) - self.add_param('muf_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('muf_ref_fixed', 91.118) self.add_param('muf2_ref_fixed', -1.0, hidden=True) - self.add_param('mue_ref_fixed', 91.118, hidden=True, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mue_ref_fixed', 91.118, hidden=True) self.add_param("dynamical_scale_choice", [-1],fortran_name='dyn_scale', - allowed = [-2,-1,0,1,2,3,10], - shortcut={ 'ht/2':3,'ht':2,'et':1}, - comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n 
'2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") + allowed = [-2,-1,0,1,2,3,10], comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") self.add_param('fixed_qes_scale', False, hidden=True) self.add_param('qes_ref_fixed', -1.0, hidden=True) self.add_param('mur_over_ref', 1.0) diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py index 6f82393c3f..3c5601e27d 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py @@ -5205,12 +5205,12 @@ def init_run(self, cards): if self.run_set: self.special_shortcut.update( {'ebeam':([float],['run_card ebeam1 %(0)s', 'run_card ebeam2 %(0)s']), - 'lpp': ([str],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), + 'lpp': ([int],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), 'lhc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), 'lep': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'ilc': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'lcc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), - 'fixed_scale': ([str],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 %(0)s']), + 'fixed_scale': ([float],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 
%(0)s']), 'no_parton_cut':([],['run_card nocut T']), 'cm_velocity':([float], [lambda self :self.set_CM_velocity]), 'pbp':([],['run_card lpp1 1', 'run_card lpp2 1','run_card nb_proton1 82', 'run_card nb_neutron1 126', 'run_card mass_ion1 195.0820996698','run_card nb_proton2 1', 'run_card nb_neutron2 0', 'run_card mass_ion1 -1']), @@ -5795,8 +5795,6 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed_for_run.remove('*') elif isinstance(self.run_card[args[-1]], bool): allowed_for_run = ['True', 'False'] - if args[-1].lower() in self.run_card.shortcut_values: - allowed_for_run += self.run_card.shortcut_values[args[-1].lower()] opts += [str(i) for i in allowed_for_run] diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py index 262d39a736..3bd0c281fc 100644 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py @@ -38,11 +38,26 @@ def compile(self, *args, **opts): cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py - logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend in ['cpp', 'cppauto']: + backend_log = pjoin(opts["cwd"], ".resolved-backend") + # try to remove old file if present + try: + os.remove(backend_log) + except FileNotFoundError: + pass + misc.compile(["-f", "cudacpp.mk", f"BACKEND=cppauto", f"BACKEND_LOG={backend_log}", "detect-backend"], **opts) + try: + with open(backend_log, "r") as f: + resolved_backend = f.read().strip() + logger.info(f"Backend '{cudacpp_backend}' resolved as '{resolved_backend}'") + cudacpp_backend = resolved_backend + except FileNotFoundError: + 
raise RuntimeError("Could not resolve cudacpp_backend=cppauto|cpp; ensure Makefile detection runs properly.") + logger.info(f"Building madevent in madevent_interface.py with '{cudacpp_backend}' matrix elements") if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' else: - raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) + raise Exception(f"Invalid cudacpp_backend='{cudacpp_backend}': supported backends are [ '" + "', '".join(cudacpp_supported_backends) + "' ]") return misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/py3_model_FDG.pkl b/epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/py3_model_FDG.pkl deleted file mode 100644 index bf5a732979d683e3642a1177b58851862f165d66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 49027 zcmb__2b>he_BAZ1hyf#DLX7N!gpGg+5fNl1I7>3%I_wVgjvMH^vm|Ydvw%70tWTdg z=bWB7>oaFD>oe!@opZZ7%uMZq|KEr2-L0xyea^k7s=KRudU_5os%uKt&z0mnXGmR( zq=w8+wx()ZW~Wninbw{YTk2DdJ>4!PN4Q;!XLj$>rAu?NDdiT;EJCrh$?Z0CO%&!O zCDTyXm}+&qcLp}ablbb3HPh2w4VbGJuS)-(?i%RZqu42$*%fR_cg>kS__sOJpwaX+ z&1-3Zt@9e1-LRK9G#9gPl>&(?zbCNPQ<*qvpW&NicG$mVG-CkPUZ){?+ zyPj^jprJnFxa&LXDc8LExwUoB!rh>{xAtsoXim8sYH>kw{vvmynLU9xPg>?B=O#1Y z+_<{S%r(%|&{|7*+}_J(u7hf8s&P&&BiF`0%M>BAXkN*(q^12R_x<*WIGJOPfQ5`UMItcDHnVQ5L)X9cbY| z0Vhh^>7{Zs)i>5o+|S+0SwF1K*w5YC*`T_Qv!Sz*v$3;@(|eiIXPMI%C4{k!16C*G z+m?Ks2nzav28(8P0VFnGEN; zI~cwmITaw`L-rX_Aw=S=HDDhv4VT<{Sgc*Y$W zfM=GtJ9ZAfo*jIrT=1QPacFaQ0X7G|ZHc?9f$oNZ7`0-W{&RYT3KiL*N9D@DTL4|> zjyBMBOWfV9(BZV5IK~}ghP%6+Wsh8zu>niQ-P5o@n{g{)nd**@&)li*UYK92+X*M+ z5>^L<_3qw=uzra*=d_Y+LWG}8hEx!)@2%6np@qL&Q9ukMLqp>z&xC}+)md4 zu}ylqg6^%wI%W;^ZU(zwcZW-fwqj?d|6j^kXO=rZF9ux*)FvC}re;gB=NK*vYBJK$ zJh!L209!Bg+(pG^#gLja$6Z`ECrGE~o~G2(Jw)ebse5SF^%Cw9Kl9ewhNk3PtVCpy 
zd)RVSa8h0Eyp{zxNgs}rG|&2tEiLnE!`35k!k(Sf)d9jtHD?yPN6vDOQu-SFD|L@f z>beT6fO%MG%x!hEp4(pCrMgSIyHvZ48&g|e?k;Q9+1?`cDQR_=d+sqhvGy6ehkI;w zQR~cg1L|mjcHAVVudX?`%yW;Qxjwqg!$PA?W+yp%wYnBLVN#nzp-(J!GSzDug-;%G`mUT~0xMw(rEq9Q*<0tAMxighhb2}^PAe*zDFp;W}?m4Xv zqCeL;wz_Mp!w{Y0ogjwu7{l5ohVzS^f&5QnxIkmLu+x#A)6>03N3FrVc$RxfF+}WI zU6g8fFHN|YdG5bvCq4J_cnVi!-G3+CD+^P&N>i9VeR`>TwWe@QltSq+_gYQiI)4Dh z?BQN-N#zEnvW`jRMw5z3=B8rJ=H_DOA_tM)Qas^r{A0KX_SS@Zo9Es>+xOj3TwPS% zP5toQnQ-s&+`DI|Jolb>BKKz9`x5T`If*F61BT+kkb-(VlyD#R+(!(>qs5RZr1($5 zeav$oC&d$SiYK$~QwjI!LW*Y$#j_zrH&8s6aG&?w7YxOVq$moqcq!q&?76Ry;=gf< zSF`SG3HS9JiWFzm8~*N!@xXw*sXGPBq0car%AWT#ub5fBJpMG276L*+<1r4_#`nz;V{Wn%A)}b{(v6oxEl3a;HzY z5d1hg3vH`Y`4b(PPd)duc=DfT-7gaEm$Bsau+Y%d)RsxkZcMpf>ET9?W|MI}@^wkO z`@gLFjeqb+Hn%i4)Fm6;Zyh?m%evot?hozmk2v^H!JmqqtIeFoA?IhmSTnuM{lyf~ z`l|}~TQR0OFaB`Ysc?Viq-=Np2-F;2;r@vxDEU{|ROxc+c59a|IGYEXmP=REsI?TK zVB6cJ8yr-8GhcD(uJskpwmh~|Dp`%2;@~5zqu|LJ+95myNe|SsvLGxyblUUykMS5UpScf<|gq?w62M3+M9Airo9;oo@}n}DD5p!&q_boglq|0 zn09~du@!xO+FNVGHn5&-3)?Q+!J4$UN3q!X+4)rmuVy+jAAlz3EX}-Fz2Omjc$t)- z#m{^oN@3>1WsvVEYL^{QN}DQX#^6}Lsu3~-RaIyx3f8GH3=S%^g;A(f>pieXLM7Fx zjGI!UauhtN&<>GCm8fT>3N|6bVGA`Hp*=>@=WDd1HtYoJ$u?st?b~vAw6VT=qqaFVKG6g!gZ7z^NK8`}+>ep#x%$ z@{uyjcVIRh7;{urNv-eb-Yy5l9OWY<={r_ym)S8#RjJgWs-sYkf_46-;NU21X+~j= z)>m-$nfW)Dn{pf+6g+9r4$(NIQP0Z3unB2|EgXj??a@r1KMpP0Fb~!f7q(pl){H|d zip6QpI_GFrYr5{I88nz`&Gb@f3#v8Kxfjoms`M^ERfl6C3bElRuare-@rPq^unm`; zWpaq`z;GOjQraB!FbH0(3h9@`su29JpsF$-j+%9{9{~qtE-^A6sde4S&2&GCn^NYZ zQScbje! 
zsdSq9s)8r8UvxUC94@Dz#aHlDl!6IeSt+Oa4k&m!N@-Jtf@j3~Rh7$`sH$3LpE1A!Z$v#SH^C<4X4t|MZ_ysN(&wjmn>O4I>&YFk?Q$oqN%1Ze zd2T$*`87QNWgY#ywJ|&;jwqFT(Bh|gFG}eleidVWA4REXlHX%>I77FsD_IQduUy!G@ z;Tc#@o`r3f=U|N>&!gxci2VJ6yM*o>nk5cOI48V-HdCFBlfsK(6^ErVc?nfBUtUHr zHebrCUpP76Ma?=n--CmrG1QF4`&!5Sc=d*Vd$4QjkBmT%cPyj&=i?@;Hpv3yU5hGtyBpGg`jIN2!p3tOq+uPCGgiGCxSk?41n6Y>YFCx60HqQB^m zN%S}BS?Pk?hY9HlTPRTxTGOF2-PmVo+6aMNhIBDb_+cA#1~WvJNbzSeO2o6unT-%6hN~ zSs%7giVe^jNwFdObS}k4=t3#Vj1(KQp5{K~RQvZ#q&HC*dmj|S*!!|0F75rfz6ojx z*%a23&0rb*=IjxRehbub@dS&DC)mR1`=d3Aek=9~5A~&`vNalXj&DPc<<^gE3kS0) zH`#2*dYbdwnQXGzp4&2;0Vsso6tiQPO$llV83^miAXsLz1AD}>8H{>XhQKCdC~RRi z!_XRKQ_4O|(B?@8mRqPl>KOD7$jDYoI zBrK!fkv(G3?}U0*c7{#JF0h5s?~2wa`cdrDdGx!X3!|?x(T`?5h~B@cuscy0{TLL& z==WepGmplioRB?XJsAhf*vHc!i+wNDvoZlTA=R*jvG0vmbIh!%kcp`2F>?|-hZAZ< zxlBfbPN*sL*bbWez`>-3o1_w~2fK%VSy;nunbcGi!lb6Lqe*Hy$_beP>j`hEGpT*) zk0rGq>RH(zHX#SV7A7@|TLfo4(q}0Rx-mhlS`YF(wb4;-qotTGu0zaG8xu``ROOqh9*sB zKEZ+$)&h1k$t*-UA&X!=Sq#f$4xvAm%%P}f#e+@A64+QW&Q~*WEb1~<4nsR3+vRYS z+T{q?tQ^VS-=>WdORuBUd9*sSu%5KTG8k-IUDVbtOHnr~g=KU_5(PUznJkAxC&w|` zBUH*rN2GQ+7QSF|j3|@iP}RwCJPOvyaRR|GIYt{jPt-b3q9V}9Y2Yl zOw#ZqT5T@;Phl(P+Nmgn!*d$h%EP+=(XtXm%G$!GtZXlDkpW340F;)REXLHX%>K zb~GqY!PhQNYwu^+FFq*Gs`EK@J`d~33$XD)c@cFpC@<0FA4$Toc$sbSvA`80n*6c& zFG}H943}3?)vJ~Tu%70D0}D0(HuIaTP`tNL2*rDw9gTSJpq!9* zVLf>dmg2one@wg&P|wPTunGAHHYT1)b_H6az5QeMiS6yB@(EgWZ~v4o+ur^e986}c zN#=9b)7;a;WWHd9$$W`In9Nt~Xp;FF<%IkX){}2wnasEJ$CCLD^{jjkn~)!1W69*~ zo2_)|201<1^t8)QvcQ(v>$8{$g8PqYC*OP5w;if@dzm znJ_}SqN+2Y2nDNn-Qb{jdm8b&v!3Q28H%?Wx21TiqY#R>20I$@dZ3(;HDNvJ2}|+T zqCY0y+Nft`9oU4d3)_)+z2L)VQ`&od_KS*C|JeY0uG8d&WP8Kb$>UiF}u;0+?L|?MYbRIn6pJNjedZI60Z2EZny7`7wvO5kgkf!ccz`^ClEL7juuIRw^|p|ElB zhM{i6E2T@t3r-BBQie7?F_hC8SFubg(B!LFiBhOyxm2O5Dh@}%s^SPZsN#5|;z-v0 z{Wnl?M{Y|McS0dlac6ckD(-@ELUx7qWE3n_+>QR2ilb4_%I>fU83WsqihID|RjfwcsJJ&>{uHEm>F{tqk&d{6l`;uUzJilc3Kgu7DZT@X z#C>9pvSE@yRi&*#!7A-kI4JF2M%rnt`+IjF?R0KSX=k7iO3Noa0%`X}IU)POda^$( 
zr9FWDn6$G{&&q+Y38{tcNZNzoYnP<*C9qR+5I#*43JuSQ)l*q}ovMWWLfw_q!LMo|T2LxI%)B4S|ttF3JkDZ*!3WGWe1y9aqkJw&*4(eGs7d9d1!4?K}K3dZg zv3k&Jw+q-Oy5loaE<}S)wu|VoO}2~SV37NmATMD(&57pJn;ZdR7?Z z4JdeWBYVVxyb1NJ+zgwLTVM-=ycMlckhif<6y!*`9Ss`f9rW0Oyb}%vS!05{i}hf* z{TnWKb6W;^4+@^#t6M~K@IKVDazAWB9)OJnX`V8AP1X^t znl$4NQ1IkK-6G2PBh<690yZHZ!xm=ziT3!EK0o8nwBd7De8vXbE?>f$jK4zBn(^0k zbdd4?;0rSzA>R4p}6k<%TeK)VscWi?cFEwefb)Tt;(S4%hA|gt?4iN0ZAQC?{kr ztS5WIGM91m$8s5udRF#=O~?e;!d$A+8s)M#`$V~v$wV}0E|ci7E(e7y0{e39VF3WAHT{{Y)c1zjOXtxaIge-^k zLb~#CVpUi%7?M_kWsp>op)|1m=?aqgT+BrtM3t0E}*+9Dsxh=K32!&9) zi`mg=cL~Y~xfIru%V4S9zvz!?cRA{~mV(8#6l_P@T?t>iT&2CQX1}<0*QoPabzTSS z$@Q>t?QTHbXm=xBk#^A?vzzFM30NgJqs15S7L-B(E96$+fh&dE;tpIip{mN>fr3@p zJK>tEmK1hE|*@sZi%EPb; zc?7m2WgmqP7g5^#G4_iq`?xxvQ0J4do;(E`SN3Vtjk3?s6)CHG^s{V>i8@@KLyIr! z^C*R)R>}*gs&+4;VAbv=IH+CPX!kPf{vI7@_X@YAcK<~o)b3SwG}^s}azb8*_2dm$ zYWF7nG40+$Ju7d+CgdI1jjeN80@gU%ULKy?%ph>n;|Z3WkcA6 zYy=xq)+n(tTBAF5z1b&xgtSchpsq``zI51@YMa2pRGLgGo3ieIP_mmzWixKeR5nK; zOl1ppG^zAM84m!$deR@3scc1mES0TM$HfFJE+$|LQ`ru!Q7YTB&(gHXAgu}HT`&zm z7slRfVlQUh|0H6piM@m&XyU#y*t(SnR`4&q^t5LdswZ zV=qT*6nh2xbRK&px-j+@6MGfw!4%SO>B?}TF!m8Bgt3og$MC_x9Z^fjPO$iz6)dCQ zg*{@??}~a>M!_azH`v1HN24{0es}ii{K3F6=)&mdndtXmJ(x9q^ka#_==Ve+jD8$D zhS85lEg^fs;u9}eMqkYyvFP_kJu4Go6EX?5F#5@8jiR5zKAlIu54tdV*F>LSJ(xFs z^fg3b^ixp?qo2l(Vf52c!!v-ecm@!b(eKM1vFP_h9nS#5;u%2L!sutAHH!W~_USzO zT6AIb858|MtOwfr4`n8a!sutC5Jq3ej%L=^qnwZwEItr}W$bh5kHzkwo|OjJgrs2$ zV?P+J(f#B`_6aW(M^s1?8uaMXOpop8(*g%`X*0RZV?7uNKNpwVG8aK1%%znbO)eRf z6Ve8Y53pdF%L4jixhzCID~n(gvKY27mqXAR<#H(dgt?5Y5DyKS%MyBQxf})ubD3{) zIh^%ikM?hp9>Hyy%aJIAxg5ogCYPg8PDmCO&jrFVm!byvu7sGmT2`s}19=5y`b#p4cj4ppcMzNw#5dK96_R3tHu&BV=;d1!6^YaUr zBP!(z6tQ+ltA2T>tA8s@rh1`~?n>5!$?i*c6`EAKt5FE0yM`T&bl0Mskn3P^$pcI2 zZlFIV-HoW@Yg@4R+7@g_(%k}IyWFb1Z)3l>bhoSX4t3rM>&acPap~?x-AH#2T{h|N zrK1Ds?t?FoZe*q0Pq+@Gdq80_)rT7C9%SAB+-az}gn9@~D&4~H_ZV{#UD(YEz4Hg$Wu!SkU zp*`ND&rk6!ZFn2jlXqa-vn?mZAJ7t}SS3Gl`wmk4NjJ_^A7)bgne{Y3GjfKSuNeMG& 
zw}?{w9rdjI0h^FNVPh$pp#MUv?rVCh`ET|K&s0@q(gp8v(lb?86s*VaA~<-aI@|== zjrH`Jn6*xY39>u4-_#K{?pGF`VxdOZGwU)n`(zBrp-{#%I2^M*#b5elli#2pZ3_2K7YdY z*M_ZNJ=q$zUABQWW4|qm#m>=r-=a>>#{)O%1u!~IYw*p`QrQk&^@P8@ZnM-`<{y&= zpqe({B*qD>I9A0sKucm(d;@e~tcq`d4vJOr4bUBeDs~6wn7kLj$Y6LWUN0lw5Uu;) zNB7ruL(!t*4MV|`Qtc3lSB834%3%{y0b3|urS_}y2*s3@?vn|?xDl23cwD?Nxic+xOS60a=RFNp20Ym{iR^h8iM#I63w>25>uJsj| zEO-lxPM0y6t*E_t_-ng87cdq!B04WQqVqJYEaXBr=pObZ_0mdXHxK1Q@p{fRi>d6 zKB$2$E}11W_&4txwuSFUot~5izK*A-@VgjLWnTr^j~nAPLF;KOsfF`I_D9o997Vef z8!}7|K*hX_OH#>3nMEi5-=#h^FV$Qx2cle}12;d}*p`x7)F-vgP8*VgP{n6Fuzcqm z-{Zv>w>2i4X4KTyf~~f;OlHHIR#Lt{H1rP~SSEFFIepV4O)FD655FEBPg1bN=i5!W z{g~>Br3*`C4mVTazHpTGAKX6#56Fj>4UxI%h7|dhxw@97d5x)s;&4kVVFNs#q+tsP z%j95gri6UqY)B@JXh#|yqc$mOvq24HFwrzfONdLCX)YeNLurZ3TN$*g&|;u%BTz1_ zIrw4R0|%B%#>ek?1lkmTzQNBKfpS?80*^L=jGj?5FeEUeP(HADa4{QGnWS+Csh9L0 zq=P0-%Z!>5S?EX8v5<>2n#G0Dlwm#`5=O%?2Zv$Ep(}&+6n2RXwoDESVLP2>hp&v} z2qihvMp7&Y}v}fS+3z6 zV+*HDjt#@voCBtF#5RG-O60he5go5YC)kL}<;0K(NA51Aa?;AsPFA#23>sfy$k{Y; zzu>qcr-q2ZpEJffZEBX_(%ovz4dbc9?XXIhafj1L95FvJf98BakjS{d@iihPMd4h~>- zEzRkR5TrLg4`)Tn0VVr&1@S^LKP8>Zclp~kRe8!FsBL7mx%R6K&TUahvtQsS_ zg7M!gW4uxsuj-Jo9E?}58Y8-b@tTz}UaO4Pb;!u$<@Kw^h^}C~VP%XrD&tKZGV=Iy z^QtkTD;RHC8RMs`vm@Y@+1MUWW*JHm*m(P6HDbOwCM73+-?V*G;&D4 zpLbUJMhC0rmb!!GY529JX5wCxrc9e!BhRpf%SQC!d-vs8G|ZCcV5_?(6}lC#woJ+M zXyKyRMDPNtp1cUl2=GS_#N+13ODOvx?E2~aqRMk?&rOpxO`^09{xXWyMags?t}Cn z{rrl9cK2hAVZKMq!qnea%nuA^aB|W0hkO{JKCt4T(aVSLU4Z%#Q41AVp{O6{q3Uu+ zK8a9YdGXsTfB&SZ0QFO%b}z{PGe!N}pa%IbYO0ejBFrE5`6Aozj)bo*q6PkIG<)(t*toolnr6#4+*ak?ZM~=Y?-@Vj80NRcT&*DF?-cX<0?ds3 z5MkDzgfPLNvo(hKBQaMm!2C%ue=fjG$uA-1sNL>6ZQB*6oKb-JD>2t9!2C@ye=opn zl0PEMyVqWJ*^=izFTnhhnClc^{-v0I8_Zy8Eo#an+oTId&rH}qn$J9b^1C|~pms%< z>Rtt?MQHY<8!Sf2*~$#d^rw52ZvWXL1<`BqiKA<3q&r#sF<*^neG1T4SF|+@+87g3 zd=8~!AUz0DI4{>!ke&u)ya_3Fcx+T-AZrn%a8%b;kaY~mbOW+Me7eOz)+I>cbnB%c z>lu*!4M^>7_DI$z$R-7vY@i?;8j#sWlbi_^i)15$6i%p(6{NQTnQK6DW>5^I4?zlN zP+tYv#DFv!kemq<1KE@yg%f5o1=-wyY;82@w)C%)cHL)7f5PxIw#V2pqxY39(9O)4 
zeuOKWF?|2fxZoz%5LED>3Y%hQEvy=37~1?BJ}7i49qO+nN>Pd5wMr9HoWU#FF=)4N?7{AN9AnXjZx6u6XIATiG8xC-3ceL` z3vRbe4B3a~&J7ul{ua``2$M%T0d08p2R5!}-2&O0ot3i>IAaqnphsXOG7-HjOp~~A z9@AvBc`^l-O#F?BXreI5eYlMh+9nFw`bz?REOa&8G>>j7+B}&C8&9ltflOy-@}$XV zL7Tx2d1zdo;vpm0INID&*^iwy>L8<3N{0gy?hl^@{{U{1hd&E#o*W39gTFv(+1ZAV zOb&w2f}i9jdHA!@=1Cna@$pB+pP!m9_3W(#ykS}F?>LydIyehTG#Bh%DRi@N&EeL0 zTyxRpi33Y6-asN2xWb*KfmFS0@YEIiIi2&bJ7CEqsd!k;iuk+B`WFHis`I9y{mrrR;o5 z(AUCu7$Nfb4o90ON5JOrHOZ0eoX^)}=Q|30Eqq54B9AYNHay`4o5PoprRPkMl%KuJLmD}ZQ(qDAbFf8qRo?&V9AL; zx&t<~jhB60?QFDpat>@h8*X2-f9GtxZF?TNS=i3! z)_H6fpv{vDVe{F_D+a=sOkL^;l;d?x=Ic&JMS|0BOm0Ot3)^knI*;vkwBhaR zuz76w34%M>KbNgAw{p1)-7IW(bL%{|d(ejWJiz9&mCJqXpUYO5Te;kiZWgu&xOE=e zgJ|>QA=n%?+?g(whuL4*f=^`d`3h#C9!?h3$s_PvXddNec{Kk)8(!N7n@>|FkF$Rv zjlX08%@gojXrAO|c{ER<4G$N>=F^nRGwfeTQyygVEc_Ol=eSuO>r(TPR@jXmCh; zk^S>&urv8I8F>kQ3(d>iERW_DwBd#?Y(7nyyvqKCG)AiI2Xg-18Lh~s%%cJ=WZFpxrY#vQ{xqQL?`7}6J1TukK=o^;mc^S zagNpnA;*v0D2JnU0Xcp`yM^OtcF*JZ1#O=E3LEFBi#UGcMmZdH%JDneEgXNayWvO$ zpN0I1#;p7WJ4^nC4R7vr!2->buCQbe9=-8zjNmhrA~<~R^{|SG-^)nW;Oev++&LkT zAs`R#A9YuX)eHrnFc>GRhqzpp<=nfdG52orZe6UAPvxi7gFtzTu8B6h$q|-01y`x# z!a-Y$+o+)WU>e73Z8YU1;-jy_o`IHay26{&*6ja;rmPF+Ea?SXT<8B5cWq+>ez{T> z_1x2R>M85NjZfXy#~)8NU|)npEzKi7;fM6HA)Ly9-@k5a!Y@v?%0_IWV2fpA6tmJB z){mT7Eyj<52Osg5NFVs{A)6uWi<&2!z><)kh)GH|<>mxUM?lyM0HhDMZ3a(-usLd; zYyle}G&D&+Zfr&14{XYoISBm~VJm~cN9tNrb+WYuf;(vGkfC)gt;LH=WSbn6Z53rZ zgTfKRuVU88_7)gkTjQh1)?#1*XFv{4vEq~%97a>uS|@o;Vc_fee1_f~1}{6?(RZ<+@2cpde01k`A9_PTGvvI;Wj8zSXbbM{iaW-~ zO^*(v-YkR;jvhJU$R2izu@;Iwm13Ns7!y+P=81m*jLu!TB;)NQds#>(C`q*;so^P3 z$Cr8N6Mqp$cw?w;L5b{brd*$}qz)RN>g5DW!Q0pR-H()1Egq@^`F#6=FXF zF(riHg}DV{e+$F`3Ngz-G=>no+O|L(Xo09zh=U9SUuy1;P58j)UV25HLKWTYsY8l)`7Kp_P zafpH7i(7mMUUpg_4z)md3bDjM@YN+g1TQHq5QkYH4p)dH3z`$ zINCt)eHA_gF9j_SSqntFLM$~9s|9DqG%xck5X&qO%N61n12H#7H8Vw&$7Uut?=g<_(O8xD{{^-A^f>^`136A=PUdLKD@Kcmsdlt zvSVLp$G*sdeX(L+;$!Pi;`#8Ut9+)6t~5*JQakZw7UF*?@#ThiQK;Ea4oPw6CuyEh zE}c1+|CjSgS%^n~1PXS~P4c&{?vXBc-087l)u|GP_O 
zdgSbZ=Ftt@U(p!+=k`nFemnUC7V-y`{2@a=u>t>q`EH zA?M4zY~=q$_BZY1Z&}FSR`PcYdAD$x5g+s5w(qKC{;r+;Jq!8!O8$W%Plnelyh8Qw zpWp`SN)Ol{+6h0h5UxY2VHZvT){B41)73-4FN%Db`pHQI2?9hNty_6<)=+pH{4pG&^6 zQ+;cp`cA37H&nZX7yX^x$lz)A2Rp@&7K)#g;%7s_-}m;lYZm`GeWKBb#kQV3w)HKsZJ@DjXkzOUPV1UrG(vH5XmgeN2cd0b z4{c*hXuUPGJ|?s?L)rE%2(8ndAsR<(w@^UtI(Q7SuRXv`ECFt+0d8gjNpx#XbQ_asZJ6kG9StfTK#St$v2a^E*>)DP?UihR zA=^JB+cucuc|sL%72CN=EL;PXYmng@5_0izzToWQfBD#d@ILl{o$O$z8El~$qBKK& z8s{8;mMDk)ri{)9K4v=1&QWUNC{vDdpCed94Id^I?5k_2!JUs*g@5LX%`&OPww6<7 z6(RGMQp3^a$q3lk5^i{@jAUP>kIpT51DJC+%^yumEIR_o63b4+&x>VewBcQzusnU2 zgrhpLgBfHAAY2cH2MgKN9>6F|0J~`bqx}HFu_%|_*;mEbp@UF5On(eEwWPlX5%ba? zi#B`<0T$^yfBJ;0wySb=HqOp8-omt(GEFc{fBQ_Uw4aiw+D^2$g=nG@P4bCCL91jk z`>LS(uhQhkp(r@@h3D{KgoB+92QJI>n*t__{`)Y1JpB`B^P~ngzAFrusqCx4?zbwV z|9_8l8i*{hPA7j}tTWJtSFpmyV;v#;vhP0>)~tB`p|Jab$r9}T3?MJq1JLHlEZBIk zBjrH${b$1d-(sx=ktNoH$e$N$5^bK$hKDqYaD;D)nXiJa+M1!$ zsT@Fcb`*|I0)ahFNBH<;MfIdun3S=0S-0-@E7I%dBE>V`IqO5X6tu#!u!yTcJL+( zc(a1H81Ts1wP?2gU6zn*o}J6JaEWrY8m^}BURZ}*b$MJFJ6D^9Yrb+V zFkE~^tS{OIX4WjKi%TX8?Z}HP$cq*E5QEIui~7j;c+~%7Wl>5FwZnN9xFrgAn1NfZ zyMar4_K#Z+w?G`B5Jwsa{)}G11o6L^77p4`=xf>ij<$ej6};VmUl@XSIB2Q7L0f9) zT4v!|u3X0$F22?@U)m-))(&@^1@3r-JHfzh8cNIW7g@4D(E@RjLY! 
zQi+^`z83IPE#Rjq_~{0mKUe94cPMRBp0sD!xz4n3ouyo78!j)DwnMH=9@jZ`u5&G1 z=PB3uhKuhQ_VWt&y-Zv(xxkKmp#}LOMZVY|^QEvpG8)Xj$8T3%f~sW*E=6lP% z;pcK-d2$&NP7!`V%K~w^1>y>Y__u-J4;=b2>1XG9zTj8B;u)97mFR7OzsdrCwZdOx z;Q4c*K79PfcKk|{WO+k(t)1;U3)}U|c7tKNAUqxH41qhmV-|fOh_4RvzgV9C>Ew;} z7;dt}aI?m6i;3Zre;xzAZmF^6$8f7XhTAML+^#X)VPdEVRbOQt$~%bRPJ0Y@Sz@?b zW4On}aOXeO0Uu*_5W~Ip81A#gaKFazfQjMne;z|eWAUIphKDRMJghN1Vq%!|Psf6< zUbD}KN9{5E#}dP18pGozhFAWn4t!CWJ%%UjF+6FB;VF&bX%oXH|5OLQNX#C?Gxivs zwZ!n8#_+s}A^p!|=x8inu*dLXEQVCt|1QEyXqzQ3Glt-4?pIL3x1iXBI~UjbM@S4( zm%-!YRkY?DYq@>?dEhk#c-;p`ZykTVFjOY zg^;|6iN2HZjvfBp7<{VDIdYb~N7(%DTkPE;_`<~Ep7K7tc!K@|{J}2+vnxIz#p$if zv}rY^@(~*JZb<9AhUQjTf$A*zn7t&-QfIHfRh&-w2p%NI|sB2DDe#_Sm>=kk7GcK*aY2*s)NDf*Q3TIo0^gwKx)?w zT_WweE7EEP>A)zw)eDgLRgMU0aB0avU&+Cxgk2*Cwui#5X<%z3*q#Nj!#jh;2$c;i zB%M<1ldhFRy0((8V@UOM?mr|Q)G2AN9MbiabbUjrzl`FOZt#yuD>}(^!yM9$ zlyqaC)cMs6es5*$#yu`)&n@Mii>Y2*%00JF4pLu5+QcCJY>+lJNWd~keE00cR7M)= zfPz=bY?gzvxuR@gP_8y8{X&#rG9CgH9zZa6Vqh8@4=)9coUl=2xqtjALly;%3@T`3 zhcMrGqimTIb$^X|D--q6ChDy!t(zO`SIlWpTJ2wl#wDf!k1{iu~L{Pb5Xz4izO*0;Fi?9;j@Ce75u`-jDMG-N8aQ zu@k~U{W~Dk{WJ7MLr~6O2ImAbM1vXX2jjeDA{l0hWEE4-V=K*JD^s>|!}hjet5_LZ zuAIoOqueTU*s7FmxX+e8DH3{wg>YIYLgN;2$e0^T3Ew3acju;0w~WjQYDW!fCll0w zD5#w^sIHu*ImZd^ZMxrZ2a;WKz;;!zQ3fm%f$gSXobeG{&cOmuqjR8kSEw-tsx5-r zBZLB&?YM(EH8uxoPlXz1p!8=x{0Tl@p%{}jDjsZPuNmRS)-~T z)WjU9NeVUDKyAloOZ=#&7$}XaFu@ASzE2KRLZNC5)YK@dsR~8ebH|W}cpXDMN>9rH zo33Cp4A}GtjNhc-LM_sY?}OrU#(?R0(Bsi&d+2H&};>&Ge9#Upn3xoMdatklaLCfb0L)jHb=qc8n724Fvoy( zkf0ud8*-r13U#o7dNG1(451>`WBZM!*pvg+tWYfm>ZJ&3o`LE>Ej<>y3YF4tmd{H{ zrlGDeRa@Usml09eR>kP;^r-IR^sDacY&+poTwORTsyB89RpB$^ zzGIC)iB!FwGh)Iwtns&qs*9YRC;Y|QNm%k#cXxK1@CR#`u(rB0cEWG0-OpNAr&=#H z^p~EiyE&65{7K(WNUM4^r)I+MtgX(Tn>aJ8*K%qn{Kyu;RYLVTPW^=MSv!oiUQWY= zZ&^DQwKn~tdP6f_K%La@y!%OJG|4tteP4j2=4g9ure&DSSKk7QuT&PQZxMa`_WInG zOl?y#-6F~6WUDMj^CZ1GY7x9jy4Gm`snj`(rQeu zpX6*YsZF`-TH59{HZ;$z)gW5saP;w`!jH8in_C-NnthiX0gop~D#rEIb%?62P2UJT zAHPt;ZW(+X{wTO-)*{!Tt%xvHKXh&b?g5(sE9TB?8#-}ZWpd(}p-poulTCHajV(i) 
z@FMNT)}gIU+iY{l0E0DP$NmG#hYTMwY`_lv2h`(*{%j~4R#r85Sn1$ll>*AaGBM4oRsBWgG%qprH#O6|Nc1n`x&Y1prNHhi>6KFtmh^IFZI<*JV?C6x@aKhq z^K+W3c<1Ugm*M#JtN!@oFH5;n#VV7lPJj8w6(QDMT#Gqxr@2V-S1Vk=q;-kFRfGQi zQ<|szw4R=M#`b?iizi(DC9O2imuWq>@pPBgQxnftX+5v-RD+WOPZIu7od^1~9+r8e zPU|t1huO3q5_yzN>k*9yxiBZ!R1TM3noa-g}kpSuOgr}*V+)5 zt6X1Pu0SC!mxd6Ri!onZuB3c%xe$UZT*Uawat-1u%O!-bELQ-&vOInJ%JSUnE6dZK YFB;ETzN9?m_>%H4uR^x9%^vdq08s(jkN^Mx diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/py3_model_Feynman.pkl b/epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/py3_model_Feynman.pkl deleted file mode 100644 index 3e55c479e2cbe319b2de3f58e86119ef116bf180..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42837 zcmb__2bdJa_B|}1h$1K|m=F3n(gx1X)3yl^}}iusdw;xT){VlC&|7fH~)! zv(KD!&N)5x88JSeIj8?Q)zx9TYZv%^f8YBqRkv=Pb5C`3sGjcL!;5O0Qg!u`YPCkx zwn%!!{8Tnw(=tDkuFYk8OmC@6H}RYNsiw4DG^Yr~Y?Iw(&Uz@c zrX<%;+nCPUT{{BXTZir0kj?e5y8&~Z;x&oyVXuqW?!{KooX%iN+3U^e&VQS84Jw-+ z+P%I?Fx%SDY;RE2WllH5)V4IXh~2ZQ^PF{9vr*%}Uyz1fO6JyFf38yD0tbLH&2LLuaqw5IKTbC{C7`JDCfPi;$Eb1p6T zf0Mn1V{bWU1333gx8_=!XS6lVPm8@3@Hra$*;`k2YO^>({k;)dYHwq?gR;~fU|}pQ zjDQu$wtA`IXsTDd0^<820LkVeYXMxr7 z`38}X6JgO%dwUS7;&6M15aX`jq8a~%8E4yr!;G`-VlehAU|i`jmJ~7$@f6nEL$$(s zdzj|yUZBwGZDHd9c~h&8I~Cfbafv9W!l+p)!DvE(9OrgXiqA z9(Zn?3nLRy3_~7fY5$kM*o>2(Bmj|7-_twzaW%fQDLw7MlCkvsgJ?Q!N zz8ZS|GJB>GI@lPy*|YS<=w@b_UC1)WW3lZ0z`~=NWzPjk=h*HRkW97r4{wX9_5s)y zn`0;Bfxa%4wbFI=JbxGT4`BxuQrD20(>FcbK1eG}54BTTArG1l8V+G;d%jk9cg$VX=)p8r@`ZvhuRBPLp0s4*F-(dL{=eDgGZE04YxCzD3=;)A8aP#8A_c7 z_MKoiVmx^SV}16%sW!U_BihSM+w9Y()v2k0XRB0guA!wlYqxZCQr{zpbgu(iaptl+ zybcI;>0t}Pv&CwgHPqQTjB;0tmlAEo)*SbLX{+ZvdtoF8bsR}(E=4PpVXx?^`_A*y_wx*#eRgWtX zSz;fyQjOq@+M3puML06S$m~pAFC$TK9hF0 zkE<%m&e_nT4ldA+pJDY=*Brdevrm|_5kgvVq0uJuQ`~wr>RRN)8EqB^`lMnjhm)Q= z!YAtyR^xk$#bG?umRjDxoSwJONZMywhpn_A-ExIm5O1cfxvD6AjD1H)|bu#+1r!$9gcnHe7Ehc z;;N#mE~*`EcPH(89Q)q+X~(`VEad*Y{Xo)wut12Sct}$`>{D<&9!c7dI`(6l;_+e( zlTYzP(tgshpCZN6VTxz+_OnU*xj4o1n&JhYq6;WqOxiCw_RE^$6;c#=Dqc<6uQ~SX 
zqpeX+I58pMO8;{HK#|LE91g}Hyu+rK33U*p`r>G9=p>UXsx{1NO3 ziE{f-mBwF4BR-gaGmUk18vp1tn1pn~S)X}GXA~SMf(>OOT~Nyn>%gn` z(CSiI7d7T4-5Km3YDSmIdT=m9>A@Dm0Y}zH1M}?e<|!MfdUtGjjG9WeC%ZD)4N-8U zmkRKYg3=rHyle!Ul#OBI$@YQQpQdH9392gFO&RTP(hAuOb(L&iHW)@!`k{fzuBVgT zT-CXG`>Ry9U`M99B?^vgrJ93Ow?;iL{b7@`4QxEs0raZbCfl;;k7Y$d2Ew7z+m0=! zu^fa3rq@HKx4o*bQuEIp%N^L22@XcVkzy4PBv^uaUWULXWhiVs!C~-b{1F_^7(d5E zg^Ykh$qHRKg}@G;BP_G4T4^ zyF$jIsh`@<&X0N8kr^WY68<$;Xxb4*l94IC=RgVp|g+Fk<`DFQ)P8m+a*meH7`J+!)vO2dq_QlFPSG6 z{^i-VQ~DTbKshNHSVs|Ybm#iHs3qk*SVzu>W%d^^B9#4wsORM(*rZ$x8_)g{ zc!TUOWsHB6C@q!C;81h?a<&*26mkU`sHTrrb0zB;&Tnfkt>!9LsOD-Ed^OiF&{uOU zYDu{c){*OBspbYogw))KdR}gVP0GJu<7#e(H&Am6W0q&MhK!OBnU=RAgxPPRv%ih? z45y@3t+T(K6=r`23V!x?GSJWdF4U58H>@M~z%u)L84=36|W>>ox5v)@!_{|M_|_Ug)79wiF1e+&gb`^Oom=g|`=C*?_4N1lRZ?oYEll>0NN z=jB=0q&x>3&;5CL^)a)$TwXv;9W!5Kus@;3B;+MH)P#DOEvAF!D`=po&9tajS@(7i z_loj0_NA!TQSe2*!9XqQO_Y=J7OW$0!&210*&Y)04(fS%7d9#H!Nx`XhdsPAL7BXd zs#ic3}?3`J*r ztv}F*SxJQ%ipyw88Ko$)cm1fMbNFRN-M*2ZSoh|qE9Pgol$c)#R?pfc^W;|sYB9f| zoRr^T9r**6V*X@%NX%cT=jCtMr2GRL7t;xMSfgS(E1$1I%Tn8?2u;TA(}jJoeKyyU zx~e+QW8GA#$ilA?)|&qe5k8NtLsI`dR;5>Z>(a~3-5mvggw`XQ9-$s6CuMzDM>c@v z2=!!pXoNOIJukgrlhPYDK0+J8tG2ePHQ@3=~DCq-_+H*HylWEW06b&4k zE%exIrs^DHvLE%8=KAP<=AY2fzG+rb-*%^=2v#wH=#!=uJ#2R50;W-uB!He2ekDOPok zO^GTMS@;piGXMHx;U^y}@;gs>Y=)3DHa0`)<=6~E!5^F9WYc3a0_CKPgmq*TEXSsl z?V+(LLp?7E*rb%h#>b`t-e7Dh850_tN*N808k;d}GL6kxG;nOT(qpqD>lyA(xP)+* z=sU4OG2>A1#q7*LEoK*#ld>zUBjaHyW&+zoVs=9v>rPm#J7MEuCcztsnar4wm{OSn zj}kMLO(rpWpn+nx)?%iyp5YGQi>YFTV)jJA7c-rKTFeZTld>1ABYVSA%sy-niAkcK zmulFg>^$cgMFJ^yMDCPhZd@=JF zsKp$Ja#Ctw9XSY=Vp41miJ6ajUTR^JQU@CslZH1Cvw$%nF{7m(9wo+NlWEH|pn+nx z(PA>JyVI;%PqTyBmtq=G@WnJSP>X3s8S6$^M_OSi#%6m+jG&&EENoJ8uyHYM@CITQ zGA1OZOcueT#4Ki$Nz4*7P|N@=W-06LH1ot9!oC!9C@F+27 zv&kgp95hhOKrQB6Rp*iZJXI>P@GGxn{(09j{H7Z|(w|RK|7YDAz0$paUXIO$C^&MF zq6uylU5t8OE`d$TrLb|)m%$q>JT7O9f1e{ESD>yWU&#iOz8(dv6ctS%`9{>SQiR1y5jHOQW_Sb1w=iaT zMk~!Ip^?}2CeZzu!lWKS z!I4K95!%5XLp?8#!zSek*mzP;!kh8ehfgskSRam+r{PeO?HRV1Cfl=UV3IrNB%fnF 
z!-;0q=_H?Lg-O1Ef+H_7B9!DysORNn*rdDy8&C38c!MNgV~n5V*o3?ehf4AdwwRK9 z6Aesquuk$V)-#+bR)bFRZC04%zfo}H9Y%zbd>8e+ya$_<|G>tRd>`H*$qyJ4Bso?- zghM6y5nD`2evAetS*(-%gmrIoyUVjx?8_uSMZuBJRF7Z|evW!xzJN{2m$309zhVz{ zK=oG6(n)XMp5HilP|{!)3Y{10tdiW%3H->81MsuG>}x2$_pz)k!+xK!fbqu|I7 zsz;FckErM6C)lL?3>#1U7xqwzXZ(DBr7xUsrlTx>Q_;DqAv(+76{4O4ZkB(*rLz1J z1xNl;J%TL%Mm;b8z$T>=p4W+I*%{twmPPcfah6?FbgpWs&a$hj^SIVcm5MApfoQG% z$wTjX!0>Tx9Uuh9wHj|hOE2eCcN831PtgRTd!U||^1!Pk4iK^oES_Cw}=T z=>>eRT6Div9HB-2{+ zM=`@12aw*^*q}9TOD{DJM8T2m6iuLU5bAl^9yTdEz{WKWhBwey%$SZfmLP;0M`(>h zSa)v~jn5YMH?Hke1z0$08UOi(uHemPzmiS|&3l&{8H-;80qovc;rj4>VBAD6M50>+XF& zPfHd1Qp=tw_*$kjP-~fiGSj3&q~Q&;EMQEarCjRaP+Baun6^s;8mOgAYss+g&M{BR!R$*djVSn9ni#0H zG^30aFf3NUu+(C+J)}iY#|ju0D`42TmNs|;EejbFXsM7za40Q{*<#YN1P#=Z&{~$V z?#?k!%OUJbEr+7uYjGH;wJbv!3t(6*fMKcS2)2i`9Emy>z_3^V!^XAb;SIDL!A-*Kz^_wU!f6#`+f)>t9%EIfd;Z zEvKT6^)D>ezp!yFXTTe1Ig>GgmeFz+97@aCY%ys$2MyFxp|zaLx;w`_E$6W>wVaQF zujK*;YAqL{oRo`T9l02mS}tLGNXwu5Y~~0V5#L{wuiJlf_h#ag-yz1uyHMq!yBwipJ0rCyP-^;L|sXFiVY?yPosfS z#%L+eu+l9r-eAn~j8>3Q0wR}XZz6=bkJY)q#kzZ|Ym(0WZK5#uf1}{%{tg3m?(d?E-z;Gr z`422}f1mB4+&@4)FCW4tam_ok3~9$G#l3UmJ) z1wZ#M80i04{v~Qj`3lyN|H3l+uNe`_{(q?FdW;BV z-vf0lz+kZegNE4H?t%*!MyRv)@@~-tSBb&i8_r7co<=ziStlGDAh@swQYLdnv>c+#|9k=|@NRhD(Xr2L)eDl7U)G zHOg2q!8$S%mSSeHJtSr}>UrVDNXmY&aWQk*BO+#h`qo&?0V+CIwVM_*Pa&!s&Lj1O z?}2bBF*PV)^`v?PcNJ2o=Vd-@Qfgu2S=PZDTsfr~)A3z}1qk6zF;OQ~&$@eev0Nu* z5rxyG0R=}gsz;F2!Kmk@5jH7Ju<@jt;SG{%VNAzKwIYN`?XHuuS$A(JkI_j9QJ7Q~ z1xIqKN03w->UmiRo0LVc@uU{R8zi-aF&!th6d_D%l1}On)-&CB#v7^U;-N%gQVt4u z;#Bnrk~$3ayc`ajlp|o{NgWAqkknC(@sDwpWpXqe>KK=2i|H753>ui^WS!)4)-!#W zWVudq1^Y6|l_)rJtm+XYc^v9_IUW{Ii^9f}JQ3a?$&(nfJfl;~sLUdJ@yQ5bQd4wN zr?8&chDnXlNu5d*CUqJLj-0M~1WBENdS1?iP0CrY@ubd%H%RIn#a7m!>BX%g z7sE9NVmpl*F-k5$MZXa!=~Sa!%0~4HZMrqxTql>IUZQ>(SeR;TOUvb`&uE*U(Ii)( z>d2L_eDRbo6Y}lR##Gbn>Y5s`)zp;9RcOs9Qhvn1Zyz$GOs+!w~OEY7Pv>@->dNpwm?Ge^MNPm0hutndWcV;w?bk_ z@$h0g)47yx_N12#7^*g!SS_=wOXPkxoAw9t0hP^z@odU4A0G0vVVcAJH00sc!9Jp3 
zA2q?2$zwijhturw)sZ}*NS-v2B;+ZdWb+P2vP_;{9m_L{<=Hq(0weUC&$3$wEZAP& z&J^EbdN0!(?D^HD^MXp}MN>Lu@{*sJT<@Q*dPKOj^60r|wo z_XlJu2V~XikUv$(pJ`<809M=5oT;yIkMs%o+z0Qkl6Mc|jA_HycG7C%sFOkhTE1`@ z+dqhWsTjX%m$7VdLjJpEj0gqe*Q;avpJM!`UB(0$zg;s%go5$A)iHjr7=LJ&k;ls) z*NhRNVEk!yj6W;JU)p8l@#ohyV?-zze_I{n?~3t{b{WeSm&u=N#)wcb{eOxEBc>&Q*khu`_}@ymBZ)fFoUC!xvcLy`xmRt{EdPnZVd>b&TsM#&tVj zER*hQ%1Djtwa1uNAInHD#x-vYP&0+1gZNS~i7;7sWb*%}~YNF<556i)kK+r!0_u zaJeb$^7+D|it}pD%aAogqKpf^If_+9sZ0cN3%DKG5|)s--_R*lEnBf0FPNw7`as35 zhkkvE6@%NFa9v_>{T19cE?kC}uNvpMH!ewi^2-8+qdr77yQOhKx;dLmH#Ext^0{el zOWe*e+<^*rI~P|C9hKvhE`!+D&Gn0S2EMfFpxxYDLzvqWGcJ7xg*jMbdM6ik{UOBx z>O-pzny_;8o-x!CqQ*yHh(aA2K~yvFefU^-8H5+wU#vH zQf)FRK>f4%tP}Qnf2SDgWTN(pp-xe#Q(aVRTfJFkJ=eW-hcA{W5VaPcKB2l=_8^P9 z&8HEqZw#$Uq3x;BChC1CJclwNkm&@8&&wGKWG@XeRi~6bJhW9qAbS%ezE$^8AW01} zOM`42o^BzKYJ$Y4+rA29rUuzxgVc;SXEKW*o5#jvwgTY+82f=WUyn(_gbHP{A3@?1 zYOVs=UxU?;-faE6JBdRe3kcFLHpKM` z#L^&pXkot&+}>VxK_&*$K#=&1$taM6HOK)PWa*#-hd%p3uNX)pLE;;`Nr5zLkR}&o z=c9f(N=ff8DI6|O09uL^fV+G{Q!Sv~@5Z?)7x z8bjQ;)&_OAC0Ll2j8qX*7AU`7{FRiH$dYK%?r{ZKb`_5HG7A<0p3*hk$4 z+Z1}D86yH0eajG2Fh(xjVeHEDRA{6QhYwHwz;gJ#%cXQ$j%2Xn83dlPkfLA!)C$%c zixN2sT@18G6DdNQhtH8?U`gw*XlMH?+U4w|=)E(;^x0g|u7Kabv6A5tj$`3-gh*=QALJb^&~jTnHORt1p#{7_74P6s6N@b6^NBMw&eO=^vJpyJD4i0y4LuCBw-YErdk1`u+zA`rP)p{^T?|&N zTXP&Kc}dg!&|#>|9ZQb+-H0{t-9v~7-@WiTavy8~U#;BF;3!|MnePF_8u%V0M1=1l z`0%6{Yyn?d9$|2lFKy;~6tM=r#|RPOdmKJTo`5ajYmz4!9OY{=^F4)F1K-nxi10lF zpCix07VzcdIR;1ha%R5g5o_Rkfe;bC7vaNWYp~?=zL>g%i-wmOt{AroW(*q4oUb6- z!1*dcBAl>pw+IxWeH%V}y91WA z-f3`;(#45Vd57_em`544bGZ9P_~ukX-bEPL{JXIk_d?)3_KvXq2R=MY2ODL>?Q6z& z%*NZcA0o`a_7QtW*gl5OkxyWwYzbM#_>S3l%l1=*8Q4B!?+DxH@Hz4YYylhYt(M7` zj4xz!_xfNYdAs&2gc;cW%ia;Tui?X2M_{9D3HgTcg>3FV5wP)A?Y9Usuzkng5w`E) z!&_dk5w@~2`H}I3Y;kR6@)N=gY(KMigzXpj9QhSC%9fDd7+=U1*H$LKBh0||2YW}@ z{)7+jhQUVI5@qr?;|tm1+7j{)!VGMkaHSh%>kJ=0ehC|8OGp>S7qZ2*C8R6D3~b%l zJECnJ_#9amwtx+Hrc0$e;}x6tiwu5W!7NmVlO?sX9@-5wJ=iTmvp#(IHVSN%rc8P= zK2GCavVdkov>Rx8v0H?uH++t41RJGE$i|G1(8*@WF9G@HVQM<8J%G&m&o 
zWqgzdJCjS3lYVG7&}`0b5t=RF!wp~9C{3Ac#rQal9<-cnjdlY~e|C$|Yy+Po17M>x z3E7tMaT+~nIT?s{1I>2q7NHpgpCjAD7SNP0E=|Y|jE~ab4C4;kd>M>(15GizMQBRk zb7Tl?gr+PZLm3~X!70O|8HRQP&2V;$(2Ri3k&&!ulj`8ptI3_S$bELiBLUw}_FV?}% zligwcn>&+Gabz-G7)9^V8~4TtenXjp2A8`xu42OXGE&u8olZq_K?-E>$i4eVd%)+& zG+0vb34uh0iPp#!IIEhrB3lT+TQG= z$gw8wG=t;?> zOYs+pyr1|>b734>d;)ge4(=5tFpkuscCB0T5~cQV+KsgmQq8Ld}u+upUA~ zlhmSWM&MWerLF)Wtq>My1U^!iP1j1j0fIYd>4=fFE!pCwC1MqzG$@peM&TC2r}=B; zU;_--*7zwhTMR7VG#21ADV%1F!)$7^wbEjM(b;4hnwd>&0gA0qM5Cx|2$MCysB8%0 zW|LDm>XGp_cGriUg(%OHMarX}8eEKuBTMMAiqw}&-EGcCpY)Sr?lRPWOl)Lgge*lo zrf{&5AhI`ky?yZz1%0T7=F31X^cH~DcS3NGN3fWFXY{4dWH^WJUh#Uh?h7a#p z!gB9b$GM3$qvceFDmE%p&j1Ew{;i_gMI~|?A`M)p6ClEM27Hd32^-?V=ZF}pxJtmK z?hgfA1;TJwUf-l5^V#TUU_OU%5$1E@!;`?UA?C4iK0_5V=e0XEyzN&&bpZkmR2Q;; zgz6&r@WLl7sjz0@XESkqb_rw2mEnrt0CuSX>@o#*c?7InuJFOQpf`YBX#l%Qfn6N| ztB`AaFs|1PVAmSJu2W#wN5CrO1`kX?Kfw41_Ji=@eO1^HcG)O-n6WWztg0VDg8}+O4F>F|7$3oY8a_v!femAq$+L_lcBVT@2C(NC5CMB0K1W`F4TB}*MaC+y zo+uf>USdE5>}B{Ic?C8MRxYnHR)KAdk^$^B21LMKhtH8WV8dV)@+M;ySYMP3U~e%X z0`@k1xLpVv2CI~J7^}dxLdgL3E(0Q9@4<&RY+=J-qvd_ZDzI%)GJt)+fC$)!@Hz4k zY#3~ee9Tyah4y;2D*Xfv2JBUgk6?caAHL268^#_hpEEXwjiu-pXfR-Z$@mENSMcFQ zUDyzIVw8N%*cdj}pZ`OH0s9-qN3g$z&ynw7!`P+rJ!2I%eNH1?k^cIg_Q_k$&MiFCsKdxN6Ra7Pst!H35`VWB89!oO@A$t_dd@g)Z@ z3&ONr&9vPNwCgC^bv12`PrH++H(0%^fo=D)r!WWUZcbr6Lkc}q3hTQmc$XuI(Xs(! 
z)uiE7vA)3buX(b#yy}Sv!|-m%j*$t|3qD7B!-i+(7}Nty-YdbA0b80Gw=ytptr+`j#?d}wg~#Z=!lfrq!Op3lV8LxBm4o~GLWyi+CLdrR z-&TzdUTh#QQRG83dA*U` zyNSPMiEgkxO*E3_mNk7=$>dM+}%JpNfA!ggu8GNpa_e-quQGAO)>LLHSq1B z_@=ph-Z`+MOsW{Gn0HW8)pbC7-0mTzMD|1{!}(%5u_EV-8Svr$GAz#*1O3zgcB|9r za>?Fis(lPpNkvudQuz|gWnacBi5=a?;i-2fA`QA`5g?*#Hhhlo@rlr>x1vJ!W2_?M z8PU4~>D^F?o*?v{9CFV^M+5i%#EWnr0G}iCV8h&%av)41^7eFrx|iHCUI>-yPfK3*C%6*v!{x;A>KR%`TreS1U(J3uDzL+|i#t z{@z${(dIqCs}sX!^{-s1y%j))O>7fCvWW#gN3yV?=~P)NImW77_KoM#{!GS$$9geQ z5EE>n(1^8x!jR=cGDosp1RoxPhUG*&+CN}cb0Ttv`NLE|8;lA@Xl+tkVoq(TA+R>ZNbr_3V$fdBMF&&M+AIMlm zADovWo3h}xp1WNQxm*q)LoQbkKa$Ip@Hui7EON=L=kK*++nGy-1pMnO|4<@Vn-jRk zkifMnf$Q7^{B4nt>lv#CV^}+>v@8DxbT!Dok%$rbH^GN5jlzaEMY-I}SjE0(`!X;1 zZb2Ud?X850(B1~0Be%nbX)ELo#wyx1+Ly_CC%PC|?;=u!^=|m^Cl_JEtd(*vV-@TE zYc%a~;POtM{`G*j?Y-R*hdIN>zGmUB4bsq54JaB;|D6duBYF}TeEA84yUpQ z)zR@eI6cInf)q;RC8#kB$jcNN8IV`tbL3T63`nNhKV099Vd&r@%i~|Wi!HBLaLE!V z3SH(G?D_@69)!oC!UOP{c>rEF48R*|0N!*5z&l-xDU-Jtt7ZgWU2|^x4VRgZISk}Me6ewLX-+i^n=X%e~^&bP*`-^lYaeFSWb z{NRDj2~8yTeL{cZ{)iX@%}?wa$@ypa9Qg&76N!JYX*Clm9oe|Qnz?>6aQ&{h{&2ax zjXQRf{K?qJz&FWX@ESz?&ESZLf8cYZ6JDJP4gASa{%@!jrH>)Sf^St++OFx%`b>AsaKca1YLf2T%GSz%X{3uwP{CHiZxGOu&Y>cct`Y ztm5Dksp_{}^%`A2_zlpTGdu#l1$=l<0yYdiTDD@W+K2g0o&juY16Y3rwoL?Vj12I= z=BOtk)SW9P8Xk`l*%na-qJiuj$$C5Z92o@5$;3Yv<_<&nc7Ay6OR~r&-rmf%gMn?Z zVk_2ceSEfY81#1U`UP);@w-v(R|TTK$(ERN7-Gm_sLElO&SA>h=YV%@Rj$z-hMRL3 zVaQ>m%3+kwVbR*>kZ31|QgaSvh8z+qhjN|6_G_C1e!OibhYE8Jm4+Nfs~pDY9A>Y5 z4()A=vF03hG~}?8%3++&VfotU(B8J#*_^{Jh8%WPIgHmij9%Lu_?0^IoSb0JVK+k# z6IBko>m2IWK8Njf@3ZFM~||5D}a4mfXrq7MYs1Vm4C%OMUqiG zH>+v`>b?qfri*HQ;HKsMcIQKK@f5s}Gs_G=I|QF@vyPl6e4RY@vdx|?g12Rs_K^M1 zig#`1;vYx$XQ+Pkedf&SQaJz)bpp?}HZ*5t9;)-?Kt@gb2mdKk)j6$R!O7;Ntv1)j zS7s29;rCGSVvNQ=2rm4IEcW#AXO+r)M!KlAYzv`wNoR@joz3yBh` zLqmX_M$M50?2nClw#KVx9iPX?|6HU#Ths>!odomh<|HC=fX#= z_m-B-^x`spNVyRKK^je{InvBdS|R|;qy-KQ*vdA*1%N#F2Y^_xv$D5~ZX-HC7u4|M z91#Nagyi7R=xuB(K=;zsfC~{204_oezoW4et{3<+^dOtuqB8J3Qd;+B5r6oh$Ast>y*dq&I 
zk5aHlYuFD0*nA9jbVsn*RAocrq8RnGKkC!Z>iFiVj~wAHQvEL0{eIW|E>Zougf~H^aFi}p{Vvn}-qHOo_xqu*9&J9W><>_R zRG8lqj6>P5@)EfMNWsRvQn6j-vSl_2Hty91!s#6l4js@Aq1spdwqX1dOy-(`WUf`o zT<0cZy{n~PZ^&c~rAOFqC}6u$vE8KE-qURVS{+;Aa6(;s+HNjjyG60x>au0V1_OPY zfpBIA1C2Y>K4W1nCH&r8xVcb1o8@+uR9d}a+?tYHLv3TarmmqjCwHg>?^M}rW_7RX zYxS?{XAPS6Ij%IURaKi>L#j5gMo#+?RrNPXsy4LBr+v>F|M*8$Z)?o7Z&~AC-KZ+E z#!dTMMdEN<)zun5?N8SFvbK&jY1;3s@vrPub+)S1WrzCvj8$E%y{7%ewnZ4As%}>G zv_Du|&f4bIoT~M$nrT1L!#`?M)zhk*_5*9zu-40JnD!lO{F~5i>W$@wW_*b_rCxq^ zMc$=E-VLi>9GCP0<*&)LjFNj)+r5UiQn^pH-Oo0@#$4Z$t7%GQS|rt+%E|+9&rk=_ z7Qr`#Ype#4O0Bg-9#q{Pf-Sagg4E-xirmDo7SE<-VY;rS*22rvX?Yki_|#8by`);L z5exCBKU&(dBbpkrwLSJCXu7UCo$DcwAgm-Qk1E6sXINX!u(qDjrnqZc+FBbMn(J#+ z5?Of+F>Y4)s~xH4Y(q=4+a!;p1)ps#AKPM=bdnm%!4Q+-9MskXVXWn>e+sN9$xnQhu`yF&(Rtid}D z7@Qa}dc>%~!v+kl!vE)Ieb*<@KF_m9eIjGxTDjlKGlYScpBAt@(h!!!t6+^ z&m`31{pp%4zFORnYXI=GXkDo`u(UNdQ**j8?Y@8hoa+8OtlGe-#>SRKHLVS`2jip8 wFTlG(32kg>YRIK>(3+JORgah0gP%dm=CD^Rkd~%YW2==KB`>34W!s4V2gj7TGynhq diff --git a/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( 
fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/gg_ttg.mad/test/cudacpp_test.mk b/epochX/cudacpp/gg_ttg.mad/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/gg_ttg.mad/test/cudacpp_test.mk +++ b/epochX/cudacpp/gg_ttg.mad/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt 
b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index ba99f30bdf..4be597d33d 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. @@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +38,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). 
Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -55,7 +56,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005433082580566406  +DEBUG: model prefixing takes 0.00689697265625  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -148,13 +149,13 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.012 s +1 processes with 16 diagrams generated in 0.049 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 175]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 @@ -163,18 +164,18 @@ INFO: Processing color information for process: g g > t t~ g @1 DEBUG: type(fortran_model)= [output.py at line 224]  DEBUG: type(me)= me=0 [output.py at line 225]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'color_sum.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 
'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.h', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'cudacpp_overlay.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 226]  -INFO: Creating files in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.045 s +INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
+Generated helas calls for 1 subprocesses (16 diagrams) in 0.066 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.206 s +ALOHA: aloha creates 5 routines in 0.484 s VVV1 VVV1 FFV1 @@ -184,17 +185,17 @@ ALOHA: aloha creates 5 routines in 0.206 s VVVV1 VVVV3 VVVV4 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. quit -real 0m1.176s -user 0m0.468s -sys 0m0.131s -Code generation completed in 1 seconds +real 0m1.660s +user 0m1.379s +sys 0m0.231s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttg.sa/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity 
filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before computeMatrixElements) + 
const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc index 372ced5d87..028e8696cc 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc @@ -1150,38 +1150,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1207,7 +1219,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1342,7 +1354,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1366,7 +1378,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1478,25 +1490,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1559,7 +1582,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.h b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.h index 5c057176f6..6ad3c7dd1e 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned 
int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for 
V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to higherst + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) + ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + 
MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ 
-1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef 
MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.h b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.h +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ 
No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned 
int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/gg_ttg.sa/test/cudacpp_test.mk b/epochX/cudacpp/gg_ttg.sa/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/gg_ttg.sa/test/cudacpp_test.mk +++ b/epochX/cudacpp/gg_ttg.sa/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index ea9db152a3..28fded2fac 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. 
@@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +38,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -55,7 +56,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004921674728393555  +DEBUG: model prefixing takes 0.009834766387939453  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -148,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.080 s +1 processes with 123 diagrams generated in 0.348 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -159,10 +160,10 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vecto INFO: initialize a new directory: CODEGEN_mad_gg_ttgg INFO: remove old information in CODEGEN_mad_gg_ttgg DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  +WARNING: File exists 
/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 @@ -174,25 +175,25 @@ FileWriter t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 
33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1748]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.223 s -Wrote files for 222 helas calls in 0.654 s +DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 
33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1749]  +Generated helas calls for 1 subprocesses (123 diagrams) in 0.955 s +Wrote files for 222 helas calls in 1.123 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.219 s +ALOHA: aloha creates 5 routines in 0.532 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.197 s +ALOHA: aloha creates 10 routines in 0.568 s VVV1 VVV1 FFV1 @@ -205,32 +206,32 @@ ALOHA: aloha creates 10 routines in 0.197 s VVVV3 VVVV4 VVVV4 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages DEBUG: result.returncode =  0 [output.py at line 273]  -Output to directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. +Output to directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. 
Type "launch" to generate events from this process, or see -/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README +/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README Run "open index.html" to see more information about this process. quit -real 0m5.675s -user 0m2.118s -sys 0m0.681s -Code generation completed in 6 seconds +real 0m8.954s +user 0m7.850s +sys 0m0.859s +Code generation completed in 9 seconds ************************************************************ * * * W E L C O M E to * @@ -251,10 +252,10 @@ Code generation completed in 6 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt treatcards run @@ -281,10 +282,10 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards param diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt index 712b1897aa..db7e3616c4 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt @@ -255,7 +255,7 @@ # pineappl = pineappl -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat index 5fe0cb01be..a6eb18aa10 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-14-g6dba8f068 3.7.1 * +#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_functions.f index e986b059a9..47699fa614 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_functions.f @@ -2022,21 +2022,6 @@ subroutine orxxxx(p,rmass,nhel,nsr , ro) end - complex*16 function THETA_FUNCTIONR(cond, out_true, out_false) - - double precision cond - double precision out_true, out_false - - if (cond.ge.0d0) then - THETA_FUNCTIONR = out_true - else - THETA_FUNCTIONR = out_false - endif - - return - - - end complex*16 function THETA_FUNCTION(cond, out_true, out_false) double precision cond diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/Bridge.h 
b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for 
helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before 
computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc index 0f6ddcae67..1e7036acd0 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc @@ -3084,38 +3084,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -3141,7 +3153,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -3276,7 +3288,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -3300,7 +3312,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -3412,25 +3424,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -3493,7 +3516,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.h b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.h index 96f4a4724c..6ef3863ae3 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // 
input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f index 6a61beea31..6913e66444 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -893,9 +892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) 
CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f index 0f7fcaa25f..539796fad6 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f @@ -337,6 +337,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -347,6 +350,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -443,7 +447,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -516,7 +520,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -529,6 +533,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -547,6 +553,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -598,7 +606,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -622,7 +630,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -630,7 +638,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f index b173f22bfc..c45d039de2 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f @@ -372,8 +372,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/addmothers.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/addmothers.f index d6cded9a2d..593c620d9b 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/addmothers.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/addmothers.f @@ -111,7 +111,7 @@ subroutine addmothers(ip,jpart,pb,isym,jsym,rscale,aqcd,aqed,buff, if (btest(mlevel,3)) then write(*,*)'unwgt.f: write out diagram ',igraphs(1) endif - lconfig = vec_igraph1(ivec) + lconfig = vec_igraph(ivec) endif is_LC=.true. 
maxcolor=0 diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cluster.inc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cluster.inc index 8ddf5bee13..940c25eac0 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cluster.inc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cluster.inc @@ -43,5 +43,5 @@ c parameters for sudakovs integer iipdg,iimode common/gamma_args/Q1,iipdg,iimode - integer vec_igraph1(VECSIZE_MEMMAX) - common/vec_igraph/vec_igraph1 + integer vec_igraph(VECSIZE_MEMMAX) + common/vec_igraph/vec_igraph diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See 
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to higherst + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) 
+ ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone 
bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern 
"C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.h b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.h +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile_original.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile_original.mk index 6cb56d0409..348c283be7 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile_original.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile_original.mk @@ -58,10 +58,7 @@ $(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) $(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -libcollier.$(dylibext): - ln -s $(LIBDIR)/collier_lib/libcollier.$(dylibext) || echo 'already done' - -gensym: $(SYMMETRY) configs.inc $(LIBS) libcollier.$(dylibext) +gensym: $(SYMMETRY) configs.inc $(LIBS) $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) $(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/myamp.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/myamp.f index bd02dfe2b4..5360566ef4 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/myamp.f @@ -139,7 +139,7 @@ logical function cut_bw(p) $ gForceBW(i,iconfig).eq.1)) if(onshell)then c Remove on-shell forbidden s-channels (gForceBW=2) (JA 2/10/11) - if(gForceBW(i,iconfig).eq.2.and.sde_strat.eq.1) then + if(gForceBW(i,iconfig).eq.2) then cut_bw = .true. 
return endif diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/reweight.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/reweight.f index 353e025d71..8e4672a421 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/reweight.f @@ -1416,6 +1416,7 @@ double precision function rewgt(p, ivec) rewgt=1.0d0 clustered=.false. + vec_igraph(ivec) = 0 ! default: no MLM graph selected for this event if(ickkw.le.0.and..not.use_syst) return @@ -1467,6 +1468,7 @@ double precision function rewgt(p, ivec) rewgt = 0d0 return endif + vec_igraph(ivec) = igraphs(1) ! save MLM-matched graph for this event c Store pdf information for systematics studies (initial) @@ -1592,10 +1594,6 @@ double precision function rewgt(p, ivec) c alpha_s weight if(ipdgcl(imocl(n),igraphs(1),iproc).ne.fake_id)then - if (q2now.le.4)then - rewgt=0d0 - return - endif rewgt=rewgt*alphas(alpsfact*sqrt(q2now))/asref c Store information for systematics studies if(use_syst)then @@ -1907,7 +1905,7 @@ subroutine update_scale_coupling_vec(all_p, all_wgt,all_q2fact, VECSIZE_USED) else all_q2fact(1,i) = q2fact(1) all_q2fact(2,i) = q2fact(2) - vec_igraph1(i) = igraphs(1) + vec_igraph(i) = igraphs(1) endif c call save_cl_val_to(i) c endif diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py index 74f6b04b68..c248436e7f 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py @@ -1004,8 +1004,6 @@ def __init__(self, finput=None, **opt): self.comments = {} # comment associated to parameters. can be display via help message # store the valid options for a given parameter. 
self.allowed_value = {} - # allow nickname for some parameter to avoid integer mapping for some var - self.shortcut_values = {} self.default_setup() @@ -1134,11 +1132,6 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): scan_targettype = self.scan_set[lower_name] del self.scan_set[lower_name] - # check if the user used a shortcut value (which are always str) - if lower_name in self.shortcut_values: - if isinstance(value,str) and value.strip().lower() in self.shortcut_values[lower_name]: - value = self.shortcut_values[lower_name][value.strip().lower()] - # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] @@ -1317,8 +1310,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): def add_param(self, name, value, system=False, comment=False, typelist=None, - allowed=[], - shortcut={}): + allowed=[]): """add a default parameter to the class""" lower_name = name.lower() @@ -1353,11 +1345,6 @@ def add_param(self, name, value, system=False, comment=False, typelist=None, assert val in allowed or '*' in allowed else: assert value in allowed or '*' in allowed - if shortcut: - if allowed and shortcut and '*' not in allowed: - assert all([val in allowed for val in shortcut.values()]), "Some shortcut value are not in the allowed list" - assert all([isinstance(v, str) for v in shortcut.keys()]), "All shortcut values should be str" - self.shortcut_values[lower_name] = shortcut #elif isinstance(value, bool) and allowed != ['*']: # self.allowed_value[name] = [True, False] @@ -4186,10 +4173,8 @@ def default_setup(self): allowed=['partonshower'], comment="list of check that can be bypassed manually.") self.add_param("python_seed", -2, include=False, hidden=True, comment="controlling python seed [handling in particular the final unweighting].\n -1 means use default from random module.\n -2 means set to same value as iseed") self.add_param("lpp1", 1, 
fortran_name="lpp(1)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='first beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("lpp2", 1, fortran_name="lpp(2)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='second beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("ebeam1", 6500.0, fortran_name="ebeam(1)") self.add_param("ebeam2", 6500.0, fortran_name="ebeam(2)") @@ -4198,24 +4183,18 @@ def default_setup(self): self.add_param("polbeam2", 0.0, fortran_name="pb2", hidden=True, comment="Beam polarization from -100 (left-handed) to 100 (right-handed) --use lpp=0 for this parameter--") self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_proton2', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(2)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (used for beam 2 if group_subprocess was False)') self.add_param('nb_neutron1', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(1)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_neutron2', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(2)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (of beam 2 if group_subprocess was False )') self.add_param('mass_ion1', -1.0, hidden=True, 
fortran_name="mass_ion(1)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 1)') self.add_param('mass_ion2', -1.0, hidden=True, fortran_name="mass_ion(2)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 2)') valid_pdf = ['lhapdf', 'cteq6_m','cteq6_l', 'cteq6l1','nn23lo', 'nn23lo1', 'nn23nlo','iww','eva','edff','chff','none','mixed']+\ sum(self.allowed_lep_densities.values(),[]) @@ -4228,14 +4207,12 @@ def default_setup(self): self.add_param("fixed_fac_scale1", False, hidden=True) self.add_param("fixed_fac_scale2", False, hidden=True) self.add_param("fixed_extra_scale", False, hidden=True) - self.add_param("scale", 91.1880, shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) + self.add_param("scale", 91.1880) + self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1") + self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2") self.add_param("mue_ref_fixed", 91.1880, hidden=True) self.add_param("dynamical_scale_choice", -1, comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2\n '4' is the center of mass energy\n'0' allows to use the user_hook definition (need to be defined via custom_fct entry) ", - allowed=[-1,0,1,2,3,4,10], - shortcut={'ckkw':-1,'ht':2,'ht/2':3,'et':1,'shat':4}, - ) + allowed=[-1,0,1,2,3,4,10]) self.add_param("mue_over_ref", 1.0, 
hidden=True, comment='ratio mu_other/mu for dynamical scale') self.add_param("ievo_eva",0,hidden=True, allowed=[0,1],fortran_name="ievo_eva", comment='eva: 0 for EW pdf muf evolution by q^2; 1 for evo by pT^2') @@ -5598,10 +5575,8 @@ def default_setup(self): self.add_param('niters_fo', 6, include=False) #seed and collider self.add_param('iseed', 0) - self.add_param('lpp1', 1, fortran_name='lpp(1)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) - self.add_param('lpp2', 1, fortran_name='lpp(2)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) + self.add_param('lpp1', 1, fortran_name='lpp(1)') + self.add_param('lpp2', 1, fortran_name='lpp(2)') self.add_param('ebeam1', 6500.0, fortran_name='ebeam(1)') self.add_param('ebeam2', 6500.0, fortran_name='ebeam(2)') self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", @@ -5644,15 +5619,13 @@ def default_setup(self): self.add_param('fixed_ren_scale', False) self.add_param('fixed_fac_scale', False) self.add_param('fixed_extra_scale', True, hidden=True, system=True) # set system since running from Ellis-Sexton scale not implemented - self.add_param('mur_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mur_ref_fixed', 91.118) self.add_param('muf1_ref_fixed', -1.0, hidden=True) - self.add_param('muf_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('muf_ref_fixed', 91.118) self.add_param('muf2_ref_fixed', -1.0, hidden=True) - self.add_param('mue_ref_fixed', 91.118, hidden=True, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mue_ref_fixed', 91.118, hidden=True) self.add_param("dynamical_scale_choice", [-1],fortran_name='dyn_scale', - allowed = [-2,-1,0,1,2,3,10], - shortcut={ 'ht/2':3,'ht':2,'et':1}, - comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n 
'2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") + allowed = [-2,-1,0,1,2,3,10], comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") self.add_param('fixed_qes_scale', False, hidden=True) self.add_param('qes_ref_fixed', -1.0, hidden=True) self.add_param('mur_over_ref', 1.0) diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py index 6f82393c3f..3c5601e27d 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py @@ -5205,12 +5205,12 @@ def init_run(self, cards): if self.run_set: self.special_shortcut.update( {'ebeam':([float],['run_card ebeam1 %(0)s', 'run_card ebeam2 %(0)s']), - 'lpp': ([str],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), + 'lpp': ([int],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), 'lhc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), 'lep': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'ilc': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'lcc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), - 'fixed_scale': ([str],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 %(0)s']), + 'fixed_scale': ([float],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 
%(0)s']), 'no_parton_cut':([],['run_card nocut T']), 'cm_velocity':([float], [lambda self :self.set_CM_velocity]), 'pbp':([],['run_card lpp1 1', 'run_card lpp2 1','run_card nb_proton1 82', 'run_card nb_neutron1 126', 'run_card mass_ion1 195.0820996698','run_card nb_proton2 1', 'run_card nb_neutron2 0', 'run_card mass_ion1 -1']), @@ -5795,8 +5795,6 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed_for_run.remove('*') elif isinstance(self.run_card[args[-1]], bool): allowed_for_run = ['True', 'False'] - if args[-1].lower() in self.run_card.shortcut_values: - allowed_for_run += self.run_card.shortcut_values[args[-1].lower()] opts += [str(i) for i in allowed_for_run] diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py index 262d39a736..3bd0c281fc 100644 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py @@ -38,11 +38,26 @@ def compile(self, *args, **opts): cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py - logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend in ['cpp', 'cppauto']: + backend_log = pjoin(opts["cwd"], ".resolved-backend") + # try to remove old file if present + try: + os.remove(backend_log) + except FileNotFoundError: + pass + misc.compile(["-f", "cudacpp.mk", f"BACKEND=cppauto", f"BACKEND_LOG={backend_log}", "detect-backend"], **opts) + try: + with open(backend_log, "r") as f: + resolved_backend = f.read().strip() + logger.info(f"Backend '{cudacpp_backend}' resolved as '{resolved_backend}'") + cudacpp_backend = resolved_backend + except FileNotFoundError: 
+ raise RuntimeError("Could not resolve cudacpp_backend=cppauto|cpp; ensure Makefile detection runs properly.") + logger.info(f"Building madevent in madevent_interface.py with '{cudacpp_backend}' matrix elements") if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' else: - raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) + raise Exception(f"Invalid cudacpp_backend='{cudacpp_backend}': supported backends are [ '" + "', '".join(cudacpp_supported_backends) + "' ]") return misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/py3_model_FDG.pkl b/epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/py3_model_FDG.pkl deleted file mode 100644 index bf5a732979d683e3642a1177b58851862f165d66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 49027 zcmb__2b>he_BAZ1hyf#DLX7N!gpGg+5fNl1I7>3%I_wVgjvMH^vm|Ydvw%70tWTdg z=bWB7>oaFD>oe!@opZZ7%uMZq|KEr2-L0xyea^k7s=KRudU_5os%uKt&z0mnXGmR( zq=w8+wx()ZW~Wninbw{YTk2DdJ>4!PN4Q;!XLj$>rAu?NDdiT;EJCrh$?Z0CO%&!O zCDTyXm}+&qcLp}ablbb3HPh2w4VbGJuS)-(?i%RZqu42$*%fR_cg>kS__sOJpwaX+ z&1-3Zt@9e1-LRK9G#9gPl>&(?zbCNPQ<*qvpW&NicG$mVG-CkPUZ){?+ zyPj^jprJnFxa&LXDc8LExwUoB!rh>{xAtsoXim8sYH>kw{vvmynLU9xPg>?B=O#1Y z+_<{S%r(%|&{|7*+}_J(u7hf8s&P&&BiF`0%M>BAXkN*(q^12R_x<*WIGJOPfQ5`UMItcDHnVQ5L)X9cbY| z0Vhh^>7{Zs)i>5o+|S+0SwF1K*w5YC*`T_Qv!Sz*v$3;@(|eiIXPMI%C4{k!16C*G z+m?Ks2nzav28(8P0VFnGEN; zI~cwmITaw`L-rX_Aw=S=HDDhv4VT<{Sgc*Y$W zfM=GtJ9ZAfo*jIrT=1QPacFaQ0X7G|ZHc?9f$oNZ7`0-W{&RYT3KiL*N9D@DTL4|> zjyBMBOWfV9(BZV5IK~}ghP%6+Wsh8zu>niQ-P5o@n{g{)nd**@&)li*UYK92+X*M+ z5>^L<_3qw=uzra*=d_Y+LWG}8hEx!)@2%6np@qL&Q9ukMLqp>z&xC}+)md4 zu}ylqg6^%wI%W;^ZU(zwcZW-fwqj?d|6j^kXO=rZF9ux*)FvC}re;gB=NK*vYBJK$ zJh!L209!Bg+(pG^#gLja$6Z`ECrGE~o~G2(Jw)ebse5SF^%Cw9Kl9ewhNk3PtVCpy 
zd)RVSa8h0Eyp{zxNgs}rG|&2tEiLnE!`35k!k(Sf)d9jtHD?yPN6vDOQu-SFD|L@f z>beT6fO%MG%x!hEp4(pCrMgSIyHvZ48&g|e?k;Q9+1?`cDQR_=d+sqhvGy6ehkI;w zQR~cg1L|mjcHAVVudX?`%yW;Qxjwqg!$PA?W+yp%wYnBLVN#nzp-(J!GSzDug-;%G`mUT~0xMw(rEq9Q*<0tAMxighhb2}^PAe*zDFp;W}?m4Xv zqCeL;wz_Mp!w{Y0ogjwu7{l5ohVzS^f&5QnxIkmLu+x#A)6>03N3FrVc$RxfF+}WI zU6g8fFHN|YdG5bvCq4J_cnVi!-G3+CD+^P&N>i9VeR`>TwWe@QltSq+_gYQiI)4Dh z?BQN-N#zEnvW`jRMw5z3=B8rJ=H_DOA_tM)Qas^r{A0KX_SS@Zo9Es>+xOj3TwPS% zP5toQnQ-s&+`DI|Jolb>BKKz9`x5T`If*F61BT+kkb-(VlyD#R+(!(>qs5RZr1($5 zeav$oC&d$SiYK$~QwjI!LW*Y$#j_zrH&8s6aG&?w7YxOVq$moqcq!q&?76Ry;=gf< zSF`SG3HS9JiWFzm8~*N!@xXw*sXGPBq0car%AWT#ub5fBJpMG276L*+<1r4_#`nz;V{Wn%A)}b{(v6oxEl3a;HzY z5d1hg3vH`Y`4b(PPd)duc=DfT-7gaEm$Bsau+Y%d)RsxkZcMpf>ET9?W|MI}@^wkO z`@gLFjeqb+Hn%i4)Fm6;Zyh?m%evot?hozmk2v^H!JmqqtIeFoA?IhmSTnuM{lyf~ z`l|}~TQR0OFaB`Ysc?Viq-=Np2-F;2;r@vxDEU{|ROxc+c59a|IGYEXmP=REsI?TK zVB6cJ8yr-8GhcD(uJskpwmh~|Dp`%2;@~5zqu|LJ+95myNe|SsvLGxyblUUykMS5UpScf<|gq?w62M3+M9Airo9;oo@}n}DD5p!&q_boglq|0 zn09~du@!xO+FNVGHn5&-3)?Q+!J4$UN3q!X+4)rmuVy+jAAlz3EX}-Fz2Omjc$t)- z#m{^oN@3>1WsvVEYL^{QN}DQX#^6}Lsu3~-RaIyx3f8GH3=S%^g;A(f>pieXLM7Fx zjGI!UauhtN&<>GCm8fT>3N|6bVGA`Hp*=>@=WDd1HtYoJ$u?st?b~vAw6VT=qqaFVKG6g!gZ7z^NK8`}+>ep#x%$ z@{uyjcVIRh7;{urNv-eb-Yy5l9OWY<={r_ym)S8#RjJgWs-sYkf_46-;NU21X+~j= z)>m-$nfW)Dn{pf+6g+9r4$(NIQP0Z3unB2|EgXj??a@r1KMpP0Fb~!f7q(pl){H|d zip6QpI_GFrYr5{I88nz`&Gb@f3#v8Kxfjoms`M^ERfl6C3bElRuare-@rPq^unm`; zWpaq`z;GOjQraB!FbH0(3h9@`su29JpsF$-j+%9{9{~qtE-^A6sde4S&2&GCn^NYZ zQScbje! 
zsdSq9s)8r8UvxUC94@Dz#aHlDl!6IeSt+Oa4k&m!N@-Jtf@j3~Rh7$`sH$3LpE1A!Z$v#SH^C<4X4t|MZ_ysN(&wjmn>O4I>&YFk?Q$oqN%1Ze zd2T$*`87QNWgY#ywJ|&;jwqFT(Bh|gFG}eleidVWA4REXlHX%>I77FsD_IQduUy!G@ z;Tc#@o`r3f=U|N>&!gxci2VJ6yM*o>nk5cOI48V-HdCFBlfsK(6^ErVc?nfBUtUHr zHebrCUpP76Ma?=n--CmrG1QF4`&!5Sc=d*Vd$4QjkBmT%cPyj&=i?@;Hpv3yU5hGtyBpGg`jIN2!p3tOq+uPCGgiGCxSk?41n6Y>YFCx60HqQB^m zN%S}BS?Pk?hY9HlTPRTxTGOF2-PmVo+6aMNhIBDb_+cA#1~WvJNbzSeO2o6unT-%6hN~ zSs%7giVe^jNwFdObS}k4=t3#Vj1(KQp5{K~RQvZ#q&HC*dmj|S*!!|0F75rfz6ojx z*%a23&0rb*=IjxRehbub@dS&DC)mR1`=d3Aek=9~5A~&`vNalXj&DPc<<^gE3kS0) zH`#2*dYbdwnQXGzp4&2;0Vsso6tiQPO$llV83^miAXsLz1AD}>8H{>XhQKCdC~RRi z!_XRKQ_4O|(B?@8mRqPl>KOD7$jDYoI zBrK!fkv(G3?}U0*c7{#JF0h5s?~2wa`cdrDdGx!X3!|?x(T`?5h~B@cuscy0{TLL& z==WepGmplioRB?XJsAhf*vHc!i+wNDvoZlTA=R*jvG0vmbIh!%kcp`2F>?|-hZAZ< zxlBfbPN*sL*bbWez`>-3o1_w~2fK%VSy;nunbcGi!lb6Lqe*Hy$_beP>j`hEGpT*) zk0rGq>RH(zHX#SV7A7@|TLfo4(q}0Rx-mhlS`YF(wb4;-qotTGu0zaG8xu``ROOqh9*sB zKEZ+$)&h1k$t*-UA&X!=Sq#f$4xvAm%%P}f#e+@A64+QW&Q~*WEb1~<4nsR3+vRYS z+T{q?tQ^VS-=>WdORuBUd9*sSu%5KTG8k-IUDVbtOHnr~g=KU_5(PUznJkAxC&w|` zBUH*rN2GQ+7QSF|j3|@iP}RwCJPOvyaRR|GIYt{jPt-b3q9V}9Y2Yl zOw#ZqT5T@;Phl(P+Nmgn!*d$h%EP+=(XtXm%G$!GtZXlDkpW340F;)REXLHX%>K zb~GqY!PhQNYwu^+FFq*Gs`EK@J`d~33$XD)c@cFpC@<0FA4$Toc$sbSvA`80n*6c& zFG}H943}3?)vJ~Tu%70D0}D0(HuIaTP`tNL2*rDw9gTSJpq!9* zVLf>dmg2one@wg&P|wPTunGAHHYT1)b_H6az5QeMiS6yB@(EgWZ~v4o+ur^e986}c zN#=9b)7;a;WWHd9$$W`In9Nt~Xp;FF<%IkX){}2wnasEJ$CCLD^{jjkn~)!1W69*~ zo2_)|201<1^t8)QvcQ(v>$8{$g8PqYC*OP5w;if@dzm znJ_}SqN+2Y2nDNn-Qb{jdm8b&v!3Q28H%?Wx21TiqY#R>20I$@dZ3(;HDNvJ2}|+T zqCY0y+Nft`9oU4d3)_)+z2L)VQ`&od_KS*C|JeY0uG8d&WP8Kb$>UiF}u;0+?L|?MYbRIn6pJNjedZI60Z2EZny7`7wvO5kgkf!ccz`^ClEL7juuIRw^|p|ElB zhM{i6E2T@t3r-BBQie7?F_hC8SFubg(B!LFiBhOyxm2O5Dh@}%s^SPZsN#5|;z-v0 z{Wnl?M{Y|McS0dlac6ckD(-@ELUx7qWE3n_+>QR2ilb4_%I>fU83WsqihID|RjfwcsJJ&>{uHEm>F{tqk&d{6l`;uUzJilc3Kgu7DZT@X z#C>9pvSE@yRi&*#!7A-kI4JF2M%rnt`+IjF?R0KSX=k7iO3Noa0%`X}IU)POda^$( 
zr9FWDn6$G{&&q+Y38{tcNZNzoYnP<*C9qR+5I#*43JuSQ)l*q}ovMWWLfw_q!LMo|T2LxI%)B4S|ttF3JkDZ*!3WGWe1y9aqkJw&*4(eGs7d9d1!4?K}K3dZg zv3k&Jw+q-Oy5loaE<}S)wu|VoO}2~SV37NmATMD(&57pJn;ZdR7?Z z4JdeWBYVVxyb1NJ+zgwLTVM-=ycMlckhif<6y!*`9Ss`f9rW0Oyb}%vS!05{i}hf* z{TnWKb6W;^4+@^#t6M~K@IKVDazAWB9)OJnX`V8AP1X^t znl$4NQ1IkK-6G2PBh<690yZHZ!xm=ziT3!EK0o8nwBd7De8vXbE?>f$jK4zBn(^0k zbdd4?;0rSzA>R4p}6k<%TeK)VscWi?cFEwefb)Tt;(S4%hA|gt?4iN0ZAQC?{kr ztS5WIGM91m$8s5udRF#=O~?e;!d$A+8s)M#`$V~v$wV}0E|ci7E(e7y0{e39VF3WAHT{{Y)c1zjOXtxaIge-^k zLb~#CVpUi%7?M_kWsp>op)|1m=?aqgT+BrtM3t0E}*+9Dsxh=K32!&9) zi`mg=cL~Y~xfIru%V4S9zvz!?cRA{~mV(8#6l_P@T?t>iT&2CQX1}<0*QoPabzTSS z$@Q>t?QTHbXm=xBk#^A?vzzFM30NgJqs15S7L-B(E96$+fh&dE;tpIip{mN>fr3@p zJK>tEmK1hE|*@sZi%EPb; zc?7m2WgmqP7g5^#G4_iq`?xxvQ0J4do;(E`SN3Vtjk3?s6)CHG^s{V>i8@@KLyIr! z^C*R)R>}*gs&+4;VAbv=IH+CPX!kPf{vI7@_X@YAcK<~o)b3SwG}^s}azb8*_2dm$ zYWF7nG40+$Ju7d+CgdI1jjeN80@gU%ULKy?%ph>n;|Z3WkcA6 zYy=xq)+n(tTBAF5z1b&xgtSchpsq``zI51@YMa2pRGLgGo3ieIP_mmzWixKeR5nK; zOl1ppG^zAM84m!$deR@3scc1mES0TM$HfFJE+$|LQ`ru!Q7YTB&(gHXAgu}HT`&zm z7slRfVlQUh|0H6piM@m&XyU#y*t(SnR`4&q^t5LdswZ zV=qT*6nh2xbRK&px-j+@6MGfw!4%SO>B?}TF!m8Bgt3og$MC_x9Z^fjPO$iz6)dCQ zg*{@??}~a>M!_azH`v1HN24{0es}ii{K3F6=)&mdndtXmJ(x9q^ka#_==Ve+jD8$D zhS85lEg^fs;u9}eMqkYyvFP_kJu4Go6EX?5F#5@8jiR5zKAlIu54tdV*F>LSJ(xFs z^fg3b^ixp?qo2l(Vf52c!!v-ecm@!b(eKM1vFP_h9nS#5;u%2L!sutAHH!W~_USzO zT6AIb858|MtOwfr4`n8a!sutC5Jq3ej%L=^qnwZwEItr}W$bh5kHzkwo|OjJgrs2$ zV?P+J(f#B`_6aW(M^s1?8uaMXOpop8(*g%`X*0RZV?7uNKNpwVG8aK1%%znbO)eRf z6Ve8Y53pdF%L4jixhzCID~n(gvKY27mqXAR<#H(dgt?5Y5DyKS%MyBQxf})ubD3{) zIh^%ikM?hp9>Hyy%aJIAxg5ogCYPg8PDmCO&jrFVm!byvu7sGmT2`s}19=5y`b#p4cj4ppcMzNw#5dK96_R3tHu&BV=;d1!6^YaUr zBP!(z6tQ+ltA2T>tA8s@rh1`~?n>5!$?i*c6`EAKt5FE0yM`T&bl0Mskn3P^$pcI2 zZlFIV-HoW@Yg@4R+7@g_(%k}IyWFb1Z)3l>bhoSX4t3rM>&acPap~?x-AH#2T{h|N zrK1Ds?t?FoZe*q0Pq+@Gdq80_)rT7C9%SAB+-az}gn9@~D&4~H_ZV{#UD(YEz4Hg$Wu!SkU zp*`ND&rk6!ZFn2jlXqa-vn?mZAJ7t}SS3Gl`wmk4NjJ_^A7)bgne{Y3GjfKSuNeMG& 
zw}?{w9rdjI0h^FNVPh$pp#MUv?rVCh`ET|K&s0@q(gp8v(lb?86s*VaA~<-aI@|== zjrH`Jn6*xY39>u4-_#K{?pGF`VxdOZGwU)n`(zBrp-{#%I2^M*#b5elli#2pZ3_2K7YdY z*M_ZNJ=q$zUABQWW4|qm#m>=r-=a>>#{)O%1u!~IYw*p`QrQk&^@P8@ZnM-`<{y&= zpqe({B*qD>I9A0sKucm(d;@e~tcq`d4vJOr4bUBeDs~6wn7kLj$Y6LWUN0lw5Uu;) zNB7ruL(!t*4MV|`Qtc3lSB834%3%{y0b3|urS_}y2*s3@?vn|?xDl23cwD?Nxic+xOS60a=RFNp20Ym{iR^h8iM#I63w>25>uJsj| zEO-lxPM0y6t*E_t_-ng87cdq!B04WQqVqJYEaXBr=pObZ_0mdXHxK1Q@p{fRi>d6 zKB$2$E}11W_&4txwuSFUot~5izK*A-@VgjLWnTr^j~nAPLF;KOsfF`I_D9o997Vef z8!}7|K*hX_OH#>3nMEi5-=#h^FV$Qx2cle}12;d}*p`x7)F-vgP8*VgP{n6Fuzcqm z-{Zv>w>2i4X4KTyf~~f;OlHHIR#Lt{H1rP~SSEFFIepV4O)FD655FEBPg1bN=i5!W z{g~>Br3*`C4mVTazHpTGAKX6#56Fj>4UxI%h7|dhxw@97d5x)s;&4kVVFNs#q+tsP z%j95gri6UqY)B@JXh#|yqc$mOvq24HFwrzfONdLCX)YeNLurZ3TN$*g&|;u%BTz1_ zIrw4R0|%B%#>ek?1lkmTzQNBKfpS?80*^L=jGj?5FeEUeP(HADa4{QGnWS+Csh9L0 zq=P0-%Z!>5S?EX8v5<>2n#G0Dlwm#`5=O%?2Zv$Ep(}&+6n2RXwoDESVLP2>hp&v} z2qihvMp7&Y}v}fS+3z6 zV+*HDjt#@voCBtF#5RG-O60he5go5YC)kL}<;0K(NA51Aa?;AsPFA#23>sfy$k{Y; zzu>qcr-q2ZpEJffZEBX_(%ovz4dbc9?XXIhafj1L95FvJf98BakjS{d@iihPMd4h~>- zEzRkR5TrLg4`)Tn0VVr&1@S^LKP8>Zclp~kRe8!FsBL7mx%R6K&TUahvtQsS_ zg7M!gW4uxsuj-Jo9E?}58Y8-b@tTz}UaO4Pb;!u$<@Kw^h^}C~VP%XrD&tKZGV=Iy z^QtkTD;RHC8RMs`vm@Y@+1MUWW*JHm*m(P6HDbOwCM73+-?V*G;&D4 zpLbUJMhC0rmb!!GY529JX5wCxrc9e!BhRpf%SQC!d-vs8G|ZCcV5_?(6}lC#woJ+M zXyKyRMDPNtp1cUl2=GS_#N+13ODOvx?E2~aqRMk?&rOpxO`^09{xXWyMags?t}Cn z{rrl9cK2hAVZKMq!qnea%nuA^aB|W0hkO{JKCt4T(aVSLU4Z%#Q41AVp{O6{q3Uu+ zK8a9YdGXsTfB&SZ0QFO%b}z{PGe!N}pa%IbYO0ejBFrE5`6Aozj)bo*q6PkIG<)(t*toolnr6#4+*ak?ZM~=Y?-@Vj80NRcT&*DF?-cX<0?ds3 z5MkDzgfPLNvo(hKBQaMm!2C%ue=fjG$uA-1sNL>6ZQB*6oKb-JD>2t9!2C@ye=opn zl0PEMyVqWJ*^=izFTnhhnClc^{-v0I8_Zy8Eo#an+oTId&rH}qn$J9b^1C|~pms%< z>Rtt?MQHY<8!Sf2*~$#d^rw52ZvWXL1<`BqiKA<3q&r#sF<*^neG1T4SF|+@+87g3 zd=8~!AUz0DI4{>!ke&u)ya_3Fcx+T-AZrn%a8%b;kaY~mbOW+Me7eOz)+I>cbnB%c z>lu*!4M^>7_DI$z$R-7vY@i?;8j#sWlbi_^i)15$6i%p(6{NQTnQK6DW>5^I4?zlN zP+tYv#DFv!kemq<1KE@yg%f5o1=-wyY;82@w)C%)cHL)7f5PxIw#V2pqxY39(9O)4 
zeuOKWF?|2fxZoz%5LED>3Y%hQEvy=37~1?BJ}7i49qO+nN>Pd5wMr9HoWU#FF=)4N?7{AN9AnXjZx6u6XIATiG8xC-3ceL` z3vRbe4B3a~&J7ul{ua``2$M%T0d08p2R5!}-2&O0ot3i>IAaqnphsXOG7-HjOp~~A z9@AvBc`^l-O#F?BXreI5eYlMh+9nFw`bz?REOa&8G>>j7+B}&C8&9ltflOy-@}$XV zL7Tx2d1zdo;vpm0INID&*^iwy>L8<3N{0gy?hl^@{{U{1hd&E#o*W39gTFv(+1ZAV zOb&w2f}i9jdHA!@=1Cna@$pB+pP!m9_3W(#ykS}F?>LydIyehTG#Bh%DRi@N&EeL0 zTyxRpi33Y6-asN2xWb*KfmFS0@YEIiIi2&bJ7CEqsd!k;iuk+B`WFHis`I9y{mrrR;o5 z(AUCu7$Nfb4o90ON5JOrHOZ0eoX^)}=Q|30Eqq54B9AYNHay`4o5PoprRPkMl%KuJLmD}ZQ(qDAbFf8qRo?&V9AL; zx&t<~jhB60?QFDpat>@h8*X2-f9GtxZF?TNS=i3! z)_H6fpv{vDVe{F_D+a=sOkL^;l;d?x=Ic&JMS|0BOm0Ot3)^knI*;vkwBhaR zuz76w34%M>KbNgAw{p1)-7IW(bL%{|d(ejWJiz9&mCJqXpUYO5Te;kiZWgu&xOE=e zgJ|>QA=n%?+?g(whuL4*f=^`d`3h#C9!?h3$s_PvXddNec{Kk)8(!N7n@>|FkF$Rv zjlX08%@gojXrAO|c{ER<4G$N>=F^nRGwfeTQyygVEc_Ol=eSuO>r(TPR@jXmCh; zk^S>&urv8I8F>kQ3(d>iERW_DwBd#?Y(7nyyvqKCG)AiI2Xg-18Lh~s%%cJ=WZFpxrY#vQ{xqQL?`7}6J1TukK=o^;mc^S zagNpnA;*v0D2JnU0Xcp`yM^OtcF*JZ1#O=E3LEFBi#UGcMmZdH%JDneEgXNayWvO$ zpN0I1#;p7WJ4^nC4R7vr!2->buCQbe9=-8zjNmhrA~<~R^{|SG-^)nW;Oev++&LkT zAs`R#A9YuX)eHrnFc>GRhqzpp<=nfdG52orZe6UAPvxi7gFtzTu8B6h$q|-01y`x# z!a-Y$+o+)WU>e73Z8YU1;-jy_o`IHay26{&*6ja;rmPF+Ea?SXT<8B5cWq+>ez{T> z_1x2R>M85NjZfXy#~)8NU|)npEzKi7;fM6HA)Ly9-@k5a!Y@v?%0_IWV2fpA6tmJB z){mT7Eyj<52Osg5NFVs{A)6uWi<&2!z><)kh)GH|<>mxUM?lyM0HhDMZ3a(-usLd; zYyle}G&D&+Zfr&14{XYoISBm~VJm~cN9tNrb+WYuf;(vGkfC)gt;LH=WSbn6Z53rZ zgTfKRuVU88_7)gkTjQh1)?#1*XFv{4vEq~%97a>uS|@o;Vc_fee1_f~1}{6?(RZ<+@2cpde01k`A9_PTGvvI;Wj8zSXbbM{iaW-~ zO^*(v-YkR;jvhJU$R2izu@;Iwm13Ns7!y+P=81m*jLu!TB;)NQds#>(C`q*;so^P3 z$Cr8N6Mqp$cw?w;L5b{brd*$}qz)RN>g5DW!Q0pR-H()1Egq@^`F#6=FXF zF(riHg}DV{e+$F`3Ngz-G=>no+O|L(Xo09zh=U9SUuy1;P58j)UV25HLKWTYsY8l)`7Kp_P zafpH7i(7mMUUpg_4z)md3bDjM@YN+g1TQHq5QkYH4p)dH3z`$ zINCt)eHA_gF9j_SSqntFLM$~9s|9DqG%xck5X&qO%N61n12H#7H8Vw&$7Uut?=g<_(O8xD{{^-A^f>^`136A=PUdLKD@Kcmsdlt zvSVLp$G*sdeX(L+;$!Pi;`#8Ut9+)6t~5*JQakZw7UF*?@#ThiQK;Ea4oPw6CuyEh zE}c1+|CjSgS%^n~1PXS~P4c&{?vXBc-087l)u|GP_O 
zdgSbZ=Ftt@U(p!+=k`nFemnUC7V-y`{2@a=u>t>q`EH zA?M4zY~=q$_BZY1Z&}FSR`PcYdAD$x5g+s5w(qKC{;r+;Jq!8!O8$W%Plnelyh8Qw zpWp`SN)Ol{+6h0h5UxY2VHZvT){B41)73-4FN%Db`pHQI2?9hNty_6<)=+pH{4pG&^6 zQ+;cp`cA37H&nZX7yX^x$lz)A2Rp@&7K)#g;%7s_-}m;lYZm`GeWKBb#kQV3w)HKsZJ@DjXkzOUPV1UrG(vH5XmgeN2cd0b z4{c*hXuUPGJ|?s?L)rE%2(8ndAsR<(w@^UtI(Q7SuRXv`ECFt+0d8gjNpx#XbQ_asZJ6kG9StfTK#St$v2a^E*>)DP?UihR zA=^JB+cucuc|sL%72CN=EL;PXYmng@5_0izzToWQfBD#d@ILl{o$O$z8El~$qBKK& z8s{8;mMDk)ri{)9K4v=1&QWUNC{vDdpCed94Id^I?5k_2!JUs*g@5LX%`&OPww6<7 z6(RGMQp3^a$q3lk5^i{@jAUP>kIpT51DJC+%^yumEIR_o63b4+&x>VewBcQzusnU2 zgrhpLgBfHAAY2cH2MgKN9>6F|0J~`bqx}HFu_%|_*;mEbp@UF5On(eEwWPlX5%ba? zi#B`<0T$^yfBJ;0wySb=HqOp8-omt(GEFc{fBQ_Uw4aiw+D^2$g=nG@P4bCCL91jk z`>LS(uhQhkp(r@@h3D{KgoB+92QJI>n*t__{`)Y1JpB`B^P~ngzAFrusqCx4?zbwV z|9_8l8i*{hPA7j}tTWJtSFpmyV;v#;vhP0>)~tB`p|Jab$r9}T3?MJq1JLHlEZBIk zBjrH${b$1d-(sx=ktNoH$e$N$5^bK$hKDqYaD;D)nXiJa+M1!$ zsT@Fcb`*|I0)ahFNBH<;MfIdun3S=0S-0-@E7I%dBE>V`IqO5X6tu#!u!yTcJL+( zc(a1H81Ts1wP?2gU6zn*o}J6JaEWrY8m^}BURZ}*b$MJFJ6D^9Yrb+V zFkE~^tS{OIX4WjKi%TX8?Z}HP$cq*E5QEIui~7j;c+~%7Wl>5FwZnN9xFrgAn1NfZ zyMar4_K#Z+w?G`B5Jwsa{)}G11o6L^77p4`=xf>ij<$ej6};VmUl@XSIB2Q7L0f9) zT4v!|u3X0$F22?@U)m-))(&@^1@3r-JHfzh8cNIW7g@4D(E@RjLY! 
zQi+^`z83IPE#Rjq_~{0mKUe94cPMRBp0sD!xz4n3ouyo78!j)DwnMH=9@jZ`u5&G1 z=PB3uhKuhQ_VWt&y-Zv(xxkKmp#}LOMZVY|^QEvpG8)Xj$8T3%f~sW*E=6lP% z;pcK-d2$&NP7!`V%K~w^1>y>Y__u-J4;=b2>1XG9zTj8B;u)97mFR7OzsdrCwZdOx z;Q4c*K79PfcKk|{WO+k(t)1;U3)}U|c7tKNAUqxH41qhmV-|fOh_4RvzgV9C>Ew;} z7;dt}aI?m6i;3Zre;xzAZmF^6$8f7XhTAML+^#X)VPdEVRbOQt$~%bRPJ0Y@Sz@?b zW4On}aOXeO0Uu*_5W~Ip81A#gaKFazfQjMne;z|eWAUIphKDRMJghN1Vq%!|Psf6< zUbD}KN9{5E#}dP18pGozhFAWn4t!CWJ%%UjF+6FB;VF&bX%oXH|5OLQNX#C?Gxivs zwZ!n8#_+s}A^p!|=x8inu*dLXEQVCt|1QEyXqzQ3Glt-4?pIL3x1iXBI~UjbM@S4( zm%-!YRkY?DYq@>?dEhk#c-;p`ZykTVFjOY zg^;|6iN2HZjvfBp7<{VDIdYb~N7(%DTkPE;_`<~Ep7K7tc!K@|{J}2+vnxIz#p$if zv}rY^@(~*JZb<9AhUQjTf$A*zn7t&-QfIHfRh&-w2p%NI|sB2DDe#_Sm>=kk7GcK*aY2*s)NDf*Q3TIo0^gwKx)?w zT_WweE7EEP>A)zw)eDgLRgMU0aB0avU&+Cxgk2*Cwui#5X<%z3*q#Nj!#jh;2$c;i zB%M<1ldhFRy0((8V@UOM?mr|Q)G2AN9MbiabbUjrzl`FOZt#yuD>}(^!yM9$ zlyqaC)cMs6es5*$#yu`)&n@Mii>Y2*%00JF4pLu5+QcCJY>+lJNWd~keE00cR7M)= zfPz=bY?gzvxuR@gP_8y8{X&#rG9CgH9zZa6Vqh8@4=)9coUl=2xqtjALly;%3@T`3 zhcMrGqimTIb$^X|D--q6ChDy!t(zO`SIlWpTJ2wl#wDf!k1{iu~L{Pb5Xz4izO*0;Fi?9;j@Ce75u`-jDMG-N8aQ zu@k~U{W~Dk{WJ7MLr~6O2ImAbM1vXX2jjeDA{l0hWEE4-V=K*JD^s>|!}hjet5_LZ zuAIoOqueTU*s7FmxX+e8DH3{wg>YIYLgN;2$e0^T3Ew3acju;0w~WjQYDW!fCll0w zD5#w^sIHu*ImZd^ZMxrZ2a;WKz;;!zQ3fm%f$gSXobeG{&cOmuqjR8kSEw-tsx5-r zBZLB&?YM(EH8uxoPlXz1p!8=x{0Tl@p%{}jDjsZPuNmRS)-~T z)WjU9NeVUDKyAloOZ=#&7$}XaFu@ASzE2KRLZNC5)YK@dsR~8ebH|W}cpXDMN>9rH zo33Cp4A}GtjNhc-LM_sY?}OrU#(?R0(Bsi&d+2H&};>&Ge9#Upn3xoMdatklaLCfb0L)jHb=qc8n724Fvoy( zkf0ud8*-r13U#o7dNG1(451>`WBZM!*pvg+tWYfm>ZJ&3o`LE>Ej<>y3YF4tmd{H{ zrlGDeRa@Usml09eR>kP;^r-IR^sDacY&+poTwORTsyB89RpB$^ zzGIC)iB!FwGh)Iwtns&qs*9YRC;Y|QNm%k#cXxK1@CR#`u(rB0cEWG0-OpNAr&=#H z^p~EiyE&65{7K(WNUM4^r)I+MtgX(Tn>aJ8*K%qn{Kyu;RYLVTPW^=MSv!oiUQWY= zZ&^DQwKn~tdP6f_K%La@y!%OJG|4tteP4j2=4g9ure&DSSKk7QuT&PQZxMa`_WInG zOl?y#-6F~6WUDMj^CZ1GY7x9jy4Gm`snj`(rQeu zpX6*YsZF`-TH59{HZ;$z)gW5saP;w`!jH8in_C-NnthiX0gop~D#rEIb%?62P2UJT zAHPt;ZW(+X{wTO-)*{!Tt%xvHKXh&b?g5(sE9TB?8#-}ZWpd(}p-poulTCHajV(i) 
z@FMNT)}gIU+iY{l0E0DP$NmG#hYTMwY`_lv2h`(*{%j~4R#r85Sn1$ll>*AaGBM4oRsBWgG%qprH#O6|Nc1n`x&Y1prNHhi>6KFtmh^IFZI<*JV?C6x@aKhq z^K+W3c<1Ugm*M#JtN!@oFH5;n#VV7lPJj8w6(QDMT#Gqxr@2V-S1Vk=q;-kFRfGQi zQ<|szw4R=M#`b?iizi(DC9O2imuWq>@pPBgQxnftX+5v-RD+WOPZIu7od^1~9+r8e zPU|t1huO3q5_yzN>k*9yxiBZ!R1TM3noa-g}kpSuOgr}*V+)5 zt6X1Pu0SC!mxd6Ri!onZuB3c%xe$UZT*Uawat-1u%O!-bELQ-&vOInJ%JSUnE6dZK YFB;ETzN9?m_>%H4uR^x9%^vdq08s(jkN^Mx diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/py3_model_Feynman.pkl b/epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/py3_model_Feynman.pkl deleted file mode 100644 index 3e55c479e2cbe319b2de3f58e86119ef116bf180..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42837 zcmb__2bdJa_B|}1h$1K|m=F3n(gx1X)3yl^}}iusdw;xT){VlC&|7fH~)! zv(KD!&N)5x88JSeIj8?Q)zx9TYZv%^f8YBqRkv=Pb5C`3sGjcL!;5O0Qg!u`YPCkx zwn%!!{8Tnw(=tDkuFYk8OmC@6H}RYNsiw4DG^Yr~Y?Iw(&Uz@c zrX<%;+nCPUT{{BXTZir0kj?e5y8&~Z;x&oyVXuqW?!{KooX%iN+3U^e&VQS84Jw-+ z+P%I?Fx%SDY;RE2WllH5)V4IXh~2ZQ^PF{9vr*%}Uyz1fO6JyFf38yD0tbLH&2LLuaqw5IKTbC{C7`JDCfPi;$Eb1p6T zf0Mn1V{bWU1333gx8_=!XS6lVPm8@3@Hra$*;`k2YO^>({k;)dYHwq?gR;~fU|}pQ zjDQu$wtA`IXsTDd0^<820LkVeYXMxr7 z`38}X6JgO%dwUS7;&6M15aX`jq8a~%8E4yr!;G`-VlehAU|i`jmJ~7$@f6nEL$$(s zdzj|yUZBwGZDHd9c~h&8I~Cfbafv9W!l+p)!DvE(9OrgXiqA z9(Zn?3nLRy3_~7fY5$kM*o>2(Bmj|7-_twzaW%fQDLw7MlCkvsgJ?Q!N zz8ZS|GJB>GI@lPy*|YS<=w@b_UC1)WW3lZ0z`~=NWzPjk=h*HRkW97r4{wX9_5s)y zn`0;Bfxa%4wbFI=JbxGT4`BxuQrD20(>FcbK1eG}54BTTArG1l8V+G;d%jk9cg$VX=)p8r@`ZvhuRBPLp0s4*F-(dL{=eDgGZE04YxCzD3=;)A8aP#8A_c7 z_MKoiVmx^SV}16%sW!U_BihSM+w9Y()v2k0XRB0guA!wlYqxZCQr{zpbgu(iaptl+ zybcI;>0t}Pv&CwgHPqQTjB;0tmlAEo)*SbLX{+ZvdtoF8bsR}(E=4PpVXx?^`_A*y_wx*#eRgWtX zSz;fyQjOq@+M3puML06S$m~pAFC$TK9hF0 zkE<%m&e_nT4ldA+pJDY=*Brdevrm|_5kgvVq0uJuQ`~wr>RRN)8EqB^`lMnjhm)Q= z!YAtyR^xk$#bG?umRjDxoSwJONZMywhpn_A-ExIm5O1cfxvD6AjD1H)|bu#+1r!$9gcnHe7Ehc z;;N#mE~*`EcPH(89Q)q+X~(`VEad*Y{Xo)wut12Sct}$`>{D<&9!c7dI`(6l;_+e( zlTYzP(tgshpCZN6VTxz+_OnU*xj4o1n&JhYq6;WqOxiCw_RE^$6;c#=Dqc<6uQ~SX 
zqpeX+I58pMO8;{HK#|LE91g}Hyu+rK33U*p`r>G9=p>UXsx{1NO3 ziE{f-mBwF4BR-gaGmUk18vp1tn1pn~S)X}GXA~SMf(>OOT~Nyn>%gn` z(CSiI7d7T4-5Km3YDSmIdT=m9>A@Dm0Y}zH1M}?e<|!MfdUtGjjG9WeC%ZD)4N-8U zmkRKYg3=rHyle!Ul#OBI$@YQQpQdH9392gFO&RTP(hAuOb(L&iHW)@!`k{fzuBVgT zT-CXG`>Ry9U`M99B?^vgrJ93Ow?;iL{b7@`4QxEs0raZbCfl;;k7Y$d2Ew7z+m0=! zu^fa3rq@HKx4o*bQuEIp%N^L22@XcVkzy4PBv^uaUWULXWhiVs!C~-b{1F_^7(d5E zg^Ykh$qHRKg}@G;BP_G4T4^ zyF$jIsh`@<&X0N8kr^WY68<$;Xxb4*l94IC=RgVp|g+Fk<`DFQ)P8m+a*meH7`J+!)vO2dq_QlFPSG6 z{^i-VQ~DTbKshNHSVs|Ybm#iHs3qk*SVzu>W%d^^B9#4wsORM(*rZ$x8_)g{ zc!TUOWsHB6C@q!C;81h?a<&*26mkU`sHTrrb0zB;&Tnfkt>!9LsOD-Ed^OiF&{uOU zYDu{c){*OBspbYogw))KdR}gVP0GJu<7#e(H&Am6W0q&MhK!OBnU=RAgxPPRv%ih? z45y@3t+T(K6=r`23V!x?GSJWdF4U58H>@M~z%u)L84=36|W>>ox5v)@!_{|M_|_Ug)79wiF1e+&gb`^Oom=g|`=C*?_4N1lRZ?oYEll>0NN z=jB=0q&x>3&;5CL^)a)$TwXv;9W!5Kus@;3B;+MH)P#DOEvAF!D`=po&9tajS@(7i z_loj0_NA!TQSe2*!9XqQO_Y=J7OW$0!&210*&Y)04(fS%7d9#H!Nx`XhdsPAL7BXd zs#ic3}?3`J*r ztv}F*SxJQ%ipyw88Ko$)cm1fMbNFRN-M*2ZSoh|qE9Pgol$c)#R?pfc^W;|sYB9f| zoRr^T9r**6V*X@%NX%cT=jCtMr2GRL7t;xMSfgS(E1$1I%Tn8?2u;TA(}jJoeKyyU zx~e+QW8GA#$ilA?)|&qe5k8NtLsI`dR;5>Z>(a~3-5mvggw`XQ9-$s6CuMzDM>c@v z2=!!pXoNOIJukgrlhPYDK0+J8tG2ePHQ@3=~DCq-_+H*HylWEW06b&4k zE%exIrs^DHvLE%8=KAP<=AY2fzG+rb-*%^=2v#wH=#!=uJ#2R50;W-uB!He2ekDOPok zO^GTMS@;piGXMHx;U^y}@;gs>Y=)3DHa0`)<=6~E!5^F9WYc3a0_CKPgmq*TEXSsl z?V+(LLp?7E*rb%h#>b`t-e7Dh850_tN*N808k;d}GL6kxG;nOT(qpqD>lyA(xP)+* z=sU4OG2>A1#q7*LEoK*#ld>zUBjaHyW&+zoVs=9v>rPm#J7MEuCcztsnar4wm{OSn zj}kMLO(rpWpn+nx)?%iyp5YGQi>YFTV)jJA7c-rKTFeZTld>1ABYVSA%sy-niAkcK zmulFg>^$cgMFJ^yMDCPhZd@=JF zsKp$Ja#Ctw9XSY=Vp41miJ6ajUTR^JQU@CslZH1Cvw$%nF{7m(9wo+NlWEH|pn+nx z(PA>JyVI;%PqTyBmtq=G@WnJSP>X3s8S6$^M_OSi#%6m+jG&&EENoJ8uyHYM@CITQ zGA1OZOcueT#4Ki$Nz4*7P|N@=W-06LH1ot9!oC!9C@F+27 zv&kgp95hhOKrQB6Rp*iZJXI>P@GGxn{(09j{H7Z|(w|RK|7YDAz0$paUXIO$C^&MF zq6uylU5t8OE`d$TrLb|)m%$q>JT7O9f1e{ESD>yWU&#iOz8(dv6ctS%`9{>SQiR1y5jHOQW_Sb1w=iaT zMk~!Ip^?}2CeZzu!lWKS z!I4K95!%5XLp?8#!zSek*mzP;!kh8ehfgskSRam+r{PeO?HRV1Cfl=UV3IrNB%fnF 
z!-;0q=_H?Lg-O1Ef+H_7B9!DysORNn*rdDy8&C38c!MNgV~n5V*o3?ehf4AdwwRK9 z6Aesquuk$V)-#+bR)bFRZC04%zfo}H9Y%zbd>8e+ya$_<|G>tRd>`H*$qyJ4Bso?- zghM6y5nD`2evAetS*(-%gmrIoyUVjx?8_uSMZuBJRF7Z|evW!xzJN{2m$309zhVz{ zK=oG6(n)XMp5HilP|{!)3Y{10tdiW%3H->81MsuG>}x2$_pz)k!+xK!fbqu|I7 zsz;FckErM6C)lL?3>#1U7xqwzXZ(DBr7xUsrlTx>Q_;DqAv(+76{4O4ZkB(*rLz1J z1xNl;J%TL%Mm;b8z$T>=p4W+I*%{twmPPcfah6?FbgpWs&a$hj^SIVcm5MApfoQG% z$wTjX!0>Tx9Uuh9wHj|hOE2eCcN831PtgRTd!U||^1!Pk4iK^oES_Cw}=T z=>>eRT6Div9HB-2{+ zM=`@12aw*^*q}9TOD{DJM8T2m6iuLU5bAl^9yTdEz{WKWhBwey%$SZfmLP;0M`(>h zSa)v~jn5YMH?Hke1z0$08UOi(uHemPzmiS|&3l&{8H-;80qovc;rj4>VBAD6M50>+XF& zPfHd1Qp=tw_*$kjP-~fiGSj3&q~Q&;EMQEarCjRaP+Baun6^s;8mOgAYss+g&M{BR!R$*djVSn9ni#0H zG^30aFf3NUu+(C+J)}iY#|ju0D`42TmNs|;EejbFXsM7za40Q{*<#YN1P#=Z&{~$V z?#?k!%OUJbEr+7uYjGH;wJbv!3t(6*fMKcS2)2i`9Emy>z_3^V!^XAb;SIDL!A-*Kz^_wU!f6#`+f)>t9%EIfd;Z zEvKT6^)D>ezp!yFXTTe1Ig>GgmeFz+97@aCY%ys$2MyFxp|zaLx;w`_E$6W>wVaQF zujK*;YAqL{oRo`T9l02mS}tLGNXwu5Y~~0V5#L{wuiJlf_h#ag-yz1uyHMq!yBwipJ0rCyP-^;L|sXFiVY?yPosfS z#%L+eu+l9r-eAn~j8>3Q0wR}XZz6=bkJY)q#kzZ|Ym(0WZK5#uf1}{%{tg3m?(d?E-z;Gr z`422}f1mB4+&@4)FCW4tam_ok3~9$G#l3UmJ) z1wZ#M80i04{v~Qj`3lyN|H3l+uNe`_{(q?FdW;BV z-vf0lz+kZegNE4H?t%*!MyRv)@@~-tSBb&i8_r7co<=ziStlGDAh@swQYLdnv>c+#|9k=|@NRhD(Xr2L)eDl7U)G zHOg2q!8$S%mSSeHJtSr}>UrVDNXmY&aWQk*BO+#h`qo&?0V+CIwVM_*Pa&!s&Lj1O z?}2bBF*PV)^`v?PcNJ2o=Vd-@Qfgu2S=PZDTsfr~)A3z}1qk6zF;OQ~&$@eev0Nu* z5rxyG0R=}gsz;F2!Kmk@5jH7Ju<@jt;SG{%VNAzKwIYN`?XHuuS$A(JkI_j9QJ7Q~ z1xIqKN03w->UmiRo0LVc@uU{R8zi-aF&!th6d_D%l1}On)-&CB#v7^U;-N%gQVt4u z;#Bnrk~$3ayc`ajlp|o{NgWAqkknC(@sDwpWpXqe>KK=2i|H753>ui^WS!)4)-!#W zWVudq1^Y6|l_)rJtm+XYc^v9_IUW{Ii^9f}JQ3a?$&(nfJfl;~sLUdJ@yQ5bQd4wN zr?8&chDnXlNu5d*CUqJLj-0M~1WBENdS1?iP0CrY@ubd%H%RIn#a7m!>BX%g z7sE9NVmpl*F-k5$MZXa!=~Sa!%0~4HZMrqxTql>IUZQ>(SeR;TOUvb`&uE*U(Ii)( z>d2L_eDRbo6Y}lR##Gbn>Y5s`)zp;9RcOs9Qhvn1Zyz$GOs+!w~OEY7Pv>@->dNpwm?Ge^MNPm0hutndWcV;w?bk_ z@$h0g)47yx_N12#7^*g!SS_=wOXPkxoAw9t0hP^z@odU4A0G0vVVcAJH00sc!9Jp3 
zA2q?2$zwijhturw)sZ}*NS-v2B;+ZdWb+P2vP_;{9m_L{<=Hq(0weUC&$3$wEZAP& z&J^EbdN0!(?D^HD^MXp}MN>Lu@{*sJT<@Q*dPKOj^60r|wo z_XlJu2V~XikUv$(pJ`<809M=5oT;yIkMs%o+z0Qkl6Mc|jA_HycG7C%sFOkhTE1`@ z+dqhWsTjX%m$7VdLjJpEj0gqe*Q;avpJM!`UB(0$zg;s%go5$A)iHjr7=LJ&k;ls) z*NhRNVEk!yj6W;JU)p8l@#ohyV?-zze_I{n?~3t{b{WeSm&u=N#)wcb{eOxEBc>&Q*khu`_}@ymBZ)fFoUC!xvcLy`xmRt{EdPnZVd>b&TsM#&tVj zER*hQ%1Djtwa1uNAInHD#x-vYP&0+1gZNS~i7;7sWb*%}~YNF<556i)kK+r!0_u zaJeb$^7+D|it}pD%aAogqKpf^If_+9sZ0cN3%DKG5|)s--_R*lEnBf0FPNw7`as35 zhkkvE6@%NFa9v_>{T19cE?kC}uNvpMH!ewi^2-8+qdr77yQOhKx;dLmH#Ext^0{el zOWe*e+<^*rI~P|C9hKvhE`!+D&Gn0S2EMfFpxxYDLzvqWGcJ7xg*jMbdM6ik{UOBx z>O-pzny_;8o-x!CqQ*yHh(aA2K~yvFefU^-8H5+wU#vH zQf)FRK>f4%tP}Qnf2SDgWTN(pp-xe#Q(aVRTfJFkJ=eW-hcA{W5VaPcKB2l=_8^P9 z&8HEqZw#$Uq3x;BChC1CJclwNkm&@8&&wGKWG@XeRi~6bJhW9qAbS%ezE$^8AW01} zOM`42o^BzKYJ$Y4+rA29rUuzxgVc;SXEKW*o5#jvwgTY+82f=WUyn(_gbHP{A3@?1 zYOVs=UxU?;-faE6JBdRe3kcFLHpKM` z#L^&pXkot&+}>VxK_&*$K#=&1$taM6HOK)PWa*#-hd%p3uNX)pLE;;`Nr5zLkR}&o z=c9f(N=ff8DI6|O09uL^fV+G{Q!Sv~@5Z?)7x z8bjQ;)&_OAC0Ll2j8qX*7AU`7{FRiH$dYK%?r{ZKb`_5HG7A<0p3*hk$4 z+Z1}D86yH0eajG2Fh(xjVeHEDRA{6QhYwHwz;gJ#%cXQ$j%2Xn83dlPkfLA!)C$%c zixN2sT@18G6DdNQhtH8?U`gw*XlMH?+U4w|=)E(;^x0g|u7Kabv6A5tj$`3-gh*=QALJb^&~jTnHORt1p#{7_74P6s6N@b6^NBMw&eO=^vJpyJD4i0y4LuCBw-YErdk1`u+zA`rP)p{^T?|&N zTXP&Kc}dg!&|#>|9ZQb+-H0{t-9v~7-@WiTavy8~U#;BF;3!|MnePF_8u%V0M1=1l z`0%6{Yyn?d9$|2lFKy;~6tM=r#|RPOdmKJTo`5ajYmz4!9OY{=^F4)F1K-nxi10lF zpCix07VzcdIR;1ha%R5g5o_Rkfe;bC7vaNWYp~?=zL>g%i-wmOt{AroW(*q4oUb6- z!1*dcBAl>pw+IxWeH%V}y91WA z-f3`;(#45Vd57_em`544bGZ9P_~ukX-bEPL{JXIk_d?)3_KvXq2R=MY2ODL>?Q6z& z%*NZcA0o`a_7QtW*gl5OkxyWwYzbM#_>S3l%l1=*8Q4B!?+DxH@Hz4YYylhYt(M7` zj4xz!_xfNYdAs&2gc;cW%ia;Tui?X2M_{9D3HgTcg>3FV5wP)A?Y9Usuzkng5w`E) z!&_dk5w@~2`H}I3Y;kR6@)N=gY(KMigzXpj9QhSC%9fDd7+=U1*H$LKBh0||2YW}@ z{)7+jhQUVI5@qr?;|tm1+7j{)!VGMkaHSh%>kJ=0ehC|8OGp>S7qZ2*C8R6D3~b%l zJECnJ_#9amwtx+Hrc0$e;}x6tiwu5W!7NmVlO?sX9@-5wJ=iTmvp#(IHVSN%rc8P= zK2GCavVdkov>Rx8v0H?uH++t41RJGE$i|G1(8*@WF9G@HVQM<8J%G&m&o 
zWqgzdJCjS3lYVG7&}`0b5t=RF!wp~9C{3Ac#rQal9<-cnjdlY~e|C$|Yy+Po17M>x z3E7tMaT+~nIT?s{1I>2q7NHpgpCjAD7SNP0E=|Y|jE~ab4C4;kd>M>(15GizMQBRk zb7Tl?gr+PZLm3~X!70O|8HRQP&2V;$(2Ri3k&&!ulj`8ptI3_S$bELiBLUw}_FV?}% zligwcn>&+Gabz-G7)9^V8~4TtenXjp2A8`xu42OXGE&u8olZq_K?-E>$i4eVd%)+& zG+0vb34uh0iPp#!IIEhrB3lT+TQG= z$gw8wG=t;?> zOYs+pyr1|>b734>d;)ge4(=5tFpkuscCB0T5~cQV+KsgmQq8Ld}u+upUA~ zlhmSWM&MWerLF)Wtq>My1U^!iP1j1j0fIYd>4=fFE!pCwC1MqzG$@peM&TC2r}=B; zU;_--*7zwhTMR7VG#21ADV%1F!)$7^wbEjM(b;4hnwd>&0gA0qM5Cx|2$MCysB8%0 zW|LDm>XGp_cGriUg(%OHMarX}8eEKuBTMMAiqw}&-EGcCpY)Sr?lRPWOl)Lgge*lo zrf{&5AhI`ky?yZz1%0T7=F31X^cH~DcS3NGN3fWFXY{4dWH^WJUh#Uh?h7a#p z!gB9b$GM3$qvceFDmE%p&j1Ew{;i_gMI~|?A`M)p6ClEM27Hd32^-?V=ZF}pxJtmK z?hgfA1;TJwUf-l5^V#TUU_OU%5$1E@!;`?UA?C4iK0_5V=e0XEyzN&&bpZkmR2Q;; zgz6&r@WLl7sjz0@XESkqb_rw2mEnrt0CuSX>@o#*c?7InuJFOQpf`YBX#l%Qfn6N| ztB`AaFs|1PVAmSJu2W#wN5CrO1`kX?Kfw41_Ji=@eO1^HcG)O-n6WWztg0VDg8}+O4F>F|7$3oY8a_v!femAq$+L_lcBVT@2C(NC5CMB0K1W`F4TB}*MaC+y zo+uf>USdE5>}B{Ic?C8MRxYnHR)KAdk^$^B21LMKhtH8WV8dV)@+M;ySYMP3U~e%X z0`@k1xLpVv2CI~J7^}dxLdgL3E(0Q9@4<&RY+=J-qvd_ZDzI%)GJt)+fC$)!@Hz4k zY#3~ee9Tyah4y;2D*Xfv2JBUgk6?caAHL268^#_hpEEXwjiu-pXfR-Z$@mENSMcFQ zUDyzIVw8N%*cdj}pZ`OH0s9-qN3g$z&ynw7!`P+rJ!2I%eNH1?k^cIg_Q_k$&MiFCsKdxN6Ra7Pst!H35`VWB89!oO@A$t_dd@g)Z@ z3&ONr&9vPNwCgC^bv12`PrH++H(0%^fo=D)r!WWUZcbr6Lkc}q3hTQmc$XuI(Xs(! 
z)uiE7vA)3buX(b#yy}Sv!|-m%j*$t|3qD7B!-i+(7}Nty-YdbA0b80Gw=ytptr+`j#?d}wg~#Z=!lfrq!Op3lV8LxBm4o~GLWyi+CLdrR z-&TzdUTh#QQRG83dA*U` zyNSPMiEgkxO*E3_mNk7=$>dM+}%JpNfA!ggu8GNpa_e-quQGAO)>LLHSq1B z_@=ph-Z`+MOsW{Gn0HW8)pbC7-0mTzMD|1{!}(%5u_EV-8Svr$GAz#*1O3zgcB|9r za>?Fis(lPpNkvudQuz|gWnacBi5=a?;i-2fA`QA`5g?*#Hhhlo@rlr>x1vJ!W2_?M z8PU4~>D^F?o*?v{9CFV^M+5i%#EWnr0G}iCV8h&%av)41^7eFrx|iHCUI>-yPfK3*C%6*v!{x;A>KR%`TreS1U(J3uDzL+|i#t z{@z${(dIqCs}sX!^{-s1y%j))O>7fCvWW#gN3yV?=~P)NImW77_KoM#{!GS$$9geQ z5EE>n(1^8x!jR=cGDosp1RoxPhUG*&+CN}cb0Ttv`NLE|8;lA@Xl+tkVoq(TA+R>ZNbr_3V$fdBMF&&M+AIMlm zADovWo3h}xp1WNQxm*q)LoQbkKa$Ip@Hui7EON=L=kK*++nGy-1pMnO|4<@Vn-jRk zkifMnf$Q7^{B4nt>lv#CV^}+>v@8DxbT!Dok%$rbH^GN5jlzaEMY-I}SjE0(`!X;1 zZb2Ud?X850(B1~0Be%nbX)ELo#wyx1+Ly_CC%PC|?;=u!^=|m^Cl_JEtd(*vV-@TE zYc%a~;POtM{`G*j?Y-R*hdIN>zGmUB4bsq54JaB;|D6duBYF}TeEA84yUpQ z)zR@eI6cInf)q;RC8#kB$jcNN8IV`tbL3T63`nNhKV099Vd&r@%i~|Wi!HBLaLE!V z3SH(G?D_@69)!oC!UOP{c>rEF48R*|0N!*5z&l-xDU-Jtt7ZgWU2|^x4VRgZISk}Me6ewLX-+i^n=X%e~^&bP*`-^lYaeFSWb z{NRDj2~8yTeL{cZ{)iX@%}?wa$@ypa9Qg&76N!JYX*Clm9oe|Qnz?>6aQ&{h{&2ax zjXQRf{K?qJz&FWX@ESz?&ESZLf8cYZ6JDJP4gASa{%@!jrH>)Sf^St++OFx%`b>AsaKca1YLf2T%GSz%X{3uwP{CHiZxGOu&Y>cct`Y ztm5Dksp_{}^%`A2_zlpTGdu#l1$=l<0yYdiTDD@W+K2g0o&juY16Y3rwoL?Vj12I= z=BOtk)SW9P8Xk`l*%na-qJiuj$$C5Z92o@5$;3Yv<_<&nc7Ay6OR~r&-rmf%gMn?Z zVk_2ceSEfY81#1U`UP);@w-v(R|TTK$(ERN7-Gm_sLElO&SA>h=YV%@Rj$z-hMRL3 zVaQ>m%3+kwVbR*>kZ31|QgaSvh8z+qhjN|6_G_C1e!OibhYE8Jm4+Nfs~pDY9A>Y5 z4()A=vF03hG~}?8%3++&VfotU(B8J#*_^{Jh8%WPIgHmij9%Lu_?0^IoSb0JVK+k# z6IBko>m2IWK8Njf@3ZFM~||5D}a4mfXrq7MYs1Vm4C%OMUqiG zH>+v`>b?qfri*HQ;HKsMcIQKK@f5s}Gs_G=I|QF@vyPl6e4RY@vdx|?g12Rs_K^M1 zig#`1;vYx$XQ+Pkedf&SQaJz)bpp?}HZ*5t9;)-?Kt@gb2mdKk)j6$R!O7;Ntv1)j zS7s29;rCGSVvNQ=2rm4IEcW#AXO+r)M!KlAYzv`wNoR@joz3yBh` zLqmX_M$M50?2nClw#KVx9iPX?|6HU#Ths>!odomh<|HC=fX#= z_m-B-^x`spNVyRKK^je{InvBdS|R|;qy-KQ*vdA*1%N#F2Y^_xv$D5~ZX-HC7u4|M z91#Nagyi7R=xuB(K=;zsfC~{204_oezoW4et{3<+^dOtuqB8J3Qd;+B5r6oh$Ast>y*dq&I 
zk5aHlYuFD0*nA9jbVsn*RAocrq8RnGKkC!Z>iFiVj~wAHQvEL0{eIW|E>Zougf~H^aFi}p{Vvn}-qHOo_xqu*9&J9W><>_R zRG8lqj6>P5@)EfMNWsRvQn6j-vSl_2Hty91!s#6l4js@Aq1spdwqX1dOy-(`WUf`o zT<0cZy{n~PZ^&c~rAOFqC}6u$vE8KE-qURVS{+;Aa6(;s+HNjjyG60x>au0V1_OPY zfpBIA1C2Y>K4W1nCH&r8xVcb1o8@+uR9d}a+?tYHLv3TarmmqjCwHg>?^M}rW_7RX zYxS?{XAPS6Ij%IURaKi>L#j5gMo#+?RrNPXsy4LBr+v>F|M*8$Z)?o7Z&~AC-KZ+E z#!dTMMdEN<)zun5?N8SFvbK&jY1;3s@vrPub+)S1WrzCvj8$E%y{7%ewnZ4As%}>G zv_Du|&f4bIoT~M$nrT1L!#`?M)zhk*_5*9zu-40JnD!lO{F~5i>W$@wW_*b_rCxq^ zMc$=E-VLi>9GCP0<*&)LjFNj)+r5UiQn^pH-Oo0@#$4Z$t7%GQS|rt+%E|+9&rk=_ z7Qr`#Ype#4O0Bg-9#q{Pf-Sagg4E-xirmDo7SE<-VY;rS*22rvX?Yki_|#8by`);L z5exCBKU&(dBbpkrwLSJCXu7UCo$DcwAgm-Qk1E6sXINX!u(qDjrnqZc+FBbMn(J#+ z5?Of+F>Y4)s~xH4Y(q=4+a!;p1)ps#AKPM=bdnm%!4Q+-9MskXVXWn>e+sN9$xnQhu`yF&(Rtid}D z7@Qa}dc>%~!v+kl!vE)Ieb*<@KF_m9eIjGxTDjlKGlYScpBAt@(h!!!t6+^ z&m`31{pp%4zFORnYXI=GXkDo`u(UNdQ**j8?Y@8hoa+8OtlGe-#>SRKHLVS`2jip8 wFTlG(32kg>YRIK>(3+JORgah0gP%dm=CD^Rkd~%YW2==KB`>34W!s4V2gj7TGynhq diff --git a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( 
fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/gg_ttgg.mad/test/cudacpp_test.mk b/epochX/cudacpp/gg_ttgg.mad/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/gg_ttgg.mad/test/cudacpp_test.mk +++ b/epochX/cudacpp/gg_ttgg.mad/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt 
b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 7ff994126b..993b7c15cb 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. @@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +38,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). 
Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -55,7 +56,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.003983020782470703  +DEBUG: model prefixing takes 0.008510351181030273  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -148,13 +149,13 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.081 s +1 processes with 123 diagrams generated in 0.349 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 175]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 @@ -163,18 +164,18 @@ INFO: Processing color information for process: g g > t t~ g g @1 DEBUG: type(fortran_model)= [output.py at line 224]  DEBUG: type(me)= me=0 [output.py at line 225]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'color_sum.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 
'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.h', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'cudacpp_overlay.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 226]  -INFO: Creating files in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.216 s +INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
+Generated helas calls for 1 subprocesses (123 diagrams) in 0.757 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.204 s +ALOHA: aloha creates 5 routines in 0.622 s VVV1 VVV1 FFV1 @@ -187,17 +188,17 @@ ALOHA: aloha creates 5 routines in 0.204 s VVVV3 VVVV4 VVVV4 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m1.544s -user 0m0.774s -sys 0m0.144s -Code generation completed in 2 seconds +real 0m3.239s +user 0m2.961s +sys 0m0.180s +Code generation completed in 3 seconds diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity 
filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before computeMatrixElements) 
+ const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc index 08a537c1f2..e1e3f4b970 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc @@ -3141,38 +3141,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -3198,7 +3210,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -3333,7 +3345,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -3357,7 +3369,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -3469,25 +3481,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -3550,7 +3573,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.h b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.h index 96f4a4724c..6ef3863ae3 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const 
unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute 
capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capabilities (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) + ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and
PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 
else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern 
"C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.h b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.h +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // 
_FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef 
unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/gg_ttgg.sa/test/cudacpp_test.mk b/epochX/cudacpp/gg_ttgg.sa/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/gg_ttgg.sa/test/cudacpp_test.mk +++ b/epochX/cudacpp/gg_ttgg.sa/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index ebb525b6f1..59080caa88 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. 
@@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +38,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -55,7 +56,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0036034584045410156  +DEBUG: model prefixing takes 0.009574413299560547  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -148,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 0.963 s +1 processes with 1240 diagrams generated in 4.089 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -159,16 +160,16 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vect INFO: initialize a new directory: CODEGEN_mad_gg_ttggg INFO: remove old information in CODEGEN_mad_gg_ttggg DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  +WARNING: File exists 
/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] -INFO: Color-Flow passed to 1630 term in 3s. Introduce 3030 contraction +INFO: Color-Flow passed to 1630 term in 16s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h @@ -176,25 +177,25 @@ FileWriter t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 
110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 
295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 
495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 
695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 
1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 
153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 
353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 
553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 
753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 
1121: 944, 1122: 945} [model_handling.py at line 1748]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 3.355 s -Wrote files for 2281 helas calls in 9.598 s +DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 
214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 
458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 
693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 
930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 
53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 
290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 
525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 
759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 
850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [model_handling.py at line 1749]  +Generated helas calls for 1 subprocesses (1240 diagrams) in 17.014 s +Wrote files for 2281 helas calls in 38.104 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.194 s +ALOHA: aloha creates 5 routines in 0.672 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.231 s +ALOHA: aloha creates 10 routines in 0.557 s VVV1 VVV1 FFV1 @@ -207,32 +208,32 @@ ALOHA: aloha creates 10 routines in 0.231 s VVVV3 VVVV4 VVVV4 -FileWriter for 
/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages DEBUG: result.returncode =  0 [output.py at line 273]  -Output to directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. +Output to directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. Type "launch" to generate events from this process, or see -/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README +/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README Run "open index.html" to see more information about this process. quit -real 0m20.546s -user 0m16.458s -sys 0m0.884s -Code generation completed in 20 seconds +real 1m14.494s +user 1m10.674s +sys 0m2.006s +Code generation completed in 74 seconds ************************************************************ * * * W E L C O M E to * @@ -253,10 +254,10 @@ Code generation completed in 20 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt treatcards run @@ -283,10 +284,10 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards param diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt index 712b1897aa..db7e3616c4 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt @@ -255,7 +255,7 @@ # pineappl = pineappl -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat index 08a07273bc..bb850487c9 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-14-g6dba8f068 3.7.1 * +#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_functions.f index e986b059a9..47699fa614 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_functions.f @@ -2022,21 +2022,6 @@ subroutine orxxxx(p,rmass,nhel,nsr , ro) end - complex*16 function THETA_FUNCTIONR(cond, out_true, out_false) - - double precision cond - double precision out_true, out_false - - if (cond.ge.0d0) then - THETA_FUNCTIONR = out_true - else - THETA_FUNCTIONR = out_false - endif - - return - - - end complex*16 function THETA_FUNCTION(cond, out_true, out_false) double precision cond diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/Bridge.h 
b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for 
helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before 
computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc index 148ad48435..764a22ed2f 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc @@ -30655,38 +30655,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -30712,7 +30724,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -30847,7 +30859,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -30871,7 +30883,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -30983,25 +30995,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -31064,7 +31087,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.h b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.h index 75c52ba31a..fbe1065f6b 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* 
allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f index 95f2b50e68..fc4a203533 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -893,9 +892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) 
CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f index 14d6ca8aa6..58e121be6e 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f @@ -337,6 +337,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -347,6 +350,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -443,7 +447,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -516,7 +520,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -529,6 +533,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -547,6 +553,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -598,7 +606,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -622,7 +630,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -630,7 +638,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f index ff1a367151..75d6f7a707 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f @@ -436,8 +436,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/addmothers.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/addmothers.f index d6cded9a2d..593c620d9b 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/addmothers.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/addmothers.f @@ -111,7 +111,7 @@ subroutine addmothers(ip,jpart,pb,isym,jsym,rscale,aqcd,aqed,buff, if (btest(mlevel,3)) then write(*,*)'unwgt.f: write out diagram ',igraphs(1) endif - lconfig = vec_igraph1(ivec) + lconfig = vec_igraph(ivec) endif is_LC=.true. 
maxcolor=0 diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cluster.inc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cluster.inc index 8ddf5bee13..940c25eac0 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cluster.inc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cluster.inc @@ -43,5 +43,5 @@ c parameters for sudakovs integer iipdg,iimode common/gamma_args/Q1,iipdg,iimode - integer vec_igraph1(VECSIZE_MEMMAX) - common/vec_igraph/vec_igraph1 + integer vec_igraph(VECSIZE_MEMMAX) + common/vec_igraph/vec_igraph diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See 
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed)
+ ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone 
bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ 
extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.h b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.h +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile_original.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile_original.mk index 6cb56d0409..348c283be7 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile_original.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile_original.mk @@ -58,10 +58,7 @@ $(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) $(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -libcollier.$(dylibext): - ln -s $(LIBDIR)/collier_lib/libcollier.$(dylibext) || echo 'already done' - -gensym: $(SYMMETRY) configs.inc $(LIBS) libcollier.$(dylibext) +gensym: $(SYMMETRY) configs.inc $(LIBS) $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) $(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/myamp.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/myamp.f index bd02dfe2b4..5360566ef4 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/myamp.f @@ -139,7 +139,7 @@ logical function cut_bw(p) $ gForceBW(i,iconfig).eq.1)) if(onshell)then c Remove on-shell forbidden s-channels (gForceBW=2) (JA 2/10/11) - if(gForceBW(i,iconfig).eq.2.and.sde_strat.eq.1) then + if(gForceBW(i,iconfig).eq.2) then cut_bw = .true. 
return endif diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/reweight.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/reweight.f index 353e025d71..8e4672a421 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/reweight.f @@ -1416,6 +1416,7 @@ double precision function rewgt(p, ivec) rewgt=1.0d0 clustered=.false. + vec_igraph(ivec) = 0 ! default: no MLM graph selected for this event if(ickkw.le.0.and..not.use_syst) return @@ -1467,6 +1468,7 @@ double precision function rewgt(p, ivec) rewgt = 0d0 return endif + vec_igraph(ivec) = igraphs(1) ! save MLM-matched graph for this event c Store pdf information for systematics studies (initial) @@ -1592,10 +1594,6 @@ double precision function rewgt(p, ivec) c alpha_s weight if(ipdgcl(imocl(n),igraphs(1),iproc).ne.fake_id)then - if (q2now.le.4)then - rewgt=0d0 - return - endif rewgt=rewgt*alphas(alpsfact*sqrt(q2now))/asref c Store information for systematics studies if(use_syst)then @@ -1907,7 +1905,7 @@ subroutine update_scale_coupling_vec(all_p, all_wgt,all_q2fact, VECSIZE_USED) else all_q2fact(1,i) = q2fact(1) all_q2fact(2,i) = q2fact(2) - vec_igraph1(i) = igraphs(1) + vec_igraph(i) = igraphs(1) endif c call save_cl_val_to(i) c endif diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py index 74f6b04b68..c248436e7f 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py @@ -1004,8 +1004,6 @@ def __init__(self, finput=None, **opt): self.comments = {} # comment associated to parameters. can be display via help message # store the valid options for a given parameter. 
self.allowed_value = {} - # allow nickname for some parameter to avoid integer mapping for some var - self.shortcut_values = {} self.default_setup() @@ -1134,11 +1132,6 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): scan_targettype = self.scan_set[lower_name] del self.scan_set[lower_name] - # check if the user used a shortcut value (which are always str) - if lower_name in self.shortcut_values: - if isinstance(value,str) and value.strip().lower() in self.shortcut_values[lower_name]: - value = self.shortcut_values[lower_name][value.strip().lower()] - # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] @@ -1317,8 +1310,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): def add_param(self, name, value, system=False, comment=False, typelist=None, - allowed=[], - shortcut={}): + allowed=[]): """add a default parameter to the class""" lower_name = name.lower() @@ -1353,11 +1345,6 @@ def add_param(self, name, value, system=False, comment=False, typelist=None, assert val in allowed or '*' in allowed else: assert value in allowed or '*' in allowed - if shortcut: - if allowed and shortcut and '*' not in allowed: - assert all([val in allowed for val in shortcut.values()]), "Some shortcut value are not in the allowed list" - assert all([isinstance(v, str) for v in shortcut.keys()]), "All shortcut values should be str" - self.shortcut_values[lower_name] = shortcut #elif isinstance(value, bool) and allowed != ['*']: # self.allowed_value[name] = [True, False] @@ -4186,10 +4173,8 @@ def default_setup(self): allowed=['partonshower'], comment="list of check that can be bypassed manually.") self.add_param("python_seed", -2, include=False, hidden=True, comment="controlling python seed [handling in particular the final unweighting].\n -1 means use default from random module.\n -2 means set to same value as iseed") self.add_param("lpp1", 1, 
fortran_name="lpp(1)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='first beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("lpp2", 1, fortran_name="lpp(2)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='second beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("ebeam1", 6500.0, fortran_name="ebeam(1)") self.add_param("ebeam2", 6500.0, fortran_name="ebeam(2)") @@ -4198,24 +4183,18 @@ def default_setup(self): self.add_param("polbeam2", 0.0, fortran_name="pb2", hidden=True, comment="Beam polarization from -100 (left-handed) to 100 (right-handed) --use lpp=0 for this parameter--") self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_proton2', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(2)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (used for beam 2 if group_subprocess was False)') self.add_param('nb_neutron1', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(1)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_neutron2', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(2)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (of beam 2 if group_subprocess was False )') self.add_param('mass_ion1', -1.0, hidden=True, 
fortran_name="mass_ion(1)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 1)') self.add_param('mass_ion2', -1.0, hidden=True, fortran_name="mass_ion(2)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 2)') valid_pdf = ['lhapdf', 'cteq6_m','cteq6_l', 'cteq6l1','nn23lo', 'nn23lo1', 'nn23nlo','iww','eva','edff','chff','none','mixed']+\ sum(self.allowed_lep_densities.values(),[]) @@ -4228,14 +4207,12 @@ def default_setup(self): self.add_param("fixed_fac_scale1", False, hidden=True) self.add_param("fixed_fac_scale2", False, hidden=True) self.add_param("fixed_extra_scale", False, hidden=True) - self.add_param("scale", 91.1880, shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) + self.add_param("scale", 91.1880) + self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1") + self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2") self.add_param("mue_ref_fixed", 91.1880, hidden=True) self.add_param("dynamical_scale_choice", -1, comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2\n '4' is the center of mass energy\n'0' allows to use the user_hook definition (need to be defined via custom_fct entry) ", - allowed=[-1,0,1,2,3,4,10], - shortcut={'ckkw':-1,'ht':2,'ht/2':3,'et':1,'shat':4}, - ) + allowed=[-1,0,1,2,3,4,10]) self.add_param("mue_over_ref", 1.0, 
hidden=True, comment='ratio mu_other/mu for dynamical scale') self.add_param("ievo_eva",0,hidden=True, allowed=[0,1],fortran_name="ievo_eva", comment='eva: 0 for EW pdf muf evolution by q^2; 1 for evo by pT^2') @@ -5598,10 +5575,8 @@ def default_setup(self): self.add_param('niters_fo', 6, include=False) #seed and collider self.add_param('iseed', 0) - self.add_param('lpp1', 1, fortran_name='lpp(1)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) - self.add_param('lpp2', 1, fortran_name='lpp(2)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) + self.add_param('lpp1', 1, fortran_name='lpp(1)') + self.add_param('lpp2', 1, fortran_name='lpp(2)') self.add_param('ebeam1', 6500.0, fortran_name='ebeam(1)') self.add_param('ebeam2', 6500.0, fortran_name='ebeam(2)') self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", @@ -5644,15 +5619,13 @@ def default_setup(self): self.add_param('fixed_ren_scale', False) self.add_param('fixed_fac_scale', False) self.add_param('fixed_extra_scale', True, hidden=True, system=True) # set system since running from Ellis-Sexton scale not implemented - self.add_param('mur_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mur_ref_fixed', 91.118) self.add_param('muf1_ref_fixed', -1.0, hidden=True) - self.add_param('muf_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('muf_ref_fixed', 91.118) self.add_param('muf2_ref_fixed', -1.0, hidden=True) - self.add_param('mue_ref_fixed', 91.118, hidden=True, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mue_ref_fixed', 91.118, hidden=True) self.add_param("dynamical_scale_choice", [-1],fortran_name='dyn_scale', - allowed = [-2,-1,0,1,2,3,10], - shortcut={ 'ht/2':3,'ht':2,'et':1}, - comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n 
'2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") + allowed = [-2,-1,0,1,2,3,10], comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") self.add_param('fixed_qes_scale', False, hidden=True) self.add_param('qes_ref_fixed', -1.0, hidden=True) self.add_param('mur_over_ref', 1.0) diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py index 6f82393c3f..3c5601e27d 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py @@ -5205,12 +5205,12 @@ def init_run(self, cards): if self.run_set: self.special_shortcut.update( {'ebeam':([float],['run_card ebeam1 %(0)s', 'run_card ebeam2 %(0)s']), - 'lpp': ([str],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), + 'lpp': ([int],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), 'lhc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), 'lep': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'ilc': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'lcc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), - 'fixed_scale': ([str],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 %(0)s']), + 'fixed_scale': ([float],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card 
dsqrt_q2fact2 %(0)s']), 'no_parton_cut':([],['run_card nocut T']), 'cm_velocity':([float], [lambda self :self.set_CM_velocity]), 'pbp':([],['run_card lpp1 1', 'run_card lpp2 1','run_card nb_proton1 82', 'run_card nb_neutron1 126', 'run_card mass_ion1 195.0820996698','run_card nb_proton2 1', 'run_card nb_neutron2 0', 'run_card mass_ion1 -1']), @@ -5795,8 +5795,6 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed_for_run.remove('*') elif isinstance(self.run_card[args[-1]], bool): allowed_for_run = ['True', 'False'] - if args[-1].lower() in self.run_card.shortcut_values: - allowed_for_run += self.run_card.shortcut_values[args[-1].lower()] opts += [str(i) for i in allowed_for_run] diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py index 262d39a736..3bd0c281fc 100644 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py @@ -38,11 +38,26 @@ def compile(self, *args, **opts): cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py - logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend in ['cpp', 'cppauto']: + backend_log = pjoin(opts["cwd"], ".resolved-backend") + # try to remove old file if present + try: + os.remove(backend_log) + except FileNotFoundError: + pass + misc.compile(["-f", "cudacpp.mk", f"BACKEND=cppauto", f"BACKEND_LOG={backend_log}", "detect-backend"], **opts) + try: + with open(backend_log, "r") as f: + resolved_backend = f.read().strip() + logger.info(f"Backend '{cudacpp_backend}' resolved as '{resolved_backend}'") + cudacpp_backend = resolved_backend + except 
FileNotFoundError: + raise RuntimeError("Could not resolve cudacpp_backend=cppauto|cpp; ensure Makefile detection runs properly.") + logger.info(f"Building madevent in madevent_interface.py with '{cudacpp_backend}' matrix elements") if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' else: - raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) + raise Exception(f"Invalid cudacpp_backend='{cudacpp_backend}': supported backends are [ '" + "', '".join(cudacpp_supported_backends) + "' ]") return misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/py3_model_FDG.pkl b/epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/py3_model_FDG.pkl deleted file mode 100644 index bf5a732979d683e3642a1177b58851862f165d66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 49027 zcmb__2b>he_BAZ1hyf#DLX7N!gpGg+5fNl1I7>3%I_wVgjvMH^vm|Ydvw%70tWTdg z=bWB7>oaFD>oe!@opZZ7%uMZq|KEr2-L0xyea^k7s=KRudU_5os%uKt&z0mnXGmR( zq=w8+wx()ZW~Wninbw{YTk2DdJ>4!PN4Q;!XLj$>rAu?NDdiT;EJCrh$?Z0CO%&!O zCDTyXm}+&qcLp}ablbb3HPh2w4VbGJuS)-(?i%RZqu42$*%fR_cg>kS__sOJpwaX+ z&1-3Zt@9e1-LRK9G#9gPl>&(?zbCNPQ<*qvpW&NicG$mVG-CkPUZ){?+ zyPj^jprJnFxa&LXDc8LExwUoB!rh>{xAtsoXim8sYH>kw{vvmynLU9xPg>?B=O#1Y z+_<{S%r(%|&{|7*+}_J(u7hf8s&P&&BiF`0%M>BAXkN*(q^12R_x<*WIGJOPfQ5`UMItcDHnVQ5L)X9cbY| z0Vhh^>7{Zs)i>5o+|S+0SwF1K*w5YC*`T_Qv!Sz*v$3;@(|eiIXPMI%C4{k!16C*G z+m?Ks2nzav28(8P0VFnGEN; zI~cwmITaw`L-rX_Aw=S=HDDhv4VT<{Sgc*Y$W zfM=GtJ9ZAfo*jIrT=1QPacFaQ0X7G|ZHc?9f$oNZ7`0-W{&RYT3KiL*N9D@DTL4|> zjyBMBOWfV9(BZV5IK~}ghP%6+Wsh8zu>niQ-P5o@n{g{)nd**@&)li*UYK92+X*M+ z5>^L<_3qw=uzra*=d_Y+LWG}8hEx!)@2%6np@qL&Q9ukMLqp>z&xC}+)md4 zu}ylqg6^%wI%W;^ZU(zwcZW-fwqj?d|6j^kXO=rZF9ux*)FvC}re;gB=NK*vYBJK$ zJh!L209!Bg+(pG^#gLja$6Z`ECrGE~o~G2(Jw)ebse5SF^%Cw9Kl9ewhNk3PtVCpy 
zd)RVSa8h0Eyp{zxNgs}rG|&2tEiLnE!`35k!k(Sf)d9jtHD?yPN6vDOQu-SFD|L@f z>beT6fO%MG%x!hEp4(pCrMgSIyHvZ48&g|e?k;Q9+1?`cDQR_=d+sqhvGy6ehkI;w zQR~cg1L|mjcHAVVudX?`%yW;Qxjwqg!$PA?W+yp%wYnBLVN#nzp-(J!GSzDug-;%G`mUT~0xMw(rEq9Q*<0tAMxighhb2}^PAe*zDFp;W}?m4Xv zqCeL;wz_Mp!w{Y0ogjwu7{l5ohVzS^f&5QnxIkmLu+x#A)6>03N3FrVc$RxfF+}WI zU6g8fFHN|YdG5bvCq4J_cnVi!-G3+CD+^P&N>i9VeR`>TwWe@QltSq+_gYQiI)4Dh z?BQN-N#zEnvW`jRMw5z3=B8rJ=H_DOA_tM)Qas^r{A0KX_SS@Zo9Es>+xOj3TwPS% zP5toQnQ-s&+`DI|Jolb>BKKz9`x5T`If*F61BT+kkb-(VlyD#R+(!(>qs5RZr1($5 zeav$oC&d$SiYK$~QwjI!LW*Y$#j_zrH&8s6aG&?w7YxOVq$moqcq!q&?76Ry;=gf< zSF`SG3HS9JiWFzm8~*N!@xXw*sXGPBq0car%AWT#ub5fBJpMG276L*+<1r4_#`nz;V{Wn%A)}b{(v6oxEl3a;HzY z5d1hg3vH`Y`4b(PPd)duc=DfT-7gaEm$Bsau+Y%d)RsxkZcMpf>ET9?W|MI}@^wkO z`@gLFjeqb+Hn%i4)Fm6;Zyh?m%evot?hozmk2v^H!JmqqtIeFoA?IhmSTnuM{lyf~ z`l|}~TQR0OFaB`Ysc?Viq-=Np2-F;2;r@vxDEU{|ROxc+c59a|IGYEXmP=REsI?TK zVB6cJ8yr-8GhcD(uJskpwmh~|Dp`%2;@~5zqu|LJ+95myNe|SsvLGxyblUUykMS5UpScf<|gq?w62M3+M9Airo9;oo@}n}DD5p!&q_boglq|0 zn09~du@!xO+FNVGHn5&-3)?Q+!J4$UN3q!X+4)rmuVy+jAAlz3EX}-Fz2Omjc$t)- z#m{^oN@3>1WsvVEYL^{QN}DQX#^6}Lsu3~-RaIyx3f8GH3=S%^g;A(f>pieXLM7Fx zjGI!UauhtN&<>GCm8fT>3N|6bVGA`Hp*=>@=WDd1HtYoJ$u?st?b~vAw6VT=qqaFVKG6g!gZ7z^NK8`}+>ep#x%$ z@{uyjcVIRh7;{urNv-eb-Yy5l9OWY<={r_ym)S8#RjJgWs-sYkf_46-;NU21X+~j= z)>m-$nfW)Dn{pf+6g+9r4$(NIQP0Z3unB2|EgXj??a@r1KMpP0Fb~!f7q(pl){H|d zip6QpI_GFrYr5{I88nz`&Gb@f3#v8Kxfjoms`M^ERfl6C3bElRuare-@rPq^unm`; zWpaq`z;GOjQraB!FbH0(3h9@`su29JpsF$-j+%9{9{~qtE-^A6sde4S&2&GCn^NYZ zQScbje! 
zsdSq9s)8r8UvxUC94@Dz#aHlDl!6IeSt+Oa4k&m!N@-Jtf@j3~Rh7$`sH$3LpE1A!Z$v#SH^C<4X4t|MZ_ysN(&wjmn>O4I>&YFk?Q$oqN%1Ze zd2T$*`87QNWgY#ywJ|&;jwqFT(Bh|gFG}eleidVWA4REXlHX%>I77FsD_IQduUy!G@ z;Tc#@o`r3f=U|N>&!gxci2VJ6yM*o>nk5cOI48V-HdCFBlfsK(6^ErVc?nfBUtUHr zHebrCUpP76Ma?=n--CmrG1QF4`&!5Sc=d*Vd$4QjkBmT%cPyj&=i?@;Hpv3yU5hGtyBpGg`jIN2!p3tOq+uPCGgiGCxSk?41n6Y>YFCx60HqQB^m zN%S}BS?Pk?hY9HlTPRTxTGOF2-PmVo+6aMNhIBDb_+cA#1~WvJNbzSeO2o6unT-%6hN~ zSs%7giVe^jNwFdObS}k4=t3#Vj1(KQp5{K~RQvZ#q&HC*dmj|S*!!|0F75rfz6ojx z*%a23&0rb*=IjxRehbub@dS&DC)mR1`=d3Aek=9~5A~&`vNalXj&DPc<<^gE3kS0) zH`#2*dYbdwnQXGzp4&2;0Vsso6tiQPO$llV83^miAXsLz1AD}>8H{>XhQKCdC~RRi z!_XRKQ_4O|(B?@8mRqPl>KOD7$jDYoI zBrK!fkv(G3?}U0*c7{#JF0h5s?~2wa`cdrDdGx!X3!|?x(T`?5h~B@cuscy0{TLL& z==WepGmplioRB?XJsAhf*vHc!i+wNDvoZlTA=R*jvG0vmbIh!%kcp`2F>?|-hZAZ< zxlBfbPN*sL*bbWez`>-3o1_w~2fK%VSy;nunbcGi!lb6Lqe*Hy$_beP>j`hEGpT*) zk0rGq>RH(zHX#SV7A7@|TLfo4(q}0Rx-mhlS`YF(wb4;-qotTGu0zaG8xu``ROOqh9*sB zKEZ+$)&h1k$t*-UA&X!=Sq#f$4xvAm%%P}f#e+@A64+QW&Q~*WEb1~<4nsR3+vRYS z+T{q?tQ^VS-=>WdORuBUd9*sSu%5KTG8k-IUDVbtOHnr~g=KU_5(PUznJkAxC&w|` zBUH*rN2GQ+7QSF|j3|@iP}RwCJPOvyaRR|GIYt{jPt-b3q9V}9Y2Yl zOw#ZqT5T@;Phl(P+Nmgn!*d$h%EP+=(XtXm%G$!GtZXlDkpW340F;)REXLHX%>K zb~GqY!PhQNYwu^+FFq*Gs`EK@J`d~33$XD)c@cFpC@<0FA4$Toc$sbSvA`80n*6c& zFG}H943}3?)vJ~Tu%70D0}D0(HuIaTP`tNL2*rDw9gTSJpq!9* zVLf>dmg2one@wg&P|wPTunGAHHYT1)b_H6az5QeMiS6yB@(EgWZ~v4o+ur^e986}c zN#=9b)7;a;WWHd9$$W`In9Nt~Xp;FF<%IkX){}2wnasEJ$CCLD^{jjkn~)!1W69*~ zo2_)|201<1^t8)QvcQ(v>$8{$g8PqYC*OP5w;if@dzm znJ_}SqN+2Y2nDNn-Qb{jdm8b&v!3Q28H%?Wx21TiqY#R>20I$@dZ3(;HDNvJ2}|+T zqCY0y+Nft`9oU4d3)_)+z2L)VQ`&od_KS*C|JeY0uG8d&WP8Kb$>UiF}u;0+?L|?MYbRIn6pJNjedZI60Z2EZny7`7wvO5kgkf!ccz`^ClEL7juuIRw^|p|ElB zhM{i6E2T@t3r-BBQie7?F_hC8SFubg(B!LFiBhOyxm2O5Dh@}%s^SPZsN#5|;z-v0 z{Wnl?M{Y|McS0dlac6ckD(-@ELUx7qWE3n_+>QR2ilb4_%I>fU83WsqihID|RjfwcsJJ&>{uHEm>F{tqk&d{6l`;uUzJilc3Kgu7DZT@X z#C>9pvSE@yRi&*#!7A-kI4JF2M%rnt`+IjF?R0KSX=k7iO3Noa0%`X}IU)POda^$( 
zr9FWDn6$G{&&q+Y38{tcNZNzoYnP<*C9qR+5I#*43JuSQ)l*q}ovMWWLfw_q!LMo|T2LxI%)B4S|ttF3JkDZ*!3WGWe1y9aqkJw&*4(eGs7d9d1!4?K}K3dZg zv3k&Jw+q-Oy5loaE<}S)wu|VoO}2~SV37NmATMD(&57pJn;ZdR7?Z z4JdeWBYVVxyb1NJ+zgwLTVM-=ycMlckhif<6y!*`9Ss`f9rW0Oyb}%vS!05{i}hf* z{TnWKb6W;^4+@^#t6M~K@IKVDazAWB9)OJnX`V8AP1X^t znl$4NQ1IkK-6G2PBh<690yZHZ!xm=ziT3!EK0o8nwBd7De8vXbE?>f$jK4zBn(^0k zbdd4?;0rSzA>R4p}6k<%TeK)VscWi?cFEwefb)Tt;(S4%hA|gt?4iN0ZAQC?{kr ztS5WIGM91m$8s5udRF#=O~?e;!d$A+8s)M#`$V~v$wV}0E|ci7E(e7y0{e39VF3WAHT{{Y)c1zjOXtxaIge-^k zLb~#CVpUi%7?M_kWsp>op)|1m=?aqgT+BrtM3t0E}*+9Dsxh=K32!&9) zi`mg=cL~Y~xfIru%V4S9zvz!?cRA{~mV(8#6l_P@T?t>iT&2CQX1}<0*QoPabzTSS z$@Q>t?QTHbXm=xBk#^A?vzzFM30NgJqs15S7L-B(E96$+fh&dE;tpIip{mN>fr3@p zJK>tEmK1hE|*@sZi%EPb; zc?7m2WgmqP7g5^#G4_iq`?xxvQ0J4do;(E`SN3Vtjk3?s6)CHG^s{V>i8@@KLyIr! z^C*R)R>}*gs&+4;VAbv=IH+CPX!kPf{vI7@_X@YAcK<~o)b3SwG}^s}azb8*_2dm$ zYWF7nG40+$Ju7d+CgdI1jjeN80@gU%ULKy?%ph>n;|Z3WkcA6 zYy=xq)+n(tTBAF5z1b&xgtSchpsq``zI51@YMa2pRGLgGo3ieIP_mmzWixKeR5nK; zOl1ppG^zAM84m!$deR@3scc1mES0TM$HfFJE+$|LQ`ru!Q7YTB&(gHXAgu}HT`&zm z7slRfVlQUh|0H6piM@m&XyU#y*t(SnR`4&q^t5LdswZ zV=qT*6nh2xbRK&px-j+@6MGfw!4%SO>B?}TF!m8Bgt3og$MC_x9Z^fjPO$iz6)dCQ zg*{@??}~a>M!_azH`v1HN24{0es}ii{K3F6=)&mdndtXmJ(x9q^ka#_==Ve+jD8$D zhS85lEg^fs;u9}eMqkYyvFP_kJu4Go6EX?5F#5@8jiR5zKAlIu54tdV*F>LSJ(xFs z^fg3b^ixp?qo2l(Vf52c!!v-ecm@!b(eKM1vFP_h9nS#5;u%2L!sutAHH!W~_USzO zT6AIb858|MtOwfr4`n8a!sutC5Jq3ej%L=^qnwZwEItr}W$bh5kHzkwo|OjJgrs2$ zV?P+J(f#B`_6aW(M^s1?8uaMXOpop8(*g%`X*0RZV?7uNKNpwVG8aK1%%znbO)eRf z6Ve8Y53pdF%L4jixhzCID~n(gvKY27mqXAR<#H(dgt?5Y5DyKS%MyBQxf})ubD3{) zIh^%ikM?hp9>Hyy%aJIAxg5ogCYPg8PDmCO&jrFVm!byvu7sGmT2`s}19=5y`b#p4cj4ppcMzNw#5dK96_R3tHu&BV=;d1!6^YaUr zBP!(z6tQ+ltA2T>tA8s@rh1`~?n>5!$?i*c6`EAKt5FE0yM`T&bl0Mskn3P^$pcI2 zZlFIV-HoW@Yg@4R+7@g_(%k}IyWFb1Z)3l>bhoSX4t3rM>&acPap~?x-AH#2T{h|N zrK1Ds?t?FoZe*q0Pq+@Gdq80_)rT7C9%SAB+-az}gn9@~D&4~H_ZV{#UD(YEz4Hg$Wu!SkU zp*`ND&rk6!ZFn2jlXqa-vn?mZAJ7t}SS3Gl`wmk4NjJ_^A7)bgne{Y3GjfKSuNeMG& 
zw}?{w9rdjI0h^FNVPh$pp#MUv?rVCh`ET|K&s0@q(gp8v(lb?86s*VaA~<-aI@|== zjrH`Jn6*xY39>u4-_#K{?pGF`VxdOZGwU)n`(zBrp-{#%I2^M*#b5elli#2pZ3_2K7YdY z*M_ZNJ=q$zUABQWW4|qm#m>=r-=a>>#{)O%1u!~IYw*p`QrQk&^@P8@ZnM-`<{y&= zpqe({B*qD>I9A0sKucm(d;@e~tcq`d4vJOr4bUBeDs~6wn7kLj$Y6LWUN0lw5Uu;) zNB7ruL(!t*4MV|`Qtc3lSB834%3%{y0b3|urS_}y2*s3@?vn|?xDl23cwD?Nxic+xOS60a=RFNp20Ym{iR^h8iM#I63w>25>uJsj| zEO-lxPM0y6t*E_t_-ng87cdq!B04WQqVqJYEaXBr=pObZ_0mdXHxK1Q@p{fRi>d6 zKB$2$E}11W_&4txwuSFUot~5izK*A-@VgjLWnTr^j~nAPLF;KOsfF`I_D9o997Vef z8!}7|K*hX_OH#>3nMEi5-=#h^FV$Qx2cle}12;d}*p`x7)F-vgP8*VgP{n6Fuzcqm z-{Zv>w>2i4X4KTyf~~f;OlHHIR#Lt{H1rP~SSEFFIepV4O)FD655FEBPg1bN=i5!W z{g~>Br3*`C4mVTazHpTGAKX6#56Fj>4UxI%h7|dhxw@97d5x)s;&4kVVFNs#q+tsP z%j95gri6UqY)B@JXh#|yqc$mOvq24HFwrzfONdLCX)YeNLurZ3TN$*g&|;u%BTz1_ zIrw4R0|%B%#>ek?1lkmTzQNBKfpS?80*^L=jGj?5FeEUeP(HADa4{QGnWS+Csh9L0 zq=P0-%Z!>5S?EX8v5<>2n#G0Dlwm#`5=O%?2Zv$Ep(}&+6n2RXwoDESVLP2>hp&v} z2qihvMp7&Y}v}fS+3z6 zV+*HDjt#@voCBtF#5RG-O60he5go5YC)kL}<;0K(NA51Aa?;AsPFA#23>sfy$k{Y; zzu>qcr-q2ZpEJffZEBX_(%ovz4dbc9?XXIhafj1L95FvJf98BakjS{d@iihPMd4h~>- zEzRkR5TrLg4`)Tn0VVr&1@S^LKP8>Zclp~kRe8!FsBL7mx%R6K&TUahvtQsS_ zg7M!gW4uxsuj-Jo9E?}58Y8-b@tTz}UaO4Pb;!u$<@Kw^h^}C~VP%XrD&tKZGV=Iy z^QtkTD;RHC8RMs`vm@Y@+1MUWW*JHm*m(P6HDbOwCM73+-?V*G;&D4 zpLbUJMhC0rmb!!GY529JX5wCxrc9e!BhRpf%SQC!d-vs8G|ZCcV5_?(6}lC#woJ+M zXyKyRMDPNtp1cUl2=GS_#N+13ODOvx?E2~aqRMk?&rOpxO`^09{xXWyMags?t}Cn z{rrl9cK2hAVZKMq!qnea%nuA^aB|W0hkO{JKCt4T(aVSLU4Z%#Q41AVp{O6{q3Uu+ zK8a9YdGXsTfB&SZ0QFO%b}z{PGe!N}pa%IbYO0ejBFrE5`6Aozj)bo*q6PkIG<)(t*toolnr6#4+*ak?ZM~=Y?-@Vj80NRcT&*DF?-cX<0?ds3 z5MkDzgfPLNvo(hKBQaMm!2C%ue=fjG$uA-1sNL>6ZQB*6oKb-JD>2t9!2C@ye=opn zl0PEMyVqWJ*^=izFTnhhnClc^{-v0I8_Zy8Eo#an+oTId&rH}qn$J9b^1C|~pms%< z>Rtt?MQHY<8!Sf2*~$#d^rw52ZvWXL1<`BqiKA<3q&r#sF<*^neG1T4SF|+@+87g3 zd=8~!AUz0DI4{>!ke&u)ya_3Fcx+T-AZrn%a8%b;kaY~mbOW+Me7eOz)+I>cbnB%c z>lu*!4M^>7_DI$z$R-7vY@i?;8j#sWlbi_^i)15$6i%p(6{NQTnQK6DW>5^I4?zlN zP+tYv#DFv!kemq<1KE@yg%f5o1=-wyY;82@w)C%)cHL)7f5PxIw#V2pqxY39(9O)4 
zeuOKWF?|2fxZoz%5LED>3Y%hQEvy=37~1?BJ}7i49qO+nN>Pd5wMr9HoWU#FF=)4N?7{AN9AnXjZx6u6XIATiG8xC-3ceL` z3vRbe4B3a~&J7ul{ua``2$M%T0d08p2R5!}-2&O0ot3i>IAaqnphsXOG7-HjOp~~A z9@AvBc`^l-O#F?BXreI5eYlMh+9nFw`bz?REOa&8G>>j7+B}&C8&9ltflOy-@}$XV zL7Tx2d1zdo;vpm0INID&*^iwy>L8<3N{0gy?hl^@{{U{1hd&E#o*W39gTFv(+1ZAV zOb&w2f}i9jdHA!@=1Cna@$pB+pP!m9_3W(#ykS}F?>LydIyehTG#Bh%DRi@N&EeL0 zTyxRpi33Y6-asN2xWb*KfmFS0@YEIiIi2&bJ7CEqsd!k;iuk+B`WFHis`I9y{mrrR;o5 z(AUCu7$Nfb4o90ON5JOrHOZ0eoX^)}=Q|30Eqq54B9AYNHay`4o5PoprRPkMl%KuJLmD}ZQ(qDAbFf8qRo?&V9AL; zx&t<~jhB60?QFDpat>@h8*X2-f9GtxZF?TNS=i3! z)_H6fpv{vDVe{F_D+a=sOkL^;l;d?x=Ic&JMS|0BOm0Ot3)^knI*;vkwBhaR zuz76w34%M>KbNgAw{p1)-7IW(bL%{|d(ejWJiz9&mCJqXpUYO5Te;kiZWgu&xOE=e zgJ|>QA=n%?+?g(whuL4*f=^`d`3h#C9!?h3$s_PvXddNec{Kk)8(!N7n@>|FkF$Rv zjlX08%@gojXrAO|c{ER<4G$N>=F^nRGwfeTQyygVEc_Ol=eSuO>r(TPR@jXmCh; zk^S>&urv8I8F>kQ3(d>iERW_DwBd#?Y(7nyyvqKCG)AiI2Xg-18Lh~s%%cJ=WZFpxrY#vQ{xqQL?`7}6J1TukK=o^;mc^S zagNpnA;*v0D2JnU0Xcp`yM^OtcF*JZ1#O=E3LEFBi#UGcMmZdH%JDneEgXNayWvO$ zpN0I1#;p7WJ4^nC4R7vr!2->buCQbe9=-8zjNmhrA~<~R^{|SG-^)nW;Oev++&LkT zAs`R#A9YuX)eHrnFc>GRhqzpp<=nfdG52orZe6UAPvxi7gFtzTu8B6h$q|-01y`x# z!a-Y$+o+)WU>e73Z8YU1;-jy_o`IHay26{&*6ja;rmPF+Ea?SXT<8B5cWq+>ez{T> z_1x2R>M85NjZfXy#~)8NU|)npEzKi7;fM6HA)Ly9-@k5a!Y@v?%0_IWV2fpA6tmJB z){mT7Eyj<52Osg5NFVs{A)6uWi<&2!z><)kh)GH|<>mxUM?lyM0HhDMZ3a(-usLd; zYyle}G&D&+Zfr&14{XYoISBm~VJm~cN9tNrb+WYuf;(vGkfC)gt;LH=WSbn6Z53rZ zgTfKRuVU88_7)gkTjQh1)?#1*XFv{4vEq~%97a>uS|@o;Vc_fee1_f~1}{6?(RZ<+@2cpde01k`A9_PTGvvI;Wj8zSXbbM{iaW-~ zO^*(v-YkR;jvhJU$R2izu@;Iwm13Ns7!y+P=81m*jLu!TB;)NQds#>(C`q*;so^P3 z$Cr8N6Mqp$cw?w;L5b{brd*$}qz)RN>g5DW!Q0pR-H()1Egq@^`F#6=FXF zF(riHg}DV{e+$F`3Ngz-G=>no+O|L(Xo09zh=U9SUuy1;P58j)UV25HLKWTYsY8l)`7Kp_P zafpH7i(7mMUUpg_4z)md3bDjM@YN+g1TQHq5QkYH4p)dH3z`$ zINCt)eHA_gF9j_SSqntFLM$~9s|9DqG%xck5X&qO%N61n12H#7H8Vw&$7Uut?=g<_(O8xD{{^-A^f>^`136A=PUdLKD@Kcmsdlt zvSVLp$G*sdeX(L+;$!Pi;`#8Ut9+)6t~5*JQakZw7UF*?@#ThiQK;Ea4oPw6CuyEh zE}c1+|CjSgS%^n~1PXS~P4c&{?vXBc-087l)u|GP_O 
zdgSbZ=Ftt@U(p!+=k`nFemnUC7V-y`{2@a=u>t>q`EH zA?M4zY~=q$_BZY1Z&}FSR`PcYdAD$x5g+s5w(qKC{;r+;Jq!8!O8$W%Plnelyh8Qw zpWp`SN)Ol{+6h0h5UxY2VHZvT){B41)73-4FN%Db`pHQI2?9hNty_6<)=+pH{4pG&^6 zQ+;cp`cA37H&nZX7yX^x$lz)A2Rp@&7K)#g;%7s_-}m;lYZm`GeWKBb#kQV3w)HKsZJ@DjXkzOUPV1UrG(vH5XmgeN2cd0b z4{c*hXuUPGJ|?s?L)rE%2(8ndAsR<(w@^UtI(Q7SuRXv`ECFt+0d8gjNpx#XbQ_asZJ6kG9StfTK#St$v2a^E*>)DP?UihR zA=^JB+cucuc|sL%72CN=EL;PXYmng@5_0izzToWQfBD#d@ILl{o$O$z8El~$qBKK& z8s{8;mMDk)ri{)9K4v=1&QWUNC{vDdpCed94Id^I?5k_2!JUs*g@5LX%`&OPww6<7 z6(RGMQp3^a$q3lk5^i{@jAUP>kIpT51DJC+%^yumEIR_o63b4+&x>VewBcQzusnU2 zgrhpLgBfHAAY2cH2MgKN9>6F|0J~`bqx}HFu_%|_*;mEbp@UF5On(eEwWPlX5%ba? zi#B`<0T$^yfBJ;0wySb=HqOp8-omt(GEFc{fBQ_Uw4aiw+D^2$g=nG@P4bCCL91jk z`>LS(uhQhkp(r@@h3D{KgoB+92QJI>n*t__{`)Y1JpB`B^P~ngzAFrusqCx4?zbwV z|9_8l8i*{hPA7j}tTWJtSFpmyV;v#;vhP0>)~tB`p|Jab$r9}T3?MJq1JLHlEZBIk zBjrH${b$1d-(sx=ktNoH$e$N$5^bK$hKDqYaD;D)nXiJa+M1!$ zsT@Fcb`*|I0)ahFNBH<;MfIdun3S=0S-0-@E7I%dBE>V`IqO5X6tu#!u!yTcJL+( zc(a1H81Ts1wP?2gU6zn*o}J6JaEWrY8m^}BURZ}*b$MJFJ6D^9Yrb+V zFkE~^tS{OIX4WjKi%TX8?Z}HP$cq*E5QEIui~7j;c+~%7Wl>5FwZnN9xFrgAn1NfZ zyMar4_K#Z+w?G`B5Jwsa{)}G11o6L^77p4`=xf>ij<$ej6};VmUl@XSIB2Q7L0f9) zT4v!|u3X0$F22?@U)m-))(&@^1@3r-JHfzh8cNIW7g@4D(E@RjLY! 
zQi+^`z83IPE#Rjq_~{0mKUe94cPMRBp0sD!xz4n3ouyo78!j)DwnMH=9@jZ`u5&G1 z=PB3uhKuhQ_VWt&y-Zv(xxkKmp#}LOMZVY|^QEvpG8)Xj$8T3%f~sW*E=6lP% z;pcK-d2$&NP7!`V%K~w^1>y>Y__u-J4;=b2>1XG9zTj8B;u)97mFR7OzsdrCwZdOx z;Q4c*K79PfcKk|{WO+k(t)1;U3)}U|c7tKNAUqxH41qhmV-|fOh_4RvzgV9C>Ew;} z7;dt}aI?m6i;3Zre;xzAZmF^6$8f7XhTAML+^#X)VPdEVRbOQt$~%bRPJ0Y@Sz@?b zW4On}aOXeO0Uu*_5W~Ip81A#gaKFazfQjMne;z|eWAUIphKDRMJghN1Vq%!|Psf6< zUbD}KN9{5E#}dP18pGozhFAWn4t!CWJ%%UjF+6FB;VF&bX%oXH|5OLQNX#C?Gxivs zwZ!n8#_+s}A^p!|=x8inu*dLXEQVCt|1QEyXqzQ3Glt-4?pIL3x1iXBI~UjbM@S4( zm%-!YRkY?DYq@>?dEhk#c-;p`ZykTVFjOY zg^;|6iN2HZjvfBp7<{VDIdYb~N7(%DTkPE;_`<~Ep7K7tc!K@|{J}2+vnxIz#p$if zv}rY^@(~*JZb<9AhUQjTf$A*zn7t&-QfIHfRh&-w2p%NI|sB2DDe#_Sm>=kk7GcK*aY2*s)NDf*Q3TIo0^gwKx)?w zT_WweE7EEP>A)zw)eDgLRgMU0aB0avU&+Cxgk2*Cwui#5X<%z3*q#Nj!#jh;2$c;i zB%M<1ldhFRy0((8V@UOM?mr|Q)G2AN9MbiabbUjrzl`FOZt#yuD>}(^!yM9$ zlyqaC)cMs6es5*$#yu`)&n@Mii>Y2*%00JF4pLu5+QcCJY>+lJNWd~keE00cR7M)= zfPz=bY?gzvxuR@gP_8y8{X&#rG9CgH9zZa6Vqh8@4=)9coUl=2xqtjALly;%3@T`3 zhcMrGqimTIb$^X|D--q6ChDy!t(zO`SIlWpTJ2wl#wDf!k1{iu~L{Pb5Xz4izO*0;Fi?9;j@Ce75u`-jDMG-N8aQ zu@k~U{W~Dk{WJ7MLr~6O2ImAbM1vXX2jjeDA{l0hWEE4-V=K*JD^s>|!}hjet5_LZ zuAIoOqueTU*s7FmxX+e8DH3{wg>YIYLgN;2$e0^T3Ew3acju;0w~WjQYDW!fCll0w zD5#w^sIHu*ImZd^ZMxrZ2a;WKz;;!zQ3fm%f$gSXobeG{&cOmuqjR8kSEw-tsx5-r zBZLB&?YM(EH8uxoPlXz1p!8=x{0Tl@p%{}jDjsZPuNmRS)-~T z)WjU9NeVUDKyAloOZ=#&7$}XaFu@ASzE2KRLZNC5)YK@dsR~8ebH|W}cpXDMN>9rH zo33Cp4A}GtjNhc-LM_sY?}OrU#(?R0(Bsi&d+2H&};>&Ge9#Upn3xoMdatklaLCfb0L)jHb=qc8n724Fvoy( zkf0ud8*-r13U#o7dNG1(451>`WBZM!*pvg+tWYfm>ZJ&3o`LE>Ej<>y3YF4tmd{H{ zrlGDeRa@Usml09eR>kP;^r-IR^sDacY&+poTwORTsyB89RpB$^ zzGIC)iB!FwGh)Iwtns&qs*9YRC;Y|QNm%k#cXxK1@CR#`u(rB0cEWG0-OpNAr&=#H z^p~EiyE&65{7K(WNUM4^r)I+MtgX(Tn>aJ8*K%qn{Kyu;RYLVTPW^=MSv!oiUQWY= zZ&^DQwKn~tdP6f_K%La@y!%OJG|4tteP4j2=4g9ure&DSSKk7QuT&PQZxMa`_WInG zOl?y#-6F~6WUDMj^CZ1GY7x9jy4Gm`snj`(rQeu zpX6*YsZF`-TH59{HZ;$z)gW5saP;w`!jH8in_C-NnthiX0gop~D#rEIb%?62P2UJT zAHPt;ZW(+X{wTO-)*{!Tt%xvHKXh&b?g5(sE9TB?8#-}ZWpd(}p-poulTCHajV(i) 
z@FMNT)}gIU+iY{l0E0DP$NmG#hYTMwY`_lv2h`(*{%j~4R#r85Sn1$ll>*AaGBM4oRsBWgG%qprH#O6|Nc1n`x&Y1prNHhi>6KFtmh^IFZI<*JV?C6x@aKhq z^K+W3c<1Ugm*M#JtN!@oFH5;n#VV7lPJj8w6(QDMT#Gqxr@2V-S1Vk=q;-kFRfGQi zQ<|szw4R=M#`b?iizi(DC9O2imuWq>@pPBgQxnftX+5v-RD+WOPZIu7od^1~9+r8e zPU|t1huO3q5_yzN>k*9yxiBZ!R1TM3noa-g}kpSuOgr}*V+)5 zt6X1Pu0SC!mxd6Ri!onZuB3c%xe$UZT*Uawat-1u%O!-bELQ-&vOInJ%JSUnE6dZK YFB;ETzN9?m_>%H4uR^x9%^vdq08s(jkN^Mx diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/py3_model_Feynman.pkl b/epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/py3_model_Feynman.pkl deleted file mode 100644 index 3e55c479e2cbe319b2de3f58e86119ef116bf180..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42837 zcmb__2bdJa_B|}1h$1K|m=F3n(gx1X)3yl^}}iusdw;xT){VlC&|7fH~)! zv(KD!&N)5x88JSeIj8?Q)zx9TYZv%^f8YBqRkv=Pb5C`3sGjcL!;5O0Qg!u`YPCkx zwn%!!{8Tnw(=tDkuFYk8OmC@6H}RYNsiw4DG^Yr~Y?Iw(&Uz@c zrX<%;+nCPUT{{BXTZir0kj?e5y8&~Z;x&oyVXuqW?!{KooX%iN+3U^e&VQS84Jw-+ z+P%I?Fx%SDY;RE2WllH5)V4IXh~2ZQ^PF{9vr*%}Uyz1fO6JyFf38yD0tbLH&2LLuaqw5IKTbC{C7`JDCfPi;$Eb1p6T zf0Mn1V{bWU1333gx8_=!XS6lVPm8@3@Hra$*;`k2YO^>({k;)dYHwq?gR;~fU|}pQ zjDQu$wtA`IXsTDd0^<820LkVeYXMxr7 z`38}X6JgO%dwUS7;&6M15aX`jq8a~%8E4yr!;G`-VlehAU|i`jmJ~7$@f6nEL$$(s zdzj|yUZBwGZDHd9c~h&8I~Cfbafv9W!l+p)!DvE(9OrgXiqA z9(Zn?3nLRy3_~7fY5$kM*o>2(Bmj|7-_twzaW%fQDLw7MlCkvsgJ?Q!N zz8ZS|GJB>GI@lPy*|YS<=w@b_UC1)WW3lZ0z`~=NWzPjk=h*HRkW97r4{wX9_5s)y zn`0;Bfxa%4wbFI=JbxGT4`BxuQrD20(>FcbK1eG}54BTTArG1l8V+G;d%jk9cg$VX=)p8r@`ZvhuRBPLp0s4*F-(dL{=eDgGZE04YxCzD3=;)A8aP#8A_c7 z_MKoiVmx^SV}16%sW!U_BihSM+w9Y()v2k0XRB0guA!wlYqxZCQr{zpbgu(iaptl+ zybcI;>0t}Pv&CwgHPqQTjB;0tmlAEo)*SbLX{+ZvdtoF8bsR}(E=4PpVXx?^`_A*y_wx*#eRgWtX zSz;fyQjOq@+M3puML06S$m~pAFC$TK9hF0 zkE<%m&e_nT4ldA+pJDY=*Brdevrm|_5kgvVq0uJuQ`~wr>RRN)8EqB^`lMnjhm)Q= z!YAtyR^xk$#bG?umRjDxoSwJONZMywhpn_A-ExIm5O1cfxvD6AjD1H)|bu#+1r!$9gcnHe7Ehc z;;N#mE~*`EcPH(89Q)q+X~(`VEad*Y{Xo)wut12Sct}$`>{D<&9!c7dI`(6l;_+e( zlTYzP(tgshpCZN6VTxz+_OnU*xj4o1n&JhYq6;WqOxiCw_RE^$6;c#=Dqc<6uQ~SX 
zqpeX+I58pMO8;{HK#|LE91g}Hyu+rK33U*p`r>G9=p>UXsx{1NO3 ziE{f-mBwF4BR-gaGmUk18vp1tn1pn~S)X}GXA~SMf(>OOT~Nyn>%gn` z(CSiI7d7T4-5Km3YDSmIdT=m9>A@Dm0Y}zH1M}?e<|!MfdUtGjjG9WeC%ZD)4N-8U zmkRKYg3=rHyle!Ul#OBI$@YQQpQdH9392gFO&RTP(hAuOb(L&iHW)@!`k{fzuBVgT zT-CXG`>Ry9U`M99B?^vgrJ93Ow?;iL{b7@`4QxEs0raZbCfl;;k7Y$d2Ew7z+m0=! zu^fa3rq@HKx4o*bQuEIp%N^L22@XcVkzy4PBv^uaUWULXWhiVs!C~-b{1F_^7(d5E zg^Ykh$qHRKg}@G;BP_G4T4^ zyF$jIsh`@<&X0N8kr^WY68<$;Xxb4*l94IC=RgVp|g+Fk<`DFQ)P8m+a*meH7`J+!)vO2dq_QlFPSG6 z{^i-VQ~DTbKshNHSVs|Ybm#iHs3qk*SVzu>W%d^^B9#4wsORM(*rZ$x8_)g{ zc!TUOWsHB6C@q!C;81h?a<&*26mkU`sHTrrb0zB;&Tnfkt>!9LsOD-Ed^OiF&{uOU zYDu{c){*OBspbYogw))KdR}gVP0GJu<7#e(H&Am6W0q&MhK!OBnU=RAgxPPRv%ih? z45y@3t+T(K6=r`23V!x?GSJWdF4U58H>@M~z%u)L84=36|W>>ox5v)@!_{|M_|_Ug)79wiF1e+&gb`^Oom=g|`=C*?_4N1lRZ?oYEll>0NN z=jB=0q&x>3&;5CL^)a)$TwXv;9W!5Kus@;3B;+MH)P#DOEvAF!D`=po&9tajS@(7i z_loj0_NA!TQSe2*!9XqQO_Y=J7OW$0!&210*&Y)04(fS%7d9#H!Nx`XhdsPAL7BXd zs#ic3}?3`J*r ztv}F*SxJQ%ipyw88Ko$)cm1fMbNFRN-M*2ZSoh|qE9Pgol$c)#R?pfc^W;|sYB9f| zoRr^T9r**6V*X@%NX%cT=jCtMr2GRL7t;xMSfgS(E1$1I%Tn8?2u;TA(}jJoeKyyU zx~e+QW8GA#$ilA?)|&qe5k8NtLsI`dR;5>Z>(a~3-5mvggw`XQ9-$s6CuMzDM>c@v z2=!!pXoNOIJukgrlhPYDK0+J8tG2ePHQ@3=~DCq-_+H*HylWEW06b&4k zE%exIrs^DHvLE%8=KAP<=AY2fzG+rb-*%^=2v#wH=#!=uJ#2R50;W-uB!He2ekDOPok zO^GTMS@;piGXMHx;U^y}@;gs>Y=)3DHa0`)<=6~E!5^F9WYc3a0_CKPgmq*TEXSsl z?V+(LLp?7E*rb%h#>b`t-e7Dh850_tN*N808k;d}GL6kxG;nOT(qpqD>lyA(xP)+* z=sU4OG2>A1#q7*LEoK*#ld>zUBjaHyW&+zoVs=9v>rPm#J7MEuCcztsnar4wm{OSn zj}kMLO(rpWpn+nx)?%iyp5YGQi>YFTV)jJA7c-rKTFeZTld>1ABYVSA%sy-niAkcK zmulFg>^$cgMFJ^yMDCPhZd@=JF zsKp$Ja#Ctw9XSY=Vp41miJ6ajUTR^JQU@CslZH1Cvw$%nF{7m(9wo+NlWEH|pn+nx z(PA>JyVI;%PqTyBmtq=G@WnJSP>X3s8S6$^M_OSi#%6m+jG&&EENoJ8uyHYM@CITQ zGA1OZOcueT#4Ki$Nz4*7P|N@=W-06LH1ot9!oC!9C@F+27 zv&kgp95hhOKrQB6Rp*iZJXI>P@GGxn{(09j{H7Z|(w|RK|7YDAz0$paUXIO$C^&MF zq6uylU5t8OE`d$TrLb|)m%$q>JT7O9f1e{ESD>yWU&#iOz8(dv6ctS%`9{>SQiR1y5jHOQW_Sb1w=iaT zMk~!Ip^?}2CeZzu!lWKS z!I4K95!%5XLp?8#!zSek*mzP;!kh8ehfgskSRam+r{PeO?HRV1Cfl=UV3IrNB%fnF 
z!-;0q=_H?Lg-O1Ef+H_7B9!DysORNn*rdDy8&C38c!MNgV~n5V*o3?ehf4AdwwRK9 z6Aesquuk$V)-#+bR)bFRZC04%zfo}H9Y%zbd>8e+ya$_<|G>tRd>`H*$qyJ4Bso?- zghM6y5nD`2evAetS*(-%gmrIoyUVjx?8_uSMZuBJRF7Z|evW!xzJN{2m$309zhVz{ zK=oG6(n)XMp5HilP|{!)3Y{10tdiW%3H->81MsuG>}x2$_pz)k!+xK!fbqu|I7 zsz;FckErM6C)lL?3>#1U7xqwzXZ(DBr7xUsrlTx>Q_;DqAv(+76{4O4ZkB(*rLz1J z1xNl;J%TL%Mm;b8z$T>=p4W+I*%{twmPPcfah6?FbgpWs&a$hj^SIVcm5MApfoQG% z$wTjX!0>Tx9Uuh9wHj|hOE2eCcN831PtgRTd!U||^1!Pk4iK^oES_Cw}=T z=>>eRT6Div9HB-2{+ zM=`@12aw*^*q}9TOD{DJM8T2m6iuLU5bAl^9yTdEz{WKWhBwey%$SZfmLP;0M`(>h zSa)v~jn5YMH?Hke1z0$08UOi(uHemPzmiS|&3l&{8H-;80qovc;rj4>VBAD6M50>+XF& zPfHd1Qp=tw_*$kjP-~fiGSj3&q~Q&;EMQEarCjRaP+Baun6^s;8mOgAYss+g&M{BR!R$*djVSn9ni#0H zG^30aFf3NUu+(C+J)}iY#|ju0D`42TmNs|;EejbFXsM7za40Q{*<#YN1P#=Z&{~$V z?#?k!%OUJbEr+7uYjGH;wJbv!3t(6*fMKcS2)2i`9Emy>z_3^V!^XAb;SIDL!A-*Kz^_wU!f6#`+f)>t9%EIfd;Z zEvKT6^)D>ezp!yFXTTe1Ig>GgmeFz+97@aCY%ys$2MyFxp|zaLx;w`_E$6W>wVaQF zujK*;YAqL{oRo`T9l02mS}tLGNXwu5Y~~0V5#L{wuiJlf_h#ag-yz1uyHMq!yBwipJ0rCyP-^;L|sXFiVY?yPosfS z#%L+eu+l9r-eAn~j8>3Q0wR}XZz6=bkJY)q#kzZ|Ym(0WZK5#uf1}{%{tg3m?(d?E-z;Gr z`422}f1mB4+&@4)FCW4tam_ok3~9$G#l3UmJ) z1wZ#M80i04{v~Qj`3lyN|H3l+uNe`_{(q?FdW;BV z-vf0lz+kZegNE4H?t%*!MyRv)@@~-tSBb&i8_r7co<=ziStlGDAh@swQYLdnv>c+#|9k=|@NRhD(Xr2L)eDl7U)G zHOg2q!8$S%mSSeHJtSr}>UrVDNXmY&aWQk*BO+#h`qo&?0V+CIwVM_*Pa&!s&Lj1O z?}2bBF*PV)^`v?PcNJ2o=Vd-@Qfgu2S=PZDTsfr~)A3z}1qk6zF;OQ~&$@eev0Nu* z5rxyG0R=}gsz;F2!Kmk@5jH7Ju<@jt;SG{%VNAzKwIYN`?XHuuS$A(JkI_j9QJ7Q~ z1xIqKN03w->UmiRo0LVc@uU{R8zi-aF&!th6d_D%l1}On)-&CB#v7^U;-N%gQVt4u z;#Bnrk~$3ayc`ajlp|o{NgWAqkknC(@sDwpWpXqe>KK=2i|H753>ui^WS!)4)-!#W zWVudq1^Y6|l_)rJtm+XYc^v9_IUW{Ii^9f}JQ3a?$&(nfJfl;~sLUdJ@yQ5bQd4wN zr?8&chDnXlNu5d*CUqJLj-0M~1WBENdS1?iP0CrY@ubd%H%RIn#a7m!>BX%g z7sE9NVmpl*F-k5$MZXa!=~Sa!%0~4HZMrqxTql>IUZQ>(SeR;TOUvb`&uE*U(Ii)( z>d2L_eDRbo6Y}lR##Gbn>Y5s`)zp;9RcOs9Qhvn1Zyz$GOs+!w~OEY7Pv>@->dNpwm?Ge^MNPm0hutndWcV;w?bk_ z@$h0g)47yx_N12#7^*g!SS_=wOXPkxoAw9t0hP^z@odU4A0G0vVVcAJH00sc!9Jp3 
zA2q?2$zwijhturw)sZ}*NS-v2B;+ZdWb+P2vP_;{9m_L{<=Hq(0weUC&$3$wEZAP& z&J^EbdN0!(?D^HD^MXp}MN>Lu@{*sJT<@Q*dPKOj^60r|wo z_XlJu2V~XikUv$(pJ`<809M=5oT;yIkMs%o+z0Qkl6Mc|jA_HycG7C%sFOkhTE1`@ z+dqhWsTjX%m$7VdLjJpEj0gqe*Q;avpJM!`UB(0$zg;s%go5$A)iHjr7=LJ&k;ls) z*NhRNVEk!yj6W;JU)p8l@#ohyV?-zze_I{n?~3t{b{WeSm&u=N#)wcb{eOxEBc>&Q*khu`_}@ymBZ)fFoUC!xvcLy`xmRt{EdPnZVd>b&TsM#&tVj zER*hQ%1Djtwa1uNAInHD#x-vYP&0+1gZNS~i7;7sWb*%}~YNF<556i)kK+r!0_u zaJeb$^7+D|it}pD%aAogqKpf^If_+9sZ0cN3%DKG5|)s--_R*lEnBf0FPNw7`as35 zhkkvE6@%NFa9v_>{T19cE?kC}uNvpMH!ewi^2-8+qdr77yQOhKx;dLmH#Ext^0{el zOWe*e+<^*rI~P|C9hKvhE`!+D&Gn0S2EMfFpxxYDLzvqWGcJ7xg*jMbdM6ik{UOBx z>O-pzny_;8o-x!CqQ*yHh(aA2K~yvFefU^-8H5+wU#vH zQf)FRK>f4%tP}Qnf2SDgWTN(pp-xe#Q(aVRTfJFkJ=eW-hcA{W5VaPcKB2l=_8^P9 z&8HEqZw#$Uq3x;BChC1CJclwNkm&@8&&wGKWG@XeRi~6bJhW9qAbS%ezE$^8AW01} zOM`42o^BzKYJ$Y4+rA29rUuzxgVc;SXEKW*o5#jvwgTY+82f=WUyn(_gbHP{A3@?1 zYOVs=UxU?;-faE6JBdRe3kcFLHpKM` z#L^&pXkot&+}>VxK_&*$K#=&1$taM6HOK)PWa*#-hd%p3uNX)pLE;;`Nr5zLkR}&o z=c9f(N=ff8DI6|O09uL^fV+G{Q!Sv~@5Z?)7x z8bjQ;)&_OAC0Ll2j8qX*7AU`7{FRiH$dYK%?r{ZKb`_5HG7A<0p3*hk$4 z+Z1}D86yH0eajG2Fh(xjVeHEDRA{6QhYwHwz;gJ#%cXQ$j%2Xn83dlPkfLA!)C$%c zixN2sT@18G6DdNQhtH8?U`gw*XlMH?+U4w|=)E(;^x0g|u7Kabv6A5tj$`3-gh*=QALJb^&~jTnHORt1p#{7_74P6s6N@b6^NBMw&eO=^vJpyJD4i0y4LuCBw-YErdk1`u+zA`rP)p{^T?|&N zTXP&Kc}dg!&|#>|9ZQb+-H0{t-9v~7-@WiTavy8~U#;BF;3!|MnePF_8u%V0M1=1l z`0%6{Yyn?d9$|2lFKy;~6tM=r#|RPOdmKJTo`5ajYmz4!9OY{=^F4)F1K-nxi10lF zpCix07VzcdIR;1ha%R5g5o_Rkfe;bC7vaNWYp~?=zL>g%i-wmOt{AroW(*q4oUb6- z!1*dcBAl>pw+IxWeH%V}y91WA z-f3`;(#45Vd57_em`544bGZ9P_~ukX-bEPL{JXIk_d?)3_KvXq2R=MY2ODL>?Q6z& z%*NZcA0o`a_7QtW*gl5OkxyWwYzbM#_>S3l%l1=*8Q4B!?+DxH@Hz4YYylhYt(M7` zj4xz!_xfNYdAs&2gc;cW%ia;Tui?X2M_{9D3HgTcg>3FV5wP)A?Y9Usuzkng5w`E) z!&_dk5w@~2`H}I3Y;kR6@)N=gY(KMigzXpj9QhSC%9fDd7+=U1*H$LKBh0||2YW}@ z{)7+jhQUVI5@qr?;|tm1+7j{)!VGMkaHSh%>kJ=0ehC|8OGp>S7qZ2*C8R6D3~b%l zJECnJ_#9amwtx+Hrc0$e;}x6tiwu5W!7NmVlO?sX9@-5wJ=iTmvp#(IHVSN%rc8P= zK2GCavVdkov>Rx8v0H?uH++t41RJGE$i|G1(8*@WF9G@HVQM<8J%G&m&o 
zWqgzdJCjS3lYVG7&}`0b5t=RF!wp~9C{3Ac#rQal9<-cnjdlY~e|C$|Yy+Po17M>x z3E7tMaT+~nIT?s{1I>2q7NHpgpCjAD7SNP0E=|Y|jE~ab4C4;kd>M>(15GizMQBRk zb7Tl?gr+PZLm3~X!70O|8HRQP&2V;$(2Ri3k&&!ulj`8ptI3_S$bELiBLUw}_FV?}% zligwcn>&+Gabz-G7)9^V8~4TtenXjp2A8`xu42OXGE&u8olZq_K?-E>$i4eVd%)+& zG+0vb34uh0iPp#!IIEhrB3lT+TQG= z$gw8wG=t;?> zOYs+pyr1|>b734>d;)ge4(=5tFpkuscCB0T5~cQV+KsgmQq8Ld}u+upUA~ zlhmSWM&MWerLF)Wtq>My1U^!iP1j1j0fIYd>4=fFE!pCwC1MqzG$@peM&TC2r}=B; zU;_--*7zwhTMR7VG#21ADV%1F!)$7^wbEjM(b;4hnwd>&0gA0qM5Cx|2$MCysB8%0 zW|LDm>XGp_cGriUg(%OHMarX}8eEKuBTMMAiqw}&-EGcCpY)Sr?lRPWOl)Lgge*lo zrf{&5AhI`ky?yZz1%0T7=F31X^cH~DcS3NGN3fWFXY{4dWH^WJUh#Uh?h7a#p z!gB9b$GM3$qvceFDmE%p&j1Ew{;i_gMI~|?A`M)p6ClEM27Hd32^-?V=ZF}pxJtmK z?hgfA1;TJwUf-l5^V#TUU_OU%5$1E@!;`?UA?C4iK0_5V=e0XEyzN&&bpZkmR2Q;; zgz6&r@WLl7sjz0@XESkqb_rw2mEnrt0CuSX>@o#*c?7InuJFOQpf`YBX#l%Qfn6N| ztB`AaFs|1PVAmSJu2W#wN5CrO1`kX?Kfw41_Ji=@eO1^HcG)O-n6WWztg0VDg8}+O4F>F|7$3oY8a_v!femAq$+L_lcBVT@2C(NC5CMB0K1W`F4TB}*MaC+y zo+uf>USdE5>}B{Ic?C8MRxYnHR)KAdk^$^B21LMKhtH8WV8dV)@+M;ySYMP3U~e%X z0`@k1xLpVv2CI~J7^}dxLdgL3E(0Q9@4<&RY+=J-qvd_ZDzI%)GJt)+fC$)!@Hz4k zY#3~ee9Tyah4y;2D*Xfv2JBUgk6?caAHL268^#_hpEEXwjiu-pXfR-Z$@mENSMcFQ zUDyzIVw8N%*cdj}pZ`OH0s9-qN3g$z&ynw7!`P+rJ!2I%eNH1?k^cIg_Q_k$&MiFCsKdxN6Ra7Pst!H35`VWB89!oO@A$t_dd@g)Z@ z3&ONr&9vPNwCgC^bv12`PrH++H(0%^fo=D)r!WWUZcbr6Lkc}q3hTQmc$XuI(Xs(! 
z)uiE7vA)3buX(b#yy}Sv!|-m%j*$t|3qD7B!-i+(7}Nty-YdbA0b80Gw=ytptr+`j#?d}wg~#Z=!lfrq!Op3lV8LxBm4o~GLWyi+CLdrR z-&TzdUTh#QQRG83dA*U` zyNSPMiEgkxO*E3_mNk7=$>dM+}%JpNfA!ggu8GNpa_e-quQGAO)>LLHSq1B z_@=ph-Z`+MOsW{Gn0HW8)pbC7-0mTzMD|1{!}(%5u_EV-8Svr$GAz#*1O3zgcB|9r za>?Fis(lPpNkvudQuz|gWnacBi5=a?;i-2fA`QA`5g?*#Hhhlo@rlr>x1vJ!W2_?M z8PU4~>D^F?o*?v{9CFV^M+5i%#EWnr0G}iCV8h&%av)41^7eFrx|iHCUI>-yPfK3*C%6*v!{x;A>KR%`TreS1U(J3uDzL+|i#t z{@z${(dIqCs}sX!^{-s1y%j))O>7fCvWW#gN3yV?=~P)NImW77_KoM#{!GS$$9geQ z5EE>n(1^8x!jR=cGDosp1RoxPhUG*&+CN}cb0Ttv`NLE|8;lA@Xl+tkVoq(TA+R>ZNbr_3V$fdBMF&&M+AIMlm zADovWo3h}xp1WNQxm*q)LoQbkKa$Ip@Hui7EON=L=kK*++nGy-1pMnO|4<@Vn-jRk zkifMnf$Q7^{B4nt>lv#CV^}+>v@8DxbT!Dok%$rbH^GN5jlzaEMY-I}SjE0(`!X;1 zZb2Ud?X850(B1~0Be%nbX)ELo#wyx1+Ly_CC%PC|?;=u!^=|m^Cl_JEtd(*vV-@TE zYc%a~;POtM{`G*j?Y-R*hdIN>zGmUB4bsq54JaB;|D6duBYF}TeEA84yUpQ z)zR@eI6cInf)q;RC8#kB$jcNN8IV`tbL3T63`nNhKV099Vd&r@%i~|Wi!HBLaLE!V z3SH(G?D_@69)!oC!UOP{c>rEF48R*|0N!*5z&l-xDU-Jtt7ZgWU2|^x4VRgZISk}Me6ewLX-+i^n=X%e~^&bP*`-^lYaeFSWb z{NRDj2~8yTeL{cZ{)iX@%}?wa$@ypa9Qg&76N!JYX*Clm9oe|Qnz?>6aQ&{h{&2ax zjXQRf{K?qJz&FWX@ESz?&ESZLf8cYZ6JDJP4gASa{%@!jrH>)Sf^St++OFx%`b>AsaKca1YLf2T%GSz%X{3uwP{CHiZxGOu&Y>cct`Y ztm5Dksp_{}^%`A2_zlpTGdu#l1$=l<0yYdiTDD@W+K2g0o&juY16Y3rwoL?Vj12I= z=BOtk)SW9P8Xk`l*%na-qJiuj$$C5Z92o@5$;3Yv<_<&nc7Ay6OR~r&-rmf%gMn?Z zVk_2ceSEfY81#1U`UP);@w-v(R|TTK$(ERN7-Gm_sLElO&SA>h=YV%@Rj$z-hMRL3 zVaQ>m%3+kwVbR*>kZ31|QgaSvh8z+qhjN|6_G_C1e!OibhYE8Jm4+Nfs~pDY9A>Y5 z4()A=vF03hG~}?8%3++&VfotU(B8J#*_^{Jh8%WPIgHmij9%Lu_?0^IoSb0JVK+k# z6IBko>m2IWK8Njf@3ZFM~||5D}a4mfXrq7MYs1Vm4C%OMUqiG zH>+v`>b?qfri*HQ;HKsMcIQKK@f5s}Gs_G=I|QF@vyPl6e4RY@vdx|?g12Rs_K^M1 zig#`1;vYx$XQ+Pkedf&SQaJz)bpp?}HZ*5t9;)-?Kt@gb2mdKk)j6$R!O7;Ntv1)j zS7s29;rCGSVvNQ=2rm4IEcW#AXO+r)M!KlAYzv`wNoR@joz3yBh` zLqmX_M$M50?2nClw#KVx9iPX?|6HU#Ths>!odomh<|HC=fX#= z_m-B-^x`spNVyRKK^je{InvBdS|R|;qy-KQ*vdA*1%N#F2Y^_xv$D5~ZX-HC7u4|M z91#Nagyi7R=xuB(K=;zsfC~{204_oezoW4et{3<+^dOtuqB8J3Qd;+B5r6oh$Ast>y*dq&I 
zk5aHlYuFD0*nA9jbVsn*RAocrq8RnGKkC!Z>iFiVj~wAHQvEL0{eIW|E>Zougf~H^aFi}p{Vvn}-qHOo_xqu*9&J9W><>_R zRG8lqj6>P5@)EfMNWsRvQn6j-vSl_2Hty91!s#6l4js@Aq1spdwqX1dOy-(`WUf`o zT<0cZy{n~PZ^&c~rAOFqC}6u$vE8KE-qURVS{+;Aa6(;s+HNjjyG60x>au0V1_OPY zfpBIA1C2Y>K4W1nCH&r8xVcb1o8@+uR9d}a+?tYHLv3TarmmqjCwHg>?^M}rW_7RX zYxS?{XAPS6Ij%IURaKi>L#j5gMo#+?RrNPXsy4LBr+v>F|M*8$Z)?o7Z&~AC-KZ+E z#!dTMMdEN<)zun5?N8SFvbK&jY1;3s@vrPub+)S1WrzCvj8$E%y{7%ewnZ4As%}>G zv_Du|&f4bIoT~M$nrT1L!#`?M)zhk*_5*9zu-40JnD!lO{F~5i>W$@wW_*b_rCxq^ zMc$=E-VLi>9GCP0<*&)LjFNj)+r5UiQn^pH-Oo0@#$4Z$t7%GQS|rt+%E|+9&rk=_ z7Qr`#Ype#4O0Bg-9#q{Pf-Sagg4E-xirmDo7SE<-VY;rS*22rvX?Yki_|#8by`);L z5exCBKU&(dBbpkrwLSJCXu7UCo$DcwAgm-Qk1E6sXINX!u(qDjrnqZc+FBbMn(J#+ z5?Of+F>Y4)s~xH4Y(q=4+a!;p1)ps#AKPM=bdnm%!4Q+-9MskXVXWn>e+sN9$xnQhu`yF&(Rtid}D z7@Qa}dc>%~!v+kl!vE)Ieb*<@KF_m9eIjGxTDjlKGlYScpBAt@(h!!!t6+^ z&m`31{pp%4zFORnYXI=GXkDo`u(UNdQ**j8?Y@8hoa+8OtlGe-#>SRKHLVS`2jip8 wFTlG(32kg>YRIK>(3+JORgah0gP%dm=CD^Rkd~%YW2==KB`>34W!s4V2gj7TGynhq diff --git a/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * 
sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/gg_ttggg.mad/test/cudacpp_test.mk b/epochX/cudacpp/gg_ttggg.mad/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/gg_ttggg.mad/test/cudacpp_test.mk +++ b/epochX/cudacpp/gg_ttggg.mad/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt 
b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 66cd67a19b..e3ac0cd576 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. @@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +38,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). 
Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -55,7 +56,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0029516220092773438  +DEBUG: model prefixing takes 0.00561833381652832  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -148,13 +149,13 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 0.953 s +1 processes with 1240 diagrams generated in 4.382 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 175]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 @@ -163,18 +164,18 @@ INFO: Processing color information for process: g g > t t~ g g g @1 DEBUG: type(fortran_model)= [output.py at line 224]  DEBUG: type(me)= me=0 [output.py at line 225]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'color_sum.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 
'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.h', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'cudacpp_overlay.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 226]  -INFO: Creating files in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 3.379 s +INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
+Generated helas calls for 1 subprocesses (1240 diagrams) in 14.358 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.214 s +ALOHA: aloha creates 5 routines in 0.888 s VVV1 VVV1 FFV1 @@ -187,17 +188,17 @@ ALOHA: aloha creates 5 routines in 0.214 s VVVV3 VVVV4 VVVV4 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m7.419s -user 0m6.626s -sys 0m0.185s -Code generation completed in 7 seconds +real 0m31.360s +user 0m29.999s +sys 0m0.614s +Code generation completed in 31 seconds diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for 
helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before 
computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc index 6b89d18559..eefc81a09f 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc @@ -32545,38 +32545,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -32602,7 +32614,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -32737,7 +32749,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -32761,7 +32773,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -32873,25 +32885,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -32954,7 +32977,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.h b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.h index 75c52ba31a..fbe1065f6b 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color 
selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # 
Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capabilities (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) + ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed
device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 
bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* 
selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.h b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.h +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef 
unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/gg_ttggg.sa/test/cudacpp_test.mk b/epochX/cudacpp/gg_ttggg.sa/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/gg_ttggg.sa/test/cudacpp_test.mk +++ b/epochX/cudacpp/gg_ttggg.sa/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 248fa16d65..1f2a00cc6e 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. 
@@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +38,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -54,7 +55,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0052111148834228516  +DEBUG: model prefixing takes 0.005351066589355469  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -163,7 +164,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.041 s +8 processes with 40 diagrams generated in 0.229 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -174,10 +175,10 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector INFO: initialize a new directory: CODEGEN_mad_gq_ttq INFO: remove old information in CODEGEN_mad_gq_ttq DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  +WARNING: File exists 
/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ -197,9 +198,9 @@ FileWriter t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu -DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1749]  INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -208,50 +209,50 @@ FileWriter t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.017 s -Wrote files for 32 helas calls in 0.625 s +DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1749]  +Generated helas calls for 2 subprocesses (10 diagrams) in 0.067 s +Wrote files for 32 helas calls in 0.549 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.094 s +ALOHA: aloha creates 2 routines in 0.278 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.080 s +ALOHA: aloha creates 4 routines in 0.292 s FFV1 FFV1 FFV1 FFV1 VVV1 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages DEBUG: result.returncode =  0 [output.py at line 273]  -Output to directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. +Output to directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. 
Type "launch" to generate events from this process, or see -/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README +/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README Run "open index.html" to see more information about this process. quit -real 0m5.076s -user 0m1.391s -sys 0m0.672s -Code generation completed in 5 seconds +real 0m6.914s +user 0m5.669s +sys 0m1.040s +Code generation completed in 7 seconds ************************************************************ * * * W E L C O M E to * @@ -272,10 +273,10 @@ Code generation completed in 5 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt treatcards run @@ -302,10 +303,10 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards param diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt index 712b1897aa..db7e3616c4 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt @@ -255,7 +255,7 @@ # pineappl = pineappl -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat index aba2f10b06..2ebb2fe196 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-14-g6dba8f068 3.7.1 * +#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_functions.f index e986b059a9..47699fa614 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_functions.f @@ -2022,21 +2022,6 @@ subroutine orxxxx(p,rmass,nhel,nsr , ro) end - complex*16 function THETA_FUNCTIONR(cond, out_true, out_false) - - double precision cond - double precision out_true, out_false - - if (cond.ge.0d0) then - THETA_FUNCTIONR = out_true - else - THETA_FUNCTIONR = out_false - endif - - return - - - end complex*16 function THETA_FUNCTION(cond, out_true, out_false) double precision cond diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/Bridge.h 
b/epochX/cudacpp/gq_ttq.mad/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity 
filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before computeMatrixElements) 
+ const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 787b72a15b..7382e1b70e 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -994,38 +994,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) 
fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1051,7 +1063,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1186,7 +1198,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1210,7 +1222,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1322,25 +1334,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1403,7 +1426,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h index ebc491b00d..ab9d7dde82 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: 
multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f index 4595d5a38e..abfeda5bd0 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f @@ -794,8 +794,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -896,9 +895,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) 
CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 0f523f574b..13398bd74e 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -356,6 +356,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -367,6 +370,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -487,7 +491,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -560,7 +564,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -573,6 +577,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -591,6 +597,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -642,7 +650,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -666,7 +674,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -674,7 +682,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f index 90ac031008..45a46d5129 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -361,8 +361,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index e2c28c73eb..c62d49022d 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -994,38 +994,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice 
(nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1051,7 +1063,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1186,7 +1198,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1210,7 +1222,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1322,25 +1334,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1403,7 +1426,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h 
b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h index 2c3a739550..55c42cb947 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f index e239a05794..b3ffe0f7cf 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f @@ -794,8 +794,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -896,9 
+895,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 7240e416ab..9dfed40308 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -356,6 +356,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -367,6 +370,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -487,7 +491,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -560,7 +564,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -573,6 +577,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -591,6 +597,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -642,7 +650,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -666,7 +674,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -674,7 +682,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f index aa0f9bedff..7245268851 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -361,8 +361,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/addmothers.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/addmothers.f index d6cded9a2d..593c620d9b 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/addmothers.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/addmothers.f @@ -111,7 +111,7 @@ subroutine addmothers(ip,jpart,pb,isym,jsym,rscale,aqcd,aqed,buff, if (btest(mlevel,3)) then write(*,*)'unwgt.f: write out diagram ',igraphs(1) endif - lconfig = vec_igraph1(ivec) + lconfig = vec_igraph(ivec) endif is_LC=.true. 
maxcolor=0 diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cluster.inc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cluster.inc index 8ddf5bee13..940c25eac0 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cluster.inc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cluster.inc @@ -43,5 +43,5 @@ c parameters for sudakovs integer iipdg,iimode common/gamma_args/Q1,iipdg,iimode - integer vec_igraph1(VECSIZE_MEMMAX) - common/vec_igraph/vec_igraph1 + integer vec_igraph(VECSIZE_MEMMAX) + common/vec_igraph/vec_igraph diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See 
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capabilities (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and PTX for the highest one (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed)
+ ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone 
bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const 
FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.h b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.h +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile_original.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile_original.mk index 6cb56d0409..348c283be7 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile_original.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile_original.mk @@ -58,10 +58,7 @@ $(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) $(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -libcollier.$(dylibext): - ln -s $(LIBDIR)/collier_lib/libcollier.$(dylibext) || echo 'already done' - -gensym: $(SYMMETRY) configs.inc $(LIBS) libcollier.$(dylibext) +gensym: $(SYMMETRY) configs.inc $(LIBS) $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) $(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/myamp.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/myamp.f index bd02dfe2b4..5360566ef4 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/myamp.f @@ -139,7 +139,7 @@ logical function cut_bw(p) $ gForceBW(i,iconfig).eq.1)) if(onshell)then c Remove on-shell forbidden s-channels (gForceBW=2) (JA 2/10/11) - if(gForceBW(i,iconfig).eq.2.and.sde_strat.eq.1) then + if(gForceBW(i,iconfig).eq.2) then cut_bw = .true. 
return endif diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/reweight.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/reweight.f index 353e025d71..8e4672a421 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/reweight.f @@ -1416,6 +1416,7 @@ double precision function rewgt(p, ivec) rewgt=1.0d0 clustered=.false. + vec_igraph(ivec) = 0 ! default: no MLM graph selected for this event if(ickkw.le.0.and..not.use_syst) return @@ -1467,6 +1468,7 @@ double precision function rewgt(p, ivec) rewgt = 0d0 return endif + vec_igraph(ivec) = igraphs(1) ! save MLM-matched graph for this event c Store pdf information for systematics studies (initial) @@ -1592,10 +1594,6 @@ double precision function rewgt(p, ivec) c alpha_s weight if(ipdgcl(imocl(n),igraphs(1),iproc).ne.fake_id)then - if (q2now.le.4)then - rewgt=0d0 - return - endif rewgt=rewgt*alphas(alpsfact*sqrt(q2now))/asref c Store information for systematics studies if(use_syst)then @@ -1907,7 +1905,7 @@ subroutine update_scale_coupling_vec(all_p, all_wgt,all_q2fact, VECSIZE_USED) else all_q2fact(1,i) = q2fact(1) all_q2fact(2,i) = q2fact(2) - vec_igraph1(i) = igraphs(1) + vec_igraph(i) = igraphs(1) endif c call save_cl_val_to(i) c endif diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py index 74f6b04b68..c248436e7f 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py @@ -1004,8 +1004,6 @@ def __init__(self, finput=None, **opt): self.comments = {} # comment associated to parameters. can be display via help message # store the valid options for a given parameter. 
self.allowed_value = {} - # allow nickname for some parameter to avoid integer mapping for some var - self.shortcut_values = {} self.default_setup() @@ -1134,11 +1132,6 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): scan_targettype = self.scan_set[lower_name] del self.scan_set[lower_name] - # check if the user used a shortcut value (which are always str) - if lower_name in self.shortcut_values: - if isinstance(value,str) and value.strip().lower() in self.shortcut_values[lower_name]: - value = self.shortcut_values[lower_name][value.strip().lower()] - # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] @@ -1317,8 +1310,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): def add_param(self, name, value, system=False, comment=False, typelist=None, - allowed=[], - shortcut={}): + allowed=[]): """add a default parameter to the class""" lower_name = name.lower() @@ -1353,11 +1345,6 @@ def add_param(self, name, value, system=False, comment=False, typelist=None, assert val in allowed or '*' in allowed else: assert value in allowed or '*' in allowed - if shortcut: - if allowed and shortcut and '*' not in allowed: - assert all([val in allowed for val in shortcut.values()]), "Some shortcut value are not in the allowed list" - assert all([isinstance(v, str) for v in shortcut.keys()]), "All shortcut values should be str" - self.shortcut_values[lower_name] = shortcut #elif isinstance(value, bool) and allowed != ['*']: # self.allowed_value[name] = [True, False] @@ -4186,10 +4173,8 @@ def default_setup(self): allowed=['partonshower'], comment="list of check that can be bypassed manually.") self.add_param("python_seed", -2, include=False, hidden=True, comment="controlling python seed [handling in particular the final unweighting].\n -1 means use default from random module.\n -2 means set to same value as iseed") self.add_param("lpp1", 1, 
fortran_name="lpp(1)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='first beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("lpp2", 1, fortran_name="lpp(2)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='second beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("ebeam1", 6500.0, fortran_name="ebeam(1)") self.add_param("ebeam2", 6500.0, fortran_name="ebeam(2)") @@ -4198,24 +4183,18 @@ def default_setup(self): self.add_param("polbeam2", 0.0, fortran_name="pb2", hidden=True, comment="Beam polarization from -100 (left-handed) to 100 (right-handed) --use lpp=0 for this parameter--") self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_proton2', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(2)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (used for beam 2 if group_subprocess was False)') self.add_param('nb_neutron1', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(1)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_neutron2', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(2)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (of beam 2 if group_subprocess was False )') self.add_param('mass_ion1', -1.0, hidden=True, 
fortran_name="mass_ion(1)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 1)') self.add_param('mass_ion2', -1.0, hidden=True, fortran_name="mass_ion(2)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 2)') valid_pdf = ['lhapdf', 'cteq6_m','cteq6_l', 'cteq6l1','nn23lo', 'nn23lo1', 'nn23nlo','iww','eva','edff','chff','none','mixed']+\ sum(self.allowed_lep_densities.values(),[]) @@ -4228,14 +4207,12 @@ def default_setup(self): self.add_param("fixed_fac_scale1", False, hidden=True) self.add_param("fixed_fac_scale2", False, hidden=True) self.add_param("fixed_extra_scale", False, hidden=True) - self.add_param("scale", 91.1880, shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) + self.add_param("scale", 91.1880) + self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1") + self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2") self.add_param("mue_ref_fixed", 91.1880, hidden=True) self.add_param("dynamical_scale_choice", -1, comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2\n '4' is the center of mass energy\n'0' allows to use the user_hook definition (need to be defined via custom_fct entry) ", - allowed=[-1,0,1,2,3,4,10], - shortcut={'ckkw':-1,'ht':2,'ht/2':3,'et':1,'shat':4}, - ) + allowed=[-1,0,1,2,3,4,10]) self.add_param("mue_over_ref", 1.0, 
hidden=True, comment='ratio mu_other/mu for dynamical scale') self.add_param("ievo_eva",0,hidden=True, allowed=[0,1],fortran_name="ievo_eva", comment='eva: 0 for EW pdf muf evolution by q^2; 1 for evo by pT^2') @@ -5598,10 +5575,8 @@ def default_setup(self): self.add_param('niters_fo', 6, include=False) #seed and collider self.add_param('iseed', 0) - self.add_param('lpp1', 1, fortran_name='lpp(1)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) - self.add_param('lpp2', 1, fortran_name='lpp(2)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) + self.add_param('lpp1', 1, fortran_name='lpp(1)') + self.add_param('lpp2', 1, fortran_name='lpp(2)') self.add_param('ebeam1', 6500.0, fortran_name='ebeam(1)') self.add_param('ebeam2', 6500.0, fortran_name='ebeam(2)') self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", @@ -5644,15 +5619,13 @@ def default_setup(self): self.add_param('fixed_ren_scale', False) self.add_param('fixed_fac_scale', False) self.add_param('fixed_extra_scale', True, hidden=True, system=True) # set system since running from Ellis-Sexton scale not implemented - self.add_param('mur_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mur_ref_fixed', 91.118) self.add_param('muf1_ref_fixed', -1.0, hidden=True) - self.add_param('muf_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('muf_ref_fixed', 91.118) self.add_param('muf2_ref_fixed', -1.0, hidden=True) - self.add_param('mue_ref_fixed', 91.118, hidden=True, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mue_ref_fixed', 91.118, hidden=True) self.add_param("dynamical_scale_choice", [-1],fortran_name='dyn_scale', - allowed = [-2,-1,0,1,2,3,10], - shortcut={ 'ht/2':3,'ht':2,'et':1}, - comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n 
'2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") + allowed = [-2,-1,0,1,2,3,10], comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") self.add_param('fixed_qes_scale', False, hidden=True) self.add_param('qes_ref_fixed', -1.0, hidden=True) self.add_param('mur_over_ref', 1.0) diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py index 6f82393c3f..3c5601e27d 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py @@ -5205,12 +5205,12 @@ def init_run(self, cards): if self.run_set: self.special_shortcut.update( {'ebeam':([float],['run_card ebeam1 %(0)s', 'run_card ebeam2 %(0)s']), - 'lpp': ([str],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), + 'lpp': ([int],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), 'lhc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), 'lep': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'ilc': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'lcc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), - 'fixed_scale': ([str],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 %(0)s']), + 'fixed_scale': ([float],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 
%(0)s']), 'no_parton_cut':([],['run_card nocut T']), 'cm_velocity':([float], [lambda self :self.set_CM_velocity]), 'pbp':([],['run_card lpp1 1', 'run_card lpp2 1','run_card nb_proton1 82', 'run_card nb_neutron1 126', 'run_card mass_ion1 195.0820996698','run_card nb_proton2 1', 'run_card nb_neutron2 0', 'run_card mass_ion1 -1']), @@ -5795,8 +5795,6 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed_for_run.remove('*') elif isinstance(self.run_card[args[-1]], bool): allowed_for_run = ['True', 'False'] - if args[-1].lower() in self.run_card.shortcut_values: - allowed_for_run += self.run_card.shortcut_values[args[-1].lower()] opts += [str(i) for i in allowed_for_run] diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py index 262d39a736..3bd0c281fc 100644 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py @@ -38,11 +38,26 @@ def compile(self, *args, **opts): cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py - logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend in ['cpp', 'cppauto']: + backend_log = pjoin(opts["cwd"], ".resolved-backend") + # try to remove old file if present + try: + os.remove(backend_log) + except FileNotFoundError: + pass + misc.compile(["-f", "cudacpp.mk", f"BACKEND=cppauto", f"BACKEND_LOG={backend_log}", "detect-backend"], **opts) + try: + with open(backend_log, "r") as f: + resolved_backend = f.read().strip() + logger.info(f"Backend '{cudacpp_backend}' resolved as '{resolved_backend}'") + cudacpp_backend = resolved_backend + except FileNotFoundError: + 
raise RuntimeError("Could not resolve cudacpp_backend=cppauto|cpp; ensure Makefile detection runs properly.") + logger.info(f"Building madevent in madevent_interface.py with '{cudacpp_backend}' matrix elements") if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' else: - raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) + raise Exception(f"Invalid cudacpp_backend='{cudacpp_backend}': supported backends are [ '" + "', '".join(cudacpp_supported_backends) + "' ]") return misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/py3_model_FDG.pkl b/epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/py3_model_FDG.pkl deleted file mode 100644 index bf5a732979d683e3642a1177b58851862f165d66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 49027 zcmb__2b>he_BAZ1hyf#DLX7N!gpGg+5fNl1I7>3%I_wVgjvMH^vm|Ydvw%70tWTdg z=bWB7>oaFD>oe!@opZZ7%uMZq|KEr2-L0xyea^k7s=KRudU_5os%uKt&z0mnXGmR( zq=w8+wx()ZW~Wninbw{YTk2DdJ>4!PN4Q;!XLj$>rAu?NDdiT;EJCrh$?Z0CO%&!O zCDTyXm}+&qcLp}ablbb3HPh2w4VbGJuS)-(?i%RZqu42$*%fR_cg>kS__sOJpwaX+ z&1-3Zt@9e1-LRK9G#9gPl>&(?zbCNPQ<*qvpW&NicG$mVG-CkPUZ){?+ zyPj^jprJnFxa&LXDc8LExwUoB!rh>{xAtsoXim8sYH>kw{vvmynLU9xPg>?B=O#1Y z+_<{S%r(%|&{|7*+}_J(u7hf8s&P&&BiF`0%M>BAXkN*(q^12R_x<*WIGJOPfQ5`UMItcDHnVQ5L)X9cbY| z0Vhh^>7{Zs)i>5o+|S+0SwF1K*w5YC*`T_Qv!Sz*v$3;@(|eiIXPMI%C4{k!16C*G z+m?Ks2nzav28(8P0VFnGEN; zI~cwmITaw`L-rX_Aw=S=HDDhv4VT<{Sgc*Y$W zfM=GtJ9ZAfo*jIrT=1QPacFaQ0X7G|ZHc?9f$oNZ7`0-W{&RYT3KiL*N9D@DTL4|> zjyBMBOWfV9(BZV5IK~}ghP%6+Wsh8zu>niQ-P5o@n{g{)nd**@&)li*UYK92+X*M+ z5>^L<_3qw=uzra*=d_Y+LWG}8hEx!)@2%6np@qL&Q9ukMLqp>z&xC}+)md4 zu}ylqg6^%wI%W;^ZU(zwcZW-fwqj?d|6j^kXO=rZF9ux*)FvC}re;gB=NK*vYBJK$ zJh!L209!Bg+(pG^#gLja$6Z`ECrGE~o~G2(Jw)ebse5SF^%Cw9Kl9ewhNk3PtVCpy 
zd)RVSa8h0Eyp{zxNgs}rG|&2tEiLnE!`35k!k(Sf)d9jtHD?yPN6vDOQu-SFD|L@f z>beT6fO%MG%x!hEp4(pCrMgSIyHvZ48&g|e?k;Q9+1?`cDQR_=d+sqhvGy6ehkI;w zQR~cg1L|mjcHAVVudX?`%yW;Qxjwqg!$PA?W+yp%wYnBLVN#nzp-(J!GSzDug-;%G`mUT~0xMw(rEq9Q*<0tAMxighhb2}^PAe*zDFp;W}?m4Xv zqCeL;wz_Mp!w{Y0ogjwu7{l5ohVzS^f&5QnxIkmLu+x#A)6>03N3FrVc$RxfF+}WI zU6g8fFHN|YdG5bvCq4J_cnVi!-G3+CD+^P&N>i9VeR`>TwWe@QltSq+_gYQiI)4Dh z?BQN-N#zEnvW`jRMw5z3=B8rJ=H_DOA_tM)Qas^r{A0KX_SS@Zo9Es>+xOj3TwPS% zP5toQnQ-s&+`DI|Jolb>BKKz9`x5T`If*F61BT+kkb-(VlyD#R+(!(>qs5RZr1($5 zeav$oC&d$SiYK$~QwjI!LW*Y$#j_zrH&8s6aG&?w7YxOVq$moqcq!q&?76Ry;=gf< zSF`SG3HS9JiWFzm8~*N!@xXw*sXGPBq0car%AWT#ub5fBJpMG276L*+<1r4_#`nz;V{Wn%A)}b{(v6oxEl3a;HzY z5d1hg3vH`Y`4b(PPd)duc=DfT-7gaEm$Bsau+Y%d)RsxkZcMpf>ET9?W|MI}@^wkO z`@gLFjeqb+Hn%i4)Fm6;Zyh?m%evot?hozmk2v^H!JmqqtIeFoA?IhmSTnuM{lyf~ z`l|}~TQR0OFaB`Ysc?Viq-=Np2-F;2;r@vxDEU{|ROxc+c59a|IGYEXmP=REsI?TK zVB6cJ8yr-8GhcD(uJskpwmh~|Dp`%2;@~5zqu|LJ+95myNe|SsvLGxyblUUykMS5UpScf<|gq?w62M3+M9Airo9;oo@}n}DD5p!&q_boglq|0 zn09~du@!xO+FNVGHn5&-3)?Q+!J4$UN3q!X+4)rmuVy+jAAlz3EX}-Fz2Omjc$t)- z#m{^oN@3>1WsvVEYL^{QN}DQX#^6}Lsu3~-RaIyx3f8GH3=S%^g;A(f>pieXLM7Fx zjGI!UauhtN&<>GCm8fT>3N|6bVGA`Hp*=>@=WDd1HtYoJ$u?st?b~vAw6VT=qqaFVKG6g!gZ7z^NK8`}+>ep#x%$ z@{uyjcVIRh7;{urNv-eb-Yy5l9OWY<={r_ym)S8#RjJgWs-sYkf_46-;NU21X+~j= z)>m-$nfW)Dn{pf+6g+9r4$(NIQP0Z3unB2|EgXj??a@r1KMpP0Fb~!f7q(pl){H|d zip6QpI_GFrYr5{I88nz`&Gb@f3#v8Kxfjoms`M^ERfl6C3bElRuare-@rPq^unm`; zWpaq`z;GOjQraB!FbH0(3h9@`su29JpsF$-j+%9{9{~qtE-^A6sde4S&2&GCn^NYZ zQScbje! 
zsdSq9s)8r8UvxUC94@Dz#aHlDl!6IeSt+Oa4k&m!N@-Jtf@j3~Rh7$`sH$3LpE1A!Z$v#SH^C<4X4t|MZ_ysN(&wjmn>O4I>&YFk?Q$oqN%1Ze zd2T$*`87QNWgY#ywJ|&;jwqFT(Bh|gFG}eleidVWA4REXlHX%>I77FsD_IQduUy!G@ z;Tc#@o`r3f=U|N>&!gxci2VJ6yM*o>nk5cOI48V-HdCFBlfsK(6^ErVc?nfBUtUHr zHebrCUpP76Ma?=n--CmrG1QF4`&!5Sc=d*Vd$4QjkBmT%cPyj&=i?@;Hpv3yU5hGtyBpGg`jIN2!p3tOq+uPCGgiGCxSk?41n6Y>YFCx60HqQB^m zN%S}BS?Pk?hY9HlTPRTxTGOF2-PmVo+6aMNhIBDb_+cA#1~WvJNbzSeO2o6unT-%6hN~ zSs%7giVe^jNwFdObS}k4=t3#Vj1(KQp5{K~RQvZ#q&HC*dmj|S*!!|0F75rfz6ojx z*%a23&0rb*=IjxRehbub@dS&DC)mR1`=d3Aek=9~5A~&`vNalXj&DPc<<^gE3kS0) zH`#2*dYbdwnQXGzp4&2;0Vsso6tiQPO$llV83^miAXsLz1AD}>8H{>XhQKCdC~RRi z!_XRKQ_4O|(B?@8mRqPl>KOD7$jDYoI zBrK!fkv(G3?}U0*c7{#JF0h5s?~2wa`cdrDdGx!X3!|?x(T`?5h~B@cuscy0{TLL& z==WepGmplioRB?XJsAhf*vHc!i+wNDvoZlTA=R*jvG0vmbIh!%kcp`2F>?|-hZAZ< zxlBfbPN*sL*bbWez`>-3o1_w~2fK%VSy;nunbcGi!lb6Lqe*Hy$_beP>j`hEGpT*) zk0rGq>RH(zHX#SV7A7@|TLfo4(q}0Rx-mhlS`YF(wb4;-qotTGu0zaG8xu``ROOqh9*sB zKEZ+$)&h1k$t*-UA&X!=Sq#f$4xvAm%%P}f#e+@A64+QW&Q~*WEb1~<4nsR3+vRYS z+T{q?tQ^VS-=>WdORuBUd9*sSu%5KTG8k-IUDVbtOHnr~g=KU_5(PUznJkAxC&w|` zBUH*rN2GQ+7QSF|j3|@iP}RwCJPOvyaRR|GIYt{jPt-b3q9V}9Y2Yl zOw#ZqT5T@;Phl(P+Nmgn!*d$h%EP+=(XtXm%G$!GtZXlDkpW340F;)REXLHX%>K zb~GqY!PhQNYwu^+FFq*Gs`EK@J`d~33$XD)c@cFpC@<0FA4$Toc$sbSvA`80n*6c& zFG}H943}3?)vJ~Tu%70D0}D0(HuIaTP`tNL2*rDw9gTSJpq!9* zVLf>dmg2one@wg&P|wPTunGAHHYT1)b_H6az5QeMiS6yB@(EgWZ~v4o+ur^e986}c zN#=9b)7;a;WWHd9$$W`In9Nt~Xp;FF<%IkX){}2wnasEJ$CCLD^{jjkn~)!1W69*~ zo2_)|201<1^t8)QvcQ(v>$8{$g8PqYC*OP5w;if@dzm znJ_}SqN+2Y2nDNn-Qb{jdm8b&v!3Q28H%?Wx21TiqY#R>20I$@dZ3(;HDNvJ2}|+T zqCY0y+Nft`9oU4d3)_)+z2L)VQ`&od_KS*C|JeY0uG8d&WP8Kb$>UiF}u;0+?L|?MYbRIn6pJNjedZI60Z2EZny7`7wvO5kgkf!ccz`^ClEL7juuIRw^|p|ElB zhM{i6E2T@t3r-BBQie7?F_hC8SFubg(B!LFiBhOyxm2O5Dh@}%s^SPZsN#5|;z-v0 z{Wnl?M{Y|McS0dlac6ckD(-@ELUx7qWE3n_+>QR2ilb4_%I>fU83WsqihID|RjfwcsJJ&>{uHEm>F{tqk&d{6l`;uUzJilc3Kgu7DZT@X z#C>9pvSE@yRi&*#!7A-kI4JF2M%rnt`+IjF?R0KSX=k7iO3Noa0%`X}IU)POda^$( 
zr9FWDn6$G{&&q+Y38{tcNZNzoYnP<*C9qR+5I#*43JuSQ)l*q}ovMWWLfw_q!LMo|T2LxI%)B4S|ttF3JkDZ*!3WGWe1y9aqkJw&*4(eGs7d9d1!4?K}K3dZg zv3k&Jw+q-Oy5loaE<}S)wu|VoO}2~SV37NmATMD(&57pJn;ZdR7?Z z4JdeWBYVVxyb1NJ+zgwLTVM-=ycMlckhif<6y!*`9Ss`f9rW0Oyb}%vS!05{i}hf* z{TnWKb6W;^4+@^#t6M~K@IKVDazAWB9)OJnX`V8AP1X^t znl$4NQ1IkK-6G2PBh<690yZHZ!xm=ziT3!EK0o8nwBd7De8vXbE?>f$jK4zBn(^0k zbdd4?;0rSzA>R4p}6k<%TeK)VscWi?cFEwefb)Tt;(S4%hA|gt?4iN0ZAQC?{kr ztS5WIGM91m$8s5udRF#=O~?e;!d$A+8s)M#`$V~v$wV}0E|ci7E(e7y0{e39VF3WAHT{{Y)c1zjOXtxaIge-^k zLb~#CVpUi%7?M_kWsp>op)|1m=?aqgT+BrtM3t0E}*+9Dsxh=K32!&9) zi`mg=cL~Y~xfIru%V4S9zvz!?cRA{~mV(8#6l_P@T?t>iT&2CQX1}<0*QoPabzTSS z$@Q>t?QTHbXm=xBk#^A?vzzFM30NgJqs15S7L-B(E96$+fh&dE;tpIip{mN>fr3@p zJK>tEmK1hE|*@sZi%EPb; zc?7m2WgmqP7g5^#G4_iq`?xxvQ0J4do;(E`SN3Vtjk3?s6)CHG^s{V>i8@@KLyIr! z^C*R)R>}*gs&+4;VAbv=IH+CPX!kPf{vI7@_X@YAcK<~o)b3SwG}^s}azb8*_2dm$ zYWF7nG40+$Ju7d+CgdI1jjeN80@gU%ULKy?%ph>n;|Z3WkcA6 zYy=xq)+n(tTBAF5z1b&xgtSchpsq``zI51@YMa2pRGLgGo3ieIP_mmzWixKeR5nK; zOl1ppG^zAM84m!$deR@3scc1mES0TM$HfFJE+$|LQ`ru!Q7YTB&(gHXAgu}HT`&zm z7slRfVlQUh|0H6piM@m&XyU#y*t(SnR`4&q^t5LdswZ zV=qT*6nh2xbRK&px-j+@6MGfw!4%SO>B?}TF!m8Bgt3og$MC_x9Z^fjPO$iz6)dCQ zg*{@??}~a>M!_azH`v1HN24{0es}ii{K3F6=)&mdndtXmJ(x9q^ka#_==Ve+jD8$D zhS85lEg^fs;u9}eMqkYyvFP_kJu4Go6EX?5F#5@8jiR5zKAlIu54tdV*F>LSJ(xFs z^fg3b^ixp?qo2l(Vf52c!!v-ecm@!b(eKM1vFP_h9nS#5;u%2L!sutAHH!W~_USzO zT6AIb858|MtOwfr4`n8a!sutC5Jq3ej%L=^qnwZwEItr}W$bh5kHzkwo|OjJgrs2$ zV?P+J(f#B`_6aW(M^s1?8uaMXOpop8(*g%`X*0RZV?7uNKNpwVG8aK1%%znbO)eRf z6Ve8Y53pdF%L4jixhzCID~n(gvKY27mqXAR<#H(dgt?5Y5DyKS%MyBQxf})ubD3{) zIh^%ikM?hp9>Hyy%aJIAxg5ogCYPg8PDmCO&jrFVm!byvu7sGmT2`s}19=5y`b#p4cj4ppcMzNw#5dK96_R3tHu&BV=;d1!6^YaUr zBP!(z6tQ+ltA2T>tA8s@rh1`~?n>5!$?i*c6`EAKt5FE0yM`T&bl0Mskn3P^$pcI2 zZlFIV-HoW@Yg@4R+7@g_(%k}IyWFb1Z)3l>bhoSX4t3rM>&acPap~?x-AH#2T{h|N zrK1Ds?t?FoZe*q0Pq+@Gdq80_)rT7C9%SAB+-az}gn9@~D&4~H_ZV{#UD(YEz4Hg$Wu!SkU zp*`ND&rk6!ZFn2jlXqa-vn?mZAJ7t}SS3Gl`wmk4NjJ_^A7)bgne{Y3GjfKSuNeMG& 
zw}?{w9rdjI0h^FNVPh$pp#MUv?rVCh`ET|K&s0@q(gp8v(lb?86s*VaA~<-aI@|== zjrH`Jn6*xY39>u4-_#K{?pGF`VxdOZGwU)n`(zBrp-{#%I2^M*#b5elli#2pZ3_2K7YdY z*M_ZNJ=q$zUABQWW4|qm#m>=r-=a>>#{)O%1u!~IYw*p`QrQk&^@P8@ZnM-`<{y&= zpqe({B*qD>I9A0sKucm(d;@e~tcq`d4vJOr4bUBeDs~6wn7kLj$Y6LWUN0lw5Uu;) zNB7ruL(!t*4MV|`Qtc3lSB834%3%{y0b3|urS_}y2*s3@?vn|?xDl23cwD?Nxic+xOS60a=RFNp20Ym{iR^h8iM#I63w>25>uJsj| zEO-lxPM0y6t*E_t_-ng87cdq!B04WQqVqJYEaXBr=pObZ_0mdXHxK1Q@p{fRi>d6 zKB$2$E}11W_&4txwuSFUot~5izK*A-@VgjLWnTr^j~nAPLF;KOsfF`I_D9o997Vef z8!}7|K*hX_OH#>3nMEi5-=#h^FV$Qx2cle}12;d}*p`x7)F-vgP8*VgP{n6Fuzcqm z-{Zv>w>2i4X4KTyf~~f;OlHHIR#Lt{H1rP~SSEFFIepV4O)FD655FEBPg1bN=i5!W z{g~>Br3*`C4mVTazHpTGAKX6#56Fj>4UxI%h7|dhxw@97d5x)s;&4kVVFNs#q+tsP z%j95gri6UqY)B@JXh#|yqc$mOvq24HFwrzfONdLCX)YeNLurZ3TN$*g&|;u%BTz1_ zIrw4R0|%B%#>ek?1lkmTzQNBKfpS?80*^L=jGj?5FeEUeP(HADa4{QGnWS+Csh9L0 zq=P0-%Z!>5S?EX8v5<>2n#G0Dlwm#`5=O%?2Zv$Ep(}&+6n2RXwoDESVLP2>hp&v} z2qihvMp7&Y}v}fS+3z6 zV+*HDjt#@voCBtF#5RG-O60he5go5YC)kL}<;0K(NA51Aa?;AsPFA#23>sfy$k{Y; zzu>qcr-q2ZpEJffZEBX_(%ovz4dbc9?XXIhafj1L95FvJf98BakjS{d@iihPMd4h~>- zEzRkR5TrLg4`)Tn0VVr&1@S^LKP8>Zclp~kRe8!FsBL7mx%R6K&TUahvtQsS_ zg7M!gW4uxsuj-Jo9E?}58Y8-b@tTz}UaO4Pb;!u$<@Kw^h^}C~VP%XrD&tKZGV=Iy z^QtkTD;RHC8RMs`vm@Y@+1MUWW*JHm*m(P6HDbOwCM73+-?V*G;&D4 zpLbUJMhC0rmb!!GY529JX5wCxrc9e!BhRpf%SQC!d-vs8G|ZCcV5_?(6}lC#woJ+M zXyKyRMDPNtp1cUl2=GS_#N+13ODOvx?E2~aqRMk?&rOpxO`^09{xXWyMags?t}Cn z{rrl9cK2hAVZKMq!qnea%nuA^aB|W0hkO{JKCt4T(aVSLU4Z%#Q41AVp{O6{q3Uu+ zK8a9YdGXsTfB&SZ0QFO%b}z{PGe!N}pa%IbYO0ejBFrE5`6Aozj)bo*q6PkIG<)(t*toolnr6#4+*ak?ZM~=Y?-@Vj80NRcT&*DF?-cX<0?ds3 z5MkDzgfPLNvo(hKBQaMm!2C%ue=fjG$uA-1sNL>6ZQB*6oKb-JD>2t9!2C@ye=opn zl0PEMyVqWJ*^=izFTnhhnClc^{-v0I8_Zy8Eo#an+oTId&rH}qn$J9b^1C|~pms%< z>Rtt?MQHY<8!Sf2*~$#d^rw52ZvWXL1<`BqiKA<3q&r#sF<*^neG1T4SF|+@+87g3 zd=8~!AUz0DI4{>!ke&u)ya_3Fcx+T-AZrn%a8%b;kaY~mbOW+Me7eOz)+I>cbnB%c z>lu*!4M^>7_DI$z$R-7vY@i?;8j#sWlbi_^i)15$6i%p(6{NQTnQK6DW>5^I4?zlN zP+tYv#DFv!kemq<1KE@yg%f5o1=-wyY;82@w)C%)cHL)7f5PxIw#V2pqxY39(9O)4 
zeuOKWF?|2fxZoz%5LED>3Y%hQEvy=37~1?BJ}7i49qO+nN>Pd5wMr9HoWU#FF=)4N?7{AN9AnXjZx6u6XIATiG8xC-3ceL` z3vRbe4B3a~&J7ul{ua``2$M%T0d08p2R5!}-2&O0ot3i>IAaqnphsXOG7-HjOp~~A z9@AvBc`^l-O#F?BXreI5eYlMh+9nFw`bz?REOa&8G>>j7+B}&C8&9ltflOy-@}$XV zL7Tx2d1zdo;vpm0INID&*^iwy>L8<3N{0gy?hl^@{{U{1hd&E#o*W39gTFv(+1ZAV zOb&w2f}i9jdHA!@=1Cna@$pB+pP!m9_3W(#ykS}F?>LydIyehTG#Bh%DRi@N&EeL0 zTyxRpi33Y6-asN2xWb*KfmFS0@YEIiIi2&bJ7CEqsd!k;iuk+B`WFHis`I9y{mrrR;o5 z(AUCu7$Nfb4o90ON5JOrHOZ0eoX^)}=Q|30Eqq54B9AYNHay`4o5PoprRPkMl%KuJLmD}ZQ(qDAbFf8qRo?&V9AL; zx&t<~jhB60?QFDpat>@h8*X2-f9GtxZF?TNS=i3! z)_H6fpv{vDVe{F_D+a=sOkL^;l;d?x=Ic&JMS|0BOm0Ot3)^knI*;vkwBhaR zuz76w34%M>KbNgAw{p1)-7IW(bL%{|d(ejWJiz9&mCJqXpUYO5Te;kiZWgu&xOE=e zgJ|>QA=n%?+?g(whuL4*f=^`d`3h#C9!?h3$s_PvXddNec{Kk)8(!N7n@>|FkF$Rv zjlX08%@gojXrAO|c{ER<4G$N>=F^nRGwfeTQyygVEc_Ol=eSuO>r(TPR@jXmCh; zk^S>&urv8I8F>kQ3(d>iERW_DwBd#?Y(7nyyvqKCG)AiI2Xg-18Lh~s%%cJ=WZFpxrY#vQ{xqQL?`7}6J1TukK=o^;mc^S zagNpnA;*v0D2JnU0Xcp`yM^OtcF*JZ1#O=E3LEFBi#UGcMmZdH%JDneEgXNayWvO$ zpN0I1#;p7WJ4^nC4R7vr!2->buCQbe9=-8zjNmhrA~<~R^{|SG-^)nW;Oev++&LkT zAs`R#A9YuX)eHrnFc>GRhqzpp<=nfdG52orZe6UAPvxi7gFtzTu8B6h$q|-01y`x# z!a-Y$+o+)WU>e73Z8YU1;-jy_o`IHay26{&*6ja;rmPF+Ea?SXT<8B5cWq+>ez{T> z_1x2R>M85NjZfXy#~)8NU|)npEzKi7;fM6HA)Ly9-@k5a!Y@v?%0_IWV2fpA6tmJB z){mT7Eyj<52Osg5NFVs{A)6uWi<&2!z><)kh)GH|<>mxUM?lyM0HhDMZ3a(-usLd; zYyle}G&D&+Zfr&14{XYoISBm~VJm~cN9tNrb+WYuf;(vGkfC)gt;LH=WSbn6Z53rZ zgTfKRuVU88_7)gkTjQh1)?#1*XFv{4vEq~%97a>uS|@o;Vc_fee1_f~1}{6?(RZ<+@2cpde01k`A9_PTGvvI;Wj8zSXbbM{iaW-~ zO^*(v-YkR;jvhJU$R2izu@;Iwm13Ns7!y+P=81m*jLu!TB;)NQds#>(C`q*;so^P3 z$Cr8N6Mqp$cw?w;L5b{brd*$}qz)RN>g5DW!Q0pR-H()1Egq@^`F#6=FXF zF(riHg}DV{e+$F`3Ngz-G=>no+O|L(Xo09zh=U9SUuy1;P58j)UV25HLKWTYsY8l)`7Kp_P zafpH7i(7mMUUpg_4z)md3bDjM@YN+g1TQHq5QkYH4p)dH3z`$ zINCt)eHA_gF9j_SSqntFLM$~9s|9DqG%xck5X&qO%N61n12H#7H8Vw&$7Uut?=g<_(O8xD{{^-A^f>^`136A=PUdLKD@Kcmsdlt zvSVLp$G*sdeX(L+;$!Pi;`#8Ut9+)6t~5*JQakZw7UF*?@#ThiQK;Ea4oPw6CuyEh zE}c1+|CjSgS%^n~1PXS~P4c&{?vXBc-087l)u|GP_O 
zdgSbZ=Ftt@U(p!+=k`nFemnUC7V-y`{2@a=u>t>q`EH zA?M4zY~=q$_BZY1Z&}FSR`PcYdAD$x5g+s5w(qKC{;r+;Jq!8!O8$W%Plnelyh8Qw zpWp`SN)Ol{+6h0h5UxY2VHZvT){B41)73-4FN%Db`pHQI2?9hNty_6<)=+pH{4pG&^6 zQ+;cp`cA37H&nZX7yX^x$lz)A2Rp@&7K)#g;%7s_-}m;lYZm`GeWKBb#kQV3w)HKsZJ@DjXkzOUPV1UrG(vH5XmgeN2cd0b z4{c*hXuUPGJ|?s?L)rE%2(8ndAsR<(w@^UtI(Q7SuRXv`ECFt+0d8gjNpx#XbQ_asZJ6kG9StfTK#St$v2a^E*>)DP?UihR zA=^JB+cucuc|sL%72CN=EL;PXYmng@5_0izzToWQfBD#d@ILl{o$O$z8El~$qBKK& z8s{8;mMDk)ri{)9K4v=1&QWUNC{vDdpCed94Id^I?5k_2!JUs*g@5LX%`&OPww6<7 z6(RGMQp3^a$q3lk5^i{@jAUP>kIpT51DJC+%^yumEIR_o63b4+&x>VewBcQzusnU2 zgrhpLgBfHAAY2cH2MgKN9>6F|0J~`bqx}HFu_%|_*;mEbp@UF5On(eEwWPlX5%ba? zi#B`<0T$^yfBJ;0wySb=HqOp8-omt(GEFc{fBQ_Uw4aiw+D^2$g=nG@P4bCCL91jk z`>LS(uhQhkp(r@@h3D{KgoB+92QJI>n*t__{`)Y1JpB`B^P~ngzAFrusqCx4?zbwV z|9_8l8i*{hPA7j}tTWJtSFpmyV;v#;vhP0>)~tB`p|Jab$r9}T3?MJq1JLHlEZBIk zBjrH${b$1d-(sx=ktNoH$e$N$5^bK$hKDqYaD;D)nXiJa+M1!$ zsT@Fcb`*|I0)ahFNBH<;MfIdun3S=0S-0-@E7I%dBE>V`IqO5X6tu#!u!yTcJL+( zc(a1H81Ts1wP?2gU6zn*o}J6JaEWrY8m^}BURZ}*b$MJFJ6D^9Yrb+V zFkE~^tS{OIX4WjKi%TX8?Z}HP$cq*E5QEIui~7j;c+~%7Wl>5FwZnN9xFrgAn1NfZ zyMar4_K#Z+w?G`B5Jwsa{)}G11o6L^77p4`=xf>ij<$ej6};VmUl@XSIB2Q7L0f9) zT4v!|u3X0$F22?@U)m-))(&@^1@3r-JHfzh8cNIW7g@4D(E@RjLY! 
zQi+^`z83IPE#Rjq_~{0mKUe94cPMRBp0sD!xz4n3ouyo78!j)DwnMH=9@jZ`u5&G1 z=PB3uhKuhQ_VWt&y-Zv(xxkKmp#}LOMZVY|^QEvpG8)Xj$8T3%f~sW*E=6lP% z;pcK-d2$&NP7!`V%K~w^1>y>Y__u-J4;=b2>1XG9zTj8B;u)97mFR7OzsdrCwZdOx z;Q4c*K79PfcKk|{WO+k(t)1;U3)}U|c7tKNAUqxH41qhmV-|fOh_4RvzgV9C>Ew;} z7;dt}aI?m6i;3Zre;xzAZmF^6$8f7XhTAML+^#X)VPdEVRbOQt$~%bRPJ0Y@Sz@?b zW4On}aOXeO0Uu*_5W~Ip81A#gaKFazfQjMne;z|eWAUIphKDRMJghN1Vq%!|Psf6< zUbD}KN9{5E#}dP18pGozhFAWn4t!CWJ%%UjF+6FB;VF&bX%oXH|5OLQNX#C?Gxivs zwZ!n8#_+s}A^p!|=x8inu*dLXEQVCt|1QEyXqzQ3Glt-4?pIL3x1iXBI~UjbM@S4( zm%-!YRkY?DYq@>?dEhk#c-;p`ZykTVFjOY zg^;|6iN2HZjvfBp7<{VDIdYb~N7(%DTkPE;_`<~Ep7K7tc!K@|{J}2+vnxIz#p$if zv}rY^@(~*JZb<9AhUQjTf$A*zn7t&-QfIHfRh&-w2p%NI|sB2DDe#_Sm>=kk7GcK*aY2*s)NDf*Q3TIo0^gwKx)?w zT_WweE7EEP>A)zw)eDgLRgMU0aB0avU&+Cxgk2*Cwui#5X<%z3*q#Nj!#jh;2$c;i zB%M<1ldhFRy0((8V@UOM?mr|Q)G2AN9MbiabbUjrzl`FOZt#yuD>}(^!yM9$ zlyqaC)cMs6es5*$#yu`)&n@Mii>Y2*%00JF4pLu5+QcCJY>+lJNWd~keE00cR7M)= zfPz=bY?gzvxuR@gP_8y8{X&#rG9CgH9zZa6Vqh8@4=)9coUl=2xqtjALly;%3@T`3 zhcMrGqimTIb$^X|D--q6ChDy!t(zO`SIlWpTJ2wl#wDf!k1{iu~L{Pb5Xz4izO*0;Fi?9;j@Ce75u`-jDMG-N8aQ zu@k~U{W~Dk{WJ7MLr~6O2ImAbM1vXX2jjeDA{l0hWEE4-V=K*JD^s>|!}hjet5_LZ zuAIoOqueTU*s7FmxX+e8DH3{wg>YIYLgN;2$e0^T3Ew3acju;0w~WjQYDW!fCll0w zD5#w^sIHu*ImZd^ZMxrZ2a;WKz;;!zQ3fm%f$gSXobeG{&cOmuqjR8kSEw-tsx5-r zBZLB&?YM(EH8uxoPlXz1p!8=x{0Tl@p%{}jDjsZPuNmRS)-~T z)WjU9NeVUDKyAloOZ=#&7$}XaFu@ASzE2KRLZNC5)YK@dsR~8ebH|W}cpXDMN>9rH zo33Cp4A}GtjNhc-LM_sY?}OrU#(?R0(Bsi&d+2H&};>&Ge9#Upn3xoMdatklaLCfb0L)jHb=qc8n724Fvoy( zkf0ud8*-r13U#o7dNG1(451>`WBZM!*pvg+tWYfm>ZJ&3o`LE>Ej<>y3YF4tmd{H{ zrlGDeRa@Usml09eR>kP;^r-IR^sDacY&+poTwORTsyB89RpB$^ zzGIC)iB!FwGh)Iwtns&qs*9YRC;Y|QNm%k#cXxK1@CR#`u(rB0cEWG0-OpNAr&=#H z^p~EiyE&65{7K(WNUM4^r)I+MtgX(Tn>aJ8*K%qn{Kyu;RYLVTPW^=MSv!oiUQWY= zZ&^DQwKn~tdP6f_K%La@y!%OJG|4tteP4j2=4g9ure&DSSKk7QuT&PQZxMa`_WInG zOl?y#-6F~6WUDMj^CZ1GY7x9jy4Gm`snj`(rQeu zpX6*YsZF`-TH59{HZ;$z)gW5saP;w`!jH8in_C-NnthiX0gop~D#rEIb%?62P2UJT zAHPt;ZW(+X{wTO-)*{!Tt%xvHKXh&b?g5(sE9TB?8#-}ZWpd(}p-poulTCHajV(i) 
z@FMNT)}gIU+iY{l0E0DP$NmG#hYTMwY`_lv2h`(*{%j~4R#r85Sn1$ll>*AaGBM4oRsBWgG%qprH#O6|Nc1n`x&Y1prNHhi>6KFtmh^IFZI<*JV?C6x@aKhq z^K+W3c<1Ugm*M#JtN!@oFH5;n#VV7lPJj8w6(QDMT#Gqxr@2V-S1Vk=q;-kFRfGQi zQ<|szw4R=M#`b?iizi(DC9O2imuWq>@pPBgQxnftX+5v-RD+WOPZIu7od^1~9+r8e zPU|t1huO3q5_yzN>k*9yxiBZ!R1TM3noa-g}kpSuOgr}*V+)5 zt6X1Pu0SC!mxd6Ri!onZuB3c%xe$UZT*Uawat-1u%O!-bELQ-&vOInJ%JSUnE6dZK YFB;ETzN9?m_>%H4uR^x9%^vdq08s(jkN^Mx diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/py3_model_Feynman.pkl b/epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/py3_model_Feynman.pkl deleted file mode 100644 index 3e55c479e2cbe319b2de3f58e86119ef116bf180..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42837 zcmb__2bdJa_B|}1h$1K|m=F3n(gx1X)3yl^}}iusdw;xT){VlC&|7fH~)! zv(KD!&N)5x88JSeIj8?Q)zx9TYZv%^f8YBqRkv=Pb5C`3sGjcL!;5O0Qg!u`YPCkx zwn%!!{8Tnw(=tDkuFYk8OmC@6H}RYNsiw4DG^Yr~Y?Iw(&Uz@c zrX<%;+nCPUT{{BXTZir0kj?e5y8&~Z;x&oyVXuqW?!{KooX%iN+3U^e&VQS84Jw-+ z+P%I?Fx%SDY;RE2WllH5)V4IXh~2ZQ^PF{9vr*%}Uyz1fO6JyFf38yD0tbLH&2LLuaqw5IKTbC{C7`JDCfPi;$Eb1p6T zf0Mn1V{bWU1333gx8_=!XS6lVPm8@3@Hra$*;`k2YO^>({k;)dYHwq?gR;~fU|}pQ zjDQu$wtA`IXsTDd0^<820LkVeYXMxr7 z`38}X6JgO%dwUS7;&6M15aX`jq8a~%8E4yr!;G`-VlehAU|i`jmJ~7$@f6nEL$$(s zdzj|yUZBwGZDHd9c~h&8I~Cfbafv9W!l+p)!DvE(9OrgXiqA z9(Zn?3nLRy3_~7fY5$kM*o>2(Bmj|7-_twzaW%fQDLw7MlCkvsgJ?Q!N zz8ZS|GJB>GI@lPy*|YS<=w@b_UC1)WW3lZ0z`~=NWzPjk=h*HRkW97r4{wX9_5s)y zn`0;Bfxa%4wbFI=JbxGT4`BxuQrD20(>FcbK1eG}54BTTArG1l8V+G;d%jk9cg$VX=)p8r@`ZvhuRBPLp0s4*F-(dL{=eDgGZE04YxCzD3=;)A8aP#8A_c7 z_MKoiVmx^SV}16%sW!U_BihSM+w9Y()v2k0XRB0guA!wlYqxZCQr{zpbgu(iaptl+ zybcI;>0t}Pv&CwgHPqQTjB;0tmlAEo)*SbLX{+ZvdtoF8bsR}(E=4PpVXx?^`_A*y_wx*#eRgWtX zSz;fyQjOq@+M3puML06S$m~pAFC$TK9hF0 zkE<%m&e_nT4ldA+pJDY=*Brdevrm|_5kgvVq0uJuQ`~wr>RRN)8EqB^`lMnjhm)Q= z!YAtyR^xk$#bG?umRjDxoSwJONZMywhpn_A-ExIm5O1cfxvD6AjD1H)|bu#+1r!$9gcnHe7Ehc z;;N#mE~*`EcPH(89Q)q+X~(`VEad*Y{Xo)wut12Sct}$`>{D<&9!c7dI`(6l;_+e( zlTYzP(tgshpCZN6VTxz+_OnU*xj4o1n&JhYq6;WqOxiCw_RE^$6;c#=Dqc<6uQ~SX 
zqpeX+I58pMO8;{HK#|LE91g}Hyu+rK33U*p`r>G9=p>UXsx{1NO3 ziE{f-mBwF4BR-gaGmUk18vp1tn1pn~S)X}GXA~SMf(>OOT~Nyn>%gn` z(CSiI7d7T4-5Km3YDSmIdT=m9>A@Dm0Y}zH1M}?e<|!MfdUtGjjG9WeC%ZD)4N-8U zmkRKYg3=rHyle!Ul#OBI$@YQQpQdH9392gFO&RTP(hAuOb(L&iHW)@!`k{fzuBVgT zT-CXG`>Ry9U`M99B?^vgrJ93Ow?;iL{b7@`4QxEs0raZbCfl;;k7Y$d2Ew7z+m0=! zu^fa3rq@HKx4o*bQuEIp%N^L22@XcVkzy4PBv^uaUWULXWhiVs!C~-b{1F_^7(d5E zg^Ykh$qHRKg}@G;BP_G4T4^ zyF$jIsh`@<&X0N8kr^WY68<$;Xxb4*l94IC=RgVp|g+Fk<`DFQ)P8m+a*meH7`J+!)vO2dq_QlFPSG6 z{^i-VQ~DTbKshNHSVs|Ybm#iHs3qk*SVzu>W%d^^B9#4wsORM(*rZ$x8_)g{ zc!TUOWsHB6C@q!C;81h?a<&*26mkU`sHTrrb0zB;&Tnfkt>!9LsOD-Ed^OiF&{uOU zYDu{c){*OBspbYogw))KdR}gVP0GJu<7#e(H&Am6W0q&MhK!OBnU=RAgxPPRv%ih? z45y@3t+T(K6=r`23V!x?GSJWdF4U58H>@M~z%u)L84=36|W>>ox5v)@!_{|M_|_Ug)79wiF1e+&gb`^Oom=g|`=C*?_4N1lRZ?oYEll>0NN z=jB=0q&x>3&;5CL^)a)$TwXv;9W!5Kus@;3B;+MH)P#DOEvAF!D`=po&9tajS@(7i z_loj0_NA!TQSe2*!9XqQO_Y=J7OW$0!&210*&Y)04(fS%7d9#H!Nx`XhdsPAL7BXd zs#ic3}?3`J*r ztv}F*SxJQ%ipyw88Ko$)cm1fMbNFRN-M*2ZSoh|qE9Pgol$c)#R?pfc^W;|sYB9f| zoRr^T9r**6V*X@%NX%cT=jCtMr2GRL7t;xMSfgS(E1$1I%Tn8?2u;TA(}jJoeKyyU zx~e+QW8GA#$ilA?)|&qe5k8NtLsI`dR;5>Z>(a~3-5mvggw`XQ9-$s6CuMzDM>c@v z2=!!pXoNOIJukgrlhPYDK0+J8tG2ePHQ@3=~DCq-_+H*HylWEW06b&4k zE%exIrs^DHvLE%8=KAP<=AY2fzG+rb-*%^=2v#wH=#!=uJ#2R50;W-uB!He2ekDOPok zO^GTMS@;piGXMHx;U^y}@;gs>Y=)3DHa0`)<=6~E!5^F9WYc3a0_CKPgmq*TEXSsl z?V+(LLp?7E*rb%h#>b`t-e7Dh850_tN*N808k;d}GL6kxG;nOT(qpqD>lyA(xP)+* z=sU4OG2>A1#q7*LEoK*#ld>zUBjaHyW&+zoVs=9v>rPm#J7MEuCcztsnar4wm{OSn zj}kMLO(rpWpn+nx)?%iyp5YGQi>YFTV)jJA7c-rKTFeZTld>1ABYVSA%sy-niAkcK zmulFg>^$cgMFJ^yMDCPhZd@=JF zsKp$Ja#Ctw9XSY=Vp41miJ6ajUTR^JQU@CslZH1Cvw$%nF{7m(9wo+NlWEH|pn+nx z(PA>JyVI;%PqTyBmtq=G@WnJSP>X3s8S6$^M_OSi#%6m+jG&&EENoJ8uyHYM@CITQ zGA1OZOcueT#4Ki$Nz4*7P|N@=W-06LH1ot9!oC!9C@F+27 zv&kgp95hhOKrQB6Rp*iZJXI>P@GGxn{(09j{H7Z|(w|RK|7YDAz0$paUXIO$C^&MF zq6uylU5t8OE`d$TrLb|)m%$q>JT7O9f1e{ESD>yWU&#iOz8(dv6ctS%`9{>SQiR1y5jHOQW_Sb1w=iaT zMk~!Ip^?}2CeZzu!lWKS z!I4K95!%5XLp?8#!zSek*mzP;!kh8ehfgskSRam+r{PeO?HRV1Cfl=UV3IrNB%fnF 
z!-;0q=_H?Lg-O1Ef+H_7B9!DysORNn*rdDy8&C38c!MNgV~n5V*o3?ehf4AdwwRK9 z6Aesquuk$V)-#+bR)bFRZC04%zfo}H9Y%zbd>8e+ya$_<|G>tRd>`H*$qyJ4Bso?- zghM6y5nD`2evAetS*(-%gmrIoyUVjx?8_uSMZuBJRF7Z|evW!xzJN{2m$309zhVz{ zK=oG6(n)XMp5HilP|{!)3Y{10tdiW%3H->81MsuG>}x2$_pz)k!+xK!fbqu|I7 zsz;FckErM6C)lL?3>#1U7xqwzXZ(DBr7xUsrlTx>Q_;DqAv(+76{4O4ZkB(*rLz1J z1xNl;J%TL%Mm;b8z$T>=p4W+I*%{twmPPcfah6?FbgpWs&a$hj^SIVcm5MApfoQG% z$wTjX!0>Tx9Uuh9wHj|hOE2eCcN831PtgRTd!U||^1!Pk4iK^oES_Cw}=T z=>>eRT6Div9HB-2{+ zM=`@12aw*^*q}9TOD{DJM8T2m6iuLU5bAl^9yTdEz{WKWhBwey%$SZfmLP;0M`(>h zSa)v~jn5YMH?Hke1z0$08UOi(uHemPzmiS|&3l&{8H-;80qovc;rj4>VBAD6M50>+XF& zPfHd1Qp=tw_*$kjP-~fiGSj3&q~Q&;EMQEarCjRaP+Baun6^s;8mOgAYss+g&M{BR!R$*djVSn9ni#0H zG^30aFf3NUu+(C+J)}iY#|ju0D`42TmNs|;EejbFXsM7za40Q{*<#YN1P#=Z&{~$V z?#?k!%OUJbEr+7uYjGH;wJbv!3t(6*fMKcS2)2i`9Emy>z_3^V!^XAb;SIDL!A-*Kz^_wU!f6#`+f)>t9%EIfd;Z zEvKT6^)D>ezp!yFXTTe1Ig>GgmeFz+97@aCY%ys$2MyFxp|zaLx;w`_E$6W>wVaQF zujK*;YAqL{oRo`T9l02mS}tLGNXwu5Y~~0V5#L{wuiJlf_h#ag-yz1uyHMq!yBwipJ0rCyP-^;L|sXFiVY?yPosfS z#%L+eu+l9r-eAn~j8>3Q0wR}XZz6=bkJY)q#kzZ|Ym(0WZK5#uf1}{%{tg3m?(d?E-z;Gr z`422}f1mB4+&@4)FCW4tam_ok3~9$G#l3UmJ) z1wZ#M80i04{v~Qj`3lyN|H3l+uNe`_{(q?FdW;BV z-vf0lz+kZegNE4H?t%*!MyRv)@@~-tSBb&i8_r7co<=ziStlGDAh@swQYLdnv>c+#|9k=|@NRhD(Xr2L)eDl7U)G zHOg2q!8$S%mSSeHJtSr}>UrVDNXmY&aWQk*BO+#h`qo&?0V+CIwVM_*Pa&!s&Lj1O z?}2bBF*PV)^`v?PcNJ2o=Vd-@Qfgu2S=PZDTsfr~)A3z}1qk6zF;OQ~&$@eev0Nu* z5rxyG0R=}gsz;F2!Kmk@5jH7Ju<@jt;SG{%VNAzKwIYN`?XHuuS$A(JkI_j9QJ7Q~ z1xIqKN03w->UmiRo0LVc@uU{R8zi-aF&!th6d_D%l1}On)-&CB#v7^U;-N%gQVt4u z;#Bnrk~$3ayc`ajlp|o{NgWAqkknC(@sDwpWpXqe>KK=2i|H753>ui^WS!)4)-!#W zWVudq1^Y6|l_)rJtm+XYc^v9_IUW{Ii^9f}JQ3a?$&(nfJfl;~sLUdJ@yQ5bQd4wN zr?8&chDnXlNu5d*CUqJLj-0M~1WBENdS1?iP0CrY@ubd%H%RIn#a7m!>BX%g z7sE9NVmpl*F-k5$MZXa!=~Sa!%0~4HZMrqxTql>IUZQ>(SeR;TOUvb`&uE*U(Ii)( z>d2L_eDRbo6Y}lR##Gbn>Y5s`)zp;9RcOs9Qhvn1Zyz$GOs+!w~OEY7Pv>@->dNpwm?Ge^MNPm0hutndWcV;w?bk_ z@$h0g)47yx_N12#7^*g!SS_=wOXPkxoAw9t0hP^z@odU4A0G0vVVcAJH00sc!9Jp3 
zA2q?2$zwijhturw)sZ}*NS-v2B;+ZdWb+P2vP_;{9m_L{<=Hq(0weUC&$3$wEZAP& z&J^EbdN0!(?D^HD^MXp}MN>Lu@{*sJT<@Q*dPKOj^60r|wo z_XlJu2V~XikUv$(pJ`<809M=5oT;yIkMs%o+z0Qkl6Mc|jA_HycG7C%sFOkhTE1`@ z+dqhWsTjX%m$7VdLjJpEj0gqe*Q;avpJM!`UB(0$zg;s%go5$A)iHjr7=LJ&k;ls) z*NhRNVEk!yj6W;JU)p8l@#ohyV?-zze_I{n?~3t{b{WeSm&u=N#)wcb{eOxEBc>&Q*khu`_}@ymBZ)fFoUC!xvcLy`xmRt{EdPnZVd>b&TsM#&tVj zER*hQ%1Djtwa1uNAInHD#x-vYP&0+1gZNS~i7;7sWb*%}~YNF<556i)kK+r!0_u zaJeb$^7+D|it}pD%aAogqKpf^If_+9sZ0cN3%DKG5|)s--_R*lEnBf0FPNw7`as35 zhkkvE6@%NFa9v_>{T19cE?kC}uNvpMH!ewi^2-8+qdr77yQOhKx;dLmH#Ext^0{el zOWe*e+<^*rI~P|C9hKvhE`!+D&Gn0S2EMfFpxxYDLzvqWGcJ7xg*jMbdM6ik{UOBx z>O-pzny_;8o-x!CqQ*yHh(aA2K~yvFefU^-8H5+wU#vH zQf)FRK>f4%tP}Qnf2SDgWTN(pp-xe#Q(aVRTfJFkJ=eW-hcA{W5VaPcKB2l=_8^P9 z&8HEqZw#$Uq3x;BChC1CJclwNkm&@8&&wGKWG@XeRi~6bJhW9qAbS%ezE$^8AW01} zOM`42o^BzKYJ$Y4+rA29rUuzxgVc;SXEKW*o5#jvwgTY+82f=WUyn(_gbHP{A3@?1 zYOVs=UxU?;-faE6JBdRe3kcFLHpKM` z#L^&pXkot&+}>VxK_&*$K#=&1$taM6HOK)PWa*#-hd%p3uNX)pLE;;`Nr5zLkR}&o z=c9f(N=ff8DI6|O09uL^fV+G{Q!Sv~@5Z?)7x z8bjQ;)&_OAC0Ll2j8qX*7AU`7{FRiH$dYK%?r{ZKb`_5HG7A<0p3*hk$4 z+Z1}D86yH0eajG2Fh(xjVeHEDRA{6QhYwHwz;gJ#%cXQ$j%2Xn83dlPkfLA!)C$%c zixN2sT@18G6DdNQhtH8?U`gw*XlMH?+U4w|=)E(;^x0g|u7Kabv6A5tj$`3-gh*=QALJb^&~jTnHORt1p#{7_74P6s6N@b6^NBMw&eO=^vJpyJD4i0y4LuCBw-YErdk1`u+zA`rP)p{^T?|&N zTXP&Kc}dg!&|#>|9ZQb+-H0{t-9v~7-@WiTavy8~U#;BF;3!|MnePF_8u%V0M1=1l z`0%6{Yyn?d9$|2lFKy;~6tM=r#|RPOdmKJTo`5ajYmz4!9OY{=^F4)F1K-nxi10lF zpCix07VzcdIR;1ha%R5g5o_Rkfe;bC7vaNWYp~?=zL>g%i-wmOt{AroW(*q4oUb6- z!1*dcBAl>pw+IxWeH%V}y91WA z-f3`;(#45Vd57_em`544bGZ9P_~ukX-bEPL{JXIk_d?)3_KvXq2R=MY2ODL>?Q6z& z%*NZcA0o`a_7QtW*gl5OkxyWwYzbM#_>S3l%l1=*8Q4B!?+DxH@Hz4YYylhYt(M7` zj4xz!_xfNYdAs&2gc;cW%ia;Tui?X2M_{9D3HgTcg>3FV5wP)A?Y9Usuzkng5w`E) z!&_dk5w@~2`H}I3Y;kR6@)N=gY(KMigzXpj9QhSC%9fDd7+=U1*H$LKBh0||2YW}@ z{)7+jhQUVI5@qr?;|tm1+7j{)!VGMkaHSh%>kJ=0ehC|8OGp>S7qZ2*C8R6D3~b%l zJECnJ_#9amwtx+Hrc0$e;}x6tiwu5W!7NmVlO?sX9@-5wJ=iTmvp#(IHVSN%rc8P= zK2GCavVdkov>Rx8v0H?uH++t41RJGE$i|G1(8*@WF9G@HVQM<8J%G&m&o 
zWqgzdJCjS3lYVG7&}`0b5t=RF!wp~9C{3Ac#rQal9<-cnjdlY~e|C$|Yy+Po17M>x z3E7tMaT+~nIT?s{1I>2q7NHpgpCjAD7SNP0E=|Y|jE~ab4C4;kd>M>(15GizMQBRk zb7Tl?gr+PZLm3~X!70O|8HRQP&2V;$(2Ri3k&&!ulj`8ptI3_S$bELiBLUw}_FV?}% zligwcn>&+Gabz-G7)9^V8~4TtenXjp2A8`xu42OXGE&u8olZq_K?-E>$i4eVd%)+& zG+0vb34uh0iPp#!IIEhrB3lT+TQG= z$gw8wG=t;?> zOYs+pyr1|>b734>d;)ge4(=5tFpkuscCB0T5~cQV+KsgmQq8Ld}u+upUA~ zlhmSWM&MWerLF)Wtq>My1U^!iP1j1j0fIYd>4=fFE!pCwC1MqzG$@peM&TC2r}=B; zU;_--*7zwhTMR7VG#21ADV%1F!)$7^wbEjM(b;4hnwd>&0gA0qM5Cx|2$MCysB8%0 zW|LDm>XGp_cGriUg(%OHMarX}8eEKuBTMMAiqw}&-EGcCpY)Sr?lRPWOl)Lgge*lo zrf{&5AhI`ky?yZz1%0T7=F31X^cH~DcS3NGN3fWFXY{4dWH^WJUh#Uh?h7a#p z!gB9b$GM3$qvceFDmE%p&j1Ew{;i_gMI~|?A`M)p6ClEM27Hd32^-?V=ZF}pxJtmK z?hgfA1;TJwUf-l5^V#TUU_OU%5$1E@!;`?UA?C4iK0_5V=e0XEyzN&&bpZkmR2Q;; zgz6&r@WLl7sjz0@XESkqb_rw2mEnrt0CuSX>@o#*c?7InuJFOQpf`YBX#l%Qfn6N| ztB`AaFs|1PVAmSJu2W#wN5CrO1`kX?Kfw41_Ji=@eO1^HcG)O-n6WWztg0VDg8}+O4F>F|7$3oY8a_v!femAq$+L_lcBVT@2C(NC5CMB0K1W`F4TB}*MaC+y zo+uf>USdE5>}B{Ic?C8MRxYnHR)KAdk^$^B21LMKhtH8WV8dV)@+M;ySYMP3U~e%X z0`@k1xLpVv2CI~J7^}dxLdgL3E(0Q9@4<&RY+=J-qvd_ZDzI%)GJt)+fC$)!@Hz4k zY#3~ee9Tyah4y;2D*Xfv2JBUgk6?caAHL268^#_hpEEXwjiu-pXfR-Z$@mENSMcFQ zUDyzIVw8N%*cdj}pZ`OH0s9-qN3g$z&ynw7!`P+rJ!2I%eNH1?k^cIg_Q_k$&MiFCsKdxN6Ra7Pst!H35`VWB89!oO@A$t_dd@g)Z@ z3&ONr&9vPNwCgC^bv12`PrH++H(0%^fo=D)r!WWUZcbr6Lkc}q3hTQmc$XuI(Xs(! 
z)uiE7vA)3buX(b#yy}Sv!|-m%j*$t|3qD7B!-i+(7}Nty-YdbA0b80Gw=ytptr+`j#?d}wg~#Z=!lfrq!Op3lV8LxBm4o~GLWyi+CLdrR z-&TzdUTh#QQRG83dA*U` zyNSPMiEgkxO*E3_mNk7=$>dM+}%JpNfA!ggu8GNpa_e-quQGAO)>LLHSq1B z_@=ph-Z`+MOsW{Gn0HW8)pbC7-0mTzMD|1{!}(%5u_EV-8Svr$GAz#*1O3zgcB|9r za>?Fis(lPpNkvudQuz|gWnacBi5=a?;i-2fA`QA`5g?*#Hhhlo@rlr>x1vJ!W2_?M z8PU4~>D^F?o*?v{9CFV^M+5i%#EWnr0G}iCV8h&%av)41^7eFrx|iHCUI>-yPfK3*C%6*v!{x;A>KR%`TreS1U(J3uDzL+|i#t z{@z${(dIqCs}sX!^{-s1y%j))O>7fCvWW#gN3yV?=~P)NImW77_KoM#{!GS$$9geQ z5EE>n(1^8x!jR=cGDosp1RoxPhUG*&+CN}cb0Ttv`NLE|8;lA@Xl+tkVoq(TA+R>ZNbr_3V$fdBMF&&M+AIMlm zADovWo3h}xp1WNQxm*q)LoQbkKa$Ip@Hui7EON=L=kK*++nGy-1pMnO|4<@Vn-jRk zkifMnf$Q7^{B4nt>lv#CV^}+>v@8DxbT!Dok%$rbH^GN5jlzaEMY-I}SjE0(`!X;1 zZb2Ud?X850(B1~0Be%nbX)ELo#wyx1+Ly_CC%PC|?;=u!^=|m^Cl_JEtd(*vV-@TE zYc%a~;POtM{`G*j?Y-R*hdIN>zGmUB4bsq54JaB;|D6duBYF}TeEA84yUpQ z)zR@eI6cInf)q;RC8#kB$jcNN8IV`tbL3T63`nNhKV099Vd&r@%i~|Wi!HBLaLE!V z3SH(G?D_@69)!oC!UOP{c>rEF48R*|0N!*5z&l-xDU-Jtt7ZgWU2|^x4VRgZISk}Me6ewLX-+i^n=X%e~^&bP*`-^lYaeFSWb z{NRDj2~8yTeL{cZ{)iX@%}?wa$@ypa9Qg&76N!JYX*Clm9oe|Qnz?>6aQ&{h{&2ax zjXQRf{K?qJz&FWX@ESz?&ESZLf8cYZ6JDJP4gASa{%@!jrH>)Sf^St++OFx%`b>AsaKca1YLf2T%GSz%X{3uwP{CHiZxGOu&Y>cct`Y ztm5Dksp_{}^%`A2_zlpTGdu#l1$=l<0yYdiTDD@W+K2g0o&juY16Y3rwoL?Vj12I= z=BOtk)SW9P8Xk`l*%na-qJiuj$$C5Z92o@5$;3Yv<_<&nc7Ay6OR~r&-rmf%gMn?Z zVk_2ceSEfY81#1U`UP);@w-v(R|TTK$(ERN7-Gm_sLElO&SA>h=YV%@Rj$z-hMRL3 zVaQ>m%3+kwVbR*>kZ31|QgaSvh8z+qhjN|6_G_C1e!OibhYE8Jm4+Nfs~pDY9A>Y5 z4()A=vF03hG~}?8%3++&VfotU(B8J#*_^{Jh8%WPIgHmij9%Lu_?0^IoSb0JVK+k# z6IBko>m2IWK8Njf@3ZFM~||5D}a4mfXrq7MYs1Vm4C%OMUqiG zH>+v`>b?qfri*HQ;HKsMcIQKK@f5s}Gs_G=I|QF@vyPl6e4RY@vdx|?g12Rs_K^M1 zig#`1;vYx$XQ+Pkedf&SQaJz)bpp?}HZ*5t9;)-?Kt@gb2mdKk)j6$R!O7;Ntv1)j zS7s29;rCGSVvNQ=2rm4IEcW#AXO+r)M!KlAYzv`wNoR@joz3yBh` zLqmX_M$M50?2nClw#KVx9iPX?|6HU#Ths>!odomh<|HC=fX#= z_m-B-^x`spNVyRKK^je{InvBdS|R|;qy-KQ*vdA*1%N#F2Y^_xv$D5~ZX-HC7u4|M z91#Nagyi7R=xuB(K=;zsfC~{204_oezoW4et{3<+^dOtuqB8J3Qd;+B5r6oh$Ast>y*dq&I 
zk5aHlYuFD0*nA9jbVsn*RAocrq8RnGKkC!Z>iFiVj~wAHQvEL0{eIW|E>Zougf~H^aFi}p{Vvn}-qHOo_xqu*9&J9W><>_R zRG8lqj6>P5@)EfMNWsRvQn6j-vSl_2Hty91!s#6l4js@Aq1spdwqX1dOy-(`WUf`o zT<0cZy{n~PZ^&c~rAOFqC}6u$vE8KE-qURVS{+;Aa6(;s+HNjjyG60x>au0V1_OPY zfpBIA1C2Y>K4W1nCH&r8xVcb1o8@+uR9d}a+?tYHLv3TarmmqjCwHg>?^M}rW_7RX zYxS?{XAPS6Ij%IURaKi>L#j5gMo#+?RrNPXsy4LBr+v>F|M*8$Z)?o7Z&~AC-KZ+E z#!dTMMdEN<)zun5?N8SFvbK&jY1;3s@vrPub+)S1WrzCvj8$E%y{7%ewnZ4As%}>G zv_Du|&f4bIoT~M$nrT1L!#`?M)zhk*_5*9zu-40JnD!lO{F~5i>W$@wW_*b_rCxq^ zMc$=E-VLi>9GCP0<*&)LjFNj)+r5UiQn^pH-Oo0@#$4Z$t7%GQS|rt+%E|+9&rk=_ z7Qr`#Ype#4O0Bg-9#q{Pf-Sagg4E-xirmDo7SE<-VY;rS*22rvX?Yki_|#8by`);L z5exCBKU&(dBbpkrwLSJCXu7UCo$DcwAgm-Qk1E6sXINX!u(qDjrnqZc+FBbMn(J#+ z5?Of+F>Y4)s~xH4Y(q=4+a!;p1)ps#AKPM=bdnm%!4Q+-9MskXVXWn>e+sN9$xnQhu`yF&(Rtid}D z7@Qa}dc>%~!v+kl!vE)Ieb*<@KF_m9eIjGxTDjlKGlYScpBAt@(h!!!t6+^ z&m`31{pp%4zFORnYXI=GXkDo`u(UNdQ**j8?Y@8hoa+8OtlGe-#>SRKHLVS`2jip8 wFTlG(32kg>YRIK>(3+JORgah0gP%dm=CD^Rkd~%YW2==KB`>34W!s4V2gj7TGynhq diff --git a/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( 
fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/gq_ttq.mad/test/cudacpp_test.mk b/epochX/cudacpp/gq_ttq.mad/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/gq_ttq.mad/test/cudacpp_test.mk +++ b/epochX/cudacpp/gq_ttq.mad/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt 
b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index e76b814911..db06660f7c 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. @@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +38,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). 
Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -54,7 +55,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005757570266723633  +DEBUG: model prefixing takes 0.009302139282226562  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -163,13 +164,13 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.040 s +8 processes with 40 diagrams generated in 0.148 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 175]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ -186,40 +187,40 @@ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ DEBUG: type(fortran_model)= [output.py at line 224]  DEBUG: type(me)= me=0 [output.py at line 225]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'color_sum.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.h', 
'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'cudacpp_overlay.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 226]  -INFO: Creating files in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. +INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. 
DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 222]  DEBUG: type(subproc_group)= [output.py at line 223]  DEBUG: type(fortran_model)= [output.py at line 224]  DEBUG: type(me)= me=1 [output.py at line 225]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'color_sum.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.h', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'cudacpp_overlay.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 226]  -INFO: Creating files in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.016 s +INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. +Generated helas calls for 2 subprocesses (10 diagrams) in 0.065 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.090 s +ALOHA: aloha creates 2 routines in 0.226 s FFV1 FFV1 FFV1 FFV1 VVV1 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. quit -real 0m1.337s -user 0m0.375s -sys 0m0.160s -Code generation completed in 2 seconds +real 0m1.590s +user 0m1.312s +sys 0m0.233s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gq_ttq.sa/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, 
int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before computeMatrixElements) + 
const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc index eea3950214..95bddd1643 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc @@ -989,38 +989,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1046,7 +1058,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1181,7 +1193,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1205,7 +1217,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1317,25 +1329,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1398,7 +1421,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.h b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.h index ebc491b00d..ab9d7dde82 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.h +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned 
int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc index bb8b2f2773..3a5fa5afe7 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc @@ -989,38 +989,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1046,7 +1058,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1181,7 +1193,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, 
gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1205,7 +1217,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1317,25 +1329,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
- const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1398,7 +1421,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.h b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.h index 2c3a739550..55c42cb947 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.h +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const 
unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute 
capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capabilities (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and PTX for the highest one (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) + ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and
PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 
else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" 
#ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.h b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.h +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // 
_FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned 
int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/gq_ttq.sa/test/cudacpp_test.mk b/epochX/cudacpp/gq_ttq.sa/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/gq_ttq.sa/test/cudacpp_test.mk +++ b/epochX/cudacpp/gq_ttq.sa/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index f374f8f313..a5473b9464 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -1,5 +1,5 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. 
Continue at your own risk  Running MG5 in debug mode +('WARNING: loading of madgraph too slow!!!', 0.605353593826294) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. It has been validated for the last time with version: 3.6.5 @@ -16,7 +16,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +29,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,22 +39,23 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T save options auto_convert_model -save configuration file to /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model heft INFO: Restrict model heft with file models/heft/restrict_default.dat . DEBUG: Simplifying conditional expressions  @@ -120,7 +122,7 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ generate g g > b b~ HIW<=1 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Process has 4 diagrams -1 processes with 4 diagrams generated in 0.003 s +1 processes with 4 diagrams generated in 0.019 s Total: 1 processes with 4 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_heft_gg_bb --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -131,10 +133,10 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_heft_gg_bb --hel_recycling=False --ve INFO: initialize a new directory: CODEGEN_mad_heft_gg_bb INFO: remove old information in CODEGEN_mad_heft_gg_bb DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses  +WARNING: File exists 
/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb  +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 @@ -146,55 +148,55 @@ FileWriter b b~ HIG<=1 HIW<=1 @1 INFO: Finding symmetric diagrams for subprocess group gg_bbx -DEBUG: len(subproc_diagrams_for_config) =  4 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1748]  -Generated helas calls for 1 subprocesses (4 diagrams) in 0.005 s -Wrote files for 12 helas calls in 0.268 s +DEBUG: len(subproc_diagrams_for_config) =  4 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1749]  +Generated helas calls for 1 subprocesses (4 diagrams) in 0.021 s +Wrote files for 12 helas calls in 0.277 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.159 s +ALOHA: aloha creates 4 routines in 0.605 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha 
creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.152 s +ALOHA: aloha creates 8 routines in 0.446 s VVS3 VVV1 FFV1 FFV1 FFV1 FFS2 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./HelAmps_heft.h -INFO: Created file HelAmps_heft.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./HelAmps_heft.h +INFO: Created file HelAmps_heft.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.cc INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages DEBUG: result.returncode =  0 [output.py at line 273]  -Output to directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb done. +Output to directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb done. Type "launch" to generate events from this process, or see -/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/README +/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/README Run "open index.html" to see more information about this process. quit -real 0m4.654s -user 0m1.223s -sys 0m0.605s -Code generation completed in 5 seconds +real 0m6.546s +user 0m5.426s +sys 0m0.926s +Code generation completed in 7 seconds ************************************************************ * * * W E L C O M E to * @@ -215,10 +217,10 @@ Code generation completed in 5 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt -Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt treatcards run @@ -245,10 +247,10 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards param diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/me5_configuration.txt b/epochX/cudacpp/heft_gg_bb.mad/Cards/me5_configuration.txt index 712b1897aa..db7e3616c4 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/me5_configuration.txt @@ -255,7 +255,7 @@ # pineappl = pineappl -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat index 84c16b4cf4..16084f30b2 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-14-g6dba8f068 3.7.1 * +#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/heft_gg_bb.mad/Source/DHELAS/aloha_functions.f index e986b059a9..47699fa614 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/DHELAS/aloha_functions.f @@ -2022,21 +2022,6 @@ subroutine orxxxx(p,rmass,nhel,nsr , ro) end - complex*16 function THETA_FUNCTIONR(cond, out_true, out_false) - - double precision cond - double precision out_true, out_false - - if (cond.ge.0d0) then - THETA_FUNCTIONR = out_true - else - THETA_FUNCTIONR = out_false - endif - - return - - - end complex*16 function THETA_FUNCTION(cond, out_true, out_false) double precision cond diff --git 
a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/Bridge.h b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable 
multi-channel for helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before 
computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc index c32c974cc1..dd67bb37d5 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc @@ -953,38 +953,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1010,7 +1022,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1145,7 +1157,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1169,7 +1181,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1281,25 +1293,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1362,7 +1385,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.h b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.h index 543e74fad7..8e08d92d87 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.h +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, 
// input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f index 785453cfcf..19da4f5d75 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -893,9 +892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) 
CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f index fc8effb6b2..81f64c5619 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f @@ -337,6 +337,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -347,6 +350,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -443,7 +447,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -516,7 +520,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -529,6 +533,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -547,6 +553,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -598,7 +606,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -622,7 +630,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -630,7 +638,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f index 66966ada1a..7395a3966a 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f @@ -324,8 +324,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/addmothers.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/addmothers.f index d6cded9a2d..593c620d9b 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/addmothers.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/addmothers.f @@ -111,7 +111,7 @@ subroutine addmothers(ip,jpart,pb,isym,jsym,rscale,aqcd,aqed,buff, if (btest(mlevel,3)) then write(*,*)'unwgt.f: write out diagram ',igraphs(1) endif - lconfig = vec_igraph1(ivec) + lconfig = vec_igraph(ivec) endif is_LC=.true. 
maxcolor=0 diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cluster.inc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cluster.inc index 8ddf5bee13..940c25eac0 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cluster.inc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cluster.inc @@ -43,5 +43,5 @@ c parameters for sudakovs integer iipdg,iimode common/gamma_args/Q1,iipdg,iimode - integer vec_igraph1(VECSIZE_MEMMAX) - common/vec_igraph/vec_igraph1 + integer vec_igraph(VECSIZE_MEMMAX) + common/vec_igraph/vec_igraph diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See 
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capabilities (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) 
+ ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone 
bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.cc @@ 
-91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.h b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.h +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/makefile_original.mk b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/makefile_original.mk index 6cb56d0409..348c283be7 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/makefile_original.mk +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/makefile_original.mk @@ -58,10 +58,7 @@ $(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) $(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -libcollier.$(dylibext): - ln -s $(LIBDIR)/collier_lib/libcollier.$(dylibext) || echo 'already done' - -gensym: $(SYMMETRY) configs.inc $(LIBS) libcollier.$(dylibext) +gensym: $(SYMMETRY) configs.inc $(LIBS) $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) $(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/myamp.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/myamp.f index bd02dfe2b4..5360566ef4 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/myamp.f @@ -139,7 +139,7 @@ logical function cut_bw(p) $ gForceBW(i,iconfig).eq.1)) if(onshell)then c Remove on-shell forbidden s-channels (gForceBW=2) (JA 2/10/11) - if(gForceBW(i,iconfig).eq.2.and.sde_strat.eq.1) then + if(gForceBW(i,iconfig).eq.2) then cut_bw = .true. 
return endif diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/reweight.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/reweight.f index 353e025d71..8e4672a421 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/reweight.f @@ -1416,6 +1416,7 @@ double precision function rewgt(p, ivec) rewgt=1.0d0 clustered=.false. + vec_igraph(ivec) = 0 ! default: no MLM graph selected for this event if(ickkw.le.0.and..not.use_syst) return @@ -1467,6 +1468,7 @@ double precision function rewgt(p, ivec) rewgt = 0d0 return endif + vec_igraph(ivec) = igraphs(1) ! save MLM-matched graph for this event c Store pdf information for systematics studies (initial) @@ -1592,10 +1594,6 @@ double precision function rewgt(p, ivec) c alpha_s weight if(ipdgcl(imocl(n),igraphs(1),iproc).ne.fake_id)then - if (q2now.le.4)then - rewgt=0d0 - return - endif rewgt=rewgt*alphas(alpsfact*sqrt(q2now))/asref c Store information for systematics studies if(use_syst)then @@ -1907,7 +1905,7 @@ subroutine update_scale_coupling_vec(all_p, all_wgt,all_q2fact, VECSIZE_USED) else all_q2fact(1,i) = q2fact(1) all_q2fact(2,i) = q2fact(2) - vec_igraph1(i) = igraphs(1) + vec_igraph(i) = igraphs(1) endif c call save_cl_val_to(i) c endif diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/banner.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/banner.py index 74f6b04b68..c248436e7f 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/banner.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/banner.py @@ -1004,8 +1004,6 @@ def __init__(self, finput=None, **opt): self.comments = {} # comment associated to parameters. can be display via help message # store the valid options for a given parameter. 
self.allowed_value = {} - # allow nickname for some parameter to avoid integer mapping for some var - self.shortcut_values = {} self.default_setup() @@ -1134,11 +1132,6 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): scan_targettype = self.scan_set[lower_name] del self.scan_set[lower_name] - # check if the user used a shortcut value (which are always str) - if lower_name in self.shortcut_values: - if isinstance(value,str) and value.strip().lower() in self.shortcut_values[lower_name]: - value = self.shortcut_values[lower_name][value.strip().lower()] - # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] @@ -1317,8 +1310,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): def add_param(self, name, value, system=False, comment=False, typelist=None, - allowed=[], - shortcut={}): + allowed=[]): """add a default parameter to the class""" lower_name = name.lower() @@ -1353,11 +1345,6 @@ def add_param(self, name, value, system=False, comment=False, typelist=None, assert val in allowed or '*' in allowed else: assert value in allowed or '*' in allowed - if shortcut: - if allowed and shortcut and '*' not in allowed: - assert all([val in allowed for val in shortcut.values()]), "Some shortcut value are not in the allowed list" - assert all([isinstance(v, str) for v in shortcut.keys()]), "All shortcut values should be str" - self.shortcut_values[lower_name] = shortcut #elif isinstance(value, bool) and allowed != ['*']: # self.allowed_value[name] = [True, False] @@ -4186,10 +4173,8 @@ def default_setup(self): allowed=['partonshower'], comment="list of check that can be bypassed manually.") self.add_param("python_seed", -2, include=False, hidden=True, comment="controlling python seed [handling in particular the final unweighting].\n -1 means use default from random module.\n -2 means set to same value as iseed") self.add_param("lpp1", 1, 
fortran_name="lpp(1)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='first beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("lpp2", 1, fortran_name="lpp(2)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='second beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("ebeam1", 6500.0, fortran_name="ebeam(1)") self.add_param("ebeam2", 6500.0, fortran_name="ebeam(2)") @@ -4198,24 +4183,18 @@ def default_setup(self): self.add_param("polbeam2", 0.0, fortran_name="pb2", hidden=True, comment="Beam polarization from -100 (left-handed) to 100 (right-handed) --use lpp=0 for this parameter--") self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_proton2', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(2)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (used for beam 2 if group_subprocess was False)') self.add_param('nb_neutron1', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(1)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_neutron2', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(2)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (of beam 2 if group_subprocess was False )') self.add_param('mass_ion1', -1.0, hidden=True, 
fortran_name="mass_ion(1)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 1)') self.add_param('mass_ion2', -1.0, hidden=True, fortran_name="mass_ion(2)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 2)') valid_pdf = ['lhapdf', 'cteq6_m','cteq6_l', 'cteq6l1','nn23lo', 'nn23lo1', 'nn23nlo','iww','eva','edff','chff','none','mixed']+\ sum(self.allowed_lep_densities.values(),[]) @@ -4228,14 +4207,12 @@ def default_setup(self): self.add_param("fixed_fac_scale1", False, hidden=True) self.add_param("fixed_fac_scale2", False, hidden=True) self.add_param("fixed_extra_scale", False, hidden=True) - self.add_param("scale", 91.1880, shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) + self.add_param("scale", 91.1880) + self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1") + self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2") self.add_param("mue_ref_fixed", 91.1880, hidden=True) self.add_param("dynamical_scale_choice", -1, comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2\n '4' is the center of mass energy\n'0' allows to use the user_hook definition (need to be defined via custom_fct entry) ", - allowed=[-1,0,1,2,3,4,10], - shortcut={'ckkw':-1,'ht':2,'ht/2':3,'et':1,'shat':4}, - ) + allowed=[-1,0,1,2,3,4,10]) self.add_param("mue_over_ref", 1.0, 
hidden=True, comment='ratio mu_other/mu for dynamical scale') self.add_param("ievo_eva",0,hidden=True, allowed=[0,1],fortran_name="ievo_eva", comment='eva: 0 for EW pdf muf evolution by q^2; 1 for evo by pT^2') @@ -5598,10 +5575,8 @@ def default_setup(self): self.add_param('niters_fo', 6, include=False) #seed and collider self.add_param('iseed', 0) - self.add_param('lpp1', 1, fortran_name='lpp(1)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) - self.add_param('lpp2', 1, fortran_name='lpp(2)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) + self.add_param('lpp1', 1, fortran_name='lpp(1)') + self.add_param('lpp2', 1, fortran_name='lpp(2)') self.add_param('ebeam1', 6500.0, fortran_name='ebeam(1)') self.add_param('ebeam2', 6500.0, fortran_name='ebeam(2)') self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", @@ -5644,15 +5619,13 @@ def default_setup(self): self.add_param('fixed_ren_scale', False) self.add_param('fixed_fac_scale', False) self.add_param('fixed_extra_scale', True, hidden=True, system=True) # set system since running from Ellis-Sexton scale not implemented - self.add_param('mur_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mur_ref_fixed', 91.118) self.add_param('muf1_ref_fixed', -1.0, hidden=True) - self.add_param('muf_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('muf_ref_fixed', 91.118) self.add_param('muf2_ref_fixed', -1.0, hidden=True) - self.add_param('mue_ref_fixed', 91.118, hidden=True, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mue_ref_fixed', 91.118, hidden=True) self.add_param("dynamical_scale_choice", [-1],fortran_name='dyn_scale', - allowed = [-2,-1,0,1,2,3,10], - shortcut={ 'ht/2':3,'ht':2,'et':1}, - comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n 
'2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") + allowed = [-2,-1,0,1,2,3,10], comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") self.add_param('fixed_qes_scale', False, hidden=True) self.add_param('qes_ref_fixed', -1.0, hidden=True) self.add_param('mur_over_ref', 1.0) diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/common_run_interface.py index 6f82393c3f..3c5601e27d 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/common_run_interface.py @@ -5205,12 +5205,12 @@ def init_run(self, cards): if self.run_set: self.special_shortcut.update( {'ebeam':([float],['run_card ebeam1 %(0)s', 'run_card ebeam2 %(0)s']), - 'lpp': ([str],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), + 'lpp': ([int],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), 'lhc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), 'lep': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'ilc': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'lcc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), - 'fixed_scale': ([str],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 %(0)s']), + 'fixed_scale': ([float],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card 
dsqrt_q2fact2 %(0)s']), 'no_parton_cut':([],['run_card nocut T']), 'cm_velocity':([float], [lambda self :self.set_CM_velocity]), 'pbp':([],['run_card lpp1 1', 'run_card lpp2 1','run_card nb_proton1 82', 'run_card nb_neutron1 126', 'run_card mass_ion1 195.0820996698','run_card nb_proton2 1', 'run_card nb_neutron2 0', 'run_card mass_ion1 -1']), @@ -5795,8 +5795,6 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed_for_run.remove('*') elif isinstance(self.run_card[args[-1]], bool): allowed_for_run = ['True', 'False'] - if args[-1].lower() in self.run_card.shortcut_values: - allowed_for_run += self.run_card.shortcut_values[args[-1].lower()] opts += [str(i) for i in allowed_for_run] diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/launch_plugin.py index 262d39a736..3bd0c281fc 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/launch_plugin.py @@ -38,11 +38,26 @@ def compile(self, *args, **opts): cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py - logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend in ['cpp', 'cppauto']: + backend_log = pjoin(opts["cwd"], ".resolved-backend") + # try to remove old file if present + try: + os.remove(backend_log) + except FileNotFoundError: + pass + misc.compile(["-f", "cudacpp.mk", f"BACKEND=cppauto", f"BACKEND_LOG={backend_log}", "detect-backend"], **opts) + try: + with open(backend_log, "r") as f: + resolved_backend = f.read().strip() + logger.info(f"Backend '{cudacpp_backend}' resolved as '{resolved_backend}'") + cudacpp_backend = resolved_backend + 
except FileNotFoundError: + raise RuntimeError("Could not resolve cudacpp_backend=cppauto|cpp; ensure Makefile detection runs properly.") + logger.info(f"Building madevent in madevent_interface.py with '{cudacpp_backend}' matrix elements") if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' else: - raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) + raise Exception(f"Invalid cudacpp_backend='{cudacpp_backend}': supported backends are [ '" + "', '".join(cudacpp_supported_backends) + "' ]") return misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( 
ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/heft_gg_bb.mad/test/cudacpp_test.mk b/epochX/cudacpp/heft_gg_bb.mad/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/test/cudacpp_test.mk +++ b/epochX/cudacpp/heft_gg_bb.mad/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index e04a2da479..905af14e1e 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ 
b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. @@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,34 +38,35 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T save options auto_convert_model -save configuration file to /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model heft -INFO: download model from https://madgraph.mi.infn.it/Downloads/models/heft.tgz to the following directory: /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/models  ---2026-03-10 10:38:21-- https://madgraph.mi.infn.it/Downloads/models/heft.tgz -Resolving madgraph.mi.infn.it (madgraph.mi.infn.it)... 192.135.21.75 -Connecting to madgraph.mi.infn.it (madgraph.mi.infn.it)|192.135.21.75|:443... connected. +INFO: download model from http://madgraph.phys.ucl.ac.be/Downloads/models/heft.tgz to the following directory: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/models  +--2026-04-14 11:59:48-- http://madgraph.phys.ucl.ac.be/Downloads/models/heft.tgz +Resolving madgraph.phys.ucl.ac.be (madgraph.phys.ucl.ac.be)... 130.104.2.143 +Connecting to madgraph.phys.ucl.ac.be (madgraph.phys.ucl.ac.be)|130.104.2.143|:80... connected. HTTP request sent, awaiting response... 200 OK Length: 50876 (50K) [application/x-gzip] Saving to: ‘tmp.tgz’ - 0K .......... .......... .......... .......... ......... 100% 2.92M=0.02s + 0K .......... .......... .......... .......... ......... 
100% 911K=0.05s -2026-03-10 10:38:22 (2.92 MB/s) - ‘tmp.tgz’ saved [50876/50876] +2026-04-14 11:59:49 (911 KB/s) - ‘tmp.tgz’ saved [50876/50876] heft/ heft/write_param_card.py @@ -102,7 +103,7 @@ INFO: load particles INFO: load vertices WARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  WARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  -DEBUG: model prefixing takes 0.007684946060180664  +DEBUG: model prefixing takes 0.011363983154296875  INFO: Restrict model heft with file models/heft/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -168,13 +169,13 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ generate g g > b b~ HIW<=1 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Process has 4 diagrams -1 processes with 4 diagrams generated in 0.003 s +1 processes with 4 diagrams generated in 0.019 s Total: 1 processes with 4 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_heft_gg_bb Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 175]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 @@ -183,34 +184,34 @@ INFO: Processing 
color information for process: g g > b b~ HIG<=1 HIW<=1 @1 DEBUG: type(fortran_model)= [output.py at line 224]  DEBUG: type(me)= me=0 [output.py at line 225]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'color_sum.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.h', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'cudacpp_overlay.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 226]  -INFO: Creating files in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/. 
-Generated helas calls for 1 subprocesses (4 diagrams) in 0.005 s +INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/. +Generated helas calls for 1 subprocesses (4 diagrams) in 0.027 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.159 s +ALOHA: aloha creates 4 routines in 0.482 s VVS3 VVV1 FFV1 FFV1 FFV1 FFS2 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./HelAmps_heft.h -INFO: Created file HelAmps_heft.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./HelAmps_heft.h +INFO: Created file HelAmps_heft.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.cc INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. quit -real 0m1.669s -user 0m0.522s -sys 0m0.180s +real 0m2.111s +user 0m1.591s +sys 0m0.266s Code generation completed in 2 seconds diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/Bridge.h b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, 
int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before 
computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc index 7a1f85c7cc..b6fdebd1f6 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc @@ -949,38 +949,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1006,7 +1018,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1141,7 +1153,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1165,7 +1177,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1277,25 +1289,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1358,7 +1381,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.h b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.h index 543e74fad7..8e08d92d87 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.h +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color 
selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - 
# Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capabilities (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, plus PTX for the highest one (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) + ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed
device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 
bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* 
selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.h b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.h +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + 
typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/heft_gg_bb.sa/test/cudacpp_test.mk b/epochX/cudacpp/heft_gg_bb.sa/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/test/cudacpp_test.mk +++ b/epochX/cudacpp/heft_gg_bb.sa/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt index 5067c06ff1..56b44455c8 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. 
Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. @@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +38,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -54,7 +55,7 @@ set zerowidth_tchannel F import model sm-no_b_mass INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.003014802932739258  +DEBUG: model prefixing takes 0.005021333694458008  INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,7 +179,7 @@ INFO: Process u~ d > t t~ w- added to mirror process d u~ > t t~ w- INFO: Process c~ s > t t~ w- added to mirror process s c~ > t t~ w- INFO: Process d~ u > t t~ w+ added to mirror process u d~ > t t~ w+ INFO: Process s~ c > t t~ w+ added to mirror process c s~ > t t~ w+ -4 processes with 8 diagrams generated in 0.056 s +4 processes with 8 diagrams generated in 0.179 s Total: 4 processes with 8 diagrams add process p p > t t~ w j @1 INFO: Checking for minimal orders which gives processes. 
@@ -220,7 +221,7 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~ INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g -12 processes with 144 diagrams generated in 0.331 s +12 processes with 144 diagrams generated in 0.940 s Total: 16 processes with 152 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -231,10 +232,10 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --v INFO: initialize a new directory: CODEGEN_mad_nobm_pp_ttW INFO: remove old information in CODEGEN_mad_nobm_pp_ttW DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW  +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ w+ d WEIGHTED<=5 @1 INFO: Processing color information for process: g u > t t~ w+ d @1 @@ -268,9 
+269,9 @@ FileWriter t t~ w+ d WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxwpd -DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1749]  INFO: Creating files in directory P1_gd_ttxwmu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . @@ -279,9 +280,9 @@ FileWriter t t~ w- u WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gd_ttxwmu -DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1749]  INFO: Creating files in directory P1_gux_ttxwmdx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -290,9 +291,9 @@ FileWriter t t~ w- d~ WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxwmdx -DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1749]  INFO: Creating files in directory P1_gdx_ttxwpux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . @@ -301,9 +302,9 @@ FileWriter t t~ w+ u~ WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gdx_ttxwpux -DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1749]  INFO: Creating files in directory P1_udx_ttxwpg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -312,9 +313,9 @@ FileWriter t t~ w+ g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group udx_ttxwpg -DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1749]  INFO: Creating files in directory P1_dux_ttxwmg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . @@ -323,9 +324,9 @@ FileWriter t t~ w- g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group dux_ttxwmg -DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1749]  INFO: Creating files in directory P0_udx_ttxwp DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -334,9 +335,9 @@ FileWriter t t~ w+ WEIGHTED<=4 INFO: Finding symmetric diagrams for subprocess group udx_ttxwp -DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1749]  INFO: Creating files in directory P0_dux_ttxwm DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . @@ -345,21 +346,21 @@ FileWriter t t~ w- WEIGHTED<=4 INFO: Finding symmetric diagrams for subprocess group dux_ttxwm -DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1748]  -Generated helas calls for 8 subprocesses (76 diagrams) in 0.104 s -Wrote files for 212 helas calls in 2.138 s +DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1749]  +Generated helas calls for 8 subprocesses (76 diagrams) in 0.289 s +Wrote files for 212 helas calls in 1.169 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 3 routines in 0.123 s +ALOHA: aloha creates 3 routines in 0.239 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 6 routines in 0.122 
s +ALOHA: aloha creates 6 routines in 0.215 s FFV1 FFV1 FFV1 @@ -367,32 +368,32 @@ ALOHA: aloha creates 6 routines in 0.122 s FFV2 FFV2 VVV1 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./HelAmps_sm_no_b_mass.h -INFO: Created file HelAmps_sm_no_b_mass.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./HelAmps_sm_no_b_mass.h +INFO: Created file HelAmps_sm_no_b_mass.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.cc INFO: Created files Parameters_sm_no_b_mass.h and Parameters_sm_no_b_mass.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages DEBUG: result.returncode =  0 [output.py at line 273]  -Output to directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW done. +Output to directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW done. Type "launch" to generate events from this process, or see -/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/README +/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/README Run "open index.html" to see more information about this process. quit -real 0m8.122s -user 0m2.522s -sys 0m1.075s -Code generation completed in 8 seconds +real 0m8.199s +user 0m6.827s +sys 0m1.178s +Code generation completed in 9 seconds ************************************************************ * * * W E L C O M E to * @@ -413,10 +414,10 @@ Code generation completed in 8 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt -Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt treatcards run @@ -443,10 +444,10 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards param diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/me5_configuration.txt b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/me5_configuration.txt index 712b1897aa..db7e3616c4 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/me5_configuration.txt @@ -255,7 +255,7 @@ # pineappl = pineappl -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat index 3f652ded8d..f7d965154f 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-14-g6dba8f068 3.7.1 * +#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/DHELAS/aloha_functions.f index e986b059a9..47699fa614 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/DHELAS/aloha_functions.f @@ -2022,21 +2022,6 @@ subroutine orxxxx(p,rmass,nhel,nsr , ro) end - complex*16 function THETA_FUNCTIONR(cond, out_true, out_false) - - double precision cond - double precision out_true, out_false - - if (cond.ge.0d0) then - THETA_FUNCTIONR = out_true - else - THETA_FUNCTIONR = out_false - endif - - return - - - end complex*16 function THETA_FUNCTION(cond, out_true, out_false) double precision cond diff --git 
a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/Bridge.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable 
multi-channel for helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before 
computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc index 9d43997b76..90d559ebf6 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc @@ -966,38 +966,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1023,7 +1035,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1158,7 +1170,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1182,7 +1194,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1294,25 +1306,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1375,7 +1398,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.h index 53f417c646..56d598b7a9 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.h @@ -164,6 +164,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -188,6 +189,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* 
allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f index 16d9b1bce8..fc86391731 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f @@ -792,8 +792,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -894,9 +893,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) 
CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig1.f index 983025466d..4ea33017ad 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig1.f @@ -344,6 +344,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -354,6 +357,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -460,7 +464,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -533,7 +537,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -546,6 +550,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -564,6 +570,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -615,7 +623,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -639,7 +647,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -647,7 +655,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/matrix1.f index 97ed635786..f474e88c0b 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/matrix1.f @@ -373,8 +373,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc index 83d25c8021..d0667f1e25 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc @@ -966,38 +966,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, 
// input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1023,7 +1035,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1158,7 +1170,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1182,7 +1194,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1294,25 +1306,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1375,7 +1398,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.h 
b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.h index 3ac92dd2c9..913c0dfeed 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.h @@ -164,6 +164,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -188,6 +189,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f index 37f83693d3..d017298ec3 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f @@ -792,8 +792,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -894,9 +893,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig1.f index 2224f52ad1..fc948b0b81 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig1.f @@ -344,6 +344,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -354,6 +357,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -460,7 +464,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -533,7 +537,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -546,6 +550,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -564,6 +570,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -615,7 +623,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -639,7 +647,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -647,7 +655,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/matrix1.f index 1496eebe35..3b69954d1e 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/matrix1.f @@ -373,8 +373,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc index 152beb1322..64e5979e50 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc @@ -1162,38 +1162,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* 
allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1219,7 +1231,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1354,7 +1366,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1378,7 +1390,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1490,25 +1502,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1571,7 +1594,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.h 
b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.h index 20f8a6d2b4..1335e38061 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.h @@ -164,6 +164,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -188,6 +189,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f index af77031e76..e87d79430d 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f @@ -792,8 +792,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -894,9 +893,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig1.f index a566870b6b..7ebd1a0f28 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig1.f @@ -344,6 +344,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -354,6 +357,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -460,7 +464,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -533,7 +537,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -546,6 +550,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -564,6 +570,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -615,7 +623,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -639,7 +647,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -647,7 +655,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/matrix1.f index 0f5afbd521..59d1fada6b 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/matrix1.f @@ -421,8 +421,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc index 8f0bfc615c..a9287ad910 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc @@ -1162,38 +1162,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* 
allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1219,7 +1231,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1354,7 +1366,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1378,7 +1390,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1490,25 +1502,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1571,7 +1594,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.h 
b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.h index e94d034748..27f4d1c5c2 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.h @@ -164,6 +164,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -188,6 +189,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f index 633c2bda2a..9953cf61f0 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f @@ -792,8 +792,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -894,9 +893,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig1.f index 7fda166f5a..8bc3bb1631 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig1.f @@ -343,6 +343,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -353,6 +356,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -457,7 +461,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -530,7 +534,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -543,6 +547,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -561,6 +567,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -612,7 +620,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -636,7 +644,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -644,7 +652,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/matrix1.f index 8d05da36d4..b4592273af 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/matrix1.f @@ -421,8 +421,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc index 209e073d74..b4f8ffa4a6 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc @@ -1162,38 +1162,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* 
allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1219,7 +1231,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1354,7 +1366,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1378,7 +1390,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1490,25 +1502,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1571,7 +1594,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.h 
b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.h index a83896951d..b11b67d795 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.h @@ -164,6 +164,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -188,6 +189,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f index df3b5e689b..e851c0f544 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f @@ -792,8 +792,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -894,9 +893,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f index 5a48f895c3..eaf408e47a 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f @@ -343,6 +343,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -353,6 +356,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -457,7 +461,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -530,7 +534,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -543,6 +547,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -561,6 +567,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -612,7 +620,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -636,7 +644,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -644,7 +652,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/matrix1.f index cb4090e743..a8e61a7f70 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/matrix1.f @@ -421,8 +421,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc index f63f49b5fd..002d741dcd 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc @@ -1162,38 +1162,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* 
allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1219,7 +1231,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1354,7 +1366,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1378,7 +1390,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1490,25 +1502,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1571,7 +1594,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.h 
b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.h index eadff47f18..96aee249a6 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.h @@ -164,6 +164,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -188,6 +189,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f index 8a448d0444..27340f01d3 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f @@ -792,8 +792,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -894,9 +893,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig1.f index e2759d19f6..0c984e26c2 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig1.f @@ -343,6 +343,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -353,6 +356,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -457,7 +461,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -530,7 +534,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -543,6 +547,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -561,6 +567,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -612,7 +620,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -636,7 +644,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -644,7 +652,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/matrix1.f index bf1d47c73c..2aeb7dfc92 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/matrix1.f @@ -421,8 +421,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc index b97e46ece1..9e0ef62036 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc @@ -1162,38 +1162,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* 
allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1219,7 +1231,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1354,7 +1366,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1378,7 +1390,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1490,25 +1502,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1571,7 +1594,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.h 
b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.h index 1642721bee..c3531e18ef 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.h @@ -164,6 +164,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -188,6 +189,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f index a0091febb6..c74ff705d6 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f @@ -792,8 +792,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -894,9 +893,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f index 92e84c1147..6ca85f5dcd 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f @@ -343,6 +343,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -353,6 +356,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -457,7 +461,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -530,7 +534,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -543,6 +547,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -561,6 +567,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -612,7 +620,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -636,7 +644,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -644,7 +652,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/matrix1.f index e194b5f639..927a46e574 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/matrix1.f @@ -421,8 +421,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc index b6bdeb9a02..ed88a0e2d5 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc @@ -1162,38 +1162,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const 
fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1219,7 +1231,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1354,7 +1366,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1378,7 +1390,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1490,25 +1502,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1571,7 +1594,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.h 
b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.h index 3e7ccff73e..8cfc26cf49 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.h @@ -164,6 +164,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -188,6 +189,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f index 369bf6cdf6..3f9567b771 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f @@ -792,8 +792,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -894,9 +893,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig1.f index 75c9ced543..53782bf723 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig1.f @@ -344,6 +344,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -354,6 +357,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -460,7 +464,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -533,7 +537,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -546,6 +550,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -564,6 +570,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -615,7 +623,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -639,7 +647,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -647,7 +655,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/matrix1.f index 164ddfda7d..d4d5a71d55 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/matrix1.f @@ -421,8 +421,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/addmothers.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/addmothers.f index d6cded9a2d..593c620d9b 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/addmothers.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/addmothers.f @@ -111,7 +111,7 @@ subroutine addmothers(ip,jpart,pb,isym,jsym,rscale,aqcd,aqed,buff, if (btest(mlevel,3)) then write(*,*)'unwgt.f: write out diagram ',igraphs(1) endif - lconfig = vec_igraph1(ivec) + lconfig = vec_igraph(ivec) endif is_LC=.true. 
maxcolor=0 diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cluster.inc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cluster.inc index 8ddf5bee13..940c25eac0 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cluster.inc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cluster.inc @@ -43,5 +43,5 @@ c parameters for sudakovs integer iipdg,iimode common/gamma_args/Q1,iipdg,iimode - integer vec_igraph1(VECSIZE_MEMMAX) - common/vec_igraph/vec_igraph1 + integer vec_igraph(VECSIZE_MEMMAX) + common/vec_igraph/vec_igraph diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags 
# See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe
failed) + ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: 
bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/fbridge.cc +++ 
b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/fbridge.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/fbridge.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/makefile_original.mk b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/makefile_original.mk index 6cb56d0409..348c283be7 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/makefile_original.mk +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/makefile_original.mk @@ -58,10 +58,7 @@ $(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) $(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -libcollier.$(dylibext): - ln -s $(LIBDIR)/collier_lib/libcollier.$(dylibext) || echo 'already done' - -gensym: $(SYMMETRY) configs.inc $(LIBS) libcollier.$(dylibext) +gensym: $(SYMMETRY) configs.inc $(LIBS) $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) $(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/myamp.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/myamp.f index bd02dfe2b4..5360566ef4 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/myamp.f @@ -139,7 +139,7 @@ logical function cut_bw(p) $ gForceBW(i,iconfig).eq.1)) if(onshell)then c Remove on-shell forbidden s-channels (gForceBW=2) (JA 2/10/11) - if(gForceBW(i,iconfig).eq.2.and.sde_strat.eq.1) then + if(gForceBW(i,iconfig).eq.2) then cut_bw = .true. 
return endif diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/reweight.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/reweight.f index 353e025d71..8e4672a421 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/reweight.f @@ -1416,6 +1416,7 @@ double precision function rewgt(p, ivec) rewgt=1.0d0 clustered=.false. + vec_igraph(ivec) = 0 ! default: no MLM graph selected for this event if(ickkw.le.0.and..not.use_syst) return @@ -1467,6 +1468,7 @@ double precision function rewgt(p, ivec) rewgt = 0d0 return endif + vec_igraph(ivec) = igraphs(1) ! save MLM-matched graph for this event c Store pdf information for systematics studies (initial) @@ -1592,10 +1594,6 @@ double precision function rewgt(p, ivec) c alpha_s weight if(ipdgcl(imocl(n),igraphs(1),iproc).ne.fake_id)then - if (q2now.le.4)then - rewgt=0d0 - return - endif rewgt=rewgt*alphas(alpsfact*sqrt(q2now))/asref c Store information for systematics studies if(use_syst)then @@ -1907,7 +1905,7 @@ subroutine update_scale_coupling_vec(all_p, all_wgt,all_q2fact, VECSIZE_USED) else all_q2fact(1,i) = q2fact(1) all_q2fact(2,i) = q2fact(2) - vec_igraph1(i) = igraphs(1) + vec_igraph(i) = igraphs(1) endif c call save_cl_val_to(i) c endif diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/banner.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/banner.py index 74f6b04b68..c248436e7f 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/banner.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/banner.py @@ -1004,8 +1004,6 @@ def __init__(self, finput=None, **opt): self.comments = {} # comment associated to parameters. can be display via help message # store the valid options for a given parameter. 
self.allowed_value = {} - # allow nickname for some parameter to avoid integer mapping for some var - self.shortcut_values = {} self.default_setup() @@ -1134,11 +1132,6 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): scan_targettype = self.scan_set[lower_name] del self.scan_set[lower_name] - # check if the user used a shortcut value (which are always str) - if lower_name in self.shortcut_values: - if isinstance(value,str) and value.strip().lower() in self.shortcut_values[lower_name]: - value = self.shortcut_values[lower_name][value.strip().lower()] - # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] @@ -1317,8 +1310,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): def add_param(self, name, value, system=False, comment=False, typelist=None, - allowed=[], - shortcut={}): + allowed=[]): """add a default parameter to the class""" lower_name = name.lower() @@ -1353,11 +1345,6 @@ def add_param(self, name, value, system=False, comment=False, typelist=None, assert val in allowed or '*' in allowed else: assert value in allowed or '*' in allowed - if shortcut: - if allowed and shortcut and '*' not in allowed: - assert all([val in allowed for val in shortcut.values()]), "Some shortcut value are not in the allowed list" - assert all([isinstance(v, str) for v in shortcut.keys()]), "All shortcut values should be str" - self.shortcut_values[lower_name] = shortcut #elif isinstance(value, bool) and allowed != ['*']: # self.allowed_value[name] = [True, False] @@ -4186,10 +4173,8 @@ def default_setup(self): allowed=['partonshower'], comment="list of check that can be bypassed manually.") self.add_param("python_seed", -2, include=False, hidden=True, comment="controlling python seed [handling in particular the final unweighting].\n -1 means use default from random module.\n -2 means set to same value as iseed") self.add_param("lpp1", 1, 
fortran_name="lpp(1)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='first beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("lpp2", 1, fortran_name="lpp(2)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='second beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("ebeam1", 6500.0, fortran_name="ebeam(1)") self.add_param("ebeam2", 6500.0, fortran_name="ebeam(2)") @@ -4198,24 +4183,18 @@ def default_setup(self): self.add_param("polbeam2", 0.0, fortran_name="pb2", hidden=True, comment="Beam polarization from -100 (left-handed) to 100 (right-handed) --use lpp=0 for this parameter--") self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_proton2', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(2)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (used for beam 2 if group_subprocess was False)') self.add_param('nb_neutron1', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(1)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_neutron2', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(2)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (of beam 2 if group_subprocess was False )') self.add_param('mass_ion1', -1.0, hidden=True, 
fortran_name="mass_ion(1)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 1)') self.add_param('mass_ion2', -1.0, hidden=True, fortran_name="mass_ion(2)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 2)') valid_pdf = ['lhapdf', 'cteq6_m','cteq6_l', 'cteq6l1','nn23lo', 'nn23lo1', 'nn23nlo','iww','eva','edff','chff','none','mixed']+\ sum(self.allowed_lep_densities.values(),[]) @@ -4228,14 +4207,12 @@ def default_setup(self): self.add_param("fixed_fac_scale1", False, hidden=True) self.add_param("fixed_fac_scale2", False, hidden=True) self.add_param("fixed_extra_scale", False, hidden=True) - self.add_param("scale", 91.1880, shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) + self.add_param("scale", 91.1880) + self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1") + self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2") self.add_param("mue_ref_fixed", 91.1880, hidden=True) self.add_param("dynamical_scale_choice", -1, comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2\n '4' is the center of mass energy\n'0' allows to use the user_hook definition (need to be defined via custom_fct entry) ", - allowed=[-1,0,1,2,3,4,10], - shortcut={'ckkw':-1,'ht':2,'ht/2':3,'et':1,'shat':4}, - ) + allowed=[-1,0,1,2,3,4,10]) self.add_param("mue_over_ref", 1.0, 
hidden=True, comment='ratio mu_other/mu for dynamical scale') self.add_param("ievo_eva",0,hidden=True, allowed=[0,1],fortran_name="ievo_eva", comment='eva: 0 for EW pdf muf evolution by q^2; 1 for evo by pT^2') @@ -5598,10 +5575,8 @@ def default_setup(self): self.add_param('niters_fo', 6, include=False) #seed and collider self.add_param('iseed', 0) - self.add_param('lpp1', 1, fortran_name='lpp(1)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) - self.add_param('lpp2', 1, fortran_name='lpp(2)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) + self.add_param('lpp1', 1, fortran_name='lpp(1)') + self.add_param('lpp2', 1, fortran_name='lpp(2)') self.add_param('ebeam1', 6500.0, fortran_name='ebeam(1)') self.add_param('ebeam2', 6500.0, fortran_name='ebeam(2)') self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", @@ -5644,15 +5619,13 @@ def default_setup(self): self.add_param('fixed_ren_scale', False) self.add_param('fixed_fac_scale', False) self.add_param('fixed_extra_scale', True, hidden=True, system=True) # set system since running from Ellis-Sexton scale not implemented - self.add_param('mur_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mur_ref_fixed', 91.118) self.add_param('muf1_ref_fixed', -1.0, hidden=True) - self.add_param('muf_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('muf_ref_fixed', 91.118) self.add_param('muf2_ref_fixed', -1.0, hidden=True) - self.add_param('mue_ref_fixed', 91.118, hidden=True, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mue_ref_fixed', 91.118, hidden=True) self.add_param("dynamical_scale_choice", [-1],fortran_name='dyn_scale', - allowed = [-2,-1,0,1,2,3,10], - shortcut={ 'ht/2':3,'ht':2,'et':1}, - comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n 
'2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") + allowed = [-2,-1,0,1,2,3,10], comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") self.add_param('fixed_qes_scale', False, hidden=True) self.add_param('qes_ref_fixed', -1.0, hidden=True) self.add_param('mur_over_ref', 1.0) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/common_run_interface.py index 6f82393c3f..3c5601e27d 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/common_run_interface.py @@ -5205,12 +5205,12 @@ def init_run(self, cards): if self.run_set: self.special_shortcut.update( {'ebeam':([float],['run_card ebeam1 %(0)s', 'run_card ebeam2 %(0)s']), - 'lpp': ([str],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), + 'lpp': ([int],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), 'lhc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), 'lep': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'ilc': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'lcc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), - 'fixed_scale': ([str],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 %(0)s']), + 'fixed_scale': ([float],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' 
,'run_card dsqrt_q2fact2 %(0)s']), 'no_parton_cut':([],['run_card nocut T']), 'cm_velocity':([float], [lambda self :self.set_CM_velocity]), 'pbp':([],['run_card lpp1 1', 'run_card lpp2 1','run_card nb_proton1 82', 'run_card nb_neutron1 126', 'run_card mass_ion1 195.0820996698','run_card nb_proton2 1', 'run_card nb_neutron2 0', 'run_card mass_ion1 -1']), @@ -5795,8 +5795,6 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed_for_run.remove('*') elif isinstance(self.run_card[args[-1]], bool): allowed_for_run = ['True', 'False'] - if args[-1].lower() in self.run_card.shortcut_values: - allowed_for_run += self.run_card.shortcut_values[args[-1].lower()] opts += [str(i) for i in allowed_for_run] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/launch_plugin.py index 262d39a736..3bd0c281fc 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/launch_plugin.py @@ -38,11 +38,26 @@ def compile(self, *args, **opts): cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py - logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend in ['cpp', 'cppauto']: + backend_log = pjoin(opts["cwd"], ".resolved-backend") + # try to remove old file if present + try: + os.remove(backend_log) + except FileNotFoundError: + pass + misc.compile(["-f", "cudacpp.mk", f"BACKEND=cppauto", f"BACKEND_LOG={backend_log}", "detect-backend"], **opts) + try: + with open(backend_log, "r") as f: + resolved_backend = f.read().strip() + logger.info(f"Backend '{cudacpp_backend}' resolved as '{resolved_backend}'") + cudacpp_backend = 
resolved_backend + except FileNotFoundError: + raise RuntimeError("Could not resolve cudacpp_backend=cppauto|cpp; ensure Makefile detection runs properly.") + logger.info(f"Building madevent in madevent_interface.py with '{cudacpp_backend}' matrix elements") if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' else: - raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) + raise Exception(f"Invalid cudacpp_backend='{cudacpp_backend}': supported backends are [ '" + "', '".join(cudacpp_supported_backends) + "' ]") return misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/ufomodel/py3_model_FDG.pkl b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/ufomodel/py3_model_FDG.pkl deleted file mode 100644 index bf5a732979d683e3642a1177b58851862f165d66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 49027 zcmb__2b>he_BAZ1hyf#DLX7N!gpGg+5fNl1I7>3%I_wVgjvMH^vm|Ydvw%70tWTdg z=bWB7>oaFD>oe!@opZZ7%uMZq|KEr2-L0xyea^k7s=KRudU_5os%uKt&z0mnXGmR( zq=w8+wx()ZW~Wninbw{YTk2DdJ>4!PN4Q;!XLj$>rAu?NDdiT;EJCrh$?Z0CO%&!O zCDTyXm}+&qcLp}ablbb3HPh2w4VbGJuS)-(?i%RZqu42$*%fR_cg>kS__sOJpwaX+ z&1-3Zt@9e1-LRK9G#9gPl>&(?zbCNPQ<*qvpW&NicG$mVG-CkPUZ){?+ zyPj^jprJnFxa&LXDc8LExwUoB!rh>{xAtsoXim8sYH>kw{vvmynLU9xPg>?B=O#1Y z+_<{S%r(%|&{|7*+}_J(u7hf8s&P&&BiF`0%M>BAXkN*(q^12R_x<*WIGJOPfQ5`UMItcDHnVQ5L)X9cbY| z0Vhh^>7{Zs)i>5o+|S+0SwF1K*w5YC*`T_Qv!Sz*v$3;@(|eiIXPMI%C4{k!16C*G z+m?Ks2nzav28(8P0VFnGEN; zI~cwmITaw`L-rX_Aw=S=HDDhv4VT<{Sgc*Y$W zfM=GtJ9ZAfo*jIrT=1QPacFaQ0X7G|ZHc?9f$oNZ7`0-W{&RYT3KiL*N9D@DTL4|> zjyBMBOWfV9(BZV5IK~}ghP%6+Wsh8zu>niQ-P5o@n{g{)nd**@&)li*UYK92+X*M+ z5>^L<_3qw=uzra*=d_Y+LWG}8hEx!)@2%6np@qL&Q9ukMLqp>z&xC}+)md4 zu}ylqg6^%wI%W;^ZU(zwcZW-fwqj?d|6j^kXO=rZF9ux*)FvC}re;gB=NK*vYBJK$ 
zJh!L209!Bg+(pG^#gLja$6Z`ECrGE~o~G2(Jw)ebse5SF^%Cw9Kl9ewhNk3PtVCpy zd)RVSa8h0Eyp{zxNgs}rG|&2tEiLnE!`35k!k(Sf)d9jtHD?yPN6vDOQu-SFD|L@f z>beT6fO%MG%x!hEp4(pCrMgSIyHvZ48&g|e?k;Q9+1?`cDQR_=d+sqhvGy6ehkI;w zQR~cg1L|mjcHAVVudX?`%yW;Qxjwqg!$PA?W+yp%wYnBLVN#nzp-(J!GSzDug-;%G`mUT~0xMw(rEq9Q*<0tAMxighhb2}^PAe*zDFp;W}?m4Xv zqCeL;wz_Mp!w{Y0ogjwu7{l5ohVzS^f&5QnxIkmLu+x#A)6>03N3FrVc$RxfF+}WI zU6g8fFHN|YdG5bvCq4J_cnVi!-G3+CD+^P&N>i9VeR`>TwWe@QltSq+_gYQiI)4Dh z?BQN-N#zEnvW`jRMw5z3=B8rJ=H_DOA_tM)Qas^r{A0KX_SS@Zo9Es>+xOj3TwPS% zP5toQnQ-s&+`DI|Jolb>BKKz9`x5T`If*F61BT+kkb-(VlyD#R+(!(>qs5RZr1($5 zeav$oC&d$SiYK$~QwjI!LW*Y$#j_zrH&8s6aG&?w7YxOVq$moqcq!q&?76Ry;=gf< zSF`SG3HS9JiWFzm8~*N!@xXw*sXGPBq0car%AWT#ub5fBJpMG276L*+<1r4_#`nz;V{Wn%A)}b{(v6oxEl3a;HzY z5d1hg3vH`Y`4b(PPd)duc=DfT-7gaEm$Bsau+Y%d)RsxkZcMpf>ET9?W|MI}@^wkO z`@gLFjeqb+Hn%i4)Fm6;Zyh?m%evot?hozmk2v^H!JmqqtIeFoA?IhmSTnuM{lyf~ z`l|}~TQR0OFaB`Ysc?Viq-=Np2-F;2;r@vxDEU{|ROxc+c59a|IGYEXmP=REsI?TK zVB6cJ8yr-8GhcD(uJskpwmh~|Dp`%2;@~5zqu|LJ+95myNe|SsvLGxyblUUykMS5UpScf<|gq?w62M3+M9Airo9;oo@}n}DD5p!&q_boglq|0 zn09~du@!xO+FNVGHn5&-3)?Q+!J4$UN3q!X+4)rmuVy+jAAlz3EX}-Fz2Omjc$t)- z#m{^oN@3>1WsvVEYL^{QN}DQX#^6}Lsu3~-RaIyx3f8GH3=S%^g;A(f>pieXLM7Fx zjGI!UauhtN&<>GCm8fT>3N|6bVGA`Hp*=>@=WDd1HtYoJ$u?st?b~vAw6VT=qqaFVKG6g!gZ7z^NK8`}+>ep#x%$ z@{uyjcVIRh7;{urNv-eb-Yy5l9OWY<={r_ym)S8#RjJgWs-sYkf_46-;NU21X+~j= z)>m-$nfW)Dn{pf+6g+9r4$(NIQP0Z3unB2|EgXj??a@r1KMpP0Fb~!f7q(pl){H|d zip6QpI_GFrYr5{I88nz`&Gb@f3#v8Kxfjoms`M^ERfl6C3bElRuare-@rPq^unm`; zWpaq`z;GOjQraB!FbH0(3h9@`su29JpsF$-j+%9{9{~qtE-^A6sde4S&2&GCn^NYZ zQScbje! 
zsdSq9s)8r8UvxUC94@Dz#aHlDl!6IeSt+Oa4k&m!N@-Jtf@j3~Rh7$`sH$3LpE1A!Z$v#SH^C<4X4t|MZ_ysN(&wjmn>O4I>&YFk?Q$oqN%1Ze zd2T$*`87QNWgY#ywJ|&;jwqFT(Bh|gFG}eleidVWA4REXlHX%>I77FsD_IQduUy!G@ z;Tc#@o`r3f=U|N>&!gxci2VJ6yM*o>nk5cOI48V-HdCFBlfsK(6^ErVc?nfBUtUHr zHebrCUpP76Ma?=n--CmrG1QF4`&!5Sc=d*Vd$4QjkBmT%cPyj&=i?@;Hpv3yU5hGtyBpGg`jIN2!p3tOq+uPCGgiGCxSk?41n6Y>YFCx60HqQB^m zN%S}BS?Pk?hY9HlTPRTxTGOF2-PmVo+6aMNhIBDb_+cA#1~WvJNbzSeO2o6unT-%6hN~ zSs%7giVe^jNwFdObS}k4=t3#Vj1(KQp5{K~RQvZ#q&HC*dmj|S*!!|0F75rfz6ojx z*%a23&0rb*=IjxRehbub@dS&DC)mR1`=d3Aek=9~5A~&`vNalXj&DPc<<^gE3kS0) zH`#2*dYbdwnQXGzp4&2;0Vsso6tiQPO$llV83^miAXsLz1AD}>8H{>XhQKCdC~RRi z!_XRKQ_4O|(B?@8mRqPl>KOD7$jDYoI zBrK!fkv(G3?}U0*c7{#JF0h5s?~2wa`cdrDdGx!X3!|?x(T`?5h~B@cuscy0{TLL& z==WepGmplioRB?XJsAhf*vHc!i+wNDvoZlTA=R*jvG0vmbIh!%kcp`2F>?|-hZAZ< zxlBfbPN*sL*bbWez`>-3o1_w~2fK%VSy;nunbcGi!lb6Lqe*Hy$_beP>j`hEGpT*) zk0rGq>RH(zHX#SV7A7@|TLfo4(q}0Rx-mhlS`YF(wb4;-qotTGu0zaG8xu``ROOqh9*sB zKEZ+$)&h1k$t*-UA&X!=Sq#f$4xvAm%%P}f#e+@A64+QW&Q~*WEb1~<4nsR3+vRYS z+T{q?tQ^VS-=>WdORuBUd9*sSu%5KTG8k-IUDVbtOHnr~g=KU_5(PUznJkAxC&w|` zBUH*rN2GQ+7QSF|j3|@iP}RwCJPOvyaRR|GIYt{jPt-b3q9V}9Y2Yl zOw#ZqT5T@;Phl(P+Nmgn!*d$h%EP+=(XtXm%G$!GtZXlDkpW340F;)REXLHX%>K zb~GqY!PhQNYwu^+FFq*Gs`EK@J`d~33$XD)c@cFpC@<0FA4$Toc$sbSvA`80n*6c& zFG}H943}3?)vJ~Tu%70D0}D0(HuIaTP`tNL2*rDw9gTSJpq!9* zVLf>dmg2one@wg&P|wPTunGAHHYT1)b_H6az5QeMiS6yB@(EgWZ~v4o+ur^e986}c zN#=9b)7;a;WWHd9$$W`In9Nt~Xp;FF<%IkX){}2wnasEJ$CCLD^{jjkn~)!1W69*~ zo2_)|201<1^t8)QvcQ(v>$8{$g8PqYC*OP5w;if@dzm znJ_}SqN+2Y2nDNn-Qb{jdm8b&v!3Q28H%?Wx21TiqY#R>20I$@dZ3(;HDNvJ2}|+T zqCY0y+Nft`9oU4d3)_)+z2L)VQ`&od_KS*C|JeY0uG8d&WP8Kb$>UiF}u;0+?L|?MYbRIn6pJNjedZI60Z2EZny7`7wvO5kgkf!ccz`^ClEL7juuIRw^|p|ElB zhM{i6E2T@t3r-BBQie7?F_hC8SFubg(B!LFiBhOyxm2O5Dh@}%s^SPZsN#5|;z-v0 z{Wnl?M{Y|McS0dlac6ckD(-@ELUx7qWE3n_+>QR2ilb4_%I>fU83WsqihID|RjfwcsJJ&>{uHEm>F{tqk&d{6l`;uUzJilc3Kgu7DZT@X z#C>9pvSE@yRi&*#!7A-kI4JF2M%rnt`+IjF?R0KSX=k7iO3Noa0%`X}IU)POda^$( 
zr9FWDn6$G{&&q+Y38{tcNZNzoYnP<*C9qR+5I#*43JuSQ)l*q}ovMWWLfw_q!LMo|T2LxI%)B4S|ttF3JkDZ*!3WGWe1y9aqkJw&*4(eGs7d9d1!4?K}K3dZg zv3k&Jw+q-Oy5loaE<}S)wu|VoO}2~SV37NmATMD(&57pJn;ZdR7?Z z4JdeWBYVVxyb1NJ+zgwLTVM-=ycMlckhif<6y!*`9Ss`f9rW0Oyb}%vS!05{i}hf* z{TnWKb6W;^4+@^#t6M~K@IKVDazAWB9)OJnX`V8AP1X^t znl$4NQ1IkK-6G2PBh<690yZHZ!xm=ziT3!EK0o8nwBd7De8vXbE?>f$jK4zBn(^0k zbdd4?;0rSzA>R4p}6k<%TeK)VscWi?cFEwefb)Tt;(S4%hA|gt?4iN0ZAQC?{kr ztS5WIGM91m$8s5udRF#=O~?e;!d$A+8s)M#`$V~v$wV}0E|ci7E(e7y0{e39VF3WAHT{{Y)c1zjOXtxaIge-^k zLb~#CVpUi%7?M_kWsp>op)|1m=?aqgT+BrtM3t0E}*+9Dsxh=K32!&9) zi`mg=cL~Y~xfIru%V4S9zvz!?cRA{~mV(8#6l_P@T?t>iT&2CQX1}<0*QoPabzTSS z$@Q>t?QTHbXm=xBk#^A?vzzFM30NgJqs15S7L-B(E96$+fh&dE;tpIip{mN>fr3@p zJK>tEmK1hE|*@sZi%EPb; zc?7m2WgmqP7g5^#G4_iq`?xxvQ0J4do;(E`SN3Vtjk3?s6)CHG^s{V>i8@@KLyIr! z^C*R)R>}*gs&+4;VAbv=IH+CPX!kPf{vI7@_X@YAcK<~o)b3SwG}^s}azb8*_2dm$ zYWF7nG40+$Ju7d+CgdI1jjeN80@gU%ULKy?%ph>n;|Z3WkcA6 zYy=xq)+n(tTBAF5z1b&xgtSchpsq``zI51@YMa2pRGLgGo3ieIP_mmzWixKeR5nK; zOl1ppG^zAM84m!$deR@3scc1mES0TM$HfFJE+$|LQ`ru!Q7YTB&(gHXAgu}HT`&zm z7slRfVlQUh|0H6piM@m&XyU#y*t(SnR`4&q^t5LdswZ zV=qT*6nh2xbRK&px-j+@6MGfw!4%SO>B?}TF!m8Bgt3og$MC_x9Z^fjPO$iz6)dCQ zg*{@??}~a>M!_azH`v1HN24{0es}ii{K3F6=)&mdndtXmJ(x9q^ka#_==Ve+jD8$D zhS85lEg^fs;u9}eMqkYyvFP_kJu4Go6EX?5F#5@8jiR5zKAlIu54tdV*F>LSJ(xFs z^fg3b^ixp?qo2l(Vf52c!!v-ecm@!b(eKM1vFP_h9nS#5;u%2L!sutAHH!W~_USzO zT6AIb858|MtOwfr4`n8a!sutC5Jq3ej%L=^qnwZwEItr}W$bh5kHzkwo|OjJgrs2$ zV?P+J(f#B`_6aW(M^s1?8uaMXOpop8(*g%`X*0RZV?7uNKNpwVG8aK1%%znbO)eRf z6Ve8Y53pdF%L4jixhzCID~n(gvKY27mqXAR<#H(dgt?5Y5DyKS%MyBQxf})ubD3{) zIh^%ikM?hp9>Hyy%aJIAxg5ogCYPg8PDmCO&jrFVm!byvu7sGmT2`s}19=5y`b#p4cj4ppcMzNw#5dK96_R3tHu&BV=;d1!6^YaUr zBP!(z6tQ+ltA2T>tA8s@rh1`~?n>5!$?i*c6`EAKt5FE0yM`T&bl0Mskn3P^$pcI2 zZlFIV-HoW@Yg@4R+7@g_(%k}IyWFb1Z)3l>bhoSX4t3rM>&acPap~?x-AH#2T{h|N zrK1Ds?t?FoZe*q0Pq+@Gdq80_)rT7C9%SAB+-az}gn9@~D&4~H_ZV{#UD(YEz4Hg$Wu!SkU zp*`ND&rk6!ZFn2jlXqa-vn?mZAJ7t}SS3Gl`wmk4NjJ_^A7)bgne{Y3GjfKSuNeMG& 
zw}?{w9rdjI0h^FNVPh$pp#MUv?rVCh`ET|K&s0@q(gp8v(lb?86s*VaA~<-aI@|== zjrH`Jn6*xY39>u4-_#K{?pGF`VxdOZGwU)n`(zBrp-{#%I2^M*#b5elli#2pZ3_2K7YdY z*M_ZNJ=q$zUABQWW4|qm#m>=r-=a>>#{)O%1u!~IYw*p`QrQk&^@P8@ZnM-`<{y&= zpqe({B*qD>I9A0sKucm(d;@e~tcq`d4vJOr4bUBeDs~6wn7kLj$Y6LWUN0lw5Uu;) zNB7ruL(!t*4MV|`Qtc3lSB834%3%{y0b3|urS_}y2*s3@?vn|?xDl23cwD?Nxic+xOS60a=RFNp20Ym{iR^h8iM#I63w>25>uJsj| zEO-lxPM0y6t*E_t_-ng87cdq!B04WQqVqJYEaXBr=pObZ_0mdXHxK1Q@p{fRi>d6 zKB$2$E}11W_&4txwuSFUot~5izK*A-@VgjLWnTr^j~nAPLF;KOsfF`I_D9o997Vef z8!}7|K*hX_OH#>3nMEi5-=#h^FV$Qx2cle}12;d}*p`x7)F-vgP8*VgP{n6Fuzcqm z-{Zv>w>2i4X4KTyf~~f;OlHHIR#Lt{H1rP~SSEFFIepV4O)FD655FEBPg1bN=i5!W z{g~>Br3*`C4mVTazHpTGAKX6#56Fj>4UxI%h7|dhxw@97d5x)s;&4kVVFNs#q+tsP z%j95gri6UqY)B@JXh#|yqc$mOvq24HFwrzfONdLCX)YeNLurZ3TN$*g&|;u%BTz1_ zIrw4R0|%B%#>ek?1lkmTzQNBKfpS?80*^L=jGj?5FeEUeP(HADa4{QGnWS+Csh9L0 zq=P0-%Z!>5S?EX8v5<>2n#G0Dlwm#`5=O%?2Zv$Ep(}&+6n2RXwoDESVLP2>hp&v} z2qihvMp7&Y}v}fS+3z6 zV+*HDjt#@voCBtF#5RG-O60he5go5YC)kL}<;0K(NA51Aa?;AsPFA#23>sfy$k{Y; zzu>qcr-q2ZpEJffZEBX_(%ovz4dbc9?XXIhafj1L95FvJf98BakjS{d@iihPMd4h~>- zEzRkR5TrLg4`)Tn0VVr&1@S^LKP8>Zclp~kRe8!FsBL7mx%R6K&TUahvtQsS_ zg7M!gW4uxsuj-Jo9E?}58Y8-b@tTz}UaO4Pb;!u$<@Kw^h^}C~VP%XrD&tKZGV=Iy z^QtkTD;RHC8RMs`vm@Y@+1MUWW*JHm*m(P6HDbOwCM73+-?V*G;&D4 zpLbUJMhC0rmb!!GY529JX5wCxrc9e!BhRpf%SQC!d-vs8G|ZCcV5_?(6}lC#woJ+M zXyKyRMDPNtp1cUl2=GS_#N+13ODOvx?E2~aqRMk?&rOpxO`^09{xXWyMags?t}Cn z{rrl9cK2hAVZKMq!qnea%nuA^aB|W0hkO{JKCt4T(aVSLU4Z%#Q41AVp{O6{q3Uu+ zK8a9YdGXsTfB&SZ0QFO%b}z{PGe!N}pa%IbYO0ejBFrE5`6Aozj)bo*q6PkIG<)(t*toolnr6#4+*ak?ZM~=Y?-@Vj80NRcT&*DF?-cX<0?ds3 z5MkDzgfPLNvo(hKBQaMm!2C%ue=fjG$uA-1sNL>6ZQB*6oKb-JD>2t9!2C@ye=opn zl0PEMyVqWJ*^=izFTnhhnClc^{-v0I8_Zy8Eo#an+oTId&rH}qn$J9b^1C|~pms%< z>Rtt?MQHY<8!Sf2*~$#d^rw52ZvWXL1<`BqiKA<3q&r#sF<*^neG1T4SF|+@+87g3 zd=8~!AUz0DI4{>!ke&u)ya_3Fcx+T-AZrn%a8%b;kaY~mbOW+Me7eOz)+I>cbnB%c z>lu*!4M^>7_DI$z$R-7vY@i?;8j#sWlbi_^i)15$6i%p(6{NQTnQK6DW>5^I4?zlN zP+tYv#DFv!kemq<1KE@yg%f5o1=-wyY;82@w)C%)cHL)7f5PxIw#V2pqxY39(9O)4 
zeuOKWF?|2fxZoz%5LED>3Y%hQEvy=37~1?BJ}7i49qO+nN>Pd5wMr9HoWU#FF=)4N?7{AN9AnXjZx6u6XIATiG8xC-3ceL` z3vRbe4B3a~&J7ul{ua``2$M%T0d08p2R5!}-2&O0ot3i>IAaqnphsXOG7-HjOp~~A z9@AvBc`^l-O#F?BXreI5eYlMh+9nFw`bz?REOa&8G>>j7+B}&C8&9ltflOy-@}$XV zL7Tx2d1zdo;vpm0INID&*^iwy>L8<3N{0gy?hl^@{{U{1hd&E#o*W39gTFv(+1ZAV zOb&w2f}i9jdHA!@=1Cna@$pB+pP!m9_3W(#ykS}F?>LydIyehTG#Bh%DRi@N&EeL0 zTyxRpi33Y6-asN2xWb*KfmFS0@YEIiIi2&bJ7CEqsd!k;iuk+B`WFHis`I9y{mrrR;o5 z(AUCu7$Nfb4o90ON5JOrHOZ0eoX^)}=Q|30Eqq54B9AYNHay`4o5PoprRPkMl%KuJLmD}ZQ(qDAbFf8qRo?&V9AL; zx&t<~jhB60?QFDpat>@h8*X2-f9GtxZF?TNS=i3! z)_H6fpv{vDVe{F_D+a=sOkL^;l;d?x=Ic&JMS|0BOm0Ot3)^knI*;vkwBhaR zuz76w34%M>KbNgAw{p1)-7IW(bL%{|d(ejWJiz9&mCJqXpUYO5Te;kiZWgu&xOE=e zgJ|>QA=n%?+?g(whuL4*f=^`d`3h#C9!?h3$s_PvXddNec{Kk)8(!N7n@>|FkF$Rv zjlX08%@gojXrAO|c{ER<4G$N>=F^nRGwfeTQyygVEc_Ol=eSuO>r(TPR@jXmCh; zk^S>&urv8I8F>kQ3(d>iERW_DwBd#?Y(7nyyvqKCG)AiI2Xg-18Lh~s%%cJ=WZFpxrY#vQ{xqQL?`7}6J1TukK=o^;mc^S zagNpnA;*v0D2JnU0Xcp`yM^OtcF*JZ1#O=E3LEFBi#UGcMmZdH%JDneEgXNayWvO$ zpN0I1#;p7WJ4^nC4R7vr!2->buCQbe9=-8zjNmhrA~<~R^{|SG-^)nW;Oev++&LkT zAs`R#A9YuX)eHrnFc>GRhqzpp<=nfdG52orZe6UAPvxi7gFtzTu8B6h$q|-01y`x# z!a-Y$+o+)WU>e73Z8YU1;-jy_o`IHay26{&*6ja;rmPF+Ea?SXT<8B5cWq+>ez{T> z_1x2R>M85NjZfXy#~)8NU|)npEzKi7;fM6HA)Ly9-@k5a!Y@v?%0_IWV2fpA6tmJB z){mT7Eyj<52Osg5NFVs{A)6uWi<&2!z><)kh)GH|<>mxUM?lyM0HhDMZ3a(-usLd; zYyle}G&D&+Zfr&14{XYoISBm~VJm~cN9tNrb+WYuf;(vGkfC)gt;LH=WSbn6Z53rZ zgTfKRuVU88_7)gkTjQh1)?#1*XFv{4vEq~%97a>uS|@o;Vc_fee1_f~1}{6?(RZ<+@2cpde01k`A9_PTGvvI;Wj8zSXbbM{iaW-~ zO^*(v-YkR;jvhJU$R2izu@;Iwm13Ns7!y+P=81m*jLu!TB;)NQds#>(C`q*;so^P3 z$Cr8N6Mqp$cw?w;L5b{brd*$}qz)RN>g5DW!Q0pR-H()1Egq@^`F#6=FXF zF(riHg}DV{e+$F`3Ngz-G=>no+O|L(Xo09zh=U9SUuy1;P58j)UV25HLKWTYsY8l)`7Kp_P zafpH7i(7mMUUpg_4z)md3bDjM@YN+g1TQHq5QkYH4p)dH3z`$ zINCt)eHA_gF9j_SSqntFLM$~9s|9DqG%xck5X&qO%N61n12H#7H8Vw&$7Uut?=g<_(O8xD{{^-A^f>^`136A=PUdLKD@Kcmsdlt zvSVLp$G*sdeX(L+;$!Pi;`#8Ut9+)6t~5*JQakZw7UF*?@#ThiQK;Ea4oPw6CuyEh zE}c1+|CjSgS%^n~1PXS~P4c&{?vXBc-087l)u|GP_O 
zdgSbZ=Ftt@U(p!+=k`nFemnUC7V-y`{2@a=u>t>q`EH zA?M4zY~=q$_BZY1Z&}FSR`PcYdAD$x5g+s5w(qKC{;r+;Jq!8!O8$W%Plnelyh8Qw zpWp`SN)Ol{+6h0h5UxY2VHZvT){B41)73-4FN%Db`pHQI2?9hNty_6<)=+pH{4pG&^6 zQ+;cp`cA37H&nZX7yX^x$lz)A2Rp@&7K)#g;%7s_-}m;lYZm`GeWKBb#kQV3w)HKsZJ@DjXkzOUPV1UrG(vH5XmgeN2cd0b z4{c*hXuUPGJ|?s?L)rE%2(8ndAsR<(w@^UtI(Q7SuRXv`ECFt+0d8gjNpx#XbQ_asZJ6kG9StfTK#St$v2a^E*>)DP?UihR zA=^JB+cucuc|sL%72CN=EL;PXYmng@5_0izzToWQfBD#d@ILl{o$O$z8El~$qBKK& z8s{8;mMDk)ri{)9K4v=1&QWUNC{vDdpCed94Id^I?5k_2!JUs*g@5LX%`&OPww6<7 z6(RGMQp3^a$q3lk5^i{@jAUP>kIpT51DJC+%^yumEIR_o63b4+&x>VewBcQzusnU2 zgrhpLgBfHAAY2cH2MgKN9>6F|0J~`bqx}HFu_%|_*;mEbp@UF5On(eEwWPlX5%ba? zi#B`<0T$^yfBJ;0wySb=HqOp8-omt(GEFc{fBQ_Uw4aiw+D^2$g=nG@P4bCCL91jk z`>LS(uhQhkp(r@@h3D{KgoB+92QJI>n*t__{`)Y1JpB`B^P~ngzAFrusqCx4?zbwV z|9_8l8i*{hPA7j}tTWJtSFpmyV;v#;vhP0>)~tB`p|Jab$r9}T3?MJq1JLHlEZBIk zBjrH${b$1d-(sx=ktNoH$e$N$5^bK$hKDqYaD;D)nXiJa+M1!$ zsT@Fcb`*|I0)ahFNBH<;MfIdun3S=0S-0-@E7I%dBE>V`IqO5X6tu#!u!yTcJL+( zc(a1H81Ts1wP?2gU6zn*o}J6JaEWrY8m^}BURZ}*b$MJFJ6D^9Yrb+V zFkE~^tS{OIX4WjKi%TX8?Z}HP$cq*E5QEIui~7j;c+~%7Wl>5FwZnN9xFrgAn1NfZ zyMar4_K#Z+w?G`B5Jwsa{)}G11o6L^77p4`=xf>ij<$ej6};VmUl@XSIB2Q7L0f9) zT4v!|u3X0$F22?@U)m-))(&@^1@3r-JHfzh8cNIW7g@4D(E@RjLY! 
zQi+^`z83IPE#Rjq_~{0mKUe94cPMRBp0sD!xz4n3ouyo78!j)DwnMH=9@jZ`u5&G1 z=PB3uhKuhQ_VWt&y-Zv(xxkKmp#}LOMZVY|^QEvpG8)Xj$8T3%f~sW*E=6lP% z;pcK-d2$&NP7!`V%K~w^1>y>Y__u-J4;=b2>1XG9zTj8B;u)97mFR7OzsdrCwZdOx z;Q4c*K79PfcKk|{WO+k(t)1;U3)}U|c7tKNAUqxH41qhmV-|fOh_4RvzgV9C>Ew;} z7;dt}aI?m6i;3Zre;xzAZmF^6$8f7XhTAML+^#X)VPdEVRbOQt$~%bRPJ0Y@Sz@?b zW4On}aOXeO0Uu*_5W~Ip81A#gaKFazfQjMne;z|eWAUIphKDRMJghN1Vq%!|Psf6< zUbD}KN9{5E#}dP18pGozhFAWn4t!CWJ%%UjF+6FB;VF&bX%oXH|5OLQNX#C?Gxivs zwZ!n8#_+s}A^p!|=x8inu*dLXEQVCt|1QEyXqzQ3Glt-4?pIL3x1iXBI~UjbM@S4( zm%-!YRkY?DYq@>?dEhk#c-;p`ZykTVFjOY zg^;|6iN2HZjvfBp7<{VDIdYb~N7(%DTkPE;_`<~Ep7K7tc!K@|{J}2+vnxIz#p$if zv}rY^@(~*JZb<9AhUQjTf$A*zn7t&-QfIHfRh&-w2p%NI|sB2DDe#_Sm>=kk7GcK*aY2*s)NDf*Q3TIo0^gwKx)?w zT_WweE7EEP>A)zw)eDgLRgMU0aB0avU&+Cxgk2*Cwui#5X<%z3*q#Nj!#jh;2$c;i zB%M<1ldhFRy0((8V@UOM?mr|Q)G2AN9MbiabbUjrzl`FOZt#yuD>}(^!yM9$ zlyqaC)cMs6es5*$#yu`)&n@Mii>Y2*%00JF4pLu5+QcCJY>+lJNWd~keE00cR7M)= zfPz=bY?gzvxuR@gP_8y8{X&#rG9CgH9zZa6Vqh8@4=)9coUl=2xqtjALly;%3@T`3 zhcMrGqimTIb$^X|D--q6ChDy!t(zO`SIlWpTJ2wl#wDf!k1{iu~L{Pb5Xz4izO*0;Fi?9;j@Ce75u`-jDMG-N8aQ zu@k~U{W~Dk{WJ7MLr~6O2ImAbM1vXX2jjeDA{l0hWEE4-V=K*JD^s>|!}hjet5_LZ zuAIoOqueTU*s7FmxX+e8DH3{wg>YIYLgN;2$e0^T3Ew3acju;0w~WjQYDW!fCll0w zD5#w^sIHu*ImZd^ZMxrZ2a;WKz;;!zQ3fm%f$gSXobeG{&cOmuqjR8kSEw-tsx5-r zBZLB&?YM(EH8uxoPlXz1p!8=x{0Tl@p%{}jDjsZPuNmRS)-~T z)WjU9NeVUDKyAloOZ=#&7$}XaFu@ASzE2KRLZNC5)YK@dsR~8ebH|W}cpXDMN>9rH zo33Cp4A}GtjNhc-LM_sY?}OrU#(?R0(Bsi&d+2H&};>&Ge9#Upn3xoMdatklaLCfb0L)jHb=qc8n724Fvoy( zkf0ud8*-r13U#o7dNG1(451>`WBZM!*pvg+tWYfm>ZJ&3o`LE>Ej<>y3YF4tmd{H{ zrlGDeRa@Usml09eR>kP;^r-IR^sDacY&+poTwORTsyB89RpB$^ zzGIC)iB!FwGh)Iwtns&qs*9YRC;Y|QNm%k#cXxK1@CR#`u(rB0cEWG0-OpNAr&=#H z^p~EiyE&65{7K(WNUM4^r)I+MtgX(Tn>aJ8*K%qn{Kyu;RYLVTPW^=MSv!oiUQWY= zZ&^DQwKn~tdP6f_K%La@y!%OJG|4tteP4j2=4g9ure&DSSKk7QuT&PQZxMa`_WInG zOl?y#-6F~6WUDMj^CZ1GY7x9jy4Gm`snj`(rQeu zpX6*YsZF`-TH59{HZ;$z)gW5saP;w`!jH8in_C-NnthiX0gop~D#rEIb%?62P2UJT zAHPt;ZW(+X{wTO-)*{!Tt%xvHKXh&b?g5(sE9TB?8#-}ZWpd(}p-poulTCHajV(i) 
z@FMNT)}gIU+iY{l0E0DP$NmG#hYTMwY`_lv2h`(*{%j~4R#r85Sn1$ll>*AaGBM4oRsBWgG%qprH#O6|Nc1n`x&Y1prNHhi>6KFtmh^IFZI<*JV?C6x@aKhq z^K+W3c<1Ugm*M#JtN!@oFH5;n#VV7lPJj8w6(QDMT#Gqxr@2V-S1Vk=q;-kFRfGQi zQ<|szw4R=M#`b?iizi(DC9O2imuWq>@pPBgQxnftX+5v-RD+WOPZIu7od^1~9+r8e zPU|t1huO3q5_yzN>k*9yxiBZ!R1TM3noa-g}kpSuOgr}*V+)5 zt6X1Pu0SC!mxd6Ri!onZuB3c%xe$UZT*Uawat-1u%O!-bELQ-&vOInJ%JSUnE6dZK YFB;ETzN9?m_>%H4uR^x9%^vdq08s(jkN^Mx diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/ufomodel/py3_model_Feynman.pkl b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/ufomodel/py3_model_Feynman.pkl deleted file mode 100644 index 3e55c479e2cbe319b2de3f58e86119ef116bf180..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42837 zcmb__2bdJa_B|}1h$1K|m=F3n(gx1X)3yl^}}iusdw;xT){VlC&|7fH~)! zv(KD!&N)5x88JSeIj8?Q)zx9TYZv%^f8YBqRkv=Pb5C`3sGjcL!;5O0Qg!u`YPCkx zwn%!!{8Tnw(=tDkuFYk8OmC@6H}RYNsiw4DG^Yr~Y?Iw(&Uz@c zrX<%;+nCPUT{{BXTZir0kj?e5y8&~Z;x&oyVXuqW?!{KooX%iN+3U^e&VQS84Jw-+ z+P%I?Fx%SDY;RE2WllH5)V4IXh~2ZQ^PF{9vr*%}Uyz1fO6JyFf38yD0tbLH&2LLuaqw5IKTbC{C7`JDCfPi;$Eb1p6T zf0Mn1V{bWU1333gx8_=!XS6lVPm8@3@Hra$*;`k2YO^>({k;)dYHwq?gR;~fU|}pQ zjDQu$wtA`IXsTDd0^<820LkVeYXMxr7 z`38}X6JgO%dwUS7;&6M15aX`jq8a~%8E4yr!;G`-VlehAU|i`jmJ~7$@f6nEL$$(s zdzj|yUZBwGZDHd9c~h&8I~Cfbafv9W!l+p)!DvE(9OrgXiqA z9(Zn?3nLRy3_~7fY5$kM*o>2(Bmj|7-_twzaW%fQDLw7MlCkvsgJ?Q!N zz8ZS|GJB>GI@lPy*|YS<=w@b_UC1)WW3lZ0z`~=NWzPjk=h*HRkW97r4{wX9_5s)y zn`0;Bfxa%4wbFI=JbxGT4`BxuQrD20(>FcbK1eG}54BTTArG1l8V+G;d%jk9cg$VX=)p8r@`ZvhuRBPLp0s4*F-(dL{=eDgGZE04YxCzD3=;)A8aP#8A_c7 z_MKoiVmx^SV}16%sW!U_BihSM+w9Y()v2k0XRB0guA!wlYqxZCQr{zpbgu(iaptl+ zybcI;>0t}Pv&CwgHPqQTjB;0tmlAEo)*SbLX{+ZvdtoF8bsR}(E=4PpVXx?^`_A*y_wx*#eRgWtX zSz;fyQjOq@+M3puML06S$m~pAFC$TK9hF0 zkE<%m&e_nT4ldA+pJDY=*Brdevrm|_5kgvVq0uJuQ`~wr>RRN)8EqB^`lMnjhm)Q= z!YAtyR^xk$#bG?umRjDxoSwJONZMywhpn_A-ExIm5O1cfxvD6AjD1H)|bu#+1r!$9gcnHe7Ehc z;;N#mE~*`EcPH(89Q)q+X~(`VEad*Y{Xo)wut12Sct}$`>{D<&9!c7dI`(6l;_+e( 
zlTYzP(tgshpCZN6VTxz+_OnU*xj4o1n&JhYq6;WqOxiCw_RE^$6;c#=Dqc<6uQ~SX zqpeX+I58pMO8;{HK#|LE91g}Hyu+rK33U*p`r>G9=p>UXsx{1NO3 ziE{f-mBwF4BR-gaGmUk18vp1tn1pn~S)X}GXA~SMf(>OOT~Nyn>%gn` z(CSiI7d7T4-5Km3YDSmIdT=m9>A@Dm0Y}zH1M}?e<|!MfdUtGjjG9WeC%ZD)4N-8U zmkRKYg3=rHyle!Ul#OBI$@YQQpQdH9392gFO&RTP(hAuOb(L&iHW)@!`k{fzuBVgT zT-CXG`>Ry9U`M99B?^vgrJ93Ow?;iL{b7@`4QxEs0raZbCfl;;k7Y$d2Ew7z+m0=! zu^fa3rq@HKx4o*bQuEIp%N^L22@XcVkzy4PBv^uaUWULXWhiVs!C~-b{1F_^7(d5E zg^Ykh$qHRKg}@G;BP_G4T4^ zyF$jIsh`@<&X0N8kr^WY68<$;Xxb4*l94IC=RgVp|g+Fk<`DFQ)P8m+a*meH7`J+!)vO2dq_QlFPSG6 z{^i-VQ~DTbKshNHSVs|Ybm#iHs3qk*SVzu>W%d^^B9#4wsORM(*rZ$x8_)g{ zc!TUOWsHB6C@q!C;81h?a<&*26mkU`sHTrrb0zB;&Tnfkt>!9LsOD-Ed^OiF&{uOU zYDu{c){*OBspbYogw))KdR}gVP0GJu<7#e(H&Am6W0q&MhK!OBnU=RAgxPPRv%ih? z45y@3t+T(K6=r`23V!x?GSJWdF4U58H>@M~z%u)L84=36|W>>ox5v)@!_{|M_|_Ug)79wiF1e+&gb`^Oom=g|`=C*?_4N1lRZ?oYEll>0NN z=jB=0q&x>3&;5CL^)a)$TwXv;9W!5Kus@;3B;+MH)P#DOEvAF!D`=po&9tajS@(7i z_loj0_NA!TQSe2*!9XqQO_Y=J7OW$0!&210*&Y)04(fS%7d9#H!Nx`XhdsPAL7BXd zs#ic3}?3`J*r ztv}F*SxJQ%ipyw88Ko$)cm1fMbNFRN-M*2ZSoh|qE9Pgol$c)#R?pfc^W;|sYB9f| zoRr^T9r**6V*X@%NX%cT=jCtMr2GRL7t;xMSfgS(E1$1I%Tn8?2u;TA(}jJoeKyyU zx~e+QW8GA#$ilA?)|&qe5k8NtLsI`dR;5>Z>(a~3-5mvggw`XQ9-$s6CuMzDM>c@v z2=!!pXoNOIJukgrlhPYDK0+J8tG2ePHQ@3=~DCq-_+H*HylWEW06b&4k zE%exIrs^DHvLE%8=KAP<=AY2fzG+rb-*%^=2v#wH=#!=uJ#2R50;W-uB!He2ekDOPok zO^GTMS@;piGXMHx;U^y}@;gs>Y=)3DHa0`)<=6~E!5^F9WYc3a0_CKPgmq*TEXSsl z?V+(LLp?7E*rb%h#>b`t-e7Dh850_tN*N808k;d}GL6kxG;nOT(qpqD>lyA(xP)+* z=sU4OG2>A1#q7*LEoK*#ld>zUBjaHyW&+zoVs=9v>rPm#J7MEuCcztsnar4wm{OSn zj}kMLO(rpWpn+nx)?%iyp5YGQi>YFTV)jJA7c-rKTFeZTld>1ABYVSA%sy-niAkcK zmulFg>^$cgMFJ^yMDCPhZd@=JF zsKp$Ja#Ctw9XSY=Vp41miJ6ajUTR^JQU@CslZH1Cvw$%nF{7m(9wo+NlWEH|pn+nx z(PA>JyVI;%PqTyBmtq=G@WnJSP>X3s8S6$^M_OSi#%6m+jG&&EENoJ8uyHYM@CITQ zGA1OZOcueT#4Ki$Nz4*7P|N@=W-06LH1ot9!oC!9C@F+27 zv&kgp95hhOKrQB6Rp*iZJXI>P@GGxn{(09j{H7Z|(w|RK|7YDAz0$paUXIO$C^&MF zq6uylU5t8OE`d$TrLb|)m%$q>JT7O9f1e{ESD>yWU&#iOz8(dv6ctS%`9{>SQiR1y5jHOQW_Sb1w=iaT zMk~!Ip^?}2CeZzu!lWKS 
z!I4K95!%5XLp?8#!zSek*mzP;!kh8ehfgskSRam+r{PeO?HRV1Cfl=UV3IrNB%fnF z!-;0q=_H?Lg-O1Ef+H_7B9!DysORNn*rdDy8&C38c!MNgV~n5V*o3?ehf4AdwwRK9 z6Aesquuk$V)-#+bR)bFRZC04%zfo}H9Y%zbd>8e+ya$_<|G>tRd>`H*$qyJ4Bso?- zghM6y5nD`2evAetS*(-%gmrIoyUVjx?8_uSMZuBJRF7Z|evW!xzJN{2m$309zhVz{ zK=oG6(n)XMp5HilP|{!)3Y{10tdiW%3H->81MsuG>}x2$_pz)k!+xK!fbqu|I7 zsz;FckErM6C)lL?3>#1U7xqwzXZ(DBr7xUsrlTx>Q_;DqAv(+76{4O4ZkB(*rLz1J z1xNl;J%TL%Mm;b8z$T>=p4W+I*%{twmPPcfah6?FbgpWs&a$hj^SIVcm5MApfoQG% z$wTjX!0>Tx9Uuh9wHj|hOE2eCcN831PtgRTd!U||^1!Pk4iK^oES_Cw}=T z=>>eRT6Div9HB-2{+ zM=`@12aw*^*q}9TOD{DJM8T2m6iuLU5bAl^9yTdEz{WKWhBwey%$SZfmLP;0M`(>h zSa)v~jn5YMH?Hke1z0$08UOi(uHemPzmiS|&3l&{8H-;80qovc;rj4>VBAD6M50>+XF& zPfHd1Qp=tw_*$kjP-~fiGSj3&q~Q&;EMQEarCjRaP+Baun6^s;8mOgAYss+g&M{BR!R$*djVSn9ni#0H zG^30aFf3NUu+(C+J)}iY#|ju0D`42TmNs|;EejbFXsM7za40Q{*<#YN1P#=Z&{~$V z?#?k!%OUJbEr+7uYjGH;wJbv!3t(6*fMKcS2)2i`9Emy>z_3^V!^XAb;SIDL!A-*Kz^_wU!f6#`+f)>t9%EIfd;Z zEvKT6^)D>ezp!yFXTTe1Ig>GgmeFz+97@aCY%ys$2MyFxp|zaLx;w`_E$6W>wVaQF zujK*;YAqL{oRo`T9l02mS}tLGNXwu5Y~~0V5#L{wuiJlf_h#ag-yz1uyHMq!yBwipJ0rCyP-^;L|sXFiVY?yPosfS z#%L+eu+l9r-eAn~j8>3Q0wR}XZz6=bkJY)q#kzZ|Ym(0WZK5#uf1}{%{tg3m?(d?E-z;Gr z`422}f1mB4+&@4)FCW4tam_ok3~9$G#l3UmJ) z1wZ#M80i04{v~Qj`3lyN|H3l+uNe`_{(q?FdW;BV z-vf0lz+kZegNE4H?t%*!MyRv)@@~-tSBb&i8_r7co<=ziStlGDAh@swQYLdnv>c+#|9k=|@NRhD(Xr2L)eDl7U)G zHOg2q!8$S%mSSeHJtSr}>UrVDNXmY&aWQk*BO+#h`qo&?0V+CIwVM_*Pa&!s&Lj1O z?}2bBF*PV)^`v?PcNJ2o=Vd-@Qfgu2S=PZDTsfr~)A3z}1qk6zF;OQ~&$@eev0Nu* z5rxyG0R=}gsz;F2!Kmk@5jH7Ju<@jt;SG{%VNAzKwIYN`?XHuuS$A(JkI_j9QJ7Q~ z1xIqKN03w->UmiRo0LVc@uU{R8zi-aF&!th6d_D%l1}On)-&CB#v7^U;-N%gQVt4u z;#Bnrk~$3ayc`ajlp|o{NgWAqkknC(@sDwpWpXqe>KK=2i|H753>ui^WS!)4)-!#W zWVudq1^Y6|l_)rJtm+XYc^v9_IUW{Ii^9f}JQ3a?$&(nfJfl;~sLUdJ@yQ5bQd4wN zr?8&chDnXlNu5d*CUqJLj-0M~1WBENdS1?iP0CrY@ubd%H%RIn#a7m!>BX%g z7sE9NVmpl*F-k5$MZXa!=~Sa!%0~4HZMrqxTql>IUZQ>(SeR;TOUvb`&uE*U(Ii)( z>d2L_eDRbo6Y}lR##Gbn>Y5s`)zp;9RcOs9Qhvn1Zyz$GOs+!w~OEY7Pv>@->dNpwm?Ge^MNPm0hutndWcV;w?bk_ 
z@$h0g)47yx_N12#7^*g!SS_=wOXPkxoAw9t0hP^z@odU4A0G0vVVcAJH00sc!9Jp3 zA2q?2$zwijhturw)sZ}*NS-v2B;+ZdWb+P2vP_;{9m_L{<=Hq(0weUC&$3$wEZAP& z&J^EbdN0!(?D^HD^MXp}MN>Lu@{*sJT<@Q*dPKOj^60r|wo z_XlJu2V~XikUv$(pJ`<809M=5oT;yIkMs%o+z0Qkl6Mc|jA_HycG7C%sFOkhTE1`@ z+dqhWsTjX%m$7VdLjJpEj0gqe*Q;avpJM!`UB(0$zg;s%go5$A)iHjr7=LJ&k;ls) z*NhRNVEk!yj6W;JU)p8l@#ohyV?-zze_I{n?~3t{b{WeSm&u=N#)wcb{eOxEBc>&Q*khu`_}@ymBZ)fFoUC!xvcLy`xmRt{EdPnZVd>b&TsM#&tVj zER*hQ%1Djtwa1uNAInHD#x-vYP&0+1gZNS~i7;7sWb*%}~YNF<556i)kK+r!0_u zaJeb$^7+D|it}pD%aAogqKpf^If_+9sZ0cN3%DKG5|)s--_R*lEnBf0FPNw7`as35 zhkkvE6@%NFa9v_>{T19cE?kC}uNvpMH!ewi^2-8+qdr77yQOhKx;dLmH#Ext^0{el zOWe*e+<^*rI~P|C9hKvhE`!+D&Gn0S2EMfFpxxYDLzvqWGcJ7xg*jMbdM6ik{UOBx z>O-pzny_;8o-x!CqQ*yHh(aA2K~yvFefU^-8H5+wU#vH zQf)FRK>f4%tP}Qnf2SDgWTN(pp-xe#Q(aVRTfJFkJ=eW-hcA{W5VaPcKB2l=_8^P9 z&8HEqZw#$Uq3x;BChC1CJclwNkm&@8&&wGKWG@XeRi~6bJhW9qAbS%ezE$^8AW01} zOM`42o^BzKYJ$Y4+rA29rUuzxgVc;SXEKW*o5#jvwgTY+82f=WUyn(_gbHP{A3@?1 zYOVs=UxU?;-faE6JBdRe3kcFLHpKM` z#L^&pXkot&+}>VxK_&*$K#=&1$taM6HOK)PWa*#-hd%p3uNX)pLE;;`Nr5zLkR}&o z=c9f(N=ff8DI6|O09uL^fV+G{Q!Sv~@5Z?)7x z8bjQ;)&_OAC0Ll2j8qX*7AU`7{FRiH$dYK%?r{ZKb`_5HG7A<0p3*hk$4 z+Z1}D86yH0eajG2Fh(xjVeHEDRA{6QhYwHwz;gJ#%cXQ$j%2Xn83dlPkfLA!)C$%c zixN2sT@18G6DdNQhtH8?U`gw*XlMH?+U4w|=)E(;^x0g|u7Kabv6A5tj$`3-gh*=QALJb^&~jTnHORt1p#{7_74P6s6N@b6^NBMw&eO=^vJpyJD4i0y4LuCBw-YErdk1`u+zA`rP)p{^T?|&N zTXP&Kc}dg!&|#>|9ZQb+-H0{t-9v~7-@WiTavy8~U#;BF;3!|MnePF_8u%V0M1=1l z`0%6{Yyn?d9$|2lFKy;~6tM=r#|RPOdmKJTo`5ajYmz4!9OY{=^F4)F1K-nxi10lF zpCix07VzcdIR;1ha%R5g5o_Rkfe;bC7vaNWYp~?=zL>g%i-wmOt{AroW(*q4oUb6- z!1*dcBAl>pw+IxWeH%V}y91WA z-f3`;(#45Vd57_em`544bGZ9P_~ukX-bEPL{JXIk_d?)3_KvXq2R=MY2ODL>?Q6z& z%*NZcA0o`a_7QtW*gl5OkxyWwYzbM#_>S3l%l1=*8Q4B!?+DxH@Hz4YYylhYt(M7` zj4xz!_xfNYdAs&2gc;cW%ia;Tui?X2M_{9D3HgTcg>3FV5wP)A?Y9Usuzkng5w`E) z!&_dk5w@~2`H}I3Y;kR6@)N=gY(KMigzXpj9QhSC%9fDd7+=U1*H$LKBh0||2YW}@ z{)7+jhQUVI5@qr?;|tm1+7j{)!VGMkaHSh%>kJ=0ehC|8OGp>S7qZ2*C8R6D3~b%l zJECnJ_#9amwtx+Hrc0$e;}x6tiwu5W!7NmVlO?sX9@-5wJ=iTmvp#(IHVSN%rc8P= 
zK2GCavVdkov>Rx8v0H?uH++t41RJGE$i|G1(8*@WF9G@HVQM<8J%G&m&o zWqgzdJCjS3lYVG7&}`0b5t=RF!wp~9C{3Ac#rQal9<-cnjdlY~e|C$|Yy+Po17M>x z3E7tMaT+~nIT?s{1I>2q7NHpgpCjAD7SNP0E=|Y|jE~ab4C4;kd>M>(15GizMQBRk zb7Tl?gr+PZLm3~X!70O|8HRQP&2V;$(2Ri3k&&!ulj`8ptI3_S$bELiBLUw}_FV?}% zligwcn>&+Gabz-G7)9^V8~4TtenXjp2A8`xu42OXGE&u8olZq_K?-E>$i4eVd%)+& zG+0vb34uh0iPp#!IIEhrB3lT+TQG= z$gw8wG=t;?> zOYs+pyr1|>b734>d;)ge4(=5tFpkuscCB0T5~cQV+KsgmQq8Ld}u+upUA~ zlhmSWM&MWerLF)Wtq>My1U^!iP1j1j0fIYd>4=fFE!pCwC1MqzG$@peM&TC2r}=B; zU;_--*7zwhTMR7VG#21ADV%1F!)$7^wbEjM(b;4hnwd>&0gA0qM5Cx|2$MCysB8%0 zW|LDm>XGp_cGriUg(%OHMarX}8eEKuBTMMAiqw}&-EGcCpY)Sr?lRPWOl)Lgge*lo zrf{&5AhI`ky?yZz1%0T7=F31X^cH~DcS3NGN3fWFXY{4dWH^WJUh#Uh?h7a#p z!gB9b$GM3$qvceFDmE%p&j1Ew{;i_gMI~|?A`M)p6ClEM27Hd32^-?V=ZF}pxJtmK z?hgfA1;TJwUf-l5^V#TUU_OU%5$1E@!;`?UA?C4iK0_5V=e0XEyzN&&bpZkmR2Q;; zgz6&r@WLl7sjz0@XESkqb_rw2mEnrt0CuSX>@o#*c?7InuJFOQpf`YBX#l%Qfn6N| ztB`AaFs|1PVAmSJu2W#wN5CrO1`kX?Kfw41_Ji=@eO1^HcG)O-n6WWztg0VDg8}+O4F>F|7$3oY8a_v!femAq$+L_lcBVT@2C(NC5CMB0K1W`F4TB}*MaC+y zo+uf>USdE5>}B{Ic?C8MRxYnHR)KAdk^$^B21LMKhtH8WV8dV)@+M;ySYMP3U~e%X z0`@k1xLpVv2CI~J7^}dxLdgL3E(0Q9@4<&RY+=J-qvd_ZDzI%)GJt)+fC$)!@Hz4k zY#3~ee9Tyah4y;2D*Xfv2JBUgk6?caAHL268^#_hpEEXwjiu-pXfR-Z$@mENSMcFQ zUDyzIVw8N%*cdj}pZ`OH0s9-qN3g$z&ynw7!`P+rJ!2I%eNH1?k^cIg_Q_k$&MiFCsKdxN6Ra7Pst!H35`VWB89!oO@A$t_dd@g)Z@ z3&ONr&9vPNwCgC^bv12`PrH++H(0%^fo=D)r!WWUZcbr6Lkc}q3hTQmc$XuI(Xs(! 
z)uiE7vA)3buX(b#yy}Sv!|-m%j*$t|3qD7B!-i+(7}Nty-YdbA0b80Gw=ytptr+`j#?d}wg~#Z=!lfrq!Op3lV8LxBm4o~GLWyi+CLdrR z-&TzdUTh#QQRG83dA*U` zyNSPMiEgkxO*E3_mNk7=$>dM+}%JpNfA!ggu8GNpa_e-quQGAO)>LLHSq1B z_@=ph-Z`+MOsW{Gn0HW8)pbC7-0mTzMD|1{!}(%5u_EV-8Svr$GAz#*1O3zgcB|9r za>?Fis(lPpNkvudQuz|gWnacBi5=a?;i-2fA`QA`5g?*#Hhhlo@rlr>x1vJ!W2_?M z8PU4~>D^F?o*?v{9CFV^M+5i%#EWnr0G}iCV8h&%av)41^7eFrx|iHCUI>-yPfK3*C%6*v!{x;A>KR%`TreS1U(J3uDzL+|i#t z{@z${(dIqCs}sX!^{-s1y%j))O>7fCvWW#gN3yV?=~P)NImW77_KoM#{!GS$$9geQ z5EE>n(1^8x!jR=cGDosp1RoxPhUG*&+CN}cb0Ttv`NLE|8;lA@Xl+tkVoq(TA+R>ZNbr_3V$fdBMF&&M+AIMlm zADovWo3h}xp1WNQxm*q)LoQbkKa$Ip@Hui7EON=L=kK*++nGy-1pMnO|4<@Vn-jRk zkifMnf$Q7^{B4nt>lv#CV^}+>v@8DxbT!Dok%$rbH^GN5jlzaEMY-I}SjE0(`!X;1 zZb2Ud?X850(B1~0Be%nbX)ELo#wyx1+Ly_CC%PC|?;=u!^=|m^Cl_JEtd(*vV-@TE zYc%a~;POtM{`G*j?Y-R*hdIN>zGmUB4bsq54JaB;|D6duBYF}TeEA84yUpQ z)zR@eI6cInf)q;RC8#kB$jcNN8IV`tbL3T63`nNhKV099Vd&r@%i~|Wi!HBLaLE!V z3SH(G?D_@69)!oC!UOP{c>rEF48R*|0N!*5z&l-xDU-Jtt7ZgWU2|^x4VRgZISk}Me6ewLX-+i^n=X%e~^&bP*`-^lYaeFSWb z{NRDj2~8yTeL{cZ{)iX@%}?wa$@ypa9Qg&76N!JYX*Clm9oe|Qnz?>6aQ&{h{&2ax zjXQRf{K?qJz&FWX@ESz?&ESZLf8cYZ6JDJP4gASa{%@!jrH>)Sf^St++OFx%`b>AsaKca1YLf2T%GSz%X{3uwP{CHiZxGOu&Y>cct`Y ztm5Dksp_{}^%`A2_zlpTGdu#l1$=l<0yYdiTDD@W+K2g0o&juY16Y3rwoL?Vj12I= z=BOtk)SW9P8Xk`l*%na-qJiuj$$C5Z92o@5$;3Yv<_<&nc7Ay6OR~r&-rmf%gMn?Z zVk_2ceSEfY81#1U`UP);@w-v(R|TTK$(ERN7-Gm_sLElO&SA>h=YV%@Rj$z-hMRL3 zVaQ>m%3+kwVbR*>kZ31|QgaSvh8z+qhjN|6_G_C1e!OibhYE8Jm4+Nfs~pDY9A>Y5 z4()A=vF03hG~}?8%3++&VfotU(B8J#*_^{Jh8%WPIgHmij9%Lu_?0^IoSb0JVK+k# z6IBko>m2IWK8Njf@3ZFM~||5D}a4mfXrq7MYs1Vm4C%OMUqiG zH>+v`>b?qfri*HQ;HKsMcIQKK@f5s}Gs_G=I|QF@vyPl6e4RY@vdx|?g12Rs_K^M1 zig#`1;vYx$XQ+Pkedf&SQaJz)bpp?}HZ*5t9;)-?Kt@gb2mdKk)j6$R!O7;Ntv1)j zS7s29;rCGSVvNQ=2rm4IEcW#AXO+r)M!KlAYzv`wNoR@joz3yBh` zLqmX_M$M50?2nClw#KVx9iPX?|6HU#Ths>!odomh<|HC=fX#= z_m-B-^x`spNVyRKK^je{InvBdS|R|;qy-KQ*vdA*1%N#F2Y^_xv$D5~ZX-HC7u4|M z91#Nagyi7R=xuB(K=;zsfC~{204_oezoW4et{3<+^dOtuqB8J3Qd;+B5r6oh$Ast>y*dq&I 
zk5aHlYuFD0*nA9jbVsn*RAocrq8RnGKkC!Z>iFiVj~wAHQvEL0{eIW|E>Zougf~H^aFi}p{Vvn}-qHOo_xqu*9&J9W><>_R zRG8lqj6>P5@)EfMNWsRvQn6j-vSl_2Hty91!s#6l4js@Aq1spdwqX1dOy-(`WUf`o zT<0cZy{n~PZ^&c~rAOFqC}6u$vE8KE-qURVS{+;Aa6(;s+HNjjyG60x>au0V1_OPY zfpBIA1C2Y>K4W1nCH&r8xVcb1o8@+uR9d}a+?tYHLv3TarmmqjCwHg>?^M}rW_7RX zYxS?{XAPS6Ij%IURaKi>L#j5gMo#+?RrNPXsy4LBr+v>F|M*8$Z)?o7Z&~AC-KZ+E z#!dTMMdEN<)zun5?N8SFvbK&jY1;3s@vrPub+)S1WrzCvj8$E%y{7%ewnZ4As%}>G zv_Du|&f4bIoT~M$nrT1L!#`?M)zhk*_5*9zu-40JnD!lO{F~5i>W$@wW_*b_rCxq^ zMc$=E-VLi>9GCP0<*&)LjFNj)+r5UiQn^pH-Oo0@#$4Z$t7%GQS|rt+%E|+9&rk=_ z7Qr`#Ype#4O0Bg-9#q{Pf-Sagg4E-xirmDo7SE<-VY;rS*22rvX?Yki_|#8by`);L z5exCBKU&(dBbpkrwLSJCXu7UCo$DcwAgm-Qk1E6sXINX!u(qDjrnqZc+FBbMn(J#+ z5?Of+F>Y4)s~xH4Y(q=4+a!;p1)ps#AKPM=bdnm%!4Q+-9MskXVXWn>e+sN9$xnQhu`yF&(Rtid}D z7@Qa}dc>%~!v+kl!vE)Ieb*<@KF_m9eIjGxTDjlKGlYScpBAt@(h!!!t6+^ z&m`31{pp%4zFORnYXI=GXkDo`u(UNdQ**j8?Y@8hoa+8OtlGe-#>SRKHLVS`2jip8 wFTlG(32kg>YRIK>(3+JORgah0gP%dm=CD^Rkd~%YW2==KB`>34W!s4V2gj7TGynhq diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( 
neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/test/cudacpp_test.mk b/epochX/cudacpp/nobm_pp_ttW.mad/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/test/cudacpp_test.mk +++ b/epochX/cudacpp/nobm_pp_ttW.mad/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git 
a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index a8e3a6d67a..1fbe2eda67 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -1,5 +1,5 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode +('WARNING: loading of madgraph too slow!!!', 0.503042459487915) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. It has been validated for the last time with version: 3.6.5 @@ -16,7 +16,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +29,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +39,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). 
MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -54,7 +56,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004999399185180664  +DEBUG: model prefixing takes 0.013706207275390625  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -165,7 +167,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.015 s +5 processes with 7 diagrams generated in 0.092 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. 
@@ -205,7 +207,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.070 s +13 processes with 76 diagrams generated in 0.361 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -371,7 +373,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 0.941 s +65 processes with 1119 diagrams generated in 4.672 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -382,10 +384,10 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vec INFO: initialize a new directory: CODEGEN_mad_pp_tt012j INFO: remove old information in CODEGEN_mad_pp_tt012j DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  +INFO: Creating subdirectories in directory 
/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Processing color information for process: g g > t t~ g g @2 @@ -496,9 +498,9 @@ FileWriter t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 
50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 
65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1749]  INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . @@ -507,9 +509,9 @@ FileWriter t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux -DEBUG: len(subproc_diagrams_for_config) =  35 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  35 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 
23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1749]  INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . @@ -518,9 +520,9 @@ FileWriter t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu -DEBUG: len(subproc_diagrams_for_config) =  35 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  35 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1749]  INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -529,9 +531,9 @@ FileWriter t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux -DEBUG: len(subproc_diagrams_for_config) =  35 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  35 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1749]  INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -540,9 +542,9 @@ FileWriter t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg -DEBUG: len(subproc_diagrams_for_config) =  35 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  35 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1749]  INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -551,9 +553,9 @@ FileWriter t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1749]  INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -562,9 +564,9 @@ FileWriter t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu -DEBUG: len(subproc_diagrams_for_config) =  14 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  14 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1749]  INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -573,9 +575,9 @@ FileWriter t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux -DEBUG: len(subproc_diagrams_for_config) =  14 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  14 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1749]  INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -584,9 +586,9 @@ FileWriter t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux -DEBUG: len(subproc_diagrams_for_config) =  14 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  14 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1749]  INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . @@ -595,9 +597,9 @@ FileWriter t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc -DEBUG: len(subproc_diagrams_for_config) =  7 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  7 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1749]  INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -606,9 +608,9 @@ FileWriter t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx -DEBUG: len(subproc_diagrams_for_config) =  7 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  7 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1749]  INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . @@ -617,9 +619,9 @@ FileWriter t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx -DEBUG: len(subproc_diagrams_for_config) =  7 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  7 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1749]  INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -628,9 +630,9 @@ FileWriter t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx -DEBUG: len(subproc_diagrams_for_config) =  7 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  7 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1749]  INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . @@ -639,9 +641,9 @@ FileWriter t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu -DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1749]  INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -650,9 +652,9 @@ FileWriter t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1749]  INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . @@ -661,9 +663,9 @@ FileWriter t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg -DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1749]  INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
@@ -672,9 +674,9 @@ FileWriter t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx -DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  +DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . @@ -683,25 +685,25 @@ FileWriter t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1748]  -Generated helas calls for 18 subprocesses (372 diagrams) in 0.671 s -Wrote files for 810 helas calls in 5.590 s +DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1749]  +Generated helas calls for 18 subprocesses (372 diagrams) in 3.092 s +Wrote files for 810 helas calls in 6.720 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.216 s +ALOHA: aloha creates 5 routines in 0.562 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates 
VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.194 s +ALOHA: aloha creates 10 routines in 0.514 s VVV1 VVV1 FFV1 @@ -714,32 +716,32 @@ ALOHA: aloha creates 10 routines in 0.194 s VVVV3 VVVV4 VVVV4 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages DEBUG: result.returncode =  0 [output.py at line 273]  -Output to directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. +Output to directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. Type "launch" to generate events from this process, or see -/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README +/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README Run "open index.html" to see more information about this process. quit -real 0m15.089s -user 0m5.988s -sys 0m1.827s -Code generation completed in 16 seconds +real 0m28.347s +user 0m24.340s +sys 0m3.113s +Code generation completed in 29 seconds ************************************************************ * * * W E L C O M E to * @@ -760,10 +762,10 @@ Code generation completed in 16 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt treatcards run @@ -790,10 +792,10 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards param diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt index 712b1897aa..db7e3616c4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt @@ -255,7 +255,7 @@ # pineappl = pineappl -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat index fa1bcf88f4..20e300a012 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-14-g6dba8f068 3.7.1 * +#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_functions.f index e986b059a9..47699fa614 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_functions.f @@ -2022,21 +2022,6 @@ subroutine orxxxx(p,rmass,nhel,nsr , ro) end - complex*16 function THETA_FUNCTIONR(cond, out_true, out_false) - - double precision cond - double precision out_true, out_false - - if (cond.ge.0d0) then - THETA_FUNCTIONR = out_true - else - THETA_FUNCTIONR = out_false - endif - - return - - - end complex*16 function THETA_FUNCTION(cond, out_true, out_false) double precision cond diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/Bridge.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel 
for helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before 
computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc index 8b330d85d5..1e995ee72f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc @@ -939,38 +939,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -996,7 +1008,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1131,7 +1143,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1155,7 +1167,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1267,25 +1279,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1348,7 +1371,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.h index 1aaf72997b..f67a329ee7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // 
input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f index e5f47166fb..b6f323bb6a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -893,9 +892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) 
CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f index 0d129ab296..8e939d3b72 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f @@ -337,6 +337,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -347,6 +350,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -443,7 +447,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -516,7 +520,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -529,6 +533,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -547,6 +553,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -598,7 +606,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -622,7 +630,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -630,7 +638,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f index 2d0cc3a394..c7dd26745a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f @@ -324,8 +324,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc index bd9ec082ce..9ac78276aa 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc @@ -916,38 +916,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for 
color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -973,7 +985,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1108,7 +1120,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1132,7 +1144,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1244,25 +1256,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 
) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1325,7 +1348,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.h index a96df4e864..e822c4f778 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f index ae9439cf9e..b23b0a3173 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f @@ -794,8 +794,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ 
-896,9 +895,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f index c155307e43..21e12cb805 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f @@ -360,6 +360,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -372,6 +375,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -498,7 +502,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -571,7 +575,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -584,6 +588,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -602,6 +608,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -653,7 +661,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -677,7 +685,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -685,7 +693,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f index ccb869545a..d1f6d2f5c6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f @@ -330,8 +330,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 0726e0a6ea..2bcaa70441 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -1156,38 +1156,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] 
for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1213,7 +1225,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1348,7 +1360,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1372,7 +1384,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1484,25 +1496,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1565,7 +1588,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h index 5c057176f6..6ad3c7dd1e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f index ebf5273614..5b885a4dac 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ 
-893,9 +892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index c32cb4d43c..1621d47cbc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -337,6 +337,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -347,6 +350,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -443,7 +447,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -516,7 +520,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -529,6 +533,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -547,6 +553,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -598,7 +606,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -622,7 +630,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -630,7 +638,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f index 6724cffa4b..186fa86cc3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -340,8 +340,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 92c74d5c62..023fd2fa7c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -994,38 +994,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for 
color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1051,7 +1063,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1186,7 +1198,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1210,7 +1222,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1322,25 +1334,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1403,7 +1426,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h index ebc491b00d..ab9d7dde82 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f index 4595d5a38e..abfeda5bd0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f @@ -794,8 +794,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ 
-896,9 +895,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 0f523f574b..13398bd74e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -356,6 +356,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -367,6 +370,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -487,7 +491,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -560,7 +564,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -573,6 +577,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -591,6 +597,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -642,7 +650,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -666,7 +674,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -674,7 +682,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f index a06e72a3c3..e21125402e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -346,8 +346,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index 77d9edb7b2..bfec4fe0e9 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -994,38 +994,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: 
jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1051,7 +1063,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1186,7 +1198,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1210,7 +1222,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1322,25 +1334,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1403,7 +1426,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h index 2c3a739550..55c42cb947 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f index e239a05794..b3ffe0f7cf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f @@ -794,8 +794,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -896,9 +895,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 7240e416ab..9dfed40308 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -356,6 +356,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -367,6 +370,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -487,7 +491,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -560,7 +564,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -573,6 +577,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -591,6 +597,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -642,7 +650,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -666,7 +674,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -674,7 +682,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f index a162af362e..330e1e524e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -346,8 +346,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc index 888768ef3b..9394f70f59 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc @@ -994,38 +994,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: 
jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1051,7 +1063,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1186,7 +1198,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1210,7 +1222,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1322,25 +1334,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1403,7 +1426,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.h index 01180e3e92..f776ee3de7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f index b15c35131c..f6e883a40f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f @@ -794,8 +794,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -896,9 +895,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f index 95e3e81bc6..5e95a2472f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f @@ -360,6 +360,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -372,6 +375,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -498,7 +502,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -571,7 +575,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -584,6 +588,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -602,6 +608,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -653,7 +661,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -677,7 +685,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -685,7 +693,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f index 16e908ba11..f23d0437bb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f @@ -346,8 +346,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc index f1617232e3..e91e521f1d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc @@ -3084,38 +3084,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: 
jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -3141,7 +3153,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -3276,7 +3288,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -3300,7 +3312,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -3412,25 +3424,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -3493,7 +3516,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.h index 363ab0b79d..f51b7656c4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f index 1108637c49..540fb9c82e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -893,9 +892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f index 02c9412706..f5b3c543ca 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f @@ -337,6 +337,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -347,6 +350,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -443,7 +447,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -516,7 +520,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -529,6 +533,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -547,6 +553,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -598,7 +606,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -622,7 +630,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -630,7 +638,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f index 51476eb7fa..c4a8cef829 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f @@ -372,8 +372,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc index 7e011c2c62..9d223187ec 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc @@ -1491,38 +1491,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: 
jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1548,7 +1560,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1683,7 +1695,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1707,7 +1719,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1819,25 +1831,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1900,7 +1923,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.h index eb46a03db6..1f9c0ec433 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f index 0f260565e3..07052b5092 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f @@ -794,8 +794,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -896,9 +895,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f index acc21004ae..6244b1d099 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f @@ -352,6 +352,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -362,6 +365,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -476,7 +480,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -549,7 +553,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -562,6 +566,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -580,6 +586,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -631,7 +639,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -655,7 +663,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -663,7 +671,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f index d46d392b1f..191b5fc3a0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f @@ -378,8 +378,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc index 20e3623198..3fd3c8eab6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc @@ -1491,38 +1491,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: 
jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1548,7 +1560,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1683,7 +1695,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1707,7 +1719,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1819,25 +1831,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1900,7 +1923,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.h index 516900ab3b..916fafcf3e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f index 0ae010df69..efc726a583 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f @@ -794,8 +794,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -896,9 +895,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f index 2ed82fafaa..80fec08b91 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f @@ -356,6 +356,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -367,6 +370,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -487,7 +491,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -560,7 +564,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -573,6 +577,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -591,6 +597,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -642,7 +650,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -666,7 +674,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -674,7 +682,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f index ea575a9bc3..7c40058cc9 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f @@ -378,8 +378,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc index 1ba94ad37f..cf638e9285 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc @@ -1491,38 +1491,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: 
jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1548,7 +1560,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1683,7 +1695,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1707,7 +1719,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1819,25 +1831,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1900,7 +1923,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.h index bcc9e9d736..067e81bad8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f index 236f6d16a9..efc7fe7670 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f @@ -794,8 +794,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -896,9 +895,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f index dcf20fe396..fe3465e94c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f @@ -356,6 +356,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -367,6 +370,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -487,7 +491,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -560,7 +564,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -573,6 +577,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -591,6 +597,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -642,7 +650,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -666,7 +674,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -674,7 +682,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f index a780b1f4fa..7d934f4152 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f @@ -378,8 +378,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc index 7665fa9af8..56579cfd80 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc @@ -1072,38 +1072,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: 
jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1129,7 +1141,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1264,7 +1276,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1288,7 +1300,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1400,25 +1412,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1481,7 +1504,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.h index 553048dc11..650bd18517 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.h @@ -168,6 +168,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -192,6 +193,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f index 956dc07485..a05f0af626 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f @@ -796,8 +796,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -898,9 +897,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f index 9bc73e492f..8248186d91 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f @@ -368,6 +368,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -380,6 +383,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -514,7 +518,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -587,7 +591,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -600,6 +604,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -618,6 +624,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -669,7 +677,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -693,7 +701,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -701,7 +709,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f index 559059580c..d1f5e36812 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f @@ -382,8 +382,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc index a7fde33970..18bed4d243 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc @@ -1078,38 +1078,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: 
jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1135,7 +1147,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1270,7 +1282,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1294,7 +1306,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1406,25 +1418,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1487,7 +1510,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.h index b187f2ebf3..0d2dd21169 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.h @@ -174,6 +174,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -198,6 +199,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f index 9c2c20435d..dfaf7c9ba2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f @@ -802,8 +802,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -904,9 +903,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f index bef5d7dd9f..5d30ea45fa 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f @@ -400,6 +400,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -412,6 +415,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -586,7 +590,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -659,7 +663,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -672,6 +676,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -690,6 +696,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -741,7 +749,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -765,7 +773,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -773,7 +781,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f index 56a2755163..40511822f7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f @@ -394,8 +394,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc index a299144ca6..2b1c5591fb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc @@ -1182,38 +1182,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: 
jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1239,7 +1251,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1374,7 +1386,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1398,7 +1410,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1510,25 +1522,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1591,7 +1614,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.h index 98e755a489..70826b49e7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f index bed31f9d2f..cfd22899ba 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f @@ -794,8 +794,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -896,9 +895,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f index 9c2eb40089..6ce968148f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f @@ -360,6 +360,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -372,6 +375,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -498,7 +502,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -571,7 +575,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -584,6 +588,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -602,6 +608,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -653,7 +661,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -677,7 +685,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -685,7 +693,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f index 8d7c00bfcd..0243f024be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f @@ -378,8 +378,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc index edaf7372cc..4db27a524d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc @@ -1078,38 +1078,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: 
jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1135,7 +1147,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1270,7 +1282,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1294,7 +1306,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1406,25 +1418,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1487,7 +1510,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.h index 0c551f2f4d..3c73ffcdae 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.h @@ -174,6 +174,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -198,6 +199,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f index 48de6ee6aa..93cfcec297 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f @@ -802,8 +802,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -904,9 +903,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f index 018c1a985b..fef6aeaba9 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f @@ -400,6 +400,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -412,6 +415,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -586,7 +590,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -659,7 +663,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -672,6 +676,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -690,6 +696,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -741,7 +749,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -765,7 +773,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -773,7 +781,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f index 440f838b87..55564f8914 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f @@ -394,8 +394,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc index 57a20afa9c..84d0fbbe9d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc @@ -1491,38 +1491,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: 
jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1548,7 +1560,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1683,7 +1695,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1707,7 +1719,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1819,25 +1831,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1900,7 +1923,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.h index 3290858ea0..977c1f0143 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f index 1b37ae6930..c22306418b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f @@ -794,8 +794,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -896,9 +895,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f index e72dc0ca8c..e1a4e480cf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f @@ -360,6 +360,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -372,6 +375,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -498,7 +502,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -571,7 +575,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -584,6 +588,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -602,6 +608,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -653,7 +661,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -677,7 +685,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -685,7 +693,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f index bc51e47c27..c8073e0c09 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f @@ -378,8 +378,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc index 4a0583759f..db73eca9ba 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc @@ -1182,38 +1182,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: 
jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1239,7 +1251,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1374,7 +1386,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1398,7 +1410,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1510,25 +1522,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1591,7 +1614,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.h index 880e2dace8..49758d2918 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f index d51e86247a..789b82915c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f @@ -794,8 +794,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -896,9 +895,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f index c8106d783a..893e840a60 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f @@ -360,6 +360,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -372,6 +375,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -498,7 +502,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -571,7 +575,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -584,6 +588,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -602,6 +608,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -653,7 +661,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -677,7 +685,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -685,7 +693,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f index ae0a828447..29aaaf5cb8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f @@ -378,8 +378,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc index 8e34c58b00..f8f19a9615 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc @@ -1072,38 +1072,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, 
// input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1129,7 +1141,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1264,7 +1276,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1288,7 +1300,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1400,25 +1412,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1481,7 +1504,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.h index 314d5b2955..6e7d0b1d10 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.h @@ -168,6 +168,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -192,6 +193,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f index 8991a26bd9..2dd6aab676 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f @@ -796,8 +796,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -898,9 +897,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f index 5e6645a738..4b5fa5bd47 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f @@ -368,6 +368,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -380,6 +383,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -514,7 +518,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -587,7 +591,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -600,6 +604,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -618,6 +624,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -669,7 +677,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -693,7 +701,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -701,7 +709,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f index ef2d0fcb85..01163cdb48 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f @@ -382,8 +382,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc index b6b3dab286..20403f0c60 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc @@ -1182,38 +1182,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* 
allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1239,7 +1251,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1374,7 +1386,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1398,7 +1410,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1510,25 +1522,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 
0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1591,7 +1614,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.h 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.h index 89c57825a9..6e4939c539 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.h @@ -166,6 +166,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -190,6 +191,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f index 8d5a646679..fd29cb1b99 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f @@ -794,8 +794,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C 
**************************************************** @@ -896,9 +895,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f index 7d08f78919..6016b9280f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f @@ -360,6 +360,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -372,6 +375,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -498,7 +502,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -571,7 +575,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -584,6 +588,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -602,6 +608,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -653,7 +661,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -677,7 +685,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -685,7 +693,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f index 85463860ad..e7134f996e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f @@ -378,8 +378,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/addmothers.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/addmothers.f index d6cded9a2d..593c620d9b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/addmothers.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/addmothers.f @@ -111,7 +111,7 @@ subroutine addmothers(ip,jpart,pb,isym,jsym,rscale,aqcd,aqed,buff, if (btest(mlevel,3)) then write(*,*)'unwgt.f: write out diagram ',igraphs(1) endif - lconfig = vec_igraph1(ivec) + lconfig = vec_igraph(ivec) endif is_LC=.true. 
maxcolor=0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cluster.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cluster.inc index 8ddf5bee13..940c25eac0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cluster.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cluster.inc @@ -43,5 +43,5 @@ c parameters for sudakovs integer iipdg,iimode common/gamma_args/Q1,iipdg,iimode - integer vec_igraph1(VECSIZE_MEMMAX) - common/vec_igraph/vec_igraph1 + integer vec_igraph(VECSIZE_MEMMAX) + common/vec_igraph/vec_igraph diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See 
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to higherst + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) 
+ ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone 
bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc @@ -91,6 
+91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile_original.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile_original.mk index 6cb56d0409..348c283be7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile_original.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile_original.mk @@ -58,10 +58,7 @@ $(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) $(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -libcollier.$(dylibext): - ln -s $(LIBDIR)/collier_lib/libcollier.$(dylibext) || echo 'already done' - -gensym: $(SYMMETRY) configs.inc $(LIBS) libcollier.$(dylibext) +gensym: $(SYMMETRY) configs.inc $(LIBS) $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) $(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/myamp.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/myamp.f index bd02dfe2b4..5360566ef4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/myamp.f @@ -139,7 +139,7 @@ logical function cut_bw(p) $ gForceBW(i,iconfig).eq.1)) if(onshell)then c Remove on-shell forbidden s-channels (gForceBW=2) (JA 2/10/11) - if(gForceBW(i,iconfig).eq.2.and.sde_strat.eq.1) then + if(gForceBW(i,iconfig).eq.2) then cut_bw = .true. 
return endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/reweight.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/reweight.f index 353e025d71..8e4672a421 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/reweight.f @@ -1416,6 +1416,7 @@ double precision function rewgt(p, ivec) rewgt=1.0d0 clustered=.false. + vec_igraph(ivec) = 0 ! default: no MLM graph selected for this event if(ickkw.le.0.and..not.use_syst) return @@ -1467,6 +1468,7 @@ double precision function rewgt(p, ivec) rewgt = 0d0 return endif + vec_igraph(ivec) = igraphs(1) ! save MLM-matched graph for this event c Store pdf information for systematics studies (initial) @@ -1592,10 +1594,6 @@ double precision function rewgt(p, ivec) c alpha_s weight if(ipdgcl(imocl(n),igraphs(1),iproc).ne.fake_id)then - if (q2now.le.4)then - rewgt=0d0 - return - endif rewgt=rewgt*alphas(alpsfact*sqrt(q2now))/asref c Store information for systematics studies if(use_syst)then @@ -1907,7 +1905,7 @@ subroutine update_scale_coupling_vec(all_p, all_wgt,all_q2fact, VECSIZE_USED) else all_q2fact(1,i) = q2fact(1) all_q2fact(2,i) = q2fact(2) - vec_igraph1(i) = igraphs(1) + vec_igraph(i) = igraphs(1) endif c call save_cl_val_to(i) c endif diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py index 74f6b04b68..c248436e7f 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py @@ -1004,8 +1004,6 @@ def __init__(self, finput=None, **opt): self.comments = {} # comment associated to parameters. can be display via help message # store the valid options for a given parameter. 
self.allowed_value = {} - # allow nickname for some parameter to avoid integer mapping for some var - self.shortcut_values = {} self.default_setup() @@ -1134,11 +1132,6 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): scan_targettype = self.scan_set[lower_name] del self.scan_set[lower_name] - # check if the user used a shortcut value (which are always str) - if lower_name in self.shortcut_values: - if isinstance(value,str) and value.strip().lower() in self.shortcut_values[lower_name]: - value = self.shortcut_values[lower_name][value.strip().lower()] - # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] @@ -1317,8 +1310,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): def add_param(self, name, value, system=False, comment=False, typelist=None, - allowed=[], - shortcut={}): + allowed=[]): """add a default parameter to the class""" lower_name = name.lower() @@ -1353,11 +1345,6 @@ def add_param(self, name, value, system=False, comment=False, typelist=None, assert val in allowed or '*' in allowed else: assert value in allowed or '*' in allowed - if shortcut: - if allowed and shortcut and '*' not in allowed: - assert all([val in allowed for val in shortcut.values()]), "Some shortcut value are not in the allowed list" - assert all([isinstance(v, str) for v in shortcut.keys()]), "All shortcut values should be str" - self.shortcut_values[lower_name] = shortcut #elif isinstance(value, bool) and allowed != ['*']: # self.allowed_value[name] = [True, False] @@ -4186,10 +4173,8 @@ def default_setup(self): allowed=['partonshower'], comment="list of check that can be bypassed manually.") self.add_param("python_seed", -2, include=False, hidden=True, comment="controlling python seed [handling in particular the final unweighting].\n -1 means use default from random module.\n -2 means set to same value as iseed") self.add_param("lpp1", 1, 
fortran_name="lpp(1)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='first beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("lpp2", 1, fortran_name="lpp(2)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='second beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("ebeam1", 6500.0, fortran_name="ebeam(1)") self.add_param("ebeam2", 6500.0, fortran_name="ebeam(2)") @@ -4198,24 +4183,18 @@ def default_setup(self): self.add_param("polbeam2", 0.0, fortran_name="pb2", hidden=True, comment="Beam polarization from -100 (left-handed) to 100 (right-handed) --use lpp=0 for this parameter--") self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_proton2', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(2)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (used for beam 2 if group_subprocess was False)') self.add_param('nb_neutron1', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(1)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_neutron2', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(2)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (of beam 2 if group_subprocess was False )') self.add_param('mass_ion1', -1.0, hidden=True, 
fortran_name="mass_ion(1)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 1)') self.add_param('mass_ion2', -1.0, hidden=True, fortran_name="mass_ion(2)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 2)') valid_pdf = ['lhapdf', 'cteq6_m','cteq6_l', 'cteq6l1','nn23lo', 'nn23lo1', 'nn23nlo','iww','eva','edff','chff','none','mixed']+\ sum(self.allowed_lep_densities.values(),[]) @@ -4228,14 +4207,12 @@ def default_setup(self): self.add_param("fixed_fac_scale1", False, hidden=True) self.add_param("fixed_fac_scale2", False, hidden=True) self.add_param("fixed_extra_scale", False, hidden=True) - self.add_param("scale", 91.1880, shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) + self.add_param("scale", 91.1880) + self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1") + self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2") self.add_param("mue_ref_fixed", 91.1880, hidden=True) self.add_param("dynamical_scale_choice", -1, comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2\n '4' is the center of mass energy\n'0' allows to use the user_hook definition (need to be defined via custom_fct entry) ", - allowed=[-1,0,1,2,3,4,10], - shortcut={'ckkw':-1,'ht':2,'ht/2':3,'et':1,'shat':4}, - ) + allowed=[-1,0,1,2,3,4,10]) self.add_param("mue_over_ref", 1.0, 
hidden=True, comment='ratio mu_other/mu for dynamical scale') self.add_param("ievo_eva",0,hidden=True, allowed=[0,1],fortran_name="ievo_eva", comment='eva: 0 for EW pdf muf evolution by q^2; 1 for evo by pT^2') @@ -5598,10 +5575,8 @@ def default_setup(self): self.add_param('niters_fo', 6, include=False) #seed and collider self.add_param('iseed', 0) - self.add_param('lpp1', 1, fortran_name='lpp(1)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) - self.add_param('lpp2', 1, fortran_name='lpp(2)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) + self.add_param('lpp1', 1, fortran_name='lpp(1)') + self.add_param('lpp2', 1, fortran_name='lpp(2)') self.add_param('ebeam1', 6500.0, fortran_name='ebeam(1)') self.add_param('ebeam2', 6500.0, fortran_name='ebeam(2)') self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", @@ -5644,15 +5619,13 @@ def default_setup(self): self.add_param('fixed_ren_scale', False) self.add_param('fixed_fac_scale', False) self.add_param('fixed_extra_scale', True, hidden=True, system=True) # set system since running from Ellis-Sexton scale not implemented - self.add_param('mur_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mur_ref_fixed', 91.118) self.add_param('muf1_ref_fixed', -1.0, hidden=True) - self.add_param('muf_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('muf_ref_fixed', 91.118) self.add_param('muf2_ref_fixed', -1.0, hidden=True) - self.add_param('mue_ref_fixed', 91.118, hidden=True, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mue_ref_fixed', 91.118, hidden=True) self.add_param("dynamical_scale_choice", [-1],fortran_name='dyn_scale', - allowed = [-2,-1,0,1,2,3,10], - shortcut={ 'ht/2':3,'ht':2,'et':1}, - comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n 
'2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") + allowed = [-2,-1,0,1,2,3,10], comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") self.add_param('fixed_qes_scale', False, hidden=True) self.add_param('qes_ref_fixed', -1.0, hidden=True) self.add_param('mur_over_ref', 1.0) diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py index 6f82393c3f..3c5601e27d 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py @@ -5205,12 +5205,12 @@ def init_run(self, cards): if self.run_set: self.special_shortcut.update( {'ebeam':([float],['run_card ebeam1 %(0)s', 'run_card ebeam2 %(0)s']), - 'lpp': ([str],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), + 'lpp': ([int],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), 'lhc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), 'lep': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'ilc': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'lcc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), - 'fixed_scale': ([str],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 %(0)s']), + 'fixed_scale': ([float],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card 
dsqrt_q2fact2 %(0)s']), 'no_parton_cut':([],['run_card nocut T']), 'cm_velocity':([float], [lambda self :self.set_CM_velocity]), 'pbp':([],['run_card lpp1 1', 'run_card lpp2 1','run_card nb_proton1 82', 'run_card nb_neutron1 126', 'run_card mass_ion1 195.0820996698','run_card nb_proton2 1', 'run_card nb_neutron2 0', 'run_card mass_ion1 -1']), @@ -5795,8 +5795,6 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed_for_run.remove('*') elif isinstance(self.run_card[args[-1]], bool): allowed_for_run = ['True', 'False'] - if args[-1].lower() in self.run_card.shortcut_values: - allowed_for_run += self.run_card.shortcut_values[args[-1].lower()] opts += [str(i) for i in allowed_for_run] diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py index 262d39a736..3bd0c281fc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py @@ -38,11 +38,26 @@ def compile(self, *args, **opts): cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py - logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend in ['cpp', 'cppauto']: + backend_log = pjoin(opts["cwd"], ".resolved-backend") + # try to remove old file if present + try: + os.remove(backend_log) + except FileNotFoundError: + pass + misc.compile(["-f", "cudacpp.mk", f"BACKEND=cppauto", f"BACKEND_LOG={backend_log}", "detect-backend"], **opts) + try: + with open(backend_log, "r") as f: + resolved_backend = f.read().strip() + logger.info(f"Backend '{cudacpp_backend}' resolved as '{resolved_backend}'") + cudacpp_backend = resolved_backend + 
except FileNotFoundError: + raise RuntimeError("Could not resolve cudacpp_backend=cppauto|cpp; ensure Makefile detection runs properly.") + logger.info(f"Building madevent in madevent_interface.py with '{cudacpp_backend}' matrix elements") if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' else: - raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) + raise Exception(f"Invalid cudacpp_backend='{cudacpp_backend}': supported backends are [ '" + "', '".join(cudacpp_supported_backends) + "' ]") return misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/py3_model_FDG.pkl b/epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/py3_model_FDG.pkl deleted file mode 100644 index bf5a732979d683e3642a1177b58851862f165d66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 49027 zcmb__2b>he_BAZ1hyf#DLX7N!gpGg+5fNl1I7>3%I_wVgjvMH^vm|Ydvw%70tWTdg z=bWB7>oaFD>oe!@opZZ7%uMZq|KEr2-L0xyea^k7s=KRudU_5os%uKt&z0mnXGmR( zq=w8+wx()ZW~Wninbw{YTk2DdJ>4!PN4Q;!XLj$>rAu?NDdiT;EJCrh$?Z0CO%&!O zCDTyXm}+&qcLp}ablbb3HPh2w4VbGJuS)-(?i%RZqu42$*%fR_cg>kS__sOJpwaX+ z&1-3Zt@9e1-LRK9G#9gPl>&(?zbCNPQ<*qvpW&NicG$mVG-CkPUZ){?+ zyPj^jprJnFxa&LXDc8LExwUoB!rh>{xAtsoXim8sYH>kw{vvmynLU9xPg>?B=O#1Y z+_<{S%r(%|&{|7*+}_J(u7hf8s&P&&BiF`0%M>BAXkN*(q^12R_x<*WIGJOPfQ5`UMItcDHnVQ5L)X9cbY| z0Vhh^>7{Zs)i>5o+|S+0SwF1K*w5YC*`T_Qv!Sz*v$3;@(|eiIXPMI%C4{k!16C*G z+m?Ks2nzav28(8P0VFnGEN; zI~cwmITaw`L-rX_Aw=S=HDDhv4VT<{Sgc*Y$W zfM=GtJ9ZAfo*jIrT=1QPacFaQ0X7G|ZHc?9f$oNZ7`0-W{&RYT3KiL*N9D@DTL4|> zjyBMBOWfV9(BZV5IK~}ghP%6+Wsh8zu>niQ-P5o@n{g{)nd**@&)li*UYK92+X*M+ z5>^L<_3qw=uzra*=d_Y+LWG}8hEx!)@2%6np@qL&Q9ukMLqp>z&xC}+)md4 zu}ylqg6^%wI%W;^ZU(zwcZW-fwqj?d|6j^kXO=rZF9ux*)FvC}re;gB=NK*vYBJK$ zJh!L209!Bg+(pG^#gLja$6Z`ECrGE~o~G2(Jw)ebse5SF^%Cw9Kl9ewhNk3PtVCpy 
zd)RVSa8h0Eyp{zxNgs}rG|&2tEiLnE!`35k!k(Sf)d9jtHD?yPN6vDOQu-SFD|L@f z>beT6fO%MG%x!hEp4(pCrMgSIyHvZ48&g|e?k;Q9+1?`cDQR_=d+sqhvGy6ehkI;w zQR~cg1L|mjcHAVVudX?`%yW;Qxjwqg!$PA?W+yp%wYnBLVN#nzp-(J!GSzDug-;%G`mUT~0xMw(rEq9Q*<0tAMxighhb2}^PAe*zDFp;W}?m4Xv zqCeL;wz_Mp!w{Y0ogjwu7{l5ohVzS^f&5QnxIkmLu+x#A)6>03N3FrVc$RxfF+}WI zU6g8fFHN|YdG5bvCq4J_cnVi!-G3+CD+^P&N>i9VeR`>TwWe@QltSq+_gYQiI)4Dh z?BQN-N#zEnvW`jRMw5z3=B8rJ=H_DOA_tM)Qas^r{A0KX_SS@Zo9Es>+xOj3TwPS% zP5toQnQ-s&+`DI|Jolb>BKKz9`x5T`If*F61BT+kkb-(VlyD#R+(!(>qs5RZr1($5 zeav$oC&d$SiYK$~QwjI!LW*Y$#j_zrH&8s6aG&?w7YxOVq$moqcq!q&?76Ry;=gf< zSF`SG3HS9JiWFzm8~*N!@xXw*sXGPBq0car%AWT#ub5fBJpMG276L*+<1r4_#`nz;V{Wn%A)}b{(v6oxEl3a;HzY z5d1hg3vH`Y`4b(PPd)duc=DfT-7gaEm$Bsau+Y%d)RsxkZcMpf>ET9?W|MI}@^wkO z`@gLFjeqb+Hn%i4)Fm6;Zyh?m%evot?hozmk2v^H!JmqqtIeFoA?IhmSTnuM{lyf~ z`l|}~TQR0OFaB`Ysc?Viq-=Np2-F;2;r@vxDEU{|ROxc+c59a|IGYEXmP=REsI?TK zVB6cJ8yr-8GhcD(uJskpwmh~|Dp`%2;@~5zqu|LJ+95myNe|SsvLGxyblUUykMS5UpScf<|gq?w62M3+M9Airo9;oo@}n}DD5p!&q_boglq|0 zn09~du@!xO+FNVGHn5&-3)?Q+!J4$UN3q!X+4)rmuVy+jAAlz3EX}-Fz2Omjc$t)- z#m{^oN@3>1WsvVEYL^{QN}DQX#^6}Lsu3~-RaIyx3f8GH3=S%^g;A(f>pieXLM7Fx zjGI!UauhtN&<>GCm8fT>3N|6bVGA`Hp*=>@=WDd1HtYoJ$u?st?b~vAw6VT=qqaFVKG6g!gZ7z^NK8`}+>ep#x%$ z@{uyjcVIRh7;{urNv-eb-Yy5l9OWY<={r_ym)S8#RjJgWs-sYkf_46-;NU21X+~j= z)>m-$nfW)Dn{pf+6g+9r4$(NIQP0Z3unB2|EgXj??a@r1KMpP0Fb~!f7q(pl){H|d zip6QpI_GFrYr5{I88nz`&Gb@f3#v8Kxfjoms`M^ERfl6C3bElRuare-@rPq^unm`; zWpaq`z;GOjQraB!FbH0(3h9@`su29JpsF$-j+%9{9{~qtE-^A6sde4S&2&GCn^NYZ zQScbje! 
zsdSq9s)8r8UvxUC94@Dz#aHlDl!6IeSt+Oa4k&m!N@-Jtf@j3~Rh7$`sH$3LpE1A!Z$v#SH^C<4X4t|MZ_ysN(&wjmn>O4I>&YFk?Q$oqN%1Ze zd2T$*`87QNWgY#ywJ|&;jwqFT(Bh|gFG}eleidVWA4REXlHX%>I77FsD_IQduUy!G@ z;Tc#@o`r3f=U|N>&!gxci2VJ6yM*o>nk5cOI48V-HdCFBlfsK(6^ErVc?nfBUtUHr zHebrCUpP76Ma?=n--CmrG1QF4`&!5Sc=d*Vd$4QjkBmT%cPyj&=i?@;Hpv3yU5hGtyBpGg`jIN2!p3tOq+uPCGgiGCxSk?41n6Y>YFCx60HqQB^m zN%S}BS?Pk?hY9HlTPRTxTGOF2-PmVo+6aMNhIBDb_+cA#1~WvJNbzSeO2o6unT-%6hN~ zSs%7giVe^jNwFdObS}k4=t3#Vj1(KQp5{K~RQvZ#q&HC*dmj|S*!!|0F75rfz6ojx z*%a23&0rb*=IjxRehbub@dS&DC)mR1`=d3Aek=9~5A~&`vNalXj&DPc<<^gE3kS0) zH`#2*dYbdwnQXGzp4&2;0Vsso6tiQPO$llV83^miAXsLz1AD}>8H{>XhQKCdC~RRi z!_XRKQ_4O|(B?@8mRqPl>KOD7$jDYoI zBrK!fkv(G3?}U0*c7{#JF0h5s?~2wa`cdrDdGx!X3!|?x(T`?5h~B@cuscy0{TLL& z==WepGmplioRB?XJsAhf*vHc!i+wNDvoZlTA=R*jvG0vmbIh!%kcp`2F>?|-hZAZ< zxlBfbPN*sL*bbWez`>-3o1_w~2fK%VSy;nunbcGi!lb6Lqe*Hy$_beP>j`hEGpT*) zk0rGq>RH(zHX#SV7A7@|TLfo4(q}0Rx-mhlS`YF(wb4;-qotTGu0zaG8xu``ROOqh9*sB zKEZ+$)&h1k$t*-UA&X!=Sq#f$4xvAm%%P}f#e+@A64+QW&Q~*WEb1~<4nsR3+vRYS z+T{q?tQ^VS-=>WdORuBUd9*sSu%5KTG8k-IUDVbtOHnr~g=KU_5(PUznJkAxC&w|` zBUH*rN2GQ+7QSF|j3|@iP}RwCJPOvyaRR|GIYt{jPt-b3q9V}9Y2Yl zOw#ZqT5T@;Phl(P+Nmgn!*d$h%EP+=(XtXm%G$!GtZXlDkpW340F;)REXLHX%>K zb~GqY!PhQNYwu^+FFq*Gs`EK@J`d~33$XD)c@cFpC@<0FA4$Toc$sbSvA`80n*6c& zFG}H943}3?)vJ~Tu%70D0}D0(HuIaTP`tNL2*rDw9gTSJpq!9* zVLf>dmg2one@wg&P|wPTunGAHHYT1)b_H6az5QeMiS6yB@(EgWZ~v4o+ur^e986}c zN#=9b)7;a;WWHd9$$W`In9Nt~Xp;FF<%IkX){}2wnasEJ$CCLD^{jjkn~)!1W69*~ zo2_)|201<1^t8)QvcQ(v>$8{$g8PqYC*OP5w;if@dzm znJ_}SqN+2Y2nDNn-Qb{jdm8b&v!3Q28H%?Wx21TiqY#R>20I$@dZ3(;HDNvJ2}|+T zqCY0y+Nft`9oU4d3)_)+z2L)VQ`&od_KS*C|JeY0uG8d&WP8Kb$>UiF}u;0+?L|?MYbRIn6pJNjedZI60Z2EZny7`7wvO5kgkf!ccz`^ClEL7juuIRw^|p|ElB zhM{i6E2T@t3r-BBQie7?F_hC8SFubg(B!LFiBhOyxm2O5Dh@}%s^SPZsN#5|;z-v0 z{Wnl?M{Y|McS0dlac6ckD(-@ELUx7qWE3n_+>QR2ilb4_%I>fU83WsqihID|RjfwcsJJ&>{uHEm>F{tqk&d{6l`;uUzJilc3Kgu7DZT@X z#C>9pvSE@yRi&*#!7A-kI4JF2M%rnt`+IjF?R0KSX=k7iO3Noa0%`X}IU)POda^$( 
zr9FWDn6$G{&&q+Y38{tcNZNzoYnP<*C9qR+5I#*43JuSQ)l*q}ovMWWLfw_q!LMo|T2LxI%)B4S|ttF3JkDZ*!3WGWe1y9aqkJw&*4(eGs7d9d1!4?K}K3dZg zv3k&Jw+q-Oy5loaE<}S)wu|VoO}2~SV37NmATMD(&57pJn;ZdR7?Z z4JdeWBYVVxyb1NJ+zgwLTVM-=ycMlckhif<6y!*`9Ss`f9rW0Oyb}%vS!05{i}hf* z{TnWKb6W;^4+@^#t6M~K@IKVDazAWB9)OJnX`V8AP1X^t znl$4NQ1IkK-6G2PBh<690yZHZ!xm=ziT3!EK0o8nwBd7De8vXbE?>f$jK4zBn(^0k zbdd4?;0rSzA>R4p}6k<%TeK)VscWi?cFEwefb)Tt;(S4%hA|gt?4iN0ZAQC?{kr ztS5WIGM91m$8s5udRF#=O~?e;!d$A+8s)M#`$V~v$wV}0E|ci7E(e7y0{e39VF3WAHT{{Y)c1zjOXtxaIge-^k zLb~#CVpUi%7?M_kWsp>op)|1m=?aqgT+BrtM3t0E}*+9Dsxh=K32!&9) zi`mg=cL~Y~xfIru%V4S9zvz!?cRA{~mV(8#6l_P@T?t>iT&2CQX1}<0*QoPabzTSS z$@Q>t?QTHbXm=xBk#^A?vzzFM30NgJqs15S7L-B(E96$+fh&dE;tpIip{mN>fr3@p zJK>tEmK1hE|*@sZi%EPb; zc?7m2WgmqP7g5^#G4_iq`?xxvQ0J4do;(E`SN3Vtjk3?s6)CHG^s{V>i8@@KLyIr! z^C*R)R>}*gs&+4;VAbv=IH+CPX!kPf{vI7@_X@YAcK<~o)b3SwG}^s}azb8*_2dm$ zYWF7nG40+$Ju7d+CgdI1jjeN80@gU%ULKy?%ph>n;|Z3WkcA6 zYy=xq)+n(tTBAF5z1b&xgtSchpsq``zI51@YMa2pRGLgGo3ieIP_mmzWixKeR5nK; zOl1ppG^zAM84m!$deR@3scc1mES0TM$HfFJE+$|LQ`ru!Q7YTB&(gHXAgu}HT`&zm z7slRfVlQUh|0H6piM@m&XyU#y*t(SnR`4&q^t5LdswZ zV=qT*6nh2xbRK&px-j+@6MGfw!4%SO>B?}TF!m8Bgt3og$MC_x9Z^fjPO$iz6)dCQ zg*{@??}~a>M!_azH`v1HN24{0es}ii{K3F6=)&mdndtXmJ(x9q^ka#_==Ve+jD8$D zhS85lEg^fs;u9}eMqkYyvFP_kJu4Go6EX?5F#5@8jiR5zKAlIu54tdV*F>LSJ(xFs z^fg3b^ixp?qo2l(Vf52c!!v-ecm@!b(eKM1vFP_h9nS#5;u%2L!sutAHH!W~_USzO zT6AIb858|MtOwfr4`n8a!sutC5Jq3ej%L=^qnwZwEItr}W$bh5kHzkwo|OjJgrs2$ zV?P+J(f#B`_6aW(M^s1?8uaMXOpop8(*g%`X*0RZV?7uNKNpwVG8aK1%%znbO)eRf z6Ve8Y53pdF%L4jixhzCID~n(gvKY27mqXAR<#H(dgt?5Y5DyKS%MyBQxf})ubD3{) zIh^%ikM?hp9>Hyy%aJIAxg5ogCYPg8PDmCO&jrFVm!byvu7sGmT2`s}19=5y`b#p4cj4ppcMzNw#5dK96_R3tHu&BV=;d1!6^YaUr zBP!(z6tQ+ltA2T>tA8s@rh1`~?n>5!$?i*c6`EAKt5FE0yM`T&bl0Mskn3P^$pcI2 zZlFIV-HoW@Yg@4R+7@g_(%k}IyWFb1Z)3l>bhoSX4t3rM>&acPap~?x-AH#2T{h|N zrK1Ds?t?FoZe*q0Pq+@Gdq80_)rT7C9%SAB+-az}gn9@~D&4~H_ZV{#UD(YEz4Hg$Wu!SkU zp*`ND&rk6!ZFn2jlXqa-vn?mZAJ7t}SS3Gl`wmk4NjJ_^A7)bgne{Y3GjfKSuNeMG& 
zw}?{w9rdjI0h^FNVPh$pp#MUv?rVCh`ET|K&s0@q(gp8v(lb?86s*VaA~<-aI@|== zjrH`Jn6*xY39>u4-_#K{?pGF`VxdOZGwU)n`(zBrp-{#%I2^M*#b5elli#2pZ3_2K7YdY z*M_ZNJ=q$zUABQWW4|qm#m>=r-=a>>#{)O%1u!~IYw*p`QrQk&^@P8@ZnM-`<{y&= zpqe({B*qD>I9A0sKucm(d;@e~tcq`d4vJOr4bUBeDs~6wn7kLj$Y6LWUN0lw5Uu;) zNB7ruL(!t*4MV|`Qtc3lSB834%3%{y0b3|urS_}y2*s3@?vn|?xDl23cwD?Nxic+xOS60a=RFNp20Ym{iR^h8iM#I63w>25>uJsj| zEO-lxPM0y6t*E_t_-ng87cdq!B04WQqVqJYEaXBr=pObZ_0mdXHxK1Q@p{fRi>d6 zKB$2$E}11W_&4txwuSFUot~5izK*A-@VgjLWnTr^j~nAPLF;KOsfF`I_D9o997Vef z8!}7|K*hX_OH#>3nMEi5-=#h^FV$Qx2cle}12;d}*p`x7)F-vgP8*VgP{n6Fuzcqm z-{Zv>w>2i4X4KTyf~~f;OlHHIR#Lt{H1rP~SSEFFIepV4O)FD655FEBPg1bN=i5!W z{g~>Br3*`C4mVTazHpTGAKX6#56Fj>4UxI%h7|dhxw@97d5x)s;&4kVVFNs#q+tsP z%j95gri6UqY)B@JXh#|yqc$mOvq24HFwrzfONdLCX)YeNLurZ3TN$*g&|;u%BTz1_ zIrw4R0|%B%#>ek?1lkmTzQNBKfpS?80*^L=jGj?5FeEUeP(HADa4{QGnWS+Csh9L0 zq=P0-%Z!>5S?EX8v5<>2n#G0Dlwm#`5=O%?2Zv$Ep(}&+6n2RXwoDESVLP2>hp&v} z2qihvMp7&Y}v}fS+3z6 zV+*HDjt#@voCBtF#5RG-O60he5go5YC)kL}<;0K(NA51Aa?;AsPFA#23>sfy$k{Y; zzu>qcr-q2ZpEJffZEBX_(%ovz4dbc9?XXIhafj1L95FvJf98BakjS{d@iihPMd4h~>- zEzRkR5TrLg4`)Tn0VVr&1@S^LKP8>Zclp~kRe8!FsBL7mx%R6K&TUahvtQsS_ zg7M!gW4uxsuj-Jo9E?}58Y8-b@tTz}UaO4Pb;!u$<@Kw^h^}C~VP%XrD&tKZGV=Iy z^QtkTD;RHC8RMs`vm@Y@+1MUWW*JHm*m(P6HDbOwCM73+-?V*G;&D4 zpLbUJMhC0rmb!!GY529JX5wCxrc9e!BhRpf%SQC!d-vs8G|ZCcV5_?(6}lC#woJ+M zXyKyRMDPNtp1cUl2=GS_#N+13ODOvx?E2~aqRMk?&rOpxO`^09{xXWyMags?t}Cn z{rrl9cK2hAVZKMq!qnea%nuA^aB|W0hkO{JKCt4T(aVSLU4Z%#Q41AVp{O6{q3Uu+ zK8a9YdGXsTfB&SZ0QFO%b}z{PGe!N}pa%IbYO0ejBFrE5`6Aozj)bo*q6PkIG<)(t*toolnr6#4+*ak?ZM~=Y?-@Vj80NRcT&*DF?-cX<0?ds3 z5MkDzgfPLNvo(hKBQaMm!2C%ue=fjG$uA-1sNL>6ZQB*6oKb-JD>2t9!2C@ye=opn zl0PEMyVqWJ*^=izFTnhhnClc^{-v0I8_Zy8Eo#an+oTId&rH}qn$J9b^1C|~pms%< z>Rtt?MQHY<8!Sf2*~$#d^rw52ZvWXL1<`BqiKA<3q&r#sF<*^neG1T4SF|+@+87g3 zd=8~!AUz0DI4{>!ke&u)ya_3Fcx+T-AZrn%a8%b;kaY~mbOW+Me7eOz)+I>cbnB%c z>lu*!4M^>7_DI$z$R-7vY@i?;8j#sWlbi_^i)15$6i%p(6{NQTnQK6DW>5^I4?zlN zP+tYv#DFv!kemq<1KE@yg%f5o1=-wyY;82@w)C%)cHL)7f5PxIw#V2pqxY39(9O)4 
zeuOKWF?|2fxZoz%5LED>3Y%hQEvy=37~1?BJ}7i49qO+nN>Pd5wMr9HoWU#FF=)4N?7{AN9AnXjZx6u6XIATiG8xC-3ceL` z3vRbe4B3a~&J7ul{ua``2$M%T0d08p2R5!}-2&O0ot3i>IAaqnphsXOG7-HjOp~~A z9@AvBc`^l-O#F?BXreI5eYlMh+9nFw`bz?REOa&8G>>j7+B}&C8&9ltflOy-@}$XV zL7Tx2d1zdo;vpm0INID&*^iwy>L8<3N{0gy?hl^@{{U{1hd&E#o*W39gTFv(+1ZAV zOb&w2f}i9jdHA!@=1Cna@$pB+pP!m9_3W(#ykS}F?>LydIyehTG#Bh%DRi@N&EeL0 zTyxRpi33Y6-asN2xWb*KfmFS0@YEIiIi2&bJ7CEqsd!k;iuk+B`WFHis`I9y{mrrR;o5 z(AUCu7$Nfb4o90ON5JOrHOZ0eoX^)}=Q|30Eqq54B9AYNHay`4o5PoprRPkMl%KuJLmD}ZQ(qDAbFf8qRo?&V9AL; zx&t<~jhB60?QFDpat>@h8*X2-f9GtxZF?TNS=i3! z)_H6fpv{vDVe{F_D+a=sOkL^;l;d?x=Ic&JMS|0BOm0Ot3)^knI*;vkwBhaR zuz76w34%M>KbNgAw{p1)-7IW(bL%{|d(ejWJiz9&mCJqXpUYO5Te;kiZWgu&xOE=e zgJ|>QA=n%?+?g(whuL4*f=^`d`3h#C9!?h3$s_PvXddNec{Kk)8(!N7n@>|FkF$Rv zjlX08%@gojXrAO|c{ER<4G$N>=F^nRGwfeTQyygVEc_Ol=eSuO>r(TPR@jXmCh; zk^S>&urv8I8F>kQ3(d>iERW_DwBd#?Y(7nyyvqKCG)AiI2Xg-18Lh~s%%cJ=WZFpxrY#vQ{xqQL?`7}6J1TukK=o^;mc^S zagNpnA;*v0D2JnU0Xcp`yM^OtcF*JZ1#O=E3LEFBi#UGcMmZdH%JDneEgXNayWvO$ zpN0I1#;p7WJ4^nC4R7vr!2->buCQbe9=-8zjNmhrA~<~R^{|SG-^)nW;Oev++&LkT zAs`R#A9YuX)eHrnFc>GRhqzpp<=nfdG52orZe6UAPvxi7gFtzTu8B6h$q|-01y`x# z!a-Y$+o+)WU>e73Z8YU1;-jy_o`IHay26{&*6ja;rmPF+Ea?SXT<8B5cWq+>ez{T> z_1x2R>M85NjZfXy#~)8NU|)npEzKi7;fM6HA)Ly9-@k5a!Y@v?%0_IWV2fpA6tmJB z){mT7Eyj<52Osg5NFVs{A)6uWi<&2!z><)kh)GH|<>mxUM?lyM0HhDMZ3a(-usLd; zYyle}G&D&+Zfr&14{XYoISBm~VJm~cN9tNrb+WYuf;(vGkfC)gt;LH=WSbn6Z53rZ zgTfKRuVU88_7)gkTjQh1)?#1*XFv{4vEq~%97a>uS|@o;Vc_fee1_f~1}{6?(RZ<+@2cpde01k`A9_PTGvvI;Wj8zSXbbM{iaW-~ zO^*(v-YkR;jvhJU$R2izu@;Iwm13Ns7!y+P=81m*jLu!TB;)NQds#>(C`q*;so^P3 z$Cr8N6Mqp$cw?w;L5b{brd*$}qz)RN>g5DW!Q0pR-H()1Egq@^`F#6=FXF zF(riHg}DV{e+$F`3Ngz-G=>no+O|L(Xo09zh=U9SUuy1;P58j)UV25HLKWTYsY8l)`7Kp_P zafpH7i(7mMUUpg_4z)md3bDjM@YN+g1TQHq5QkYH4p)dH3z`$ zINCt)eHA_gF9j_SSqntFLM$~9s|9DqG%xck5X&qO%N61n12H#7H8Vw&$7Uut?=g<_(O8xD{{^-A^f>^`136A=PUdLKD@Kcmsdlt zvSVLp$G*sdeX(L+;$!Pi;`#8Ut9+)6t~5*JQakZw7UF*?@#ThiQK;Ea4oPw6CuyEh zE}c1+|CjSgS%^n~1PXS~P4c&{?vXBc-087l)u|GP_O 
zdgSbZ=Ftt@U(p!+=k`nFemnUC7V-y`{2@a=u>t>q`EH zA?M4zY~=q$_BZY1Z&}FSR`PcYdAD$x5g+s5w(qKC{;r+;Jq!8!O8$W%Plnelyh8Qw zpWp`SN)Ol{+6h0h5UxY2VHZvT){B41)73-4FN%Db`pHQI2?9hNty_6<)=+pH{4pG&^6 zQ+;cp`cA37H&nZX7yX^x$lz)A2Rp@&7K)#g;%7s_-}m;lYZm`GeWKBb#kQV3w)HKsZJ@DjXkzOUPV1UrG(vH5XmgeN2cd0b z4{c*hXuUPGJ|?s?L)rE%2(8ndAsR<(w@^UtI(Q7SuRXv`ECFt+0d8gjNpx#XbQ_asZJ6kG9StfTK#St$v2a^E*>)DP?UihR zA=^JB+cucuc|sL%72CN=EL;PXYmng@5_0izzToWQfBD#d@ILl{o$O$z8El~$qBKK& z8s{8;mMDk)ri{)9K4v=1&QWUNC{vDdpCed94Id^I?5k_2!JUs*g@5LX%`&OPww6<7 z6(RGMQp3^a$q3lk5^i{@jAUP>kIpT51DJC+%^yumEIR_o63b4+&x>VewBcQzusnU2 zgrhpLgBfHAAY2cH2MgKN9>6F|0J~`bqx}HFu_%|_*;mEbp@UF5On(eEwWPlX5%ba? zi#B`<0T$^yfBJ;0wySb=HqOp8-omt(GEFc{fBQ_Uw4aiw+D^2$g=nG@P4bCCL91jk z`>LS(uhQhkp(r@@h3D{KgoB+92QJI>n*t__{`)Y1JpB`B^P~ngzAFrusqCx4?zbwV z|9_8l8i*{hPA7j}tTWJtSFpmyV;v#;vhP0>)~tB`p|Jab$r9}T3?MJq1JLHlEZBIk zBjrH${b$1d-(sx=ktNoH$e$N$5^bK$hKDqYaD;D)nXiJa+M1!$ zsT@Fcb`*|I0)ahFNBH<;MfIdun3S=0S-0-@E7I%dBE>V`IqO5X6tu#!u!yTcJL+( zc(a1H81Ts1wP?2gU6zn*o}J6JaEWrY8m^}BURZ}*b$MJFJ6D^9Yrb+V zFkE~^tS{OIX4WjKi%TX8?Z}HP$cq*E5QEIui~7j;c+~%7Wl>5FwZnN9xFrgAn1NfZ zyMar4_K#Z+w?G`B5Jwsa{)}G11o6L^77p4`=xf>ij<$ej6};VmUl@XSIB2Q7L0f9) zT4v!|u3X0$F22?@U)m-))(&@^1@3r-JHfzh8cNIW7g@4D(E@RjLY! 
zQi+^`z83IPE#Rjq_~{0mKUe94cPMRBp0sD!xz4n3ouyo78!j)DwnMH=9@jZ`u5&G1 z=PB3uhKuhQ_VWt&y-Zv(xxkKmp#}LOMZVY|^QEvpG8)Xj$8T3%f~sW*E=6lP% z;pcK-d2$&NP7!`V%K~w^1>y>Y__u-J4;=b2>1XG9zTj8B;u)97mFR7OzsdrCwZdOx z;Q4c*K79PfcKk|{WO+k(t)1;U3)}U|c7tKNAUqxH41qhmV-|fOh_4RvzgV9C>Ew;} z7;dt}aI?m6i;3Zre;xzAZmF^6$8f7XhTAML+^#X)VPdEVRbOQt$~%bRPJ0Y@Sz@?b zW4On}aOXeO0Uu*_5W~Ip81A#gaKFazfQjMne;z|eWAUIphKDRMJghN1Vq%!|Psf6< zUbD}KN9{5E#}dP18pGozhFAWn4t!CWJ%%UjF+6FB;VF&bX%oXH|5OLQNX#C?Gxivs zwZ!n8#_+s}A^p!|=x8inu*dLXEQVCt|1QEyXqzQ3Glt-4?pIL3x1iXBI~UjbM@S4( zm%-!YRkY?DYq@>?dEhk#c-;p`ZykTVFjOY zg^;|6iN2HZjvfBp7<{VDIdYb~N7(%DTkPE;_`<~Ep7K7tc!K@|{J}2+vnxIz#p$if zv}rY^@(~*JZb<9AhUQjTf$A*zn7t&-QfIHfRh&-w2p%NI|sB2DDe#_Sm>=kk7GcK*aY2*s)NDf*Q3TIo0^gwKx)?w zT_WweE7EEP>A)zw)eDgLRgMU0aB0avU&+Cxgk2*Cwui#5X<%z3*q#Nj!#jh;2$c;i zB%M<1ldhFRy0((8V@UOM?mr|Q)G2AN9MbiabbUjrzl`FOZt#yuD>}(^!yM9$ zlyqaC)cMs6es5*$#yu`)&n@Mii>Y2*%00JF4pLu5+QcCJY>+lJNWd~keE00cR7M)= zfPz=bY?gzvxuR@gP_8y8{X&#rG9CgH9zZa6Vqh8@4=)9coUl=2xqtjALly;%3@T`3 zhcMrGqimTIb$^X|D--q6ChDy!t(zO`SIlWpTJ2wl#wDf!k1{iu~L{Pb5Xz4izO*0;Fi?9;j@Ce75u`-jDMG-N8aQ zu@k~U{W~Dk{WJ7MLr~6O2ImAbM1vXX2jjeDA{l0hWEE4-V=K*JD^s>|!}hjet5_LZ zuAIoOqueTU*s7FmxX+e8DH3{wg>YIYLgN;2$e0^T3Ew3acju;0w~WjQYDW!fCll0w zD5#w^sIHu*ImZd^ZMxrZ2a;WKz;;!zQ3fm%f$gSXobeG{&cOmuqjR8kSEw-tsx5-r zBZLB&?YM(EH8uxoPlXz1p!8=x{0Tl@p%{}jDjsZPuNmRS)-~T z)WjU9NeVUDKyAloOZ=#&7$}XaFu@ASzE2KRLZNC5)YK@dsR~8ebH|W}cpXDMN>9rH zo33Cp4A}GtjNhc-LM_sY?}OrU#(?R0(Bsi&d+2H&};>&Ge9#Upn3xoMdatklaLCfb0L)jHb=qc8n724Fvoy( zkf0ud8*-r13U#o7dNG1(451>`WBZM!*pvg+tWYfm>ZJ&3o`LE>Ej<>y3YF4tmd{H{ zrlGDeRa@Usml09eR>kP;^r-IR^sDacY&+poTwORTsyB89RpB$^ zzGIC)iB!FwGh)Iwtns&qs*9YRC;Y|QNm%k#cXxK1@CR#`u(rB0cEWG0-OpNAr&=#H z^p~EiyE&65{7K(WNUM4^r)I+MtgX(Tn>aJ8*K%qn{Kyu;RYLVTPW^=MSv!oiUQWY= zZ&^DQwKn~tdP6f_K%La@y!%OJG|4tteP4j2=4g9ure&DSSKk7QuT&PQZxMa`_WInG zOl?y#-6F~6WUDMj^CZ1GY7x9jy4Gm`snj`(rQeu zpX6*YsZF`-TH59{HZ;$z)gW5saP;w`!jH8in_C-NnthiX0gop~D#rEIb%?62P2UJT zAHPt;ZW(+X{wTO-)*{!Tt%xvHKXh&b?g5(sE9TB?8#-}ZWpd(}p-poulTCHajV(i) 
z@FMNT)}gIU+iY{l0E0DP$NmG#hYTMwY`_lv2h`(*{%j~4R#r85Sn1$ll>*AaGBM4oRsBWgG%qprH#O6|Nc1n`x&Y1prNHhi>6KFtmh^IFZI<*JV?C6x@aKhq z^K+W3c<1Ugm*M#JtN!@oFH5;n#VV7lPJj8w6(QDMT#Gqxr@2V-S1Vk=q;-kFRfGQi zQ<|szw4R=M#`b?iizi(DC9O2imuWq>@pPBgQxnftX+5v-RD+WOPZIu7od^1~9+r8e zPU|t1huO3q5_yzN>k*9yxiBZ!R1TM3noa-g}kpSuOgr}*V+)5 zt6X1Pu0SC!mxd6Ri!onZuB3c%xe$UZT*Uawat-1u%O!-bELQ-&vOInJ%JSUnE6dZK YFB;ETzN9?m_>%H4uR^x9%^vdq08s(jkN^Mx diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/py3_model_Feynman.pkl b/epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/py3_model_Feynman.pkl deleted file mode 100644 index 3e55c479e2cbe319b2de3f58e86119ef116bf180..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42837 zcmb__2bdJa_B|}1h$1K|m=F3n(gx1X)3yl^}}iusdw;xT){VlC&|7fH~)! zv(KD!&N)5x88JSeIj8?Q)zx9TYZv%^f8YBqRkv=Pb5C`3sGjcL!;5O0Qg!u`YPCkx zwn%!!{8Tnw(=tDkuFYk8OmC@6H}RYNsiw4DG^Yr~Y?Iw(&Uz@c zrX<%;+nCPUT{{BXTZir0kj?e5y8&~Z;x&oyVXuqW?!{KooX%iN+3U^e&VQS84Jw-+ z+P%I?Fx%SDY;RE2WllH5)V4IXh~2ZQ^PF{9vr*%}Uyz1fO6JyFf38yD0tbLH&2LLuaqw5IKTbC{C7`JDCfPi;$Eb1p6T zf0Mn1V{bWU1333gx8_=!XS6lVPm8@3@Hra$*;`k2YO^>({k;)dYHwq?gR;~fU|}pQ zjDQu$wtA`IXsTDd0^<820LkVeYXMxr7 z`38}X6JgO%dwUS7;&6M15aX`jq8a~%8E4yr!;G`-VlehAU|i`jmJ~7$@f6nEL$$(s zdzj|yUZBwGZDHd9c~h&8I~Cfbafv9W!l+p)!DvE(9OrgXiqA z9(Zn?3nLRy3_~7fY5$kM*o>2(Bmj|7-_twzaW%fQDLw7MlCkvsgJ?Q!N zz8ZS|GJB>GI@lPy*|YS<=w@b_UC1)WW3lZ0z`~=NWzPjk=h*HRkW97r4{wX9_5s)y zn`0;Bfxa%4wbFI=JbxGT4`BxuQrD20(>FcbK1eG}54BTTArG1l8V+G;d%jk9cg$VX=)p8r@`ZvhuRBPLp0s4*F-(dL{=eDgGZE04YxCzD3=;)A8aP#8A_c7 z_MKoiVmx^SV}16%sW!U_BihSM+w9Y()v2k0XRB0guA!wlYqxZCQr{zpbgu(iaptl+ zybcI;>0t}Pv&CwgHPqQTjB;0tmlAEo)*SbLX{+ZvdtoF8bsR}(E=4PpVXx?^`_A*y_wx*#eRgWtX zSz;fyQjOq@+M3puML06S$m~pAFC$TK9hF0 zkE<%m&e_nT4ldA+pJDY=*Brdevrm|_5kgvVq0uJuQ`~wr>RRN)8EqB^`lMnjhm)Q= z!YAtyR^xk$#bG?umRjDxoSwJONZMywhpn_A-ExIm5O1cfxvD6AjD1H)|bu#+1r!$9gcnHe7Ehc z;;N#mE~*`EcPH(89Q)q+X~(`VEad*Y{Xo)wut12Sct}$`>{D<&9!c7dI`(6l;_+e( zlTYzP(tgshpCZN6VTxz+_OnU*xj4o1n&JhYq6;WqOxiCw_RE^$6;c#=Dqc<6uQ~SX 
zqpeX+I58pMO8;{HK#|LE91g}Hyu+rK33U*p`r>G9=p>UXsx{1NO3 ziE{f-mBwF4BR-gaGmUk18vp1tn1pn~S)X}GXA~SMf(>OOT~Nyn>%gn` z(CSiI7d7T4-5Km3YDSmIdT=m9>A@Dm0Y}zH1M}?e<|!MfdUtGjjG9WeC%ZD)4N-8U zmkRKYg3=rHyle!Ul#OBI$@YQQpQdH9392gFO&RTP(hAuOb(L&iHW)@!`k{fzuBVgT zT-CXG`>Ry9U`M99B?^vgrJ93Ow?;iL{b7@`4QxEs0raZbCfl;;k7Y$d2Ew7z+m0=! zu^fa3rq@HKx4o*bQuEIp%N^L22@XcVkzy4PBv^uaUWULXWhiVs!C~-b{1F_^7(d5E zg^Ykh$qHRKg}@G;BP_G4T4^ zyF$jIsh`@<&X0N8kr^WY68<$;Xxb4*l94IC=RgVp|g+Fk<`DFQ)P8m+a*meH7`J+!)vO2dq_QlFPSG6 z{^i-VQ~DTbKshNHSVs|Ybm#iHs3qk*SVzu>W%d^^B9#4wsORM(*rZ$x8_)g{ zc!TUOWsHB6C@q!C;81h?a<&*26mkU`sHTrrb0zB;&Tnfkt>!9LsOD-Ed^OiF&{uOU zYDu{c){*OBspbYogw))KdR}gVP0GJu<7#e(H&Am6W0q&MhK!OBnU=RAgxPPRv%ih? z45y@3t+T(K6=r`23V!x?GSJWdF4U58H>@M~z%u)L84=36|W>>ox5v)@!_{|M_|_Ug)79wiF1e+&gb`^Oom=g|`=C*?_4N1lRZ?oYEll>0NN z=jB=0q&x>3&;5CL^)a)$TwXv;9W!5Kus@;3B;+MH)P#DOEvAF!D`=po&9tajS@(7i z_loj0_NA!TQSe2*!9XqQO_Y=J7OW$0!&210*&Y)04(fS%7d9#H!Nx`XhdsPAL7BXd zs#ic3}?3`J*r ztv}F*SxJQ%ipyw88Ko$)cm1fMbNFRN-M*2ZSoh|qE9Pgol$c)#R?pfc^W;|sYB9f| zoRr^T9r**6V*X@%NX%cT=jCtMr2GRL7t;xMSfgS(E1$1I%Tn8?2u;TA(}jJoeKyyU zx~e+QW8GA#$ilA?)|&qe5k8NtLsI`dR;5>Z>(a~3-5mvggw`XQ9-$s6CuMzDM>c@v z2=!!pXoNOIJukgrlhPYDK0+J8tG2ePHQ@3=~DCq-_+H*HylWEW06b&4k zE%exIrs^DHvLE%8=KAP<=AY2fzG+rb-*%^=2v#wH=#!=uJ#2R50;W-uB!He2ekDOPok zO^GTMS@;piGXMHx;U^y}@;gs>Y=)3DHa0`)<=6~E!5^F9WYc3a0_CKPgmq*TEXSsl z?V+(LLp?7E*rb%h#>b`t-e7Dh850_tN*N808k;d}GL6kxG;nOT(qpqD>lyA(xP)+* z=sU4OG2>A1#q7*LEoK*#ld>zUBjaHyW&+zoVs=9v>rPm#J7MEuCcztsnar4wm{OSn zj}kMLO(rpWpn+nx)?%iyp5YGQi>YFTV)jJA7c-rKTFeZTld>1ABYVSA%sy-niAkcK zmulFg>^$cgMFJ^yMDCPhZd@=JF zsKp$Ja#Ctw9XSY=Vp41miJ6ajUTR^JQU@CslZH1Cvw$%nF{7m(9wo+NlWEH|pn+nx z(PA>JyVI;%PqTyBmtq=G@WnJSP>X3s8S6$^M_OSi#%6m+jG&&EENoJ8uyHYM@CITQ zGA1OZOcueT#4Ki$Nz4*7P|N@=W-06LH1ot9!oC!9C@F+27 zv&kgp95hhOKrQB6Rp*iZJXI>P@GGxn{(09j{H7Z|(w|RK|7YDAz0$paUXIO$C^&MF zq6uylU5t8OE`d$TrLb|)m%$q>JT7O9f1e{ESD>yWU&#iOz8(dv6ctS%`9{>SQiR1y5jHOQW_Sb1w=iaT zMk~!Ip^?}2CeZzu!lWKS z!I4K95!%5XLp?8#!zSek*mzP;!kh8ehfgskSRam+r{PeO?HRV1Cfl=UV3IrNB%fnF 
z!-;0q=_H?Lg-O1Ef+H_7B9!DysORNn*rdDy8&C38c!MNgV~n5V*o3?ehf4AdwwRK9 z6Aesquuk$V)-#+bR)bFRZC04%zfo}H9Y%zbd>8e+ya$_<|G>tRd>`H*$qyJ4Bso?- zghM6y5nD`2evAetS*(-%gmrIoyUVjx?8_uSMZuBJRF7Z|evW!xzJN{2m$309zhVz{ zK=oG6(n)XMp5HilP|{!)3Y{10tdiW%3H->81MsuG>}x2$_pz)k!+xK!fbqu|I7 zsz;FckErM6C)lL?3>#1U7xqwzXZ(DBr7xUsrlTx>Q_;DqAv(+76{4O4ZkB(*rLz1J z1xNl;J%TL%Mm;b8z$T>=p4W+I*%{twmPPcfah6?FbgpWs&a$hj^SIVcm5MApfoQG% z$wTjX!0>Tx9Uuh9wHj|hOE2eCcN831PtgRTd!U||^1!Pk4iK^oES_Cw}=T z=>>eRT6Div9HB-2{+ zM=`@12aw*^*q}9TOD{DJM8T2m6iuLU5bAl^9yTdEz{WKWhBwey%$SZfmLP;0M`(>h zSa)v~jn5YMH?Hke1z0$08UOi(uHemPzmiS|&3l&{8H-;80qovc;rj4>VBAD6M50>+XF& zPfHd1Qp=tw_*$kjP-~fiGSj3&q~Q&;EMQEarCjRaP+Baun6^s;8mOgAYss+g&M{BR!R$*djVSn9ni#0H zG^30aFf3NUu+(C+J)}iY#|ju0D`42TmNs|;EejbFXsM7za40Q{*<#YN1P#=Z&{~$V z?#?k!%OUJbEr+7uYjGH;wJbv!3t(6*fMKcS2)2i`9Emy>z_3^V!^XAb;SIDL!A-*Kz^_wU!f6#`+f)>t9%EIfd;Z zEvKT6^)D>ezp!yFXTTe1Ig>GgmeFz+97@aCY%ys$2MyFxp|zaLx;w`_E$6W>wVaQF zujK*;YAqL{oRo`T9l02mS}tLGNXwu5Y~~0V5#L{wuiJlf_h#ag-yz1uyHMq!yBwipJ0rCyP-^;L|sXFiVY?yPosfS z#%L+eu+l9r-eAn~j8>3Q0wR}XZz6=bkJY)q#kzZ|Ym(0WZK5#uf1}{%{tg3m?(d?E-z;Gr z`422}f1mB4+&@4)FCW4tam_ok3~9$G#l3UmJ) z1wZ#M80i04{v~Qj`3lyN|H3l+uNe`_{(q?FdW;BV z-vf0lz+kZegNE4H?t%*!MyRv)@@~-tSBb&i8_r7co<=ziStlGDAh@swQYLdnv>c+#|9k=|@NRhD(Xr2L)eDl7U)G zHOg2q!8$S%mSSeHJtSr}>UrVDNXmY&aWQk*BO+#h`qo&?0V+CIwVM_*Pa&!s&Lj1O z?}2bBF*PV)^`v?PcNJ2o=Vd-@Qfgu2S=PZDTsfr~)A3z}1qk6zF;OQ~&$@eev0Nu* z5rxyG0R=}gsz;F2!Kmk@5jH7Ju<@jt;SG{%VNAzKwIYN`?XHuuS$A(JkI_j9QJ7Q~ z1xIqKN03w->UmiRo0LVc@uU{R8zi-aF&!th6d_D%l1}On)-&CB#v7^U;-N%gQVt4u z;#Bnrk~$3ayc`ajlp|o{NgWAqkknC(@sDwpWpXqe>KK=2i|H753>ui^WS!)4)-!#W zWVudq1^Y6|l_)rJtm+XYc^v9_IUW{Ii^9f}JQ3a?$&(nfJfl;~sLUdJ@yQ5bQd4wN zr?8&chDnXlNu5d*CUqJLj-0M~1WBENdS1?iP0CrY@ubd%H%RIn#a7m!>BX%g z7sE9NVmpl*F-k5$MZXa!=~Sa!%0~4HZMrqxTql>IUZQ>(SeR;TOUvb`&uE*U(Ii)( z>d2L_eDRbo6Y}lR##Gbn>Y5s`)zp;9RcOs9Qhvn1Zyz$GOs+!w~OEY7Pv>@->dNpwm?Ge^MNPm0hutndWcV;w?bk_ z@$h0g)47yx_N12#7^*g!SS_=wOXPkxoAw9t0hP^z@odU4A0G0vVVcAJH00sc!9Jp3 
zA2q?2$zwijhturw)sZ}*NS-v2B;+ZdWb+P2vP_;{9m_L{<=Hq(0weUC&$3$wEZAP& z&J^EbdN0!(?D^HD^MXp}MN>Lu@{*sJT<@Q*dPKOj^60r|wo z_XlJu2V~XikUv$(pJ`<809M=5oT;yIkMs%o+z0Qkl6Mc|jA_HycG7C%sFOkhTE1`@ z+dqhWsTjX%m$7VdLjJpEj0gqe*Q;avpJM!`UB(0$zg;s%go5$A)iHjr7=LJ&k;ls) z*NhRNVEk!yj6W;JU)p8l@#ohyV?-zze_I{n?~3t{b{WeSm&u=N#)wcb{eOxEBc>&Q*khu`_}@ymBZ)fFoUC!xvcLy`xmRt{EdPnZVd>b&TsM#&tVj zER*hQ%1Djtwa1uNAInHD#x-vYP&0+1gZNS~i7;7sWb*%}~YNF<556i)kK+r!0_u zaJeb$^7+D|it}pD%aAogqKpf^If_+9sZ0cN3%DKG5|)s--_R*lEnBf0FPNw7`as35 zhkkvE6@%NFa9v_>{T19cE?kC}uNvpMH!ewi^2-8+qdr77yQOhKx;dLmH#Ext^0{el zOWe*e+<^*rI~P|C9hKvhE`!+D&Gn0S2EMfFpxxYDLzvqWGcJ7xg*jMbdM6ik{UOBx z>O-pzny_;8o-x!CqQ*yHh(aA2K~yvFefU^-8H5+wU#vH zQf)FRK>f4%tP}Qnf2SDgWTN(pp-xe#Q(aVRTfJFkJ=eW-hcA{W5VaPcKB2l=_8^P9 z&8HEqZw#$Uq3x;BChC1CJclwNkm&@8&&wGKWG@XeRi~6bJhW9qAbS%ezE$^8AW01} zOM`42o^BzKYJ$Y4+rA29rUuzxgVc;SXEKW*o5#jvwgTY+82f=WUyn(_gbHP{A3@?1 zYOVs=UxU?;-faE6JBdRe3kcFLHpKM` z#L^&pXkot&+}>VxK_&*$K#=&1$taM6HOK)PWa*#-hd%p3uNX)pLE;;`Nr5zLkR}&o z=c9f(N=ff8DI6|O09uL^fV+G{Q!Sv~@5Z?)7x z8bjQ;)&_OAC0Ll2j8qX*7AU`7{FRiH$dYK%?r{ZKb`_5HG7A<0p3*hk$4 z+Z1}D86yH0eajG2Fh(xjVeHEDRA{6QhYwHwz;gJ#%cXQ$j%2Xn83dlPkfLA!)C$%c zixN2sT@18G6DdNQhtH8?U`gw*XlMH?+U4w|=)E(;^x0g|u7Kabv6A5tj$`3-gh*=QALJb^&~jTnHORt1p#{7_74P6s6N@b6^NBMw&eO=^vJpyJD4i0y4LuCBw-YErdk1`u+zA`rP)p{^T?|&N zTXP&Kc}dg!&|#>|9ZQb+-H0{t-9v~7-@WiTavy8~U#;BF;3!|MnePF_8u%V0M1=1l z`0%6{Yyn?d9$|2lFKy;~6tM=r#|RPOdmKJTo`5ajYmz4!9OY{=^F4)F1K-nxi10lF zpCix07VzcdIR;1ha%R5g5o_Rkfe;bC7vaNWYp~?=zL>g%i-wmOt{AroW(*q4oUb6- z!1*dcBAl>pw+IxWeH%V}y91WA z-f3`;(#45Vd57_em`544bGZ9P_~ukX-bEPL{JXIk_d?)3_KvXq2R=MY2ODL>?Q6z& z%*NZcA0o`a_7QtW*gl5OkxyWwYzbM#_>S3l%l1=*8Q4B!?+DxH@Hz4YYylhYt(M7` zj4xz!_xfNYdAs&2gc;cW%ia;Tui?X2M_{9D3HgTcg>3FV5wP)A?Y9Usuzkng5w`E) z!&_dk5w@~2`H}I3Y;kR6@)N=gY(KMigzXpj9QhSC%9fDd7+=U1*H$LKBh0||2YW}@ z{)7+jhQUVI5@qr?;|tm1+7j{)!VGMkaHSh%>kJ=0ehC|8OGp>S7qZ2*C8R6D3~b%l zJECnJ_#9amwtx+Hrc0$e;}x6tiwu5W!7NmVlO?sX9@-5wJ=iTmvp#(IHVSN%rc8P= zK2GCavVdkov>Rx8v0H?uH++t41RJGE$i|G1(8*@WF9G@HVQM<8J%G&m&o 
zWqgzdJCjS3lYVG7&}`0b5t=RF!wp~9C{3Ac#rQal9<-cnjdlY~e|C$|Yy+Po17M>x z3E7tMaT+~nIT?s{1I>2q7NHpgpCjAD7SNP0E=|Y|jE~ab4C4;kd>M>(15GizMQBRk zb7Tl?gr+PZLm3~X!70O|8HRQP&2V;$(2Ri3k&&!ulj`8ptI3_S$bELiBLUw}_FV?}% zligwcn>&+Gabz-G7)9^V8~4TtenXjp2A8`xu42OXGE&u8olZq_K?-E>$i4eVd%)+& zG+0vb34uh0iPp#!IIEhrB3lT+TQG= z$gw8wG=t;?> zOYs+pyr1|>b734>d;)ge4(=5tFpkuscCB0T5~cQV+KsgmQq8Ld}u+upUA~ zlhmSWM&MWerLF)Wtq>My1U^!iP1j1j0fIYd>4=fFE!pCwC1MqzG$@peM&TC2r}=B; zU;_--*7zwhTMR7VG#21ADV%1F!)$7^wbEjM(b;4hnwd>&0gA0qM5Cx|2$MCysB8%0 zW|LDm>XGp_cGriUg(%OHMarX}8eEKuBTMMAiqw}&-EGcCpY)Sr?lRPWOl)Lgge*lo zrf{&5AhI`ky?yZz1%0T7=F31X^cH~DcS3NGN3fWFXY{4dWH^WJUh#Uh?h7a#p z!gB9b$GM3$qvceFDmE%p&j1Ew{;i_gMI~|?A`M)p6ClEM27Hd32^-?V=ZF}pxJtmK z?hgfA1;TJwUf-l5^V#TUU_OU%5$1E@!;`?UA?C4iK0_5V=e0XEyzN&&bpZkmR2Q;; zgz6&r@WLl7sjz0@XESkqb_rw2mEnrt0CuSX>@o#*c?7InuJFOQpf`YBX#l%Qfn6N| ztB`AaFs|1PVAmSJu2W#wN5CrO1`kX?Kfw41_Ji=@eO1^HcG)O-n6WWztg0VDg8}+O4F>F|7$3oY8a_v!femAq$+L_lcBVT@2C(NC5CMB0K1W`F4TB}*MaC+y zo+uf>USdE5>}B{Ic?C8MRxYnHR)KAdk^$^B21LMKhtH8WV8dV)@+M;ySYMP3U~e%X z0`@k1xLpVv2CI~J7^}dxLdgL3E(0Q9@4<&RY+=J-qvd_ZDzI%)GJt)+fC$)!@Hz4k zY#3~ee9Tyah4y;2D*Xfv2JBUgk6?caAHL268^#_hpEEXwjiu-pXfR-Z$@mENSMcFQ zUDyzIVw8N%*cdj}pZ`OH0s9-qN3g$z&ynw7!`P+rJ!2I%eNH1?k^cIg_Q_k$&MiFCsKdxN6Ra7Pst!H35`VWB89!oO@A$t_dd@g)Z@ z3&ONr&9vPNwCgC^bv12`PrH++H(0%^fo=D)r!WWUZcbr6Lkc}q3hTQmc$XuI(Xs(! 
z)uiE7vA)3buX(b#yy}Sv!|-m%j*$t|3qD7B!-i+(7}Nty-YdbA0b80Gw=ytptr+`j#?d}wg~#Z=!lfrq!Op3lV8LxBm4o~GLWyi+CLdrR z-&TzdUTh#QQRG83dA*U` zyNSPMiEgkxO*E3_mNk7=$>dM+}%JpNfA!ggu8GNpa_e-quQGAO)>LLHSq1B z_@=ph-Z`+MOsW{Gn0HW8)pbC7-0mTzMD|1{!}(%5u_EV-8Svr$GAz#*1O3zgcB|9r za>?Fis(lPpNkvudQuz|gWnacBi5=a?;i-2fA`QA`5g?*#Hhhlo@rlr>x1vJ!W2_?M z8PU4~>D^F?o*?v{9CFV^M+5i%#EWnr0G}iCV8h&%av)41^7eFrx|iHCUI>-yPfK3*C%6*v!{x;A>KR%`TreS1U(J3uDzL+|i#t z{@z${(dIqCs}sX!^{-s1y%j))O>7fCvWW#gN3yV?=~P)NImW77_KoM#{!GS$$9geQ z5EE>n(1^8x!jR=cGDosp1RoxPhUG*&+CN}cb0Ttv`NLE|8;lA@Xl+tkVoq(TA+R>ZNbr_3V$fdBMF&&M+AIMlm zADovWo3h}xp1WNQxm*q)LoQbkKa$Ip@Hui7EON=L=kK*++nGy-1pMnO|4<@Vn-jRk zkifMnf$Q7^{B4nt>lv#CV^}+>v@8DxbT!Dok%$rbH^GN5jlzaEMY-I}SjE0(`!X;1 zZb2Ud?X850(B1~0Be%nbX)ELo#wyx1+Ly_CC%PC|?;=u!^=|m^Cl_JEtd(*vV-@TE zYc%a~;POtM{`G*j?Y-R*hdIN>zGmUB4bsq54JaB;|D6duBYF}TeEA84yUpQ z)zR@eI6cInf)q;RC8#kB$jcNN8IV`tbL3T63`nNhKV099Vd&r@%i~|Wi!HBLaLE!V z3SH(G?D_@69)!oC!UOP{c>rEF48R*|0N!*5z&l-xDU-Jtt7ZgWU2|^x4VRgZISk}Me6ewLX-+i^n=X%e~^&bP*`-^lYaeFSWb z{NRDj2~8yTeL{cZ{)iX@%}?wa$@ypa9Qg&76N!JYX*Clm9oe|Qnz?>6aQ&{h{&2ax zjXQRf{K?qJz&FWX@ESz?&ESZLf8cYZ6JDJP4gASa{%@!jrH>)Sf^St++OFx%`b>AsaKca1YLf2T%GSz%X{3uwP{CHiZxGOu&Y>cct`Y ztm5Dksp_{}^%`A2_zlpTGdu#l1$=l<0yYdiTDD@W+K2g0o&juY16Y3rwoL?Vj12I= z=BOtk)SW9P8Xk`l*%na-qJiuj$$C5Z92o@5$;3Yv<_<&nc7Ay6OR~r&-rmf%gMn?Z zVk_2ceSEfY81#1U`UP);@w-v(R|TTK$(ERN7-Gm_sLElO&SA>h=YV%@Rj$z-hMRL3 zVaQ>m%3+kwVbR*>kZ31|QgaSvh8z+qhjN|6_G_C1e!OibhYE8Jm4+Nfs~pDY9A>Y5 z4()A=vF03hG~}?8%3++&VfotU(B8J#*_^{Jh8%WPIgHmij9%Lu_?0^IoSb0JVK+k# z6IBko>m2IWK8Njf@3ZFM~||5D}a4mfXrq7MYs1Vm4C%OMUqiG zH>+v`>b?qfri*HQ;HKsMcIQKK@f5s}Gs_G=I|QF@vyPl6e4RY@vdx|?g12Rs_K^M1 zig#`1;vYx$XQ+Pkedf&SQaJz)bpp?}HZ*5t9;)-?Kt@gb2mdKk)j6$R!O7;Ntv1)j zS7s29;rCGSVvNQ=2rm4IEcW#AXO+r)M!KlAYzv`wNoR@joz3yBh` zLqmX_M$M50?2nClw#KVx9iPX?|6HU#Ths>!odomh<|HC=fX#= z_m-B-^x`spNVyRKK^je{InvBdS|R|;qy-KQ*vdA*1%N#F2Y^_xv$D5~ZX-HC7u4|M z91#Nagyi7R=xuB(K=;zsfC~{204_oezoW4et{3<+^dOtuqB8J3Qd;+B5r6oh$Ast>y*dq&I 
zk5aHlYuFD0*nA9jbVsn*RAocrq8RnGKkC!Z>iFiVj~wAHQvEL0{eIW|E>Zougf~H^aFi}p{Vvn}-qHOo_xqu*9&J9W><>_R zRG8lqj6>P5@)EfMNWsRvQn6j-vSl_2Hty91!s#6l4js@Aq1spdwqX1dOy-(`WUf`o zT<0cZy{n~PZ^&c~rAOFqC}6u$vE8KE-qURVS{+;Aa6(;s+HNjjyG60x>au0V1_OPY zfpBIA1C2Y>K4W1nCH&r8xVcb1o8@+uR9d}a+?tYHLv3TarmmqjCwHg>?^M}rW_7RX zYxS?{XAPS6Ij%IURaKi>L#j5gMo#+?RrNPXsy4LBr+v>F|M*8$Z)?o7Z&~AC-KZ+E z#!dTMMdEN<)zun5?N8SFvbK&jY1;3s@vrPub+)S1WrzCvj8$E%y{7%ewnZ4As%}>G zv_Du|&f4bIoT~M$nrT1L!#`?M)zhk*_5*9zu-40JnD!lO{F~5i>W$@wW_*b_rCxq^ zMc$=E-VLi>9GCP0<*&)LjFNj)+r5UiQn^pH-Oo0@#$4Z$t7%GQS|rt+%E|+9&rk=_ z7Qr`#Ype#4O0Bg-9#q{Pf-Sagg4E-xirmDo7SE<-VY;rS*22rvX?Yki_|#8by`);L z5exCBKU&(dBbpkrwLSJCXu7UCo$DcwAgm-Qk1E6sXINX!u(qDjrnqZc+FBbMn(J#+ z5?Of+F>Y4)s~xH4Y(q=4+a!;p1)ps#AKPM=bdnm%!4Q+-9MskXVXWn>e+sN9$xnQhu`yF&(Rtid}D z7@Qa}dc>%~!v+kl!vE)Ieb*<@KF_m9eIjGxTDjlKGlYScpBAt@(h!!!t6+^ z&m`31{pp%4zFORnYXI=GXkDo`u(UNdQ**j8?Y@8hoa+8OtlGe-#>SRKHLVS`2jip8 wFTlG(32kg>YRIK>(3+JORgah0gP%dm=CD^Rkd~%YW2==KB`>34W!s4V2gj7TGynhq diff --git a/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * 
sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/pp_tt012j.mad/test/cudacpp_test.mk b/epochX/cudacpp/pp_tt012j.mad/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/pp_tt012j.mad/test/cudacpp_test.mk +++ b/epochX/cudacpp/pp_tt012j.mad/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git 
a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index 9a1af87664..67638a92a6 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. @@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,22 +38,23 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. 
Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T save options auto_convert_model -save configuration file to /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t INFO: load particles INFO: load vertices @@ -70,7 +71,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.06830000877380371  +DEBUG: model prefixing takes 0.0938117504119873  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s 
u~ c~ d~ s~ @@ -85,7 +86,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 2.021 s +1 processes with 72 diagrams generated in 4.342 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -96,10 +97,10 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False - INFO: initialize a new directory: CODEGEN_mad_smeft_gg_tttt INFO: remove old information in CODEGEN_mad_smeft_gg_tttt DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt  -INFO: Creating subdirectories in 
directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt  +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 @@ -111,25 +112,25 @@ FileWriter t t~ t t~ WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx -DEBUG: len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 
28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1748]  -Generated helas calls for 1 subprocesses (72 diagrams) in 0.097 s -Wrote files for 119 helas calls in 0.474 s +DEBUG: len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1749]  +Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s +Wrote files for 119 helas calls in 0.432 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 
routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.204 s +ALOHA: aloha creates 5 routines in 0.297 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.193 s +ALOHA: aloha creates 10 routines in 0.304 s VVV5 VVV5 FFV1 @@ -139,32 +140,32 @@ ALOHA: aloha creates 10 routines in 0.193 s VVVV1 VVVV9 VVVV10 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h -INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h +INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages DEBUG: result.returncode =  0 [output.py at line 273]  -Output to directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt done. 
+Output to directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt done. Type "launch" to generate events from this process, or see -/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/README +/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/README Run "open index.html" to see more information about this process. quit -real 0m7.520s -user 0m3.917s -sys 0m0.620s -Code generation completed in 8 seconds +real 0m9.249s +user 0m8.497s +sys 0m0.583s +Code generation completed in 9 seconds ************************************************************ * * * W E L C O M E to * @@ -185,10 +186,10 @@ Code generation completed in 8 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards run @@ -215,10 +216,10 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards param diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/me5_configuration.txt b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/me5_configuration.txt index 712b1897aa..db7e3616c4 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/me5_configuration.txt @@ -255,7 +255,7 @@ # pineappl = pineappl -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat index 5e08560167..137f01e301 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-14-g6dba8f068 3.7.1 * +#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/smeft_gg_tttt.mad/Source/DHELAS/aloha_functions.f index e986b059a9..47699fa614 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/DHELAS/aloha_functions.f @@ -2022,21 +2022,6 @@ subroutine orxxxx(p,rmass,nhel,nsr , ro) end - complex*16 function THETA_FUNCTIONR(cond, out_true, out_false) - - double precision cond - double precision out_true, out_false - - if (cond.ge.0d0) then - THETA_FUNCTIONR = out_true - else - THETA_FUNCTIONR = out_false - endif - - return - - - end complex*16 function THETA_FUNCTION(cond, out_true, out_false) double precision cond 
diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/Bridge.h b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable 
multi-channel for helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph 
before computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc index 0d7fe2e5ae..56113b542d 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc @@ -2017,38 +2017,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -2074,7 +2086,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -2209,7 +2221,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -2233,7 +2245,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -2345,25 +2357,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -2426,7 +2449,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.h b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.h index 87d1743da6..aa52499cf0 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const 
unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f index e9f856aa23..5a7aff8889 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -893,9 +892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) 
CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f index 7f0900eb3e..31e3e60c31 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f @@ -337,6 +337,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -347,6 +350,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -443,7 +447,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -516,7 +520,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -529,6 +533,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -547,6 +553,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -598,7 +606,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -622,7 +630,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -630,7 +638,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f index 0f8b03e464..a3c2666b88 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f @@ -372,8 +372,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/addmothers.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/addmothers.f index d6cded9a2d..593c620d9b 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/addmothers.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/addmothers.f @@ -111,7 +111,7 @@ subroutine addmothers(ip,jpart,pb,isym,jsym,rscale,aqcd,aqed,buff, if (btest(mlevel,3)) then write(*,*)'unwgt.f: write out diagram ',igraphs(1) endif - lconfig = vec_igraph1(ivec) + lconfig = vec_igraph(ivec) endif is_LC=.true. 
maxcolor=0 diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cluster.inc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cluster.inc index 8ddf5bee13..940c25eac0 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cluster.inc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cluster.inc @@ -43,5 +43,5 @@ c parameters for sudakovs integer iipdg,iimode common/gamma_args/Q1,iipdg,iimode - integer vec_igraph1(VECSIZE_MEMMAX) - common/vec_igraph/vec_igraph1 + integer vec_igraph(VECSIZE_MEMMAX) + common/vec_igraph/vec_igraph diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA 
architecture flags # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to higherst + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA 
selected but probe failed) + ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc 
bldnone bldsse4 bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.cc +++ 
b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.h b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good 
helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/makefile_original.mk b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/makefile_original.mk index 6cb56d0409..348c283be7 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/makefile_original.mk +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/makefile_original.mk @@ -58,10 +58,7 @@ $(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) $(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -libcollier.$(dylibext): - ln -s $(LIBDIR)/collier_lib/libcollier.$(dylibext) || echo 'already done' - -gensym: $(SYMMETRY) configs.inc $(LIBS) libcollier.$(dylibext) +gensym: $(SYMMETRY) configs.inc $(LIBS) $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) $(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/myamp.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/myamp.f index bd02dfe2b4..5360566ef4 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/myamp.f @@ -139,7 +139,7 @@ logical function cut_bw(p) $ gForceBW(i,iconfig).eq.1)) if(onshell)then c Remove on-shell forbidden s-channels (gForceBW=2) (JA 2/10/11) - if(gForceBW(i,iconfig).eq.2.and.sde_strat.eq.1) then + if(gForceBW(i,iconfig).eq.2) then cut_bw = .true. 
return endif diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/reweight.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/reweight.f index 353e025d71..8e4672a421 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/reweight.f @@ -1416,6 +1416,7 @@ double precision function rewgt(p, ivec) rewgt=1.0d0 clustered=.false. + vec_igraph(ivec) = 0 ! default: no MLM graph selected for this event if(ickkw.le.0.and..not.use_syst) return @@ -1467,6 +1468,7 @@ double precision function rewgt(p, ivec) rewgt = 0d0 return endif + vec_igraph(ivec) = igraphs(1) ! save MLM-matched graph for this event c Store pdf information for systematics studies (initial) @@ -1592,10 +1594,6 @@ double precision function rewgt(p, ivec) c alpha_s weight if(ipdgcl(imocl(n),igraphs(1),iproc).ne.fake_id)then - if (q2now.le.4)then - rewgt=0d0 - return - endif rewgt=rewgt*alphas(alpsfact*sqrt(q2now))/asref c Store information for systematics studies if(use_syst)then @@ -1907,7 +1905,7 @@ subroutine update_scale_coupling_vec(all_p, all_wgt,all_q2fact, VECSIZE_USED) else all_q2fact(1,i) = q2fact(1) all_q2fact(2,i) = q2fact(2) - vec_igraph1(i) = igraphs(1) + vec_igraph(i) = igraphs(1) endif c call save_cl_val_to(i) c endif diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/banner.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/banner.py index 74f6b04b68..c248436e7f 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/banner.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/banner.py @@ -1004,8 +1004,6 @@ def __init__(self, finput=None, **opt): self.comments = {} # comment associated to parameters. can be display via help message # store the valid options for a given parameter. 
self.allowed_value = {} - # allow nickname for some parameter to avoid integer mapping for some var - self.shortcut_values = {} self.default_setup() @@ -1134,11 +1132,6 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): scan_targettype = self.scan_set[lower_name] del self.scan_set[lower_name] - # check if the user used a shortcut value (which are always str) - if lower_name in self.shortcut_values: - if isinstance(value,str) and value.strip().lower() in self.shortcut_values[lower_name]: - value = self.shortcut_values[lower_name][value.strip().lower()] - # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] @@ -1317,8 +1310,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): def add_param(self, name, value, system=False, comment=False, typelist=None, - allowed=[], - shortcut={}): + allowed=[]): """add a default parameter to the class""" lower_name = name.lower() @@ -1353,11 +1345,6 @@ def add_param(self, name, value, system=False, comment=False, typelist=None, assert val in allowed or '*' in allowed else: assert value in allowed or '*' in allowed - if shortcut: - if allowed and shortcut and '*' not in allowed: - assert all([val in allowed for val in shortcut.values()]), "Some shortcut value are not in the allowed list" - assert all([isinstance(v, str) for v in shortcut.keys()]), "All shortcut values should be str" - self.shortcut_values[lower_name] = shortcut #elif isinstance(value, bool) and allowed != ['*']: # self.allowed_value[name] = [True, False] @@ -4186,10 +4173,8 @@ def default_setup(self): allowed=['partonshower'], comment="list of check that can be bypassed manually.") self.add_param("python_seed", -2, include=False, hidden=True, comment="controlling python seed [handling in particular the final unweighting].\n -1 means use default from random module.\n -2 means set to same value as iseed") self.add_param("lpp1", 1, 
fortran_name="lpp(1)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='first beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("lpp2", 1, fortran_name="lpp(2)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='second beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("ebeam1", 6500.0, fortran_name="ebeam(1)") self.add_param("ebeam2", 6500.0, fortran_name="ebeam(2)") @@ -4198,24 +4183,18 @@ def default_setup(self): self.add_param("polbeam2", 0.0, fortran_name="pb2", hidden=True, comment="Beam polarization from -100 (left-handed) to 100 (right-handed) --use lpp=0 for this parameter--") self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_proton2', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(2)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (used for beam 2 if group_subprocess was False)') self.add_param('nb_neutron1', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(1)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_neutron2', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(2)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (of beam 2 if group_subprocess was False )') self.add_param('mass_ion1', -1.0, hidden=True, 
fortran_name="mass_ion(1)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 1)') self.add_param('mass_ion2', -1.0, hidden=True, fortran_name="mass_ion(2)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 2)') valid_pdf = ['lhapdf', 'cteq6_m','cteq6_l', 'cteq6l1','nn23lo', 'nn23lo1', 'nn23nlo','iww','eva','edff','chff','none','mixed']+\ sum(self.allowed_lep_densities.values(),[]) @@ -4228,14 +4207,12 @@ def default_setup(self): self.add_param("fixed_fac_scale1", False, hidden=True) self.add_param("fixed_fac_scale2", False, hidden=True) self.add_param("fixed_extra_scale", False, hidden=True) - self.add_param("scale", 91.1880, shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) + self.add_param("scale", 91.1880) + self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1") + self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2") self.add_param("mue_ref_fixed", 91.1880, hidden=True) self.add_param("dynamical_scale_choice", -1, comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2\n '4' is the center of mass energy\n'0' allows to use the user_hook definition (need to be defined via custom_fct entry) ", - allowed=[-1,0,1,2,3,4,10], - shortcut={'ckkw':-1,'ht':2,'ht/2':3,'et':1,'shat':4}, - ) + allowed=[-1,0,1,2,3,4,10]) self.add_param("mue_over_ref", 1.0, 
hidden=True, comment='ratio mu_other/mu for dynamical scale') self.add_param("ievo_eva",0,hidden=True, allowed=[0,1],fortran_name="ievo_eva", comment='eva: 0 for EW pdf muf evolution by q^2; 1 for evo by pT^2') @@ -5598,10 +5575,8 @@ def default_setup(self): self.add_param('niters_fo', 6, include=False) #seed and collider self.add_param('iseed', 0) - self.add_param('lpp1', 1, fortran_name='lpp(1)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) - self.add_param('lpp2', 1, fortran_name='lpp(2)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) + self.add_param('lpp1', 1, fortran_name='lpp(1)') + self.add_param('lpp2', 1, fortran_name='lpp(2)') self.add_param('ebeam1', 6500.0, fortran_name='ebeam(1)') self.add_param('ebeam2', 6500.0, fortran_name='ebeam(2)') self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", @@ -5644,15 +5619,13 @@ def default_setup(self): self.add_param('fixed_ren_scale', False) self.add_param('fixed_fac_scale', False) self.add_param('fixed_extra_scale', True, hidden=True, system=True) # set system since running from Ellis-Sexton scale not implemented - self.add_param('mur_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mur_ref_fixed', 91.118) self.add_param('muf1_ref_fixed', -1.0, hidden=True) - self.add_param('muf_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('muf_ref_fixed', 91.118) self.add_param('muf2_ref_fixed', -1.0, hidden=True) - self.add_param('mue_ref_fixed', 91.118, hidden=True, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mue_ref_fixed', 91.118, hidden=True) self.add_param("dynamical_scale_choice", [-1],fortran_name='dyn_scale', - allowed = [-2,-1,0,1,2,3,10], - shortcut={ 'ht/2':3,'ht':2,'et':1}, - comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n 
'2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") + allowed = [-2,-1,0,1,2,3,10], comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") self.add_param('fixed_qes_scale', False, hidden=True) self.add_param('qes_ref_fixed', -1.0, hidden=True) self.add_param('mur_over_ref', 1.0) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/common_run_interface.py index 6f82393c3f..3c5601e27d 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/common_run_interface.py @@ -5205,12 +5205,12 @@ def init_run(self, cards): if self.run_set: self.special_shortcut.update( {'ebeam':([float],['run_card ebeam1 %(0)s', 'run_card ebeam2 %(0)s']), - 'lpp': ([str],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), + 'lpp': ([int],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), 'lhc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), 'lep': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'ilc': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'lcc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), - 'fixed_scale': ([str],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 %(0)s']), + 'fixed_scale': ([float],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' 
,'run_card dsqrt_q2fact2 %(0)s']), 'no_parton_cut':([],['run_card nocut T']), 'cm_velocity':([float], [lambda self :self.set_CM_velocity]), 'pbp':([],['run_card lpp1 1', 'run_card lpp2 1','run_card nb_proton1 82', 'run_card nb_neutron1 126', 'run_card mass_ion1 195.0820996698','run_card nb_proton2 1', 'run_card nb_neutron2 0', 'run_card mass_ion1 -1']), @@ -5795,8 +5795,6 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed_for_run.remove('*') elif isinstance(self.run_card[args[-1]], bool): allowed_for_run = ['True', 'False'] - if args[-1].lower() in self.run_card.shortcut_values: - allowed_for_run += self.run_card.shortcut_values[args[-1].lower()] opts += [str(i) for i in allowed_for_run] diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/launch_plugin.py index 262d39a736..3bd0c281fc 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/launch_plugin.py @@ -38,11 +38,26 @@ def compile(self, *args, **opts): cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py - logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend in ['cpp', 'cppauto']: + backend_log = pjoin(opts["cwd"], ".resolved-backend") + # try to remove old file if present + try: + os.remove(backend_log) + except FileNotFoundError: + pass + misc.compile(["-f", "cudacpp.mk", f"BACKEND=cppauto", f"BACKEND_LOG={backend_log}", "detect-backend"], **opts) + try: + with open(backend_log, "r") as f: + resolved_backend = f.read().strip() + logger.info(f"Backend '{cudacpp_backend}' resolved as '{resolved_backend}'") + 
cudacpp_backend = resolved_backend + except FileNotFoundError: + raise RuntimeError("Could not resolve cudacpp_backend=cppauto|cpp; ensure Makefile detection runs properly.") + logger.info(f"Building madevent in madevent_interface.py with '{cudacpp_backend}' matrix elements") if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' else: - raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) + raise Exception(f"Invalid cudacpp_backend='{cudacpp_backend}': supported backends are [ '" + "', '".join(cudacpp_supported_backends) + "' ]") return misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined 
MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/test/cudacpp_test.mk b/epochX/cudacpp/smeft_gg_tttt.mad/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/test/cudacpp_test.mk +++ b/epochX/cudacpp/smeft_gg_tttt.mad/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index c1a6a8c137..c315bb1b7e 100644 --- 
a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. @@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,35 +38,36 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). 
Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T save options auto_convert_model -save configuration file to /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t -INFO: download model from http://feynrules.irmp.ucl.ac.be/raw-attachment/wiki/SMEFT/SMEFTsim_topU3l_MwScheme_UFO.tar.gz to the following directory: /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/models  ---2026-03-10 10:39:42-- http://feynrules.irmp.ucl.ac.be/raw-attachment/wiki/SMEFT/SMEFTsim_topU3l_MwScheme_UFO.tar.gz +INFO: download model from http://feynrules.irmp.ucl.ac.be/raw-attachment/wiki/SMEFT/SMEFTsim_topU3l_MwScheme_UFO.tar.gz to the following directory: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/models  +--2026-04-14 12:02:25-- http://feynrules.irmp.ucl.ac.be/raw-attachment/wiki/SMEFT/SMEFTsim_topU3l_MwScheme_UFO.tar.gz Resolving feynrules.irmp.ucl.ac.be (feynrules.irmp.ucl.ac.be)... 130.104.48.109 Connecting to feynrules.irmp.ucl.ac.be (feynrules.irmp.ucl.ac.be)|130.104.48.109|:80... connected. HTTP request sent, awaiting response... 200 Ok Length: 80562 (79K) [application/x-tar] Saving to: ‘tmp.tgz’ - 0K .......... 
.......... .......... .......... .......... 63% 832K 0s - 50K .......... .......... ........ 100% 70.5M=0.06s + 0K .......... .......... .......... .......... .......... 63% 767K 0s + 50K .......... .......... ........ 100% 47.5M=0.07s -2026-03-10 10:39:43 (1.27 MB/s) - ‘tmp.tgz’ saved [80562/80562] +2026-04-14 12:02:26 (1.17 MB/s) - ‘tmp.tgz’ saved [80562/80562] SMEFTsim_topU3l_MwScheme_UFO/ SMEFTsim_topU3l_MwScheme_UFO/__init__.py @@ -87,7 +88,7 @@ SMEFTsim_topU3l_MwScheme_UFO/lorentz.py SMEFTsim_topU3l_MwScheme_UFO/vertices.py SMEFTsim_topU3l_MwScheme_UFO/restrict_SMlimit_massless.dat fail to load model but auto_convert_model is on True. Trying to convert the model -convert model /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/models/SMEFTsim_topU3l_MwScheme_UFO +convert model /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/models/SMEFTsim_topU3l_MwScheme_UFO retry the load of the model import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t INFO: load particles @@ -105,7 +106,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.06466126441955566  +DEBUG: model prefixing takes 0.09116840362548828  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -123,13 +124,13 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 2.072 s +1 processes with 72 diagrams generated in 5.002 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 175]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ t t~ 
WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 @@ -138,18 +139,18 @@ INFO: Processing color information for process: g g > t t~ t t~ @1 DEBUG: type(fortran_model)= [output.py at line 224]  DEBUG: type(me)= me=0 [output.py at line 225]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'color_sum.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.h', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'cudacpp_overlay.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 226]  -INFO: Creating files in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc -INFO: Created files CPPProcess.h and 
CPPProcess.cc in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.094 s +INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. +Generated helas calls for 1 subprocesses (72 diagrams) in 0.201 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.194 s +ALOHA: aloha creates 5 routines in 0.268 s VVV5 VVV5 FFV1 @@ -159,17 +160,17 @@ ALOHA: aloha creates 5 routines in 0.194 s VVVV1 VVVV9 VVVV10 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h -INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
+FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h +INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
quit -real 0m4.177s -user 0m2.874s -sys 0m0.228s -Code generation completed in 4 seconds +real 0m7.646s +user 0m6.626s +sys 0m0.412s +Code generation completed in 8 seconds diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable 
multi-channel for helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph 
before computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc index dc1d2ecd53..698f03ec15 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc @@ -1965,38 +1965,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -2022,7 +2034,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -2157,7 +2169,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, 
gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -2181,7 +2193,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -2293,25 +2305,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
- const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -2374,7 +2397,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.h index 87d1743da6..aa52499cf0 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace 
mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See 
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to higherst + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) 
+ ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone 
bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc +++ 
b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) 
) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/test/cudacpp_test.mk b/epochX/cudacpp/smeft_gg_tttt.sa/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/test/cudacpp_test.mk +++ b/epochX/cudacpp/smeft_gg_tttt.sa/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index e0e58acbf4..08d8a27cb8 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. 
Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. @@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +38,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -547,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.055 s +1 processes with 6 diagrams generated in 0.136 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -558,10 +559,10 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False -- INFO: initialize a new directory: CODEGEN_mad_susy_gg_t1t1 INFO: remove old information in CODEGEN_mad_susy_gg_t1t1 DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1  +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards  +WARNING: File exists 
/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 @@ -573,52 +574,52 @@ FileWriter t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x -DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [model_handling.py at line 1748]  -Generated helas calls for 1 subprocesses (6 diagrams) in 0.005 s -Wrote files for 16 helas calls in 0.279 s +DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [model_handling.py at line 1749]  +Generated helas calls for 1 subprocesses (6 diagrams) in 0.010 s +Wrote files for 16 helas calls in 0.114 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.114 s +ALOHA: aloha creates 3 routines in 0.160 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.120 s +ALOHA: aloha creates 6 routines in 0.160 s VVV1 VSS1 VSS1 VSS1 VVSS1 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h -INFO: Created file HelAmps_MSSM_SLHA2.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. 
+FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h +INFO: Created file HelAmps_MSSM_SLHA2.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages DEBUG: result.returncode =  0 [output.py at line 273]  -Output to directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 done. +Output to directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 done. Type "launch" to generate events from this process, or see -/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/README +/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/README Run "open index.html" to see more information about this process. quit -real 0m5.502s -user 0m1.722s -sys 0m0.643s -Code generation completed in 6 seconds +real 0m5.118s +user 0m4.255s +sys 0m0.745s +Code generation completed in 5 seconds ************************************************************ * * * W E L C O M E to * @@ -639,10 +640,10 @@ Code generation completed in 6 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt -Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt treatcards run @@ -669,10 +670,10 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards param diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/me5_configuration.txt b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/me5_configuration.txt index 712b1897aa..db7e3616c4 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/me5_configuration.txt @@ -255,7 +255,7 @@ # pineappl = pineappl -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat index ee7d1277ff..1d5b59c692 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-14-g6dba8f068 3.7.1 * +#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/susy_gg_t1t1.mad/Source/DHELAS/aloha_functions.f index e986b059a9..47699fa614 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/DHELAS/aloha_functions.f @@ -2022,21 +2022,6 @@ subroutine orxxxx(p,rmass,nhel,nsr , ro) end - complex*16 function THETA_FUNCTIONR(cond, out_true, out_false) - - double precision cond - double precision out_true, out_false - - if (cond.ge.0d0) then - THETA_FUNCTIONR = out_true - else - THETA_FUNCTIONR = out_false - endif - - return - - - end complex*16 function THETA_FUNCTION(cond, out_true, out_false) double precision cond diff --git 
a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/Bridge.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable 
multi-channel for helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph 
before computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc index 7aef93970a..76e2f8a0ec 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc @@ -962,38 +962,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1019,7 +1031,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1154,7 +1166,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1178,7 +1190,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1290,25 +1302,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1371,7 +1394,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.h index 293c26a2e9..f5d3042d1a 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* 
allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f index c8bb469792..3bcc8b4dec 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -893,9 +892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) 
CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f index bdf00312dc..0621786ba2 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f @@ -337,6 +337,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -347,6 +350,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -443,7 +447,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -516,7 +520,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -529,6 +533,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -547,6 +553,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -598,7 +606,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -622,7 +630,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -630,7 +638,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f index c5dcf87c06..7c72f63f66 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f @@ -312,8 +312,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/addmothers.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/addmothers.f index d6cded9a2d..593c620d9b 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/addmothers.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/addmothers.f @@ -111,7 +111,7 @@ subroutine addmothers(ip,jpart,pb,isym,jsym,rscale,aqcd,aqed,buff, if (btest(mlevel,3)) then write(*,*)'unwgt.f: write out diagram ',igraphs(1) endif - lconfig = vec_igraph1(ivec) + lconfig = vec_igraph(ivec) endif is_LC=.true. 
maxcolor=0 diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cluster.inc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cluster.inc index 8ddf5bee13..940c25eac0 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cluster.inc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cluster.inc @@ -43,5 +43,5 @@ c parameters for sudakovs integer iipdg,iimode common/gamma_args/Q1,iipdg,iimode - integer vec_igraph1(VECSIZE_MEMMAX) - common/vec_igraph/vec_igraph1 + integer vec_igraph(VECSIZE_MEMMAX) + common/vec_igraph/vec_igraph diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA 
architecture flags # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capabilities (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, plus PTX for the highest one (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA
selected but probe failed) + ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc 
bldnone bldsse4 bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.cc +++ 
b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/makefile_original.mk b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/makefile_original.mk index 6cb56d0409..348c283be7 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/makefile_original.mk +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/makefile_original.mk @@ -58,10 +58,7 @@ $(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) $(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -libcollier.$(dylibext): - ln -s $(LIBDIR)/collier_lib/libcollier.$(dylibext) || echo 'already done' - -gensym: $(SYMMETRY) configs.inc $(LIBS) libcollier.$(dylibext) +gensym: $(SYMMETRY) configs.inc $(LIBS) $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) $(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/myamp.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/myamp.f index bd02dfe2b4..5360566ef4 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/myamp.f @@ -139,7 +139,7 @@ logical function cut_bw(p) $ gForceBW(i,iconfig).eq.1)) if(onshell)then c Remove on-shell forbidden s-channels (gForceBW=2) (JA 2/10/11) - if(gForceBW(i,iconfig).eq.2.and.sde_strat.eq.1) then + if(gForceBW(i,iconfig).eq.2) then cut_bw = .true. 
return endif diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/reweight.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/reweight.f index 353e025d71..8e4672a421 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/reweight.f @@ -1416,6 +1416,7 @@ double precision function rewgt(p, ivec) rewgt=1.0d0 clustered=.false. + vec_igraph(ivec) = 0 ! default: no MLM graph selected for this event if(ickkw.le.0.and..not.use_syst) return @@ -1467,6 +1468,7 @@ double precision function rewgt(p, ivec) rewgt = 0d0 return endif + vec_igraph(ivec) = igraphs(1) ! save MLM-matched graph for this event c Store pdf information for systematics studies (initial) @@ -1592,10 +1594,6 @@ double precision function rewgt(p, ivec) c alpha_s weight if(ipdgcl(imocl(n),igraphs(1),iproc).ne.fake_id)then - if (q2now.le.4)then - rewgt=0d0 - return - endif rewgt=rewgt*alphas(alpsfact*sqrt(q2now))/asref c Store information for systematics studies if(use_syst)then @@ -1907,7 +1905,7 @@ subroutine update_scale_coupling_vec(all_p, all_wgt,all_q2fact, VECSIZE_USED) else all_q2fact(1,i) = q2fact(1) all_q2fact(2,i) = q2fact(2) - vec_igraph1(i) = igraphs(1) + vec_igraph(i) = igraphs(1) endif c call save_cl_val_to(i) c endif diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/banner.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/banner.py index 74f6b04b68..c248436e7f 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/banner.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/banner.py @@ -1004,8 +1004,6 @@ def __init__(self, finput=None, **opt): self.comments = {} # comment associated to parameters. can be display via help message # store the valid options for a given parameter. 
self.allowed_value = {} - # allow nickname for some parameter to avoid integer mapping for some var - self.shortcut_values = {} self.default_setup() @@ -1134,11 +1132,6 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): scan_targettype = self.scan_set[lower_name] del self.scan_set[lower_name] - # check if the user used a shortcut value (which are always str) - if lower_name in self.shortcut_values: - if isinstance(value,str) and value.strip().lower() in self.shortcut_values[lower_name]: - value = self.shortcut_values[lower_name][value.strip().lower()] - # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] @@ -1317,8 +1310,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): def add_param(self, name, value, system=False, comment=False, typelist=None, - allowed=[], - shortcut={}): + allowed=[]): """add a default parameter to the class""" lower_name = name.lower() @@ -1353,11 +1345,6 @@ def add_param(self, name, value, system=False, comment=False, typelist=None, assert val in allowed or '*' in allowed else: assert value in allowed or '*' in allowed - if shortcut: - if allowed and shortcut and '*' not in allowed: - assert all([val in allowed for val in shortcut.values()]), "Some shortcut value are not in the allowed list" - assert all([isinstance(v, str) for v in shortcut.keys()]), "All shortcut values should be str" - self.shortcut_values[lower_name] = shortcut #elif isinstance(value, bool) and allowed != ['*']: # self.allowed_value[name] = [True, False] @@ -4186,10 +4173,8 @@ def default_setup(self): allowed=['partonshower'], comment="list of check that can be bypassed manually.") self.add_param("python_seed", -2, include=False, hidden=True, comment="controlling python seed [handling in particular the final unweighting].\n -1 means use default from random module.\n -2 means set to same value as iseed") self.add_param("lpp1", 1, 
fortran_name="lpp(1)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='first beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("lpp2", 1, fortran_name="lpp(2)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='second beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("ebeam1", 6500.0, fortran_name="ebeam(1)") self.add_param("ebeam2", 6500.0, fortran_name="ebeam(2)") @@ -4198,24 +4183,18 @@ def default_setup(self): self.add_param("polbeam2", 0.0, fortran_name="pb2", hidden=True, comment="Beam polarization from -100 (left-handed) to 100 (right-handed) --use lpp=0 for this parameter--") self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_proton2', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(2)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (used for beam 2 if group_subprocess was False)') self.add_param('nb_neutron1', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(1)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_neutron2', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(2)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (of beam 2 if group_subprocess was False )') self.add_param('mass_ion1', -1.0, hidden=True, 
fortran_name="mass_ion(1)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 1)') self.add_param('mass_ion2', -1.0, hidden=True, fortran_name="mass_ion(2)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 2)') valid_pdf = ['lhapdf', 'cteq6_m','cteq6_l', 'cteq6l1','nn23lo', 'nn23lo1', 'nn23nlo','iww','eva','edff','chff','none','mixed']+\ sum(self.allowed_lep_densities.values(),[]) @@ -4228,14 +4207,12 @@ def default_setup(self): self.add_param("fixed_fac_scale1", False, hidden=True) self.add_param("fixed_fac_scale2", False, hidden=True) self.add_param("fixed_extra_scale", False, hidden=True) - self.add_param("scale", 91.1880, shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) + self.add_param("scale", 91.1880) + self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1") + self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2") self.add_param("mue_ref_fixed", 91.1880, hidden=True) self.add_param("dynamical_scale_choice", -1, comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2\n '4' is the center of mass energy\n'0' allows to use the user_hook definition (need to be defined via custom_fct entry) ", - allowed=[-1,0,1,2,3,4,10], - shortcut={'ckkw':-1,'ht':2,'ht/2':3,'et':1,'shat':4}, - ) + allowed=[-1,0,1,2,3,4,10]) self.add_param("mue_over_ref", 1.0, 
hidden=True, comment='ratio mu_other/mu for dynamical scale') self.add_param("ievo_eva",0,hidden=True, allowed=[0,1],fortran_name="ievo_eva", comment='eva: 0 for EW pdf muf evolution by q^2; 1 for evo by pT^2') @@ -5598,10 +5575,8 @@ def default_setup(self): self.add_param('niters_fo', 6, include=False) #seed and collider self.add_param('iseed', 0) - self.add_param('lpp1', 1, fortran_name='lpp(1)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) - self.add_param('lpp2', 1, fortran_name='lpp(2)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) + self.add_param('lpp1', 1, fortran_name='lpp(1)') + self.add_param('lpp2', 1, fortran_name='lpp(2)') self.add_param('ebeam1', 6500.0, fortran_name='ebeam(1)') self.add_param('ebeam2', 6500.0, fortran_name='ebeam(2)') self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", @@ -5644,15 +5619,13 @@ def default_setup(self): self.add_param('fixed_ren_scale', False) self.add_param('fixed_fac_scale', False) self.add_param('fixed_extra_scale', True, hidden=True, system=True) # set system since running from Ellis-Sexton scale not implemented - self.add_param('mur_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mur_ref_fixed', 91.118) self.add_param('muf1_ref_fixed', -1.0, hidden=True) - self.add_param('muf_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('muf_ref_fixed', 91.118) self.add_param('muf2_ref_fixed', -1.0, hidden=True) - self.add_param('mue_ref_fixed', 91.118, hidden=True, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mue_ref_fixed', 91.118, hidden=True) self.add_param("dynamical_scale_choice", [-1],fortran_name='dyn_scale', - allowed = [-2,-1,0,1,2,3,10], - shortcut={ 'ht/2':3,'ht':2,'et':1}, - comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n 
'2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") + allowed = [-2,-1,0,1,2,3,10], comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") self.add_param('fixed_qes_scale', False, hidden=True) self.add_param('qes_ref_fixed', -1.0, hidden=True) self.add_param('mur_over_ref', 1.0) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/common_run_interface.py index 6f82393c3f..3c5601e27d 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/common_run_interface.py @@ -5205,12 +5205,12 @@ def init_run(self, cards): if self.run_set: self.special_shortcut.update( {'ebeam':([float],['run_card ebeam1 %(0)s', 'run_card ebeam2 %(0)s']), - 'lpp': ([str],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), + 'lpp': ([int],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), 'lhc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), 'lep': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'ilc': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'lcc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), - 'fixed_scale': ([str],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 %(0)s']), + 'fixed_scale': ([float],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' 
,'run_card dsqrt_q2fact2 %(0)s']), 'no_parton_cut':([],['run_card nocut T']), 'cm_velocity':([float], [lambda self :self.set_CM_velocity]), 'pbp':([],['run_card lpp1 1', 'run_card lpp2 1','run_card nb_proton1 82', 'run_card nb_neutron1 126', 'run_card mass_ion1 195.0820996698','run_card nb_proton2 1', 'run_card nb_neutron2 0', 'run_card mass_ion1 -1']), @@ -5795,8 +5795,6 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed_for_run.remove('*') elif isinstance(self.run_card[args[-1]], bool): allowed_for_run = ['True', 'False'] - if args[-1].lower() in self.run_card.shortcut_values: - allowed_for_run += self.run_card.shortcut_values[args[-1].lower()] opts += [str(i) for i in allowed_for_run] diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/launch_plugin.py index 262d39a736..3bd0c281fc 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/launch_plugin.py @@ -38,11 +38,26 @@ def compile(self, *args, **opts): cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py - logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend in ['cpp', 'cppauto']: + backend_log = pjoin(opts["cwd"], ".resolved-backend") + # try to remove old file if present + try: + os.remove(backend_log) + except FileNotFoundError: + pass + misc.compile(["-f", "cudacpp.mk", f"BACKEND=cppauto", f"BACKEND_LOG={backend_log}", "detect-backend"], **opts) + try: + with open(backend_log, "r") as f: + resolved_backend = f.read().strip() + logger.info(f"Backend '{cudacpp_backend}' resolved as '{resolved_backend}'") + cudacpp_backend = 
resolved_backend + except FileNotFoundError: + raise RuntimeError("Could not resolve cudacpp_backend=cppauto|cpp; ensure Makefile detection runs properly.") + logger.info(f"Building madevent in madevent_interface.py with '{cudacpp_backend}' matrix elements") if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' else: - raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) + raise Exception(f"Invalid cudacpp_backend='{cudacpp_backend}': supported backends are [ '" + "', '".join(cudacpp_supported_backends) + "' ]") return misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int 
bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/test/cudacpp_test.mk b/epochX/cudacpp/susy_gg_t1t1.mad/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/test/cudacpp_test.mk +++ b/epochX/cudacpp/susy_gg_t1t1.mad/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 0ee162c616..7ec4128ad9 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ 
b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. @@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +38,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -547,13 +548,13 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.055 s +1 processes with 6 diagrams generated in 0.184 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 175]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 @@ -562,32 +563,32 @@ INFO: Processing color information for process: g g > t1 t1~ @1 DEBUG: type(fortran_model)= [output.py at line 224]  DEBUG: type(me)= me=0 [output.py at line 225]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'color_sum.h', 'MemoryAccessHelpers.h', 
'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.h', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'cudacpp_overlay.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 226]  -INFO: Creating files in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/. 
-Generated helas calls for 1 subprocesses (6 diagrams) in 0.004 s +INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/. +Generated helas calls for 1 subprocesses (6 diagrams) in 0.016 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.113 s +ALOHA: aloha creates 3 routines in 0.146 s VVV1 VSS1 VSS1 VSS1 VVSS1 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h -INFO: Created file HelAmps_MSSM_SLHA2.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h +INFO: Created file HelAmps_MSSM_SLHA2.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. quit -real 0m1.441s -user 0m0.724s -sys 0m0.134s -Code generation completed in 1 seconds +real 0m1.939s +user 0m1.773s +sys 0m0.128s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/Bridge.h b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, 
int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before 
computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc index c5cac709d7..bea4c7340c 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc @@ -963,38 +963,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1020,7 +1032,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1155,7 +1167,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, 
gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1179,7 +1191,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1291,25 +1303,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
- const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1372,7 +1395,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.h b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.h index 293c26a2e9..f5d3042d1a 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // 
input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See 
https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to higherst + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) + ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute 
capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq 
(,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, 
const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.h b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) 
); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/test/cudacpp_test.mk b/epochX/cudacpp/susy_gg_t1t1.sa/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/test/cudacpp_test.mk +++ b/epochX/cudacpp/susy_gg_t1t1.sa/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 88e01c7e57..10227b8958 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -1,4 +1,3 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. 
Continue at your own risk  Running MG5 in debug mode Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. @@ -16,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +28,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +38,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -547,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.052 s +1 processes with 3 diagrams generated in 0.183 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -558,10 +559,10 @@ output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --ve INFO: initialize a new directory: CODEGEN_mad_susy_gg_tt INFO: remove old information in CODEGEN_mad_susy_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards  -WARNING: File exists /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses  +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt  +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt +WARNING: File exists /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards  +WARNING: File exists 
/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 @@ -573,49 +574,49 @@ FileWriter t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1723]  -DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1747]  -DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  -Generated helas calls for 1 subprocesses (3 diagrams) in 0.004 s -Wrote files for 10 helas calls in 0.273 s +DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1724]  +DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  +DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  +Generated helas calls for 1 subprocesses (3 diagrams) in 0.009 s +Wrote files for 10 helas calls in 0.090 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.095 s +ALOHA: aloha creates 2 routines in 0.140 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.079 s +ALOHA: aloha creates 4 routines in 0.167 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h -INFO: Created file HelAmps_MSSM_SLHA2.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. 
+FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h +INFO: Created file HelAmps_MSSM_SLHA2.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt +save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages DEBUG: result.returncode =  0 [output.py at line 273]  -Output to directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt done. +Output to directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt done. Type "launch" to generate events from this process, or see -/shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/README +/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/README Run "open index.html" to see more information about this process. quit -real 0m5.086s -user 0m1.635s -sys 0m0.704s -Code generation completed in 5 seconds +real 0m4.587s +user 0m3.940s +sys 0m0.561s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * @@ -636,10 +637,10 @@ Code generation completed in 5 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt -Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt treatcards run @@ -666,10 +667,10 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +INFO: load configuration from /home/dmass/.mg5/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". 
Set another one in ./input/mg5_configuration.txt treatcards param diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/me5_configuration.txt b/epochX/cudacpp/susy_gg_tt.mad/Cards/me5_configuration.txt index 712b1897aa..db7e3616c4 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/me5_configuration.txt @@ -255,7 +255,7 @@ # pineappl = pineappl -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /shared/git/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat index 3a6928f635..a25875a280 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-14-g6dba8f068 3.7.1 * +#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/susy_gg_tt.mad/Source/DHELAS/aloha_functions.f index e986b059a9..47699fa614 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/DHELAS/aloha_functions.f @@ -2022,21 +2022,6 @@ subroutine orxxxx(p,rmass,nhel,nsr , ro) end - complex*16 function THETA_FUNCTIONR(cond, out_true, out_false) - - double precision cond - double precision out_true, out_false - - if (cond.ge.0d0) then - THETA_FUNCTIONR = out_true - else - THETA_FUNCTIONR = out_false - endif - - return - - - end complex*16 function THETA_FUNCTION(cond, out_true, out_false) double precision cond diff --git 
a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/Bridge.h b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable 
multi-channel for helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before 
computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index b575475690..9b0a81b6a5 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -939,38 +939,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? 
could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -996,7 +1008,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1131,7 +1143,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1155,7 +1167,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
-#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1267,25 +1279,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1348,7 +1371,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h index 732f9919c9..58e1bfe668 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, 
// input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index 7f809ad0ff..cbe257bc8a 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -791,8 +791,7 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C Input: C pp 4 momentum of external particles C wgt weight from Monte Carlo -C imode 0 run, 1 init, 2 reweight, 3 finalize, 4: PDF only, 5: ME -C only +C imode 0 run, 1 init, 2 reweight, 3 finalize C Output: C Amplitude squared and summed C **************************************************** @@ -893,9 +892,8 @@ FUNCTION DSIGPROC(PP,ICONF,IPROC,IMIRROR,SYMCONF,CONFSUB,WGT C endif C set the running scale -C and update the couplings accordingly (but deactivate for -C discrete sampler(imode=5) and - IF (VECSIZE_MEMMAX.LE.1.AND.IMODE.NE.5) THEN ! no-vector (NB not VECSIZE_USED!) +C and update the couplings accordingly + IF (VECSIZE_MEMMAX.LE.1) THEN ! no-vector (NB not VECSIZE_USED!) 
CALL UPDATE_SCALE_COUPLING(PP, WGT) ENDIF diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index a68aa6e4c0..b79f45da06 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -337,6 +337,9 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION P1(0:3, NEXTERNAL) INTEGER IVEC, CURR_WARP, IWARP, NB_WARP_USED INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C DATA C @@ -347,6 +350,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- SELECTED_HEL(:) = 0 SELECTED_COL(:) = 0 + IGRAPH(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -443,7 +447,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO ! end loop on IWARP/IVEC ENDDO ! 
end loop on the CURR_WARP CALL SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, ALL_OUT , SELECTED_HEL, SELECTED_COL, VECSIZE_USED) DO CURR_WARP=1, NB_WARP_USED @@ -516,7 +520,7 @@ SUBROUTINE PRINT_ZERO_AMP1() SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, - $ OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) + $ IGRAPH, OUT, SELECTED_HEL, SELECTED_COL, VECSIZE_USED) IMPLICIT NONE @@ -529,6 +533,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, DOUBLE PRECISION HEL_RAND(VECSIZE_MEMMAX) DOUBLE PRECISION COL_RAND(VECSIZE_MEMMAX) INTEGER CHANNELS(VECSIZE_MEMMAX) +C Per-event MLM graph: igraphs(1) from REWGT (0 = no MLM) + INTEGER IGRAPH(VECSIZE_MEMMAX) DOUBLE PRECISION OUT(VECSIZE_MEMMAX) INTEGER SELECTED_HEL(VECSIZE_MEMMAX) INTEGER SELECTED_COL(VECSIZE_MEMMAX) @@ -547,6 +553,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) + #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' @@ -598,7 +606,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) CALL COUNTERS_SMATRIX1MULTI_START( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. C ! This is a workaround for @@ -622,7 +630,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! 
multi channel disabled - & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN @@ -630,7 +638,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, STOP ENDIF CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled - & HEL_RAND, COL_RAND, CHANNELS, OUT2, + & HEL_RAND, COL_RAND, CHANNELS, IGRAPH, OUT2, & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacppMEs=0 diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index ef4145fa88..211e4703dc 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -324,8 +324,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) LOGICAL CHOSEN_SO_CONFIGS(NSQAMPSO) DATA CHOSEN_SO_CONFIGS/.TRUE./ SAVE CHOSEN_SO_CONFIGS - DOUBLE PRECISION BWCUTOFF - COMMON/TO_BWCUTOFF/ BWCUTOFF C C ARGUMENTS C diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/addmothers.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/addmothers.f index d6cded9a2d..593c620d9b 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/addmothers.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/addmothers.f @@ -111,7 +111,7 @@ subroutine addmothers(ip,jpart,pb,isym,jsym,rscale,aqcd,aqed,buff, if (btest(mlevel,3)) then write(*,*)'unwgt.f: write out diagram ',igraphs(1) endif - lconfig = vec_igraph1(ivec) + lconfig = vec_igraph(ivec) endif is_LC=.true. 
maxcolor=0 diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cluster.inc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cluster.inc index 8ddf5bee13..940c25eac0 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cluster.inc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cluster.inc @@ -43,5 +43,5 @@ c parameters for sudakovs integer iipdg,iimode common/gamma_args/Q1,iipdg,iimode - integer vec_igraph1(VECSIZE_MEMMAX) - common/vec_igraph/vec_igraph1 + integer vec_igraph(VECSIZE_MEMMAX) + common/vec_igraph/vec_igraph diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See 
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) 
+ ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone 
bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.cc @@ 
-91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.h b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.h +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/makefile_original.mk b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/makefile_original.mk index 6cb56d0409..348c283be7 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/makefile_original.mk +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/makefile_original.mk @@ -58,10 +58,7 @@ $(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) $(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -libcollier.$(dylibext): - ln -s $(LIBDIR)/collier_lib/libcollier.$(dylibext) || echo 'already done' - -gensym: $(SYMMETRY) configs.inc $(LIBS) libcollier.$(dylibext) +gensym: $(SYMMETRY) configs.inc $(LIBS) $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) $(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/myamp.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/myamp.f index bd02dfe2b4..5360566ef4 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/myamp.f @@ -139,7 +139,7 @@ logical function cut_bw(p) $ gForceBW(i,iconfig).eq.1)) if(onshell)then c Remove on-shell forbidden s-channels (gForceBW=2) (JA 2/10/11) - if(gForceBW(i,iconfig).eq.2.and.sde_strat.eq.1) then + if(gForceBW(i,iconfig).eq.2) then cut_bw = .true. 
return endif diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/reweight.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/reweight.f index 353e025d71..8e4672a421 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/reweight.f @@ -1416,6 +1416,7 @@ double precision function rewgt(p, ivec) rewgt=1.0d0 clustered=.false. + vec_igraph(ivec) = 0 ! default: no MLM graph selected for this event if(ickkw.le.0.and..not.use_syst) return @@ -1467,6 +1468,7 @@ double precision function rewgt(p, ivec) rewgt = 0d0 return endif + vec_igraph(ivec) = igraphs(1) ! save MLM-matched graph for this event c Store pdf information for systematics studies (initial) @@ -1592,10 +1594,6 @@ double precision function rewgt(p, ivec) c alpha_s weight if(ipdgcl(imocl(n),igraphs(1),iproc).ne.fake_id)then - if (q2now.le.4)then - rewgt=0d0 - return - endif rewgt=rewgt*alphas(alpsfact*sqrt(q2now))/asref c Store information for systematics studies if(use_syst)then @@ -1907,7 +1905,7 @@ subroutine update_scale_coupling_vec(all_p, all_wgt,all_q2fact, VECSIZE_USED) else all_q2fact(1,i) = q2fact(1) all_q2fact(2,i) = q2fact(2) - vec_igraph1(i) = igraphs(1) + vec_igraph(i) = igraphs(1) endif c call save_cl_val_to(i) c endif diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/banner.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/banner.py index 74f6b04b68..c248436e7f 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/banner.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/banner.py @@ -1004,8 +1004,6 @@ def __init__(self, finput=None, **opt): self.comments = {} # comment associated to parameters. can be display via help message # store the valid options for a given parameter. 
self.allowed_value = {} - # allow nickname for some parameter to avoid integer mapping for some var - self.shortcut_values = {} self.default_setup() @@ -1134,11 +1132,6 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): scan_targettype = self.scan_set[lower_name] del self.scan_set[lower_name] - # check if the user used a shortcut value (which are always str) - if lower_name in self.shortcut_values: - if isinstance(value,str) and value.strip().lower() in self.shortcut_values[lower_name]: - value = self.shortcut_values[lower_name][value.strip().lower()] - # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] @@ -1317,8 +1310,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): def add_param(self, name, value, system=False, comment=False, typelist=None, - allowed=[], - shortcut={}): + allowed=[]): """add a default parameter to the class""" lower_name = name.lower() @@ -1353,11 +1345,6 @@ def add_param(self, name, value, system=False, comment=False, typelist=None, assert val in allowed or '*' in allowed else: assert value in allowed or '*' in allowed - if shortcut: - if allowed and shortcut and '*' not in allowed: - assert all([val in allowed for val in shortcut.values()]), "Some shortcut value are not in the allowed list" - assert all([isinstance(v, str) for v in shortcut.keys()]), "All shortcut values should be str" - self.shortcut_values[lower_name] = shortcut #elif isinstance(value, bool) and allowed != ['*']: # self.allowed_value[name] = [True, False] @@ -4186,10 +4173,8 @@ def default_setup(self): allowed=['partonshower'], comment="list of check that can be bypassed manually.") self.add_param("python_seed", -2, include=False, hidden=True, comment="controlling python seed [handling in particular the final unweighting].\n -1 means use default from random module.\n -2 means set to same value as iseed") self.add_param("lpp1", 1, 
fortran_name="lpp(1)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='first beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("lpp2", 1, fortran_name="lpp(2)", allowed=[-1,1,0,2,3,9,-2,-3,4,-4], - shortcut={'p':1,"p~":-1,'e-':3,'e+':-3,'mu-':4,'mu+':-4, 'no':0}, comment='second beam energy distribution:\n 0: fixed energy\n 1: PDF of proton\n -1: PDF of antiproton\n 2:elastic photon from proton, +/-3:PDF of electron/positron, +/-4:PDF of muon/antimuon, 9: PLUGIN MODE') self.add_param("ebeam1", 6500.0, fortran_name="ebeam(1)") self.add_param("ebeam2", 6500.0, fortran_name="ebeam(2)") @@ -4198,24 +4183,18 @@ def default_setup(self): self.add_param("polbeam2", 0.0, fortran_name="pb2", hidden=True, comment="Beam polarization from -100 (left-handed) to 100 (right-handed) --use lpp=0 for this parameter--") self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_proton2', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(2)", - shortcut={'lead':82}, comment='For heavy ion physics nb of proton in the ion (used for beam 2 if group_subprocess was False)') self.add_param('nb_neutron1', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(1)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (for both beam but if group_subprocess was False)') self.add_param('nb_neutron2', 0, hidden=True, allowed=[1,0, 126 , '*'],fortran_name="nb_neutron(2)", - shortcut={'lead':126}, comment='For heavy ion physics nb of neutron in the ion (of beam 2 if group_subprocess was False )') self.add_param('mass_ion1', -1.0, hidden=True, 
fortran_name="mass_ion(1)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 1)') self.add_param('mass_ion2', -1.0, hidden=True, fortran_name="mass_ion(2)", allowed=[-1,0, 0.938, 207.9766521*0.938, 0.000511, 0.105, '*'], - shortcut={'proton':0.938,'lead':207.9766521*0.938,'electron':0.000511,'muon':0.105}, comment='For heavy ion physics mass in GeV of the ion (of beam 2)') valid_pdf = ['lhapdf', 'cteq6_m','cteq6_l', 'cteq6l1','nn23lo', 'nn23lo1', 'nn23nlo','iww','eva','edff','chff','none','mixed']+\ sum(self.allowed_lep_densities.values(),[]) @@ -4228,14 +4207,12 @@ def default_setup(self): self.add_param("fixed_fac_scale1", False, hidden=True) self.add_param("fixed_fac_scale2", False, hidden=True) self.add_param("fixed_extra_scale", False, hidden=True) - self.add_param("scale", 91.1880, shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) - self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2", shortcut={'mz':91.1880, 'mh':125.0, 'mt':173.0, 'mtau':1.77686}) + self.add_param("scale", 91.1880) + self.add_param("dsqrt_q2fact1", 91.1880, fortran_name="sf1") + self.add_param("dsqrt_q2fact2", 91.1880, fortran_name="sf2") self.add_param("mue_ref_fixed", 91.1880, hidden=True) self.add_param("dynamical_scale_choice", -1, comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2\n '4' is the center of mass energy\n'0' allows to use the user_hook definition (need to be defined via custom_fct entry) ", - allowed=[-1,0,1,2,3,4,10], - shortcut={'ckkw':-1,'ht':2,'ht/2':3,'et':1,'shat':4}, - ) + allowed=[-1,0,1,2,3,4,10]) self.add_param("mue_over_ref", 1.0, 
hidden=True, comment='ratio mu_other/mu for dynamical scale') self.add_param("ievo_eva",0,hidden=True, allowed=[0,1],fortran_name="ievo_eva", comment='eva: 0 for EW pdf muf evolution by q^2; 1 for evo by pT^2') @@ -5598,10 +5575,8 @@ def default_setup(self): self.add_param('niters_fo', 6, include=False) #seed and collider self.add_param('iseed', 0) - self.add_param('lpp1', 1, fortran_name='lpp(1)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) - self.add_param('lpp2', 1, fortran_name='lpp(2)', - shortcut={'p':1, 'p~':-1, 'e-': 3, 'e+':-3, 'mu-':4, 'mu+':-4}) + self.add_param('lpp1', 1, fortran_name='lpp(1)') + self.add_param('lpp2', 1, fortran_name='lpp(2)') self.add_param('ebeam1', 6500.0, fortran_name='ebeam(1)') self.add_param('ebeam2', 6500.0, fortran_name='ebeam(2)') self.add_param('nb_proton1', 1, hidden=True, allowed=[1,0, 82 , '*'],fortran_name="nb_proton(1)", @@ -5644,15 +5619,13 @@ def default_setup(self): self.add_param('fixed_ren_scale', False) self.add_param('fixed_fac_scale', False) self.add_param('fixed_extra_scale', True, hidden=True, system=True) # set system since running from Ellis-Sexton scale not implemented - self.add_param('mur_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mur_ref_fixed', 91.118) self.add_param('muf1_ref_fixed', -1.0, hidden=True) - self.add_param('muf_ref_fixed', 91.118, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('muf_ref_fixed', 91.118) self.add_param('muf2_ref_fixed', -1.0, hidden=True) - self.add_param('mue_ref_fixed', 91.118, hidden=True, shortcut={'mz':91.118, 'mw':80.419, 'mt':172.5, 'mh':125.0}) + self.add_param('mue_ref_fixed', 91.118, hidden=True) self.add_param("dynamical_scale_choice", [-1],fortran_name='dyn_scale', - allowed = [-2,-1,0,1,2,3,10], - shortcut={ 'ht/2':3,'ht':2,'et':1}, - comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n 
'2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") + allowed = [-2,-1,0,1,2,3,10], comment="\'-1\' is based on CKKW back clustering (following feynman diagram).\n \'1\' is the sum of transverse energy.\n '2' is HT (sum of the transverse mass)\n '3' is HT/2, '0' allows to use the user_hook definition (need to be defined via custom_fct entry) ") self.add_param('fixed_qes_scale', False, hidden=True) self.add_param('qes_ref_fixed', -1.0, hidden=True) self.add_param('mur_over_ref', 1.0) diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/common_run_interface.py index 6f82393c3f..3c5601e27d 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/common_run_interface.py @@ -5205,12 +5205,12 @@ def init_run(self, cards): if self.run_set: self.special_shortcut.update( {'ebeam':([float],['run_card ebeam1 %(0)s', 'run_card ebeam2 %(0)s']), - 'lpp': ([str],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), + 'lpp': ([int],['run_card lpp1 %(0)s', 'run_card lpp2 %(0)s' ]), 'lhc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), 'lep': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'ilc': ([int],['run_card lpp1 0', 'run_card lpp2 0', 'run_card ebeam1 %(0)s/2', 'run_card ebeam2 %(0)s/2']), 'lcc': ([float],['run_card lpp1 1', 'run_card lpp2 1', 'run_card ebeam1 %(0)s*1000/2', 'run_card ebeam2 %(0)s*1000/2']), - 'fixed_scale': ([str],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card dsqrt_q2fact2 %(0)s']), + 'fixed_scale': ([float],['run_card fixed_fac_scale T', 'run_card fixed_ren_scale T', 'run_card scale %(0)s', 'run_card dsqrt_q2fact1 %(0)s' ,'run_card 
dsqrt_q2fact2 %(0)s']), 'no_parton_cut':([],['run_card nocut T']), 'cm_velocity':([float], [lambda self :self.set_CM_velocity]), 'pbp':([],['run_card lpp1 1', 'run_card lpp2 1','run_card nb_proton1 82', 'run_card nb_neutron1 126', 'run_card mass_ion1 195.0820996698','run_card nb_proton2 1', 'run_card nb_neutron2 0', 'run_card mass_ion1 -1']), @@ -5795,8 +5795,6 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed_for_run.remove('*') elif isinstance(self.run_card[args[-1]], bool): allowed_for_run = ['True', 'False'] - if args[-1].lower() in self.run_card.shortcut_values: - allowed_for_run += self.run_card.shortcut_values[args[-1].lower()] opts += [str(i) for i in allowed_for_run] diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/launch_plugin.py index 262d39a736..3bd0c281fc 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/launch_plugin.py @@ -38,11 +38,26 @@ def compile(self, *args, **opts): cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py - logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend in ['cpp', 'cppauto']: + backend_log = pjoin(opts["cwd"], ".resolved-backend") + # try to remove old file if present + try: + os.remove(backend_log) + except FileNotFoundError: + pass + misc.compile(["-f", "cudacpp.mk", f"BACKEND=cppauto", f"BACKEND_LOG={backend_log}", "detect-backend"], **opts) + try: + with open(backend_log, "r") as f: + resolved_backend = f.read().strip() + logger.info(f"Backend '{cudacpp_backend}' resolved as '{resolved_backend}'") + cudacpp_backend = resolved_backend + 
except FileNotFoundError: + raise RuntimeError("Could not resolve cudacpp_backend=cppauto|cpp; ensure Makefile detection runs properly.") + logger.info(f"Building madevent in madevent_interface.py with '{cudacpp_backend}' matrix elements") if cudacpp_backend in cudacpp_supported_backends : args[0][0] = 'madevent_' + cudacpp_backend + '_link' else: - raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) + raise Exception(f"Invalid cudacpp_backend='{cudacpp_backend}': supported backends are [ '" + "', '".join(cudacpp_supported_backends) + "' ]") return misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( 
ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/susy_gg_tt.mad/test/cudacpp_test.mk b/epochX/cudacpp/susy_gg_tt.mad/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/test/cudacpp_test.mk +++ b/epochX/cudacpp/susy_gg_tt.mad/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index 7142d5e27a..602c164e6a 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ 
b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -1,5 +1,5 @@ -WARNING:root:Support for Python3.9 (and below) has been dropped since end of 2025. Please consider update your version of Python. Continue at your own risk  Running MG5 in debug mode +('WARNING: loading of madgraph too slow!!!', 0.5225210189819336) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. It has been validated for the last time with version: 3.6.5 @@ -16,7 +16,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-14-g6dba8f068 3.7.1 * +* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -29,6 +29,7 @@ It has been validated for the last time with version: 3.6.5 * Type 'tutorial MadLoop' to learn how MadLoop works * * * ************************************************************ +load MG5 configuration from /home/dmass/.mg5/mg5_configuration.txt load MG5 configuration from input/mg5_configuration.txt fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). @@ -38,15 +39,16 @@ eMELA-config does not seem to correspond to a valid eMELA-config executable. Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). MG5_aMC> set eMELA /PATH/TO/eMELA-config +set ninja to /home/dmass/Apps/HEPTools/lib +set collier to /home/dmass/Apps/HEPTools/lib lhapdf-config does not seem to correspond to a valid lhapdf-config executable. Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). 
Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt Using default gzip "pigz". Set another one in ./input/mg5_configuration.txt -import /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt.mg +import /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -54,7 +56,7 @@ set zerowidth_tchannel F import model MSSM_SLHA2 INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.4310164451599121  +DEBUG: model prefixing takes 0.5660371780395508  INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . INFO: Detect SLHA2 format. keeping restricted parameter in the param_card DEBUG: Simplifying conditional expressions  @@ -550,13 +552,13 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.054 s +1 processes with 3 diagrams generated in 0.120 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 175]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 180]  -INFO: Creating subdirectories in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt +INFO: Creating subdirectories in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 @@ -565,30 +567,30 @@ INFO: Processing color information for process: g g > t t~ @1 DEBUG: type(fortran_model)= [output.py at line 224]  DEBUG: type(me)= me=0 [output.py at line 225]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'color_sum.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 
'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.h', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'cudacpp_overlay.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 226]  -INFO: Creating files in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/. -Generated helas calls for 1 subprocesses (3 diagrams) in 0.004 s +INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/. 
+Generated helas calls for 1 subprocesses (3 diagrams) in 0.007 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.082 s +ALOHA: aloha creates 2 routines in 0.127 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h -INFO: Created file HelAmps_MSSM_SLHA2.h in directory /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h +INFO: Created file HelAmps_MSSM_SLHA2.h in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h -FileWriter for /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h +FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory -INFO: /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /shared/git/madgraph4gpu/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. +INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
quit -real 0m2.103s -user 0m1.223s -sys 0m0.178s -Code generation completed in 2 seconds +real 0m3.115s +user 0m2.811s +sys 0m0.239s +Code generation completed in 4 seconds diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/Bridge.h b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/Bridge.h index 4e3f17e0dd..9cdf2f90d1 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/Bridge.h @@ -125,7 +125,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? */ - void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #else /** * Sequence to be executed for the vectorized CPU matrix element calculation @@ -143,7 +143,7 @@ namespace mg5amcCpu * @param selcol the pointer to the output selected colors * @param goodHelOnly quit after computing good helicities? 
*/ - void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); + void cpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool goodHelOnly = false ); #endif // Return the number of good helicities (-1 initially when they have not yet @@ -343,6 +343,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -394,6 +395,7 @@ paramCard; #endif "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); #ifdef MGONGPUCPP_VERBOSE @@ -423,6 +425,7 @@ paramCard; #endif const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -454,6 +457,7 @@ paramCard; #endif "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); } if( goodHelOnly ) return; + m_pmek->setigraph( igraph ); m_pmek->computeMatrixElements( useChannelIds ); #ifdef MGONGPUCPP_VERBOSE flagAbnormalMEs( m_hstMEs.data(), m_nevt ); diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/BridgeKernels.cc index 62e2c3af96..2d46db185e 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/BridgeKernels.cc +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/BridgeKernels.cc @@ -80,7 +80,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel 
for helicity filtering - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -90,7 +90,7 @@ namespace mg5amcCpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- @@ -139,7 +139,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = true; constexpr unsigned int* pChannelIds = nullptr; // disable multi-channel for helicity filtering - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, nullptr, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); return m_bridge.nGoodHel(); } @@ -149,7 +149,7 @@ namespace mg5amcGpu { constexpr bool goodHelOnly = false; const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MatrixElementKernels.cc index b61df224f1..1e7dcf38fe 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MatrixElementKernels.cc @@ -220,7 +220,7 @@ namespace mg5amcCpu computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_numerators.data(), m_denominators.data(), nevt() ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), nevt() ); @@ -504,7 +504,7 @@ namespace mg5amcGpu #endif #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const unsigned int* pChannelIds = ( useChannelIds ? 
m_channelIds.data() : nullptr ); - sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), pChannelIds, m_igraph, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), m_colJamp2s.data(), m_pHelNumerators->data(), m_pHelDenominators->data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); #else assert( useChannelIds == false ); sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_matrixElements.data(), m_selhel.data(), m_pHelMEs->data(), m_pHelJamps->data(), ghelAllBlasTmp, pBlasHandle, m_helStreams, m_gpublocks, m_gputhreads ); diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MatrixElementKernels.h index 16f8874888..9382732d9f 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MatrixElementKernels.h +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/MatrixElementKernels.h @@ -46,6 +46,9 @@ namespace mg5amcCpu // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) virtual int computeGoodHelicities() = 0; + // Set the per-event MLM graph array (nullptr = no MLM matching; must be called before computeMatrixElements if needed) + void setigraph( const int* igraph ) { m_igraph = igraph; } + // Compute matrix elements virtual void computeMatrixElements( const bool useChannelIds ) = 0; @@ -84,6 +87,9 @@ namespace mg5amcCpu // The buffer for the channel ids for single-diagram enhancement const BufferChannelIds& m_channelIds; + // The per-event MLM graph array (nullptr = no MLM; set via setigraph before 
computeMatrixElements) + const int* m_igraph = nullptr; + // The buffer for the output matrix elements BufferMatrixElements& m_matrixElements; diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc index e0e3bfd321..838c9595bc 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc @@ -936,38 +936,50 @@ namespace mg5amcCpu select_col( int* allselcol, // output: color selection[nevt] const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable SDE enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM const fptype_sv* allJamp2s, // input: jamp2[ncolor][nevt] for color choice (nullptr if disabled) const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) { const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) // SCALAR channelId for the current event (CUDA) unsigned int channelId = gpu_channelId( allChannelIds ); + // Per-event MLM graph (0 = no MLM) + const int igraph = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + if( channelId != 0 || igraph != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } // Determine the jamp2 for this event (TEMPORARY? could do this with a dedicated memory accessor instead...) fptype_sv jamp2_sv[ncolor] = { 0 }; assert( allJamp2s != nullptr ); // sanity check using J2_ACCESS = DeviceAccessJamp2; for( int icolC = 0; icolC < ncolor; icolC++ ) jamp2_sv[icolC] = J2_ACCESS::kernelAccessIcolConst( allJamp2s, icolC ); - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -993,7 +1005,7 @@ namespace mg5amcCpu } else { - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) } return; } @@ -1128,7 +1140,7 @@ namespace mg5amcCpu gpuLaunchKernel( add_and_select_hel, gpublocks, gputhreads, allselhel, allrndhel, ghelAllMEs, allMEs, gpublocks * gputhreads ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Event-by-event random choice of color #402 - gpuLaunchKernel( select_col, gpublocks, gputhreads, allselcol, allrndcol, allChannelIds, colAllJamp2s, gpublocks * gputhreads ); + gpuLaunchKernel( select_col, gpublocks, 
gputhreads, allselcol, allrndcol, allChannelIds, allIgraph, colAllJamp2s, gpublocks * gputhreads ); #endif // *** END OF PART 1a - CUDA (one event per GPU thread) *** @@ -1152,7 +1164,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, mgOnGpu::icolamp, mgOnGpu::channel2iconfig +#define _OMPLIST1 , allDenominators, allNumerators, allChannelIds, allIgraph, mgOnGpu::icolamp, mgOnGpu::channel2iconfig #else #define _OMPLIST1 #endif @@ -1264,25 +1276,36 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph + if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) - { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 - } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
- const int iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_page != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } fptype_sv targetamp[ncolor] = { 0 }; // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] @@ -1345,7 +1368,7 @@ namespace mg5amcCpu for( int ieppV = 0; ieppV < neppV; ++ieppV ) { const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.h b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.h index 732f9919c9..58e1bfe668 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.h @@ -163,6 +163,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] @@ -187,6 +188,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random 
numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk index f5bf67efbc..7969c42777 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s) # Detect architecture (x86_64, ppc64le...) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +UNAME_M := $(shell uname -m) #------------------------------------------------------------------------------- @@ -57,10 +58,11 @@ endif #=== Redefine BACKEND if the current value is 'cppauto' # Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available) +BACKEND_ORIG := $(BACKEND) ifeq ($(BACKEND),cppauto) ifeq ($(UNAME_P),ppc64le) override BACKEND = cppsse4 - else ifneq (,$(filter $(UNAME_P),arm aarch64)) + else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) override BACKEND = cppsse4 else ifeq ($(wildcard /proc/cpuinfo),) override BACKEND = cppnone @@ -84,6 +86,11 @@ else $(info BACKEND='$(BACKEND)') endif +# Create file with the resolved backend in case user chooses 'cppauto' +BACKEND_LOG ?= .resolved-backend +ifneq ($(BACKEND_ORIG),$(BACKEND)) + $(file >$(BACKEND_LOG),$(BACKEND)) +endif #------------------------------------------------------------------------------- #=== Configure the C++ compiler @@ -184,15 +191,32 @@ ifeq ($(BACKEND),cuda) # NVidia CUDA architecture flags # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html # See 
https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. + # Default: detect all compute capability (e.g., "8.0", "8.6", "9.0"), unique and sorted from lowest to highest + # then we embed device code for each compute capability, and for the highest PTX (forward-compatible) + # use nvidia-smi and validate output with grep before going forward + DETECTED_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+\.[0-9]+$$' | tr -d '.' | sort -un) # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + MADGRAPH_CUDA_ARCHITECTURE ?= $(foreach arch,$(DETECTED_CC),$(arch)$(comma)) + # Convert to space-separated list for looping + MADGRAPH_CUDA_ARCH_LIST ?= $(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)) + + # Fallback if detection failed (box has CUDA selected but probe failed) + ifeq ($(strip $(MADGRAPH_CUDA_ARCH_LIST)),) + # Default: use compute
capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster) + # This will embed device code for 70, and PTX for 70+ + MADGRAPH_CUDA_ARCHITECTURE := 70 + MADGRAPH_CUDA_ARCH_LIST := 70 + $(info Automatic compute capability detection failed; defaulting to $(MADGRAPH_CUDA_ARCHITECTURE)) + $(info Override with: make MADGRAPH_CUDA_ARCHITECTURE=) + endif + + # Build for every detected SM, and add one PTX for the highest SM (forward-compatibility) + HIGHEST_SM := $(lastword $(MADGRAPH_CUDA_ARCH_LIST)) + GENCODE_FLAGS := $(foreach arch,$(MADGRAPH_CUDA_ARCH_LIST),-gencode arch=compute_$(arch),code=sm_$(arch)) + GENCODE_PTX := -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) + GPUARCHFLAGS := $(GENCODE_FLAGS) $(GENCODE_PTX) GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags @@ -531,7 +555,7 @@ ifeq ($(UNAME_P),ppc64le) else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif -else ifeq ($(UNAME_P),arm) # ARM on Apple silicon +else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON override AVXFLAGS = -DMGONGPU_NOARMNEON else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon @@ -543,7 +567,7 @@ else ifeq ($(UNAME_P),arm) # ARM on Apple silicon else ifeq ($(BACKEND),cpp512z) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif -else ifeq ($(UNAME_P),aarch64) # ARM on Linux +else ifeq ($(UNAME_M),aarch64) # ARM on Linux ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent override AVXFLAGS = -march=armv8-a+nosimd else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers) @@ -1111,7 +1135,7 @@ bld512z: ifeq ($(UNAME_P),ppc64le) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq 
(,$(filter $(UNAME_M),arm64 aarch64)) ###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavxs: bldnone bldsse4 else @@ -1254,4 +1278,9 @@ endif cuda-memcheck: all.$(TAG) $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +# Detect backend (to be used in case of 'cppauto' to give info to the user) +.PHONY: detect-backend +detect-backend: + @echo "Resolved backend has already been written to $(BACKEND_LOG) at parse time." + #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp_overlay.mk b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp_overlay.mk index d2c3b0c747..b9d17f0e38 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp_overlay.mk +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp_overlay.mk @@ -16,6 +16,7 @@ endif # Basic uname helpers (if not already set) UNAME_S ?= $(shell uname -s) UNAME_P ?= $(shell uname -p) +UNAME_M ?= $(shell uname -m) # Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html FFLAGS+= -cpp @@ -225,7 +226,7 @@ madevent_%_link: # Cudacpp bldall targets ifeq ($(UNAME_P),ppc64le) bldavxs: bldnone bldsse4 -else ifneq (,$(filter $(UNAME_P),arm aarch64)) +else ifneq (,$(filter $(UNAME_M),arm64 aarch64)) bldavxs: bldnone bldsse4 else bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.cc index 8b3f302975..fea35823f5 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.cc @@ -91,6 +91,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* 
rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -102,11 +103,11 @@ extern "C" #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, igraph, mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -129,13 +130,14 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, const bool* pgoodHelOnly ) { //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, igraph, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.h b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.h index 7d5014a138..b3667b03fe 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.h +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.h @@ -29,6 +29,7 @@ extern "C" const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, const unsigned int* channelIds, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -39,6 +40,7 @@ extern "C" const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, const FORTRANFPTYPE* rndcol, + const int* igraph, FORTRANFPTYPE* mes, int* selhel, int* selcol, @@ -46,4 +48,4 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.inc index 5708dca15c..590063408a 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.inc @@ -37,6 +37,7 @@ C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection C - CHANID: the input array of channels (Feynman diagrams) to enhance +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -44,13 +45,15 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, CHANID, IGRAPH, MES, SELHEL, SELCOL, + & HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) INTEGER*4 CHANID(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) @@ -65,6 +68,7 @@ C - MOMENTA: the input 4-momenta Fortran array C - GS: the input Gs (running QCD coupling constant alphas) Fortran array C - RNDHEL: the input random number Fortran array for helicity selection C - RNDCOL: the input random number Fortran array for color selection +C - IGRAPH: the input per-event MLM graph array (0 = no MLM graph) C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array @@ -72,12 +76,13 @@ C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) DOUBLE PRECISION RNDHEL(*) DOUBLE PRECISION RNDCOL(*) + INTEGER*4 IGRAPH(*) DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) diff --git a/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuVectors.h index 9f3533a875..73719032b3 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuVectors.h @@ -54,7 +54,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR #else - typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ), aligned( neppV * sizeof( fptype ) ) ) ); // RRRR #endif // Mixed fptypes #537: float for color algebra and double elsewhere @@ -65,7 +65,7 @@ namespace mg5amcCpu #ifdef __clang__ typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR #else - typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ), aligned( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR #endif #else typedef fptype_v fptype2_v; @@ -123,14 +123,14 @@ namespace mg5amcCpu #if defined MGONGPU_FPTYPE_DOUBLE typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb #endif #else // gcc - typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ) ) ); + 
typedef unsigned int uint_v __attribute__( ( vector_size( neppV * sizeof( unsigned int ) ), aligned( neppV * sizeof( unsigned int ) ) ) ); #if defined MGONGPU_FPTYPE_DOUBLE - typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ), aligned( neppV * sizeof( long int ) ) ) ); // bbbb #elif defined MGONGPU_FPTYPE_FLOAT - typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ), aligned( neppV * sizeof( int ) ) ) ); // bbbb #endif #endif diff --git a/epochX/cudacpp/susy_gg_tt.sa/test/cudacpp_test.mk b/epochX/cudacpp/susy_gg_tt.sa/test/cudacpp_test.mk index 977c75fc48..73dce678ef 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/test/cudacpp_test.mk +++ b/epochX/cudacpp/susy_gg_tt.sa/test/cudacpp_test.mk @@ -8,11 +8,12 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) # Host detection UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) +UNAME_M := $(shell uname -m) # Only add AVX2/FMA on non-mac and non-ARM hosts ifeq ($(UNAME_S),Darwin) GTEST_CMAKE_FLAGS := -else ifeq ($(UNAME_P),aarch64) +else ifeq ($(UNAME_M),aarch64) GTEST_CMAKE_FLAGS := else GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma" From b09f7c7525dafc2f928391842132dfbb476fcb5c Mon Sep 17 00:00:00 2001 From: Daniele Massaro Date: Tue, 14 Apr 2026 18:13:48 +0200 Subject: [PATCH 05/17] Fix missing definition of allIgraph --- .../iolibs/template_files/gpu/process_function_definitions.inc | 1 + 1 file changed, 1 insertion(+) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc index 6140ddc4b0..cd73d52ed3 100644 --- 
a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc @@ -677,6 +677,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] From 2f591c7731075527b4ce4849e67adb7a10a0f6b0 Mon Sep 17 00:00:00 2001 From: Daniele Massaro Date: Tue, 14 Apr 2026 18:15:27 +0200 Subject: [PATCH 06/17] Update fcheck_sa with igraph for MLM --- .../madgraph/iolibs/template_files/gpu/fcheck_sa.f | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f index fb942500a5..4ff41257c3 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, From 83730469e2c3b277bdd3b2013f5f319404ff13b1 Mon Sep 17 00:00:00 2001 From: Daniele Massaro Date: Tue, 14 Apr 2026 18:40:17 +0200 Subject: [PATCH 07/17] Regenerate processes --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 21 +++---- .../SubProcesses/P1_epem_mupmum/CPPProcess.cc | 1 + .../SubProcesses/P1_epem_mupmum/fcheck_sa.f | 4 +- .../CODEGEN_cudacpp_ee_mumu_log.txt | 17 +++-- .../P1_Sigma_sm_epem_mupmum/CPPProcess.cc | 1 + .../P1_Sigma_sm_epem_mupmum/fcheck_sa.f | 4 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 21 +++---- .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 1 + .../SubProcesses/P1_gg_ttx/fcheck_sa.f | 4 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 17 +++-- .../P1_Sigma_sm_gg_ttx/CPPProcess.cc | 1 + .../P1_Sigma_sm_gg_ttx/fcheck_sa.f | 4 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 22 +++---- .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 1 + .../SubProcesses/P1_gg_ttx/fcheck_sa.f | 4 +- .../SubProcesses/P2_gg_ttxg/CPPProcess.cc | 1 + .../SubProcesses/P2_gg_ttxg/fcheck_sa.f | 4 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 +++--- .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 1 + .../SubProcesses/P1_gg_ttxg/fcheck_sa.f | 4 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 16 ++--- .../P1_Sigma_sm_gg_ttxg/CPPProcess.cc | 1 + .../P1_Sigma_sm_gg_ttxg/fcheck_sa.f | 4 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 20 +++--- .../SubProcesses/P1_gg_ttxgg/CPPProcess.cc | 1 + 
.../SubProcesses/P1_gg_ttxgg/fcheck_sa.f | 4 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 16 ++--- .../P1_Sigma_sm_gg_ttxgg/CPPProcess.cc | 1 + .../P1_Sigma_sm_gg_ttxgg/fcheck_sa.f | 4 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 22 +++---- .../SubProcesses/P1_gg_ttxggg/CPPProcess.cc | 1 + .../SubProcesses/P1_gg_ttxggg/fcheck_sa.f | 4 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 16 ++--- .../P1_Sigma_sm_gg_ttxggg/CPPProcess.cc | 1 + .../P1_Sigma_sm_gg_ttxggg/fcheck_sa.f | 4 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 20 +++--- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 1 + .../SubProcesses/P1_gu_ttxu/fcheck_sa.f | 4 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 1 + .../SubProcesses/P1_gux_ttxux/fcheck_sa.f | 4 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 16 ++--- .../P1_Sigma_sm_gu_ttxu/CPPProcess.cc | 1 + .../P1_Sigma_sm_gu_ttxu/fcheck_sa.f | 4 +- .../P1_Sigma_sm_gux_ttxux/CPPProcess.cc | 1 + .../P1_Sigma_sm_gux_ttxux/fcheck_sa.f | 4 +- .../CODEGEN_mad_heft_gg_bb_log.txt | 19 +++--- .../SubProcesses/P1_gg_bbx/CPPProcess.cc | 1 + .../SubProcesses/P1_gg_bbx/fcheck_sa.f | 4 +- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 62 +++---------------- .../P1_Sigma_heft_gg_bbx/CPPProcess.cc | 1 + .../P1_Sigma_heft_gg_bbx/fcheck_sa.f | 4 +- .../CODEGEN_mad_nobm_pp_ttW_log.txt | 22 +++---- .../SubProcesses/P0_dux_ttxwm/CPPProcess.cc | 1 + .../SubProcesses/P0_dux_ttxwm/fcheck_sa.f | 4 +- .../SubProcesses/P0_udx_ttxwp/CPPProcess.cc | 1 + .../SubProcesses/P0_udx_ttxwp/fcheck_sa.f | 4 +- .../SubProcesses/P1_dux_ttxwmg/CPPProcess.cc | 1 + .../SubProcesses/P1_dux_ttxwmg/fcheck_sa.f | 4 +- .../SubProcesses/P1_gd_ttxwmu/CPPProcess.cc | 1 + .../SubProcesses/P1_gd_ttxwmu/fcheck_sa.f | 4 +- .../SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc | 1 + .../SubProcesses/P1_gdx_ttxwpux/fcheck_sa.f | 4 +- .../SubProcesses/P1_gu_ttxwpd/CPPProcess.cc | 1 + .../SubProcesses/P1_gu_ttxwpd/fcheck_sa.f | 4 +- .../SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc | 1 + 
.../SubProcesses/P1_gux_ttxwmdx/fcheck_sa.f | 4 +- .../SubProcesses/P1_udx_ttxwpg/CPPProcess.cc | 1 + .../SubProcesses/P1_udx_ttxwpg/fcheck_sa.f | 4 +- .../CODEGEN_mad_pp_tt012j_log.txt | 25 ++++---- .../SubProcesses/P0_gg_ttx/CPPProcess.cc | 1 + .../SubProcesses/P0_gg_ttx/fcheck_sa.f | 4 +- .../SubProcesses/P0_uux_ttx/CPPProcess.cc | 1 + .../SubProcesses/P0_uux_ttx/fcheck_sa.f | 4 +- .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 1 + .../SubProcesses/P1_gg_ttxg/fcheck_sa.f | 4 +- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 1 + .../SubProcesses/P1_gu_ttxu/fcheck_sa.f | 4 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 1 + .../SubProcesses/P1_gux_ttxux/fcheck_sa.f | 4 +- .../SubProcesses/P1_uux_ttxg/CPPProcess.cc | 1 + .../SubProcesses/P1_uux_ttxg/fcheck_sa.f | 4 +- .../SubProcesses/P2_gg_ttxgg/CPPProcess.cc | 1 + .../SubProcesses/P2_gg_ttxgg/fcheck_sa.f | 4 +- .../SubProcesses/P2_gg_ttxuux/CPPProcess.cc | 1 + .../SubProcesses/P2_gg_ttxuux/fcheck_sa.f | 4 +- .../SubProcesses/P2_gu_ttxgu/CPPProcess.cc | 1 + .../SubProcesses/P2_gu_ttxgu/fcheck_sa.f | 4 +- .../SubProcesses/P2_gux_ttxgux/CPPProcess.cc | 1 + .../SubProcesses/P2_gux_ttxgux/fcheck_sa.f | 4 +- .../SubProcesses/P2_uc_ttxuc/CPPProcess.cc | 1 + .../SubProcesses/P2_uc_ttxuc/fcheck_sa.f | 4 +- .../SubProcesses/P2_ucx_ttxucx/CPPProcess.cc | 1 + .../SubProcesses/P2_ucx_ttxucx/fcheck_sa.f | 4 +- .../SubProcesses/P2_uu_ttxuu/CPPProcess.cc | 1 + .../SubProcesses/P2_uu_ttxuu/fcheck_sa.f | 4 +- .../SubProcesses/P2_uux_ttxccx/CPPProcess.cc | 1 + .../SubProcesses/P2_uux_ttxccx/fcheck_sa.f | 4 +- .../SubProcesses/P2_uux_ttxgg/CPPProcess.cc | 1 + .../SubProcesses/P2_uux_ttxgg/fcheck_sa.f | 4 +- .../SubProcesses/P2_uux_ttxuux/CPPProcess.cc | 1 + .../SubProcesses/P2_uux_ttxuux/fcheck_sa.f | 4 +- .../P2_uxcx_ttxuxcx/CPPProcess.cc | 1 + .../SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f | 4 +- .../P2_uxux_ttxuxux/CPPProcess.cc | 1 + .../SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f | 4 +- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 20 +++--- 
.../SubProcesses/P1_gg_ttxttx/CPPProcess.cc | 1 + .../SubProcesses/P1_gg_ttxttx/fcheck_sa.f | 4 +- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 54 +++------------- .../CPPProcess.cc | 1 + .../fcheck_sa.f | 4 +- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 18 +++--- .../SubProcesses/P1_gg_t1t1x/CPPProcess.cc | 1 + .../SubProcesses/P1_gg_t1t1x/fcheck_sa.f | 4 +- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 14 ++--- .../CPPProcess.cc | 1 + .../P1_Sigma_MSSM_SLHA2_gg_t1t1x/fcheck_sa.f | 4 +- .../CODEGEN_mad_susy_gg_tt_log.txt | 18 +++--- .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 1 + .../SubProcesses/P1_gg_ttx/fcheck_sa.f | 4 +- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 18 +++--- .../P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc | 1 + .../P1_Sigma_MSSM_SLHA2_gg_ttx/fcheck_sa.f | 4 +- 123 files changed, 409 insertions(+), 355 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 3ce3428de8..b131fec83c 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -1,5 +1,4 @@ Running MG5 in debug mode -('WARNING: loading of madgraph too slow!!!', 0.5544004440307617) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. It has been validated for the last time with version: 3.6.5 @@ -57,7 +56,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.008105278015136719  +DEBUG: model prefixing takes 0.0036313533782958984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +148,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.014 s +1 processes with 2 diagrams generated in 0.004 s Total: 1 processes with 2 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -178,19 +177,19 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (2 diagrams) in 0.009 s -Wrote files for 8 helas calls in 0.185 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s +Wrote files for 8 helas calls in 0.063 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.353 s +ALOHA: aloha creates 3 routines in 0.177 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.374 s +ALOHA: aloha creates 7 routines in 0.232 s FFV1 FFV1 FFV2 @@ -221,10 +220,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m5.692s -user 0m4.638s -sys 0m0.843s -Code generation completed in 6 seconds +real 0m3.100s +user 0m2.713s +sys 0m0.346s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc index 4a7ad99d1d..cefddd36a5 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc @@ -1012,6 +1012,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f index f0220047d7..61be922c33 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index cea3cd6aff..4d2c6c11cb 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -1,5 +1,4 @@ Running MG5 in debug mode -('WARNING: loading of madgraph too slow!!!', 0.5546493530273438) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. It has been validated for the last time with version: 3.6.5 @@ -57,7 +56,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.010983943939208984  +DEBUG: model prefixing takes 0.002979755401611328  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +148,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.009 s +1 processes with 2 diagrams generated in 0.004 s Total: 1 processes with 2 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,13 +167,13 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. -Generated helas calls for 1 subprocesses (2 diagrams) in 0.007 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.399 s +ALOHA: aloha creates 4 routines in 0.172 s FFV1 FFV1 FFV2 @@ -193,7 +192,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
quit -real 0m1.581s -user 0m1.308s -sys 0m0.223s -Code generation completed in 2 seconds +real 0m0.575s +user 0m0.501s +sys 0m0.062s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc index 358c2d341e..3bd1fe8442 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc @@ -1010,6 +1010,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f index f0220047d7..61be922c33 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! 
no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index f64815a8e5..63a2f7c315 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -1,5 +1,4 @@ Running MG5 in debug mode -('WARNING: loading of madgraph too slow!!!', 0.5098216533660889) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. It has been validated for the last time with version: 3.6.5 @@ -57,7 +56,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.007402896881103516  +DEBUG: model prefixing takes 0.002977609634399414  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.018 s +1 processes with 3 diagrams generated in 0.006 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -179,16 +178,16 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (3 diagrams) in 0.012 s -Wrote files for 10 helas calls in 0.199 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s +Wrote files for 10 helas calls in 0.056 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.264 s +ALOHA: aloha creates 2 routines in 0.098 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.250 s +ALOHA: aloha creates 4 routines in 0.085 s VVV1 FFV1 FFV1 @@ -215,10 +214,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m5.071s -user 0m4.130s -sys 0m0.804s -Code generation completed in 5 seconds +real 0m1.915s +user 0m1.613s +sys 0m0.285s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 8a87b6ffd2..528c019861 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -1025,6 +1025,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f index f0220047d7..61be922c33 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! 
no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 6f8f43751d..7ec072c7f6 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -1,5 +1,4 @@ Running MG5 in debug mode -('WARNING: loading of madgraph too slow!!!', 0.5027971267700195) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. It has been validated for the last time with version: 3.6.5 @@ -57,7 +56,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.010428905487060547  +DEBUG: model prefixing takes 0.0032498836517333984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.029 s +1 processes with 3 diagrams generated in 0.006 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -169,11 +168,11 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. -Generated helas calls for 1 subprocesses (3 diagrams) in 0.021 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.005 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.362 s +ALOHA: aloha creates 2 routines in 0.096 s VVV1 FFV1 FFV1 @@ -188,7 +187,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
quit -real 0m1.704s -user 0m1.443s -sys 0m0.212s -Code generation completed in 2 seconds +real 0m0.525s +user 0m0.446s +sys 0m0.073s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc index 2637b5b7b3..c9237a883f 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc @@ -1022,6 +1022,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f index f0220047d7..61be922c33 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! 
no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 9923650dee..9f11804fe2 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -56,7 +56,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005435466766357422  +DEBUG: model prefixing takes 0.003109455108642578  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.013 s +1 processes with 3 diagrams generated in 0.006 s Total: 1 processes with 3 diagrams add process g g > t t~ g INFO: Checking for minimal orders which gives processes. @@ -157,7 +157,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.031 s +1 processes with 16 diagrams generated in 0.014 s Total: 2 processes with 19 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -199,22 +199,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  -Generated helas calls for 2 subprocesses (19 diagrams) in 0.077 s -Wrote files for 46 helas calls in 0.331 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.034 s +Wrote files for 46 helas calls in 0.173 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.440 s +ALOHA: aloha creates 5 routines in 0.213 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.368 s +ALOHA: aloha creates 10 routines in 0.176 s VVV1 VVV1 FFV1 @@ -246,10 +246,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m4.708s -user 0m3.946s -sys 0m0.658s -Code generation completed in 5 seconds +real 0m2.611s +user 0m2.233s +sys 0m0.354s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 8a87b6ffd2..528c019861 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -1025,6 +1025,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f index f0220047d7..61be922c33 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! 
no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc index 10c348765c..5e0525f85b 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc @@ -1242,6 +1242,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f index cb7efdfbcf..70c3d08b67 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! 
use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 3ba90dabe6..6be8b49601 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -56,7 +56,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006871461868286133  +DEBUG: model prefixing takes 0.0029730796813964844  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.037 s +1 processes with 16 diagrams generated in 0.016 s Total: 1 processes with 16 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -178,22 +178,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxg DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.074 s -Wrote files for 36 helas calls in 0.281 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +Wrote files for 36 helas calls in 0.093 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.501 s +ALOHA: aloha creates 5 routines in 0.184 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.441 s +ALOHA: aloha creates 10 routines in 0.170 s VVV1 VVV1 FFV1 @@ -225,10 +225,10 @@ Type "launch" to generate events from this process, or 
see Run "open index.html" to see more information about this process. quit -real 0m5.750s -user 0m4.686s -sys 0m0.918s -Code generation completed in 6 seconds +real 0m2.289s +user 0m1.950s +sys 0m0.319s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 2bcaa70441..9345bb5600 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -1242,6 +1242,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f index cb7efdfbcf..70c3d08b67 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 4be597d33d..cc49ef69ca 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -56,7 +56,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00689697265625  +DEBUG: model prefixing takes 0.005289793014526367  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.049 s +1 processes with 16 diagrams generated in 0.020 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,14 +168,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.066 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.028 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.484 s +ALOHA: aloha creates 5 routines in 0.184 s VVV1 VVV1 FFV1 @@ -195,7 +195,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
quit -real 0m1.660s -user 0m1.379s -sys 0m0.231s -Code generation completed in 2 seconds +real 0m0.728s +user 0m0.647s +sys 0m0.073s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc index 028e8696cc..0861f53607 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc @@ -1236,6 +1236,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f index cb7efdfbcf..70c3d08b67 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! 
no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 28fded2fac..a4b5a8f891 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -56,7 +56,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.009834766387939453  +DEBUG: model prefixing takes 0.003253459930419922  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.348 s +1 processes with 123 diagrams generated in 0.127 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -178,22 +178,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxgg DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 
67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.955 s -Wrote files for 222 helas calls in 1.123 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.369 s +Wrote files for 222 helas calls in 0.428 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.532 s +ALOHA: aloha creates 5 routines in 0.239 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.568 s +ALOHA: aloha creates 10 routines in 0.196 s VVV1 VVV1 FFV1 @@ -228,10 +228,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m8.954s -user 0m7.850s -sys 0m0.859s -Code generation completed in 9 seconds +real 0m3.463s +user 0m3.149s +sys 0m0.290s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc index 1e7036acd0..07c5e96e24 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc @@ -3170,6 +3170,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! 
no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 993b7c15cb..da2293ff67 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -56,7 +56,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.008510351181030273  +DEBUG: model prefixing takes 0.0031633377075195312  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.349 s +1 processes with 123 diagrams generated in 0.115 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,14 +168,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.757 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.336 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.622 s +ALOHA: aloha creates 5 routines in 0.169 s VVV1 VVV1 FFV1 @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m3.239s -user 0m2.961s -sys 0m0.180s -Code generation completed in 3 seconds +real 0m1.154s +user 0m1.088s +sys 0m0.058s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc index e1e3f4b970..cb673ba887 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc @@ -3227,6 +3227,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! 
no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 59080caa88..471639e441 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -56,7 +56,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.009574413299560547  +DEBUG: model prefixing takes 0.003108978271484375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 4.089 s +1 processes with 1240 diagrams generated in 1.607 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -169,7 +169,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] -INFO: Color-Flow passed to 1630 term in 16s. 
Introduce 3030 contraction +INFO: Color-Flow passed to 1630 term in 5s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h @@ -180,22 +180,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxggg DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 
200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 
437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 
672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 
914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 
23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 
275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 
510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 
745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 
986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 17.014 s -Wrote files for 2281 helas calls in 38.104 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 5.210 s +Wrote files for 2281 helas calls in 12.142 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.672 s +ALOHA: aloha creates 5 routines in 0.274 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines 
-ALOHA: aloha creates 10 routines in 0.557 s +ALOHA: aloha creates 10 routines in 0.172 s VVV1 VVV1 FFV1 @@ -230,10 +230,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 1m14.494s -user 1m10.674s -sys 0m2.006s -Code generation completed in 74 seconds +real 0m24.610s +user 0m24.002s +sys 0m0.474s +Code generation completed in 25 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc index 764a22ed2f..5c211958ad 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc @@ -30741,6 +30741,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f index 870c890410..5ae4792dfa 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! 
not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index e3ac0cd576..f6b6aaf821 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -56,7 +56,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00561833381652832  +DEBUG: model prefixing takes 0.002972126007080078  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 4.382 s +1 processes with 1240 diagrams generated in 1.566 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,14 +168,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 14.358 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 5.701 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.888 s +ALOHA: aloha creates 5 routines in 0.223 s VVV1 VVV1 FFV1 @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m31.360s -user 0m29.999s -sys 0m0.614s -Code generation completed in 31 seconds +real 0m10.443s +user 0m10.251s +sys 0m0.137s +Code generation completed in 11 seconds diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc index eefc81a09f..379a3750aa 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc @@ -32631,6 +32631,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f index 870c890410..5ae4792dfa 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! 
no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 1f2a00cc6e..d25f2a033e 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -55,7 +55,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005351066589355469  +DEBUG: model prefixing takes 0.00315093994140625  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -164,7 +164,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.229 s +8 processes with 40 diagrams generated in 0.087 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -212,16 +212,16 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxux DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1749]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.067 s -Wrote files for 32 helas calls in 0.549 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.056 s +Wrote files for 32 helas calls in 0.326 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.278 s +ALOHA: aloha creates 2 routines in 0.179 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.292 s +ALOHA: aloha creates 4 routines in 0.093 s FFV1 FFV1 FFV1 @@ -249,10 +249,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m6.914s -user 0m5.669s -sys 0m1.040s -Code generation completed in 7 seconds +real 0m2.871s +user 0m2.409s +sys 0m0.431s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 7382e1b70e..158152f03a 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -1080,6 +1080,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f index cb7efdfbcf..70c3d08b67 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! 
no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index c62d49022d..7282662b6e 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -1080,6 +1080,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f index cb7efdfbcf..70c3d08b67 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! 
use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index db06660f7c..6181a90d99 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -55,7 +55,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.009302139282226562  +DEBUG: model prefixing takes 0.003228902816772461  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -164,7 +164,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.148 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -200,11 +200,11 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.065 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.039 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.226 s +ALOHA: aloha creates 2 routines in 0.160 s FFV1 FFV1 FFV1 @@ -220,7 +220,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
quit -real 0m1.590s -user 0m1.312s -sys 0m0.233s -Code generation completed in 1 seconds +real 0m0.725s +user 0m0.646s +sys 0m0.070s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc index 95bddd1643..a677127357 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc @@ -1075,6 +1075,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f index cb7efdfbcf..70c3d08b67 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! 
no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc index 3a5fa5afe7..d22359741c 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc @@ -1075,6 +1075,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f index cb7efdfbcf..70c3d08b67 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! 
not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index a5473b9464..e1a9cda4cc 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -1,5 +1,4 @@ Running MG5 in debug mode -('WARNING: loading of madgraph too slow!!!', 0.605353593826294) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. 
It has been validated for the last time with version: 3.6.5 @@ -122,7 +121,7 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ generate g g > b b~ HIW<=1 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Process has 4 diagrams -1 processes with 4 diagrams generated in 0.019 s +1 processes with 4 diagrams generated in 0.005 s Total: 1 processes with 4 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_heft_gg_bb --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -151,20 +150,20 @@ INFO: Finding symmetric diagrams for subprocess group gg_bbx DEBUG: len(subproc_diagrams_for_config) =  4 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (4 diagrams) in 0.021 s -Wrote files for 12 helas calls in 0.277 s +Generated helas calls for 1 subprocesses (4 diagrams) in 0.008 s +Wrote files for 12 helas calls in 0.059 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.605 s +ALOHA: aloha creates 4 routines in 0.157 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.446 s +ALOHA: aloha creates 8 routines in 0.159 s VVS3 VVV1 FFV1 @@ -193,10 +192,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m6.546s -user 0m5.426s -sys 0m0.926s -Code generation completed in 7 seconds +real 0m2.061s +user 0m1.749s +sys 0m0.292s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc index dd67bb37d5..b7c2f8f53a 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc @@ -1039,6 +1039,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/fcheck_sa.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/fcheck_sa.f index f0220047d7..61be922c33 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/fcheck_sa.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index 905af14e1e..9f9e24bb5b 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -56,54 +56,6 @@ set auto_convert_model T save options auto_convert_model save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model heft -INFO: download model from http://madgraph.phys.ucl.ac.be/Downloads/models/heft.tgz to the following directory: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/models  ---2026-04-14 11:59:48-- http://madgraph.phys.ucl.ac.be/Downloads/models/heft.tgz -Resolving madgraph.phys.ucl.ac.be (madgraph.phys.ucl.ac.be)... 130.104.2.143 -Connecting to madgraph.phys.ucl.ac.be (madgraph.phys.ucl.ac.be)|130.104.2.143|:80... connected. -HTTP request sent, awaiting response... 200 OK -Length: 50876 (50K) [application/x-gzip] -Saving to: ‘tmp.tgz’ - - 0K .......... .......... .......... .......... ......... 
100% 911K=0.05s - -2026-04-14 11:59:49 (911 KB/s) - ‘tmp.tgz’ saved [50876/50876] - -heft/ -heft/write_param_card.py -heft/restrict_ckm.dat -heft/couplings.py -heft/HEFT_UFO.log -heft/lorentz.py -heft/__init__.py -heft/__pycache__/ -heft/particles.py -heft/object_library.py -heft/restrict_default.dat -heft/restrict_zeromass_ckm.dat -heft/restrict_no_b_mass.dat -heft/function_library.py -heft/parameters.py -heft/py3_model.pkl -heft/coupling_orders.py -heft/restrict_no_tau_mass.dat -heft/vertices.py -heft/restrict_no_masses.dat -heft/__pycache__/write_param_card.cpython-311.pyc -heft/__pycache__/parameters.cpython-311.pyc -heft/__pycache__/function_library.cpython-311.pyc -heft/__pycache__/coupling_orders.cpython-311.pyc -heft/__pycache__/object_library.cpython-311.pyc -heft/__pycache__/couplings.cpython-311.pyc -heft/__pycache__/particles.cpython-311.pyc -heft/__pycache__/vertices.cpython-311.pyc -heft/__pycache__/lorentz.cpython-311.pyc -heft/__pycache__/__init__.cpython-311.pyc -INFO: reload from .py file -INFO: load particles -INFO: load vertices -WARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  -WARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  -DEBUG: model prefixing takes 0.011363983154296875  INFO: Restrict model heft with file models/heft/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -169,7 +121,7 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ generate g g > b b~ HIW<=1 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Process has 4 diagrams -1 processes with 4 diagrams generated in 0.019 s +1 processes with 4 diagrams generated in 0.005 s Total: 1 processes with 4 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_heft_gg_bb Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -188,13 +140,13 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/. -Generated helas calls for 1 subprocesses (4 diagrams) in 0.027 s +Generated helas calls for 1 subprocesses (4 diagrams) in 0.007 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.482 s +ALOHA: aloha creates 4 routines in 0.159 s VVS3 VVV1 FFV1 @@ -211,7 +163,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
quit -real 0m2.111s -user 0m1.591s -sys 0m0.266s -Code generation completed in 2 seconds +real 0m0.550s +user 0m0.471s +sys 0m0.069s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc index b6fdebd1f6..53350bc7d1 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc @@ -1035,6 +1035,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/fcheck_sa.f b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/fcheck_sa.f index f0220047d7..61be922c33 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/fcheck_sa.f +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! 
no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt index 56b44455c8..5bd319bfaa 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt @@ -55,7 +55,7 @@ set zerowidth_tchannel F import model sm-no_b_mass INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005021333694458008  +DEBUG: model prefixing takes 0.004093170166015625  INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -179,7 +179,7 @@ INFO: Process u~ d > t t~ w- added to mirror process d u~ > t t~ w- INFO: Process c~ s > t t~ w- added to mirror process s c~ > t t~ w- INFO: Process d~ u > t t~ w+ added to mirror process u d~ > t t~ w+ INFO: Process s~ c > t t~ w+ added to mirror process c s~ > t t~ w+ -4 processes with 8 diagrams generated in 0.179 s +4 processes with 8 diagrams generated in 0.097 s Total: 4 processes with 8 diagrams add process p p > t t~ w j @1 INFO: Checking for minimal orders which gives processes. 
@@ -221,7 +221,7 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~ INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g -12 processes with 144 diagrams generated in 0.940 s +12 processes with 144 diagrams generated in 0.511 s Total: 16 processes with 152 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -349,18 +349,18 @@ INFO: Finding symmetric diagrams for subprocess group dux_ttxwm DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1749]  -Generated helas calls for 8 subprocesses (76 diagrams) in 0.289 s -Wrote files for 212 helas calls in 1.169 s +Generated helas calls for 8 subprocesses (76 diagrams) in 0.146 s +Wrote files for 212 helas calls in 0.739 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 3 routines in 0.239 s +ALOHA: aloha creates 3 routines in 0.139 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 6 routines in 0.215 s +ALOHA: aloha creates 6 routines in 0.116 s FFV1 FFV1 FFV1 @@ -390,10 +390,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m8.199s -user 0m6.827s -sys 0m1.178s -Code generation completed in 9 seconds +real 0m4.792s +user 0m4.111s +sys 0m0.630s +Code generation completed in 5 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc index 90d559ebf6..8a74a16222 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc @@ -1052,6 +1052,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/fcheck_sa.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/fcheck_sa.f index cb7efdfbcf..70c3d08b67 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/fcheck_sa.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc index d0667f1e25..5aea836d76 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc @@ -1052,6 +1052,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/fcheck_sa.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/fcheck_sa.f index cb7efdfbcf..70c3d08b67 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/fcheck_sa.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! 
per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc index 64e5979e50..ff7c5406b9 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc @@ -1248,6 +1248,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/fcheck_sa.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/fcheck_sa.f +++ 
b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc index a9287ad910..8fb1cc30ca 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc @@ -1248,6 +1248,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git 
a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/fcheck_sa.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/fcheck_sa.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc index b4f8ffa4a6..4105b65b3c 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc @@ -1248,6 +1248,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/fcheck_sa.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/fcheck_sa.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! 
no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc index 002d741dcd..23e12cdaed 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc @@ -1248,6 +1248,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/fcheck_sa.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/fcheck_sa.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! 
use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc index 9e0ef62036..72257f5a04 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc @@ -1248,6 +1248,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/fcheck_sa.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/fcheck_sa.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! 
not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc index ed88a0e2d5..3991dd624e 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc @@ -1248,6 +1248,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/fcheck_sa.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/fcheck_sa.f index 
6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/fcheck_sa.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 1fbe2eda67..ff8a2b2d8b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -1,5 +1,4 @@ Running MG5 in debug mode -('WARNING: loading of madgraph too slow!!!', 0.503042459487915) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. 
It has been validated for the last time with version: 3.6.5 @@ -56,7 +55,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.013706207275390625  +DEBUG: model prefixing takes 0.003739595413208008  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -167,7 +166,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.092 s +5 processes with 7 diagrams generated in 0.030 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -207,7 +206,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.361 s +13 processes with 76 diagrams generated in 0.114 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -373,7 +372,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 4.672 s +65 processes with 1119 diagrams generated in 1.520 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -688,22 +687,22 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttx DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1749]  -Generated helas calls for 18 subprocesses (372 diagrams) in 3.092 s -Wrote files for 810 helas calls in 6.720 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.059 s +Wrote files for 810 helas calls in 2.376 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.562 s +ALOHA: aloha creates 5 routines in 0.177 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.514 s +ALOHA: aloha creates 10 routines in 0.219 s VVV1 VVV1 FFV1 @@ -738,10 +737,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m28.347s -user 0m24.340s -sys 0m3.113s -Code generation completed in 29 seconds +real 0m10.131s +user 0m8.972s +sys 0m1.051s +Code generation completed in 10 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc index 1e995ee72f..e6f7d28e97 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc @@ -1025,6 +1025,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f index f0220047d7..61be922c33 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! 
no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc index 9ac78276aa..08c8080553 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc @@ -1002,6 +1002,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f index f0220047d7..61be922c33 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! 
use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 2bcaa70441..9345bb5600 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -1242,6 +1242,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f index cb7efdfbcf..70c3d08b67 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! 
not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 023fd2fa7c..2bfaa38568 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -1080,6 +1080,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f index cb7efdfbcf..70c3d08b67 100644 --- 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index bfec4fe0e9..fdfedd86b1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -1080,6 +1080,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f index cb7efdfbcf..70c3d08b67 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc index 9394f70f59..21ca5edc9c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc @@ -1080,6 +1080,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f index cb7efdfbcf..70c3d08b67 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc index e91e521f1d..876e7914c5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc @@ -3170,6 +3170,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! 
not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc index 9d223187ec..e6e6173b55 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc @@ -1577,6 +1577,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE 
PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc index 3fd3c8eab6..5b085b0f17 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc @@ -1577,6 +1577,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f index 
6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc index cf638e9285..57480126ae 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc @@ -1577,6 +1577,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! 
TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc index 56579cfd80..4f022cc1ac 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc @@ -1158,6 +1158,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc index 18bed4d243..3b29d7716b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc @@ -1164,6 +1164,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! 
per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc index 2b1c5591fb..e16f764860 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc @@ -1268,6 +1268,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f +++ 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc index 4db27a524d..9cce6c958c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc @@ -1164,6 +1164,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc index 84d0fbbe9d..11f17169f6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc @@ -1577,6 +1577,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc index db73eca9ba..1071853dfa 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc @@ -1268,6 +1268,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc index f8f19a9615..73a7b4a37d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc @@ -1158,6 +1158,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! 
per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc index 20403f0c60..2ac5935d6a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc @@ -1268,6 +1268,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f +++ 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index 67638a92a6..b1d29b1224 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -71,7 +71,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + 
ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.0938117504119873  +DEBUG: model prefixing takes 0.05240178108215332  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -86,7 +86,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 4.342 s +1 processes with 72 diagrams generated in 2.804 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -115,22 +115,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxttx DEBUG: len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1724]  DEBUG: 
iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s -Wrote files for 119 helas calls in 0.432 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.130 s +Wrote files for 119 helas calls in 0.297 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.297 s +ALOHA: aloha creates 5 routines in 0.203 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 
routines in 0.304 s +ALOHA: aloha creates 10 routines in 0.212 s VVV5 VVV5 FFV1 @@ -162,10 +162,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m9.249s -user 0m8.497s -sys 0m0.583s -Code generation completed in 9 seconds +real 0m5.915s +user 0m5.357s +sys 0m0.469s +Code generation completed in 6 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc index 56113b542d..79386c8233 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc @@ -2103,6 +2103,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/fcheck_sa.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/fcheck_sa.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! 
not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index c315bb1b7e..ef6ee59072 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -56,41 +56,6 @@ set auto_convert_model T save options auto_convert_model save configuration file to /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t -INFO: download model from http://feynrules.irmp.ucl.ac.be/raw-attachment/wiki/SMEFT/SMEFTsim_topU3l_MwScheme_UFO.tar.gz to the following directory: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/models  ---2026-04-14 12:02:25-- http://feynrules.irmp.ucl.ac.be/raw-attachment/wiki/SMEFT/SMEFTsim_topU3l_MwScheme_UFO.tar.gz -Resolving feynrules.irmp.ucl.ac.be (feynrules.irmp.ucl.ac.be)... 130.104.48.109 -Connecting to feynrules.irmp.ucl.ac.be (feynrules.irmp.ucl.ac.be)|130.104.48.109|:80... connected. -HTTP request sent, awaiting response... 
200 Ok -Length: 80562 (79K) [application/x-tar] -Saving to: ‘tmp.tgz’ - - 0K .......... .......... .......... .......... .......... 63% 767K 0s - 50K .......... .......... ........ 100% 47.5M=0.07s - -2026-04-14 12:02:26 (1.17 MB/s) - ‘tmp.tgz’ saved [80562/80562] - -SMEFTsim_topU3l_MwScheme_UFO/ -SMEFTsim_topU3l_MwScheme_UFO/__init__.py -SMEFTsim_topU3l_MwScheme_UFO/param_card_massless.dat -SMEFTsim_topU3l_MwScheme_UFO/CT_couplings.py -SMEFTsim_topU3l_MwScheme_UFO/particles.py -SMEFTsim_topU3l_MwScheme_UFO/write_param_card.py -SMEFTsim_topU3l_MwScheme_UFO/decays.py -SMEFTsim_topU3l_MwScheme_UFO/parameters.py -SMEFTsim_topU3l_MwScheme_UFO/restrict_massless.dat -SMEFTsim_topU3l_MwScheme_UFO/object_library.py -SMEFTsim_topU3l_MwScheme_UFO/coupling_orders.py -SMEFTsim_topU3l_MwScheme_UFO/version.info -SMEFTsim_topU3l_MwScheme_UFO/function_library.py -SMEFTsim_topU3l_MwScheme_UFO/couplings.py -SMEFTsim_topU3l_MwScheme_UFO/propagators.py -SMEFTsim_topU3l_MwScheme_UFO/lorentz.py -SMEFTsim_topU3l_MwScheme_UFO/vertices.py -SMEFTsim_topU3l_MwScheme_UFO/restrict_SMlimit_massless.dat -fail to load model but auto_convert_model is on True. Trying to convert the model -convert model /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/mg5amcnlo/models/SMEFTsim_topU3l_MwScheme_UFO -retry the load of the model -import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t INFO: load particles INFO: load vertices CRITICAL: Model with non QCD emission of gluon (found 14 of those). 
@@ -106,7 +71,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.09116840362548828  +DEBUG: model prefixing takes 0.05950522422790527  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -115,16 +80,13 @@ Defined multiparticle l- = e- mu- Defined multiparticle vl = ve vm vt Defined multiparticle vl~ = ve~ vm~ vt~ Defined multiparticle all = g a ve vm vt ve~ vm~ vt~ u c t d s b t1 u~ c~ t~ d~ s~ b~ t1~ z w+ z1 w1+ h h1 w- w1- e- mu- ta- e+ mu+ ta+ -INFO: Change particles name to pass to MG5 convention -Kept definitions of multiparticles p / j / l+ / l- / vl / vl~ unchanged -Defined multiparticle all = g a ve vm vt ve~ vm~ vt~ u c t d s b t1 u~ c~ t~ d~ s~ b~ t1~ z w+ z1 w1+ h h1 w- w1- e- mu- ta- e+ mu+ ta+ generate g g > t t~ t t~ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 5.002 s +1 processes with 72 diagrams generated in 3.119 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -143,14 +105,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.201 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.137 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.268 s +ALOHA: aloha creates 5 routines in 0.170 s VVV5 VVV5 FFV1 @@ -170,7 +132,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. quit -real 0m7.646s -user 0m6.626s -sys 0m0.412s -Code generation completed in 8 seconds +real 0m4.219s +user 0m4.105s +sys 0m0.085s +Code generation completed in 5 seconds diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc index 698f03ec15..2118f73596 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc @@ -2051,6 +2051,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no 
MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f index 6a66bac979..b60ff6b550 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 08d8a27cb8..b4550f03a3 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.136 s +1 processes with 6 diagrams generated in 0.090 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -577,18 +577,18 @@ INFO: Finding symmetric diagrams for subprocess group gg_t1t1x DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (6 diagrams) in 0.010 s -Wrote files for 16 helas calls in 0.114 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.007 s +Wrote files for 16 helas calls in 0.066 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.160 s +ALOHA: aloha creates 3 routines in 0.097 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 
0.160 s +ALOHA: aloha creates 6 routines in 0.093 s VVV1 VSS1 VSS1 @@ -616,10 +616,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m5.118s -user 0m4.255s -sys 0m0.745s -Code generation completed in 5 seconds +real 0m2.797s +user 0m2.446s +sys 0m0.328s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc index 76e2f8a0ec..235ed516b0 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc @@ -1048,6 +1048,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/fcheck_sa.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/fcheck_sa.f index f0220047d7..61be922c33 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/fcheck_sa.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! 
not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 7ec4128ad9..30f8a4d405 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.184 s +1 processes with 6 diagrams generated in 0.094 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -567,12 +567,12 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/. -Generated helas calls for 1 subprocesses (6 diagrams) in 0.016 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.007 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.146 s +ALOHA: aloha creates 3 routines in 0.103 s VVV1 VSS1 VSS1 @@ -588,7 +588,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
quit -real 0m1.939s -user 0m1.773s -sys 0m0.128s -Code generation completed in 2 seconds +real 0m1.102s +user 0m1.024s +sys 0m0.072s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc index bea4c7340c..b852cad515 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc @@ -1049,6 +1049,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/fcheck_sa.f b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/fcheck_sa.f index f0220047d7..61be922c33 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/fcheck_sa.f +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 10227b8958..f42ccd164c 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.183 s +1 processes with 3 diagrams generated in 0.087 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -577,16 +577,16 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (3 diagrams) in 0.009 s -Wrote files for 10 helas calls in 0.090 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s +Wrote files for 10 helas calls in 0.065 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: 
P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.140 s +ALOHA: aloha creates 2 routines in 0.077 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.167 s +ALOHA: aloha creates 4 routines in 0.072 s VVV1 FFV1 FFV1 @@ -613,10 +613,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m4.587s -user 0m3.940s -sys 0m0.561s -Code generation completed in 4 seconds +real 0m2.672s +user 0m2.335s +sys 0m0.308s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 9b0a81b6a5..3030703c0d 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -1025,6 +1025,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f index f0220047d7..61be922c33 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA 
DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index 602c164e6a..c3b152c337 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -1,5 +1,4 @@ Running MG5 in debug mode -('WARNING: loading of madgraph too slow!!!', 0.5225210189819336) Loading plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT Plugin MG5aMC_PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.7.0. It has been validated for the last time with version: 3.6.5 @@ -54,9 +53,6 @@ set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F import model MSSM_SLHA2 -INFO: load particles -INFO: load vertices -DEBUG: model prefixing takes 0.5660371780395508  INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . INFO: Detect SLHA2 format. 
keeping restricted parameter in the param_card DEBUG: Simplifying conditional expressions  @@ -552,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.120 s +1 processes with 3 diagrams generated in 0.090 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -571,11 +567,11 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/. -Generated helas calls for 1 subprocesses (3 diagrams) in 0.007 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.127 s +ALOHA: aloha creates 2 routines in 0.084 s VVV1 FFV1 FFV1 @@ -590,7 +586,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
quit -real 0m3.115s -user 0m2.811s -sys 0m0.239s -Code generation completed in 4 seconds +real 0m1.139s +user 0m1.050s +sys 0m0.074s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc index 838c9595bc..b1158d6d28 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc @@ -1022,6 +1022,7 @@ namespace mg5amcCpu #ifdef MGONGPU_SUPPORTS_MULTICHANNEL const fptype* allrndcol, // input: random numbers[nevt] for color selection const unsigned int* allChannelIds, // input: multichannel channelIds[nevt] (1 to #diagrams); nullptr to disable single-diagram enhancement (fix #899/#911) + const int* allIgraph, // input: per-event MLM graph (0 = no MLM); nullptr if no MLM #endif fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities int* allselhel, // output: helicity selection[nevt] diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/fcheck_sa.f b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/fcheck_sa.f index f0220047d7..61be922c33 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/fcheck_sa.f @@ -20,6 +20,7 @@ PROGRAM FCHECK_SA DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 IGRAPH(NEVTMAX) ! per-event MLM graph (0 = no MLM) INTEGER*4 SELHEL(NEVTMAX) ! not yet used INTEGER*4 SELCOL(NEVTMAX) ! not yet used DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision @@ -62,8 +63,9 @@ PROGRAM FCHECK_SA DO IEVT = 1, NEVT GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO + IGRAPH(:) = 0 ! no MLM graph matching in standalone check CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities + & RNDHEL, RNDCOL, IGRAPH, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, From 1eee4d86ebf389dd057206fe3ed59bdb036704be Mon Sep 17 00:00:00 2001 From: Daniele Massaro Date: Tue, 14 Apr 2026 19:23:05 +0200 Subject: [PATCH 08/17] Move cluster.inc sourcing to upstream template to avoid vector.inc double sourcing This fix requires also a fix upstream to remove the previous vector.inc sourcing. --- epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index ac4a3f168e..f2d7189ddd 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -358,8 +358,6 @@ def write_auto_dsig_file(self, writer, matrix_element, proc_id = ""): SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' From 415cc50bd4f1392303663ee893406f92b74b9638 Mon Sep 17 00:00:00 2001 From: Daniele Massaro Date: Tue, 14 Apr 2026 20:54:39 +0200 Subject: [PATCH 09/17] Regenerate processes --- MG5aMC/mg5amcnlo | 2 +- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 22 ++++++++-------- .../ee_mumu.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_epem_mupmum/auto_dsig1.f | 4 +-- .../CODEGEN_cudacpp_ee_mumu_log.txt | 14 +++++----- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 14 +++++----- .../cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 4 +-- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 14 +++++----- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 20 +++++++------- .../gg_tt01g.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 4 +-- .../SubProcesses/P2_gg_ttxg/auto_dsig1.f | 4 +-- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 +++++++------- .../gg_ttg.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttxg/auto_dsig1.f | 4 +-- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 16 ++++++------ .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 20 +++++++------- .../gg_ttgg.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttxgg/auto_dsig1.f | 4 +-- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 16 ++++++------ .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 24 ++++++++--------- .../gg_ttggg.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttxggg/auto_dsig1.f | 4 +-- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 18 ++++++------- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 20 +++++++------- .../gq_ttq.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gu_ttxu/auto_dsig1.f | 4 +-- .../SubProcesses/P1_gux_ttxux/auto_dsig1.f | 4 +-- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 16 ++++++------ .../CODEGEN_mad_heft_gg_bb_log.txt | 18 ++++++------- .../heft_gg_bb.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_bbx/auto_dsig1.f | 4 +-- 
.../CODEGEN_cudacpp_heft_gg_bb_log.txt | 14 +++++----- .../CODEGEN_mad_nobm_pp_ttW_log.txt | 24 ++++++++--------- .../nobm_pp_ttW.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P0_dux_ttxwm/auto_dsig1.f | 4 +-- .../SubProcesses/P0_udx_ttxwp/auto_dsig1.f | 4 +-- .../SubProcesses/P1_dux_ttxwmg/auto_dsig1.f | 4 +-- .../SubProcesses/P1_gd_ttxwmu/auto_dsig1.f | 4 +-- .../SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f | 4 +-- .../SubProcesses/P1_gu_ttxwpd/auto_dsig1.f | 4 +-- .../SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f | 4 +-- .../SubProcesses/P1_udx_ttxwpg/auto_dsig1.f | 4 +-- .../CODEGEN_mad_pp_tt012j_log.txt | 26 +++++++++---------- .../pp_tt012j.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P0_gg_ttx/auto_dsig1.f | 4 +-- .../SubProcesses/P0_uux_ttx/auto_dsig1.f | 4 +-- .../SubProcesses/P1_gg_ttxg/auto_dsig1.f | 4 +-- .../SubProcesses/P1_gu_ttxu/auto_dsig1.f | 4 +-- .../SubProcesses/P1_gux_ttxux/auto_dsig1.f | 4 +-- .../SubProcesses/P1_uux_ttxg/auto_dsig1.f | 4 +-- .../SubProcesses/P2_gg_ttxgg/auto_dsig1.f | 4 +-- .../SubProcesses/P2_gg_ttxuux/auto_dsig1.f | 4 +-- .../SubProcesses/P2_gu_ttxgu/auto_dsig1.f | 4 +-- .../SubProcesses/P2_gux_ttxgux/auto_dsig1.f | 4 +-- .../SubProcesses/P2_uc_ttxuc/auto_dsig1.f | 4 +-- .../SubProcesses/P2_ucx_ttxucx/auto_dsig1.f | 4 +-- .../SubProcesses/P2_uu_ttxuu/auto_dsig1.f | 4 +-- .../SubProcesses/P2_uux_ttxccx/auto_dsig1.f | 4 +-- .../SubProcesses/P2_uux_ttxgg/auto_dsig1.f | 4 +-- .../SubProcesses/P2_uux_ttxuux/auto_dsig1.f | 4 +-- .../SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f | 4 +-- .../SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f | 4 +-- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 20 +++++++------- .../smeft_gg_tttt.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttxttx/auto_dsig1.f | 4 +-- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 16 ++++++------ .../CODEGEN_mad_susy_gg_t1t1_log.txt | 18 ++++++------- .../susy_gg_t1t1.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_t1t1x/auto_dsig1.f | 4 +-- 
.../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 14 +++++----- .../CODEGEN_mad_susy_gg_tt_log.txt | 18 ++++++------- .../susy_gg_tt.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 4 +-- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 14 +++++----- 76 files changed, 261 insertions(+), 339 deletions(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index 723f843075..c39fc765ad 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit 723f8430750707c35d8c64dc73732a1ae94aa5c3 +Subproject commit c39fc765ade7dd44ca238f48c41fa0a6c27edfc4 diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index b131fec83c..352378e243 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0036313533782958984  +DEBUG: model prefixing takes 0.002836942672729492  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -148,7 +148,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.004 s +1 processes with 2 diagrams generated in 0.003 s Total: 1 processes with 2 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -177,19 +177,19 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.063 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s +Wrote files for 8 helas calls in 0.050 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.177 s +ALOHA: aloha creates 3 routines in 0.105 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.232 s +ALOHA: aloha creates 7 routines in 0.124 s FFV1 FFV1 FFV2 @@ -220,10 +220,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.100s -user 0m2.713s -sys 0m0.346s -Code generation completed in 3 seconds +real 0m1.841s +user 0m1.574s +sys 0m0.251s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat index 5c3cdba491..76707b2df1 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f index 035ebb9a2e..cf12adfab5 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f @@ -543,8 +543,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=16) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -571,8 +571,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 4d2c6c11cb..ec0401b7f8 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.002979755401611328  +DEBUG: model prefixing takes 0.0028874874114990234  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -148,7 +148,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.004 s +1 processes with 2 diagrams generated in 0.003 s Total: 1 processes with 2 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -173,7 +173,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.172 s +ALOHA: aloha creates 4 routines in 0.140 s FFV1 FFV1 FFV2 @@ -192,7 +192,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. quit -real 0m0.575s -user 0m0.501s -sys 0m0.062s +real 0m0.526s +user 0m0.455s +sys 0m0.065s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 63a2f7c315..1bf7409d7b 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.002977609634399414  +DEBUG: model prefixing takes 0.002843141555786133  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -183,11 +183,11 @@ Wrote files for 10 helas calls in 0.056 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.098 s +ALOHA: aloha creates 2 routines in 0.079 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.085 s +ALOHA: aloha creates 4 routines in 0.068 s VVV1 FFV1 FFV1 @@ -214,9 +214,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.915s -user 0m1.613s -sys 0m0.285s +real 0m1.804s +user 0m1.509s +sys 0m0.277s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat index 4ac8928826..5dccd79cf6 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index b79f45da06..7cf597b197 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -525,8 +525,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=16) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -553,8 +553,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 7ec072c7f6..8f21787927 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0032498836517333984  +DEBUG: model prefixing takes 0.003323078155517578  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -168,11 +168,11 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. 
-Generated helas calls for 1 subprocesses (3 diagrams) in 0.005 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.096 s +ALOHA: aloha creates 2 routines in 0.081 s VVV1 FFV1 FFV1 @@ -187,7 +187,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. quit -real 0m0.525s -user 0m0.446s -sys 0m0.073s +real 0m0.468s +user 0m0.399s +sys 0m0.065s Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 9f11804fe2..613e90f3dc 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.003109455108642578  +DEBUG: model prefixing takes 0.0030622482299804688  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -199,22 +199,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  -Generated helas calls for 2 subprocesses (19 diagrams) in 0.034 s -Wrote files for 46 helas calls in 0.173 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s +Wrote files for 46 helas calls in 0.136 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.213 s +ALOHA: aloha creates 5 routines in 0.177 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.176 s +ALOHA: aloha creates 10 routines in 0.160 s VVV1 VVV1 FFV1 @@ -246,10 +246,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.611s -user 0m2.233s -sys 0m0.354s -Code generation completed in 3 seconds +real 0m2.474s +user 0m2.128s +sys 0m0.326s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat index e08e065c5d..198775ab17 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index b79f45da06..7cf597b197 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -525,8 +525,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=16) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -553,8 +553,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f index 78dfad938f..b29c6aeca2 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f @@ -525,8 +525,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=32) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -553,8 +553,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 6be8b49601..9c91843731 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0029730796813964844  +DEBUG: model prefixing takes 0.0030193328857421875  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.016 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -178,22 +178,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxg DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s -Wrote files for 36 helas calls in 0.093 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.028 s +Wrote files for 36 helas calls in 0.081 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.184 s +ALOHA: aloha creates 5 routines in 0.183 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.170 s +ALOHA: aloha creates 10 routines in 0.153 s VVV1 VVV1 FFV1 @@ -225,9 +225,9 @@ Type "launch" to generate events from this process, or 
see Run "open index.html" to see more information about this process. quit -real 0m2.289s -user 0m1.950s -sys 0m0.319s +real 0m2.097s +user 0m1.842s +sys 0m0.237s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat index f2554dda03..60e8eb1680 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index 1621d47cbc..77820f0e51 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -525,8 +525,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=32) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -553,8 +553,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index cc49ef69ca..8378db05c8 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005289793014526367  +DEBUG: model prefixing takes 0.00603795051574707  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.020 s +1 processes with 16 diagrams generated in 0.047 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,14 +168,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.028 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.067 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.184 s +ALOHA: aloha creates 5 routines in 0.362 s VVV1 VVV1 FFV1 @@ -195,7 +195,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
quit -real 0m0.728s -user 0m0.647s -sys 0m0.073s +real 0m1.292s +user 0m1.160s +sys 0m0.120s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index a4b5a8f891..41e277a07c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.003253459930419922  +DEBUG: model prefixing takes 0.003140687942504883  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.127 s +1 processes with 123 diagrams generated in 0.106 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -178,22 +178,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxgg DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 
67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.369 s -Wrote files for 222 helas calls in 0.428 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.314 s +Wrote files for 222 helas calls in 0.430 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.239 s +ALOHA: aloha creates 5 routines in 0.161 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.196 s +ALOHA: aloha creates 10 routines in 0.206 s VVV1 VVV1 FFV1 @@ -228,9 +228,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.463s -user 0m3.149s -sys 0m0.290s +real 0m3.159s +user 0m2.845s +sys 0m0.291s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat index a6eb18aa10..e36b410272 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f index 539796fad6..3a363b9a2b 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f @@ -525,8 +525,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -553,8 +553,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index da2293ff67..b64e85c5f6 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0031633377075195312  +DEBUG: model prefixing takes 0.002850770950317383  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.115 s +1 processes with 123 diagrams generated in 0.133 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,14 +168,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.336 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.282 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.169 s +ALOHA: aloha creates 5 routines in 0.156 s VVV1 VVV1 FFV1 @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m1.154s -user 0m1.088s -sys 0m0.058s +real 0m1.042s +user 0m0.963s +sys 0m0.072s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 471639e441..5c1ced3d64 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.003108978271484375  +DEBUG: model prefixing takes 0.0031096935272216797  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.607 s +1 processes with 1240 diagrams generated in 1.317 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -169,7 +169,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] -INFO: Color-Flow passed to 1630 term in 5s. 
Introduce 3030 contraction +INFO: Color-Flow passed to 1630 term in 4s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h @@ -180,22 +180,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxggg DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 
200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 
437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 
672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 
914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 
23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 
275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 
510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 
745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 
986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 5.210 s -Wrote files for 2281 helas calls in 12.142 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 4.272 s +Wrote files for 2281 helas calls in 10.459 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.274 s +ALOHA: aloha creates 5 routines in 0.250 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines 
-ALOHA: aloha creates 10 routines in 0.172 s +ALOHA: aloha creates 10 routines in 0.153 s VVV1 VVV1 FFV1 @@ -230,10 +230,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m24.610s -user 0m24.002s -sys 0m0.474s -Code generation completed in 25 seconds +real 0m20.973s +user 0m20.527s +sys 0m0.385s +Code generation completed in 21 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat index bb850487c9..e5edac134f 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f index 58e121be6e..0c9e5f7080 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f @@ -525,8 +525,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=128) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -553,8 +553,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! 
for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index f6b6aaf821..c1d79282cd 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.002972126007080078  +DEBUG: model prefixing takes 0.004813194274902344  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.566 s +1 processes with 1240 diagrams generated in 1.283 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,14 +168,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 5.701 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 4.282 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.223 s +ALOHA: aloha creates 5 routines in 0.204 s VVV1 VVV1 FFV1 @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m10.443s -user 0m10.251s -sys 0m0.137s -Code generation completed in 11 seconds +real 0m8.515s +user 0m8.356s +sys 0m0.130s +Code generation completed in 9 seconds diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index d25f2a033e..9c84bdacd3 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -55,7 +55,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00315093994140625  +DEBUG: model prefixing takes 0.0030677318572998047  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -164,7 +164,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.087 s +8 processes with 40 diagrams generated in 0.053 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -212,16 +212,16 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxux DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1749]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.056 s -Wrote files for 32 helas calls in 0.326 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.023 s +Wrote files for 32 helas calls in 0.138 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.179 s +ALOHA: aloha creates 2 routines in 0.081 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.093 s +ALOHA: aloha creates 4 routines in 0.066 s FFV1 FFV1 FFV1 @@ -249,9 +249,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.871s -user 0m2.409s -sys 0m0.431s +real 0m2.468s +user 0m2.099s +sys 0m0.348s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat index 2ebb2fe196..1d7ba76b8f 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 13398bd74e..f252c024f6 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -569,8 +569,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=32) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -597,8 +597,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 9dfed40308..bb34349714 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -569,8 +569,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=32) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -597,8 +597,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 6181a90d99..29b637877c 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -55,7 +55,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.003228902816772461  +DEBUG: model prefixing takes 0.0028433799743652344  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -164,7 +164,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.064 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -200,11 +200,11 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.039 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.021 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.160 s +ALOHA: aloha creates 2 routines in 0.083 s FFV1 FFV1 FFV1 @@ -220,7 +220,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
quit -real 0m0.725s -user 0m0.646s -sys 0m0.070s +real 0m0.564s +user 0m0.504s +sys 0m0.055s Code generation completed in 0 seconds diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index e1a9cda4cc..ee673a9803 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -121,7 +121,7 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ generate g g > b b~ HIW<=1 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Process has 4 diagrams -1 processes with 4 diagrams generated in 0.005 s +1 processes with 4 diagrams generated in 0.006 s Total: 1 processes with 4 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_heft_gg_bb --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -150,20 +150,20 @@ INFO: Finding symmetric diagrams for subprocess group gg_bbx DEBUG: len(subproc_diagrams_for_config) =  4 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (4 diagrams) in 0.008 s -Wrote files for 12 helas calls in 0.059 s +Generated helas calls for 1 subprocesses (4 diagrams) in 0.007 s +Wrote files for 12 helas calls in 0.055 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: 
aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.157 s +ALOHA: aloha creates 4 routines in 0.150 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.159 s +ALOHA: aloha creates 8 routines in 0.128 s VVS3 VVV1 FFV1 @@ -192,9 +192,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.061s -user 0m1.749s -sys 0m0.292s +real 0m2.156s +user 0m1.806s +sys 0m0.327s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat index 16084f30b2..8498ad5355 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f index 81f64c5619..6346c8cc25 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f @@ -525,8 +525,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! 
for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=16) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -553,8 +553,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index 9f9e24bb5b..e9ab17168a 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -121,7 +121,7 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ generate g g > b b~ HIW<=1 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Process has 4 diagrams -1 processes with 4 diagrams generated in 0.005 s +1 processes with 4 diagrams generated in 0.004 s Total: 1 processes with 4 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_heft_gg_bb Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -140,13 +140,13 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.h FileWriter for 
/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/. -Generated helas calls for 1 subprocesses (4 diagrams) in 0.007 s +Generated helas calls for 1 subprocesses (4 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.159 s +ALOHA: aloha creates 4 routines in 0.145 s VVS3 VVV1 FFV1 @@ -163,7 +163,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
quit -real 0m0.550s -user 0m0.471s -sys 0m0.069s +real 0m0.523s +user 0m0.452s +sys 0m0.065s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt index 5bd319bfaa..ec917b8858 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -55,7 +55,7 @@ set zerowidth_tchannel F import model sm-no_b_mass INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004093170166015625  +DEBUG: model prefixing takes 0.0033407211303710938  INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -179,7 +179,7 @@ INFO: Process u~ d > t t~ w- added to mirror process d u~ > t t~ w- INFO: Process c~ s > t t~ w- added to mirror process s c~ > t t~ w- INFO: Process d~ u > t t~ w+ added to mirror process u d~ > t t~ w+ INFO: Process s~ c > t t~ w+ added to mirror process c s~ > t t~ w+ -4 processes with 8 diagrams generated in 0.097 s +4 processes with 8 diagrams generated in 0.087 s Total: 4 processes with 8 diagrams add process p p > t t~ w j @1 INFO: Checking for minimal orders which gives processes. 
@@ -221,7 +221,7 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~ INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g -12 processes with 144 diagrams generated in 0.511 s +12 processes with 144 diagrams generated in 0.462 s Total: 16 processes with 152 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -349,18 +349,18 @@ INFO: Finding symmetric diagrams for subprocess group dux_ttxwm DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1749]  -Generated helas calls for 8 subprocesses (76 diagrams) in 0.146 s -Wrote files for 212 helas calls in 0.739 s +Generated helas calls for 8 subprocesses (76 diagrams) in 0.134 s +Wrote files for 212 helas calls in 0.580 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 3 routines in 0.139 s +ALOHA: aloha creates 3 routines in 0.105 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 6 routines in 0.116 s +ALOHA: aloha creates 6 routines in 0.102 s FFV1 FFV1 FFV1 @@ -390,10 +390,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m4.792s -user 0m4.111s -sys 0m0.630s -Code generation completed in 5 seconds +real 0m4.075s +user 0m3.538s +sys 0m0.497s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat index f7d965154f..eb68a4bde3 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig1.f index 4ea33017ad..959af9abb8 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig1.f @@ -542,8 +542,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=48) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -570,8 +570,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig1.f index fc948b0b81..d2d45ddbaa 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig1.f @@ -542,8 +542,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=48) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -570,8 +570,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig1.f index 7ebd1a0f28..5681d8535f 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig1.f @@ -542,8 +542,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! 
for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=96) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -570,8 +570,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig1.f index 8bc3bb1631..f72ed6255e 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig1.f @@ -539,8 +539,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=96) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -567,8 +567,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f index eaf408e47a..0caf0301e3 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f @@ -539,8 +539,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=96) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -567,8 +567,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig1.f index 0c984e26c2..b5b15b7c5c 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig1.f @@ -539,8 +539,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! 
for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=96) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -567,8 +567,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f index 6ca85f5dcd..b2737ec3fa 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f @@ -539,8 +539,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=96) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -567,8 +567,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig1.f index 53782bf723..41ca9266c1 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig1.f @@ -542,8 +542,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=96) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -570,8 +570,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index ff8a2b2d8b..62ee7940cf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -55,7 +55,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.003739595413208008  +DEBUG: model prefixing takes 0.002837657928466797  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -166,7 +166,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.030 s +5 processes with 7 diagrams generated in 0.022 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. 
@@ -206,7 +206,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.114 s +13 processes with 76 diagrams generated in 0.096 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -372,7 +372,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.520 s +65 processes with 1119 diagrams generated in 1.515 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -687,22 +687,22 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttx DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1749]  -Generated helas calls for 18 subprocesses (372 diagrams) in 1.059 s -Wrote files for 810 helas calls in 2.376 s +Generated helas calls for 18 subprocesses (372 diagrams) in 0.901 s +Wrote files for 810 helas calls in 1.969 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.177 s +ALOHA: aloha creates 5 routines in 0.167 s ALOHA: aloha starts to compute helicity 
amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.219 s +ALOHA: aloha creates 10 routines in 0.185 s VVV1 VVV1 FFV1 @@ -737,10 +737,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m10.131s -user 0m8.972s -sys 0m1.051s -Code generation completed in 10 seconds +real 0m9.031s +user 0m7.999s +sys 0m0.950s +Code generation completed in 9 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat index 20e300a012..92c81a8bde 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f index 8e939d3b72..a582ad2fd4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f @@ -525,8 +525,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! 
for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=16) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -553,8 +553,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f index 21e12cb805..0600c671ce 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f @@ -580,8 +580,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=16) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -608,8 +608,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index 1621d47cbc..77820f0e51 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -525,8 +525,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=32) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -553,8 +553,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 13398bd74e..f252c024f6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -569,8 +569,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=32) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -597,8 +597,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! 
for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 9dfed40308..bb34349714 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -569,8 +569,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=32) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -597,8 +597,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f index 5e95a2472f..632d791617 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f @@ -580,8 +580,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! 
for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=32) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -608,8 +608,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f index f5b3c543ca..4705c638be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f @@ -525,8 +525,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -553,8 +553,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f index 6244b1d099..263e3be1b2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f @@ -558,8 +558,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -586,8 +586,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f index 80fec08b91..16d795c6a6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f @@ -569,8 +569,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -597,8 +597,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! 
for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f index fe3465e94c..1a6277d156 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f @@ -569,8 +569,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -597,8 +597,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f index 8248186d91..284cc76158 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f @@ -596,8 +596,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! 
for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -624,8 +624,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f index 5d30ea45fa..33cf19f705 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f @@ -668,8 +668,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -696,8 +696,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f index 6ce968148f..d76f34423b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f @@ -580,8 +580,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -608,8 +608,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f index fef6aeaba9..19450cafaf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f @@ -668,8 +668,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! 
for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -696,8 +696,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f index e1a4e480cf..cdb3a6377b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f @@ -580,8 +580,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -608,8 +608,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f index 893e840a60..0a2a87d5d5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f @@ -580,8 +580,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -608,8 +608,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f index 4b5fa5bd47..b507ecd05b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f @@ -596,8 +596,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! 
for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -624,8 +624,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f index 6016b9280f..f01a8215a1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f @@ -580,8 +580,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -608,8 +608,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index b1d29b1224..52b5aafe04 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -71,7 +71,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.05240178108215332  +DEBUG: model prefixing takes 0.045355796813964844  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -86,7 +86,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 2.804 s +1 processes with 72 diagrams generated in 2.471 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -115,22 +115,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxttx DEBUG: len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 
46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (72 diagrams) in 0.130 s -Wrote files for 119 helas calls in 0.297 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.121 s +Wrote files for 119 helas calls in 0.250 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.203 s +ALOHA: aloha creates 5 routines in 0.189 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.212 s +ALOHA: aloha creates 10 routines in 0.184 s VVV5 VVV5 FFV1 @@ -162,9 +162,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m5.915s -user 0m5.357s -sys 0m0.469s +real 0m5.245s +user 0m4.948s +sys 0m0.272s Code generation completed in 6 seconds ************************************************************ * * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat index 137f01e301..cac68e6a66 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f index 31e3e60c31..731770fcdf 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f @@ -525,8 +525,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=64) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -553,8 +553,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index ef6ee59072..78cb6c2bd6 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -71,7 +71,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.05950522422790527  +DEBUG: model prefixing takes 0.049256324768066406  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -86,7 +86,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.119 s +1 processes with 72 diagrams generated in 2.532 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -105,14 +105,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.137 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.136 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.170 s +ALOHA: aloha creates 5 routines in 0.182 s VVV5 VVV5 FFV1 @@ -132,7 +132,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. quit -real 0m4.219s -user 0m4.105s +real 0m3.612s +user 0m3.509s sys 0m0.085s -Code generation completed in 5 seconds +Code generation completed in 4 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index b4550f03a3..b91dcac5ad 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.090 s +1 processes with 6 diagrams generated in 0.088 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -578,17 +578,17 @@ INFO: Finding symmetric diagrams for subprocess group gg_t1t1x DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [model_handling.py at line 1749]  Generated helas calls for 1 subprocesses (6 diagrams) in 0.007 s -Wrote files for 16 helas calls in 0.066 s +Wrote files for 16 helas calls in 0.060 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.097 s +ALOHA: aloha creates 3 routines in 0.100 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.093 s +ALOHA: aloha creates 6 routines in 0.102 s VVV1 VSS1 VSS1 @@ -616,10 +616,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.797s -user 0m2.446s -sys 0m0.328s -Code generation completed in 2 seconds +real 0m2.611s +user 0m2.296s +sys 0m0.299s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat index 1d5b59c692..db4ddbc444 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f index 0621786ba2..5e894db7e8 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f @@ -525,8 +525,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=4) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -553,8 +553,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 30f8a4d405..1253d68d8e 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.094 s +1 processes with 6 diagrams generated in 0.088 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -567,12 +567,12 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/. 
-Generated helas calls for 1 subprocesses (6 diagrams) in 0.007 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.103 s +ALOHA: aloha creates 3 routines in 0.094 s VVV1 VSS1 VSS1 @@ -588,7 +588,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. quit -real 0m1.102s -user 0m1.024s -sys 0m0.072s +real 0m1.035s +user 0m0.961s +sys 0m0.068s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index f42ccd164c..08ad886479 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.087 s +1 processes with 3 diagrams generated in 0.110 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -577,16 +577,16 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.065 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.005 s +Wrote files for 10 helas calls in 0.054 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.077 s +ALOHA: aloha creates 2 routines in 0.075 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.072 s +ALOHA: aloha creates 4 routines in 0.078 s VVV1 FFV1 FFV1 @@ -613,9 +613,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.672s -user 0m2.335s -sys 0m0.308s +real 0m2.636s +user 0m2.339s +sys 0m0.278s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat index a25875a280..9d7a740f8d 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index b79f45da06..7cf597b197 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -525,8 +525,8 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, IMPLICIT NONE INCLUDE 'nexternal.inc' - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX INCLUDE 'maxamps.inc' + INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection); also defines VECSIZE_MEMMAX via vector.inc INTEGER NCOMB PARAMETER ( NCOMB=16) DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_MEMMAX) @@ -553,8 +553,6 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, SAVE FIRST_CHID DATA FIRST_CHID/.TRUE./ - INCLUDE 'cluster.inc' ! for IGRAPHS common block (MLM per-event color selection) - #ifdef MG5AMC_MEEXPORTER_CUDACPP INCLUDE 'coupl.inc' ! 
for ALL_G INCLUDE 'fbridge.inc' diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index c3b152c337..ebfea5a14b 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-2-g723f84307 copilot/fix-mlm-issue-phase-space * +* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.090 s +1 processes with 3 diagrams generated in 0.088 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -567,11 +567,11 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/. 
-Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.005 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.084 s +ALOHA: aloha creates 2 routines in 0.082 s VVV1 FFV1 FFV1 @@ -586,7 +586,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. quit -real 0m1.139s -user 0m1.050s -sys 0m0.074s +real 0m1.050s +user 0m0.988s +sys 0m0.051s Code generation completed in 1 seconds From 9c1a524fe8ef23858307617df8b583e1531c1e80 Mon Sep 17 00:00:00 2001 From: Olivier Mattelaer Date: Wed, 15 Apr 2026 11:06:13 +0200 Subject: [PATCH 10/17] update the fortran part --- MG5aMC/mg5amcnlo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index c39fc765ad..9640edd85a 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit c39fc765ade7dd44ca238f48c41fa0a6c27edfc4 +Subproject commit 9640edd85aca88e6a9c74f73aeec5e73f35dc525 From 1bffef634d49a98949e343497520135fd8e933eb Mon Sep 17 00:00:00 2001 From: Daniele Massaro Date: Wed, 15 Apr 2026 12:10:28 +0200 Subject: [PATCH 11/17] Update submodule --- MG5aMC/mg5amcnlo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index 9640edd85a..69b7ec3d48 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit 9640edd85aca88e6a9c74f73aeec5e73f35dc525 +Subproject commit 69b7ec3d4839cb59877a877b921a63782a75e2a4 From f49d54cdd0fd0a16b55153cdf7899f993333a201 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: 
Wed, 15 Apr 2026 11:25:34 +0000 Subject: [PATCH 12/17] Fix per-event igraph/iconfig for color selection in SIMD pages Agent-Logs-Url: https://github.com/madgraph5/madgraph4gpu/sessions/4c9417fc-2ffa-4b23-9885-18f87879424b Co-authored-by: oliviermattelaer <33414646+oliviermattelaer@users.noreply.github.com> --- .../gpu/process_sigmaKin_function.inc | 140 ++++++++++-------- 1 file changed, 75 insertions(+), 65 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc index 57c8d7fab5..620d317f16 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc @@ -206,71 +206,61 @@ } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? 
allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d which is greater than nchannels=%%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d which is greater than nchannels=%%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%%d (invalid SDE iconfig=%%d\n > nconfig=%%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%%d (invalid SDE iconfig=%%d\n > nconfig=%%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%%4d rndcol=%%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -278,32 +268,52 @@ break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%%4d rndcol=%%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%%d icol=%%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else - { - for( int 
ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } From b449bc0a4db968c32c678e29a8e663c149a6457a Mon Sep 17 00:00:00 2001 From: Daniele Massaro Date: Wed, 15 Apr 2026 16:18:59 +0200 Subject: [PATCH 13/17] Regenerate processes --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 20 +-- .../ee_mumu.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_epem_mupmum/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_epem_mupmum/auto_dsig.f | 11 +- .../SubProcesses/P1_epem_mupmum/matrix1.f | 2 +- .../CODEGEN_cudacpp_ee_mumu_log.txt | 16 +- .../P1_Sigma_sm_epem_mupmum/CPPProcess.cc | 140 ++++++++++-------- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 18 +-- .../cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gg_ttx/auto_dsig.f | 11 +- .../SubProcesses/P1_gg_ttx/matrix1.f | 2 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 14 +- .../P1_Sigma_sm_gg_ttx/CPPProcess.cc | 140 ++++++++++-------- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 22 +-- .../gg_tt01g.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gg_ttx/auto_dsig.f | 11 +- .../SubProcesses/P1_gg_ttx/matrix1.f | 2 +- .../SubProcesses/P2_gg_ttxg/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P2_gg_ttxg/auto_dsig.f | 11 +- .../SubProcesses/P2_gg_ttxg/matrix1.f | 2 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 +-- .../gg_ttg.mad/Cards/proc_card_mg5.dat | 2 +- 
.../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gg_ttxg/auto_dsig.f | 11 +- .../SubProcesses/P1_gg_ttxg/matrix1.f | 2 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 16 +- .../P1_Sigma_sm_gg_ttxg/CPPProcess.cc | 140 ++++++++++-------- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 20 +-- .../gg_ttgg.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttxgg/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gg_ttxgg/auto_dsig.f | 11 +- .../SubProcesses/P1_gg_ttxgg/matrix1.f | 2 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 16 +- .../P1_Sigma_sm_gg_ttxgg/CPPProcess.cc | 140 ++++++++++-------- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 24 +-- .../gg_ttggg.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttxggg/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gg_ttxggg/auto_dsig.f | 11 +- .../SubProcesses/P1_gg_ttxggg/matrix1.f | 2 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 18 +-- .../P1_Sigma_sm_gg_ttxggg/CPPProcess.cc | 140 ++++++++++-------- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 22 +-- .../gq_ttq.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gu_ttxu/auto_dsig.f | 11 +- .../SubProcesses/P1_gu_ttxu/matrix1.f | 2 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gux_ttxux/auto_dsig.f | 11 +- .../SubProcesses/P1_gux_ttxux/matrix1.f | 2 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 18 +-- .../P1_Sigma_sm_gu_ttxu/CPPProcess.cc | 140 ++++++++++-------- .../P1_Sigma_sm_gux_ttxux/CPPProcess.cc | 140 ++++++++++-------- .../CODEGEN_mad_heft_gg_bb_log.txt | 20 +-- .../heft_gg_bb.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_bbx/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gg_bbx/auto_dsig.f | 11 +- .../SubProcesses/P1_gg_bbx/matrix1.f | 2 +- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 16 +- .../P1_Sigma_heft_gg_bbx/CPPProcess.cc | 140 ++++++++++-------- 
.../CODEGEN_mad_nobm_pp_ttW_log.txt | 22 +-- .../nobm_pp_ttW.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P0_dux_ttxwm/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P0_dux_ttxwm/auto_dsig.f | 11 +- .../SubProcesses/P0_dux_ttxwm/matrix1.f | 2 +- .../SubProcesses/P0_udx_ttxwp/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P0_udx_ttxwp/auto_dsig.f | 11 +- .../SubProcesses/P0_udx_ttxwp/matrix1.f | 2 +- .../SubProcesses/P1_dux_ttxwmg/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_dux_ttxwmg/auto_dsig.f | 11 +- .../SubProcesses/P1_dux_ttxwmg/matrix1.f | 2 +- .../SubProcesses/P1_gd_ttxwmu/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gd_ttxwmu/auto_dsig.f | 11 +- .../SubProcesses/P1_gd_ttxwmu/matrix1.f | 2 +- .../SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gdx_ttxwpux/auto_dsig.f | 11 +- .../SubProcesses/P1_gdx_ttxwpux/matrix1.f | 2 +- .../SubProcesses/P1_gu_ttxwpd/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gu_ttxwpd/auto_dsig.f | 11 +- .../SubProcesses/P1_gu_ttxwpd/matrix1.f | 2 +- .../SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gux_ttxwmdx/auto_dsig.f | 11 +- .../SubProcesses/P1_gux_ttxwmdx/matrix1.f | 2 +- .../SubProcesses/P1_udx_ttxwpg/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_udx_ttxwpg/auto_dsig.f | 11 +- .../SubProcesses/P1_udx_ttxwpg/matrix1.f | 2 +- .../CODEGEN_mad_pp_tt012j_log.txt | 26 ++-- .../pp_tt012j.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P0_gg_ttx/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P0_gg_ttx/auto_dsig.f | 11 +- .../SubProcesses/P0_gg_ttx/matrix1.f | 2 +- .../SubProcesses/P0_uux_ttx/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P0_uux_ttx/auto_dsig.f | 11 +- .../SubProcesses/P0_uux_ttx/matrix1.f | 2 +- .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gg_ttxg/auto_dsig.f | 11 +- 
.../SubProcesses/P1_gg_ttxg/matrix1.f | 2 +- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gu_ttxu/auto_dsig.f | 11 +- .../SubProcesses/P1_gu_ttxu/matrix1.f | 2 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gux_ttxux/auto_dsig.f | 11 +- .../SubProcesses/P1_gux_ttxux/matrix1.f | 2 +- .../SubProcesses/P1_uux_ttxg/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_uux_ttxg/auto_dsig.f | 11 +- .../SubProcesses/P1_uux_ttxg/matrix1.f | 2 +- .../SubProcesses/P2_gg_ttxgg/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P2_gg_ttxgg/auto_dsig.f | 11 +- .../SubProcesses/P2_gg_ttxgg/matrix1.f | 2 +- .../SubProcesses/P2_gg_ttxuux/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P2_gg_ttxuux/auto_dsig.f | 11 +- .../SubProcesses/P2_gg_ttxuux/matrix1.f | 2 +- .../SubProcesses/P2_gu_ttxgu/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P2_gu_ttxgu/auto_dsig.f | 11 +- .../SubProcesses/P2_gu_ttxgu/matrix1.f | 2 +- .../SubProcesses/P2_gux_ttxgux/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P2_gux_ttxgux/auto_dsig.f | 11 +- .../SubProcesses/P2_gux_ttxgux/matrix1.f | 2 +- .../SubProcesses/P2_uc_ttxuc/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P2_uc_ttxuc/auto_dsig.f | 11 +- .../SubProcesses/P2_uc_ttxuc/matrix1.f | 2 +- .../SubProcesses/P2_ucx_ttxucx/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P2_ucx_ttxucx/auto_dsig.f | 11 +- .../SubProcesses/P2_ucx_ttxucx/matrix1.f | 2 +- .../SubProcesses/P2_uu_ttxuu/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P2_uu_ttxuu/auto_dsig.f | 11 +- .../SubProcesses/P2_uu_ttxuu/matrix1.f | 2 +- .../SubProcesses/P2_uux_ttxccx/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P2_uux_ttxccx/auto_dsig.f | 11 +- .../SubProcesses/P2_uux_ttxccx/matrix1.f | 2 +- .../SubProcesses/P2_uux_ttxgg/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P2_uux_ttxgg/auto_dsig.f | 11 +- 
.../SubProcesses/P2_uux_ttxgg/matrix1.f | 2 +- .../SubProcesses/P2_uux_ttxuux/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P2_uux_ttxuux/auto_dsig.f | 11 +- .../SubProcesses/P2_uux_ttxuux/matrix1.f | 2 +- .../P2_uxcx_ttxuxcx/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f | 11 +- .../SubProcesses/P2_uxcx_ttxuxcx/matrix1.f | 2 +- .../P2_uxux_ttxuxux/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P2_uxux_ttxuxux/auto_dsig.f | 11 +- .../SubProcesses/P2_uxux_ttxuxux/matrix1.f | 2 +- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 22 +-- .../smeft_gg_tttt.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttxttx/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gg_ttxttx/auto_dsig.f | 11 +- .../SubProcesses/P1_gg_ttxttx/matrix1.f | 2 +- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 16 +- .../CPPProcess.cc | 140 ++++++++++-------- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 16 +- .../susy_gg_t1t1.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_t1t1x/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gg_t1t1x/auto_dsig.f | 11 +- .../SubProcesses/P1_gg_t1t1x/matrix1.f | 2 +- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 14 +- .../CPPProcess.cc | 140 ++++++++++-------- .../CODEGEN_mad_susy_gg_tt_log.txt | 18 +-- .../susy_gg_tt.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 140 ++++++++++-------- .../SubProcesses/P1_gg_ttx/auto_dsig.f | 11 +- .../SubProcesses/P1_gg_ttx/matrix1.f | 2 +- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 14 +- .../P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc | 140 ++++++++++-------- 164 files changed, 4367 insertions(+), 3594 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 352378e243..2895cc529b 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -15,7 +15,7 @@ It has been validated 
for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.002836942672729492  +DEBUG: model prefixing takes 0.003197908401489258  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -148,7 +148,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.003 s +1 processes with 2 diagrams generated in 0.004 s Total: 1 processes with 2 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -177,19 +177,19 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s -Wrote files for 8 helas calls in 0.050 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s +Wrote files for 8 helas calls in 0.057 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.105 s +ALOHA: aloha creates 3 routines in 0.126 s ALOHA: aloha starts to 
compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.124 s +ALOHA: aloha creates 7 routines in 0.175 s FFV1 FFV1 FFV2 @@ -220,9 +220,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.841s -user 0m1.574s -sys 0m0.251s +real 0m2.099s +user 0m1.796s +sys 0m0.279s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat index 76707b2df1..bdf200888e 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc index cefddd36a5..fb09c252b7 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc @@ -1267,71 +1267,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1339,32 +1329,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f index 6dbcbc178f..a21b9558bb 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f index 8d9020151c..fa57230d40 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f @@ -304,7 +304,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index ec0401b7f8..d1896b428c 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0028874874114990234  +DEBUG: model prefixing takes 0.0035784244537353516  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -148,7 +148,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.003 s +1 processes with 2 diagrams generated in 0.004 s Total: 1 processes with 2 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -167,13 +167,13 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. -Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.140 s +ALOHA: aloha creates 4 routines in 0.172 s FFV1 FFV1 FFV2 @@ -192,7 +192,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
quit -real 0m0.526s -user 0m0.455s -sys 0m0.065s +real 0m0.621s +user 0m0.533s +sys 0m0.073s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc index 3bd1fe8442..064d2aacd5 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc @@ -1265,71 +1265,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) 
targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1337,32 +1327,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? 
allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else - { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git 
a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 1bf7409d7b..0c0065b306 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.002843141555786133  +DEBUG: model prefixing takes 0.0035247802734375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.006 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -179,15 +179,15 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.056 s +Wrote files for 10 helas calls in 0.118 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.079 s +ALOHA: aloha creates 2 routines in 0.106 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.068 s +ALOHA: aloha creates 4 routines in 0.088 s VVV1 FFV1 FFV1 @@ -214,9 +214,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.804s -user 0m1.509s -sys 0m0.277s +real 0m2.327s +user 0m1.937s +sys 0m0.356s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat index 5dccd79cf6..c233771f8c 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 528c019861..877693ab4e 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -1280,71 +1280,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1352,32 +1342,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index cbe257bc8a..0ee3fad958 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index 6ddd16cc10..a6ff6ae67f 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -286,7 +286,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 8f21787927..9e55b4f43b 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.003323078155517578  +DEBUG: model prefixing takes 0.0032906532287597656  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.006 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -172,7 +172,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.081 s +ALOHA: aloha creates 2 routines in 0.099 s VVV1 FFV1 FFV1 @@ -187,7 +187,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. quit -real 0m0.468s -user 0m0.399s -sys 0m0.065s +real 0m0.546s +user 0m0.478s +sys 0m0.063s Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc index c9237a883f..0a7fdcf80b 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc @@ -1277,71 +1277,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1349,32 +1339,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 613e90f3dc..a35d3e30c5 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0030622482299804688  +DEBUG: model prefixing takes 0.0033724308013916016  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.006 s +1 processes with 3 diagrams generated in 0.007 s Total: 1 processes with 3 diagrams add process g g > t t~ g INFO: Checking for minimal orders which gives processes. 
@@ -157,7 +157,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.014 s +1 processes with 16 diagrams generated in 0.016 s Total: 2 processes with 19 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -199,22 +199,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  -Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.136 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.041 s +Wrote files for 46 helas calls in 0.173 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.177 s +ALOHA: aloha creates 5 routines in 0.211 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.160 s +ALOHA: aloha creates 10 routines in 0.178 s VVV1 VVV1 FFV1 @@ -246,9 +246,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.474s -user 0m2.128s -sys 0m0.326s +real 0m2.633s +user 0m2.230s +sys 0m0.370s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat index 198775ab17..ea8ca8db1f 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 528c019861..877693ab4e 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -1280,71 +1280,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1352,32 +1342,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index cbe257bc8a..0ee3fad958 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f index 6ddd16cc10..a6ff6ae67f 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -286,7 +286,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc index 5e0525f85b..a6b6ef610c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc @@ -1497,71 +1497,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1569,32 +1559,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f index b0bbdf17fb..ba1f735272 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f index 44078e305d..41a39a13ca 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f @@ -302,7 +302,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 9c91843731..8b226dca47 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0030193328857421875  +DEBUG: model prefixing takes 0.0031692981719970703  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.018 s Total: 1 processes with 16 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -178,22 +178,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxg DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.028 s -Wrote files for 36 helas calls in 0.081 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.031 s +Wrote files for 36 helas calls in 0.107 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.183 s +ALOHA: aloha creates 5 routines in 0.224 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: 
aloha creates 10 routines in 0.153 s +ALOHA: aloha creates 10 routines in 0.189 s VVV1 VVV1 FFV1 @@ -225,9 +225,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.097s -user 0m1.842s -sys 0m0.237s +real 0m2.578s +user 0m2.210s +sys 0m0.342s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat index 60e8eb1680..9be80ae5a4 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 9345bb5600..85622f37c6 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -1497,71 +1497,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1569,32 +1559,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f index 5b885a4dac..e596678cdc 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f index 186fa86cc3..7388a4bf7e 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -302,7 +302,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 8378db05c8..6285c8742e 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00603795051574707  +DEBUG: model prefixing takes 0.003176450729370117  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.047 s +1 processes with 16 diagrams generated in 0.017 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,14 +168,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.067 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.035 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.362 s +ALOHA: aloha creates 5 routines in 0.344 s VVV1 VVV1 FFV1 @@ -195,7 +195,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
quit -real 0m1.292s -user 0m1.160s -sys 0m0.120s +real 0m0.890s +user 0m0.803s +sys 0m0.077s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc index 0861f53607..887adbc468 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc @@ -1491,71 +1491,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) 
targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1563,32 +1553,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? 
allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else - { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git 
a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 41e277a07c..859a79a2b4 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.003140687942504883  +DEBUG: model prefixing takes 0.003329753875732422  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.106 s +1 processes with 123 diagrams generated in 0.127 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -178,22 +178,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxgg DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 
67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.314 s -Wrote files for 222 helas calls in 0.430 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.367 s +Wrote files for 222 helas calls in 0.514 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.161 s +ALOHA: aloha creates 5 routines in 0.218 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.206 s +ALOHA: aloha creates 10 routines in 0.236 s VVV1 VVV1 FFV1 @@ -228,9 +228,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.159s -user 0m2.845s -sys 0m0.291s +real 0m3.680s +user 0m3.301s +sys 0m0.345s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat index e36b410272..8eeac7b204 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc index 07c5e96e24..93c8bce4aa 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc @@ -3425,71 +3425,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -3497,32 +3487,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f index 6913e66444..ccb857518f 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f index c45d039de2..19b83eff7d 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f @@ -334,7 +334,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index b64e85c5f6..41182c8dfd 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.002850770950317383  +DEBUG: model prefixing takes 0.0036923885345458984  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.133 s +1 processes with 123 diagrams generated in 0.128 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,14 +168,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.282 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.422 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.156 s +ALOHA: aloha creates 5 routines in 0.226 s VVV1 VVV1 FFV1 @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m1.042s -user 0m0.963s -sys 0m0.072s +real 0m1.340s +user 0m1.247s +sys 0m0.075s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc index cb673ba887..dfa0ab3dc3 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc @@ -3482,71 +3482,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) 
targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -3554,32 +3544,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? 
allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else - { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git 
a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 5c1ced3d64..5c8dc17558 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0031096935272216797  +DEBUG: model prefixing takes 0.003306865692138672  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.317 s +1 processes with 1240 diagrams generated in 1.710 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -169,7 +169,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] -INFO: Color-Flow passed to 1630 term in 4s. Introduce 3030 contraction +INFO: Color-Flow passed to 1630 term in 6s. 
Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h @@ -180,22 +180,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxggg DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 
207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 
450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 
679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 
922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 
32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 
244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 
444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 
644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 
844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 4.272 s -Wrote files for 2281 helas calls in 10.459 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 5.291 s +Wrote files for 2281 helas calls in 13.681 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.250 s +ALOHA: aloha creates 5 routines in 0.304 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.153 s +ALOHA: aloha creates 10 routines 
in 0.238 s VVV1 VVV1 FFV1 @@ -230,10 +230,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m20.973s -user 0m20.527s -sys 0m0.385s -Code generation completed in 21 seconds +real 0m26.984s +user 0m26.323s +sys 0m0.498s +Code generation completed in 27 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat index e5edac134f..d77e39cc0c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc index 5c211958ad..14d3f28923 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc @@ -30996,71 +30996,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -31068,32 +31058,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - 
else - { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f index fc4a203533..89a856a460 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f index 75d6f7a707..e11cef7ff9 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f @@ -398,7 +398,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index c1d79282cd..03f105764b 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004813194274902344  +DEBUG: model prefixing takes 0.0037555694580078125  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.283 s +1 processes with 1240 diagrams generated in 1.867 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,14 +168,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 4.282 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 5.295 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.204 s +ALOHA: aloha creates 5 routines in 0.240 s VVV1 VVV1 FFV1 @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m8.515s -user 0m8.356s -sys 0m0.130s -Code generation completed in 9 seconds +real 0m10.649s +user 0m10.454s +sys 0m0.140s +Code generation completed in 10 seconds diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc index 379a3750aa..692ef2ca37 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc @@ -32886,71 +32886,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? 
allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -32958,32 +32948,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - 
else - { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 9c84bdacd3..8dd042e628 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -55,7 +55,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0030677318572998047  +DEBUG: model prefixing takes 0.0034317970275878906  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -164,7 +164,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.053 s +8 processes with 40 diagrams generated in 0.081 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -212,16 +212,16 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxux DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1749]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.023 s -Wrote files for 32 helas calls in 0.138 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.026 s +Wrote files for 32 helas calls in 0.129 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.081 s +ALOHA: aloha creates 2 routines in 0.114 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.066 s +ALOHA: aloha creates 4 routines in 0.081 s FFV1 FFV1 FFV1 @@ -249,10 +249,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.468s -user 0m2.099s -sys 0m0.348s -Code generation completed in 3 seconds +real 0m3.447s +user 0m2.884s +sys 0m0.446s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat index 1d7ba76b8f..568c75e0e2 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 158152f03a..47e6c1de98 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -1335,71 +1335,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1407,32 +1397,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f index abfeda5bd0..85c5157797 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f @@ -1206,7 +1206,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1221,10 +1221,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1236,7 +1239,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f index 45a46d5129..14fd0f0017 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -320,7 +320,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index 7282662b6e..8655477d5e 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -1335,71 +1335,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1407,32 +1397,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f index b3ffe0f7cf..8d9f98f8ac 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f @@ -1206,7 +1206,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1221,10 +1221,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1236,7 +1239,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f index 7245268851..784c7b3ebc 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -320,7 +320,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 29b637877c..b4a42bd09e 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -55,7 +55,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0028433799743652344  +DEBUG: model prefixing takes 0.0031960010528564453  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -164,7 +164,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.064 s +8 processes with 40 diagrams generated in 0.108 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -200,11 +200,11 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.021 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.027 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.083 s +ALOHA: aloha creates 2 routines in 0.093 s FFV1 FFV1 FFV1 @@ -220,7 +220,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
quit -real 0m0.564s -user 0m0.504s -sys 0m0.055s -Code generation completed in 0 seconds +real 0m0.664s +user 0m0.581s +sys 0m0.074s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc index a677127357..e3a48ec6dc 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc @@ -1330,71 +1330,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? 
allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1402,32 +1392,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc index d22359741c..56eba53054 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc @@ -1330,71 +1330,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? 
allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1402,32 +1392,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index ee673a9803..6b28ca9564 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -121,7 +121,7 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ generate g g > b b~ HIW<=1 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Process has 4 diagrams -1 processes with 4 diagrams generated in 0.006 s +1 processes with 4 diagrams generated in 0.005 s Total: 1 processes with 4 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_heft_gg_bb --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -150,20 +150,20 @@ INFO: Finding symmetric diagrams for subprocess group gg_bbx DEBUG: len(subproc_diagrams_for_config) =  4 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1749]  -Generated 
helas calls for 1 subprocesses (4 diagrams) in 0.007 s -Wrote files for 12 helas calls in 0.055 s +Generated helas calls for 1 subprocesses (4 diagrams) in 0.008 s +Wrote files for 12 helas calls in 0.063 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.150 s +ALOHA: aloha creates 4 routines in 0.194 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.128 s +ALOHA: aloha creates 8 routines in 0.202 s VVS3 VVV1 FFV1 @@ -192,10 +192,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.156s -user 0m1.806s -sys 0m0.327s -Code generation completed in 2 seconds +real 0m2.521s +user 0m2.176s +sys 0m0.319s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat index 8498ad5355..0a3868c028 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc index b7c2f8f53a..2876d942ce 100644 --- 
a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc @@ -1294,71 +1294,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) 
targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1366,32 +1356,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? 
allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else - { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git 
a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f index 19da4f5d75..f966affe59 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f index 7395a3966a..6fb79f6e5d 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f @@ -286,7 +286,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index e9ab17168a..cebb600dfc 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ 
b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -121,7 +121,7 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ generate g g > b b~ HIW<=1 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Process has 4 diagrams -1 processes with 4 diagrams generated in 0.004 s +1 processes with 4 diagrams generated in 0.005 s Total: 1 processes with 4 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_heft_gg_bb Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -140,13 +140,13 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/. 
-Generated helas calls for 1 subprocesses (4 diagrams) in 0.006 s +Generated helas calls for 1 subprocesses (4 diagrams) in 0.007 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.145 s +ALOHA: aloha creates 4 routines in 0.165 s VVS3 VVV1 FFV1 @@ -163,7 +163,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. quit -real 0m0.523s -user 0m0.452s -sys 0m0.065s -Code generation completed in 1 seconds +real 0m0.573s +user 0m0.497s +sys 0m0.068s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc index 53350bc7d1..8f5fdbdfe1 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/CPPProcess.cc @@ -1290,71 +1290,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1362,32 +1352,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt index ec917b8858..64af513301 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -55,7 +55,7 @@ set zerowidth_tchannel F import model sm-no_b_mass INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0033407211303710938  +DEBUG: model prefixing takes 0.0030832290649414062  INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -179,7 +179,7 @@ INFO: Process u~ d > t t~ w- added to mirror process d u~ > t t~ w- INFO: Process c~ s > t t~ w- added to mirror process s c~ > t t~ w- INFO: Process d~ u > t t~ w+ added to mirror process u d~ > t t~ w+ INFO: Process s~ c > t t~ w+ added to mirror process c s~ > t t~ w+ -4 processes with 8 diagrams generated in 0.087 s +4 processes with 8 diagrams generated in 0.090 s Total: 4 processes with 8 diagrams add process p p > t t~ w j @1 INFO: Checking for minimal orders which gives processes. @@ -221,7 +221,7 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~ INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g -12 processes with 144 diagrams generated in 0.462 s +12 processes with 144 diagrams generated in 0.542 s Total: 16 processes with 152 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -349,18 +349,18 @@ INFO: Finding symmetric diagrams for subprocess group dux_ttxwm DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1749]  -Generated helas calls for 8 subprocesses (76 diagrams) in 0.134 s -Wrote files for 212 helas calls in 0.580 s +Generated helas calls for 8 subprocesses (76 diagrams) in 0.149 s +Wrote files for 212 helas calls in 0.683 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 3 routines in 0.105 s +ALOHA: aloha creates 3 routines 
in 0.119 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 6 routines in 0.102 s +ALOHA: aloha creates 6 routines in 0.117 s FFV1 FFV1 FFV1 @@ -390,9 +390,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m4.075s -user 0m3.538s -sys 0m0.497s +real 0m4.610s +user 0m3.960s +sys 0m0.592s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat index eb68a4bde3..03f015e1cd 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc index 8a74a16222..1db6dc2843 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc @@ -1307,71 +1307,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1379,32 +1369,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f index fc86391731..0dc54b5499 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f @@ -1204,7 +1204,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1219,10 +1219,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1234,7 +1237,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/matrix1.f index f474e88c0b..8ea7cbd981 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/matrix1.f @@ -334,7 +334,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc index 5aea836d76..3734c48717 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc @@ -1307,71 +1307,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1379,32 +1369,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f index d017298ec3..b202ccafd8 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f @@ -1204,7 +1204,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1219,10 +1219,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1234,7 +1237,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/matrix1.f index 3b69954d1e..72232f43be 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/matrix1.f @@ -334,7 +334,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc index ff7c5406b9..b516bc3dd1 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc @@ -1503,71 +1503,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1575,32 +1565,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f index e87d79430d..98bf3c96c9 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f @@ -1204,7 +1204,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1219,10 +1219,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1234,7 +1237,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/matrix1.f index 59d1fada6b..bf4575a44d 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/matrix1.f @@ -382,7 +382,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc index 8fb1cc30ca..3bb4f68f77 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc @@ -1503,71 +1503,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1575,32 +1565,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f index 9953cf61f0..31d9da001c 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f @@ -1204,7 +1204,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1219,10 +1219,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1234,7 +1237,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/matrix1.f index b4592273af..60736d40f4 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/matrix1.f @@ -382,7 +382,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc index 4105b65b3c..f585a5805c 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc @@ -1503,71 +1503,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1575,32 +1565,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f index e851c0f544..1c15ce9fb2 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f @@ -1204,7 +1204,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1219,10 +1219,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1234,7 +1237,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/matrix1.f index a8e61a7f70..8befd86e93 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/matrix1.f @@ -382,7 +382,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc index 23e12cdaed..a1b0193992 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc @@ -1503,71 +1503,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1575,32 +1565,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f index 27340f01d3..ecbd058457 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f @@ -1204,7 +1204,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1219,10 +1219,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1234,7 +1237,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/matrix1.f index 2aeb7dfc92..1d55f3f5b6 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/matrix1.f @@ -382,7 +382,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc index 72257f5a04..0ab1f3a681 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc @@ -1503,71 +1503,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1575,32 +1565,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f index c74ff705d6..c0f3f25286 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f @@ -1204,7 +1204,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1219,10 +1219,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1234,7 +1237,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/matrix1.f index 927a46e574..d475dc7829 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/matrix1.f @@ -382,7 +382,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc index 3991dd624e..99663e5339 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc @@ -1503,71 +1503,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1575,32 +1565,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f index 3f9567b771..81ecfc876a 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f @@ -1204,7 +1204,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1219,10 +1219,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1234,7 +1237,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/matrix1.f index d4d5a71d55..795d4cc364 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/matrix1.f @@ -382,7 +382,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 62ee7940cf..b135c17f32 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -55,7 +55,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.002837657928466797  +DEBUG: model prefixing takes 0.0034799575805664062  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -166,7 +166,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.022 s +5 processes with 7 diagrams generated in 0.024 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -206,7 +206,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.096 s +13 processes with 76 diagrams generated in 0.106 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -372,7 +372,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.515 s +65 processes with 1119 diagrams generated in 1.517 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -687,22 +687,22 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttx DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1749]  -Generated helas calls for 18 subprocesses (372 diagrams) in 0.901 s -Wrote files for 810 helas calls in 1.969 s +Generated helas calls for 18 subprocesses (372 diagrams) in 0.974 s +Wrote files for 810 helas calls in 2.288 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.167 s +ALOHA: aloha creates 5 routines in 0.181 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.185 s +ALOHA: aloha creates 10 routines in 0.255 s VVV1 VVV1 FFV1 @@ -737,10 +737,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.031s -user 0m7.999s -sys 0m0.950s -Code generation completed in 9 seconds +real 0m10.061s +user 0m8.893s +sys 0m1.041s +Code generation completed in 11 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat index 92c81a8bde..e56d77408c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc index e6f7d28e97..eb6bb3cfbf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc @@ -1280,71 +1280,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1352,32 +1342,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f index b6f323bb6a..3d6273e9de 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f index c7dd26745a..f01adf2ab1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f @@ -286,7 +286,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc index 08c8080553..2781752348 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc @@ -1257,71 +1257,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1329,32 +1319,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f index b23b0a3173..86d509e240 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f @@ -1206,7 +1206,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1221,10 +1221,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1236,7 +1239,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f index d1f6d2f5c6..1e6c927bfb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f @@ -289,7 +289,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 9345bb5600..85622f37c6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -1497,71 +1497,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1569,32 +1559,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f index 5b885a4dac..e596678cdc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f index 186fa86cc3..7388a4bf7e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -302,7 +302,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 2bfaa38568..42a1ee15b1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -1335,71 +1335,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1407,32 +1397,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f index abfeda5bd0..85c5157797 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f @@ -1206,7 +1206,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1221,10 +1221,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1236,7 +1239,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f index e21125402e..613dbb7f66 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -305,7 +305,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index fdfedd86b1..b8f320369d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -1335,71 +1335,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1407,32 +1397,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f index b3ffe0f7cf..8d9f98f8ac 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f @@ -1206,7 +1206,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1221,10 +1221,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1236,7 +1239,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f index 330e1e524e..22a6b8c5b9 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -305,7 +305,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc index 21ca5edc9c..eddfb835fb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc @@ -1335,71 +1335,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1407,32 +1397,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f index f6e883a40f..a9cb66d424 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f @@ -1206,7 +1206,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1221,10 +1221,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1236,7 +1239,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f index f23d0437bb..0ec6e93020 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f @@ -305,7 +305,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc index 876e7914c5..0c082e4dff 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc @@ -3425,71 +3425,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -3497,32 +3487,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f index 540fb9c82e..586a8a369d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f index c4a8cef829..b4b9172028 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f @@ -334,7 +334,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc index e6e6173b55..b6939136f8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc @@ -1832,71 +1832,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1904,32 +1894,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f index 07052b5092..122d95ac63 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f @@ -1206,7 +1206,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1221,10 +1221,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1236,7 +1239,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f index 191b5fc3a0..3816770328 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f @@ -337,7 +337,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc index 5b085b0f17..49f71ba3c3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc @@ -1832,71 +1832,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1904,32 +1894,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f index efc726a583..7b3432104c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f @@ -1206,7 +1206,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1221,10 +1221,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1236,7 +1239,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f index 7c40058cc9..c10cd1e6e8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f @@ -337,7 +337,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc index 57480126ae..c8ba603c5f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc @@ -1832,71 +1832,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1904,32 +1894,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f index efc7fe7670..6cf808b1bf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f @@ -1206,7 +1206,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1221,10 +1221,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1236,7 +1239,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f index 7d934f4152..8fdfbc4513 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f @@ -337,7 +337,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc index 4f022cc1ac..b084c58dc7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc @@ -1413,71 +1413,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1485,32 +1475,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f index a05f0af626..0cf8140c5e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f @@ -1208,7 +1208,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1223,10 +1223,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1238,7 +1241,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f index d1f5e36812..572c6ced56 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f @@ -339,7 +339,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc index 3b29d7716b..f0464ec557 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc @@ -1419,71 +1419,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1491,32 +1481,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f index dfaf7c9ba2..1a3666b37c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f @@ -1214,7 +1214,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1229,10 +1229,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1244,7 +1247,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f index 40511822f7..65520b0758 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f @@ -345,7 +345,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc index e16f764860..f0b0a896cb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc @@ -1523,71 +1523,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1595,32 +1585,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f index cfd22899ba..26bb17ea64 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f @@ -1206,7 +1206,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1221,10 +1221,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1236,7 +1239,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f index 0243f024be..0218e57040 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f @@ -337,7 +337,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc index 9cce6c958c..83783088e4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc @@ -1419,71 +1419,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1491,32 +1481,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f index 93cfcec297..bf1d38788c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f @@ -1214,7 +1214,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1229,10 +1229,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1244,7 +1247,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f index 55564f8914..2c2555366f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f @@ -345,7 +345,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc index 11f17169f6..e8ccccc22d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc @@ -1832,71 +1832,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1904,32 +1894,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f index c22306418b..4eb3d980ad 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f @@ -1206,7 +1206,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1221,10 +1221,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1236,7 +1239,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f index c8073e0c09..ae3e89ba33 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f @@ -337,7 +337,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc index 1071853dfa..a4dd4e74fc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc @@ -1523,71 +1523,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1595,32 +1585,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f index 789b82915c..5bc3293585 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f @@ -1206,7 +1206,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1221,10 +1221,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1236,7 +1239,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f index 29aaaf5cb8..3dcb0ae4ef 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f @@ -337,7 +337,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc index 73a7b4a37d..9e3d0186a4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc @@ -1413,71 +1413,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1485,32 +1475,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f index 2dd6aab676..ddd7ababe0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f @@ -1208,7 +1208,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1223,10 +1223,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1238,7 +1241,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f index 01163cdb48..0eba207bd8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f @@ -339,7 +339,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc index 2ac5935d6a..70e782fbbb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc @@ -1523,71 +1523,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1595,32 +1585,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f index fd29cb1b99..932747b697 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f @@ -1206,7 +1206,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1221,10 +1221,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1236,7 +1239,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f index e7134f996e..c8a51154a0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f @@ -337,7 +337,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index 52b5aafe04..2038fc26a8 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -71,7 +71,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + 
Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.045355796813964844  +DEBUG: model prefixing takes 0.05546402931213379  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -86,7 +86,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 2.471 s +1 processes with 72 diagrams generated in 2.878 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False 
--vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -115,22 +115,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxttx DEBUG: len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (72 diagrams) in 0.121 s -Wrote files for 119 helas calls in 0.250 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.189 s +Wrote files for 119 helas calls in 0.289 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.189 s +ALOHA: aloha creates 5 routines in 0.179 s ALOHA: aloha starts to 
compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.184 s +ALOHA: aloha creates 10 routines in 0.203 s VVV5 VVV5 FFV1 @@ -162,10 +162,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m5.245s -user 0m4.948s -sys 0m0.272s -Code generation completed in 6 seconds +real 0m6.256s +user 0m5.835s +sys 0m0.356s +Code generation completed in 7 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat index cac68e6a66..2b50d9cbfd 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc index 79386c8233..8d80903b40 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc @@ -2358,71 +2358,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -2430,32 +2420,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f index 5a7aff8889..0858a86382 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f index a3c2666b88..841be0ffef 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f @@ -334,7 +334,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index 78cb6c2bd6..783771ed66 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -71,7 +71,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + 
Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.049256324768066406  +DEBUG: model prefixing takes 0.06393218040466309  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -86,7 +86,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 2.532 s +1 processes with 72 diagrams generated in 2.815 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Output will be done with 
PLUGIN: CUDACPP_OUTPUT @@ -105,14 +105,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.136 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.137 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.182 s +ALOHA: aloha creates 5 routines in 0.186 s VVV5 VVV5 FFV1 @@ -132,7 +132,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
quit -real 0m3.612s -user 0m3.509s -sys 0m0.085s +real 0m4.030s +user 0m3.932s +sys 0m0.073s Code generation completed in 4 seconds diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc index 2118f73596..b7fd59ff8d 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc @@ -2306,71 +2306,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? 
allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -2378,32 +2368,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index b91dcac5ad..e8531e68d4 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.088 s +1 processes with 6 diagrams generated in 0.096 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -578,17 +578,17 @@ INFO: Finding symmetric diagrams for subprocess group gg_t1t1x DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [model_handling.py at line 1749]  Generated helas calls for 1 subprocesses (6 diagrams) in 0.007 s -Wrote files for 16 helas calls in 0.060 s +Wrote files for 16 helas calls in 0.087 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.100 s +ALOHA: aloha creates 3 routines in 0.105 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.102 s +ALOHA: aloha creates 6 routines in 0.105 s VVV1 VSS1 VSS1 @@ -616,9 +616,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.611s -user 0m2.296s -sys 0m0.299s +real 0m2.959s +user 0m2.602s +sys 0m0.336s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat index db4ddbc444..e242e6277f 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc index 235ed516b0..a239d19d92 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc @@ -1303,71 +1303,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1375,32 +1365,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f index 3bcc8b4dec..b109d1e601 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f index 7c72f63f66..2c3622336c 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f @@ -274,7 +274,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 1253d68d8e..d8ff800cdc 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.088 s +1 processes with 6 diagrams generated in 0.099 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -567,12 +567,12 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/. -Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.007 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.094 s +ALOHA: aloha creates 3 routines in 0.112 s VVV1 VSS1 VSS1 @@ -588,7 +588,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
quit -real 0m1.035s -user 0m0.961s -sys 0m0.068s +real 0m1.212s +user 0m1.118s +sys 0m0.083s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc index b852cad515..e6ede120a7 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/CPPProcess.cc @@ -1304,71 +1304,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? 
allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1376,32 +1366,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 08ad886479..8249a679e1 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.110 s +1 processes with 3 diagrams generated in 0.094 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -577,16 +577,16 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (3 diagrams) in 0.005 s -Wrote files for 10 helas calls in 0.054 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.008 s +Wrote files for 10 helas calls in 0.068 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.075 s +ALOHA: aloha creates 2 routines in 0.096 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.078 s +ALOHA: aloha creates 4 routines in 0.080 s VVV1 FFV1 FFV1 @@ -613,9 +613,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.636s -user 0m2.339s -sys 0m0.278s +real 0m3.100s +user 0m2.697s +sys 0m0.373s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat index 9d7a740f8d..a41c9bfc68 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 3030703c0d..353c6e5f48 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -1280,71 +1280,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? 
allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! + iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1352,32 +1342,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index cbe257bc8a..0ee3fad958 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -1203,7 +1203,7 @@ INTEGER FUNCTION GET_NHEL(HEL,PARTID) END - SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) + SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) IMPLICIT NONE INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' ! for the definition of maxflow @@ -1218,10 +1218,13 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION JAMP2(0:MAXFLOW) INTEGER ICONFIG ! amplitude selected INTEGER IPROC ! 
matrix element selected + INTEGER IVEC C C argument OUT C INTEGER ICOL + INTEGER IGRAPH(VECSIZE_MEMMAX) + COMMON/VEC_IGRAPH/IGRAPH C C local C @@ -1233,7 +1236,11 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL) DOUBLE PRECISION XTARGET IF (ICKKW.GT.0) THEN - ICONFIG = IGRAPHS(1) + IF (IVEC.EQ.0) THEN + ICONFIG = IGRAPHS(1) + ELSE + ICONFIG = VEC_IGRAPH(IVEC) + ENDIF ENDIF diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index 211e4703dc..52a516cda9 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -286,7 +286,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ANS=ANS/DBLE(IDEN) - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL, IVEC) END diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index ebfea5a14b..241396992d 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-3-gc39fc765a copilot/fix-mlm-issue-phase-space * +* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.088 s +1 processes with 3 diagrams generated in 0.126 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -567,7 +567,7 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/. -Generated helas calls for 1 subprocesses (3 diagrams) in 0.005 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines @@ -586,7 +586,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
quit -real 0m1.050s -user 0m0.988s -sys 0m0.051s -Code generation completed in 1 seconds +real 0m1.283s +user 0m1.180s +sys 0m0.084s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc index b1158d6d28..2fb5bea34b 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/CPPProcess.cc @@ -1277,71 +1277,61 @@ namespace mg5amcCpu } #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // multichannel enabled (random color choice) // Event-by-event random choice of color #402 - // Use per-event MLM graph if provided, otherwise use channel2iconfig - const int igraph1_page = ( allIgraph != nullptr ) ? allIgraph[ievt00] : 0; // all events in SIMD page share the same igraph - if( channelId != 0 || igraph1_page != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + // NB: with MLM, different events in a SIMD page may have different igraph values, so iconfig must be per-event + for( int ieppV = 0; ieppV < neppV; ++ieppV ) { - // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig - int iconfig; - if( igraph1_page != 0 ) - { - iconfig = igraph1_page; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) - } - else + const int ievt = ievt00 + ieppV; + // Use per-event MLM graph if provided, otherwise use channel2iconfig + const int igraph1_ievt = ( allIgraph != nullptr ) ? 
allIgraph[ievt] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) { - if( channelId > mgOnGpu::nchannels ) + // Determine iconfig: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig; + if( igraph1_ievt != 0 ) { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); - assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + iconfig = igraph1_ievt; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) } - // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) - // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! - iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) - if( iconfig <= 0 ) + else { - printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); - assert( iconfig > 0 ); // SANITY CHECK #917 + if( channelId > mgOnGpu::nchannels ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which is greater than nchannels=%d\n", channelId, mgOnGpu::nchannels ); + assert( channelId <= mgOnGpu::nchannels ); // SANITY CHECK #919 #910 + } + // NB (see #877): in the array channel2iconfig, the input index uses C indexing (channelId -1), the output index uses F indexing (iconfig) + // NB (see #917): mgOnGpu::channel2iconfig returns an int (which may be -1), not an unsigned int! 
+ iconfig = mgOnGpu::channel2iconfig[channelId - 1]; // map N_diagrams to N_config <= N_diagrams configs (fix LHE color mismatch #856: see also #826, #852, #853) + if( iconfig <= 0 ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d which has no associated SDE iconfig\n", channelId ); + assert( iconfig > 0 ); // SANITY CHECK #917 + } + else if( iconfig > (int)mgOnGpu::nconfigSDE ) + { + printf( "INTERNAL ERROR! Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); + assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 + } } - else if( iconfig > (int)mgOnGpu::nconfigSDE ) + // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] + // NB: targetamp is a scalar fptype (not fptype_sv) - iconfig is per-event so we extract the scalar lane from jamp2_sv + fptype targetamp[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) { - printf( "INTERNAL ERROR! 
Cannot choose an event-by-event random color for channelId=%d (invalid SDE iconfig=%d\n > nconfig=%d)", channelId, iconfig, mgOnGpu::nconfigSDE ); - assert( iconfig <= (int)mgOnGpu::nconfigSDE ); // SANITY CHECK #917 - } - } - fptype_sv targetamp[ncolor] = { 0 }; - // NB (see #877): explicitly use 'icolC' rather than 'icol' to indicate that icolC uses C indexing in [0, N_colors-1] - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp[icolC] = fptype_sv{ 0 }; - else - targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; - } -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - fptype_sv targetamp2[ncolor] = { 0 }; - for( int icolC = 0; icolC < ncolor; icolC++ ) - { - if( icolC == 0 ) - targetamp2[icolC] = fptype_sv{ 0 }; - else - targetamp2[icolC] = targetamp2[icolC - 1]; - // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) - if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; - } + if( icolC == 0 ) + targetamp[icolC] = fptype{ 0 }; + else + targetamp[icolC] = targetamp[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) +#if defined MGONGPU_CPPSIMD + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC][ieppV]; +#else + if( mgOnGpu::icolamp[iconfig - 1][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; #endif - for( int ieppV = 0; ieppV < neppV; ++ieppV ) - { - const int ievt = ievt00 + ieppV; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { -#if defined MGONGPU_CPPSIMD - // Add volatile here to avoid SIGFPE crashes in FPTYPE=f cpp512z builds (#845) - volatile const bool okcol = allrndcol[ievt] < ( targetamp[icolC][ieppV] / targetamp[ncolor - 1][ieppV] ); -#else const bool okcol = allrndcol[ievt] < ( 
targetamp[icolC] / targetamp[ncolor - 1] ); -#endif if( okcol ) { allselcol[ievt] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] @@ -1349,32 +1339,52 @@ namespace mg5amcCpu break; } } + } + else + { + allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) + } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; + const int ievt2 = ievt00 + ieppV + neppV; + const int igraph1_ievt2 = ( allIgraph != nullptr ) ? allIgraph[ievt2] : 0; // per-event igraph (may differ across SIMD page with MLM) + if( channelId != 0 || igraph1_ievt2 != 0 ) // no event-by-event choice of color if both channelId and igraph are 0 (fix FPE #783) + { + // Determine iconfig2: use per-event MLM graph if provided, otherwise use channel2iconfig + int iconfig2; + if( igraph1_ievt2 != 0 ) + { + iconfig2 = igraph1_ievt2; // use MLM-matched graph directly as iconfig (F-indexed, 1-based) + } + else + { + iconfig2 = mgOnGpu::channel2iconfig[channelId - 1]; // same channelId as for ievt (sanity checks already done above) + } + fptype targetamp2[ncolor] = { 0 }; + for( int icolC = 0; icolC < ncolor; icolC++ ) + { + if( icolC == 0 ) + targetamp2[icolC] = fptype{ 0 }; + else + targetamp2[icolC] = targetamp2[icolC - 1]; + // NB (see #877): in the array icolamp, the input index uses C indexing (iconfig -1) + if( mgOnGpu::icolamp[iconfig2 - 1][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC][ieppV]; + } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt2, allrndcol[ievt2] ); for( int icolC = 0; icolC < ncolor; icolC++ ) { - if( allrndcol[ievt2] < ( targetamp2[icolC][ieppV] / targetamp2[ncolor - 1][ieppV] ) ) + if( allrndcol[ievt2] < ( targetamp2[icolC] / targetamp2[ncolor - 1] ) ) { allselcol[ievt2] = icolC + 1; // NB Fortran [1,ncolor], cudacpp [0,ncolor-1] //printf( "sigmaKin: ievt2=%d icol=%d\n", ievt2, icolC+1 ); break; } } -#endif } - } - else 
- { - for( int ieppV = 0; ieppV < neppV; ++ieppV ) + else { - const int ievt = ievt00 + ieppV; - allselcol[ievt] = 0; // no color selected in Fortran range [1,ncolor] if both channelId and igraph are 0 (see #931) -#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT - const int ievt2 = ievt00 + ieppV + neppV; allselcol[ievt2] = 0; // no color selected in Fortran range [1,ncolor] if channelId == 0 (see #931) -#endif } +#endif } #endif // multichannel enabled (random color choice) } From c1ff0ad80c6da86c6ad41089a1f76213d678b4b3 Mon Sep 17 00:00:00 2001 From: Daniele Massaro Date: Thu, 16 Apr 2026 14:53:36 +0200 Subject: [PATCH 14/17] Debug: update workflows to run only failing one and add breakpoint --- .github/workflows/c-cpp.yml | 4 ---- .github/workflows/testsuite_allprocesses.yml | 4 ++-- .github/workflows/testsuite_oneprocess.yml | 8 +++++++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 6636193539..57c221e22a 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -6,10 +6,6 @@ name: C/C++ CI on: - push: - branches: [ master ] - pull_request: - branches: [ master ] workflow_dispatch: jobs: diff --git a/.github/workflows/testsuite_allprocesses.yml b/.github/workflows/testsuite_allprocesses.yml index 28c5b9f9eb..186adb67d2 100644 --- a/.github/workflows/testsuite_allprocesses.yml +++ b/.github/workflows/testsuite_allprocesses.yml @@ -33,8 +33,8 @@ jobs: fail-fast: false # important to see all results even if one fails (fail-fast is true by default) matrix: # FIXME? Can the list of supported processes be specified only once in oneprocess.yml or allprocesses.yml? 
- process: [ee_mumu, gg_tt, gg_ttg, gg_ttgg, gg_ttggg, gg_tt01g, gq_ttq, pp_tt012j, nobm_pp_ttW, susy_gg_tt, susy_gg_t1t1, smeft_gg_tttt, heft_gg_bb] - suffix: [mad, sa] + process: [pp_tt012j] + suffix: [mad] uses: ./.github/workflows/testsuite_oneprocess.yml with: process: ${{ matrix.process }}.${{ matrix.suffix }} diff --git a/.github/workflows/testsuite_oneprocess.yml b/.github/workflows/testsuite_oneprocess.yml index 478b5eac3c..5b7d907bc2 100644 --- a/.github/workflows/testsuite_oneprocess.yml +++ b/.github/workflows/testsuite_oneprocess.yml @@ -171,7 +171,7 @@ jobs: strategy: fail-fast: false # e.g. do not stop 'd' job if 'f' job has failed (fail-fast is true by default) matrix: - fptype: [d, f, m] + fptype: [d] env: FPTYPE: ${{ matrix.fptype }} @@ -258,6 +258,12 @@ jobs: - name: tput_test run: .github/workflows/testsuite_oneprocess.sh tput_test ${{ inputs.process }} + - name: Breakpoint + uses: namespacelabs/breakpoint-action@v0 + with: + duration: 30m + authorized-users: oliviermattelaer, qubitol + - name: tmad_test run: .github/workflows/testsuite_oneprocess.sh tmad_test ${{ inputs.process }} From 1bed0fd7752f752f5303a171da8018cdd9d9cfa8 Mon Sep 17 00:00:00 2001 From: Olivier Mattelaer Date: Thu, 16 Apr 2026 16:56:06 +0200 Subject: [PATCH 15/17] update the color assignment for MLM --- .github/workflows/testsuite_oneprocess.yml | 12 +++++------- MG5aMC/mg5amcnlo | 2 +- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/.github/workflows/testsuite_oneprocess.yml b/.github/workflows/testsuite_oneprocess.yml index 5b7d907bc2..ad30854d88 100644 --- a/.github/workflows/testsuite_oneprocess.yml +++ b/.github/workflows/testsuite_oneprocess.yml @@ -257,13 +257,11 @@ jobs: - name: tput_test run: .github/workflows/testsuite_oneprocess.sh tput_test ${{ inputs.process }} - - - name: Breakpoint - uses: namespacelabs/breakpoint-action@v0 - with: - duration: 30m - authorized-users: oliviermattelaer, 
qubitol - + # - name: Breakpoint + # uses: namespacelabs/breakpoint-action@v0 + # with: + # duration: 30m + # authorized-users: oliviermattelaer, qubitol - name: tmad_test run: .github/workflows/testsuite_oneprocess.sh tmad_test ${{ inputs.process }} diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index 69b7ec3d48..f0884cb7d1 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit 69b7ec3d4839cb59877a877b921a63782a75e2a4 +Subproject commit f0884cb7d1ecb12393ddae54622b2c384bb8e2a8 diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 583f3df0c9..b3c0ae1009 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- From 72c860a1970ef3a2695689f91f82004939a023be Mon Sep 17 00:00:00 2001 From: Daniele Massaro Date: Thu, 16 Apr 2026 18:14:15 +0200 Subject: [PATCH 16/17] Regenerate processes --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 16 ++++++------ .../ee_mumu.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_epem_mupmum/auto_dsig.f | 4 +++ .../ee_mumu.mad/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_cudacpp_ee_mumu_log.txt | 14 +++++----- .../ee_mumu.sa/SubProcesses/cudacpp.mk | 2 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 18 ++++++------- .../cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttx/auto_dsig.f | 4 +++ .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 2 +- 
.../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 16 ++++++------ .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 2 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 20 +++++++------- .../gg_tt01g.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttx/auto_dsig.f | 4 +++ .../SubProcesses/P2_gg_ttxg/auto_dsig.f | 4 +++ .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 2 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 +++++++------- .../gg_ttg.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttxg/auto_dsig.f | 4 +++ .../gg_ttg.mad/SubProcesses/cudacpp.mk | 2 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 18 ++++++------- .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 2 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 18 ++++++------- .../gg_ttgg.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttxgg/auto_dsig.f | 4 +++ .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 16 ++++++------ .../gg_ttgg.sa/SubProcesses/cudacpp.mk | 2 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 24 ++++++++--------- .../gg_ttggg.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttxggg/auto_dsig.f | 4 +++ .../gg_ttggg.mad/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 16 ++++++------ .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 2 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 22 ++++++++-------- .../gq_ttq.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gu_ttxu/auto_dsig.f | 4 +++ .../SubProcesses/P1_gux_ttxux/auto_dsig.f | 4 +++ .../gq_ttq.mad/SubProcesses/cudacpp.mk | 2 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 18 ++++++------- .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_mad_heft_gg_bb_log.txt | 20 +++++++------- .../heft_gg_bb.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_bbx/auto_dsig.f | 4 +++ .../heft_gg_bb.mad/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 14 +++++----- .../heft_gg_bb.sa/SubProcesses/cudacpp.mk | 2 +- 
.../CODEGEN_mad_nobm_pp_ttW_log.txt | 22 ++++++++-------- .../nobm_pp_ttW.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P0_dux_ttxwm/auto_dsig.f | 4 +++ .../SubProcesses/P0_udx_ttxwp/auto_dsig.f | 4 +++ .../SubProcesses/P1_dux_ttxwmg/auto_dsig.f | 4 +++ .../SubProcesses/P1_gd_ttxwmu/auto_dsig.f | 4 +++ .../SubProcesses/P1_gdx_ttxwpux/auto_dsig.f | 4 +++ .../SubProcesses/P1_gu_ttxwpd/auto_dsig.f | 4 +++ .../SubProcesses/P1_gux_ttxwmdx/auto_dsig.f | 4 +++ .../SubProcesses/P1_udx_ttxwpg/auto_dsig.f | 4 +++ .../nobm_pp_ttW.mad/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_mad_pp_tt012j_log.txt | 26 +++++++++---------- .../pp_tt012j.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P0_gg_ttx/auto_dsig.f | 4 +++ .../SubProcesses/P0_uux_ttx/auto_dsig.f | 4 +++ .../SubProcesses/P1_gg_ttxg/auto_dsig.f | 4 +++ .../SubProcesses/P1_gu_ttxu/auto_dsig.f | 4 +++ .../SubProcesses/P1_gux_ttxux/auto_dsig.f | 4 +++ .../SubProcesses/P1_uux_ttxg/auto_dsig.f | 4 +++ .../SubProcesses/P2_gg_ttxgg/auto_dsig.f | 4 +++ .../SubProcesses/P2_gg_ttxuux/auto_dsig.f | 4 +++ .../SubProcesses/P2_gu_ttxgu/auto_dsig.f | 4 +++ .../SubProcesses/P2_gux_ttxgux/auto_dsig.f | 4 +++ .../SubProcesses/P2_uc_ttxuc/auto_dsig.f | 4 +++ .../SubProcesses/P2_ucx_ttxucx/auto_dsig.f | 4 +++ .../SubProcesses/P2_uu_ttxuu/auto_dsig.f | 4 +++ .../SubProcesses/P2_uux_ttxccx/auto_dsig.f | 4 +++ .../SubProcesses/P2_uux_ttxgg/auto_dsig.f | 4 +++ .../SubProcesses/P2_uux_ttxuux/auto_dsig.f | 4 +++ .../SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f | 4 +++ .../SubProcesses/P2_uxux_ttxuxux/auto_dsig.f | 4 +++ .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 22 ++++++++-------- .../smeft_gg_tttt.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttxttx/auto_dsig.f | 4 +++ .../smeft_gg_tttt.mad/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 16 ++++++------ .../smeft_gg_tttt.sa/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 16 
++++++------ .../susy_gg_t1t1.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_t1t1x/auto_dsig.f | 4 +++ .../susy_gg_t1t1.mad/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 14 +++++----- .../susy_gg_t1t1.sa/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_mad_susy_gg_tt_log.txt | 18 ++++++------- .../susy_gg_tt.mad/Cards/proc_card_mg5.dat | 2 +- .../SubProcesses/P1_gg_ttx/auto_dsig.f | 4 +++ .../susy_gg_tt.mad/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 16 ++++++------ .../susy_gg_tt.sa/SubProcesses/cudacpp.mk | 2 +- 98 files changed, 402 insertions(+), 246 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 2895cc529b..07988bc235 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.003197908401489258  +DEBUG: model prefixing takes 0.003152132034301758  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,18 +178,18 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1749]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.057 s +Wrote files for 8 helas calls in 0.058 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.126 s +ALOHA: aloha creates 3 routines in 0.118 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.175 s +ALOHA: aloha creates 7 routines in 0.189 s FFV1 FFV1 FFV2 @@ -220,9 +220,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.099s -user 0m1.796s -sys 0m0.279s +real 0m2.243s +user 0m1.791s +sys 0m0.326s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat index bdf200888e..b3ab00b31d 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +#* GIT r991-8-gf0884cb7d HEAD * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f index a21b9558bb..1e083ecd15 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt 
index d1896b428c..ff0409b8f3 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0035784244537353516  +DEBUG: model prefixing takes 0.004814624786376953  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -167,13 +167,13 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
-Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.172 s +ALOHA: aloha creates 4 routines in 0.180 s FFV1 FFV1 FFV2 @@ -192,7 +192,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. quit -real 0m0.621s -user 0m0.533s -sys 0m0.073s +real 0m0.814s +user 0m0.613s +sys 0m0.104s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 0c0065b306..7ef50d8b31 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * 
http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0035247802734375  +DEBUG: model prefixing takes 0.0038509368896484375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.007 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -179,15 +179,15 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.118 s +Wrote files for 10 helas calls in 0.066 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.106 s +ALOHA: aloha creates 2 routines in 0.118 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.088 s +ALOHA: aloha creates 4 routines in 0.160 s VVV1 FFV1 FFV1 @@ -214,9 +214,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.327s -user 0m1.937s -sys 0m0.356s +real 0m2.279s +user 0m1.921s +sys 0m0.327s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat index c233771f8c..38c1f98839 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +#* GIT r991-8-gf0884cb7d HEAD * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index 0ee3fad958..6c6b37db2c 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 9e55b4f43b..ddc402e0ef 100644 --- 
a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0032906532287597656  +DEBUG: model prefixing takes 0.0030803680419921875  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.006 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,11 +168,11 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. 
-Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.005 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.099 s +ALOHA: aloha creates 2 routines in 0.096 s VVV1 FFV1 FFV1 @@ -187,7 +187,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. quit -real 0m0.546s -user 0m0.478s -sys 0m0.063s +real 0m0.485s +user 0m0.432s +sys 0m0.048s Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index a35d3e30c5..05e779eedd 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 
+56,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0033724308013916016  +DEBUG: model prefixing takes 0.0031867027282714844  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -199,22 +199,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  -Generated helas calls for 2 subprocesses (19 diagrams) in 0.041 s -Wrote files for 46 helas calls in 0.173 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.039 s +Wrote files for 46 helas calls in 0.166 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.211 s +ALOHA: aloha creates 5 routines in 0.229 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.178 s +ALOHA: aloha creates 10 routines in 0.167 s VVV1 VVV1 FFV1 @@ -246,10 +246,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.633s -user 0m2.230s -sys 0m0.370s -Code generation completed in 2 seconds +real 0m2.606s +user 0m2.225s +sys 0m0.354s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat index ea8ca8db1f..de51114026 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +#* GIT r991-8-gf0884cb7d HEAD * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index 0ee3fad958..6c6b37db2c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f index ba1f735272..4282896667 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- 
a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 8b226dca47..68dda708e1 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0031692981719970703  +DEBUG: model prefixing takes 0.003030061721801758  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.018 s +1 processes with 16 diagrams generated in 0.016 s Total: 1 processes with 16 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -178,22 +178,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxg DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.031 s -Wrote files for 36 helas calls in 0.107 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.029 s +Wrote files for 36 helas calls in 0.087 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.224 s +ALOHA: aloha creates 5 routines in 0.247 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.189 s +ALOHA: aloha creates 10 routines in 0.169 s VVV1 VVV1 FFV1 @@ -225,9 +225,9 @@ Type "launch" to generate events from this process, or 
see Run "open index.html" to see more information about this process. quit -real 0m2.578s -user 0m2.210s -sys 0m0.342s +real 0m2.378s +user 0m2.056s +sys 0m0.297s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat index 9be80ae5a4..444f1253f8 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +#* GIT r991-8-gf0884cb7d HEAD * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f index e596678cdc..6dfa640d9e 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt 
b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 6285c8742e..234a77535f 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.003176450729370117  +DEBUG: model prefixing takes 0.0030896663665771484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.017 s +1 processes with 16 diagrams generated in 0.018 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,14 +168,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.035 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.030 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.344 s +ALOHA: aloha creates 5 routines in 0.184 s VVV1 VVV1 FFV1 @@ -195,7 +195,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. quit -real 0m0.890s -user 0m0.803s -sys 0m0.077s -Code generation completed in 1 seconds +real 0m0.672s +user 0m0.601s +sys 0m0.061s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 859a79a2b4..d4f521efbc 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* 
GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.003329753875732422  +DEBUG: model prefixing takes 0.003898143768310547  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,22 +178,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxgg DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 
41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.367 s -Wrote files for 222 helas calls in 0.514 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.299 s +Wrote files for 222 helas calls in 0.469 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.218 s +ALOHA: aloha creates 5 routines in 0.184 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.236 s +ALOHA: aloha creates 10 routines in 0.237 s VVV1 VVV1 FFV1 @@ -228,9 +228,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.680s -user 0m3.301s -sys 0m0.345s +real 0m3.393s +user 0m3.029s +sys 0m0.328s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat index 8eeac7b204..5ffde659c4 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +#* GIT r991-8-gf0884cb7d HEAD * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f index ccb857518f..60ddbdbedd 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 
41182c8dfd..a4e84691b5 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0036923885345458984  +DEBUG: model prefixing takes 0.003270387649536133  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.128 s +1 processes with 123 diagrams generated in 0.120 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,14 +168,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.422 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.290 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.226 s +ALOHA: aloha creates 5 routines in 0.177 s VVV1 VVV1 FFV1 @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. quit -real 0m1.340s -user 0m1.247s -sys 0m0.075s +real 0m1.173s +user 0m1.082s +sys 0m0.081s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 5c8dc17558..d6ebcb3812 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * 
The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.003306865692138672  +DEBUG: model prefixing takes 0.0031838417053222656  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.710 s +1 processes with 1240 diagrams generated in 1.458 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -169,7 +169,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] -INFO: Color-Flow passed to 1630 term in 6s. Introduce 3030 contraction +INFO: Color-Flow passed to 1630 term in 5s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1314]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h @@ -180,22 +180,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxggg DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 
198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 
398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 
598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 
798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 
60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 
256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 
456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 
656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 
855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 5.291 s -Wrote files for 2281 helas calls in 13.681 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 4.918 s +Wrote files for 2281 helas calls in 11.612 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.304 s +ALOHA: aloha creates 5 routines in 0.257 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.238 s +ALOHA: aloha creates 10 routines in 0.168 s VVV1 VVV1 FFV1 @@ -230,10 +230,10 @@ Type "launch" to generate events from this process, or see Run 
"open index.html" to see more information about this process. quit -real 0m26.984s -user 0m26.323s -sys 0m0.498s -Code generation completed in 27 seconds +real 0m23.511s +user 0m22.940s +sys 0m0.432s +Code generation completed in 24 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat index d77e39cc0c..e1b2f3835b 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +#* GIT r991-8-gf0884cb7d HEAD * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f index 89a856a460..7a436cbd5c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif 
#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 03f105764b..bbd14da044 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -56,7 +56,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0037555694580078125  +DEBUG: model prefixing takes 0.0028781890869140625  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.867 s +1 processes with 1240 diagrams generated in 1.565 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -168,14 +168,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 5.295 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 4.749 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.240 s +ALOHA: aloha creates 5 routines in 0.233 s VVV1 VVV1 FFV1 @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m10.649s -user 0m10.454s -sys 0m0.140s +real 0m9.477s +user 0m9.321s +sys 0m0.113s Code generation completed in 10 seconds diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 8dd042e628..37c4a61024 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -55,7 +55,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0034317970275878906  +DEBUG: model prefixing takes 0.0028390884399414062  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -164,7 +164,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.081 s +8 processes with 40 diagrams generated in 0.054 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -212,16 +212,16 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxux DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1749]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.026 s -Wrote files for 32 helas calls in 0.129 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.023 s +Wrote files for 32 helas calls in 0.115 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.114 s +ALOHA: aloha creates 2 routines in 0.080 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.081 s +ALOHA: aloha creates 4 routines in 0.065 s FFV1 FFV1 FFV1 @@ -249,10 +249,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.447s -user 0m2.884s -sys 0m0.446s -Code generation completed in 4 seconds +real 0m1.991s +user 0m1.660s +sys 0m0.309s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat index 568c75e0e2..7ba8666046 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +#* GIT r991-8-gf0884cb7d HEAD * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f index 85c5157797..37932e73a3 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f @@ -1243,6 +1243,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f index 8d9f98f8ac..748758b702 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f @@ -1243,6 +1243,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- 
a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index b4a42bd09e..e81cd2427b 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -55,7 +55,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0031960010528564453  +DEBUG: model prefixing takes 0.0030303001403808594  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -164,7 +164,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.108 s +8 processes with 40 diagrams generated in 0.057 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -200,11 +200,11 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.027 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.023 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.093 s +ALOHA: aloha creates 2 routines in 0.082 s FFV1 FFV1 FFV1 @@ -220,7 +220,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
quit -real 0m0.664s -user 0m0.581s -sys 0m0.074s -Code generation completed in 1 seconds +real 0m0.560s +user 0m0.488s +sys 0m0.060s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 6b28ca9564..2fdbc1fd4e 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -121,7 +121,7 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ generate g g > b b~ HIW<=1 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Process has 4 diagrams -1 processes with 4 diagrams generated in 0.005 s +1 processes with 4 diagrams generated in 0.004 s Total: 1 processes with 4 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_heft_gg_bb --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -150,20 +150,20 @@ INFO: Finding symmetric diagrams for subprocess group gg_bbx DEBUG: len(subproc_diagrams_for_config) =  4 
[model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (4 diagrams) in 0.008 s -Wrote files for 12 helas calls in 0.063 s +Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s +Wrote files for 12 helas calls in 0.067 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.194 s +ALOHA: aloha creates 4 routines in 0.136 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.202 s +ALOHA: aloha creates 8 routines in 0.119 s VVS3 VVV1 FFV1 @@ -192,10 +192,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.521s -user 0m2.176s -sys 0m0.319s -Code generation completed in 3 seconds +real 0m1.949s +user 0m1.650s +sys 0m0.283s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat index 0a3868c028..5c112346ee 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +#* GIT r991-8-gf0884cb7d HEAD * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f index f966affe59..3130417167 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git 
a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index cebb600dfc..232a901b2a 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -121,7 +121,7 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ generate g g > b b~ HIW<=1 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Process has 4 diagrams -1 processes with 4 diagrams generated in 0.005 s +1 processes with 4 diagrams generated in 0.004 s Total: 1 processes with 4 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_heft_gg_bb Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -140,13 +140,13 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/. 
-Generated helas calls for 1 subprocesses (4 diagrams) in 0.007 s +Generated helas calls for 1 subprocesses (4 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.165 s +ALOHA: aloha creates 4 routines in 0.143 s VVS3 VVV1 FFV1 @@ -163,7 +163,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. quit -real 0m0.573s -user 0m0.497s -sys 0m0.068s +real 0m0.486s +user 0m0.429s +sys 0m0.050s Code generation completed in 0 seconds diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt index 64af513301..dd7b11482d 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT 
r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -55,7 +55,7 @@ set zerowidth_tchannel F import model sm-no_b_mass INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0030832290649414062  +DEBUG: model prefixing takes 0.003126382827758789  INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -179,7 +179,7 @@ INFO: Process u~ d > t t~ w- added to mirror process d u~ > t t~ w- INFO: Process c~ s > t t~ w- added to mirror process s c~ > t t~ w- INFO: Process d~ u > t t~ w+ added to mirror process u d~ > t t~ w+ INFO: Process s~ c > t t~ w+ added to mirror process c s~ > t t~ w+ -4 processes with 8 diagrams generated in 0.090 s +4 processes with 8 diagrams generated in 0.080 s Total: 4 processes with 8 diagrams add process p p > t t~ w j @1 INFO: Checking for minimal orders which gives processes. 
@@ -221,7 +221,7 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~ INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g -12 processes with 144 diagrams generated in 0.542 s +12 processes with 144 diagrams generated in 0.478 s Total: 16 processes with 152 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -349,18 +349,18 @@ INFO: Finding symmetric diagrams for subprocess group dux_ttxwm DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1749]  -Generated helas calls for 8 subprocesses (76 diagrams) in 0.149 s -Wrote files for 212 helas calls in 0.683 s +Generated helas calls for 8 subprocesses (76 diagrams) in 0.174 s +Wrote files for 212 helas calls in 0.602 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 3 routines in 0.119 s +ALOHA: aloha creates 3 routines in 0.123 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 6 routines in 0.117 s +ALOHA: aloha creates 6 routines in 0.154 s FFV1 FFV1 FFV1 @@ -390,9 +390,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m4.610s -user 0m3.960s -sys 0m0.592s +real 0m4.432s +user 0m3.751s +sys 0m0.613s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat index 03f015e1cd..981120a965 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +#* GIT r991-8-gf0884cb7d HEAD * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f index 0dc54b5499..1ed73392bb 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f @@ -1241,6 +1241,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f index b202ccafd8..f4a45af3d4 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f @@ -1241,6 +1241,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f index 
98bf3c96c9..c9599a9732 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f @@ -1241,6 +1241,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f index 31d9da001c..461b2a1f4c 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f @@ -1241,6 +1241,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f index 1c15ce9fb2..345754eb7e 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f @@ -1241,6 +1241,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f index ecbd058457..ab11a90fc1 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f @@ -1241,6 +1241,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN 
+ ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f index c0f3f25286..57090d058f 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f @@ -1241,6 +1241,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f index 81ecfc876a..3b09bc2ba0 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f @@ -1241,6 +1241,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index b135c17f32..7a4d32d322 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ 
b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -55,7 +55,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0034799575805664062  +DEBUG: model prefixing takes 0.0036203861236572266  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -166,7 +166,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.024 s +5 processes with 7 diagrams generated in 0.034 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -206,7 +206,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.106 s +13 processes with 76 diagrams generated in 0.141 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. 
@@ -372,7 +372,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.517 s +65 processes with 1119 diagrams generated in 1.456 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -687,22 +687,22 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttx DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1749]  -Generated helas calls for 18 subprocesses (372 diagrams) in 0.974 s -Wrote files for 810 helas calls in 2.288 s +Generated helas calls for 18 subprocesses (372 diagrams) in 0.958 s +Wrote files for 810 helas calls in 2.060 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.181 s +ALOHA: aloha creates 5 routines in 0.223 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.255 s +ALOHA: aloha creates 10 routines in 0.209 s VVV1 VVV1 FFV1 @@ -737,10 +737,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m10.061s -user 0m8.893s -sys 0m1.041s -Code generation completed in 11 seconds +real 0m9.575s +user 0m8.442s +sys 0m1.003s +Code generation completed in 10 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat index e56d77408c..86d647aa4d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +#* GIT r991-8-gf0884cb7d HEAD * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f index 3d6273e9de..690f872c32 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f index 86d509e240..b34048e29d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f @@ -1243,6 +1243,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f 
index e596678cdc..6dfa640d9e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f index 85c5157797..37932e73a3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f @@ -1243,6 +1243,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f index 8d9f98f8ac..748758b702 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f @@ -1243,6 +1243,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f index a9cb66d424..01735be0d3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f @@ -1243,6 +1243,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f index 586a8a369d..e7f590a087 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f index 122d95ac63..5456c9a1d1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f @@ -1243,6 +1243,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f index 7b3432104c..3edd289da8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f @@ -1243,6 +1243,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f index 6cf808b1bf..c858b2c684 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f @@ -1243,6 +1243,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, 
IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f index 0cf8140c5e..37ff46da63 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f @@ -1245,6 +1245,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f index 1a3666b37c..bfc32bbd25 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f @@ -1251,6 +1251,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f index 26bb17ea64..39a81a621a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f @@ -1243,6 +1243,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f index bf1d38788c..fb7d3f331a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f +++ 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f @@ -1251,6 +1251,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f index 4eb3d980ad..0441af0818 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f @@ -1243,6 +1243,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f index 5bc3293585..016741f374 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f @@ -1243,6 +1243,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f index ddd7ababe0..f93d884900 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f @@ -1245,6 +1245,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f index 932747b697..e28b5f2e76 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f @@ -1243,6 +1243,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index 2038fc26a8..a11d2de469 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -71,7 +71,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + 
Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.05546402931213379  +DEBUG: model prefixing takes 0.04707622528076172  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -86,7 +86,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 2.878 s +1 processes with 72 diagrams generated in 2.609 s Total: 1 processes with 72 diagrams 
output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -115,22 +115,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxttx DEBUG: len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (72 diagrams) in 0.189 s -Wrote files for 119 helas calls in 0.289 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.124 s +Wrote files for 119 helas calls in 0.265 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 
routines in 0.179 s +ALOHA: aloha creates 5 routines in 0.209 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.203 s +ALOHA: aloha creates 10 routines in 0.190 s VVV5 VVV5 FFV1 @@ -162,10 +162,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m6.256s -user 0m5.835s -sys 0m0.356s -Code generation completed in 7 seconds +real 0m5.754s +user 0m5.380s +sys 0m0.307s +Code generation completed in 6 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat index 2b50d9cbfd..9f01c208e8 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +#* GIT r991-8-gf0884cb7d HEAD * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f index 0858a86382..b68b2dd12c 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk 
b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index 783771ed66..d4f6b71098 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -71,7 +71,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.06393218040466309  +DEBUG: model prefixing takes 0.05528879165649414  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s 
u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -86,7 +86,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 2.815 s +1 processes with 72 diagrams generated in 2.631 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -105,14 +105,14 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for 
/home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.137 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.125 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.186 s +ALOHA: aloha creates 5 routines in 0.224 s VVV5 VVV5 FFV1 @@ -132,7 +132,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
quit -real 0m4.030s -user 0m3.932s -sys 0m0.073s +real 0m3.755s +user 0m3.653s +sys 0m0.071s Code generation completed in 4 seconds diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index e8531e68d4..55b4cd592c 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.096 s +1 processes with 6 diagrams generated in 0.090 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -578,17 +578,17 @@ INFO: Finding symmetric diagrams for subprocess group gg_t1t1x DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [model_handling.py at line 1749]  Generated helas calls for 1 subprocesses (6 diagrams) in 0.007 s -Wrote files for 16 helas calls in 0.087 s +Wrote files for 16 helas calls in 0.060 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.105 s +ALOHA: aloha creates 3 routines in 0.089 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.105 s +ALOHA: aloha creates 6 routines in 0.092 s VVV1 VSS1 VSS1 @@ -616,9 +616,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.959s -user 0m2.602s -sys 0m0.336s +real 0m2.497s +user 0m2.197s +sys 0m0.282s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat index e242e6277f..f07e5631fd 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +#* GIT r991-8-gf0884cb7d HEAD * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f index b109d1e601..fe7a4274ea 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt 
b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index d8ff800cdc..245a1c8d30 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.099 s +1 processes with 6 diagrams generated in 0.087 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -567,12 +567,12 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/. 
-Generated helas calls for 1 subprocesses (6 diagrams) in 0.007 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.112 s +ALOHA: aloha creates 3 routines in 0.099 s VVV1 VSS1 VSS1 @@ -588,7 +588,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. quit -real 0m1.212s -user 0m1.118s -sys 0m0.083s +real 0m1.068s +user 0m0.985s +sys 0m0.076s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 8249a679e1..6154e42325 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * 
* * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.094 s +1 processes with 3 diagrams generated in 0.084 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -577,16 +577,16 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1724]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1748]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1749]  -Generated helas calls for 1 subprocesses (3 diagrams) in 0.008 s -Wrote files for 10 helas calls in 0.068 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s +Wrote files for 10 helas calls in 0.055 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.096 s +ALOHA: aloha creates 2 routines in 0.100 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.080 s +ALOHA: aloha creates 4 routines in 0.067 s VVV1 FFV1 FFV1 @@ -613,9 +613,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.100s -user 0m2.697s -sys 0m0.373s +real 0m2.530s +user 0m2.212s +sys 0m0.292s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat index a41c9bfc68..3c7c799a87 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat @@ -9,7 +9,7 @@ #* * #* * #* VERSION 3.7.0 2026-01-05 * -#* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +#* GIT r991-8-gf0884cb7d HEAD * #* * #* The MadGraph5_aMC@NLO Development Team - Find us at * #* https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index 0ee3fad958..6c6b37db2c 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -1240,6 +1240,10 @@ SUBROUTINE SELECT_COLOR(RCOL, JAMP2, ICONFIG, IPROC, ICOL, IVEC) ICONFIG = IGRAPHS(1) ELSE ICONFIG = VEC_IGRAPH(IVEC) + IF(ICONFIG.EQ.0)THEN + ICOL =0 + RETURN + ENDIF ENDIF ENDIF diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt 
b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index 241396992d..fd66e883f7 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -15,7 +15,7 @@ It has been validated for the last time with version: 3.6.5 * * * * * * * VERSION 3.7.0 2026-01-05 * -* GIT r991-7-g69b7ec3d4 copilot/fix-mlm-issue-phase-space * +* GIT r991-8-gf0884cb7d HEAD * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * http://madgraph.phys.ucl.ac.be/ * @@ -548,7 +548,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.126 s +1 processes with 3 diagrams generated in 0.076 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -567,11 +567,11 @@ INFO: Creating files in directory /home/dmass/Development/madgraph4gpu/copilot-i FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.h FileWriter for /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/. 
-Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.004 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.082 s +ALOHA: aloha creates 2 routines in 0.071 s VVV1 FFV1 FFV1 @@ -586,7 +586,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /home/dmass/Development/madgraph4gpu/copilot-igraph/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. quit -real 0m1.283s -user 0m1.180s -sys 0m0.084s -Code generation completed in 2 seconds +real 0m0.923s +user 0m0.829s +sys 0m0.081s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk index 7969c42777..4cf05f5642 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk @@ -89,7 +89,7 @@ endif # Create file with the resolved backend in case user chooses 'cppauto' BACKEND_LOG ?= .resolved-backend ifneq ($(BACKEND_ORIG),$(BACKEND)) - $(file >$(BACKEND_LOG),$(BACKEND)) + $(shell echo '$(BACKEND)' >> $(BACKEND_LOG)) endif #------------------------------------------------------------------------------- From b4c5b7cf71cca4d637efa7c5c7d2027efe15752f Mon Sep 17 00:00:00 2001 From: Daniele Massaro Date: Thu, 16 Apr 2026 18:18:21 +0200 Subject: [PATCH 17/17] Revert "Debug: update workflows to run only failing one and add breakpoint" This reverts commit c1ff0ad80c6da86c6ad41089a1f76213d678b4b3. 
--- .github/workflows/c-cpp.yml | 4 ++++ .github/workflows/testsuite_allprocesses.yml | 4 ++-- .github/workflows/testsuite_oneprocess.yml | 8 ++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 57c221e22a..6636193539 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -6,6 +6,10 @@ name: C/C++ CI on: + push: + branches: [ master ] + pull_request: + branches: [ master ] workflow_dispatch: jobs: diff --git a/.github/workflows/testsuite_allprocesses.yml b/.github/workflows/testsuite_allprocesses.yml index 186adb67d2..28c5b9f9eb 100644 --- a/.github/workflows/testsuite_allprocesses.yml +++ b/.github/workflows/testsuite_allprocesses.yml @@ -33,8 +33,8 @@ jobs: fail-fast: false # important to see all results even if one fails (fail-fast is true by default) matrix: # FIXME? Can the list of supported processes be specified only once in oneprocess.yml or allprocesses.yml? - process: [pp_tt012j] - suffix: [mad] + process: [ee_mumu, gg_tt, gg_ttg, gg_ttgg, gg_ttggg, gg_tt01g, gq_ttq, pp_tt012j, nobm_pp_ttW, susy_gg_tt, susy_gg_t1t1, smeft_gg_tttt, heft_gg_bb] + suffix: [mad, sa] uses: ./.github/workflows/testsuite_oneprocess.yml with: process: ${{ matrix.process }}.${{ matrix.suffix }} diff --git a/.github/workflows/testsuite_oneprocess.yml b/.github/workflows/testsuite_oneprocess.yml index ad30854d88..478b5eac3c 100644 --- a/.github/workflows/testsuite_oneprocess.yml +++ b/.github/workflows/testsuite_oneprocess.yml @@ -171,7 +171,7 @@ jobs: strategy: fail-fast: false # e.g. 
do not stop 'd' job if 'f' job has failed (fail-fast is true by default) matrix: - fptype: [d] + fptype: [d, f, m] env: FPTYPE: ${{ matrix.fptype }} @@ -257,11 +257,7 @@ jobs: - name: tput_test run: .github/workflows/testsuite_oneprocess.sh tput_test ${{ inputs.process }} - # - name: Breakpoint - # uses: namespacelabs/breakpoint-action@v0 - # with: - # duration: 30m - # authorized-users: oliviermattelaer, qubitol + - name: tmad_test run: .github/workflows/testsuite_oneprocess.sh tmad_test ${{ inputs.process }}