Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1605,6 +1605,7 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
- 0x0100: All DS read instructions may be scheduled across sched_barrier.
- 0x0200: All DS write instructions may be scheduled across sched_barrier.
- 0x0400: All Transcendental (e.g. V_EXP) instructions may be scheduled across sched_barrier.
- 0x0800: All LDSDMA instructions may be scheduled across sched_barrier.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This interface is ludicrous and will require making up new arbitrary masks forever


llvm.amdgcn.sched.group.barrier Creates schedule groups with specific properties to create custom scheduling
pipelines. The ordering between groups is enforced by the instruction scheduler.
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,8 @@ def int_amdgcn_wave_barrier : ClangBuiltin<"__builtin_amdgcn_wave_barrier">,
// MASK = 0x0000 0080: ALL DS instructions may be scheduled across SCHED_BARRIER.
// MASK = 0x0000 0100: ALL DS read instructions may be scheduled across SCHED_BARRIER.
// MASK = 0x0000 0200: ALL DS write instructions may be scheduled across SCHED_BARRIER.
// MASK = 0x0000 0400: All Transcendental (e.g. V_EXP) instructions may be scheduled across SCHED_BARRIER.
// MASK = 0x0000 0800: All LDSDMA instructions may be scheduled across SCHED_BARRIER.
// Scheduling-barrier intrinsic, exposed to Clang as
// __builtin_amdgcn_sched_barrier. It produces no result and takes a single
// i32 operand, which must be an immediate (ImmArg on argument 0); that operand
// is the MASK whose bits are described in the comment list above.
def int_amdgcn_sched_barrier : ClangBuiltin<"__builtin_amdgcn_sched_barrier">,
Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
IntrWillReturn, IntrNoCallback, IntrNoFree]>;
Expand Down
20 changes: 14 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ enum class SchedGroupMask {
DS_READ = 1u << 8,
DS_WRITE = 1u << 9,
TRANS = 1u << 10,
LDSDMA = 1u << 11,
ALL = ALU | VALU | SALU | MFMA | VMEM | VMEM_READ | VMEM_WRITE | DS |
DS_READ | DS_WRITE | TRANS,
DS_READ | DS_WRITE | TRANS | LDSDMA,
LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};

Expand Down Expand Up @@ -2471,7 +2472,7 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
Result = true;

else if (((SGMask & SchedGroupMask::VMEM) != SchedGroupMask::NONE) &&
TII->isVMEM(MI))
(TII->isVMEM(MI) || TII->isLDSDMA(MI)))
Result = true;

else if (((SGMask & SchedGroupMask::VMEM_READ) != SchedGroupMask::NONE) &&
Expand All @@ -2498,6 +2499,10 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
TII->isTRANS(MI))
Result = true;

else if (((SGMask & SchedGroupMask::LDSDMA) != SchedGroupMask::NONE) &&
TII->isLDSDMA(MI))
Result = true;

LLVM_DEBUG(
dbgs() << "For SchedGroup with mask " << format_hex((int)SGMask, 10, true)
<< (Result ? " could classify " : " unable to classify ") << MI);
Expand Down Expand Up @@ -2663,12 +2668,15 @@ IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const {
(InvertedMask & SchedGroupMask::TRANS) == SchedGroupMask::NONE)
InvertedMask &= ~SchedGroupMask::ALU;

// VMEM implies VMEM_READ, VMEM_WRITE.
// VMEM implies VMEM_READ, VMEM_WRITE, LDSDMA.
if ((InvertedMask & SchedGroupMask::VMEM) == SchedGroupMask::NONE)
InvertedMask &= ~SchedGroupMask::VMEM_READ & ~SchedGroupMask::VMEM_WRITE;
// VMEM_READ, VMEM_WRITE implies VMEM.
InvertedMask &= ~SchedGroupMask::VMEM_READ & ~SchedGroupMask::VMEM_WRITE &
~SchedGroupMask::LDSDMA;
// VMEM_READ, VMEM_WRITE, LDSDMA implies VMEM.
else if ((InvertedMask & SchedGroupMask::VMEM_READ) == SchedGroupMask::NONE ||
(InvertedMask & SchedGroupMask::VMEM_WRITE) == SchedGroupMask::NONE)
(InvertedMask & SchedGroupMask::VMEM_WRITE) ==
SchedGroupMask::NONE ||
(InvertedMask & SchedGroupMask::LDSDMA) == SchedGroupMask::NONE)
InvertedMask &= ~SchedGroupMask::VMEM;

// DS implies DS_READ, DS_WRITE.
Expand Down
Loading