summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h20
1 files changed, 20 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 6878744496cf..57b757c990e1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -100,6 +100,26 @@ public:
/// be converted to integer, or violate subtarget's specifications.
std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
+ /// \returns The required size of workgroups that will be used to execute \p F
+ /// in the \p Dim dimension, if it is known (from `!reqd_work_group_size`
+ /// metadata). Otherwise, returns std::nullopt.
+ std::optional<unsigned> getReqdWorkGroupSize(const Function &F,
+ unsigned Dim) const;
+
+ /// \returns true if \p F will execute in a manner that leaves the X
+ /// dimension of the workitem ID evenly tiling wavefronts - that is, if X /
+ /// wavefrontsize is uniform. This is true if either the Y and Z block
+ /// dimensions are known to always be 1 or if the X dimension will always be a
+ /// power of 2. If \p RequiresUniformYZ is true, it also ensures that the Y
+ /// and Z workitem IDs will be uniform (so, while a (32, 2, 1) launch with
+ /// wavesize64 would ordinarily pass this test, it won't with
+ /// \p RequiresUniformYZ).
+ ///
+ /// This information is currently only gathered from the !reqd_work_group_size
+ /// metadata on \p F, but this may be improved in the future.
+ bool hasWavefrontsEvenlySplittingXDim(const Function &F,
+ bool RequiresUniformYZ = false) const;
+
/// \returns Subtarget's default pair of minimum/maximum number of waves per
/// execution unit for function \p F, or minimum/maximum number of waves per
/// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute