diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/lib/TargetParser | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format (branch: users/mingmingl-llvm/samplefdo-profile-format)
Diffstat (limited to 'llvm/lib/TargetParser')
| -rw-r--r-- | llvm/lib/TargetParser/Host.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/TargetParser/RISCVTargetParser.cpp | 9 | ||||
| -rw-r--r-- | llvm/lib/TargetParser/TargetParser.cpp | 660 | ||||
| -rw-r--r-- | llvm/lib/TargetParser/Triple.cpp | 101 | ||||
| -rw-r--r-- | llvm/lib/TargetParser/X86TargetParser.cpp | 40 |
5 files changed, 408 insertions, 412 deletions
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 24827537eb19..63848160636a 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1396,7 +1396,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(X86::FEATURE_BMI2); if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) { setFeature(X86::FEATURE_AVX512F); - setFeature(X86::FEATURE_EVEX512); } if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512DQ); @@ -2063,8 +2062,6 @@ StringMap<bool> sys::getHostCPUFeatures() { Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); // AVX512 is only supported if the OS supports the context save for it. Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; - if (Features["avx512f"]) - Features["evex512"] = true; Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); @@ -2176,11 +2173,8 @@ StringMap<bool> sys::getHostCPUFeatures() { MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX); int AVX10Ver = HasLeaf24 && (EBX & 0xff); - int Has512Len = HasLeaf24 && ((EBX >> 18) & 1); - Features["avx10.1-256"] = HasAVX10 && AVX10Ver >= 1; - Features["avx10.1-512"] = HasAVX10 && AVX10Ver >= 1 && Has512Len; - Features["avx10.2-256"] = HasAVX10 && AVX10Ver >= 2; - Features["avx10.2-512"] = HasAVX10 && AVX10Ver >= 2 && Has512Len; + Features["avx10.1"] = HasAVX10 && AVX10Ver >= 1; + Features["avx10.2"] = HasAVX10 && AVX10Ver >= 2; return Features; } diff --git a/llvm/lib/TargetParser/RISCVTargetParser.cpp b/llvm/lib/TargetParser/RISCVTargetParser.cpp index 9957ec0c28d8..b53a1b95431a 100644 --- a/llvm/lib/TargetParser/RISCVTargetParser.cpp +++ b/llvm/lib/TargetParser/RISCVTargetParser.cpp @@ -153,12 +153,13 @@ namespace RISCVVType { // // Bits | Name | Description // 
-----+------------+------------------------------------------------ +// 8 | altfmt | Alternative format for bf16 // 7 | vma | Vector mask agnostic // 6 | vta | Vector tail agnostic // 5:3 | vsew[2:0] | Standard element width (SEW) setting // 2:0 | vlmul[2:0] | Vector register group multiplier (LMUL) setting unsigned encodeVTYPE(VLMUL VLMul, unsigned SEW, bool TailAgnostic, - bool MaskAgnostic) { + bool MaskAgnostic, bool AltFmt) { assert(isValidSEW(SEW) && "Invalid SEW"); unsigned VLMulBits = static_cast<unsigned>(VLMul); unsigned VSEWBits = encodeSEW(SEW); @@ -167,6 +168,8 @@ unsigned encodeVTYPE(VLMUL VLMul, unsigned SEW, bool TailAgnostic, VTypeI |= 0x40; if (MaskAgnostic) VTypeI |= 0x80; + if (AltFmt) + VTypeI |= 0x100; return VTypeI; } @@ -200,6 +203,10 @@ void printVType(unsigned VType, raw_ostream &OS) { unsigned Sew = getSEW(VType); OS << "e" << Sew; + bool AltFmt = RISCVVType::isAltFmt(VType); + if (AltFmt) + OS << "alt"; + unsigned LMul; bool Fractional; std::tie(LMul, Fractional) = decodeVLMUL(getVLMUL(VType)); diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 50b97d325754..2194ef4df14d 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -364,8 +364,326 @@ StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) { return T.isAMDGCN() ? 
getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind); } -void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, - StringMap<bool> &Features) { +static std::pair<FeatureError, StringRef> +insertWaveSizeFeature(StringRef GPU, const Triple &T, + const StringMap<bool> &DefaultFeatures, + StringMap<bool> &Features) { + const bool IsNullGPU = GPU.empty(); + const bool TargetHasWave32 = DefaultFeatures.count("wavefrontsize32"); + const bool TargetHasWave64 = DefaultFeatures.count("wavefrontsize64"); + const bool HaveWave32 = Features.count("wavefrontsize32"); + const bool HaveWave64 = Features.count("wavefrontsize64"); + if (HaveWave32 && HaveWave64) + return {AMDGPU::INVALID_FEATURE_COMBINATION, + "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"}; + + if (HaveWave32 && !IsNullGPU && TargetHasWave64) + return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"}; + + if (HaveWave64 && !IsNullGPU && TargetHasWave32) + return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize64"}; + + // Don't assume any wavesize with an unknown subtarget. + // Default to wave32 if target supports both. + if (!IsNullGPU && !HaveWave32 && !HaveWave64 && !TargetHasWave32 && + !TargetHasWave64) + Features.insert(std::make_pair("wavefrontsize32", true)); + + for (const auto &Entry : DefaultFeatures) { + if (!Features.count(Entry.getKey())) + Features[Entry.getKey()] = Entry.getValue(); + } + + return {NO_ERROR, StringRef()}; +} + +/// Fills Features map with default values for given target GPU. +/// \p Features contains overriding target features and this function returns +/// default target features with entries overridden by \p Features. 
+static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, + StringMap<bool> &Features) { + AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU); + switch (Kind) { + case GK_GFX1250: + Features["ci-insts"] = true; + Features["dot7-insts"] = true; + Features["dot8-insts"] = true; + Features["dl-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["gfx11-insts"] = true; + Features["gfx12-insts"] = true; + Features["gfx1250-insts"] = true; + Features["bitop3-insts"] = true; + Features["prng-inst"] = true; + Features["tanh-insts"] = true; + Features["tensor-cvt-lut-insts"] = true; + Features["transpose-load-f4f6-insts"] = true; + Features["bf16-trans-insts"] = true; + Features["bf16-cvt-insts"] = true; + Features["fp8-conversion-insts"] = true; + Features["fp8e5m3-insts"] = true; + Features["permlane16-swap"] = true; + Features["ashr-pk-insts"] = true; + Features["atomic-buffer-pk-add-bf16-inst"] = true; + Features["vmem-pref-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-flat-pk-add-16-insts"] = true; + Features["atomic-global-pk-add-bf16-inst"] = true; + Features["atomic-ds-pk-add-16-insts"] = true; + Features["setprio-inc-wg-inst"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + Features["wavefrontsize32"] = true; + break; + case GK_GFX1201: + case GK_GFX1200: + case GK_GFX12_GENERIC: + Features["ci-insts"] = true; + Features["dot7-insts"] = true; + Features["dot8-insts"] = true; + Features["dot9-insts"] = true; + Features["dot10-insts"] = true; + Features["dot11-insts"] = true; + Features["dot12-insts"] = true; + Features["dl-insts"] = true; + Features["atomic-ds-pk-add-16-insts"] = true; + Features["atomic-flat-pk-add-16-insts"] = true; + 
Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-buffer-pk-add-bf16-inst"] = true; + Features["atomic-global-pk-add-bf16-inst"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["gfx11-insts"] = true; + Features["gfx12-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["image-insts"] = true; + Features["fp8-conversion-insts"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + break; + case GK_GFX1153: + case GK_GFX1152: + case GK_GFX1151: + case GK_GFX1150: + case GK_GFX1103: + case GK_GFX1102: + case GK_GFX1101: + case GK_GFX1100: + case GK_GFX11_GENERIC: + Features["ci-insts"] = true; + Features["dot5-insts"] = true; + Features["dot7-insts"] = true; + Features["dot8-insts"] = true; + Features["dot9-insts"] = true; + Features["dot10-insts"] = true; + Features["dot12-insts"] = true; + Features["dl-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["gfx11-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["image-insts"] = true; + Features["gws"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + break; + case GK_GFX1036: + case GK_GFX1035: + case GK_GFX1034: + case GK_GFX1033: + case GK_GFX1032: + case GK_GFX1031: + case GK_GFX1030: + case GK_GFX10_3_GENERIC: + Features["ci-insts"] = true; + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + Features["dl-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + 
Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["image-insts"] = true; + Features["s-memrealtime"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["vmem-to-lds-load-insts"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + break; + case GK_GFX1012: + case GK_GFX1011: + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + [[fallthrough]]; + case GK_GFX1013: + case GK_GFX1010: + case GK_GFX10_1_GENERIC: + Features["dl-insts"] = true; + Features["ci-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["image-insts"] = true; + Features["s-memrealtime"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["vmem-to-lds-load-insts"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + break; + case GK_GFX950: + Features["bitop3-insts"] = true; + Features["fp6bf6-cvt-scale-insts"] = true; + Features["fp4-cvt-scale-insts"] = true; + Features["bf8-cvt-scale-insts"] = true; + Features["fp8-cvt-scale-insts"] = true; + Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true; + Features["f32-to-f16bf16-cvt-sr-insts"] = true; + Features["prng-inst"] = true; + Features["permlane16-swap"] = true; + Features["permlane32-swap"] = true; + Features["ashr-pk-insts"] = true; + Features["dot12-insts"] = true; + Features["dot13-insts"] = true; + Features["atomic-buffer-pk-add-bf16-inst"] = true; + Features["gfx950-insts"] = true; + [[fallthrough]]; + case GK_GFX942: + Features["fp8-insts"] = true; + Features["fp8-conversion-insts"] = true; + if (Kind != GK_GFX950) + Features["xf32-insts"] = true; + 
[[fallthrough]]; + case GK_GFX9_4_GENERIC: + Features["gfx940-insts"] = true; + Features["atomic-ds-pk-add-16-insts"] = true; + Features["atomic-flat-pk-add-16-insts"] = true; + Features["atomic-global-pk-add-bf16-inst"] = true; + Features["gfx90a-insts"] = true; + Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["dot3-insts"] = true; + Features["dot4-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["mai-insts"] = true; + Features["dl-insts"] = true; + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx8-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["s-memrealtime"] = true; + Features["ci-insts"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["vmem-to-lds-load-insts"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + Features["wavefrontsize64"] = true; + break; + case GK_GFX90A: + Features["gfx90a-insts"] = true; + Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + [[fallthrough]]; + case GK_GFX908: + Features["dot3-insts"] = true; + Features["dot4-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["mai-insts"] = true; + [[fallthrough]]; + case GK_GFX906: + Features["dl-insts"] = true; + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + [[fallthrough]]; + case GK_GFX90C: + case GK_GFX909: + case GK_GFX904: + case GK_GFX902: + case GK_GFX900: + case GK_GFX9_GENERIC: + Features["gfx9-insts"] = true; + Features["vmem-to-lds-load-insts"] = true; + [[fallthrough]]; + case GK_GFX810: + case GK_GFX805: + case GK_GFX803: + case 
GK_GFX802: + case GK_GFX801: + Features["gfx8-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["s-memrealtime"] = true; + Features["ci-insts"] = true; + Features["image-insts"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["wavefrontsize64"] = true; + break; + case GK_GFX705: + case GK_GFX704: + case GK_GFX703: + case GK_GFX702: + case GK_GFX701: + case GK_GFX700: + Features["ci-insts"] = true; + [[fallthrough]]; + case GK_GFX602: + case GK_GFX601: + case GK_GFX600: + Features["image-insts"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + Features["wavefrontsize64"] = true; + break; + case GK_NONE: + break; + default: + llvm_unreachable("Unhandled GPU!"); + } +} + +/// Fills Features map with default values for given target GPU. +/// \p Features contains overriding target features and this function returns +/// default target features with entries overridden by \p Features. +std::pair<FeatureError, StringRef> +AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, + StringMap<bool> &Features) { // XXX - What does the member GPU mean if device name string passed here? if (T.isSPIRV() && T.getOS() == Triple::OSType::AMDHSA) { // AMDGCN SPIRV must support the union of all AMDGCN features. 
This list @@ -434,276 +752,9 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["wavefrontsize32"] = true; Features["wavefrontsize64"] = true; } else if (T.isAMDGCN()) { - AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU); - switch (Kind) { - case GK_GFX1250: - Features["ci-insts"] = true; - Features["dot7-insts"] = true; - Features["dot8-insts"] = true; - Features["dl-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["gfx11-insts"] = true; - Features["gfx12-insts"] = true; - Features["gfx1250-insts"] = true; - Features["bitop3-insts"] = true; - Features["prng-inst"] = true; - Features["tanh-insts"] = true; - Features["tensor-cvt-lut-insts"] = true; - Features["transpose-load-f4f6-insts"] = true; - Features["bf16-trans-insts"] = true; - Features["bf16-cvt-insts"] = true; - Features["fp8-conversion-insts"] = true; - Features["fp8e5m3-insts"] = true; - Features["permlane16-swap"] = true; - Features["ashr-pk-insts"] = true; - Features["atomic-buffer-pk-add-bf16-inst"] = true; - Features["vmem-pref-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - Features["atomic-buffer-global-pk-add-f16-insts"] = true; - Features["atomic-flat-pk-add-16-insts"] = true; - Features["atomic-global-pk-add-bf16-inst"] = true; - Features["atomic-ds-pk-add-16-insts"] = true; - Features["setprio-inc-wg-inst"] = true; - Features["atomic-fmin-fmax-global-f32"] = true; - Features["atomic-fmin-fmax-global-f64"] = true; - break; - case GK_GFX1201: - case GK_GFX1200: - case GK_GFX12_GENERIC: - Features["ci-insts"] = true; - Features["dot7-insts"] = true; - Features["dot8-insts"] = true; - Features["dot9-insts"] = true; - Features["dot10-insts"] = true; - Features["dot11-insts"] = true; - Features["dot12-insts"] = true; - Features["dl-insts"] = true; - Features["atomic-ds-pk-add-16-insts"] = 
true; - Features["atomic-flat-pk-add-16-insts"] = true; - Features["atomic-buffer-global-pk-add-f16-insts"] = true; - Features["atomic-buffer-pk-add-bf16-inst"] = true; - Features["atomic-global-pk-add-bf16-inst"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["gfx11-insts"] = true; - Features["gfx12-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - Features["image-insts"] = true; - Features["fp8-conversion-insts"] = true; - Features["atomic-fmin-fmax-global-f32"] = true; - break; - case GK_GFX1153: - case GK_GFX1152: - case GK_GFX1151: - case GK_GFX1150: - case GK_GFX1103: - case GK_GFX1102: - case GK_GFX1101: - case GK_GFX1100: - case GK_GFX11_GENERIC: - Features["ci-insts"] = true; - Features["dot5-insts"] = true; - Features["dot7-insts"] = true; - Features["dot8-insts"] = true; - Features["dot9-insts"] = true; - Features["dot10-insts"] = true; - Features["dot12-insts"] = true; - Features["dl-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["gfx11-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - Features["image-insts"] = true; - Features["gws"] = true; - Features["atomic-fmin-fmax-global-f32"] = true; - break; - case GK_GFX1036: - case GK_GFX1035: - case GK_GFX1034: - case GK_GFX1033: - case GK_GFX1032: - case GK_GFX1031: - case GK_GFX1030: - case GK_GFX10_3_GENERIC: - Features["ci-insts"] = true; - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - Features["dl-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - 
Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["image-insts"] = true; - Features["s-memrealtime"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - Features["vmem-to-lds-load-insts"] = true; - Features["atomic-fmin-fmax-global-f32"] = true; - Features["atomic-fmin-fmax-global-f64"] = true; - break; - case GK_GFX1012: - case GK_GFX1011: - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - [[fallthrough]]; - case GK_GFX1013: - case GK_GFX1010: - case GK_GFX10_1_GENERIC: - Features["dl-insts"] = true; - Features["ci-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["image-insts"] = true; - Features["s-memrealtime"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - Features["vmem-to-lds-load-insts"] = true; - Features["atomic-fmin-fmax-global-f32"] = true; - Features["atomic-fmin-fmax-global-f64"] = true; - break; - case GK_GFX950: - Features["bitop3-insts"] = true; - Features["fp6bf6-cvt-scale-insts"] = true; - Features["fp4-cvt-scale-insts"] = true; - Features["bf8-cvt-scale-insts"] = true; - Features["fp8-cvt-scale-insts"] = true; - Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true; - Features["f32-to-f16bf16-cvt-sr-insts"] = true; - Features["prng-inst"] = true; - Features["permlane16-swap"] = true; - Features["permlane32-swap"] = true; - Features["ashr-pk-insts"] = true; - Features["dot12-insts"] = true; - Features["dot13-insts"] = true; - Features["atomic-buffer-pk-add-bf16-inst"] = true; - Features["gfx950-insts"] = true; - [[fallthrough]]; - case GK_GFX942: - Features["fp8-insts"] = true; - Features["fp8-conversion-insts"] = true; - if 
(Kind != GK_GFX950) - Features["xf32-insts"] = true; - [[fallthrough]]; - case GK_GFX9_4_GENERIC: - Features["gfx940-insts"] = true; - Features["atomic-ds-pk-add-16-insts"] = true; - Features["atomic-flat-pk-add-16-insts"] = true; - Features["atomic-global-pk-add-bf16-inst"] = true; - Features["gfx90a-insts"] = true; - Features["atomic-buffer-global-pk-add-f16-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - Features["dot3-insts"] = true; - Features["dot4-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["mai-insts"] = true; - Features["dl-insts"] = true; - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx8-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["s-memrealtime"] = true; - Features["ci-insts"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - Features["vmem-to-lds-load-insts"] = true; - Features["atomic-fmin-fmax-global-f64"] = true; - break; - case GK_GFX90A: - Features["gfx90a-insts"] = true; - Features["atomic-buffer-global-pk-add-f16-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - Features["atomic-fmin-fmax-global-f64"] = true; - [[fallthrough]]; - case GK_GFX908: - Features["dot3-insts"] = true; - Features["dot4-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["mai-insts"] = true; - [[fallthrough]]; - case GK_GFX906: - Features["dl-insts"] = true; - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - [[fallthrough]]; - case GK_GFX90C: - case GK_GFX909: - case GK_GFX904: - case GK_GFX902: - case GK_GFX900: - case GK_GFX9_GENERIC: - Features["gfx9-insts"] = true; - Features["vmem-to-lds-load-insts"] = true; - [[fallthrough]]; - case GK_GFX810: - case GK_GFX805: - case 
GK_GFX803: - case GK_GFX802: - case GK_GFX801: - Features["gfx8-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["s-memrealtime"] = true; - Features["ci-insts"] = true; - Features["image-insts"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - break; - case GK_GFX705: - case GK_GFX704: - case GK_GFX703: - case GK_GFX702: - case GK_GFX701: - case GK_GFX700: - Features["ci-insts"] = true; - [[fallthrough]]; - case GK_GFX602: - case GK_GFX601: - case GK_GFX600: - Features["image-insts"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - Features["atomic-fmin-fmax-global-f32"] = true; - Features["atomic-fmin-fmax-global-f64"] = true; - break; - case GK_NONE: - break; - default: - llvm_unreachable("Unhandled GPU!"); - } + StringMap<bool> DefaultFeatures; + fillAMDGCNFeatureMap(GPU, T, DefaultFeatures); + return insertWaveSizeFeature(GPU, T, DefaultFeatures, Features); } else { if (GPU.empty()) GPU = "r600"; @@ -732,70 +783,5 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, llvm_unreachable("Unhandled GPU!"); } } -} - -static bool isWave32Capable(StringRef GPU, const Triple &T) { - bool IsWave32Capable = false; - // XXX - What does the member GPU mean if device name string passed here? 
- if (T.isAMDGCN()) { - switch (parseArchAMDGCN(GPU)) { - case GK_GFX1250: - case GK_GFX1201: - case GK_GFX1200: - case GK_GFX1153: - case GK_GFX1152: - case GK_GFX1151: - case GK_GFX1150: - case GK_GFX1103: - case GK_GFX1102: - case GK_GFX1101: - case GK_GFX1100: - case GK_GFX1036: - case GK_GFX1035: - case GK_GFX1034: - case GK_GFX1033: - case GK_GFX1032: - case GK_GFX1031: - case GK_GFX1030: - case GK_GFX1012: - case GK_GFX1011: - case GK_GFX1013: - case GK_GFX1010: - case GK_GFX12_GENERIC: - case GK_GFX11_GENERIC: - case GK_GFX10_3_GENERIC: - case GK_GFX10_1_GENERIC: - IsWave32Capable = true; - break; - default: - break; - } - } - return IsWave32Capable; -} - -std::pair<FeatureError, StringRef> -AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T, - StringMap<bool> &Features) { - bool IsWave32Capable = isWave32Capable(GPU, T); - const bool IsNullGPU = GPU.empty(); - const bool HaveWave32 = Features.count("wavefrontsize32"); - const bool HaveWave64 = Features.count("wavefrontsize64"); - if (HaveWave32 && HaveWave64) { - return {AMDGPU::INVALID_FEATURE_COMBINATION, - "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"}; - } - if (HaveWave32 && !IsNullGPU && !IsWave32Capable) { - return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"}; - } - // Don't assume any wavesize with an unknown subtarget. - if (!IsNullGPU) { - // Default to wave32 if available, or wave64 if not - if (!HaveWave32 && !HaveWave64) { - StringRef DefaultWaveSizeFeature = - IsWave32Capable ? 
"wavefrontsize32" : "wavefrontsize64"; - Features.insert(std::make_pair(DefaultWaveSizeFeature, true)); - } - } return {NO_ERROR, StringRef()}; } diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index 6acb0bc49ecf..ac3626db46ea 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -158,6 +158,8 @@ StringRef Triple::getArchName(ArchType Kind, SubArchType SubArch) { return "dxilv1.7"; case Triple::DXILSubArch_v1_8: return "dxilv1.8"; + case Triple::DXILSubArch_v1_9: + return "dxilv1.9"; default: break; } @@ -329,6 +331,8 @@ StringRef Triple::getOSTypeName(OSType Kind) { case LiteOS: return "liteos"; case XROS: return "xros"; case Vulkan: return "vulkan"; + case CheriotRTOS: + return "cheriotrtos"; } llvm_unreachable("Invalid OSType"); @@ -387,6 +391,8 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { case Callable: return "callable"; case Mesh: return "mesh"; case Amplification: return "amplification"; + case RootSignature: + return "rootsignature"; case OpenCL: return "opencl"; case OpenHOS: return "ohos"; @@ -648,6 +654,8 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Cases("dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3", "dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7", "dxilv1.8", Triple::dxil) + // Note: Cases has max limit of 10. 
+ .Case("dxilv1.9", Triple::dxil) .Case("xtensa", Triple::xtensa) .Default(Triple::UnknownArch); @@ -687,49 +695,50 @@ static Triple::VendorType parseVendor(StringRef VendorName) { static Triple::OSType parseOS(StringRef OSName) { return StringSwitch<Triple::OSType>(OSName) - .StartsWith("darwin", Triple::Darwin) - .StartsWith("dragonfly", Triple::DragonFly) - .StartsWith("freebsd", Triple::FreeBSD) - .StartsWith("fuchsia", Triple::Fuchsia) - .StartsWith("ios", Triple::IOS) - .StartsWith("kfreebsd", Triple::KFreeBSD) - .StartsWith("linux", Triple::Linux) - .StartsWith("lv2", Triple::Lv2) - .StartsWith("macos", Triple::MacOSX) - .StartsWith("managarm", Triple::Managarm) - .StartsWith("netbsd", Triple::NetBSD) - .StartsWith("openbsd", Triple::OpenBSD) - .StartsWith("solaris", Triple::Solaris) - .StartsWith("uefi", Triple::UEFI) - .StartsWith("win32", Triple::Win32) - .StartsWith("windows", Triple::Win32) - .StartsWith("zos", Triple::ZOS) - .StartsWith("haiku", Triple::Haiku) - .StartsWith("rtems", Triple::RTEMS) - .StartsWith("aix", Triple::AIX) - .StartsWith("cuda", Triple::CUDA) - .StartsWith("nvcl", Triple::NVCL) - .StartsWith("amdhsa", Triple::AMDHSA) - .StartsWith("ps4", Triple::PS4) - .StartsWith("ps5", Triple::PS5) - .StartsWith("elfiamcu", Triple::ELFIAMCU) - .StartsWith("tvos", Triple::TvOS) - .StartsWith("watchos", Triple::WatchOS) - .StartsWith("bridgeos", Triple::BridgeOS) - .StartsWith("driverkit", Triple::DriverKit) - .StartsWith("xros", Triple::XROS) - .StartsWith("visionos", Triple::XROS) - .StartsWith("mesa3d", Triple::Mesa3D) - .StartsWith("amdpal", Triple::AMDPAL) - .StartsWith("hermit", Triple::HermitCore) - .StartsWith("hurd", Triple::Hurd) - .StartsWith("wasi", Triple::WASI) - .StartsWith("emscripten", Triple::Emscripten) - .StartsWith("shadermodel", Triple::ShaderModel) - .StartsWith("liteos", Triple::LiteOS) - .StartsWith("serenity", Triple::Serenity) - .StartsWith("vulkan", Triple::Vulkan) - .Default(Triple::UnknownOS); + 
.StartsWith("darwin", Triple::Darwin) + .StartsWith("dragonfly", Triple::DragonFly) + .StartsWith("freebsd", Triple::FreeBSD) + .StartsWith("fuchsia", Triple::Fuchsia) + .StartsWith("ios", Triple::IOS) + .StartsWith("kfreebsd", Triple::KFreeBSD) + .StartsWith("linux", Triple::Linux) + .StartsWith("lv2", Triple::Lv2) + .StartsWith("macos", Triple::MacOSX) + .StartsWith("managarm", Triple::Managarm) + .StartsWith("netbsd", Triple::NetBSD) + .StartsWith("openbsd", Triple::OpenBSD) + .StartsWith("solaris", Triple::Solaris) + .StartsWith("uefi", Triple::UEFI) + .StartsWith("win32", Triple::Win32) + .StartsWith("windows", Triple::Win32) + .StartsWith("zos", Triple::ZOS) + .StartsWith("haiku", Triple::Haiku) + .StartsWith("rtems", Triple::RTEMS) + .StartsWith("aix", Triple::AIX) + .StartsWith("cuda", Triple::CUDA) + .StartsWith("nvcl", Triple::NVCL) + .StartsWith("amdhsa", Triple::AMDHSA) + .StartsWith("ps4", Triple::PS4) + .StartsWith("ps5", Triple::PS5) + .StartsWith("elfiamcu", Triple::ELFIAMCU) + .StartsWith("tvos", Triple::TvOS) + .StartsWith("watchos", Triple::WatchOS) + .StartsWith("bridgeos", Triple::BridgeOS) + .StartsWith("driverkit", Triple::DriverKit) + .StartsWith("xros", Triple::XROS) + .StartsWith("visionos", Triple::XROS) + .StartsWith("mesa3d", Triple::Mesa3D) + .StartsWith("amdpal", Triple::AMDPAL) + .StartsWith("hermit", Triple::HermitCore) + .StartsWith("hurd", Triple::Hurd) + .StartsWith("wasi", Triple::WASI) + .StartsWith("emscripten", Triple::Emscripten) + .StartsWith("shadermodel", Triple::ShaderModel) + .StartsWith("liteos", Triple::LiteOS) + .StartsWith("serenity", Triple::Serenity) + .StartsWith("vulkan", Triple::Vulkan) + .StartsWith("cheriotrtos", Triple::CheriotRTOS) + .Default(Triple::UnknownOS); } static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { @@ -780,6 +789,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { .StartsWith("callable", Triple::Callable) .StartsWith("mesh", 
Triple::Mesh) .StartsWith("amplification", Triple::Amplification) + .StartsWith("rootsignature", Triple::RootSignature) .StartsWith("opencl", Triple::OpenCL) .StartsWith("ohos", Triple::OpenHOS) .StartsWith("pauthtest", Triple::PAuthTest) @@ -839,6 +849,7 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { .EndsWith("v1.6", Triple::DXILSubArch_v1_6) .EndsWith("v1.7", Triple::DXILSubArch_v1_7) .EndsWith("v1.8", Triple::DXILSubArch_v1_8) + .EndsWith("v1.9", Triple::DXILSubArch_v1_9) .Default(Triple::NoSubArch); StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName); @@ -1108,7 +1119,7 @@ static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) { VersionTuple Ver = parseVersionFromName(ShaderModelStr.drop_front(strlen("shadermodel"))); // Default DXIL minor version when Shader Model version is anything other - // than 6.[0...8] or 6.x (which translates to latest current SM version) + // than 6.[0...9] or 6.x (which translates to latest current SM version) const unsigned SMMajor = 6; if (!Ver.empty()) { if (Ver.getMajor() == SMMajor) { @@ -1132,6 +1143,8 @@ static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) { return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_7); case 8: return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_8); + case 9: + return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_9); default: report_fatal_error("Unsupported Shader Model version", false); } diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index b72096553ad9..edca7c18062a 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -72,7 +72,7 @@ constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF | constexpr FeatureBitset FeaturesX86_64_V3 = FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C | FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE; -constexpr 
FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 | FeatureEVEX512 | +constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 | FeatureAVX512BW | FeatureAVX512CD | FeatureAVX512DQ | FeatureAVX512VL; @@ -95,9 +95,8 @@ constexpr FeatureBitset FeaturesBroadwell = // Intel Knights Landing and Knights Mill // Knights Landing has feature parity with Broadwell. -constexpr FeatureBitset FeaturesKNL = FeaturesBroadwell | FeatureAES | - FeatureAVX512F | FeatureEVEX512 | - FeatureAVX512CD; +constexpr FeatureBitset FeaturesKNL = + FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureAVX512CD; constexpr FeatureBitset FeaturesKNM = FeaturesKNL | FeatureAVX512VPOPCNTDQ; // Intel Skylake processors. @@ -107,9 +106,9 @@ constexpr FeatureBitset FeaturesSkylakeClient = // SkylakeServer inherits all SkylakeClient features except SGX. // FIXME: That doesn't match gcc. constexpr FeatureBitset FeaturesSkylakeServer = - (FeaturesSkylakeClient & ~FeatureSGX) | FeatureAVX512F | FeatureEVEX512 | - FeatureAVX512CD | FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | - FeatureCLWB | FeaturePKU; + (FeaturesSkylakeClient & ~FeatureSGX) | FeatureAVX512F | FeatureAVX512CD | + FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureCLWB | + FeaturePKU; constexpr FeatureBitset FeaturesCascadeLake = FeaturesSkylakeServer | FeatureAVX512VNNI; constexpr FeatureBitset FeaturesCooperLake = @@ -117,9 +116,9 @@ constexpr FeatureBitset FeaturesCooperLake = // Intel 10nm processors. 
constexpr FeatureBitset FeaturesCannonlake = - FeaturesSkylakeClient | FeatureAVX512F | FeatureEVEX512 | FeatureAVX512CD | - FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | - FeatureAVX512VBMI | FeaturePKU | FeatureSHA; + FeaturesSkylakeClient | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ | + FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI | + FeaturePKU | FeatureSHA; constexpr FeatureBitset FeaturesICLClient = FeaturesCannonlake | FeatureAVX512BITALG | FeatureAVX512VBMI2 | FeatureAVX512VNNI | FeatureAVX512VPOPCNTDQ | FeatureGFNI | FeatureRDPID | @@ -139,7 +138,7 @@ constexpr FeatureBitset FeaturesSapphireRapids = constexpr FeatureBitset FeaturesGraniteRapids = FeaturesSapphireRapids | FeatureAMX_FP16 | FeaturePREFETCHI; constexpr FeatureBitset FeaturesDiamondRapids = - FeaturesGraniteRapids | FeatureAMX_COMPLEX | FeatureAVX10_2_512 | + FeaturesGraniteRapids | FeatureAMX_COMPLEX | FeatureAVX10_2 | FeatureCMPCCXADD | FeatureAVXIFMA | FeatureAVXNECONVERT | FeatureAVXVNNIINT8 | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeatureEGPR | FeatureZU | FeatureCCMP | FeaturePush2Pop2 | @@ -244,11 +243,10 @@ static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 | FeatureINVPCID | FeaturePKU | FeatureVAES | FeatureVPCLMULQDQ; static constexpr FeatureBitset FeaturesZNVER4 = - FeaturesZNVER3 | FeatureAVX512F | FeatureEVEX512 | FeatureAVX512CD | - FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | - FeatureAVX512VBMI | FeatureAVX512VBMI2 | FeatureAVX512VNNI | - FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 | - FeatureGFNI | FeatureSHSTK; + FeaturesZNVER3 | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ | + FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI | + FeatureAVX512VBMI2 | FeatureAVX512VNNI | FeatureAVX512BITALG | + FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 | FeatureGFNI | FeatureSHSTK; static constexpr FeatureBitset 
FeaturesZNVER5 = FeaturesZNVER4 | FeatureAVXVNNI | FeatureMOVDIRI | FeatureMOVDIR64B | @@ -394,7 +392,7 @@ constexpr ProcInfo Processors[] = { // Clearwaterforest microarchitecture based processors. { {"clearwaterforest"}, CK_Lunarlake, FEATURE_AVX2, FeaturesClearwaterforest, 'p', false }, // Diamond Rapids microarchitecture based processors. - { {"diamondrapids"}, CK_Diamondrapids, FEATURE_AVX10_2_512, FeaturesDiamondRapids, 'z', false }, + { {"diamondrapids"}, CK_Diamondrapids, FEATURE_AVX10_2, FeaturesDiamondRapids, 'z', false }, // Knights Landing processor. { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', false }, { {"mic_avx512"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', true }, @@ -616,7 +614,7 @@ constexpr FeatureBitset ImpliedFeaturesAMX_FP8 = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesAMX_TRANSPOSE = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesAMX_MOVRS = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesAMX_AVX512 = - FeatureAMX_TILE | FeatureAVX10_2_512; + FeatureAMX_TILE | FeatureAVX10_2; constexpr FeatureBitset ImpliedFeaturesAMX_TF32 = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesHRESET = {}; @@ -642,11 +640,9 @@ constexpr FeatureBitset ImpliedFeaturesAVX10_1 = FeatureAVX512VNNI | FeatureAVX512BF16 | FeatureAVX512VPOPCNTDQ | FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16 | FeatureAVX512DQ | FeatureAVX512VL; -constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 = - FeatureAVX10_1 | FeatureEVEX512; constexpr FeatureBitset ImpliedFeaturesAVX10_2 = FeatureAVX10_1; -constexpr FeatureBitset ImpliedFeaturesAVX10_2_512 = - FeatureAVX10_2 | FeatureAVX10_1_512; +constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 = FeatureAVX10_1; +constexpr FeatureBitset ImpliedFeaturesAVX10_2_512 = FeatureAVX10_2; // APX Features constexpr FeatureBitset ImpliedFeaturesEGPR = {}; |
