diff options
Diffstat (limited to 'clang/lib/CodeGen/CGBuiltin.cpp')
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 425 |
1 files changed, 265 insertions, 160 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c16b69ba8756..96dcf6283f9f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -581,38 +581,19 @@ static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, return CGF.Builder.CreateCall(F, Args); } -// Emit a simple mangled intrinsic that has 1 argument and a return type -// matching the argument type. -static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, - unsigned IntrinsicID, - llvm::StringRef Name = "") { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - - Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, Src0, Name); -} - -// Emit an intrinsic that has 2 operands of the same type as its result. -static Value *emitBinaryBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - - Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, { Src0, Src1 }); -} - -// Emit an intrinsic that has 3 operands of the same type as its result. -static Value *emitTernaryBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); - - Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); +// Emit a simple intrinsic that has N scalar arguments and a return type +// matching the argument type. It is assumed that only the first argument is +// overloaded. +template <unsigned N> +Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E, + unsigned IntrinsicID, + llvm::StringRef Name = "") { + static_assert(N, "expect non-empty argument"); + SmallVector<Value *, N> Args; + for (unsigned I = 0; I < N; ++I) + Args.push_back(CGF.EmitScalarExpr(E->getArg(I))); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType()); + return CGF.Builder.CreateCall(F, Args, Name); } // Emit an intrinsic that has 1 float or double operand, and 1 integer. @@ -734,17 +715,14 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, return CGF.Builder.CreateExtractValue(Tmp, 0); } -static Value *emitRangedBuiltin(CodeGenFunction &CGF, - unsigned IntrinsicID, +static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high) { - llvm::MDBuilder MDHelper(CGF.getLLVMContext()); - llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); - Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); - llvm::Instruction *Call = CGF.Builder.CreateCall(F); - Call->setMetadata(llvm::LLVMContext::MD_range, RNode); - Call->setMetadata(llvm::LLVMContext::MD_noundef, - llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); - return Call; + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); + llvm::CallInst *Call = CGF.Builder.CreateCall(F); + llvm::ConstantRange CR(APInt(32, low), APInt(32, high)); + Call->addRangeRetAttr(CR); + Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef); + return Call; } namespace { @@ -2681,7 +2659,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_copysignf16: case Builtin::BI__builtin_copysignl: case Builtin::BI__builtin_copysignf128: - return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); + return RValue::get( + emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign)); case Builtin::BIcos: case Builtin::BIcosf: @@ -2726,7 +2705,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // TODO: strictfp support if (Builder.getIsFPConstrained()) break; - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp10)); + return RValue::get( + emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10)); } case Builtin::BIfabs: case Builtin::BIfabsf: @@ -2736,7 +2716,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fabsf16: case Builtin::BI__builtin_fabsl: case Builtin::BI__builtin_fabsf128: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); + return RValue::get( + emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs)); case Builtin::BIfloor: case Builtin::BIfloorf: @@ -2923,6 +2904,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, SetSqrtFPAccuracy(Call); return RValue::get(Call); } + + case Builtin::BItan: + case Builtin::BItanf: + case Builtin::BItanl: + case Builtin::BI__builtin_tan: + case Builtin::BI__builtin_tanf: + case Builtin::BI__builtin_tanf16: + case Builtin::BI__builtin_tanl: + case Builtin::BI__builtin_tanf128: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan)); + case Builtin::BItrunc: case Builtin::BItruncf: case Builtin::BItruncl: @@ -3407,13 +3400,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI_byteswap_ushort: case Builtin::BI_byteswap_ulong: case Builtin::BI_byteswap_uint64: { - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); + return RValue::get( + emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap)); } case Builtin::BI__builtin_bitreverse8: case Builtin::BI__builtin_bitreverse16: case Builtin::BI__builtin_bitreverse32: case Builtin::BI__builtin_bitreverse64: { - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); + return RValue::get( + emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse)); } case Builtin::BI__builtin_rotateleft8: case Builtin::BI__builtin_rotateleft16: @@ -3510,6 +3505,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_trap: EmitTrapCall(Intrinsic::trap); return RValue::get(nullptr); + case Builtin::BI__builtin_verbose_trap: { + llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation(); + if (getDebugInfo()) { + TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor( + TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()), + *E->getArg(1)->tryEvaluateString(getContext())); + } + ApplyDebugLocation ApplyTrapDI(*this, TrapLocation); + // Currently no attempt is made to prevent traps from being merged. + EmitTrapCall(Intrinsic::trap); + return RValue::get(nullptr); + } case Builtin::BI__debugbreak: EmitTrapCall(Intrinsic::debugtrap); return RValue::get(nullptr); @@ -3690,69 +3697,90 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)), Builder.getFalse(), nullptr, "elt.abs"); else - Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs"); + Result = emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::fabs, "elt.abs"); return RValue::get(Result); } - + case Builtin::BI__builtin_elementwise_acos: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::acos, "elt.acos")); + case Builtin::BI__builtin_elementwise_asin: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::asin, "elt.asin")); + case Builtin::BI__builtin_elementwise_atan: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::atan, "elt.atan")); case Builtin::BI__builtin_elementwise_ceil: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::ceil, "elt.ceil")); case Builtin::BI__builtin_elementwise_exp: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp, "elt.exp")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::exp, "elt.exp")); case Builtin::BI__builtin_elementwise_exp2: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp2, "elt.exp2")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::exp2, "elt.exp2")); case Builtin::BI__builtin_elementwise_log: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::log, "elt.log")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::log, "elt.log")); case Builtin::BI__builtin_elementwise_log2: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::log2, "elt.log2")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::log2, "elt.log2")); case Builtin::BI__builtin_elementwise_log10: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::log10, "elt.log10")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::log10, "elt.log10")); case Builtin::BI__builtin_elementwise_pow: { - return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow)); + return RValue::get( + emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow)); } case Builtin::BI__builtin_elementwise_bitreverse: - return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::bitreverse, - "elt.bitreverse")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse")); case Builtin::BI__builtin_elementwise_cos: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::cos, "elt.cos")); + case Builtin::BI__builtin_elementwise_cosh: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::cosh, "elt.cosh")); case Builtin::BI__builtin_elementwise_floor: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::floor, "elt.floor")); case Builtin::BI__builtin_elementwise_roundeven: - return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven, - "elt.roundeven")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::roundeven, "elt.roundeven")); case Builtin::BI__builtin_elementwise_round: - return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::round, - "elt.round")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::round, "elt.round")); case Builtin::BI__builtin_elementwise_rint: - return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::rint, - "elt.rint")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::rint, "elt.rint")); case Builtin::BI__builtin_elementwise_nearbyint: - return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::nearbyint, - "elt.nearbyint")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint")); case Builtin::BI__builtin_elementwise_sin: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::sin, "elt.sin")); + case Builtin::BI__builtin_elementwise_sinh: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::sinh, "elt.sinh")); case Builtin::BI__builtin_elementwise_tan: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::tan, "elt.tan")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::tan, "elt.tan")); + case Builtin::BI__builtin_elementwise_tanh: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::tanh, "elt.tanh")); case Builtin::BI__builtin_elementwise_trunc: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::trunc, "elt.trunc")); case Builtin::BI__builtin_elementwise_canonicalize: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize")); case Builtin::BI__builtin_elementwise_copysign: - return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign)); + return RValue::get(emitBuiltinWithOneOverloadedType<2>( + *this, E, llvm::Intrinsic::copysign)); case Builtin::BI__builtin_elementwise_fma: - return RValue::get(emitTernaryBuiltin(*this, E, llvm::Intrinsic::fma)); + return RValue::get( + emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma)); case Builtin::BI__builtin_elementwise_add_sat: case Builtin::BI__builtin_elementwise_sub_sat: { Value *Op0 = EmitScalarExpr(E->getArg(0)); @@ -3819,7 +3847,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, assert(QT->isFloatingType() && "must have a float here"); return llvm::Intrinsic::vector_reduce_fmax; }; - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } @@ -3838,24 +3866,24 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return llvm::Intrinsic::vector_reduce_fmin; }; - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } case Builtin::BI__builtin_reduce_add: - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add")); case Builtin::BI__builtin_reduce_mul: - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul")); case Builtin::BI__builtin_reduce_xor: - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor")); case Builtin::BI__builtin_reduce_or: - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or")); case Builtin::BI__builtin_reduce_and: - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and")); case Builtin::BI__builtin_matrix_transpose: { @@ -5273,6 +5301,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__iso_volatile_store64: return RValue::get(EmitISOVolatileStore(*this, E)); + case Builtin::BI__builtin_ptrauth_sign_constant: + return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); + case Builtin::BI__builtin_ptrauth_auth: case Builtin::BI__builtin_ptrauth_auth_and_resign: case Builtin::BI__builtin_ptrauth_blend_discriminator: @@ -5871,7 +5902,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_canonicalizef: case Builtin::BI__builtin_canonicalizef16: case Builtin::BI__builtin_canonicalizel: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); + return RValue::get( + emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize)); case Builtin::BI__builtin_thread_pointer: { if (!getContext().getTargetInfo().isTLSSupported()) @@ -5992,8 +6024,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // If this is a predefined lib function (e.g. malloc), emit the call // using exactly the normal call path. if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) - return emitLibraryCall( - *this, FD, E, cast<llvm::Constant>(EmitScalarExpr(E->getCallee()))); + return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD)); // Check that a call to a target specific builtin has the correct target // features. @@ -14180,7 +14211,7 @@ Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) { ArgStr.split(Features, "+"); for (auto &Feature : Features) { Feature = Feature.trim(); - if (!llvm::AArch64::parseArchExtension(Feature)) + if (!llvm::AArch64::parseFMVExtension(Feature)) return Builder.getFalse(); if (Feature != "default") Features.push_back(Feature); @@ -18128,9 +18159,35 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, break; } + // Some of the atomic builtins take the scope as a string name. StringRef scp; - llvm::getConstantStringInfo(Scope, scp); - SSID = getLLVMContext().getOrInsertSyncScopeID(scp); + if (llvm::getConstantStringInfo(Scope, scp)) { + SSID = getLLVMContext().getOrInsertSyncScopeID(scp); + return; + } + + // Older builtins had an enum argument for the memory scope. + int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue(); + switch (scope) { + case 0: // __MEMORY_SCOPE_SYSTEM + SSID = llvm::SyncScope::System; + break; + case 1: // __MEMORY_SCOPE_DEVICE + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); + break; + case 2: // __MEMORY_SCOPE_WRKGRP + SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup"); + break; + case 3: // __MEMORY_SCOPE_WVFRNT + SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront"); + break; + case 4: // __MEMORY_SCOPE_SINGLE + SSID = llvm::SyncScope::SingleThread; + break; + default: + SSID = llvm::SyncScope::System; + break; + } } llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments, @@ -18319,8 +18376,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, if (!E->getArg(0)->getType()->hasFloatingRepresentation()) llvm_unreachable("rsqrt operand must have a float representation"); return Builder.CreateIntrinsic( - /*ReturnType=*/Op0->getType(), Intrinsic::dx_rsqrt, - ArrayRef<Value *>{Op0}, nullptr, "dx.rsqrt"); + /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(), + ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt"); } case Builtin::BI__builtin_hlsl_wave_get_lane_index: { return EmitRuntimeCall(CGM.CreateRuntimeFunction( @@ -18398,9 +18455,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_ds_swizzle: - return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); + return emitBuiltinWithOneOverloadedType<2>(*this, E, + Intrinsic::amdgcn_ds_swizzle); case AMDGPU::BI__builtin_amdgcn_mov_dpp8: - return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8); + return emitBuiltinWithOneOverloadedType<2>(*this, E, + Intrinsic::amdgcn_mov_dpp8); case AMDGPU::BI__builtin_amdgcn_mov_dpp: case AMDGPU::BI__builtin_amdgcn_update_dpp: { llvm::SmallVector<llvm::Value *, 6> Args; @@ -18420,42 +18479,63 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } + case AMDGPU::BI__builtin_amdgcn_permlane16: + case AMDGPU::BI__builtin_amdgcn_permlanex16: + return emitBuiltinWithOneOverloadedType<6>( + *this, E, + BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16 + ? Intrinsic::amdgcn_permlane16 + : Intrinsic::amdgcn_permlanex16); + case AMDGPU::BI__builtin_amdgcn_permlane64: + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_permlane64); + case AMDGPU::BI__builtin_amdgcn_readlane: + return emitBuiltinWithOneOverloadedType<2>(*this, E, + Intrinsic::amdgcn_readlane); + case AMDGPU::BI__builtin_amdgcn_readfirstlane: + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_readfirstlane); case AMDGPU::BI__builtin_amdgcn_div_fixup: case AMDGPU::BI__builtin_amdgcn_div_fixupf: case AMDGPU::BI__builtin_amdgcn_div_fixuph: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); + return emitBuiltinWithOneOverloadedType<3>(*this, E, + Intrinsic::amdgcn_div_fixup); case AMDGPU::BI__builtin_amdgcn_trig_preop: case AMDGPU::BI__builtin_amdgcn_trig_preopf: return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); case AMDGPU::BI__builtin_amdgcn_rcp: case AMDGPU::BI__builtin_amdgcn_rcpf: case AMDGPU::BI__builtin_amdgcn_rcph: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); + return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp); case AMDGPU::BI__builtin_amdgcn_sqrt: case AMDGPU::BI__builtin_amdgcn_sqrtf: case AMDGPU::BI__builtin_amdgcn_sqrth: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_sqrt); case AMDGPU::BI__builtin_amdgcn_rsq: case AMDGPU::BI__builtin_amdgcn_rsqf: case AMDGPU::BI__builtin_amdgcn_rsqh: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); + return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq); case AMDGPU::BI__builtin_amdgcn_rsq_clamp: case AMDGPU::BI__builtin_amdgcn_rsq_clampf: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_rsq_clamp); case AMDGPU::BI__builtin_amdgcn_sinf: case AMDGPU::BI__builtin_amdgcn_sinh: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); + return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin); case AMDGPU::BI__builtin_amdgcn_cosf: case AMDGPU::BI__builtin_amdgcn_cosh: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); + return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos); case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: return EmitAMDGPUDispatchPtr(*this, E); case AMDGPU::BI__builtin_amdgcn_logf: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log); + return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log); case AMDGPU::BI__builtin_amdgcn_exp2f: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_exp2); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_exp2); case AMDGPU::BI__builtin_amdgcn_log_clampf: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_log_clamp); case AMDGPU::BI__builtin_amdgcn_ldexp: case AMDGPU::BI__builtin_amdgcn_ldexpf: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); @@ -18476,7 +18556,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_frexp_mant: case AMDGPU::BI__builtin_amdgcn_frexp_mantf: case AMDGPU::BI__builtin_amdgcn_frexp_manth: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_frexp_mant); case AMDGPU::BI__builtin_amdgcn_frexp_exp: case AMDGPU::BI__builtin_amdgcn_frexp_expf: { Value *Src0 = EmitScalarExpr(E->getArg(0)); @@ -18493,13 +18574,17 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_fract: case AMDGPU::BI__builtin_amdgcn_fractf: case AMDGPU::BI__builtin_amdgcn_fracth: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_fract); case AMDGPU::BI__builtin_amdgcn_lerp: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); + return emitBuiltinWithOneOverloadedType<3>(*this, E, + Intrinsic::amdgcn_lerp); case AMDGPU::BI__builtin_amdgcn_ubfe: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe); + return emitBuiltinWithOneOverloadedType<3>(*this, E, + Intrinsic::amdgcn_ubfe); case AMDGPU::BI__builtin_amdgcn_sbfe: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe); + return emitBuiltinWithOneOverloadedType<3>(*this, E, + Intrinsic::amdgcn_sbfe); case AMDGPU::BI__builtin_amdgcn_ballot_w32: case AMDGPU::BI__builtin_amdgcn_ballot_w64: { llvm::Type *ResultType = ConvertType(E->getType()); @@ -18537,7 +18622,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); case AMDGPU::BI__builtin_amdgcn_fmed3f: case AMDGPU::BI__builtin_amdgcn_fmed3h: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); + return emitBuiltinWithOneOverloadedType<3>(*this, E, + Intrinsic::amdgcn_fmed3); case AMDGPU::BI__builtin_amdgcn_ds_append: case AMDGPU::BI__builtin_amdgcn_ds_consume: { Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? @@ -18546,14 +18632,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_ds_faddf: case AMDGPU::BI__builtin_amdgcn_ds_fminf: case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { Intrinsic::ID Intrin; switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgcn_ds_faddf: - Intrin = Intrinsic::amdgcn_ds_fadd; - break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: Intrin = Intrinsic::amdgcn_ds_fmin; break; @@ -18644,35 +18726,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()}); return Builder.CreateCall(F, {Addr, Val}); } - case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: - case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: { - Intrinsic::ID IID; - llvm::Type *ArgTy; - switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = Intrinsic::amdgcn_ds_fadd; - break; - case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: - ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); - IID = Intrinsic::amdgcn_ds_fadd; - break; - case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 2); - IID = Intrinsic::amdgcn_ds_fadd; - break; - } - llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); - llvm::Value *Val = EmitScalarExpr(E->getArg(1)); - llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue( - llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true)); - llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue( - llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0)); - llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy}); - return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); - } case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: @@ -19007,7 +19060,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, // r600 intrinsics case AMDGPU::BI__builtin_r600_recipsqrt_ieee: case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: - return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::r600_recipsqrt_ieee); case AMDGPU::BI__builtin_r600_read_tidig_x: return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); case AMDGPU::BI__builtin_r600_read_tidig_y: @@ -19032,7 +19086,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_atomic_inc32: case AMDGPU::BI__builtin_amdgcn_atomic_inc64: case AMDGPU::BI__builtin_amdgcn_atomic_dec32: - case AMDGPU::BI__builtin_amdgcn_atomic_dec64: { + case AMDGPU::BI__builtin_amdgcn_atomic_dec64: + case AMDGPU::BI__builtin_amdgcn_ds_faddf: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19043,23 +19102,54 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_atomic_dec64: BinOp = llvm::AtomicRMWInst::UDecWrap; break; + case AMDGPU::BI__builtin_amdgcn_ds_faddf: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: + BinOp = llvm::AtomicRMWInst::FAdd; + break; } Address Ptr = CheckAtomicAlignment(*this, E); Value *Val = EmitScalarExpr(E->getArg(1)); + llvm::Type *OrigTy = Val->getType(); + QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); - ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), - EmitScalarExpr(E->getArg(3)), AO, SSID); + bool Volatile; - QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); - bool Volatile = - PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); + if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf) { + // __builtin_amdgcn_ds_faddf has an explicit volatile argument + Volatile = + cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue(); + } else { + // Infer volatile from the passed type. + Volatile = + PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); + } + + if (E->getNumArgs() >= 4) { + // Some of the builtins have explicit ordering and scope arguments. + ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), + EmitScalarExpr(E->getArg(3)), AO, SSID); + } else { + // The ds_atomic_fadd_* builtins do not have syncscope/order arguments. + SSID = llvm::SyncScope::System; + AO = AtomicOrdering::SequentiallyConsistent; + + // The v2bf16 builtin uses i16 instead of a natural bfloat type. + if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) { + llvm::Type *V2BF16Ty = FixedVectorType::get( + llvm::Type::getBFloatTy(Builder.getContext()), 2); + Val = Builder.CreateBitCast(Val, V2BF16Ty); + } + } llvm::AtomicRMWInst *RMW = Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID); if (Volatile) RMW->setVolatile(true); - return RMW; + return Builder.CreateBitCast(RMW, OrigTy); } case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn: case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: { @@ -19070,6 +19160,17 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType}); return Builder.CreateCall(F, {Arg}); } + case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: + return emitBuiltinWithOneOverloadedType<4>( + *this, E, Intrinsic::amdgcn_make_buffer_rsrc); + case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128: + return emitBuiltinWithOneOverloadedType<5>( + *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store); default: return nullptr; } @@ -21137,6 +21238,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle); return Builder.CreateCall(Callee, Ops); } + case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8: + case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8: case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2: @@ -21146,10 +21249,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Value *C = EmitScalarExpr(E->getArg(2)); unsigned IntNo; switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8: case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2: IntNo = Intrinsic::wasm_relaxed_madd; break; + case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8: case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: IntNo = Intrinsic::wasm_relaxed_nmadd; |
