4 files changed, 23 insertions, 32 deletions
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 452b2e685867..8660373c3927 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -337,7 +337,7 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D,
     return false;
 
   return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&
-         (D->hasAttr<OpenCLKernelAttr>() ||
+         (D->hasAttr<DeviceKernelAttr>() ||
           (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
           (isa<VarDecl>(D) &&
            (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
@@ -350,7 +350,7 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
   const auto *ReqdWGS =
       M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
   const bool IsOpenCLKernel =
-      M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
+      M.getLangOpts().OpenCL && FD->hasAttr<DeviceKernelAttr>();
   const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();
 
   const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
@@ -572,7 +572,7 @@ bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const {
 void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
     const FunctionType *&FT) const {
   FT = getABIInfo().getContext().adjustFunctionType(
-      FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
+      FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel));
 }
 
 /// Return IR struct type for rtinfo struct in rocm-device-libs used for device
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 0ceca6192d8e..ad802c9131de 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -260,40 +260,31 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
 
   llvm::Function *F = cast<llvm::Function>(GV);
 
-  // Perform special handling in OpenCL mode
-  if (M.getLangOpts().OpenCL) {
-    // Use OpenCL function attributes to check for kernel functions
+  // Perform special handling in OpenCL/CUDA mode
+  if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) {
+    // Use function attributes to check for kernel functions
     // By default, all functions are device functions
-    if (FD->hasAttr<OpenCLKernelAttr>()) {
-      // OpenCL __kernel functions get kernel metadata
+    if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
+      // OpenCL/CUDA kernel functions get kernel metadata
       // Create !{<func-ref>, metadata !"kernel", i32 1} node
-      F->setCallingConv(llvm::CallingConv::PTX_Kernel);
       // And kernel functions are not subject to inlining
       F->addFnAttr(llvm::Attribute::NoInline);
+      if (FD->hasAttr<CUDAGlobalAttr>()) {
+        SmallVector<int, 10> GCI;
+        for (auto IV : llvm::enumerate(FD->parameters()))
+          if (IV.value()->hasAttr<CUDAGridConstantAttr>())
+            // For some reason arg indices are 1-based in NVVM
+            GCI.push_back(IV.index() + 1);
+        // Create !{<func-ref>, metadata !"kernel", i32 1} node
+        F->setCallingConv(llvm::CallingConv::PTX_Kernel);
+        addGridConstantNVVMMetadata(F, GCI);
+      }
+      if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
+        M.handleCUDALaunchBoundsAttr(F, Attr);
     }
   }
-
-  // Perform special handling in CUDA mode.
-  if (M.getLangOpts().CUDA) {
-    // CUDA __global__ functions get a kernel metadata entry.  Since
-    // __global__ functions cannot be called from the device, we do not
-    // need to set the noinline attribute.
-    if (FD->hasAttr<CUDAGlobalAttr>()) {
-      SmallVector<int, 10> GCI;
-      for (auto IV : llvm::enumerate(FD->parameters()))
-        if (IV.value()->hasAttr<CUDAGridConstantAttr>())
-          // For some reason arg indices are 1-based in NVVM
-          GCI.push_back(IV.index() + 1);
-      // Create !{<func-ref>, metadata !"kernel", i32 1} node
-      F->setCallingConv(llvm::CallingConv::PTX_Kernel);
-      addGridConstantNVVMMetadata(F, GCI);
-    }
-    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
-      M.handleCUDALaunchBoundsAttr(F, Attr);
-  }
-
   // Attach kernel metadata directly if compiling for NVPTX.
-  if (FD->hasAttr<NVPTXKernelAttr>()) {
+  if (FD->hasAttr<DeviceKernelAttr>()) {
     F->setCallingConv(llvm::CallingConv::PTX_Kernel);
   }
 }
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 92ae46234e6b..2f1e43cdc8cc 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -228,7 +228,7 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
   // Convert HIP kernels to SPIR-V kernels.
   if (getABIInfo().getContext().getLangOpts().HIP) {
     FT = getABIInfo().getContext().adjustFunctionType(
-        FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
+        FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel));
     return;
   }
 }
diff --git a/clang/lib/CodeGen/Targets/TCE.cpp b/clang/lib/CodeGen/Targets/TCE.cpp
index f3685ccd9825..df49aea49a1e 100644
--- a/clang/lib/CodeGen/Targets/TCE.cpp
+++ b/clang/lib/CodeGen/Targets/TCE.cpp
@@ -39,7 +39,7 @@ void TCETargetCodeGenInfo::setTargetAttributes(
   llvm::Function *F = cast<llvm::Function>(GV);
 
   if (M.getLangOpts().OpenCL) {
-    if (FD->hasAttr<OpenCLKernelAttr>()) {
+    if (FD->hasAttr<DeviceKernelAttr>()) {
       // OpenCL C Kernel functions are not subject to inlining
       F->addFnAttr(llvm::Attribute::NoInline);
       const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>();