diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /libclc/clc | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format
Diffstat (limited to 'libclc/clc')
54 files changed, 234 insertions, 167 deletions
diff --git a/libclc/clc/include/clc/clc_convert.h b/libclc/clc/include/clc/clc_convert.h index ab41e5abb9d6..97cfa667c8b1 100644 --- a/libclc/clc/include/clc/clc_convert.h +++ b/libclc/clc/include/clc/clc_convert.h @@ -9,7 +9,7 @@ #ifndef __CLC_CLC_CONVERT_H__ #define __CLC_CLC_CONVERT_H__ -#include <clc/clcmacro.h> +#include <clc/internal/clc.h> #define _CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \ _CLC_OVERLOAD _CLC_DECL TO_TYPE __clc_convert_##TO_TYPE##SUFFIX(FROM_TYPE x); diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h deleted file mode 100644 index 9fa11489b145..000000000000 --- a/libclc/clc/include/clc/clcmacro.h +++ /dev/null @@ -1,69 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef __CLC_CLCMACRO_H__ -#define __CLC_CLCMACRO_H__ - -#include <clc/internal/clc.h> -#include <clc/utils.h> - -#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, __CLC_FUNCTION, ARG1_TYPE, \ - ADDR_SPACE, ARG2_TYPE) \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 2))(__CLC_FUNCTION(x.s0, ptr), \ - __CLC_FUNCTION(x.s1, ptr + 1)); \ - } \ - \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 3))(__CLC_FUNCTION(x.s0, ptr), \ - __CLC_FUNCTION(x.s1, ptr + 1), \ - __CLC_FUNCTION(x.s2, ptr + 2)); \ - } \ - \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \ - 
__CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 4))( \ - __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \ - __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3)); \ - } \ - \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 8))( \ - __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \ - __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \ - __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \ - __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7)); \ - } \ - \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 16))( \ - __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \ - __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \ - __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \ - __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7), \ - __CLC_FUNCTION(x.s8, ptr + 8), __CLC_FUNCTION(x.s9, ptr + 9), \ - __CLC_FUNCTION(x.sa, ptr + 10), __CLC_FUNCTION(x.sb, ptr + 11), \ - __CLC_FUNCTION(x.sc, ptr + 12), __CLC_FUNCTION(x.sd, ptr + 13), \ - __CLC_FUNCTION(x.se, ptr + 14), __CLC_FUNCTION(x.sf, ptr + 15)); \ - } - -#endif // __CLC_CLCMACRO_H__ diff --git a/libclc/clc/include/clc/float/definitions.h b/libclc/clc/include/clc/float/definitions.h index 9db49fa54d2e..93d2b5b391c5 100644 --- a/libclc/clc/include/clc/float/definitions.h +++ b/libclc/clc/include/clc/float/definitions.h @@ -70,10 +70,6 @@ #define M_SQRT2 0x1.6a09e667f3bcdp+0 #define M_SQRT1_2 0x1.6a09e667f3bcdp-1 
-#ifdef __CLC_INTERNAL -#define M_LOG210 0x1.a934f0979a371p+1 -#endif - #endif #ifdef cl_khr_fp16 diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h index 2321634c7684..0776caddde0d 100644 --- a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h +++ b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h @@ -10,8 +10,10 @@ #define __CLC_MEM_FENCE_CLC_MEM_FENCE_H__ #include <clc/internal/clc.h> +#include <clc/mem_fence/clc_mem_semantic.h> -_CLC_OVERLOAD _CLC_DECL void __clc_mem_fence(int memory_scope, - int memory_order); +_CLC_OVERLOAD _CLC_DECL void +__clc_mem_fence(int memory_scope, int memory_order, + __CLC_MemorySemantics memory_semantics); #endif // __CLC_MEM_FENCE_CLC_MEM_FENCE_H__ diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h b/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h new file mode 100644 index 000000000000..4d9f5f1db8ee --- /dev/null +++ b/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MEM_FENCE_CLC_MEM_SEMANTIC_H__ +#define __CLC_MEM_FENCE_CLC_MEM_SEMANTIC_H__ + +// The memory or address space to which the memory ordering is applied. 
+typedef enum __CLC_MemorySemantics { + __CLC_MEMORY_PRIVATE = 1 << 0, + __CLC_MEMORY_GLOBAL = 1 << 1, + __CLC_MEMORY_CONSTANT = 1 << 2, + __CLC_MEMORY_LOCAL = 1 << 3, + __CLC_MEMORY_GENERIC = 1 << 4, +} __CLC_MemorySemantics; + +#endif // __CLC_MEM_FENCE_CLC_MEM_SEMANTIC_H__ diff --git a/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc new file mode 100644 index 000000000000..544057b0e137 --- /dev/null +++ b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/utils.h> + +#if __CLC_VECSIZE_OR_1 >= 2 + +#ifndef __CLC_IMPL_FUNCTION +#define __CLC_IMPL_FUNCTION __CLC_FUNCTION +#endif + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x) { + union { + __CLC_GENTYPE vec; + __CLC_SCALAR_GENTYPE arr[__CLC_VECSIZE_OR_1]; + } u_x, u_result; + u_x.vec = x; + for (int i = 0; i < __CLC_VECSIZE_OR_1; ++i) + u_result.arr[i] = __CLC_IMPL_FUNCTION(u_x.arr[i]); + return u_result.vec; +} + +#endif // __CLC_VECSIZE_OR_1 >= 2 diff --git a/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc b/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc new file mode 100644 index 000000000000..fff91d36e626 --- /dev/null +++ b/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc @@ -0,0 +1,106 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/utils.h> + +#ifdef __CLC_SCALAR + +#ifndef __CLC_IMPL_FUNCTION +#define __CLC_IMPL_FUNCTION __CLC_FUNCTION +#endif + +#ifndef __CLC_RET_TYPE +#define __CLC_RET_TYPE __CLC_GENTYPE +#endif + +#ifndef __CLC_ARG1_TYPE +#define __CLC_ARG1_TYPE __CLC_GENTYPE +#endif + +#ifndef __CLC_ARG2_TYPE +#define __CLC_ARG2_TYPE __CLC_GENTYPE +#endif + +#define __CLC_RET_VECTYPE __CLC_XCONCAT(__CLC_RET_TYPE, __CLC_VECTOR_SIZE) +#define __CLC_ARG1_VECTYPE __CLC_XCONCAT(__CLC_ARG1_TYPE, __CLC_VECTOR_SIZE) +#define __CLC_ARG2_VECTYPE __CLC_XCONCAT(__CLC_ARG2_TYPE, __CLC_VECTOR_SIZE) + +#define __CLC_VECTOR_SIZE 2 +_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE +__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) { + __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr; + + return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p), + __CLC_IMPL_FUNCTION(x.s1, p + 1)); +} +#undef __CLC_VECTOR_SIZE + +#define __CLC_VECTOR_SIZE 3 +_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE +__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) { + __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr; + return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p), + __CLC_IMPL_FUNCTION(x.s1, p + 1), + __CLC_IMPL_FUNCTION(x.s2, p + 2)); +} +#undef __CLC_VECTOR_SIZE + +#define __CLC_VECTOR_SIZE 4 +_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE +__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) { + __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr; + return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p), + __CLC_IMPL_FUNCTION(x.s1, p + 1), + __CLC_IMPL_FUNCTION(x.s2, p + 2), + __CLC_IMPL_FUNCTION(x.s3, p + 3)); +} +#undef __CLC_VECTOR_SIZE + +#define __CLC_VECTOR_SIZE 8 +_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE 
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) { + __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr; + return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p), + __CLC_IMPL_FUNCTION(x.s1, p + 1), + __CLC_IMPL_FUNCTION(x.s2, p + 2), + __CLC_IMPL_FUNCTION(x.s3, p + 3), + __CLC_IMPL_FUNCTION(x.s4, p + 4), + __CLC_IMPL_FUNCTION(x.s5, p + 5), + __CLC_IMPL_FUNCTION(x.s6, p + 6), + __CLC_IMPL_FUNCTION(x.s7, p + 7)); +} +#undef __CLC_VECTOR_SIZE + +#define __CLC_VECTOR_SIZE 16 +_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE +__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) { + __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr; + return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p), + __CLC_IMPL_FUNCTION(x.s1, p + 1), + __CLC_IMPL_FUNCTION(x.s2, p + 2), + __CLC_IMPL_FUNCTION(x.s3, p + 3), + __CLC_IMPL_FUNCTION(x.s4, p + 4), + __CLC_IMPL_FUNCTION(x.s5, p + 5), + __CLC_IMPL_FUNCTION(x.s6, p + 6), + __CLC_IMPL_FUNCTION(x.s7, p + 7), + __CLC_IMPL_FUNCTION(x.s8, p + 8), + __CLC_IMPL_FUNCTION(x.s9, p + 9), + __CLC_IMPL_FUNCTION(x.sa, p + 10), + __CLC_IMPL_FUNCTION(x.sb, p + 11), + __CLC_IMPL_FUNCTION(x.sc, p + 12), + __CLC_IMPL_FUNCTION(x.sd, p + 13), + __CLC_IMPL_FUNCTION(x.se, p + 14), + __CLC_IMPL_FUNCTION(x.sf, p + 15)); +} +#undef __CLC_VECTOR_SIZE + +#undef __CLC_RET_VECTYPE +#undef __CLC_ARG1_VECTYPE +#undef __CLC_ARG2_VECTYPE + +#endif // __CLC_SCALAR diff --git a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h index 5f864e1057b8..34745bd47c06 100644 --- a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h +++ b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h @@ -10,8 +10,10 @@ #define __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__ #include <clc/internal/clc.h> +#include <clc/mem_fence/clc_mem_semantic.h> -_CLC_OVERLOAD _CLC_DECL void 
__clc_work_group_barrier(int memory_scope, - int memory_order); +_CLC_OVERLOAD _CLC_DECL void +__clc_work_group_barrier(int memory_scope, int memory_order, + __CLC_MemorySemantics memory_semantics); #endif // __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__ diff --git a/libclc/clc/lib/amdgcn/math/clc_ldexp_override.cl b/libclc/clc/lib/amdgcn/math/clc_ldexp_override.cl index 90bd50ac1551..611f60d1f563 100644 --- a/libclc/clc/lib/amdgcn/math/clc_ldexp_override.cl +++ b/libclc/clc/lib/amdgcn/math/clc_ldexp_override.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #include <clc/math/clc_ldexp.h> diff --git a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl index 9e6460313718..6d2a0962ba20 100644 --- a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl +++ b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl @@ -8,30 +8,50 @@ #include <clc/mem_fence/clc_mem_fence.h> -void __clc_amdgcn_s_waitcnt(unsigned flags); +#define BUILTIN_FENCE_ORDER(memory_order, ...) \ + switch (memory_order) { \ + case __ATOMIC_ACQUIRE: \ + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, __VA_ARGS__); \ + break; \ + case __ATOMIC_RELEASE: \ + __builtin_amdgcn_fence(__ATOMIC_RELEASE, __VA_ARGS__); \ + break; \ + case __ATOMIC_ACQ_REL: \ + __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, __VA_ARGS__); \ + break; \ + case __ATOMIC_SEQ_CST: \ + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, __VA_ARGS__); \ + break; \ + default: \ + __builtin_unreachable(); \ + } \ + break; -// s_waitcnt takes 16bit argument with a combined number of maximum allowed -// pending operations: -// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages -// [7] -- undefined -// [6:4] -- exports, GDS, and mem write -// [3:0] -- vector memory operations +#define BUILTIN_FENCE(memory_scope, memory_order, ...) 
\ + switch (memory_scope) { \ + case __MEMORY_SCOPE_DEVICE: \ + BUILTIN_FENCE_ORDER(memory_order, "agent", ##__VA_ARGS__) \ + case __MEMORY_SCOPE_WRKGRP: \ + BUILTIN_FENCE_ORDER(memory_order, "workgroup", ##__VA_ARGS__) \ + case __MEMORY_SCOPE_WVFRNT: \ + BUILTIN_FENCE_ORDER(memory_order, "wavefront", ##__VA_ARGS__) \ + case __MEMORY_SCOPE_SINGLE: \ + BUILTIN_FENCE_ORDER(memory_order, "singlethread", ##__VA_ARGS__) \ + case __MEMORY_SCOPE_SYSTEM: \ + default: \ + BUILTIN_FENCE_ORDER(memory_order, "", ##__VA_ARGS__) \ + } -// Newer clang supports __builtin_amdgcn_s_waitcnt -#if __clang_major__ >= 5 -#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x) -#else -#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x) -_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt"); -#endif - -_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope, - int memory_order) { - if (memory_scope & __MEMORY_SCOPE_DEVICE) { - // scalar loads are counted with LGKM but we don't know whether - // the compiler turned any loads to scalar - __waitcnt(0); - } else if (memory_scope & __MEMORY_SCOPE_WRKGRP) - __waitcnt(0xff); // LGKM is [12:8] +_CLC_OVERLOAD _CLC_DEF void +__clc_mem_fence(int memory_scope, int memory_order, + __CLC_MemorySemantics memory_semantics) { + if (memory_semantics == __CLC_MEMORY_LOCAL) { + BUILTIN_FENCE(memory_scope, memory_order, "local") + } else if (memory_semantics == __CLC_MEMORY_GLOBAL) { + BUILTIN_FENCE(memory_scope, memory_order, "global") + } else if (memory_semantics == (__CLC_MEMORY_LOCAL | __CLC_MEMORY_GLOBAL)) { + BUILTIN_FENCE(memory_scope, memory_order, "local", "global") + } else { + BUILTIN_FENCE(memory_scope, memory_order) + } } -#undef __waitcnt diff --git a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl index ff3628fa7c33..034e6e7bd8ed 100644 --- a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl +++ 
b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl @@ -9,8 +9,9 @@ #include <clc/mem_fence/clc_mem_fence.h> #include <clc/synchronization/clc_work_group_barrier.h> -_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope, - int memory_order) { - __clc_mem_fence(memory_scope, memory_order); +_CLC_OVERLOAD _CLC_DEF void +__clc_work_group_barrier(int memory_scope, int memory_order, + __CLC_MemorySemantics memory_semantics) { + __clc_mem_fence(memory_scope, memory_order, memory_semantics); __builtin_amdgcn_s_barrier(); } diff --git a/libclc/clc/lib/amdgpu/math/clc_native_exp2.cl b/libclc/clc/lib/amdgpu/math/clc_native_exp2.cl index 5959ea2786a5..a940323b60a7 100644 --- a/libclc/clc/lib/amdgpu/math/clc_native_exp2.cl +++ b/libclc/clc/lib/amdgpu/math/clc_native_exp2.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #define __CLC_FLOAT_ONLY diff --git a/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl b/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl index d2790f3a8260..3a4310baa224 100644 --- a/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl +++ b/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #include <clc/math/clc_fma.h> #include <clc/math/clc_ldexp.h> diff --git a/libclc/clc/lib/clspv/math/clc_sw_fma.cl b/libclc/clc/lib/clspv/math/clc_sw_fma.cl index 1dc9a0e6407b..c28b9441b05f 100644 --- a/libclc/clc/lib/clspv/math/clc_sw_fma.cl +++ b/libclc/clc/lib/clspv/math/clc_sw_fma.cl @@ -11,7 +11,6 @@ // been updated as appropriate. 
#include <clc/clc_as_type.h> -#include <clc/clcmacro.h> #include <clc/float/definitions.h> #include <clc/integer/clc_abs.h> #include <clc/integer/clc_clz.h> diff --git a/libclc/clc/lib/generic/common/clc_degrees.cl b/libclc/clc/lib/generic/common/clc_degrees.cl index b5b07df5075e..79b97f0ca75b 100644 --- a/libclc/clc/lib/generic/common/clc_degrees.cl +++ b/libclc/clc/lib/generic/common/clc_degrees.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #define DEGREES_SINGLE_DEF(TYPE, LITERAL) \ diff --git a/libclc/clc/lib/generic/common/clc_radians.cl b/libclc/clc/lib/generic/common/clc_radians.cl index 497263a15f51..3f013019ad97 100644 --- a/libclc/clc/lib/generic/common/clc_radians.cl +++ b/libclc/clc/lib/generic/common/clc_radians.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #define __CLC_RADIANS_SINGLE_DEF(TYPE, LITERAL) \ diff --git a/libclc/clc/lib/generic/common/clc_smoothstep.cl b/libclc/clc/lib/generic/common/clc_smoothstep.cl index cf143b8fac23..b409c7d7b644 100644 --- a/libclc/clc/lib/generic/common/clc_smoothstep.cl +++ b/libclc/clc/lib/generic/common/clc_smoothstep.cl @@ -5,7 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #include <clc/shared/clc_clamp.h> diff --git a/libclc/clc/lib/generic/common/clc_step.cl b/libclc/clc/lib/generic/common/clc_step.cl index c21c27a3e940..721489948233 100644 --- a/libclc/clc/lib/generic/common/clc_step.cl +++ b/libclc/clc/lib/generic/common/clc_step.cl @@ -6,7 +6,5 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> - #define __CLC_BODY <clc_step.inc> #include <clc/math/gentype.inc> 
diff --git a/libclc/clc/lib/generic/integer/clc_clz.cl b/libclc/clc/lib/generic/integer/clc_clz.cl index 37ba4dbfd809..48c4400f199f 100644 --- a/libclc/clc/lib/generic/integer/clc_clz.cl +++ b/libclc/clc/lib/generic/integer/clc_clz.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/integer/clc_clz.h> #include <clc/internal/clc.h> diff --git a/libclc/clc/lib/generic/integer/clc_ctz.cl b/libclc/clc/lib/generic/integer/clc_ctz.cl index 5cf4c19811db..42cfa90c3e66 100644 --- a/libclc/clc/lib/generic/integer/clc_ctz.cl +++ b/libclc/clc/lib/generic/integer/clc_ctz.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/integer/clc_ctz.h> #include <clc/internal/clc.h> diff --git a/libclc/clc/lib/generic/integer/clc_mad_sat.cl b/libclc/clc/lib/generic/integer/clc_mad_sat.cl index 7c6aaffe6c5c..93088dc9f0b6 100644 --- a/libclc/clc/lib/generic/integer/clc_mad_sat.cl +++ b/libclc/clc/lib/generic/integer/clc_mad_sat.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/integer/clc_add_sat.h> #include <clc/integer/clc_mad24.h> #include <clc/integer/clc_mul_hi.h> diff --git a/libclc/clc/lib/generic/math/clc_cbrt.cl b/libclc/clc/lib/generic/math/clc_cbrt.cl index 26869085933a..105f6329d5ba 100644 --- a/libclc/clc/lib/generic/math/clc_cbrt.cl +++ b/libclc/clc/lib/generic/math/clc_cbrt.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #include <clc/math/clc_copysign.h> #include <clc/math/clc_fabs.h> diff --git a/libclc/clc/lib/generic/math/clc_cos.cl b/libclc/clc/lib/generic/math/clc_cos.cl index 0c9dc287aa0b..e7e4d6ad39ed 100644 --- 
a/libclc/clc/lib/generic/math/clc_cos.cl +++ b/libclc/clc/lib/generic/math/clc_cos.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/float/definitions.h> #include <clc/math/clc_fabs.h> #include <clc/math/clc_sincos_helpers.h> diff --git a/libclc/clc/lib/generic/math/clc_erf.cl b/libclc/clc/lib/generic/math/clc_erf.cl index 34c7d586131e..61a7c9d684aa 100644 --- a/libclc/clc/lib/generic/math/clc_erf.cl +++ b/libclc/clc/lib/generic/math/clc_erf.cl @@ -507,5 +507,5 @@ _CLC_OVERLOAD _CLC_DEF half __clc_erf(half x) { #endif #define __CLC_FUNCTION __clc_erf -#define __CLC_BODY <clc/shared/unary_def_scalarize.inc> +#define __CLC_BODY <clc/shared/unary_def_scalarize_loop.inc> #include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_erfc.cl b/libclc/clc/lib/generic/math/clc_erfc.cl index 7922807818ea..01dbcd0c39ae 100644 --- a/libclc/clc/lib/generic/math/clc_erfc.cl +++ b/libclc/clc/lib/generic/math/clc_erfc.cl @@ -518,5 +518,5 @@ _CLC_OVERLOAD _CLC_DEF half __clc_erfc(half x) { #endif #define __CLC_FUNCTION __clc_erfc -#define __CLC_BODY <clc/shared/unary_def_scalarize.inc> +#define __CLC_BODY <clc/shared/unary_def_scalarize_loop.inc> #include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_exp10.cl b/libclc/clc/lib/generic/math/clc_exp10.cl index 04e912ed9888..0c394ee19475 100644 --- a/libclc/clc/lib/generic/math/clc_exp10.cl +++ b/libclc/clc/lib/generic/math/clc_exp10.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #include <clc/math/clc_fma.h> #include <clc/math/clc_ldexp.h> diff --git a/libclc/clc/lib/generic/math/clc_fmod.cl b/libclc/clc/lib/generic/math/clc_fmod.cl index 3cb01e67292e..f17757c9c370 100644 --- a/libclc/clc/lib/generic/math/clc_fmod.cl +++ 
b/libclc/clc/lib/generic/math/clc_fmod.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/integer/clc_clz.h> #include <clc/internal/clc.h> #include <clc/math/clc_floor.h> diff --git a/libclc/clc/lib/generic/math/clc_fract.cl b/libclc/clc/lib/generic/math/clc_fract.cl index 7db43ef87871..681077cb9c89 100644 --- a/libclc/clc/lib/generic/math/clc_fract.cl +++ b/libclc/clc/lib/generic/math/clc_fract.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #include <clc/math/clc_floor.h> #include <clc/math/clc_fmin.h> diff --git a/libclc/clc/lib/generic/math/clc_frexp.inc b/libclc/clc/lib/generic/math/clc_frexp.inc index d212b6a1b337..763266bc0493 100644 --- a/libclc/clc/lib/generic/math/clc_frexp.inc +++ b/libclc/clc/lib/generic/math/clc_frexp.inc @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/utils.h> #if __CLC_FPSIZE == 32 diff --git a/libclc/clc/lib/generic/math/clc_hypot.cl b/libclc/clc/lib/generic/math/clc_hypot.cl index 6990be3083eb..c934ab29da91 100644 --- a/libclc/clc/lib/generic/math/clc_hypot.cl +++ b/libclc/clc/lib/generic/math/clc_hypot.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/integer/clc_abs.h> #include <clc/internal/clc.h> #include <clc/math/clc_fma.h> diff --git a/libclc/clc/lib/generic/math/clc_ilogb.cl b/libclc/clc/lib/generic/math/clc_ilogb.cl index c33ed9fe9b04..1ccc5754ca7f 100644 --- a/libclc/clc/lib/generic/math/clc_ilogb.cl +++ b/libclc/clc/lib/generic/math/clc_ilogb.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include 
<clc/clcmacro.h> #include <clc/float/definitions.h> #include <clc/integer/clc_clz.h> #include <clc/internal/clc.h> diff --git a/libclc/clc/lib/generic/math/clc_ldexp.cl b/libclc/clc/lib/generic/math/clc_ldexp.cl index a5327c58e424..f9252a75ab4b 100644 --- a/libclc/clc/lib/generic/math/clc_ldexp.cl +++ b/libclc/clc/lib/generic/math/clc_ldexp.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/integer/clc_add_sat.h> #include <clc/internal/clc.h> #include <clc/math/clc_subnormal_config.h> @@ -15,9 +14,7 @@ #include <clc/relational/clc_isnan.h> #include <clc/shared/clc_clamp.h> -#define _CLC_DEF_ldexp _CLC_DEF __attribute__((weak)) - -_CLC_DEF_ldexp _CLC_OVERLOAD float __clc_ldexp(float x, int n) { +_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) { if (!__clc_fp32_subnormals_supported()) { // This treats subnormals as zeros @@ -90,7 +87,7 @@ _CLC_DEF_ldexp _CLC_OVERLOAD float __clc_ldexp(float x, int n) { #pragma OPENCL EXTENSION cl_khr_fp64 : enable -_CLC_DEF_ldexp _CLC_OVERLOAD double __clc_ldexp(double x, int n) { +_CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) { long l = __clc_as_ulong(x); int e = (l >> 52) & 0x7ff; long s = l & 0x8000000000000000; @@ -125,14 +122,13 @@ _CLC_DEF_ldexp _CLC_OVERLOAD double __clc_ldexp(double x, int n) { #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_OVERLOAD _CLC_DEF_ldexp half __clc_ldexp(half x, int n) { +_CLC_OVERLOAD _CLC_DEF half __clc_ldexp(half x, int n) { return (half)__clc_ldexp((float)x, n); } #endif #define __CLC_FUNCTION __clc_ldexp -#define __CLC_DEF_SPEC _CLC_DEF_ldexp #define __CLC_ARG2_TYPE int #define __CLC_BODY <clc/shared/binary_def_scalarize.inc> #include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_lgamma_r.cl b/libclc/clc/lib/generic/math/clc_lgamma_r.cl index ffacfc17a4d0..2322ad54508f 100644 --- a/libclc/clc/lib/generic/math/clc_lgamma_r.cl +++ 
b/libclc/clc/lib/generic/math/clc_lgamma_r.cl @@ -279,9 +279,6 @@ _CLC_OVERLOAD _CLC_DEF float __clc_lgamma_r(float x, private int *signp) { return r; } -_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_lgamma_r, float, - private, int) - #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable // ==================================================== @@ -585,8 +582,6 @@ _CLC_OVERLOAD _CLC_DEF double __clc_lgamma_r(double x, private int *ip) { return r; } -_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_lgamma_r, double, - private, int) #endif #ifdef cl_khr_fp16 @@ -597,11 +592,17 @@ _CLC_OVERLOAD _CLC_DEF half __clc_lgamma_r(half x, private int *iptr) { return (half)__clc_lgamma_r((float)x, iptr); } -_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_lgamma_r, half, - private, int); - #endif +#define __CLC_FUNCTION __clc_lgamma_r +#define __CLC_ARG2_TYPE int +#define __CLC_ADDRSPACE private +#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc> +#include <clc/math/gentype.inc> +#undef __CLC_ADDRSPACE +#undef __CLC_ARG2_TYPE +#undef __CLC_FUNCTION + #define __CLC_ADDRSPACE global #define __CLC_BODY <clc_lgamma_r.inc> #include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_log.cl b/libclc/clc/lib/generic/math/clc_log.cl index 7eb0180de971..74ee1ed71375 100644 --- a/libclc/clc/lib/generic/math/clc_log.cl +++ b/libclc/clc/lib/generic/math/clc_log.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/float/definitions.h> #include <clc/internal/clc.h> #include <clc/math/clc_log2.h> diff --git a/libclc/clc/lib/generic/math/clc_log10.cl b/libclc/clc/lib/generic/math/clc_log10.cl index 35489f467562..741d21b5d831 100644 --- a/libclc/clc/lib/generic/math/clc_log10.cl +++ b/libclc/clc/lib/generic/math/clc_log10.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include 
<clc/clcmacro.h> #include <clc/internal/clc.h> #include <clc/math/tables.h> diff --git a/libclc/clc/lib/generic/math/clc_log2.cl b/libclc/clc/lib/generic/math/clc_log2.cl index d9d8cef54255..432ea5737bc2 100644 --- a/libclc/clc/lib/generic/math/clc_log2.cl +++ b/libclc/clc/lib/generic/math/clc_log2.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #include <clc/math/tables.h> diff --git a/libclc/clc/lib/generic/math/clc_logb.cl b/libclc/clc/lib/generic/math/clc_logb.cl index f571a11d0c38..879676eedff1 100644 --- a/libclc/clc/lib/generic/math/clc_logb.cl +++ b/libclc/clc/lib/generic/math/clc_logb.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/float/definitions.h> #include <clc/integer/clc_clz.h> #include <clc/internal/clc.h> diff --git a/libclc/clc/lib/generic/math/clc_nextafter.cl b/libclc/clc/lib/generic/math/clc_nextafter.cl index 40e1b5a2c57f..48e28c08edf7 100644 --- a/libclc/clc/lib/generic/math/clc_nextafter.cl +++ b/libclc/clc/lib/generic/math/clc_nextafter.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #include <clc/math/clc_fabs.h> #include <clc/relational/clc_isnan.h> diff --git a/libclc/clc/lib/generic/math/clc_pow.cl b/libclc/clc/lib/generic/math/clc_pow.cl index 3d371938e401..70d3d614a8d3 100644 --- a/libclc/clc/lib/generic/math/clc_pow.cl +++ b/libclc/clc/lib/generic/math/clc_pow.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #include <clc/math/clc_fabs.h> #include <clc/math/clc_fma.h> diff --git a/libclc/clc/lib/generic/math/clc_pown.cl 
b/libclc/clc/lib/generic/math/clc_pown.cl index 074f212a31b7..5aa9560174b9 100644 --- a/libclc/clc/lib/generic/math/clc_pown.cl +++ b/libclc/clc/lib/generic/math/clc_pown.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #include <clc/math/clc_fabs.h> #include <clc/math/clc_fma.h> diff --git a/libclc/clc/lib/generic/math/clc_powr.cl b/libclc/clc/lib/generic/math/clc_powr.cl index c596a552f07f..0556ec97d6f3 100644 --- a/libclc/clc/lib/generic/math/clc_powr.cl +++ b/libclc/clc/lib/generic/math/clc_powr.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #include <clc/math/clc_fabs.h> #include <clc/math/clc_fma.h> diff --git a/libclc/clc/lib/generic/math/clc_remainder.cl b/libclc/clc/lib/generic/math/clc_remainder.cl index 488dde73f67a..622f05f85100 100644 --- a/libclc/clc/lib/generic/math/clc_remainder.cl +++ b/libclc/clc/lib/generic/math/clc_remainder.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/integer/clc_clz.h> #include <clc/internal/clc.h> #include <clc/math/clc_floor.h> diff --git a/libclc/clc/lib/generic/math/clc_remquo.cl b/libclc/clc/lib/generic/math/clc_remquo.cl index 1505d1c3fb62..fd83ead06d89 100644 --- a/libclc/clc/lib/generic/math/clc_remquo.cl +++ b/libclc/clc/lib/generic/math/clc_remquo.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/integer/clc_clz.h> #include <clc/internal/clc.h> #include <clc/math/clc_floor.h> diff --git a/libclc/clc/lib/generic/math/clc_rsqrt.inc b/libclc/clc/lib/generic/math/clc_rsqrt.inc index 4c04155a932c..07aad16f9191 
100644 --- a/libclc/clc/lib/generic/math/clc_rsqrt.inc +++ b/libclc/clc/lib/generic/math/clc_rsqrt.inc @@ -6,8 +6,7 @@ // //===----------------------------------------------------------------------===// -__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE -__clc_rsqrt(__CLC_GENTYPE val) { +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_rsqrt(__CLC_GENTYPE val) { #pragma clang fp contract(fast) return __CLC_FP_LIT(1.0) / __builtin_elementwise_sqrt(val); } diff --git a/libclc/clc/lib/generic/math/clc_sin.cl b/libclc/clc/lib/generic/math/clc_sin.cl index 0ff9739c6a84..741383f94c45 100644 --- a/libclc/clc/lib/generic/math/clc_sin.cl +++ b/libclc/clc/lib/generic/math/clc_sin.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/float/definitions.h> #include <clc/internal/clc.h> #include <clc/math/clc_fabs.h> diff --git a/libclc/clc/lib/generic/math/clc_sqrt.inc b/libclc/clc/lib/generic/math/clc_sqrt.inc index 61e341993f5c..e15dcf75ac3f 100644 --- a/libclc/clc/lib/generic/math/clc_sqrt.inc +++ b/libclc/clc/lib/generic/math/clc_sqrt.inc @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE -__clc_sqrt(__CLC_GENTYPE val) { +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) { return __builtin_elementwise_sqrt(val); } diff --git a/libclc/clc/lib/generic/math/clc_sw_fma.cl b/libclc/clc/lib/generic/math/clc_sw_fma.cl index 550ca5e18f3f..606e4df320a8 100644 --- a/libclc/clc/lib/generic/math/clc_sw_fma.cl +++ b/libclc/clc/lib/generic/math/clc_sw_fma.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_as_type.h> -#include <clc/clcmacro.h> #include <clc/float/definitions.h> #include <clc/integer/clc_abs.h> #include <clc/integer/clc_clz.h> diff --git 
a/libclc/clc/lib/generic/relational/clc_bitselect.cl b/libclc/clc/lib/generic/relational/clc_bitselect.cl index 9c6e82e6be4b..b58fb8d3233f 100644 --- a/libclc/clc/lib/generic/relational/clc_bitselect.cl +++ b/libclc/clc/lib/generic/relational/clc_bitselect.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> #define __CLC_BODY <clc_bitselect.inc> diff --git a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl index b3e2375e755a..5f96ef547764 100644 --- a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl +++ b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl @@ -8,8 +8,9 @@ #include <clc/mem_fence/clc_mem_fence.h> -_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope, - int memory_order) { +_CLC_OVERLOAD _CLC_DEF void +__clc_mem_fence(int memory_scope, int memory_order, + __CLC_MemorySemantics memory_semantics) { if (memory_scope & (__MEMORY_SCOPE_DEVICE | __MEMORY_SCOPE_WRKGRP)) __nvvm_membar_cta(); } diff --git a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl index 6cb37a38f06a..349c0f484513 100644 --- a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl +++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl @@ -8,7 +8,8 @@ #include <clc/synchronization/clc_work_group_barrier.h> -_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope, - int memory_order) { +_CLC_OVERLOAD _CLC_DEF void +__clc_work_group_barrier(int memory_scope, int memory_order, + __CLC_MemorySemantics memory_semantics) { __syncthreads(); } diff --git a/libclc/clc/lib/r600/math/clc_native_rsqrt.cl b/libclc/clc/lib/r600/math/clc_native_rsqrt.cl index da001574afc0..cef106e3b4e9 100644 --- a/libclc/clc/lib/r600/math/clc_native_rsqrt.cl +++ 
b/libclc/clc/lib/r600/math/clc_native_rsqrt.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> _CLC_OVERLOAD _CLC_DEF float __clc_native_rsqrt(float x) { diff --git a/libclc/clc/lib/r600/math/clc_rsqrt_override.cl b/libclc/clc/lib/r600/math/clc_rsqrt_override.cl index f20046b34117..5ef9ad541afb 100644 --- a/libclc/clc/lib/r600/math/clc_rsqrt_override.cl +++ b/libclc/clc/lib/r600/math/clc_rsqrt_override.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> _CLC_OVERLOAD _CLC_DEF float __clc_rsqrt(float x) { diff --git a/libclc/clc/lib/spirv/math/clc_fmax.cl b/libclc/clc/lib/spirv/math/clc_fmax.cl index f63a9d00c642..101f35c7108c 100644 --- a/libclc/clc/lib/spirv/math/clc_fmax.cl +++ b/libclc/clc/lib/spirv/math/clc_fmax.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> _CLC_DEF _CLC_OVERLOAD float __clc_fmax(float x, float y) { diff --git a/libclc/clc/lib/spirv/math/clc_fmin.cl b/libclc/clc/lib/spirv/math/clc_fmin.cl index ddb0126035f8..9bef3a92d5ba 100644 --- a/libclc/clc/lib/spirv/math/clc_fmin.cl +++ b/libclc/clc/lib/spirv/math/clc_fmin.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <clc/clcmacro.h> #include <clc/internal/clc.h> _CLC_DEF _CLC_OVERLOAD float __clc_fmin(float x, float y) { |
