summary | refs | log | tree | commit | diff
path: root/libclc
diff options
context:
space:
mode:
Diffstat (limited to 'libclc')
-rw-r--r--libclc/CMakeLists.txt7
-rw-r--r--libclc/clc/include/clc/clc_convert.h2
-rw-r--r--libclc/clc/include/clc/clcmacro.h69
-rw-r--r--libclc/clc/include/clc/float/definitions.h4
-rw-r--r--libclc/clc/include/clc/mem_fence/clc_mem_fence.h6
-rw-r--r--libclc/clc/include/clc/mem_fence/clc_mem_semantic.h21
-rw-r--r--libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc28
-rw-r--r--libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc106
-rw-r--r--libclc/clc/include/clc/synchronization/clc_work_group_barrier.h6
-rw-r--r--libclc/clc/lib/amdgcn/math/clc_ldexp_override.cl1
-rw-r--r--libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl68
-rw-r--r--libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl7
-rw-r--r--libclc/clc/lib/amdgpu/math/clc_native_exp2.cl1
-rw-r--r--libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl1
-rw-r--r--libclc/clc/lib/clspv/math/clc_sw_fma.cl1
-rw-r--r--libclc/clc/lib/generic/common/clc_degrees.cl1
-rw-r--r--libclc/clc/lib/generic/common/clc_radians.cl1
-rw-r--r--libclc/clc/lib/generic/common/clc_smoothstep.cl1
-rw-r--r--libclc/clc/lib/generic/common/clc_step.cl2
-rw-r--r--libclc/clc/lib/generic/integer/clc_clz.cl1
-rw-r--r--libclc/clc/lib/generic/integer/clc_ctz.cl1
-rw-r--r--libclc/clc/lib/generic/integer/clc_mad_sat.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_cbrt.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_cos.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_erf.cl2
-rw-r--r--libclc/clc/lib/generic/math/clc_erfc.cl2
-rw-r--r--libclc/clc/lib/generic/math/clc_exp10.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_fmod.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_fract.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_frexp.inc1
-rw-r--r--libclc/clc/lib/generic/math/clc_hypot.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_ilogb.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_ldexp.cl10
-rw-r--r--libclc/clc/lib/generic/math/clc_lgamma_r.cl17
-rw-r--r--libclc/clc/lib/generic/math/clc_log.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_log10.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_log2.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_logb.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_nextafter.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_pow.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_pown.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_powr.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_remainder.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_remquo.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_rsqrt.inc3
-rw-r--r--libclc/clc/lib/generic/math/clc_sin.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_sqrt.inc3
-rw-r--r--libclc/clc/lib/generic/math/clc_sw_fma.cl1
-rw-r--r--libclc/clc/lib/generic/relational/clc_bitselect.cl1
-rw-r--r--libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl5
-rw-r--r--libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl5
-rw-r--r--libclc/clc/lib/r600/math/clc_native_rsqrt.cl1
-rw-r--r--libclc/clc/lib/r600/math/clc_rsqrt_override.cl1
-rw-r--r--libclc/clc/lib/spirv/math/clc_fmax.cl1
-rw-r--r--libclc/clc/lib/spirv/math/clc_fmin.cl1
-rw-r--r--libclc/cmake/modules/AddLibclc.cmake20
-rw-r--r--libclc/opencl/include/clc/opencl/synchronization/utils.h14
-rw-r--r--libclc/opencl/lib/amdgcn/mem_fence/fence.cl5
-rw-r--r--libclc/opencl/lib/amdgcn/synchronization/barrier.cl5
-rw-r--r--libclc/opencl/lib/generic/common/sign.cl1
-rw-r--r--libclc/opencl/lib/generic/common/smoothstep.cl1
-rw-r--r--libclc/opencl/lib/generic/math/atan2.cl1
-rw-r--r--libclc/opencl/lib/generic/math/atan2pi.cl1
-rw-r--r--libclc/opencl/lib/generic/math/log.cl1
-rw-r--r--libclc/opencl/lib/generic/math/log10.cl1
-rw-r--r--libclc/opencl/lib/generic/math/log2.cl1
-rw-r--r--libclc/opencl/lib/generic/math/nan.cl1
-rw-r--r--libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl5
-rw-r--r--libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl5
69 files changed, 280 insertions, 190 deletions
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 756e097dcf12..c75f450d8d3a 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -99,6 +99,7 @@ else()
# Setup the paths where libclc runtimes should be stored. By default, in an
# in-tree build we place the libraries in clang's resource directory.
+ include(GetClangResourceDir)
get_clang_resource_dir( LIBCLC_OUTPUT_DIR PREFIX ${LLVM_LIBRARY_OUTPUT_INTDIR}/.. )
# Note we do not adhere to LLVM_ENABLE_PER_TARGET_RUNTIME_DIR.
@@ -393,6 +394,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
message( STATUS " device: ${d} ( ${${d}_aliases} )" )
+ set( MACRO_ARCH ${ARCH} )
if ( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 )
set( build_flags -O0 -finline-hint-functions -DCLC_SPIRV )
set( opt_flags )
@@ -411,7 +413,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
else()
set( build_flags )
set( opt_flags -O3 )
- set( MACRO_ARCH ${ARCH} )
endif()
set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" )
@@ -460,14 +461,12 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
-D__CLC_GENERIC_ADDRSPACE_VAL=${generic_addrspace_val}
)
- set( clc_build_flags ${build_flags} -DCLC_INTERNAL )
-
add_libclc_builtin_set(
CLC_INTERNAL
ARCH ${ARCH}
ARCH_SUFFIX clc-${arch_suffix}
TRIPLE ${clang_triple}
- COMPILE_FLAGS ${clc_build_flags}
+ COMPILE_FLAGS ${build_flags}
OPT_FLAGS ${opt_flags}
LIB_FILES ${clc_lib_files}
GEN_FILES ${clc_gen_files}
diff --git a/libclc/clc/include/clc/clc_convert.h b/libclc/clc/include/clc/clc_convert.h
index ab41e5abb9d6..97cfa667c8b1 100644
--- a/libclc/clc/include/clc/clc_convert.h
+++ b/libclc/clc/include/clc/clc_convert.h
@@ -9,7 +9,7 @@
#ifndef __CLC_CLC_CONVERT_H__
#define __CLC_CLC_CONVERT_H__
-#include <clc/clcmacro.h>
+#include <clc/internal/clc.h>
#define _CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
_CLC_OVERLOAD _CLC_DECL TO_TYPE __clc_convert_##TO_TYPE##SUFFIX(FROM_TYPE x);
diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h
deleted file mode 100644
index 9fa11489b145..000000000000
--- a/libclc/clc/include/clc/clcmacro.h
+++ /dev/null
@@ -1,69 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __CLC_CLCMACRO_H__
-#define __CLC_CLCMACRO_H__
-
-#include <clc/internal/clc.h>
-#include <clc/utils.h>
-
-#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, __CLC_FUNCTION, ARG1_TYPE, \
- ADDR_SPACE, ARG2_TYPE) \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 2))(__CLC_FUNCTION(x.s0, ptr), \
- __CLC_FUNCTION(x.s1, ptr + 1)); \
- } \
- \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 3))(__CLC_FUNCTION(x.s0, ptr), \
- __CLC_FUNCTION(x.s1, ptr + 1), \
- __CLC_FUNCTION(x.s2, ptr + 2)); \
- } \
- \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 4))( \
- __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
- __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3)); \
- } \
- \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 8))( \
- __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
- __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \
- __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \
- __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7)); \
- } \
- \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 16))( \
- __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
- __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \
- __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \
- __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7), \
- __CLC_FUNCTION(x.s8, ptr + 8), __CLC_FUNCTION(x.s9, ptr + 9), \
- __CLC_FUNCTION(x.sa, ptr + 10), __CLC_FUNCTION(x.sb, ptr + 11), \
- __CLC_FUNCTION(x.sc, ptr + 12), __CLC_FUNCTION(x.sd, ptr + 13), \
- __CLC_FUNCTION(x.se, ptr + 14), __CLC_FUNCTION(x.sf, ptr + 15)); \
- }
-
-#endif // __CLC_CLCMACRO_H__
diff --git a/libclc/clc/include/clc/float/definitions.h b/libclc/clc/include/clc/float/definitions.h
index 9db49fa54d2e..93d2b5b391c5 100644
--- a/libclc/clc/include/clc/float/definitions.h
+++ b/libclc/clc/include/clc/float/definitions.h
@@ -70,10 +70,6 @@
#define M_SQRT2 0x1.6a09e667f3bcdp+0
#define M_SQRT1_2 0x1.6a09e667f3bcdp-1
-#ifdef __CLC_INTERNAL
-#define M_LOG210 0x1.a934f0979a371p+1
-#endif
-
#endif
#ifdef cl_khr_fp16
diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h
index 2321634c7684..0776caddde0d 100644
--- a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h
+++ b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h
@@ -10,8 +10,10 @@
#define __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
#include <clc/internal/clc.h>
+#include <clc/mem_fence/clc_mem_semantic.h>
-_CLC_OVERLOAD _CLC_DECL void __clc_mem_fence(int memory_scope,
- int memory_order);
+_CLC_OVERLOAD _CLC_DECL void
+__clc_mem_fence(int memory_scope, int memory_order,
+ __CLC_MemorySemantics memory_semantics);
#endif // __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h b/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h
new file mode 100644
index 000000000000..4d9f5f1db8ee
--- /dev/null
+++ b/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h
@@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MEM_FENCE_CLC_MEM_SEMANTIC_H__
+#define __CLC_MEM_FENCE_CLC_MEM_SEMANTIC_H__
+
+// The memory or address space to which the memory ordering is applied.
+typedef enum __CLC_MemorySemantics {
+ __CLC_MEMORY_PRIVATE = 1 << 0,
+ __CLC_MEMORY_GLOBAL = 1 << 1,
+ __CLC_MEMORY_CONSTANT = 1 << 2,
+ __CLC_MEMORY_LOCAL = 1 << 3,
+ __CLC_MEMORY_GENERIC = 1 << 4,
+} __CLC_MemorySemantics;
+
+#endif // __CLC_MEM_FENCE_CLC_MEM_SEMANTIC_H__
diff --git a/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
new file mode 100644
index 000000000000..544057b0e137
--- /dev/null
+++ b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/utils.h>
+
+#if __CLC_VECSIZE_OR_1 >= 2
+
+#ifndef __CLC_IMPL_FUNCTION
+#define __CLC_IMPL_FUNCTION __CLC_FUNCTION
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x) {
+ union {
+ __CLC_GENTYPE vec;
+ __CLC_SCALAR_GENTYPE arr[__CLC_VECSIZE_OR_1];
+ } u_x, u_result;
+ u_x.vec = x;
+ for (int i = 0; i < __CLC_VECSIZE_OR_1; ++i)
+ u_result.arr[i] = __CLC_IMPL_FUNCTION(u_x.arr[i]);
+ return u_result.vec;
+}
+
+#endif // __CLC_VECSIZE_OR_1 >= 2
diff --git a/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc b/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc
new file mode 100644
index 000000000000..fff91d36e626
--- /dev/null
+++ b/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc
@@ -0,0 +1,106 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/utils.h>
+
+#ifdef __CLC_SCALAR
+
+#ifndef __CLC_IMPL_FUNCTION
+#define __CLC_IMPL_FUNCTION __CLC_FUNCTION
+#endif
+
+#ifndef __CLC_RET_TYPE
+#define __CLC_RET_TYPE __CLC_GENTYPE
+#endif
+
+#ifndef __CLC_ARG1_TYPE
+#define __CLC_ARG1_TYPE __CLC_GENTYPE
+#endif
+
+#ifndef __CLC_ARG2_TYPE
+#define __CLC_ARG2_TYPE __CLC_GENTYPE
+#endif
+
+#define __CLC_RET_VECTYPE __CLC_XCONCAT(__CLC_RET_TYPE, __CLC_VECTOR_SIZE)
+#define __CLC_ARG1_VECTYPE __CLC_XCONCAT(__CLC_ARG1_TYPE, __CLC_VECTOR_SIZE)
+#define __CLC_ARG2_VECTYPE __CLC_XCONCAT(__CLC_ARG2_TYPE, __CLC_VECTOR_SIZE)
+
+#define __CLC_VECTOR_SIZE 2
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+ __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+
+ return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+ __CLC_IMPL_FUNCTION(x.s1, p + 1));
+}
+#undef __CLC_VECTOR_SIZE
+
+#define __CLC_VECTOR_SIZE 3
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+ __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+ return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+ __CLC_IMPL_FUNCTION(x.s1, p + 1),
+ __CLC_IMPL_FUNCTION(x.s2, p + 2));
+}
+#undef __CLC_VECTOR_SIZE
+
+#define __CLC_VECTOR_SIZE 4
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+ __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+ return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+ __CLC_IMPL_FUNCTION(x.s1, p + 1),
+ __CLC_IMPL_FUNCTION(x.s2, p + 2),
+ __CLC_IMPL_FUNCTION(x.s3, p + 3));
+}
+#undef __CLC_VECTOR_SIZE
+
+#define __CLC_VECTOR_SIZE 8
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+ __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+ return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+ __CLC_IMPL_FUNCTION(x.s1, p + 1),
+ __CLC_IMPL_FUNCTION(x.s2, p + 2),
+ __CLC_IMPL_FUNCTION(x.s3, p + 3),
+ __CLC_IMPL_FUNCTION(x.s4, p + 4),
+ __CLC_IMPL_FUNCTION(x.s5, p + 5),
+ __CLC_IMPL_FUNCTION(x.s6, p + 6),
+ __CLC_IMPL_FUNCTION(x.s7, p + 7));
+}
+#undef __CLC_VECTOR_SIZE
+
+#define __CLC_VECTOR_SIZE 16
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+ __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+ return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+ __CLC_IMPL_FUNCTION(x.s1, p + 1),
+ __CLC_IMPL_FUNCTION(x.s2, p + 2),
+ __CLC_IMPL_FUNCTION(x.s3, p + 3),
+ __CLC_IMPL_FUNCTION(x.s4, p + 4),
+ __CLC_IMPL_FUNCTION(x.s5, p + 5),
+ __CLC_IMPL_FUNCTION(x.s6, p + 6),
+ __CLC_IMPL_FUNCTION(x.s7, p + 7),
+ __CLC_IMPL_FUNCTION(x.s8, p + 8),
+ __CLC_IMPL_FUNCTION(x.s9, p + 9),
+ __CLC_IMPL_FUNCTION(x.sa, p + 10),
+ __CLC_IMPL_FUNCTION(x.sb, p + 11),
+ __CLC_IMPL_FUNCTION(x.sc, p + 12),
+ __CLC_IMPL_FUNCTION(x.sd, p + 13),
+ __CLC_IMPL_FUNCTION(x.se, p + 14),
+ __CLC_IMPL_FUNCTION(x.sf, p + 15));
+}
+#undef __CLC_VECTOR_SIZE
+
+#undef __CLC_RET_VECTYPE
+#undef __CLC_ARG1_VECTYPE
+#undef __CLC_ARG2_VECTYPE
+
+#endif // __CLC_SCALAR
diff --git a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
index 5f864e1057b8..34745bd47c06 100644
--- a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
+++ b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
@@ -10,8 +10,10 @@
#define __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
#include <clc/internal/clc.h>
+#include <clc/mem_fence/clc_mem_semantic.h>
-_CLC_OVERLOAD _CLC_DECL void __clc_work_group_barrier(int memory_scope,
- int memory_order);
+_CLC_OVERLOAD _CLC_DECL void
+__clc_work_group_barrier(int memory_scope, int memory_order,
+ __CLC_MemorySemantics memory_semantics);
#endif // __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
diff --git a/libclc/clc/lib/amdgcn/math/clc_ldexp_override.cl b/libclc/clc/lib/amdgcn/math/clc_ldexp_override.cl
index 90bd50ac1551..611f60d1f563 100644
--- a/libclc/clc/lib/amdgcn/math/clc_ldexp_override.cl
+++ b/libclc/clc/lib/amdgcn/math/clc_ldexp_override.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_ldexp.h>
diff --git a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
index 9e6460313718..6d2a0962ba20 100644
--- a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
+++ b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
@@ -8,30 +8,50 @@
#include <clc/mem_fence/clc_mem_fence.h>
-void __clc_amdgcn_s_waitcnt(unsigned flags);
+#define BUILTIN_FENCE_ORDER(memory_order, ...) \
+ switch (memory_order) { \
+ case __ATOMIC_ACQUIRE: \
+ __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, __VA_ARGS__); \
+ break; \
+ case __ATOMIC_RELEASE: \
+ __builtin_amdgcn_fence(__ATOMIC_RELEASE, __VA_ARGS__); \
+ break; \
+ case __ATOMIC_ACQ_REL: \
+ __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, __VA_ARGS__); \
+ break; \
+ case __ATOMIC_SEQ_CST: \
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, __VA_ARGS__); \
+ break; \
+ default: \
+ __builtin_unreachable(); \
+ } \
+ break;
-// s_waitcnt takes 16bit argument with a combined number of maximum allowed
-// pending operations:
-// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
-// [7] -- undefined
-// [6:4] -- exports, GDS, and mem write
-// [3:0] -- vector memory operations
+#define BUILTIN_FENCE(memory_scope, memory_order, ...) \
+ switch (memory_scope) { \
+ case __MEMORY_SCOPE_DEVICE: \
+ BUILTIN_FENCE_ORDER(memory_order, "agent", ##__VA_ARGS__) \
+ case __MEMORY_SCOPE_WRKGRP: \
+ BUILTIN_FENCE_ORDER(memory_order, "workgroup", ##__VA_ARGS__) \
+ case __MEMORY_SCOPE_WVFRNT: \
+ BUILTIN_FENCE_ORDER(memory_order, "wavefront", ##__VA_ARGS__) \
+ case __MEMORY_SCOPE_SINGLE: \
+ BUILTIN_FENCE_ORDER(memory_order, "singlethread", ##__VA_ARGS__) \
+ case __MEMORY_SCOPE_SYSTEM: \
+ default: \
+ BUILTIN_FENCE_ORDER(memory_order, "", ##__VA_ARGS__) \
+ }
-// Newer clang supports __builtin_amdgcn_s_waitcnt
-#if __clang_major__ >= 5
-#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
-#else
-#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
-_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
-#endif
-
-_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope,
- int memory_order) {
- if (memory_scope & __MEMORY_SCOPE_DEVICE) {
- // scalar loads are counted with LGKM but we don't know whether
- // the compiler turned any loads to scalar
- __waitcnt(0);
- } else if (memory_scope & __MEMORY_SCOPE_WRKGRP)
- __waitcnt(0xff); // LGKM is [12:8]
+_CLC_OVERLOAD _CLC_DEF void
+__clc_mem_fence(int memory_scope, int memory_order,
+ __CLC_MemorySemantics memory_semantics) {
+ if (memory_semantics == __CLC_MEMORY_LOCAL) {
+ BUILTIN_FENCE(memory_scope, memory_order, "local")
+ } else if (memory_semantics == __CLC_MEMORY_GLOBAL) {
+ BUILTIN_FENCE(memory_scope, memory_order, "global")
+ } else if (memory_semantics == (__CLC_MEMORY_LOCAL | __CLC_MEMORY_GLOBAL)) {
+ BUILTIN_FENCE(memory_scope, memory_order, "local", "global")
+ } else {
+ BUILTIN_FENCE(memory_scope, memory_order)
+ }
}
-#undef __waitcnt
diff --git a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
index ff3628fa7c33..034e6e7bd8ed 100644
--- a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
+++ b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
@@ -9,8 +9,9 @@
#include <clc/mem_fence/clc_mem_fence.h>
#include <clc/synchronization/clc_work_group_barrier.h>
-_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope,
- int memory_order) {
- __clc_mem_fence(memory_scope, memory_order);
+_CLC_OVERLOAD _CLC_DEF void
+__clc_work_group_barrier(int memory_scope, int memory_order,
+ __CLC_MemorySemantics memory_semantics) {
+ __clc_mem_fence(memory_scope, memory_order, memory_semantics);
__builtin_amdgcn_s_barrier();
}
diff --git a/libclc/clc/lib/amdgpu/math/clc_native_exp2.cl b/libclc/clc/lib/amdgpu/math/clc_native_exp2.cl
index 5959ea2786a5..a940323b60a7 100644
--- a/libclc/clc/lib/amdgpu/math/clc_native_exp2.cl
+++ b/libclc/clc/lib/amdgpu/math/clc_native_exp2.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#define __CLC_FLOAT_ONLY
diff --git a/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl b/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl
index d2790f3a8260..3a4310baa224 100644
--- a/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl
+++ b/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>
diff --git a/libclc/clc/lib/clspv/math/clc_sw_fma.cl b/libclc/clc/lib/clspv/math/clc_sw_fma.cl
index 1dc9a0e6407b..c28b9441b05f 100644
--- a/libclc/clc/lib/clspv/math/clc_sw_fma.cl
+++ b/libclc/clc/lib/clspv/math/clc_sw_fma.cl
@@ -11,7 +11,6 @@
// been updated as appropriate.
#include <clc/clc_as_type.h>
-#include <clc/clcmacro.h>
#include <clc/float/definitions.h>
#include <clc/integer/clc_abs.h>
#include <clc/integer/clc_clz.h>
diff --git a/libclc/clc/lib/generic/common/clc_degrees.cl b/libclc/clc/lib/generic/common/clc_degrees.cl
index b5b07df5075e..79b97f0ca75b 100644
--- a/libclc/clc/lib/generic/common/clc_degrees.cl
+++ b/libclc/clc/lib/generic/common/clc_degrees.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#define DEGREES_SINGLE_DEF(TYPE, LITERAL) \
diff --git a/libclc/clc/lib/generic/common/clc_radians.cl b/libclc/clc/lib/generic/common/clc_radians.cl
index 497263a15f51..3f013019ad97 100644
--- a/libclc/clc/lib/generic/common/clc_radians.cl
+++ b/libclc/clc/lib/generic/common/clc_radians.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#define __CLC_RADIANS_SINGLE_DEF(TYPE, LITERAL) \
diff --git a/libclc/clc/lib/generic/common/clc_smoothstep.cl b/libclc/clc/lib/generic/common/clc_smoothstep.cl
index cf143b8fac23..b409c7d7b644 100644
--- a/libclc/clc/lib/generic/common/clc_smoothstep.cl
+++ b/libclc/clc/lib/generic/common/clc_smoothstep.cl
@@ -5,7 +5,6 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#include <clc/shared/clc_clamp.h>
diff --git a/libclc/clc/lib/generic/common/clc_step.cl b/libclc/clc/lib/generic/common/clc_step.cl
index c21c27a3e940..721489948233 100644
--- a/libclc/clc/lib/generic/common/clc_step.cl
+++ b/libclc/clc/lib/generic/common/clc_step.cl
@@ -6,7 +6,5 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
-
#define __CLC_BODY <clc_step.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/integer/clc_clz.cl b/libclc/clc/lib/generic/integer/clc_clz.cl
index 37ba4dbfd809..48c4400f199f 100644
--- a/libclc/clc/lib/generic/integer/clc_clz.cl
+++ b/libclc/clc/lib/generic/integer/clc_clz.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/integer/clc_clz.h>
#include <clc/internal/clc.h>
diff --git a/libclc/clc/lib/generic/integer/clc_ctz.cl b/libclc/clc/lib/generic/integer/clc_ctz.cl
index 5cf4c19811db..42cfa90c3e66 100644
--- a/libclc/clc/lib/generic/integer/clc_ctz.cl
+++ b/libclc/clc/lib/generic/integer/clc_ctz.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/integer/clc_ctz.h>
#include <clc/internal/clc.h>
diff --git a/libclc/clc/lib/generic/integer/clc_mad_sat.cl b/libclc/clc/lib/generic/integer/clc_mad_sat.cl
index 7c6aaffe6c5c..93088dc9f0b6 100644
--- a/libclc/clc/lib/generic/integer/clc_mad_sat.cl
+++ b/libclc/clc/lib/generic/integer/clc_mad_sat.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/integer/clc_add_sat.h>
#include <clc/integer/clc_mad24.h>
#include <clc/integer/clc_mul_hi.h>
diff --git a/libclc/clc/lib/generic/math/clc_cbrt.cl b/libclc/clc/lib/generic/math/clc_cbrt.cl
index 26869085933a..105f6329d5ba 100644
--- a/libclc/clc/lib/generic/math/clc_cbrt.cl
+++ b/libclc/clc/lib/generic/math/clc_cbrt.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_copysign.h>
#include <clc/math/clc_fabs.h>
diff --git a/libclc/clc/lib/generic/math/clc_cos.cl b/libclc/clc/lib/generic/math/clc_cos.cl
index 0c9dc287aa0b..e7e4d6ad39ed 100644
--- a/libclc/clc/lib/generic/math/clc_cos.cl
+++ b/libclc/clc/lib/generic/math/clc_cos.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/float/definitions.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_sincos_helpers.h>
diff --git a/libclc/clc/lib/generic/math/clc_erf.cl b/libclc/clc/lib/generic/math/clc_erf.cl
index 34c7d586131e..61a7c9d684aa 100644
--- a/libclc/clc/lib/generic/math/clc_erf.cl
+++ b/libclc/clc/lib/generic/math/clc_erf.cl
@@ -507,5 +507,5 @@ _CLC_OVERLOAD _CLC_DEF half __clc_erf(half x) {
#endif
#define __CLC_FUNCTION __clc_erf
-#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
+#define __CLC_BODY <clc/shared/unary_def_scalarize_loop.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_erfc.cl b/libclc/clc/lib/generic/math/clc_erfc.cl
index 7922807818ea..01dbcd0c39ae 100644
--- a/libclc/clc/lib/generic/math/clc_erfc.cl
+++ b/libclc/clc/lib/generic/math/clc_erfc.cl
@@ -518,5 +518,5 @@ _CLC_OVERLOAD _CLC_DEF half __clc_erfc(half x) {
#endif
#define __CLC_FUNCTION __clc_erfc
-#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
+#define __CLC_BODY <clc/shared/unary_def_scalarize_loop.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_exp10.cl b/libclc/clc/lib/generic/math/clc_exp10.cl
index 04e912ed9888..0c394ee19475 100644
--- a/libclc/clc/lib/generic/math/clc_exp10.cl
+++ b/libclc/clc/lib/generic/math/clc_exp10.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>
diff --git a/libclc/clc/lib/generic/math/clc_fmod.cl b/libclc/clc/lib/generic/math/clc_fmod.cl
index 3cb01e67292e..f17757c9c370 100644
--- a/libclc/clc/lib/generic/math/clc_fmod.cl
+++ b/libclc/clc/lib/generic/math/clc_fmod.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/integer/clc_clz.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_floor.h>
diff --git a/libclc/clc/lib/generic/math/clc_fract.cl b/libclc/clc/lib/generic/math/clc_fract.cl
index 7db43ef87871..681077cb9c89 100644
--- a/libclc/clc/lib/generic/math/clc_fract.cl
+++ b/libclc/clc/lib/generic/math/clc_fract.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_floor.h>
#include <clc/math/clc_fmin.h>
diff --git a/libclc/clc/lib/generic/math/clc_frexp.inc b/libclc/clc/lib/generic/math/clc_frexp.inc
index d212b6a1b337..763266bc0493 100644
--- a/libclc/clc/lib/generic/math/clc_frexp.inc
+++ b/libclc/clc/lib/generic/math/clc_frexp.inc
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/utils.h>
#if __CLC_FPSIZE == 32
diff --git a/libclc/clc/lib/generic/math/clc_hypot.cl b/libclc/clc/lib/generic/math/clc_hypot.cl
index 6990be3083eb..c934ab29da91 100644
--- a/libclc/clc/lib/generic/math/clc_hypot.cl
+++ b/libclc/clc/lib/generic/math/clc_hypot.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/integer/clc_abs.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_fma.h>
diff --git a/libclc/clc/lib/generic/math/clc_ilogb.cl b/libclc/clc/lib/generic/math/clc_ilogb.cl
index c33ed9fe9b04..1ccc5754ca7f 100644
--- a/libclc/clc/lib/generic/math/clc_ilogb.cl
+++ b/libclc/clc/lib/generic/math/clc_ilogb.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/float/definitions.h>
#include <clc/integer/clc_clz.h>
#include <clc/internal/clc.h>
diff --git a/libclc/clc/lib/generic/math/clc_ldexp.cl b/libclc/clc/lib/generic/math/clc_ldexp.cl
index a5327c58e424..f9252a75ab4b 100644
--- a/libclc/clc/lib/generic/math/clc_ldexp.cl
+++ b/libclc/clc/lib/generic/math/clc_ldexp.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/integer/clc_add_sat.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_subnormal_config.h>
@@ -15,9 +14,7 @@
#include <clc/relational/clc_isnan.h>
#include <clc/shared/clc_clamp.h>
-#define _CLC_DEF_ldexp _CLC_DEF __attribute__((weak))
-
-_CLC_DEF_ldexp _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
+_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
if (!__clc_fp32_subnormals_supported()) {
// This treats subnormals as zeros
@@ -90,7 +87,7 @@ _CLC_DEF_ldexp _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-_CLC_DEF_ldexp _CLC_OVERLOAD double __clc_ldexp(double x, int n) {
+_CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) {
long l = __clc_as_ulong(x);
int e = (l >> 52) & 0x7ff;
long s = l & 0x8000000000000000;
@@ -125,14 +122,13 @@ _CLC_DEF_ldexp _CLC_OVERLOAD double __clc_ldexp(double x, int n) {
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_OVERLOAD _CLC_DEF_ldexp half __clc_ldexp(half x, int n) {
+_CLC_OVERLOAD _CLC_DEF half __clc_ldexp(half x, int n) {
return (half)__clc_ldexp((float)x, n);
}
#endif
#define __CLC_FUNCTION __clc_ldexp
-#define __CLC_DEF_SPEC _CLC_DEF_ldexp
#define __CLC_ARG2_TYPE int
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_lgamma_r.cl b/libclc/clc/lib/generic/math/clc_lgamma_r.cl
index ffacfc17a4d0..2322ad54508f 100644
--- a/libclc/clc/lib/generic/math/clc_lgamma_r.cl
+++ b/libclc/clc/lib/generic/math/clc_lgamma_r.cl
@@ -279,9 +279,6 @@ _CLC_OVERLOAD _CLC_DEF float __clc_lgamma_r(float x, private int *signp) {
return r;
}
-_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_lgamma_r, float,
- private, int)
-
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// ====================================================
@@ -585,8 +582,6 @@ _CLC_OVERLOAD _CLC_DEF double __clc_lgamma_r(double x, private int *ip) {
return r;
}
-_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_lgamma_r, double,
- private, int)
#endif
#ifdef cl_khr_fp16
@@ -597,11 +592,17 @@ _CLC_OVERLOAD _CLC_DEF half __clc_lgamma_r(half x, private int *iptr) {
return (half)__clc_lgamma_r((float)x, iptr);
}
-_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_lgamma_r, half,
- private, int);
-
#endif
+#define __CLC_FUNCTION __clc_lgamma_r
+#define __CLC_ARG2_TYPE int
+#define __CLC_ADDRSPACE private
+#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_ADDRSPACE
+#undef __CLC_ARG2_TYPE
+#undef __CLC_FUNCTION
+
#define __CLC_ADDRSPACE global
#define __CLC_BODY <clc_lgamma_r.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_log.cl b/libclc/clc/lib/generic/math/clc_log.cl
index 7eb0180de971..74ee1ed71375 100644
--- a/libclc/clc/lib/generic/math/clc_log.cl
+++ b/libclc/clc/lib/generic/math/clc_log.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/float/definitions.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_log2.h>
diff --git a/libclc/clc/lib/generic/math/clc_log10.cl b/libclc/clc/lib/generic/math/clc_log10.cl
index 35489f467562..741d21b5d831 100644
--- a/libclc/clc/lib/generic/math/clc_log10.cl
+++ b/libclc/clc/lib/generic/math/clc_log10.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#include <clc/math/tables.h>
diff --git a/libclc/clc/lib/generic/math/clc_log2.cl b/libclc/clc/lib/generic/math/clc_log2.cl
index d9d8cef54255..432ea5737bc2 100644
--- a/libclc/clc/lib/generic/math/clc_log2.cl
+++ b/libclc/clc/lib/generic/math/clc_log2.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#include <clc/math/tables.h>
diff --git a/libclc/clc/lib/generic/math/clc_logb.cl b/libclc/clc/lib/generic/math/clc_logb.cl
index f571a11d0c38..879676eedff1 100644
--- a/libclc/clc/lib/generic/math/clc_logb.cl
+++ b/libclc/clc/lib/generic/math/clc_logb.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/float/definitions.h>
#include <clc/integer/clc_clz.h>
#include <clc/internal/clc.h>
diff --git a/libclc/clc/lib/generic/math/clc_nextafter.cl b/libclc/clc/lib/generic/math/clc_nextafter.cl
index 40e1b5a2c57f..48e28c08edf7 100644
--- a/libclc/clc/lib/generic/math/clc_nextafter.cl
+++ b/libclc/clc/lib/generic/math/clc_nextafter.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_fabs.h>
#include <clc/relational/clc_isnan.h>
diff --git a/libclc/clc/lib/generic/math/clc_pow.cl b/libclc/clc/lib/generic/math/clc_pow.cl
index 3d371938e401..70d3d614a8d3 100644
--- a/libclc/clc/lib/generic/math/clc_pow.cl
+++ b/libclc/clc/lib/generic/math/clc_pow.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_fma.h>
diff --git a/libclc/clc/lib/generic/math/clc_pown.cl b/libclc/clc/lib/generic/math/clc_pown.cl
index 074f212a31b7..5aa9560174b9 100644
--- a/libclc/clc/lib/generic/math/clc_pown.cl
+++ b/libclc/clc/lib/generic/math/clc_pown.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_fma.h>
diff --git a/libclc/clc/lib/generic/math/clc_powr.cl b/libclc/clc/lib/generic/math/clc_powr.cl
index c596a552f07f..0556ec97d6f3 100644
--- a/libclc/clc/lib/generic/math/clc_powr.cl
+++ b/libclc/clc/lib/generic/math/clc_powr.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_fma.h>
diff --git a/libclc/clc/lib/generic/math/clc_remainder.cl b/libclc/clc/lib/generic/math/clc_remainder.cl
index 488dde73f67a..622f05f85100 100644
--- a/libclc/clc/lib/generic/math/clc_remainder.cl
+++ b/libclc/clc/lib/generic/math/clc_remainder.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/integer/clc_clz.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_floor.h>
diff --git a/libclc/clc/lib/generic/math/clc_remquo.cl b/libclc/clc/lib/generic/math/clc_remquo.cl
index 1505d1c3fb62..fd83ead06d89 100644
--- a/libclc/clc/lib/generic/math/clc_remquo.cl
+++ b/libclc/clc/lib/generic/math/clc_remquo.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/integer/clc_clz.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_floor.h>
diff --git a/libclc/clc/lib/generic/math/clc_rsqrt.inc b/libclc/clc/lib/generic/math/clc_rsqrt.inc
index 4c04155a932c..07aad16f9191 100644
--- a/libclc/clc/lib/generic/math/clc_rsqrt.inc
+++ b/libclc/clc/lib/generic/math/clc_rsqrt.inc
@@ -6,8 +6,7 @@
//
//===----------------------------------------------------------------------===//
-__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
-__clc_rsqrt(__CLC_GENTYPE val) {
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_rsqrt(__CLC_GENTYPE val) {
#pragma clang fp contract(fast)
return __CLC_FP_LIT(1.0) / __builtin_elementwise_sqrt(val);
}
diff --git a/libclc/clc/lib/generic/math/clc_sin.cl b/libclc/clc/lib/generic/math/clc_sin.cl
index 0ff9739c6a84..741383f94c45 100644
--- a/libclc/clc/lib/generic/math/clc_sin.cl
+++ b/libclc/clc/lib/generic/math/clc_sin.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/float/definitions.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_fabs.h>
diff --git a/libclc/clc/lib/generic/math/clc_sqrt.inc b/libclc/clc/lib/generic/math/clc_sqrt.inc
index 61e341993f5c..e15dcf75ac3f 100644
--- a/libclc/clc/lib/generic/math/clc_sqrt.inc
+++ b/libclc/clc/lib/generic/math/clc_sqrt.inc
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
-__clc_sqrt(__CLC_GENTYPE val) {
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) {
return __builtin_elementwise_sqrt(val);
}
diff --git a/libclc/clc/lib/generic/math/clc_sw_fma.cl b/libclc/clc/lib/generic/math/clc_sw_fma.cl
index 550ca5e18f3f..606e4df320a8 100644
--- a/libclc/clc/lib/generic/math/clc_sw_fma.cl
+++ b/libclc/clc/lib/generic/math/clc_sw_fma.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_as_type.h>
-#include <clc/clcmacro.h>
#include <clc/float/definitions.h>
#include <clc/integer/clc_abs.h>
#include <clc/integer/clc_clz.h>
diff --git a/libclc/clc/lib/generic/relational/clc_bitselect.cl b/libclc/clc/lib/generic/relational/clc_bitselect.cl
index 9c6e82e6be4b..b58fb8d3233f 100644
--- a/libclc/clc/lib/generic/relational/clc_bitselect.cl
+++ b/libclc/clc/lib/generic/relational/clc_bitselect.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#define __CLC_BODY <clc_bitselect.inc>
diff --git a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
index b3e2375e755a..5f96ef547764 100644
--- a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
+++ b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
@@ -8,8 +8,9 @@
#include <clc/mem_fence/clc_mem_fence.h>
-_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope,
- int memory_order) {
+_CLC_OVERLOAD _CLC_DEF void
+__clc_mem_fence(int memory_scope, int memory_order,
+ __CLC_MemorySemantics memory_semantics) {
if (memory_scope & (__MEMORY_SCOPE_DEVICE | __MEMORY_SCOPE_WRKGRP))
__nvvm_membar_cta();
}
diff --git a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
index 6cb37a38f06a..349c0f484513 100644
--- a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
+++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
@@ -8,7 +8,8 @@
#include <clc/synchronization/clc_work_group_barrier.h>
-_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope,
- int memory_order) {
+_CLC_OVERLOAD _CLC_DEF void
+__clc_work_group_barrier(int memory_scope, int memory_order,
+ __CLC_MemorySemantics memory_semantics) {
__syncthreads();
}
diff --git a/libclc/clc/lib/r600/math/clc_native_rsqrt.cl b/libclc/clc/lib/r600/math/clc_native_rsqrt.cl
index da001574afc0..cef106e3b4e9 100644
--- a/libclc/clc/lib/r600/math/clc_native_rsqrt.cl
+++ b/libclc/clc/lib/r600/math/clc_native_rsqrt.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
_CLC_OVERLOAD _CLC_DEF float __clc_native_rsqrt(float x) {
diff --git a/libclc/clc/lib/r600/math/clc_rsqrt_override.cl b/libclc/clc/lib/r600/math/clc_rsqrt_override.cl
index f20046b34117..5ef9ad541afb 100644
--- a/libclc/clc/lib/r600/math/clc_rsqrt_override.cl
+++ b/libclc/clc/lib/r600/math/clc_rsqrt_override.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
_CLC_OVERLOAD _CLC_DEF float __clc_rsqrt(float x) {
diff --git a/libclc/clc/lib/spirv/math/clc_fmax.cl b/libclc/clc/lib/spirv/math/clc_fmax.cl
index f63a9d00c642..101f35c7108c 100644
--- a/libclc/clc/lib/spirv/math/clc_fmax.cl
+++ b/libclc/clc/lib/spirv/math/clc_fmax.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
_CLC_DEF _CLC_OVERLOAD float __clc_fmax(float x, float y) {
diff --git a/libclc/clc/lib/spirv/math/clc_fmin.cl b/libclc/clc/lib/spirv/math/clc_fmin.cl
index ddb0126035f8..9bef3a92d5ba 100644
--- a/libclc/clc/lib/spirv/math/clc_fmin.cl
+++ b/libclc/clc/lib/spirv/math/clc_fmin.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
_CLC_DEF _CLC_OVERLOAD float __clc_fmin(float x, float y) {
diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake
index 5cc202ddbaa8..aa8dd9859cd2 100644
--- a/libclc/cmake/modules/AddLibclc.cmake
+++ b/libclc/cmake/modules/AddLibclc.cmake
@@ -92,19 +92,35 @@ function(link_bc)
${ARGN}
)
- set( LINK_INPUT_ARG ${ARG_INPUTS} )
+ if( ARG_INTERNALIZE )
+ set( inputs_with_flag ${ARG_INPUTS} )
+ else()
+ # Add the --override flag for non-generic bitcode files so that their
+ # symbols can override definitions in generic bitcode files.
+ set( inputs_with_flag )
+ foreach( file IN LISTS ARG_INPUTS )
+ string( FIND ${file} "/generic/" is_generic )
+ if( is_generic LESS 0 )
+ list( APPEND inputs_with_flag "--override" )
+ endif()
+ list( APPEND inputs_with_flag ${file} )
+ endforeach()
+ endif()
+
if( WIN32 OR CYGWIN )
# Create a response file in case the number of inputs exceeds command-line
# character limits on certain platforms.
file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE )
# Turn it into a space-separate list of input files
- list( JOIN ARG_INPUTS " " RSP_INPUT )
+ list( JOIN inputs_with_flag " " RSP_INPUT )
file( GENERATE OUTPUT ${RSP_FILE} CONTENT ${RSP_INPUT} )
# Ensure that if this file is removed, we re-run CMake
set_property( DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
${RSP_FILE}
)
set( LINK_INPUT_ARG "@${RSP_FILE}" )
+ else()
+ set( LINK_INPUT_ARG ${inputs_with_flag} )
endif()
if( ARG_INTERNALIZE )
diff --git a/libclc/opencl/include/clc/opencl/synchronization/utils.h b/libclc/opencl/include/clc/opencl/synchronization/utils.h
index cf3baf28cb5f..a8841658598c 100644
--- a/libclc/opencl/include/clc/opencl/synchronization/utils.h
+++ b/libclc/opencl/include/clc/opencl/synchronization/utils.h
@@ -10,9 +10,10 @@
#define __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
#include <clc/internal/clc.h>
+#include <clc/mem_fence/clc_mem_semantic.h>
#include <clc/opencl/synchronization/cl_mem_fence_flags.h>
-_CLC_INLINE int getCLCMemoryScope(cl_mem_fence_flags flag) {
+_CLC_INLINE int __opencl_get_memory_scope(cl_mem_fence_flags flag) {
int memory_scope = 0;
if (flag & CLK_GLOBAL_MEM_FENCE)
memory_scope |= __MEMORY_SCOPE_DEVICE;
@@ -21,4 +22,15 @@ _CLC_INLINE int getCLCMemoryScope(cl_mem_fence_flags flag) {
return memory_scope;
}
+_CLC_INLINE __CLC_MemorySemantics
+__opencl_get_memory_semantics(cl_mem_fence_flags flag) {
+ if ((flag & CLK_LOCAL_MEM_FENCE) && (flag & CLK_GLOBAL_MEM_FENCE))
+ return __CLC_MEMORY_LOCAL | __CLC_MEMORY_GLOBAL;
+ if (flag & CLK_LOCAL_MEM_FENCE)
+ return __CLC_MEMORY_LOCAL;
+ if (flag & CLK_GLOBAL_MEM_FENCE)
+ return __CLC_MEMORY_GLOBAL;
+ __builtin_unreachable();
+}
+
#endif // __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
diff --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
index 81216d6a26cf..963380761b46 100644
--- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
+++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
@@ -11,9 +11,10 @@
#include <clc/opencl/synchronization/utils.h>
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
- int memory_scope = getCLCMemoryScope(flags);
+ int memory_scope = __opencl_get_memory_scope(flags);
int memory_order = __ATOMIC_SEQ_CST;
- __clc_mem_fence(memory_scope, memory_order);
+ __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+ __clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
// We don't have separate mechanism for read and write fences
diff --git a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
index c8322e602302..dd7d1507f5ad 100644
--- a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
+++ b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
@@ -11,7 +11,8 @@
#include <clc/synchronization/clc_work_group_barrier.h>
_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
- int memory_scope = getCLCMemoryScope(flags);
+ int memory_scope = __opencl_get_memory_scope(flags);
int memory_order = __ATOMIC_SEQ_CST;
- __clc_work_group_barrier(memory_scope, memory_order);
+ __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+ __clc_work_group_barrier(memory_scope, memory_order, memory_semantics);
}
diff --git a/libclc/opencl/lib/generic/common/sign.cl b/libclc/opencl/lib/generic/common/sign.cl
index 7add739be6a9..5508cd7bfab1 100644
--- a/libclc/opencl/lib/generic/common/sign.cl
+++ b/libclc/opencl/lib/generic/common/sign.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/common/clc_sign.h>
#include <clc/opencl/common/sign.h>
diff --git a/libclc/opencl/lib/generic/common/smoothstep.cl b/libclc/opencl/lib/generic/common/smoothstep.cl
index fdf9a89dcbfd..84ed7417de3c 100644
--- a/libclc/opencl/lib/generic/common/smoothstep.cl
+++ b/libclc/opencl/lib/generic/common/smoothstep.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/common/clc_smoothstep.h>
#include <clc/opencl/common/smoothstep.h>
diff --git a/libclc/opencl/lib/generic/math/atan2.cl b/libclc/opencl/lib/generic/math/atan2.cl
index 7db630608867..5f4a9d991cd9 100644
--- a/libclc/opencl/lib/generic/math/atan2.cl
+++ b/libclc/opencl/lib/generic/math/atan2.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/math/clc_atan2.h>
#include <clc/opencl/math/atan2.h>
diff --git a/libclc/opencl/lib/generic/math/atan2pi.cl b/libclc/opencl/lib/generic/math/atan2pi.cl
index ed57c920f1b2..4e577c775ebb 100644
--- a/libclc/opencl/lib/generic/math/atan2pi.cl
+++ b/libclc/opencl/lib/generic/math/atan2pi.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/math/clc_atan2pi.h>
#include <clc/opencl/math/atan2pi.h>
diff --git a/libclc/opencl/lib/generic/math/log.cl b/libclc/opencl/lib/generic/math/log.cl
index 06209999bf2f..4862b81f1512 100644
--- a/libclc/opencl/lib/generic/math/log.cl
+++ b/libclc/opencl/lib/generic/math/log.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/math/clc_log.h>
#include <clc/opencl/math/log.h>
diff --git a/libclc/opencl/lib/generic/math/log10.cl b/libclc/opencl/lib/generic/math/log10.cl
index 466b602e1896..6702b4485b51 100644
--- a/libclc/opencl/lib/generic/math/log10.cl
+++ b/libclc/opencl/lib/generic/math/log10.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/math/clc_log10.h>
#include <clc/opencl/math/log10.h>
diff --git a/libclc/opencl/lib/generic/math/log2.cl b/libclc/opencl/lib/generic/math/log2.cl
index a52a52e28f63..d072b313f838 100644
--- a/libclc/opencl/lib/generic/math/log2.cl
+++ b/libclc/opencl/lib/generic/math/log2.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/math/clc_log2.h>
#include <clc/opencl/math/log2.h>
diff --git a/libclc/opencl/lib/generic/math/nan.cl b/libclc/opencl/lib/generic/math/nan.cl
index 5597df8b2ac6..df0629fbf55a 100644
--- a/libclc/opencl/lib/generic/math/nan.cl
+++ b/libclc/opencl/lib/generic/math/nan.cl
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcmacro.h>
#include <clc/math/clc_nan.h>
#define __CLC_BODY <nan.inc>
diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index e22ed870a7e6..19721574e405 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -11,9 +11,10 @@
#include <clc/opencl/synchronization/utils.h>
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
- int memory_scope = getCLCMemoryScope(flags);
+ int memory_scope = __opencl_get_memory_scope(flags);
int memory_order = __ATOMIC_SEQ_CST;
- __clc_mem_fence(memory_scope, memory_order);
+ __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+ __clc_mem_fence(memory_scope, memory_order, memory_semantics);
}
// We do not have separate mechanism for read and write fences.
diff --git a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
index c8322e602302..dd7d1507f5ad 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
@@ -11,7 +11,8 @@
#include <clc/synchronization/clc_work_group_barrier.h>
_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
- int memory_scope = getCLCMemoryScope(flags);
+ int memory_scope = __opencl_get_memory_scope(flags);
int memory_order = __ATOMIC_SEQ_CST;
- __clc_work_group_barrier(memory_scope, memory_order);
+ __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+ __clc_work_group_barrier(memory_scope, memory_order, memory_semantics);
}