diff options
-rw-r--r-- | include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 | ||||
-rw-r--r-- | include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h | 4 |
2 files changed, 1 insertion, 5 deletions
diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index b7756df15f62..1532d1362778 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -440,7 +440,7 @@ __parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& #endif // Practically this is the better value that was found - auto __iters_per_witem = decltype(__wgroup_size)(16); + constexpr decltype(__wgroup_size) __iters_per_witem = 16; auto __size_per_wg = __iters_per_witem * __wgroup_size; auto __n_groups = (__n - 1) / __size_per_wg + 1; // Storage for the results of scan for each workgroup diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h index 4895be8cec49..241dec088393 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h @@ -543,7 +543,6 @@ struct __scan __local_acc[__local_id] = __partial_sums; __item.barrier(sycl::access::fence_space::local_space); __adder = __local_acc[__wgroup_size - 1]; - __item.barrier(sycl::access::fence_space::local_space); if (__adjusted_global_id + __shift < __n) __gl_assigner(__acc, __out_acc, __adjusted_global_id + __shift, __local_acc, __local_id); @@ -627,7 +626,6 @@ struct __scan<_Inclusive, _ExecutionPolicy, ::std::plus<typename _InitType::__va __local_acc[__local_id] = __data_acc(__adjusted_global_id, __acc); else __local_acc[__local_id] = _Tp{0}; // for plus only - __item.barrier(sycl::access::fence_space::local_space); // the result of __unary_op must be convertible to _Tp _Tp __old_value = __unary_op(__local_id, __local_acc); @@ -635,13 +633,11 @@ struct __scan<_Inclusive, _ExecutionPolicy, ::std::plus<typename _InitType::__va __old_value = __bin_op(__adder, __old_value); else if (__adjusted_global_id == 0) 
__use_init(__init, __old_value, __bin_op); - __item.barrier(sycl::access::fence_space::local_space); __local_acc[__local_id] = sycl::ONEAPI::inclusive_scan(__item.get_group(), __old_value, __bin_op); __item.barrier(sycl::access::fence_space::local_space); __adder = __local_acc[__wgroup_size - 1]; - __item.barrier(sycl::access::fence_space::local_space); if (__adjusted_global_id + __shift < __n) __gl_assigner(__acc, __out_acc, __adjusted_global_id + __shift, __local_acc, __local_id); |