From 9dd50af416dfcc4a688a331459d88b066ae2884f Mon Sep 17 00:00:00 2001 From: Evgeniy Pavlov Date: Mon, 15 Feb 2021 14:24:25 +0300 Subject: Remove unnecessary barriers and add constexpr attribute in scan brick (#119) * Add constexpr attribute to __iters_per_witem variable Signed-off-by: Pavlov, Evgeniy * Remove unnecessary barriers Signed-off-by: Pavlov, Evgeniy * Replace auto with decltype Signed-off-by: Pavlov, Evgeniy --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 +- include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index b7756df15f62..1532d1362778 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -440,7 +440,7 @@ __parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& #endif // Practically this is the better value that was found - auto __iters_per_witem = decltype(__wgroup_size)(16); + constexpr decltype(__wgroup_size) __iters_per_witem = 16; auto __size_per_wg = __iters_per_witem * __wgroup_size; auto __n_groups = (__n - 1) / __size_per_wg + 1; // Storage for the results of scan for each workgroup diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h index 4895be8cec49..241dec088393 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h @@ -543,7 +543,6 @@ struct __scan __local_acc[__local_id] = __partial_sums; __item.barrier(sycl::access::fence_space::local_space); __adder = __local_acc[__wgroup_size - 1]; - __item.barrier(sycl::access::fence_space::local_space); if (__adjusted_global_id + __shift < __n) __gl_assigner(__acc, __out_acc, __adjusted_global_id + __shift, 
__local_acc, __local_id); @@ -627,7 +626,6 @@ struct __scan<_Inclusive, _ExecutionPolicy, ::std::plus