aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvgeniy Pavlov <evgeniy.pavlov@intel.com>2021-02-15 14:24:25 +0300
committerGitHub <noreply@github.com>2021-02-15 14:24:25 +0300
commit9dd50af416dfcc4a688a331459d88b066ae2884f (patch)
treea7844098be84ac1a404239be918b227ae414842f
parentExtend CI tests (#110) (diff)
downloadllvm-project-9dd50af416dfcc4a688a331459d88b066ae2884f.tar.gz
llvm-project-9dd50af416dfcc4a688a331459d88b066ae2884f.tar.bz2
llvm-project-9dd50af416dfcc4a688a331459d88b066ae2884f.zip
Remove unnecessary barriers and add constexpr attribute in scan brick (#119)
* Add constexpr attribute to __iters_per_witem variable Signed-off-by: Pavlov, Evgeniy <evgeniy.pavlov@intel.com> * Remove unnecessary barriers Signed-off-by: Pavlov, Evgeniy <evgeniy.pavlov@intel.com> * Replace auto with decltype Signed-off-by: Pavlov, Evgeniy <evgeniy.pavlov@intel.com>
-rw-r--r--include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h2
-rw-r--r--include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h4
2 files changed, 1 insertions, 5 deletions
diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
index b7756df15f62..1532d1362778 100644
--- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
+++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
@@ -440,7 +440,7 @@ __parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&&
#endif
// Practically this is the better value that was found
- auto __iters_per_witem = decltype(__wgroup_size)(16);
+ constexpr decltype(__wgroup_size) __iters_per_witem = 16;
auto __size_per_wg = __iters_per_witem * __wgroup_size;
auto __n_groups = (__n - 1) / __size_per_wg + 1;
// Storage for the results of scan for each workgroup
diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h
index 4895be8cec49..241dec088393 100644
--- a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h
+++ b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h
@@ -543,7 +543,6 @@ struct __scan
__local_acc[__local_id] = __partial_sums;
__item.barrier(sycl::access::fence_space::local_space);
__adder = __local_acc[__wgroup_size - 1];
- __item.barrier(sycl::access::fence_space::local_space);
if (__adjusted_global_id + __shift < __n)
__gl_assigner(__acc, __out_acc, __adjusted_global_id + __shift, __local_acc, __local_id);
@@ -627,7 +626,6 @@ struct __scan<_Inclusive, _ExecutionPolicy, ::std::plus<typename _InitType::__va
__local_acc[__local_id] = __data_acc(__adjusted_global_id, __acc);
else
__local_acc[__local_id] = _Tp{0}; // for plus only
- __item.barrier(sycl::access::fence_space::local_space);
// the result of __unary_op must be convertible to _Tp
_Tp __old_value = __unary_op(__local_id, __local_acc);
@@ -635,13 +633,11 @@ struct __scan<_Inclusive, _ExecutionPolicy, ::std::plus<typename _InitType::__va
__old_value = __bin_op(__adder, __old_value);
else if (__adjusted_global_id == 0)
__use_init(__init, __old_value, __bin_op);
- __item.barrier(sycl::access::fence_space::local_space);
__local_acc[__local_id] = sycl::ONEAPI::inclusive_scan(__item.get_group(), __old_value, __bin_op);
__item.barrier(sycl::access::fence_space::local_space);
__adder = __local_acc[__wgroup_size - 1];
- __item.barrier(sycl::access::fence_space::local_space);
if (__adjusted_global_id + __shift < __n)
__gl_assigner(__acc, __out_acc, __adjusted_global_id + __shift, __local_acc, __local_id);