Skip to content

Commit

Permalink
add type trait 'remove_restrict'
Browse files Browse the repository at this point in the history
fix #1472

Provide a type trait to remove __restrict__ from a type.
  • Loading branch information
psychocoderHPC authored and j-stephan committed Nov 29, 2021
1 parent c0510df commit e1308c8
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 7 deletions.
1 change: 1 addition & 0 deletions include/alpaka/alpaka.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
#include <alpaka/core/Hip.hpp>
#include <alpaka/core/OmpSchedule.hpp>
#include <alpaka/core/Positioning.hpp>
#include <alpaka/core/RemoveRestrict.hpp>
#include <alpaka/core/Unroll.hpp>
#include <alpaka/core/Unused.hpp>
#include <alpaka/core/Utility.hpp>
Expand Down
40 changes: 40 additions & 0 deletions include/alpaka/core/RemoveRestrict.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/* Copyright 2021 Rene Widera
*
* This file is part of alpaka.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

#pragma once

#include <alpaka/core/BoostPredef.hpp>

namespace alpaka
{
    //! Type trait that strips the restrict qualifier from a pointer type.
    //!
    //! This is the primary template: every type that does not match the
    //! restrict-qualified pointer specialization below is passed through unchanged.
    //!
    //! \tparam T The type to inspect.
    template<typename T>
    struct remove_restrict
    {
        using type = T;
    };

    // The restrict keyword is spelled `__restrict` when BOOST_COMP_MSVC is set
    // and `__restrict__` in every other case, hence the two variants.
#if !BOOST_COMP_MSVC
    //! Specialization for restrict-qualified pointers (`__restrict__` spelling):
    //! the member `type` is the same pointer type without the qualifier.
    template<typename TPointee>
    struct remove_restrict<TPointee* __restrict__>
    {
        using type = TPointee*;
    };
#else
    //! Specialization for restrict-qualified pointers (`__restrict` spelling):
    //! the member `type` is the same pointer type without the qualifier.
    template<typename TPointee>
    struct remove_restrict<TPointee* __restrict>
    {
        using type = TPointee*;
    };
#endif

    //! Convenience alias for `typename remove_restrict<T>::type`.
    template<typename TType>
    using remove_restrict_t = typename remove_restrict<TType>::type;
} // namespace alpaka
15 changes: 8 additions & 7 deletions include/alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
// Implementation details.
# include <alpaka/acc/AccGpuUniformCudaHipRt.hpp>
# include <alpaka/core/Decay.hpp>
# include <alpaka/core/RemoveRestrict.hpp>
# include <alpaka/core/Unused.hpp>
# include <alpaka/dev/DevUniformCudaHipRt.hpp>
# include <alpaka/kernel/Traits.hpp>
Expand Down Expand Up @@ -141,7 +142,7 @@ namespace alpaka
}

TKernelFnObj m_kernelFnObj;
std::tuple<std::decay_t<TArgs>...> m_args;
std::tuple<remove_restrict_t<std::decay_t<TArgs>>...> m_args;
};

namespace traits
Expand Down Expand Up @@ -227,7 +228,7 @@ namespace alpaka

// Get the size of the block shared dynamic memory.
auto const blockSharedMemDynSizeBytes = meta::apply(
[&](ALPAKA_DECAY_T(TArgs) const&... args) {
[&](remove_restrict_t<ALPAKA_DECAY_T(TArgs)> const&... args) {
return getBlockSharedMemDynSizeBytes<TAcc>(
task.m_kernelFnObj,
blockThreadExtent,
Expand All @@ -242,7 +243,7 @@ namespace alpaka
<< std::endl;
# endif
auto kernelName = uniform_cuda_hip::detail::
uniformCudaHipKernel<TAcc, TDim, TIdx, TKernelFnObj, std::decay_t<TArgs>...>;
uniformCudaHipKernel<TAcc, TDim, TIdx, TKernelFnObj, remove_restrict_t<std::decay_t<TArgs>>...>;

# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
Expand All @@ -268,7 +269,7 @@ namespace alpaka
// (MSVC). If not given by value, the kernel launch code does not copy the value but the pointer to the
// value location.
meta::apply(
[&](ALPAKA_DECAY_T(TArgs) const&... args)
[&](remove_restrict_t<ALPAKA_DECAY_T(TArgs)> const&... args)
{
# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
kernelName<<<
Expand Down Expand Up @@ -357,7 +358,7 @@ namespace alpaka

// Get the size of the block shared dynamic memory.
auto const blockSharedMemDynSizeBytes = meta::apply(
[&](ALPAKA_DECAY_T(TArgs) const&... args) {
[&](remove_restrict_t<ALPAKA_DECAY_T(TArgs)> const&... args) {
return getBlockSharedMemDynSizeBytes<TAcc>(
task.m_kernelFnObj,
blockThreadExtent,
Expand All @@ -373,7 +374,7 @@ namespace alpaka
# endif

auto kernelName = uniform_cuda_hip::detail::
uniformCudaHipKernel<TAcc, TDim, TIdx, TKernelFnObj, std::decay_t<TArgs>...>;
uniformCudaHipKernel<TAcc, TDim, TIdx, TKernelFnObj, remove_restrict_t<std::decay_t<TArgs>>...>;
# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
// hipFuncAttributes not ported from CUDA to HIP.
// TODO: find out why this is currently not possible
Expand All @@ -396,7 +397,7 @@ namespace alpaka

// Enqueue the kernel execution.
meta::apply(
[&](ALPAKA_DECAY_T(TArgs) const&... args)
[&](remove_restrict_t<ALPAKA_DECAY_T(TArgs)> const&... args)
{
# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
kernelName<<<
Expand Down

0 comments on commit e1308c8

Please sign in to comment.