Program Listing for File memory.hpp
↰ Return to documentation for file (include/ripple/utility/memory.hpp)
#ifndef RIPPLE_UTILITY_MEMORY_HPP
#define RIPPLE_UTILITY_MEMORY_HPP
#include "portability.hpp"
#include <cassert>
#include <cstddef>
#include <cstdint>
namespace ripple {
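/**
 * Returns a new pointer which is offset by amount bytes from ptr. Note that
 * any constness of ptr is not carried over to the returned pointer.
 */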
ripple_all static inline auto
offset_ptr(const void* ptr, uint32_t amount) noexcept -> void* {
  return reinterpret_cast<void*>(uintptr_t(ptr) + amount);
}
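/**
 * Returns ptr rounded up to the next alignment-byte boundary. The alignment
 * must be a power of two, which is asserted in debug builds.
 */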
ripple_all static inline auto
align_ptr(const void* ptr, size_t alignment) noexcept -> void* {
  assert(
    !(alignment & (alignment - 1)) &&
    "Alignment must be a power of two for linear allocation!");
  return reinterpret_cast<void*>(
    (uintptr_t(ptr) + alignment - 1) & ~(alignment - 1));
}
namespace gpu {
/*==--- [device to device] -------------------------------------------------==*/
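/**
 * Copies bytes of data from dev_ptr_in to dev_ptr_out, synchronously.
 */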
template <typename DevPtr>
static inline auto memcpy_device_to_device(
  DevPtr* dev_ptr_out, const DevPtr* dev_ptr_in, size_t bytes) -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpy(dev_ptr_out, dev_ptr_in, bytes, cudaMemcpyDeviceToDevice)));
}
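/**
 * Copies bytes of data from dev_ptr_in to dev_ptr_out, asynchronously on the
 * default stream.
 */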
template <typename DevPtr>
static inline auto memcpy_device_to_device_async(
  DevPtr* dev_ptr_out, const DevPtr* dev_ptr_in, size_t bytes) -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpyAsync(dev_ptr_out, dev_ptr_in, bytes, cudaMemcpyDeviceToDevice)));
}
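/**
 * Copies bytes of data from dev_ptr_in to dev_ptr_out, asynchronously on the
 * given stream.
 */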
template <typename DevPtr>
static inline auto memcpy_device_to_device_async(
  DevPtr* dev_ptr_out, const DevPtr* dev_ptr_in, size_t bytes, GpuStream stream)
  -> void {
  ripple_check_cuda_result(ripple_if_cuda(cudaMemcpyAsync(
    dev_ptr_out, dev_ptr_in, bytes, cudaMemcpyDeviceToDevice, stream)));
}
/*==--- [host to device] ---------------------------------------------------==*/
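/**
 * Copies bytes of data from host_ptr to dev_ptr, synchronously.
 */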
template <typename DevPtr, typename HostPtr>
static inline auto
memcpy_host_to_device(DevPtr* dev_ptr, const HostPtr* host_ptr, size_t bytes)
  -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpy(dev_ptr, host_ptr, bytes, cudaMemcpyHostToDevice)));
}
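/**
 * Copies bytes of data from host_ptr to dev_ptr, asynchronously on the
 * default stream. The copy is only asynchronous with respect to the host if
 * the host memory is pinned.
 */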
template <typename DevPtr, typename HostPtr>
static inline auto memcpy_host_to_device_async(
  DevPtr* dev_ptr, const HostPtr* host_ptr, size_t bytes) -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpyAsync(dev_ptr, host_ptr, bytes, cudaMemcpyHostToDevice)));
}
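/**
 * Copies bytes of data from host_ptr to dev_ptr, asynchronously on the given
 * stream.
 */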
template <typename DevPtr, typename HostPtr>
static inline auto memcpy_host_to_device_async(
  DevPtr* dev_ptr, const HostPtr* host_ptr, size_t bytes, GpuStream stream)
  -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpyAsync(dev_ptr, host_ptr, bytes, cudaMemcpyHostToDevice, stream)));
}
/*==--- [device to host] ---------------------------------------------------==*/
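/**
 * Copies bytes of data from dev_ptr to host_ptr, synchronously.
 */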
template <typename HostPtr, typename DevPtr>
static inline auto
memcpy_device_to_host(HostPtr* host_ptr, const DevPtr* dev_ptr, size_t bytes)
  -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpy(host_ptr, dev_ptr, bytes, cudaMemcpyDeviceToHost)));
}
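/**
 * Copies bytes of data from dev_ptr to host_ptr, asynchronously on the
 * default stream.
 */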
template <typename HostPtr, typename DevPtr>
static inline auto memcpy_device_to_host_async(
  HostPtr* host_ptr, const DevPtr* dev_ptr, size_t bytes) -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpyAsync(host_ptr, dev_ptr, bytes, cudaMemcpyDeviceToHost)));
}
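/**
 * Copies bytes of data from dev_ptr to host_ptr, asynchronously on the given
 * stream.
 */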
template <typename HostPtr, typename DevPtr>
static inline auto memcpy_device_to_host_async(
  HostPtr* host_ptr, const DevPtr* dev_ptr, size_t bytes, GpuStream stream)
  -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpyAsync(host_ptr, dev_ptr, bytes, cudaMemcpyDeviceToHost, stream)));
}
/*==--- [allocation device] ------------------------------------------------==*/
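/**
 * Allocates bytes of device memory, storing the resulting address in
 * dev_ptr.
 */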
template <typename Ptr>
static inline auto allocate_device(Ptr** dev_ptr, size_t bytes) -> void {
  ripple_check_cuda_result(ripple_if_cuda(cudaMalloc((void**)dev_ptr, bytes)));
}
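/**
 * Frees the device memory pointed to by ptr.
 */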
template <typename Ptr>
static inline auto free_device(Ptr* ptr) -> void {
  ripple_check_cuda_result(ripple_if_cuda(cudaFree(ptr)));
}
} // namespace gpu
namespace cpu {
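/**
 * Allocates bytes of pinned (page-locked) host memory which is portable
 * across CUDA contexts, storing the resulting address in host_ptr.
 */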
template <typename Ptr>
static inline auto allocate_host_pinned(Ptr** host_ptr, size_t bytes) -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaHostAlloc((void**)host_ptr, bytes, cudaHostAllocPortable)));
}
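/**
 * Frees the pinned host memory pointed to by ptr.
 */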
template <typename Ptr>
static inline auto free_host_pinned(Ptr* ptr) -> void {
  ripple_check_cuda_result(ripple_if_cuda(cudaFreeHost(ptr)));
}
} // namespace cpu
} // namespace ripple
#endif // RIPPLE_UTILITY_MEMORY_HPP
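A minimal usage sketch (not part of the header): assuming the library is built
with CUDA enabled, so that ripple_if_cuda expands to the wrapped call, and that
the header is included as <ripple/utility/memory.hpp> per the file path above,
the allocation and copy wrappers compose into a host-to-device round trip as
follows.

#include <ripple/utility/memory.hpp>
#include <vector>

auto main() -> int {
  constexpr size_t elements = 1024;
  constexpr size_t bytes    = elements * sizeof(float);

  std::vector<float> input(elements, 1.0f), output(elements, 0.0f);

  // Allocate a device buffer, copy the input in, copy it back out, and free.
  float* dev_buffer = nullptr;
  ripple::gpu::allocate_device(&dev_buffer, bytes);
  ripple::gpu::memcpy_host_to_device(dev_buffer, input.data(), bytes);
  ripple::gpu::memcpy_device_to_host(output.data(), dev_buffer, bytes);
  ripple::gpu::free_device(dev_buffer);
  return 0;
}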