Program Listing for File memory.hpp
↰ Return to documentation for file (include/ripple/utility/memory.hpp)
#ifndef RIPPLE_UTILITY_MEMORY_HPP
#define RIPPLE_UTILITY_MEMORY_HPP
#include "portability.hpp"
#include <cassert>
#include <cstddef>
#include <cstdint>
namespace ripple {
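/**
 * Returns a new pointer which is offset by amount bytes from ptr. Note that
 * any constness of ptr is not carried over to the returned pointer.
 */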
ripple_all static inline auto
offset_ptr(const void* ptr, uint32_t amount) noexcept -> void* {
  return reinterpret_cast<void*>(uintptr_t(ptr) + amount);
}
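/**
 * Returns ptr rounded up to the next alignment-byte boundary. The alignment
 * must be a power of two, which is asserted in debug builds.
 */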
ripple_all static inline auto
align_ptr(const void* ptr, size_t alignment) noexcept -> void* {
  assert(
    !(alignment & (alignment - 1)) &&
    "Alignment must be a power of two for linear allocation!");
  return reinterpret_cast<void*>(
    (uintptr_t(ptr) + alignment - 1) & ~(alignment - 1));
}
namespace gpu {
/*==--- [device to device] -------------------------------------------------==*/
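/**
 * Copies bytes of data from dev_ptr_in to dev_ptr_out, synchronously.
 */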
template <typename DevPtr>
static inline auto memcpy_device_to_device(
  DevPtr* dev_ptr_out, const DevPtr* dev_ptr_in, size_t bytes) -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpy(dev_ptr_out, dev_ptr_in, bytes, cudaMemcpyDeviceToDevice)));
}
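/**
 * Copies bytes of data from dev_ptr_in to dev_ptr_out, asynchronously on the
 * default stream.
 */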
template <typename DevPtr>
static inline auto memcpy_device_to_device_async(
  DevPtr* dev_ptr_out, const DevPtr* dev_ptr_in, size_t bytes) -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpyAsync(dev_ptr_out, dev_ptr_in, bytes, cudaMemcpyDeviceToDevice)));
}
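/**
 * Copies bytes of data from dev_ptr_in to dev_ptr_out, asynchronously on the
 * given stream.
 */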
template <typename DevPtr>
static inline auto memcpy_device_to_device_async(
  DevPtr* dev_ptr_out, const DevPtr* dev_ptr_in, size_t bytes, GpuStream stream)
  -> void {
  ripple_check_cuda_result(ripple_if_cuda(cudaMemcpyAsync(
    dev_ptr_out, dev_ptr_in, bytes, cudaMemcpyDeviceToDevice, stream)));
}
/*==--- [host to device] ---------------------------------------------------==*/
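/**
 * Copies bytes of data from host_ptr to dev_ptr, synchronously.
 */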
template <typename DevPtr, typename HostPtr>
static inline auto
memcpy_host_to_device(DevPtr* dev_ptr, const HostPtr* host_ptr, size_t bytes)
  -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpy(dev_ptr, host_ptr, bytes, cudaMemcpyHostToDevice)));
}
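/**
 * Copies bytes of data from host_ptr to dev_ptr, asynchronously on the
 * default stream. The copy is only asynchronous with respect to the host if
 * the host memory is pinned.
 */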
template <typename DevPtr, typename HostPtr>
static inline auto memcpy_host_to_device_async(
  DevPtr* dev_ptr, const HostPtr* host_ptr, size_t bytes) -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpyAsync(dev_ptr, host_ptr, bytes, cudaMemcpyHostToDevice)));
}
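/**
 * Copies bytes of data from host_ptr to dev_ptr, asynchronously on the given
 * stream.
 */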
template <typename DevPtr, typename HostPtr>
static inline auto memcpy_host_to_device_async(
  DevPtr* dev_ptr, const HostPtr* host_ptr, size_t bytes, GpuStream stream)
  -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpyAsync(dev_ptr, host_ptr, bytes, cudaMemcpyHostToDevice, stream)));
}
/*==--- [device to host] ---------------------------------------------------==*/
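/**
 * Copies bytes of data from dev_ptr to host_ptr, synchronously.
 */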
template <typename HostPtr, typename DevPtr>
static inline auto
memcpy_device_to_host(HostPtr* host_ptr, const DevPtr* dev_ptr, size_t bytes)
  -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpy(host_ptr, dev_ptr, bytes, cudaMemcpyDeviceToHost)));
}
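/**
 * Copies bytes of data from dev_ptr to host_ptr, asynchronously on the
 * default stream.
 */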
template <typename HostPtr, typename DevPtr>
static inline auto memcpy_device_to_host_async(
  HostPtr* host_ptr, const DevPtr* dev_ptr, size_t bytes) -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpyAsync(host_ptr, dev_ptr, bytes, cudaMemcpyDeviceToHost)));
}
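/**
 * Copies bytes of data from dev_ptr to host_ptr, asynchronously on the given
 * stream.
 */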
template <typename HostPtr, typename DevPtr>
static inline auto memcpy_device_to_host_async(
  HostPtr* host_ptr, const DevPtr* dev_ptr, size_t bytes, GpuStream stream)
  -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaMemcpyAsync(host_ptr, dev_ptr, bytes, cudaMemcpyDeviceToHost, stream)));
}
/*==--- [allocation device] ------------------------------------------------==*/
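/**
 * Allocates bytes of device memory, storing the resulting address in
 * dev_ptr.
 */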
template <typename Ptr>
static inline auto allocate_device(Ptr** dev_ptr, size_t bytes) -> void {
  ripple_check_cuda_result(ripple_if_cuda(cudaMalloc((void**)dev_ptr, bytes)));
}
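/**
 * Frees the device memory pointed to by ptr.
 */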
template <typename Ptr>
static inline auto free_device(Ptr* ptr) -> void {
  ripple_check_cuda_result(ripple_if_cuda(cudaFree(ptr)));
}
} // namespace gpu
namespace cpu {
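/**
 * Allocates bytes of pinned (page-locked) host memory which is portable
 * across CUDA contexts, storing the resulting address in host_ptr.
 */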
template <typename Ptr>
static inline auto allocate_host_pinned(Ptr** host_ptr, size_t bytes) -> void {
  ripple_check_cuda_result(ripple_if_cuda(
    cudaHostAlloc((void**)host_ptr, bytes, cudaHostAllocPortable)));
}
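/**
 * Frees the pinned host memory pointed to by ptr.
 */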
template <typename Ptr>
static inline auto free_host_pinned(Ptr* ptr) -> void {
  ripple_check_cuda_result(ripple_if_cuda(cudaFreeHost(ptr)));
}
} // namespace cpu
} // namespace ripple
#endif // RIPPLE_UTILITY_MEMORY_HPP
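A minimal usage sketch (not part of the header): assuming the library is built
with CUDA enabled, so that ripple_if_cuda expands to the wrapped call, and that
the header is included as <ripple/utility/memory.hpp> per the file path above,
the allocation and copy wrappers compose into a host-to-device round trip as
follows.

#include <ripple/utility/memory.hpp>
#include <vector>

auto main() -> int {
  constexpr size_t elements = 1024;
  constexpr size_t bytes    = elements * sizeof(float);

  std::vector<float> input(elements, 1.0f), output(elements, 0.0f);

  // Allocate a device buffer, copy the input in, copy it back out, and free.
  float* dev_buffer = nullptr;
  ripple::gpu::allocate_device(&dev_buffer, bytes);
  ripple::gpu::memcpy_host_to_device(dev_buffer, input.data(), bytes);
  ripple::gpu::memcpy_device_to_host(output.data(), dev_buffer, bytes);
  ripple::gpu::free_device(dev_buffer);
  return 0;
}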