• Docs >
  • Program Listing for File device_block.hpp
Shortcuts

Program Listing for File device_block.hpp

Return to documentation for file (include/ripple/container/device_block.hpp)

#ifndef RIPPLE_CONTAINER_DEVICE_BLOCK_HPP
#define RIPPLE_CONTAINER_DEVICE_BLOCK_HPP

#include "block_traits.hpp"
#include "block_memory_properties.hpp"
#include <ripple/allocation/multiarch_allocator.hpp>
#include <ripple/iterator/block_iterator.hpp>
#include <ripple/utility/memory.hpp>

namespace ripple {

/**
 * A block of data which resides on a GPU device. Storage is allocated either
 * through an optional multiarch allocator or directly via the gpu interface,
 * and all copies are performed asynchronously on the block's stream.
 *
 * \tparam T          The type of the data stored in the block.
 * \tparam Dimensions The number of dimensions of the block.
 */
template <typename T, size_t Dimensions>
class DeviceBlock {
  // clang-format off
  /** Traits for the block (allocator, space, value and iterator types). */
  using Traits    = BlockTraits<DeviceBlock<T, Dimensions>>;
  /** Allocator used to lay out elements in the underlying storage. */
  using Allocator = typename Traits::Allocator;
  /** Multi-dimensional space describing sizes and padding. */
  using Space     = typename Traits::Space;
  /** Type of the pointer to the raw device data. */
  using Ptr       = void*;
  /** Type of the stored data. */
  using Value     = typename Traits::Value;
  /** Mutable iterator over the block data. */
  using Iter      = typename Traits::Iter;
  /** Constant iterator over the block data. */
  using ConstIter = const Iter;
  /** Host-side counterpart of this block type. */
  using HostBlock = HostBlock<T, Dimensions>;
  /** Type of the GPU stream used for asynchronous operations. */
  using Stream    = GpuStream;
  // clang-format on

  /** The host block needs access to internals for host <-> device copies. */
  friend HostBlock;

  /** Generic block wrapper needs access to internals. */
  template <typename Type, size_t Dims>
  friend struct Block;

 public:
  // clang-format off
  /** Type used to specify padding amounts. */
  using Padding      = typename Space::Padding;
  /** Non-owning pointer to a multiarch allocator. */
  using AllocatorPtr = MultiarchAllocator*;
  // clang-format on

  /**
   * Swaps the two blocks. All members are exchanged, including the stream
   * and device id, so that each block remains self-consistent (freeing or
   * copying on the device it actually belongs to).
   */
  friend auto swap(DeviceBlock& lhs, DeviceBlock& rhs) noexcept -> void {
    using std::swap;
    swap(lhs.data_, rhs.data_);
    swap(lhs.allocator_, rhs.allocator_);
    swap(lhs.mem_props_, rhs.mem_props_);
    swap(lhs.space_, rhs.space_);
    // Previously omitted: without these, swapped blocks would free memory
    // and launch copies on the wrong device/stream.
    swap(lhs.stream_, rhs.stream_);
    swap(lhs.device_id_, rhs.device_id_);
  }

  /*==--- [construction] ---------------------------------------------------==*/

  /** Default constructor: creates a stream but performs no allocation. */
  DeviceBlock() noexcept {
    gpu::create_stream(&stream_);
  }

  /**
   * Constructor which takes an existing stream and an optional allocator.
   * No allocation is performed.
   */
  DeviceBlock(Stream stream, AllocatorPtr allocator = nullptr) noexcept
  : allocator_{allocator}, stream_{stream} {}

  /** Constructor which stores the allocator and creates a new stream. */
  DeviceBlock(AllocatorPtr allocator) noexcept : allocator_{allocator} {
    gpu::create_stream(&stream_);
  }

  /** Constructor which adopts an existing stream. */
  DeviceBlock(Stream stream) noexcept : stream_{stream} {}

  /** Destructor: frees the device data if this block owns it. */
  ~DeviceBlock() noexcept {
    cleanup();
  }

  /**
   * Constructor which sets the sizes of each dimension and allocates the
   * device storage. A new stream is created for the block.
   * \param sizes One size per dimension.
   */
  template <
    typename... Sizes,
    all_arithmetic_size_enable_t<Dimensions, Sizes...> = 0>
  DeviceBlock(Sizes&&... sizes) : space_{ripple_forward(sizes)...} {
    gpu::create_stream(&stream_);
    allocate();
  }

  /**
   * Constructor which sets the padding and the sizes of each dimension,
   * then allocates the device storage.
   * \param padding Amount of padding on each side of each dimension.
   * \param sizes   One size per dimension.
   */
  template <
    typename... Sizes,
    all_arithmetic_size_enable_t<Dimensions, Sizes...> = 0>
  DeviceBlock(Padding padding, Sizes&&... sizes)
  : space_{padding, ripple_forward(sizes)...} {
    gpu::create_stream(&stream_);
    allocate();
  }

  /**
   * Copy constructor: allocates fresh storage and performs an asynchronous
   * device-to-device copy of the other block's data.
   */
  DeviceBlock(const DeviceBlock& other)
  : space_{other.space_}, stream_{other.stream_} {
    // A fresh stream is created for this block; the init above only seeds
    // stream_ until create_stream overwrites it.
    gpu::create_stream(&stream_);
    allocate();
    copy_from_device(other);
  }

  /**
   * Move constructor: steals the other block's data, allocator, device and
   * stream, leaving the other block empty and unowning.
   */
  DeviceBlock(DeviceBlock&& other) noexcept : space_{other.space_} {
    data_       = other.data_;
    // The allocator must come along with the data: cleanup() chooses between
    // allocator_->...free() and gpu::free_device() based on allocator_, so
    // dropping it here would free allocator-owned memory the wrong way.
    allocator_  = other.allocator_;
    device_id_  = other.device_id_;
    stream_     = other.stream_;
    other.data_ = nullptr;
    mem_props_  = other.mem_props_;
    other.mem_props_.reset();
  }

  /**
   * Constructor from a host block: allocates device storage and copies the
   * host data to the device asynchronously.
   */
  DeviceBlock(const HostBlock& other)
  : allocator_{other.allocator_}, space_{other.space_} {
    gpu::create_stream(&stream_);
    allocate();
    copy_from_host(other);
  }

  /*==--- [operator overloads] ---------------------------------------------==*/

  /**
   * Copy assignment: reallocates to match the other block's space and copies
   * its data. Self-assignment is a no-op (reallocate() would otherwise free
   * data_ before copy_from_device read from it).
   */
  auto operator=(const DeviceBlock& other) -> DeviceBlock& {
    if (this == &other) {
      return *this;
    }
    stream_ = other.stream_;
    space_  = other.space_;
    reallocate();
    copy_from_device(other);
    return *this;
  }

  /**
   * Assignment from a host block: adopts the host block's space and
   * allocator, reallocates, and copies the host data to the device.
   */
  auto operator=(const HostBlock& other) -> DeviceBlock& {
    // Note: member access fixed to space_ for consistency with the
    // converting constructor above (was other._space).
    space_     = other.space_;
    allocator_ = other.allocator_;
    gpu::create_stream(&stream_);
    reallocate();
    copy_from_host(other);
    return *this;
  }

  /**
   * Returns an iterator to the element at the given indices, offset past the
   * padding. Indices are in the internal (unpadded) space.
   */
  template <typename... Indices>
  ripple_all auto operator()(Indices&&... is) noexcept -> Iter {
    return Iter{
      Allocator::create(
        data_, space_, ripple_forward(is) + space_.padding()...),
      space_};
  }

  /** Const overload of the indexing operator. */
  template <typename... Indices>
  ripple_all auto
  operator()(Indices&&... is) const noexcept -> ConstIter {
    return ConstIter{
      Allocator::create(
        data_, space_, ripple_forward(is) + space_.padding()...),
      space_};
  }

  /*==--- [interface] ------------------------------------------------------==*/

  /**
   * Creates a host-side copy of this block.
   * \param op_kind Whether the copy is synchronous or asynchronous.
   */
  auto
  as_host(BlockOpKind op_kind = BlockOpKind::synchronous) const -> HostBlock {
    return HostBlock{*this, op_kind};
  }

  /** Copies the data from the given host block into this block. */
  auto copy_data(const HostBlock& other) noexcept -> void {
    copy_from_host(other);
  }

  /**
   * Returns an iterator to the first internal element.
   * \param padding_mod Shrinks the effective padding by this amount, growing
   *                    the iterated region correspondingly on each side.
   */
  ripple_all auto begin(int padding_mod = 0) noexcept -> Iter {
    // Modify the iteration space based on the padding parameter:
    auto space = space_;
    if (padding_mod != 0) {
      space.padding() = space_.padding() - padding_mod;
      unrolled_for<Dimensions>([&](auto i) {
        space.resize_dim(
          i, space_.internal_size(i) + static_cast<int>(2 * padding_mod));
      });
    }
    // This is the non-const overload, so a mutable Iter is created here
    // (a ConstIter would prevent callers from modifying the data).
    auto it = Iter{Allocator::create(data_, space), space};
    unrolled_for<Dimensions>([&](auto dim) { it.shift(dim, space.padding()); });
    return it;
  }

  /** Const overload of begin(), returning a constant iterator. */
  ripple_all auto
  begin(int padding_mod = 0) const noexcept -> ConstIter {
    // Modify the iteration space based on the padding parameter:
    auto space = space_;
    if (padding_mod != 0) {
      space.padding() = space_.padding() - padding_mod;
      unrolled_for<Dimensions>([&](auto i) {
        space.resize_dim(
          i, space_.internal_size(i) + static_cast<int>(2 * padding_mod));
      });
    }
    auto it = ConstIter{Allocator::create(data_, space), space};
    unrolled_for<Dimensions>([&](auto dim) { it.shift(dim, space.padding()); });
    return it;
  }

  /** Frees the current storage and allocates anew for the current space. */
  auto reallocate() -> void {
    cleanup();
    allocate();
  }

  /**
   * Resizes a single dimension. Note that this does not reallocate; call
   * reallocate() (or resize()) for the storage to match.
   */
  template <typename Dim>
  auto resize_dim(Dim&& dim, size_t size) noexcept -> void {
    space_[dim] = size;
  }

  /** Resizes the space to the given sizes and reallocates the storage. */
  template <typename... Sizes>
  auto resize(Sizes&&... sizes) -> void {
    space_.resize(ripple_forward(sizes)...);
    reallocate();
  }

  /** Returns the total number of internal (unpadded) elements. */
  auto size() const noexcept -> size_t {
    return space_.internal_size();
  }

  /** Returns the number of internal elements in the given dimension. */
  template <typename Dim>
  auto size(Dim&& dim) const -> size_t {
    return space_.internal_size(ripple_forward(dim));
  }

  /** Returns the pitch (padded size) of the given dimension. */
  template <typename Dim>
  constexpr auto pitch(Dim&& dim) const noexcept -> size_t {
    return space_.size(ripple_forward(dim));
  }

  /** Returns the number of dimensions of the block. */
  constexpr auto dimensions() const noexcept -> size_t {
    return Dimensions;
  }

  /**
   * Sets the padding of the space. Does not reallocate; the caller must
   * reallocate() for the storage to reflect the new padding.
   */
  auto set_padding(Padding padding) noexcept -> void {
    space_.padding() = padding;
  }

  /** Returns the padding on each side of each dimension. */
  auto padding() const noexcept -> Padding {
    return space_.padding();
  }

  /** Returns the number of bytes required for the (padded) allocation. */
  auto mem_requirement() const noexcept -> size_t {
    return Allocator::allocation_size(space_.size());
  }

  /** Sets the id of the device on which this block's memory lives. */
  auto set_device_id(uint32_t device_id) noexcept -> void {
    device_id_ = device_id;
  }

  /** Returns the id of the device on which this block's memory lives. */
  auto device_id() const noexcept -> uint32_t {
    return device_id_;
  }

  /** Returns the stream used by this block for asynchronous operations. */
  auto stream() const noexcept -> Stream {
    return stream_;
  }

  /** Sets the stream used by this block for asynchronous operations. */
  auto set_stream(Stream stream) noexcept -> void {
    stream_ = stream;
  }

  /** Destroys the block's stream on the block's device. */
  auto destroy_stream() noexcept -> void {
    gpu::set_device(device_id_);
    gpu::destroy_stream(stream_);
  }

  /**
   * Returns the kind of memcpy needed to copy this (device) block's data
   * into a block of the given type.
   */
  template <typename Block>
  constexpr auto get_copy_type() const noexcept -> cudaMemcpyKind {
    return is_host_block_v<Block> ? cudaMemcpyDeviceToHost
                                  : cudaMemcpyDeviceToDevice;
  }

 private:
  Ptr              data_      = nullptr; //!< Raw pointer to device data.
  AllocatorPtr     allocator_ = nullptr; //!< Optional allocator (non-owning).
  Space            space_;               //!< Sizes and padding of the block.
  Stream           stream_;              //!< Stream for async operations.
  uint32_t         device_id_ = 0;       //!< Device on which the data lives.
  BlockMemoryProps mem_props_;           //!< Ownership/allocation flags.

  /**
   * Allocates the device storage, through the allocator if one was given,
   * otherwise directly from the gpu interface.
   */
  auto allocate() -> void {
    // Can only allocate if the memory is not allocated, and if we own it.
    if (data_ == nullptr && !mem_props_.allocated) {
      gpu::set_device(device_id_);
      if (allocator_ != nullptr) {
        data_ = allocator_->gpu_allocator(device_id_)
                  .alloc(mem_requirement(), Traits::alignment);
      } else {
        gpu::allocate_device(
          reinterpret_cast<void**>(&data_), mem_requirement());
      }
      mem_props_.allocated = true;
      mem_props_.must_free = true;
    }
  }

  /**
   * Frees the device storage if this block owns it, using the same path
   * (allocator or direct) that allocated it.
   */
  auto cleanup() -> void {
    if (data_ != nullptr && mem_props_.must_free) {
      gpu::set_device(device_id_);
      if (allocator_ != nullptr) {
        allocator_->gpu_allocator(device_id_).free(data_);
      } else {
        gpu::free_device(data_);
      }
      data_                = nullptr;
      mem_props_.must_free = false;
      mem_props_.allocated = false;
    }
  }

  /** Asynchronously copies the host block's data to this device block. */
  auto copy_from_host(const HostBlock& other) noexcept -> void {
    const auto alloc_size = Allocator::allocation_size(space_.size());
    gpu::set_device(device_id_);
    gpu::memcpy_host_to_device_async(data_, other.data_, alloc_size, stream_);
  }

  /**
   * Asynchronously copies the other device block's data to this block, on
   * the other block's stream.
   */
  auto copy_from_device(const DeviceBlock& other) noexcept -> void {
    const auto alloc_size = Allocator::allocation_size(space_.size());
    gpu::set_device(device_id_);
    gpu::memcpy_device_to_device_async(
      data_, other.data_, alloc_size, other.stream());
  }
};

} // namespace ripple

#endif // RIPPLE_CONTAINER_DEVICE_BLOCK_HPP

Docs

Access comprehensive developer documentation for Ripple

View Docs

Tutorials

Get tutorials to help you understand all features

View Tutorials

Examples

Find examples to help get started

View Examples