• Docs >
  • Program Listing for File host_block.hpp
Shortcuts

Program Listing for File host_block.hpp

Return to documentation for file (include/ripple/container/host_block.hpp)

#ifndef RIPPLE_CONTAINER_HOST_BLOCK_HPP
#define RIPPLE_CONTAINER_HOST_BLOCK_HPP

#include "block_memory_properties.hpp"
#include "block_traits.hpp"
#include <ripple/allocation/multiarch_allocator.hpp>
#include <ripple/iterator/block_iterator.hpp>
#include <ripple/utility/memory.hpp>

namespace ripple {

/**
 * Host-side storage for a Dimensions-dimensional block of T which can be
 * copied to and from a DeviceBlock with the same type parameters.
 *
 * The memory may be regular pageable memory or pinned (page-locked) memory.
 * Pinned memory is selected by setting BlockOpKind::asynchronous (see
 * set_op_kind()), which also enables asynchronous host <-> device copies.
 *
 * \tparam T          Type of the data stored in the block.
 * \tparam Dimensions Number of dimensions of the block.
 */
template <typename T, size_t Dimensions>
class HostBlock {
  //==--- [traits] ---------------------------------------------------------==//
  // clang-format off
  // NOTE(review): Traits are derived from the *DeviceBlock* specialization —
  // confirm this is intentional and that host/device blocks share the same
  // Allocator and Space types.
  /// Traits for the block.
  using Traits      = BlockTraits<DeviceBlock<T, Dimensions>>;
  /// Allocator used to allocate and access the block data.
  using Allocator   = typename Traits::Allocator;
  /// Type of the (possibly padded) iteration space for the block.
  using Space       = typename Traits::Space;
  /// Type of the raw data pointer.
  using Ptr         = void*;
  /// Type of the stored data.
  using Value       = typename Traits::Value;
  /// Type of a mutable iterator over the block.
  using Iter        = typename Traits::Iter;
  /// Type of a non-mutating iterator over the block.
  using ConstIter   = const Iter;
  /// Device-side block with matching type parameters.
  using DeviceBlock = DeviceBlock<T, Dimensions>;
  /// Type of the stream used for device operations.
  using Stream      = cudaStream_t;
  // clang-format on

  /// DeviceBlock needs access to private state for host <-> device copies.
  friend DeviceBlock;

  /// Block wrapper needs access to private state.
  template <typename Type, size_t Dims>
  friend struct Block;

 public:
  // clang-format off
  /// Type used to specify the padding of the block.
  using Padding      = typename Space::Padding;
  /// Pointer to a multi-architecture allocator.
  using AllocatorPtr = MultiarchAllocator*;
  // clang-format on

  /// Swaps all state of \p lhs and \p rhs (ADL swap idiom).
  friend auto swap(HostBlock& lhs, HostBlock& rhs) noexcept -> void {
    using std::swap;
    swap(lhs.data_, rhs.data_);
    swap(lhs.allocator_, rhs.allocator_);
    swap(lhs.mem_props_, rhs.mem_props_);
    swap(lhs.space_, rhs.space_);
  }

  /*==--- [construction] ---------------------------------------------------==*/

  /// Default constructor: creates an empty, unallocated block.
  HostBlock() noexcept = default;

  /// Constructor which uses \p allocator for all memory (de)allocation.
  /// \param allocator The allocator for the block memory.
  // Fixed: previously initialized allocator_ from itself (allocator_{allocator_}),
  // discarding the argument and leaving the member indeterminate.
  HostBlock(MultiarchAllocator* allocator) noexcept : allocator_{allocator} {}

  /// Destructor: releases any owned allocation.
  ~HostBlock() noexcept {
    cleanup();
  }

  //==--- [synchronous construction] ----------------------------------------=//

  /// Constructs a block with the given \p sizes (one per dimension) and
  /// allocates pageable memory synchronously.
  /// \param sizes The size of each dimension of the block.
  template <
    typename... Sizes,
    all_arithmetic_size_enable_t<Dimensions, Sizes...> = 0>
  HostBlock(Sizes&&... sizes) noexcept : space_{ripple_forward(sizes)...} {
    allocate();
  }

  /// Constructs a block with \p padding elements on each side of each
  /// dimension, and the given \p sizes, allocating synchronously.
  /// \param padding Amount of padding for the block.
  /// \param sizes   The size of each dimension of the block.
  template <
    typename... Sizes,
    all_arithmetic_size_enable_t<Dimensions, Sizes...> = 0>
  HostBlock(Padding padding, Sizes&&... sizes)
  : space_{padding, ripple_forward(sizes)...} {
    allocate();
  }

  /// Copy constructor: deep-copies the data from \p other, using the same
  /// allocator and memory kind (pinned / pageable).
  /// \param other The block to copy from.
  // Fixed: previously copied mem_props_ wholesale (including allocated /
  // must_free) and memcpy'd into a null data_ without ever allocating.
  HostBlock(const HostBlock& other)
  : allocator_{other.allocator_}, space_{other.space_} {
    mem_props_.pinned     = other.mem_props_.pinned;
    mem_props_.async_copy = other.mem_props_.async_copy;
    allocate();
    const auto bytes = Allocator::allocation_size(space_.size());
    std::memcpy(data_, other.data_, bytes);
  }

  /// Move constructor: steals the allocation from \p other, leaving it empty.
  /// \param other The block to move from.
  HostBlock(HostBlock&& other) noexcept : space_{other.space_} {
    allocator_  = other.allocator_;
    data_       = other.data_;
    other.data_ = nullptr;
    mem_props_  = other.mem_props_;
    other.mem_props_.reset();
  }

  /// Constructs a host block from the device block \p other, allocating host
  /// memory and copying the device data across.
  /// \param other   The device block to copy from.
  /// \param op_kind Kind of the copy (synchronous or asynchronous/pinned).
  HostBlock(
    const DeviceBlock& other, BlockOpKind op_kind = BlockOpKind::synchronous)
  : allocator_{other.allocator_}, space_{other.space_} {
    set_op_kind(op_kind);
    reallocate();
    copy_from_device(other);
  }

  /*==--- [asynchronous construction] --------------------------------------==*/

  /// Constructs a block with the given \p sizes, with pinned memory when
  /// \p op_kind is asynchronous.
  /// \param op_kind Kind of operations for the block.
  /// \param sizes   The size of each dimension of the block.
  template <
    typename... Sizes,
    all_arithmetic_size_enable_t<Dimensions, Sizes...> = 0>
  HostBlock(BlockOpKind op_kind, Sizes&&... sizes)
  : space_{ripple_forward(sizes)...} {
    set_op_kind(op_kind);
    allocate();
  }

  /// Constructs a block with \p padding and the given \p sizes, with pinned
  /// memory when \p op_kind is asynchronous.
  /// \param op_kind Kind of operations for the block.
  /// \param padding Amount of padding for the block.
  /// \param sizes   The size of each dimension of the block.
  template <
    typename... Sizes,
    all_arithmetic_size_enable_t<Dimensions, Sizes...> = 0>
  HostBlock(BlockOpKind op_kind, Padding padding, Sizes&&... sizes)
  : space_{padding, ripple_forward(sizes)...} {
    set_op_kind(op_kind);
    allocate();
  }

  /*==--- [operator overloads] ---------------------------------------------==*/

  /// Copy assignment: deep-copies the data from \p other.
  /// \param other The block to copy from.
  /// \return A reference to this block.
  // Fixed: now guards against self-assignment, frees the old allocation with
  // the *current* allocator before adopting other's allocator and memory kind.
  auto operator=(const HostBlock& other) -> HostBlock& {
    if (this != &other) {
      cleanup();
      allocator_            = other.allocator_;
      space_                = other.space_;
      mem_props_.pinned     = other.mem_props_.pinned;
      mem_props_.async_copy = other.mem_props_.async_copy;
      allocate();
      const auto bytes = Allocator::allocation_size(space_.size());
      std::memcpy(data_, other.data_, bytes);
    }
    return *this;
  }

  /// Move assignment: steals the allocation from \p other, leaving it empty.
  /// \param other The block to move from.
  /// \return A reference to this block.
  // Fixed: previously leaked the existing allocation (data_ was overwritten
  // without cleanup()) and did not transfer the allocator.
  auto operator=(HostBlock&& other) noexcept -> HostBlock& {
    if (this != &other) {
      cleanup();
      space_      = other.space_;
      allocator_  = other.allocator_;
      data_       = other.data_;
      other.data_ = nullptr;
      mem_props_  = other.mem_props_;
      other.mem_props_.reset();
    }
    return *this;
  }

  /// Assignment from a device block: reallocates host memory to match
  /// \p other and synchronously copies the device data across.
  /// \param other The device block to copy from.
  /// \return A reference to this block.
  auto operator=(const DeviceBlock& other) -> HostBlock& {
    set_op_kind(BlockOpKind::synchronous);
    allocator_ = other.allocator_;
    space_     = other.space_;
    reallocate();
    copy_from_device(other);
    return *this;
  }

  /// Returns an iterator to the element at the given \p is indices, offset
  /// by the block padding so that index 0 is the first *internal* element.
  /// \param is The indices of the element, one per dimension.
  template <typename... Indices>
  auto operator()(Indices&&... is) noexcept -> Iter {
    return Iter{
      Allocator::create(data_, space_, ripple_forward(is) + padding()...),
      space_};
  }

  /// Returns a const iterator to the element at the given \p is indices,
  /// offset by the block padding.
  /// \param is The indices of the element, one per dimension.
  template <typename... Indices>
  auto operator()(Indices&&... is) const noexcept -> ConstIter {
    return ConstIter{
      Allocator::create(data_, space_, ripple_forward(is) + padding()...),
      space_};
  }

  /*==--- [interface] ------------------------------------------------------==*/

  /// Creates a device block with a copy of this block's data.
  auto as_device() const -> DeviceBlock {
    return DeviceBlock{*this};
  }

  /// Copies the data from the device block \p other into this block.
  /// \param other The device block to copy data from.
  auto copy_data(const DeviceBlock& other) noexcept -> void {
    copy_from_device(other);
  }

  /// Returns an iterator to the first internal (non-padding) element.
  auto begin() noexcept -> Iter {
    auto it = Iter{Allocator::create(data_, space_), space_};
    shift_iterator(it);
    return it;
  }

  /// Returns a const iterator to the first internal (non-padding) element.
  auto begin() const -> ConstIter {
    auto it = ConstIter{Allocator::create(data_, space_), space_};
    shift_iterator(it);
    return it;
  }

  /// Frees any existing allocation and allocates memory for the current
  /// space. Existing data is *not* preserved.
  auto reallocate() -> void {
    cleanup();
    allocate();
  }

  /// Reallocates the memory and constructs each element in place with
  /// \p args.
  /// \param args Arguments for the element constructor.
  template <typename... Args>
  auto reallocate_and_init(Args&&... args) -> void {
    cleanup();
    allocate_and_init(ripple_forward(args)...);
  }

  /// Sets the size of dimension \p dim to \p size. Does not reallocate;
  /// call resize()/reallocate() for that.
  /// \param dim  The dimension to resize.
  /// \param size The new size of the dimension.
  template <typename Dim>
  auto resize_dim(Dim&& dim, size_t size) noexcept -> void {
    space_[dim] = size;
  }

  /// Resizes the block to the given \p sizes and reallocates the memory.
  /// Existing data is *not* preserved.
  /// \param sizes The new size of each dimension.
  template <typename... Sizes>
  auto resize(Sizes&&... sizes) -> void {
    space_.resize(ripple_forward(sizes)...);
    reallocate();
  }

  /// Returns the total number of internal (non-padding) elements.
  auto size() const noexcept -> size_t {
    return space_.internal_size();
  }

  /// Returns the number of internal elements in dimension \p dim.
  /// \param dim The dimension to get the size of.
  template <typename Dim>
  auto size(Dim&& dim) const noexcept -> size_t {
    return space_.internal_size(ripple_forward(dim));
  }

  /// Returns the pitch (total size, including padding) of dimension \p dim.
  /// \param dim The dimension to get the pitch of.
  template <typename Dim>
  constexpr auto pitch(Dim&& dim) const noexcept -> size_t {
    return space_.size(ripple_forward(dim));
  }

  /// Returns the number of dimensions of the block.
  constexpr auto dimensions() const noexcept -> size_t {
    return Dimensions;
  }

  /// Sets the operation kind for the block: asynchronous selects pinned
  /// (page-locked) memory and async copies, synchronous selects pageable
  /// memory. Takes effect on the next (re)allocation.
  /// \param op_kind The kind of operations for the block.
  auto set_op_kind(BlockOpKind op_kind) noexcept -> void {
    if (op_kind == BlockOpKind::asynchronous) {
      mem_props_.pinned     = true;
      mem_props_.async_copy = true;
      return;
    }
    mem_props_.pinned     = false;
    mem_props_.async_copy = false;
  }

  /// Sets the amount of padding for the block. Does not reallocate.
  /// \param padding The new padding amount.
  auto set_padding(Padding padding) noexcept -> void {
    space_.padding() = padding;
  }

  /// Returns the amount of padding for the block.
  auto padding() const noexcept -> Padding {
    return space_.padding();
  }

  /// Returns the number of bytes required for the block allocation.
  auto mem_requirement() const noexcept -> size_t {
    return Allocator::allocation_size(space_.size());
  }

  /// Returns the stream used by the block (the default GPU stream).
  auto stream() const -> GpuStream {
    return default_gpu_stream;
  }

  /// Returns the memcpy kind for a copy from this block to a \p Block.
  /// \tparam Block The type of the destination block.
  template <typename Block>
  constexpr auto get_copy_type() const -> cudaMemcpyKind {
    return is_host_block_v<Block> ? cudaMemcpyHostToHost
                                  : cudaMemcpyHostToDevice;
  }

 private:
  Ptr              data_      = nullptr; //!< Pointer to the block data.
  AllocatorPtr     allocator_ = nullptr; //!< Optional custom allocator.
  Space            space_;               //!< Iteration space for the block.
  BlockMemoryProps mem_props_;           //!< Properties of the allocation.

  /// Allocates memory for the block if not already allocated, preferring the
  /// custom allocator, then pinned memory, then plain malloc.
  auto allocate() -> void {
    if (data_ == nullptr && !mem_props_.allocated) {
      if (allocator_ != nullptr) {
        data_ = allocator_->cpu_allocator().alloc(
          mem_requirement(), Traits::alignment);
      } else if (mem_props_.pinned) {
        cpu::allocate_host_pinned(
          reinterpret_cast<void**>(&data_), mem_requirement());
      } else {
        data_ = malloc(mem_requirement());
      }
      mem_props_.must_free = true;
      mem_props_.allocated = true;
    }
  }

  /// Allocates memory and constructs each element in place with \p args.
  /// \param args Arguments for the element constructor.
  template <typename... Args>
  auto allocate_and_init(Args&&... args) -> void {
    allocate();
    invoke(
      *this,
      [](auto&& it, auto&&... as) {
        using ItType = std::decay_t<decltype(*it)>;
        // Placement-new each element into the raw allocation.
        new (&(*it)) ItType{ripple_forward(as)...};
      },
      ripple_forward(args)...);
  }

  /// Frees the allocation (if owned), matching the mechanism used by
  /// allocate(): custom allocator, pinned free, or plain free.
  auto cleanup() -> void {
    if (data_ != nullptr && mem_props_.must_free && mem_props_.allocated) {
      if (allocator_ != nullptr) {
        allocator_->cpu_allocator().free(data_);
      } else if (mem_props_.pinned) {
        cpu::free_host_pinned(data_);
      } else {
        free(data_);
      }
      data_                = nullptr;
      mem_props_.allocated = false;
      mem_props_.must_free = false;
    }
  }

  /// Copies the data from the device block \p other into this block,
  /// synchronizing the stream unless async copies are enabled.
  /// \param other The device block to copy from.
  auto copy_from_device(const DeviceBlock& other) noexcept -> void {
    const auto alloc_size = Allocator::allocation_size(space_.size());
    gpu::memcpy_device_to_host_async(
      data_, other.data_, alloc_size, other.stream());
    if (!mem_props_.async_copy) {
      gpu::synchronize_stream(other.stream());
    }
  }

  /// Shifts the iterator \p it past the padding in every dimension.
  /// \param it The iterator to shift.
  auto shift_iterator(Iter& it) const noexcept -> void {
    unrolled_for<Dimensions>(
      [&](auto dim) { it.shift(dim, space_.padding()); });
  }
};

} // namespace ripple

#endif // RIPPLE_CONTAINER_HOST_BLOCK_HPP

Docs

Access comprehensive developer documentation for Ripple

View Docs

Tutorials

Get tutorials to help you understand all features

View Tutorials

Examples

Find examples to help you get started

View Examples