• Docs >
  • Program Listing for File gpu_info.hpp
Shortcuts

Program Listing for File gpu_info.hpp

Return to documentation for file (include/ripple/arch/gpu_info.hpp)

#ifndef RIPPLE_ARCH_GPU_INFO_HPP
#define RIPPLE_ARCH_GPU_INFO_HPP

#include "gpu_utils.hpp"
#include <ripple/storage/storage_traits.hpp>
#include <ripple/utility/range.hpp>
#include <ripple/utility/portability.hpp>
#include <array>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

namespace ripple {

struct GpuInfo {
  // clang-format off
  static constexpr size_t compute_streams  = 1;
  static constexpr size_t transfer_streams = 2;
  static constexpr size_t total_streams    = compute_streams + transfer_streams;
  // clang-format on

  struct Stream {
    GpuStream stream = nullptr;
    bool      set    = false;

    auto create() noexcept -> void {
      if (set) {
        return;
      }
      gpu::create_nonblocking_stream(&stream);
    }

    auto destroy() noexcept -> void {
      if (!set) {
        return;
      }

      gpu::destroy_stream(stream);
      set = false;
    }
  };

  /*==--- [aliases] --------------------------------------------------------==*/

  // clang-format off
  using Index           = uint32_t;
  using PeerContainer   = std::vector<Index>;
  using StreamContainer = std::array<Stream, total_streams>;
  using Id              = uint8_t;
  // clang-format on

  /*==--- [constants] ------------------------------------------------------==*/

  static constexpr Index invalid = 0xFFFFFFFF;

  // clang-format off
  static constexpr size_t padding_size = avoid_false_sharing_size - ((
    sizeof(PeerContainer)   +
    sizeof(Index)           +
    sizeof(uint64_t)        +
    sizeof(uint64_t)        +
    sizeof(uint8_t)         +
    sizeof(uint8_t)         +
    sizeof(bool)            +
    sizeof(StreamContainer)
  ) % avoid_false_sharing_size);
  // clang-format on

  /*==--- [constructor] ----------------------------------------------------==*/

  GpuInfo(Index idx) noexcept : peers{idx}, index{idx} {
    gpu::set_device(index);
    for (auto& stream : streams) {
      stream.create();
    }
  }

  ~GpuInfo() noexcept {
    gpu::set_device(index);
    for (auto& stream : streams) {
      stream.destroy();
    }
  }

  /*==--- [interface] ------------------------------------------------------==*/

  static auto create_for_all_devices() -> std::vector<GpuInfo> {
    Index num_devices = device_count();
    auto  devices     = std::vector<GpuInfo>();

#if defined(ripple_cuda_available)
    cudaDeviceProp device_props;
    int            can_access_peer;
    for (auto dev : range(num_devices)) {
      // Constructor sets the device to the current device.
      gpu::set_device(dev);
      cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
      cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte);
      auto& info = devices.emplace_back(dev);
      cudaGetDeviceProperties(&device_props, dev);
      info.mem_size = device_props.totalGlobalMem;

      // Determine if peer to peer is supported:
      for (Index i = 0; i < num_devices; ++i) {
        if (i == dev) {
          continue;
        }
        cudaDeviceCanAccessPeer(&can_access_peer, dev, i);
        if (can_access_peer) {
          info.peers.emplace_back(i);
          cudaDeviceEnablePeerAccess(i, 0);
        }
      }
    }
#endif
    return devices;
  }

  static auto device_count() noexcept -> uint32_t {
    int count = 0;
    ripple_if_cuda(cudaGetDeviceCount(&count));
    return static_cast<uint32_t>(count);
  }

  auto is_invalid() const noexcept -> bool {
    return index == invalid;
  }

  auto peer_to_peer_available() const noexcept -> bool {
    return !peers.empty();
  }

  auto peer_to_peer_available(Index other_id) const noexcept -> bool {
    for (auto& peer_id : peers) {
      if (peer_id == other_id) {
        return true;
      }
    }
    return false;
  }

  auto next_compute_stream_id() noexcept -> Id {
    Id id      = compute_id;
    compute_id = (compute_id + 1) % compute_streams;
    return id;
  }

  auto next_transfer_stream_id() noexcept -> Id {
    Id id = transfer_id;

    // Need to change the id in the range [transfer_streams, total_streams):
    transfer_id =
      (transfer_id + 1 - compute_streams) % (total_streams - compute_streams) +
      compute_streams;
    return id;
  }

  auto synchronize_streams() const noexcept -> void {
    if (index == invalid) {
      return;
    }

    gpu::set_device(index);
    for (auto& stream : streams) {
      if (!stream.set) {
        continue;
      }
      gpu::synchronize_stream(stream.stream);
    }
  }

  auto prepare_barrier() noexcept -> void {
    barrier_up = true;
  }

  auto execute_barrier() noexcept -> void {
    if (index == invalid) {
      return;
    }

    gpu::set_device(index);
    gpu::synchronize_device();
    barrier_up = false;
  }

  auto is_barrier_down() const noexcept -> bool {
    return !barrier_up;
  }

  StreamContainer streams     = {};
  PeerContainer   peers       = {};
  Index           index       = invalid;
  uint64_t        mem_size    = 0;
  uint64_t        mem_alloc   = 0;
  Id              compute_id  = 0;
  Id              transfer_id = 0;
  bool            barrier_up  = false;
  uint8_t         pad[padding_size];
};

} // namespace ripple

#endif // RIPPLE_ARCH_GPU_INFO_HPP

Docs

Access comprehensive developer documentation for Ripple

View Docs

Tutorials

Get tutorials to help you understand all features

View Tutorials

Examples

Find examples to help get started

View Examples