Skip to content

Commit

Permalink
support older CUDA versions
Browse files Browse the repository at this point in the history
  • Loading branch information
breyerml committed Dec 3, 2024
1 parent bac9573 commit cadcd4b
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 0 deletions.
4 changes: 4 additions & 0 deletions include/hws/gpu_nvidia/utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,17 @@ namespace hws::detail {
#define HWS_CUDA_ERROR_CHECK(cuda_func) cuda_func;
#endif

#if CUDA_VERSION >= 12000

/**
* @brief Convert the clock throttle reason event bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|".
* @details A bitmask equal to `0` is converted to the string "None".
* @note This function is only declared if `CUDA_VERSION >= 12000`; code calling it must be guarded by the same preprocessor condition.
* @param[in] clocks_event_reasons the bitmask to convert to a string
* @return all event throttle reasons (`[[nodiscard]]`)
*/
[[nodiscard]] std::string throttle_event_reason_to_string(unsigned long long clocks_event_reasons);

#endif

} // namespace hws::detail

#endif // HWS_GPU_NVIDIA_UTILITY_HPP_
4 changes: 4 additions & 0 deletions src/hws/gpu_nvidia/hardware_sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,13 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{ static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(clock_mem) };
}

#if CUDA_VERSION >= 12000
decltype(clock_samples_.throttle_reason_)::value_type::value_type clock_throttle_reason{};
if (nvmlDeviceGetCurrentClocksEventReasons(device, &clock_throttle_reason) == NVML_SUCCESS) {
clock_samples_.throttle_reason_ = decltype(clock_samples_.throttle_reason_)::value_type{ clock_throttle_reason };
clock_samples_.throttle_reason_string_ = decltype(clock_samples_.throttle_reason_string_)::value_type{ detail::throttle_event_reason_to_string(clock_throttle_reason) };
}
#endif

nvmlEnableState_t mode{};
nvmlEnableState_t default_mode{};
Expand Down Expand Up @@ -464,12 +466,14 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(value));
}

#if CUDA_VERSION >= 12000
if (clock_samples_.throttle_reason_string_.has_value()) {
decltype(clock_samples_.throttle_reason_)::value_type::value_type value{};
HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksEventReasons(device, &value))
clock_samples_.throttle_reason_->push_back(value);
clock_samples_.throttle_reason_string_->push_back(detail::throttle_event_reason_to_string(value));
}
#endif

if (clock_samples_.auto_boosted_clock_.has_value()) {
nvmlEnableState_t mode{};
Expand Down
4 changes: 4 additions & 0 deletions src/hws/gpu_nvidia/utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

namespace hws::detail {

#if CUDA_VERSION >= 12000

std::string throttle_event_reason_to_string(const unsigned long long clocks_event_reasons) {
if (clocks_event_reasons == 0ull) {
return "None";
Expand Down Expand Up @@ -52,4 +54,6 @@ std::string throttle_event_reason_to_string(const unsigned long long clocks_even
}
}

#endif

} // namespace hws::detail

0 comments on commit cadcd4b

Please sign in to comment.