Skip to content

Commit

Permalink
Merge branch 'branch-25.04' into realloc-policy-fix
Browse files Browse the repository at this point in the history
  • Loading branch information
shrshi authored Feb 12, 2025
2 parents 128b2c3 + 57dc53c commit fbdf4b8
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 21 deletions.
11 changes: 8 additions & 3 deletions cpp/include/cudf/utilities/span.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,11 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
* @param idx the index of the element to access
* @return A reference to the idx-th element of the sequence, i.e., `data()[idx]`
*/
constexpr typename base::reference operator[](size_type idx) const { return this->_data[idx]; }
constexpr typename base::reference operator[](typename base::size_type idx) const
{
  // Reject index types narrower than size_t at compile time: a narrow index
  // could silently truncate when addressing a large span.
  static_assert(sizeof(idx) >= sizeof(size_t), "index type must not be smaller than size_t");
  // Return a reference to the idx-th element, i.e. data()[idx].
  return *(this->_data + idx);
}

// not noexcept due to undefined behavior when size = 0
/**
Expand Down Expand Up @@ -402,8 +406,9 @@ struct device_span : public cudf::detail::span_base<T, Extent, device_span<T, Ex
* @param idx the index of the element to access
* @return A reference to the idx-th element of the sequence, i.e., `data()[idx]`
*/
__device__ constexpr typename base::reference operator[](size_type idx) const
__device__ constexpr typename base::reference operator[](typename base::size_type idx) const
{
  // Reject index types narrower than size_t at compile time: a narrow index
  // could silently truncate when addressing a large device span.
  static_assert(sizeof(idx) >= sizeof(size_t), "index type must not be smaller than size_t");
  // Return a reference to the idx-th element, i.e. data()[idx].
  return *(this->_data + idx);
}

Expand Down Expand Up @@ -512,7 +517,7 @@ class base_2dspan {
* @param row the index of the element to access
* @return A reference to the row-th element of the sequence, i.e., `data()[row]`
*/
CUDF_HOST_DEVICE constexpr RowType<T, dynamic_extent> operator[](size_t row) const
CUDF_HOST_DEVICE constexpr RowType<T, dynamic_extent> operator[](std::size_t row) const
{
  // Each row occupies `_size.second` contiguous elements of the flat buffer;
  // slice out the row-th such window.
  auto const row_width = _size.second;
  return _flat.subspan(row * row_width, row_width);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/groupby/hash/compute_aggregations.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ rmm::device_uvector<cudf::size_type> compute_aggregations(
// for shared memory aggregations
auto const size = cudf::type_dispatcher<cudf::dispatch_storage_type>(request.values.type(),
size_of_functor{});
return static_cast<size_type>(data_buffer_size) >= (size * GROUPBY_CARDINALITY_THRESHOLD);
return data_buffer_size >= (size * GROUPBY_CARDINALITY_THRESHOLD);
});

// Performs naive global memory aggregations when the workload is not compatible with shared
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/groupby/hash/compute_shared_memory_aggs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ __device__ void initialize_shmem_aggregations(cooperative_groups::thread_block c
idx);
}
}
block.sync();
}

__device__ void compute_pre_aggregrations(cudf::size_type col_start,
Expand Down Expand Up @@ -174,7 +173,6 @@ __device__ void compute_final_aggregations(cooperative_groups::thread_block cons
idx);
}
}
block.sync();
}

/* Takes the local_mapping_index and global_mapping_index to compute
Expand Down Expand Up @@ -213,6 +211,7 @@ CUDF_KERNEL void single_pass_shmem_aggs_kernel(cudf::size_type num_rows,
block.sync();

while (col_end < num_cols) {
block.sync();
if (block.thread_rank() == 0) {
calculate_columns_to_aggregate(col_start,
col_end,
Expand All @@ -234,6 +233,7 @@ CUDF_KERNEL void single_pass_shmem_aggs_kernel(cudf::size_type num_rows,
shmem_agg_mask_offsets,
cardinality,
d_agg_kinds);
block.sync();

compute_pre_aggregrations(col_start,
col_end,
Expand Down Expand Up @@ -263,7 +263,7 @@ CUDF_KERNEL void single_pass_shmem_aggs_kernel(cudf::size_type num_rows,
}
} // namespace

std::size_t get_available_shared_memory_size(cudf::size_type grid_size)
size_type get_available_shared_memory_size(cudf::size_type grid_size)
{
auto const active_blocks_per_sm =
cudf::util::div_rounding_up_safe(grid_size, cudf::detail::num_multiprocessors());
Expand All @@ -276,7 +276,7 @@ std::size_t get_available_shared_memory_size(cudf::size_type grid_size)
}

void compute_shared_memory_aggs(cudf::size_type grid_size,
std::size_t available_shmem_size,
size_type available_shmem_size,
cudf::size_type num_input_rows,
bitmask_type const* row_bitmask,
bool skip_rows_with_nulls,
Expand Down
10 changes: 5 additions & 5 deletions cpp/src/groupby/hash/compute_shared_memory_aggs.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,15 +22,15 @@
#include <rmm/cuda_stream_view.hpp>

namespace cudf::groupby::detail::hash {
std::size_t get_available_shared_memory_size(cudf::size_type grid_size);
size_type get_available_shared_memory_size(cudf::size_type grid_size);

std::size_t constexpr compute_shmem_offsets_size(cudf::size_type num_cols)
size_type constexpr compute_shmem_offsets_size(cudf::size_type num_cols)
{
return sizeof(cudf::size_type) * num_cols;
return static_cast<size_type>(sizeof(cudf::size_type) * num_cols);
}

void compute_shared_memory_aggs(cudf::size_type grid_size,
std::size_t available_shmem_size,
cudf::size_type available_shmem_size,
cudf::size_type num_input_rows,
bitmask_type const* row_bitmask,
bool skip_rows_with_nulls,
Expand Down
21 changes: 13 additions & 8 deletions cpp/src/groupby/hash/shared_memory_aggregator.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,6 +25,11 @@
#include <cuda/std/type_traits>

namespace cudf::groupby::detail::hash {
__device__ constexpr void set_mask(bool* mask)
{
  // Cheap non-atomic pre-check: only issue the atomic when the flag still
  // reads false, reducing contention when many threads target the same slot.
  bool const already_set = *mask;
  if (!already_set) { cudf::detail::atomic_max(mask, true); }
}

template <typename Source, cudf::aggregation::Kind k, typename Enable = void>
struct update_target_element_shmem {
__device__ void operator()(
Expand Down Expand Up @@ -52,7 +57,7 @@ struct update_target_element_shmem<
cudf::detail::atomic_min(&target_casted[target_index],
static_cast<DeviceTarget>(source.element<DeviceSource>(source_index)));

if (!target_mask[target_index]) { target_mask[target_index] = true; }
set_mask(target_mask + target_index);
}
};

Expand All @@ -74,7 +79,7 @@ struct update_target_element_shmem<
cudf::detail::atomic_max(&target_casted[target_index],
static_cast<DeviceTarget>(source.element<DeviceSource>(source_index)));

if (!target_mask[target_index]) { target_mask[target_index] = true; }
set_mask(target_mask + target_index);
}
};

Expand All @@ -97,7 +102,7 @@ struct update_target_element_shmem<
cudf::detail::atomic_add(&target_casted[target_index],
static_cast<DeviceTarget>(source.element<DeviceSource>(source_index)));

if (!target_mask[target_index]) { target_mask[target_index] = true; }
set_mask(target_mask + target_index);
}
};

Expand All @@ -117,7 +122,7 @@ struct update_target_element_shmem<
auto value = static_cast<Target>(source.element<Source>(source_index));
cudf::detail::atomic_add(&target_casted[target_index], value * value);

if (!target_mask[target_index]) { target_mask[target_index] = true; }
set_mask(target_mask + target_index);
}
};

Expand All @@ -137,7 +142,7 @@ struct update_target_element_shmem<
cudf::detail::atomic_mul(&target_casted[target_index],
static_cast<Target>(source.element<Source>(source_index)));

if (!target_mask[target_index]) { target_mask[target_index] = true; }
set_mask(target_mask + target_index);
}
};

Expand Down Expand Up @@ -202,7 +207,7 @@ struct update_target_element_shmem<
}
}

if (!target_mask[target_index]) { target_mask[target_index] = true; }
set_mask(target_mask + target_index);
}
};

Expand All @@ -228,7 +233,7 @@ struct update_target_element_shmem<
}
}

if (!target_mask[target_index]) { target_mask[target_index] = true; }
set_mask(target_mask + target_index);
}
};

Expand Down

0 comments on commit fbdf4b8

Please sign in to comment.