From f0d9f7c5489b11cc39b26102852e78cf8ffd0ff6 Mon Sep 17 00:00:00 2001
From: Agnes Leroy
Date: Tue, 4 Feb 2025 09:24:03 +0100
Subject: [PATCH] chore(gpu): track noise/degree in scalar bitops

---
 .../cuda/include/integer/integer.h            |  5 +-
 .../cuda/src/integer/bitwise_ops.cuh          | 20 +++--
 .../cuda/src/integer/cmux.cuh                 |  2 +-
 .../cuda/src/integer/integer.cuh              | 15 +++-
 .../cuda/src/integer/radix_ciphertext.cuh     | 29 ++++++-
 .../cuda/src/integer/scalar_bitops.cu         | 15 ++--
 .../cuda/src/integer/scalar_bitops.cuh        | 81 +++++++++++++-----
 backends/tfhe-cuda-backend/src/bindings.rs    |  6 +-
 tfhe/src/integer/gpu/ciphertext/info.rs       | 83 -------------------
 tfhe/src/integer/gpu/mod.rs                   | 23 +++--
 .../gpu/server_key/radix/scalar_bitwise_op.rs | 10 +--
 11 files changed, 140 insertions(+), 149 deletions(-)

diff --git a/backends/tfhe-cuda-backend/cuda/include/integer/integer.h b/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
index 018efe1e80..c349a426a2 100644
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
@@ -250,9 +250,10 @@ void cuda_bitop_integer_radix_ciphertext_kb_64(
 
 void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
     void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array_out, void const *lwe_array_input, void const *clear_blocks,
+    CudaRadixCiphertextFFI *lwe_array_out,
+    CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
     uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks, uint32_t lwe_ciphertext_count, BITOP_TYPE op);
+    void *const *ksks);
 
 void cleanup_cuda_integer_bitop(void *const *streams,
                                 uint32_t const *gpu_indexes, uint32_t gpu_count,
diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
index 1d7f85fa88..e39e33a8c8 100644
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
@@ -20,15 +20,10 @@ __host__ void host_integer_radix_bitop_kb(
     void *const *bsks, Torus *const *ksks) {
 
   auto lut = mem_ptr->lut;
-
-  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
-      streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_1, lwe_array_2,
-      bsks, ksks, lut, lut->params.message_modulus);
-
+  uint64_t degrees[lwe_array_1->num_radix_blocks];
   if (mem_ptr->op == BITOP_TYPE::BITAND) {
     for (uint i = 0; i < lwe_array_out->num_radix_blocks; i++) {
-      lwe_array_out->degrees[i] =
-          std::min(lwe_array_1->degrees[i], lwe_array_2->degrees[i]);
+      degrees[i] = std::min(lwe_array_1->degrees[i], lwe_array_2->degrees[i]);
     }
   } else if (mem_ptr->op == BITOP_TYPE::BITOR) {
     for (uint i = 0; i < lwe_array_out->num_radix_blocks; i++) {
@@ -41,7 +36,7 @@ __host__ void host_integer_radix_bitop_kb(
           result = max | j;
         }
       }
-      lwe_array_out->degrees[i] = result;
+      degrees[i] = result;
     }
   } else if (mem_ptr->op == BITXOR) {
     for (uint i = 0; i < lwe_array_out->num_radix_blocks; i++) {
@@ -55,9 +50,16 @@ __host__ void host_integer_radix_bitop_kb(
           result = max ^ j;
         }
       }
-      lwe_array_out->degrees[i] = result;
+      degrees[i] = result;
     }
   }
+
+  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+      streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_1, lwe_array_2,
+      bsks, ksks, lut, lut->params.message_modulus);
+
+  memcpy(lwe_array_out->degrees, degrees,
+         lwe_array_out->num_radix_blocks * sizeof(uint64_t));
 }
 
 template <typename Torus>
diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
index 6c68e7dccf..2977fdca85 100644
--- a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
@@ -122,7 +122,7 @@ __host__ void host_integer_radix_cmux_kb(
   integer_radix_apply_univariate_lookup_table_kb<Torus>(
       streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
-      mem_ptr->message_extract_lut);
+      mem_ptr->message_extract_lut, num_radix_blocks);
 
   delete mem_true;
   delete mem_false;
 }
diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh
index 1be62a1171..ff01505874 100644
--- a/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh
@@ -353,12 +353,15 @@ __host__ void pack_bivariate_blocks_with_single_block(
   check_cuda_error(cudaGetLastError());
 }
 
+/// num_radix_blocks corresponds to the number of blocks on which to apply the
+/// LUT. In scalar bitops we use a number of blocks that may be lower than or
+/// equal to the input and output numbers of blocks.
 template <typename Torus>
 __host__ void integer_radix_apply_univariate_lookup_table_kb(
     cudaStream_t const *streams, uint32_t const *gpu_indexes,
     uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
     CudaRadixCiphertextFFI const *lwe_array_in, void *const *bsks,
-    Torus *const *ksks, int_radix_lut<Torus> *lut) {
+    Torus *const *ksks, int_radix_lut<Torus> *lut, uint32_t num_radix_blocks) {
   // apply_lookup_table
   auto params = lut->params;
   auto pbs_type = params.pbs_type;
@@ -378,11 +381,15 @@ __host__ void integer_radix_apply_univariate_lookup_table_kb(
   if (lwe_array_out->lwe_dimension != lwe_array_in->lwe_dimension)
     PANIC("Cuda error: input and output radix ciphertexts should have the same "
           "lwe dimension")
+  if (num_radix_blocks > lwe_array_out->num_radix_blocks ||
+      num_radix_blocks > lwe_array_in->num_radix_blocks)
+    PANIC("Cuda error: num radix blocks on which lut is applied should be "
+          "smaller than or equal"
+          " to the number of input and output radix blocks")
 
   // In the case of extracting a single LWE this parameters are dummy
   uint32_t num_many_lut = 1;
   uint32_t lut_stride = 0;
-  uint32_t num_radix_blocks = lwe_array_in->num_radix_blocks;
   /// For multi GPU execution we create vectors of pointers for inputs and
   /// outputs
   std::vector<Torus *> lwe_array_in_vec = lut->lwe_array_in_vec;
@@ -452,7 +459,7 @@
   cuda_memcpy_async_to_cpu(&lut_indexes, lut->get_lut_indexes(0, 0),
                            lut->num_blocks * sizeof(Torus), streams[0],
                            gpu_indexes[0]);
-  for (uint i = 0; i < lwe_array_out->num_radix_blocks; i++) {
+  for (uint i = 0; i < num_radix_blocks; i++) {
     lwe_array_out->degrees[i] = lut->degrees[lut_indexes[i]];
     lwe_array_out->noise_levels[i] = NoiseLevel::NOMINAL;
   }
 }
@@ -1888,7 +1895,7 @@ void host_apply_univariate_lut_kb(cudaStream_t const *streams,
 
   integer_radix_apply_univariate_lookup_table_kb<Torus>(
       streams, gpu_indexes, gpu_count, radix_lwe_out, radix_lwe_in, bsks, ksks,
-      mem);
+      mem, radix_lwe_out->num_radix_blocks);
 }
 
 template <typename Torus>
diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/radix_ciphertext.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/radix_ciphertext.cuh
index 8949c8968f..baa428afc2 100644
--- a/backends/tfhe-cuda-backend/cuda/src/integer/radix_ciphertext.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/radix_ciphertext.cuh
@@ -36,8 +36,8 @@ void as_radix_ciphertext_slice(CudaRadixCiphertextFFI *output_radix,
   if (input_radix->num_radix_blocks < end_lwe_index - start_lwe_index + 1)
     PANIC("Cuda error: input radix should have more blocks than the specified "
           "range")
-  if (start_lwe_index >= end_lwe_index)
-    PANIC("Cuda error: slice range should be strictly positive")
+  if (start_lwe_index > end_lwe_index)
+    PANIC("Cuda error: slice range should be non-negative")
 
   auto lwe_size = input_radix->lwe_dimension + 1;
   output_radix->num_radix_blocks = end_lwe_index - start_lwe_index + 1;
@@ -80,4 +80,29 @@ void copy_radix_ciphertext_to_larger_output_slice_async(
   }
 }
 
+// end_lwe_index is inclusive
+template <typename Torus>
+void set_zero_radix_ciphertext_async(cudaStream_t const stream,
+                                     uint32_t const gpu_index,
+                                     CudaRadixCiphertextFFI *radix,
+                                     const uint32_t start_lwe_index,
+                                     const uint32_t end_lwe_index) {
+  if (radix->num_radix_blocks < end_lwe_index - start_lwe_index + 1)
+    PANIC("Cuda error: input radix should have more blocks than the specified "
+          "range")
+  if (start_lwe_index > end_lwe_index)
+    PANIC("Cuda error: slice range should be non-negative")
+
+  auto lwe_size = radix->lwe_dimension + 1;
+  auto num_blocks_to_set = end_lwe_index - start_lwe_index + 1;
+  auto lwe_array_out_block = (Torus *)radix->ptr + start_lwe_index * lwe_size;
+  cuda_memset_async(lwe_array_out_block, 0,
+                    num_blocks_to_set * lwe_size * sizeof(Torus), stream,
+                    gpu_index);
+  memset(&radix->degrees[start_lwe_index], 0,
+         num_blocks_to_set * sizeof(uint64_t));
+  memset(&radix->noise_levels[start_lwe_index], 0,
+         num_blocks_to_set * sizeof(uint64_t));
+}
+
 #endif
diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cu b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cu
index 1e992a3f61..e64d98f8b9 100644
--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cu
@@ -2,15 +2,14 @@
 void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
     void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array_out, void const *lwe_array_input, void const *clear_blocks,
+    CudaRadixCiphertextFFI *lwe_array_out,
+    CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
     uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks, uint32_t lwe_ciphertext_count, BITOP_TYPE op) {
+    void *const *ksks) {
 
   host_integer_radix_scalar_bitop_kb<uint64_t>(
-      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
-      static_cast<uint64_t *>(lwe_array_out),
-      static_cast<const uint64_t *>(lwe_array_input),
-      static_cast<const uint64_t *>(clear_blocks), num_clear_blocks,
-      (int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
-      lwe_ciphertext_count, op);
+      (cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
+      lwe_array_input, static_cast<const uint64_t *>(clear_blocks),
+      num_clear_blocks, (int_bitop_buffer<uint64_t> *)mem_ptr, bsks,
+      (uint64_t **)(ksks));
 }
diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cuh
index 24673f4bde..ec25dc24de 100644
--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cuh
@@ -7,45 +7,82 @@
 template <typename Torus>
 __host__ void host_integer_radix_scalar_bitop_kb(
     cudaStream_t const *streams, uint32_t const *gpu_indexes,
-    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_input,
-    Torus const *clear_blocks, uint32_t num_clear_blocks,
-    int_bitop_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks,
-    uint32_t num_radix_blocks, BITOP_TYPE op) {
+    uint32_t gpu_count, CudaRadixCiphertextFFI *output,
+    CudaRadixCiphertextFFI const *input, Torus const *clear_blocks,
+    uint32_t num_clear_blocks, int_bitop_buffer<Torus> *mem_ptr,
+    void *const *bsks, Torus *const *ksks) {
+  if (output->num_radix_blocks != input->num_radix_blocks)
+    PANIC("Cuda error: input and output num radix blocks must be equal")
+  if (output->lwe_dimension != input->lwe_dimension)
+    PANIC("Cuda error: input and output lwe dimensions must be equal")
   auto lut = mem_ptr->lut;
-  auto params = lut->params;
-  auto big_lwe_dimension = params.big_lwe_dimension;
-
-  uint32_t lwe_size = big_lwe_dimension + 1;
+  auto op = mem_ptr->op;
+  auto num_radix_blocks = output->num_radix_blocks;
 
   if (num_clear_blocks == 0) {
     if (op == SCALAR_BITAND) {
-      cuda_memset_async(lwe_array_out, 0,
-                        num_radix_blocks * lwe_size * sizeof(Torus), streams[0],
-                        gpu_indexes[0]);
+      set_zero_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0],
+                                             output, 0, num_radix_blocks - 1);
     } else {
-      cuda_memcpy_async_gpu_to_gpu(lwe_array_out, lwe_array_input,
-                                   num_radix_blocks * lwe_size * sizeof(Torus),
-                                   streams[0], gpu_indexes[0]);
+      if (input != output)
+        copy_radix_ciphertext_to_larger_output_slice_async<Torus>(
+            streams[0], gpu_indexes[0], output, input, 0);
     }
   } else {
     // We have all possible LUTs pre-computed and we use the decomposed scalar
     // as index to recover the right one
+    uint64_t degrees[num_clear_blocks];
+    uint64_t clear_degrees[num_clear_blocks];
+    cuda_memcpy_async_to_cpu(&clear_degrees, clear_blocks,
+                             num_clear_blocks * sizeof(Torus), streams[0],
+                             gpu_indexes[0]);
+    if (mem_ptr->op == BITOP_TYPE::SCALAR_BITAND) {
+      for (uint i = 0; i < num_clear_blocks; i++) {
+        degrees[i] = std::min(clear_degrees[i], input->degrees[i]);
+      }
+    } else if (mem_ptr->op == BITOP_TYPE::SCALAR_BITOR) {
+      for (uint i = 0; i < num_clear_blocks; i++) {
+        auto max = std::max(clear_degrees[i], input->degrees[i]);
+        auto min = std::min(clear_degrees[i], input->degrees[i]);
+        auto result = max;
+
+        for (uint j = 0; j < min + 1; j++) {
+          if ((max | j) > result) {
+            result = max | j;
+          }
+        }
+        degrees[i] = result;
+      }
+    } else if (mem_ptr->op == SCALAR_BITXOR) {
+      for (uint i = 0; i < num_clear_blocks; i++) {
+        auto max = std::max(clear_degrees[i], input->degrees[i]);
+        auto min = std::min(clear_degrees[i], input->degrees[i]);
+        auto result = max;
+
+        // Try every possibility to find the worst case
+        for (uint j = 0; j < min + 1; j++) {
+          if ((max ^ j) > result) {
+            result = max ^ j;
+          }
+        }
+        degrees[i] = result;
+      }
+    }
     cuda_memcpy_async_gpu_to_gpu(lut->get_lut_indexes(0, 0), clear_blocks,
                                  num_clear_blocks * sizeof(Torus), streams[0],
                                  gpu_indexes[0]);
 
     lut->broadcast_lut(streams, gpu_indexes, 0);
 
-    legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
-        streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_input, bsks,
-        ksks, num_clear_blocks, lut);
+    integer_radix_apply_univariate_lookup_table_kb<Torus>(
+        streams, gpu_indexes, gpu_count, output, input, bsks, ksks, lut,
+        num_clear_blocks);
+    memcpy(output->degrees, degrees, num_clear_blocks * sizeof(uint64_t));
 
     if (op == SCALAR_BITAND && num_clear_blocks < num_radix_blocks) {
-      auto lwe_array_out_block = lwe_array_out + num_clear_blocks * lwe_size;
-      cuda_memset_async(lwe_array_out_block, 0,
-                        (num_radix_blocks - num_clear_blocks) * lwe_size *
-                            sizeof(Torus),
-                        streams[0], gpu_indexes[0]);
+      set_zero_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0],
+                                             output, num_clear_blocks,
+                                             num_radix_blocks - 1);
     }
   }
 }
diff --git a/backends/tfhe-cuda-backend/src/bindings.rs b/backends/tfhe-cuda-backend/src/bindings.rs
index 75a0b329af..56500dc023 100644
--- a/backends/tfhe-cuda-backend/src/bindings.rs
+++ b/backends/tfhe-cuda-backend/src/bindings.rs
@@ -639,15 +639,13 @@ unsafe extern "C" {
         streams: *const *mut ffi::c_void,
         gpu_indexes: *const u32,
         gpu_count: u32,
-        lwe_array_out: *mut ffi::c_void,
-        lwe_array_input: *const ffi::c_void,
+        lwe_array_out: *mut CudaRadixCiphertextFFI,
+        lwe_array_input: *const CudaRadixCiphertextFFI,
         clear_blocks: *const ffi::c_void,
         num_clear_blocks: u32,
         mem_ptr: *mut i8,
         bsks: *const *mut ffi::c_void,
         ksks: *const *mut ffi::c_void,
-        lwe_ciphertext_count: u32,
-        op: BITOP_TYPE,
     );
 }
 unsafe extern "C" {
diff --git a/tfhe/src/integer/gpu/ciphertext/info.rs b/tfhe/src/integer/gpu/ciphertext/info.rs
index 7fccab4363..7be18d2a5c 100644
--- a/tfhe/src/integer/gpu/ciphertext/info.rs
+++ b/tfhe/src/integer/gpu/ciphertext/info.rs
@@ -311,89 +311,6 @@ impl CudaRadixCiphertextInfo {
                 .collect(),
         }
     }
 
-    pub(crate) fn after_scalar_bitand<T>(&self, scalar: T) -> Self
-    where
-        T: DecomposableInto<u8>,
-    {
-        let message_modulus = self.blocks.first().unwrap().message_modulus;
-        let bits_in_message = message_modulus.0.ilog2();
-        let decomposer = BlockDecomposer::with_early_stop_at_zero(scalar, bits_in_message)
-            .iter_as::<u8>()
-            .chain(std::iter::repeat(0u8));
-
-        Self {
-            blocks: self
-                .blocks
-                .iter()
-                .zip(decomposer)
-                .map(|(left, scalar_block)| CudaBlockInfo {
-                    degree: left
-                        .degree
-                        .after_bitand(Degree::new(u64::from(scalar_block))),
-                    message_modulus: left.message_modulus,
-                    carry_modulus: left.carry_modulus,
-                    pbs_order: left.pbs_order,
-                    noise_level: left.noise_level,
-                })
-                .collect(),
-        }
-    }
-
-    pub(crate) fn after_scalar_bitor<T>(&self, scalar: T) -> Self
-    where
-        T: DecomposableInto<u8>,
-    {
-        let message_modulus = self.blocks.first().unwrap().message_modulus;
-        let bits_in_message = message_modulus.0.ilog2();
-        let decomposer = BlockDecomposer::with_early_stop_at_zero(scalar, bits_in_message)
-            .iter_as::<u8>()
-            .chain(std::iter::repeat(0u8));
-
-        Self {
-            blocks: self
-                .blocks
-                .iter()
-                .zip(decomposer)
-                .map(|(left, scalar_block)| CudaBlockInfo {
-                    degree: left
-                        .degree
-                        .after_bitor(Degree::new(u64::from(scalar_block))),
-                    message_modulus: left.message_modulus,
-                    carry_modulus: left.carry_modulus,
-                    pbs_order: left.pbs_order,
-                    noise_level: left.noise_level,
-                })
-                .collect(),
-        }
-    }
-
-    pub(crate) fn after_scalar_bitxor<T>(&self, scalar: T) -> Self
-    where
-        T: DecomposableInto<u8>,
-    {
-        let message_modulus = self.blocks.first().unwrap().message_modulus;
-        let bits_in_message = message_modulus.0.ilog2();
-        let decomposer = BlockDecomposer::with_early_stop_at_zero(scalar, bits_in_message)
-            .iter_as::<u8>()
-            .chain(std::iter::repeat(0u8));
-
-        Self {
-            blocks: self
-                .blocks
-                .iter()
-                .zip(decomposer)
-                .map(|(left, scalar_block)| CudaBlockInfo {
-                    degree: left
-                        .degree
-                        .after_bitxor(Degree::new(u64::from(scalar_block))),
-                    message_modulus: left.message_modulus,
-                    carry_modulus: left.carry_modulus,
-                    pbs_order: left.pbs_order,
-                    noise_level: left.noise_level,
-                })
-                .collect(),
-        }
-    }
     // eq/ne, and comparisons returns a ciphertext that encrypts a 0 or 1, so the first block
     // (least significant) has a degree of 1, the other blocks should be trivial lwe encrypting 0,
diff --git a/tfhe/src/integer/gpu/mod.rs b/tfhe/src/integer/gpu/mod.rs
index 188e41c736..0eea69e1a4 100644
--- a/tfhe/src/integer/gpu/mod.rs
+++ b/tfhe/src/integer/gpu/mod.rs
@@ -880,7 +880,7 @@ pub unsafe fn unchecked_scalar_bitop_integer_radix_kb_assign_async<
     B: Numeric,
 >(
     streams: &CudaStreams,
-    radix_lwe: &mut CudaVec<T>,
+    radix_lwe: &mut CudaRadixCiphertext,
     clear_blocks: &CudaVec<T>,
     bootstrapping_key: &CudaVec<B>,
     keyswitch_key: &CudaVec<T>,
@@ -901,7 +901,7 @@ pub unsafe fn unchecked_scalar_bitop_integer_radix_kb_assign_async<
 ) {
     assert_eq!(
         streams.gpu_indexes[0],
-        radix_lwe.gpu_index(0),
+        radix_lwe.d_blocks.0.d_vec.gpu_index(0),
         "GPU error: all data should reside on the same GPU."
     );
     assert_eq!(
@@ -920,6 +920,18 @@ pub unsafe fn unchecked_scalar_bitop_integer_radix_kb_assign_async<
         "GPU error: all data should reside on the same GPU."
     );
     let mut mem_ptr: *mut i8 = std::ptr::null_mut();
+    let mut radix_lwe_degrees = radix_lwe.info.blocks.iter().map(|b| b.degree.0).collect();
+    let mut radix_lwe_noise_levels = radix_lwe
+        .info
+        .blocks
+        .iter()
+        .map(|b| b.noise_level.0)
+        .collect();
+    let mut cuda_ffi_radix_lwe = prepare_cuda_radix_ffi(
+        radix_lwe,
+        &mut radix_lwe_degrees,
+        &mut radix_lwe_noise_levels,
+    );
     scratch_cuda_integer_radix_bitop_kb_64(
         streams.ptr.as_ptr(),
         streams
@@ -955,15 +967,13 @@ pub unsafe fn unchecked_scalar_bitop_integer_radix_kb_assign_async<
             .collect::<Vec<u32>>()
             .as_ptr(),
         streams.len() as u32,
-        radix_lwe.as_mut_c_ptr(0),
-        radix_lwe.as_mut_c_ptr(0),
+        &mut cuda_ffi_radix_lwe,
+        &cuda_ffi_radix_lwe,
         clear_blocks.as_c_ptr(0),
         min(clear_blocks.len() as u32, num_blocks),
         mem_ptr,
         bootstrapping_key.ptr.as_ptr(),
         keyswitch_key.ptr.as_ptr(),
-        num_blocks,
-        op as u32,
     );
     cleanup_cuda_integer_bitop(
         streams.ptr.as_ptr(),
@@ -976,6 +986,7 @@ pub unsafe fn unchecked_scalar_bitop_integer_radix_kb_assign_async<
         streams.len() as u32,
         std::ptr::addr_of_mut!(mem_ptr),
     );
+    update_noise_degree(radix_lwe, &cuda_ffi_radix_lwe);
 }
 
 #[allow(clippy::too_many_arguments)]
diff --git a/tfhe/src/integer/gpu/server_key/radix/scalar_bitwise_op.rs b/tfhe/src/integer/gpu/server_key/radix/scalar_bitwise_op.rs
index 337abdd928..4260eb9aa6 100644
--- a/tfhe/src/integer/gpu/server_key/radix/scalar_bitwise_op.rs
+++ b/tfhe/src/integer/gpu/server_key/radix/scalar_bitwise_op.rs
@@ -37,7 +37,7 @@ impl CudaServerKey {
             CudaBootstrappingKey::Classic(d_bsk) => {
                 unchecked_scalar_bitop_integer_radix_kb_assign_async(
                     streams,
-                    &mut ct.as_mut().d_blocks.0.d_vec,
+                    ct.as_mut(),
                     &clear_blocks,
                     &d_bsk.d_vec,
                     &self.key_switching_key.d_vec,
@@ -64,7 +64,7 @@ impl CudaServerKey {
             CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
                 unchecked_scalar_bitop_integer_radix_kb_assign_async(
                     streams,
-                    &mut ct.as_mut().d_blocks.0.d_vec,
+                    ct.as_mut(),
                     &clear_blocks,
                     &d_multibit_bsk.d_vec,
                     &self.key_switching_key.d_vec,
@@ -117,7 +117,6 @@ impl CudaServerKey {
     {
         unsafe {
             self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarAnd, streams);
-            ct.as_mut().info = ct.as_ref().info.after_scalar_bitand(rhs);
         }
         streams.synchronize();
     }
@@ -143,7 +142,6 @@ impl CudaServerKey {
     {
         unsafe {
             self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarOr, streams);
-            ct.as_mut().info = ct.as_ref().info.after_scalar_bitor(rhs);
         }
         streams.synchronize();
     }
@@ -174,7 +172,6 @@ impl CudaServerKey {
     {
         unsafe {
             self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarXor, streams);
-            ct.as_mut().info = ct.as_ref().info.after_scalar_bitxor(rhs);
         }
         streams.synchronize();
     }
@@ -196,7 +193,6 @@ impl CudaServerKey {
             self.full_propagate_assign_async(ct, streams);
         }
         self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarAnd, streams);
-        ct.as_mut().info = ct.as_ref().info.after_scalar_bitand(rhs);
     }
 
     pub fn scalar_bitand_assign(&self, ct: &mut T, rhs: Scalar, streams: &CudaStreams)
     where
@@ -237,7 +233,6 @@ impl CudaServerKey {
             self.full_propagate_assign_async(ct, streams);
         }
         self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarOr, streams);
-        ct.as_mut().info = ct.as_ref().info.after_scalar_bitor(rhs);
     }
 
     pub fn scalar_bitor_assign(&self, ct: &mut T, rhs: Scalar, streams: &CudaStreams)
     where
@@ -278,7 +273,6 @@ impl CudaServerKey {
             self.full_propagate_assign_async(ct, streams);
         }
         self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarXor, streams);
-        ct.as_mut().info = ct.as_ref().info.after_scalar_bitxor(rhs);
     }
 
     pub fn scalar_bitxor_assign(&self, ct: &mut T, rhs: Scalar, streams: &CudaStreams)
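
Reviewer note (not part of the patch): the per-block degree bookkeeping that this change moves into the CUDA host code can be exercised in isolation. Below is a minimal, standalone C++ sketch of the same worst-case search the new scalar-bitop code performs for each block; the helper names (bitand_degree, bitor_degree, bitxor_degree) are illustrative only and do not exist in the codebase.

#include <algorithm>
#include <cstdint>
#include <iostream>

// Worst-case degree of a | b given only the per-operand upper bounds
// max_a and max_b: keep the larger bound and try OR-ing it with every
// value the smaller operand can reach, keeping the largest result.
uint64_t bitor_degree(uint64_t max_a, uint64_t max_b) {
  uint64_t hi = std::max(max_a, max_b);
  uint64_t lo = std::min(max_a, max_b);
  uint64_t result = hi;
  for (uint64_t j = 0; j <= lo; j++)
    result = std::max(result, hi | j);
  return result;
}

// Same exhaustive search for XOR. Note the parentheses: `^` and `|` bind
// less tightly than `>` in C++, so the comparison must be written
// `(hi ^ j) > result`, as in the corrected hunks above.
uint64_t bitxor_degree(uint64_t max_a, uint64_t max_b) {
  uint64_t hi = std::max(max_a, max_b);
  uint64_t lo = std::min(max_a, max_b);
  uint64_t result = hi;
  for (uint64_t j = 0; j <= lo; j++)
    result = std::max(result, hi ^ j);
  return result;
}

// AND can only clear bits, so the bound is the smaller of the two degrees.
uint64_t bitand_degree(uint64_t max_a, uint64_t max_b) {
  return std::min(max_a, max_b);
}

int main() {
  // Example with 2-bit message blocks: a ciphertext block of degree 2
  // combined with a clear block of value 3 can reach 3 for OR and XOR,
  // while AND stays bounded by 2.
  std::cout << bitor_degree(2, 3) << " " << bitxor_degree(2, 3) << " "
            << bitand_degree(2, 3) << "\n"; // prints "3 3 2"
}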