RustCrypto · erik-3milabs · Jan 27, 2025 · Jan 27, 2025 · Jan 27, 2025 · Jan 27, 2025
diff --git a/benches/uint.rs b/benches/uint.rs
@@ -1,7 +1,10 @@
-use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};
+use criterion::measurement::WallTime;
+use criterion::{
+    black_box, criterion_group, criterion_main, BatchSize, BenchmarkGroup, BenchmarkId, Criterion,
+};
 use crypto_bigint::{
-    Limb, NonZero, Odd, Random, RandomBits, RandomMod, Reciprocal, Uint, U1024, U128, U2048, U256,
-    U4096, U512,
+    Limb, NonZero, Odd, Random, RandomBits, RandomMod, Reciprocal, Uint, U1024, U128, U16384,
+    U2048, U256, U4096, U512, U8192,
 };
 use rand_chacha::ChaCha8Rng;
 use rand_core::{OsRng, RngCore, SeedableRng};
@@ -370,6 +373,37 @@ fn bench_shl(c: &mut Criterion) {
     group.finish();
 }
 
+/// Registers one benchmark per right-shift implementation of `Uint<LIMBS>` in `group`.
+///
+/// Every benchmark is named after the method it calls and parameterised by `LIMBS`. The input is
+/// always `Uint::ONE` and the shift amount is always `Uint::<LIMBS>::BITS / 2 + 10`, so all
+/// implementations are measured on the same work.
+fn shr_benchmark<const LIMBS: usize>(group: &mut BenchmarkGroup<WallTime>) {
+    // Expands to one `bench_function` call per listed method. A macro keeps every call a direct
+    // method call, exactly as if the four benchmarks were written out by hand.
+    macro_rules! bench_shr {
+        ($($method:ident),+ $(,)?) => {$(
+            // `stringify!` turns the method identifier into the benchmark's function name.
+            group.bench_function(BenchmarkId::new(stringify!($method), LIMBS), |bencher| {
+                bencher.iter_batched(
+                    || Uint::<LIMBS>::ONE,
+                    |value| value.$method(Uint::<LIMBS>::BITS / 2 + 10),
+                    BatchSize::SmallInput,
+                )
+            });
+        )+};
+    }
+
+    // Benchmarks are registered in the order listed here; each name is also the function name
+    // passed to `BenchmarkId::new`.
+    bench_shr!(
+        overflowing_shr_vartime,
+        overflowing_shr,
+        split_overflowing_shr,
+        fast_split_overflowing_shr,
+    );
+}
+
 fn bench_shr(c: &mut Criterion) {
     let mut group = c.benchmark_group("right shift");
 
@@ -405,6 +439,14 @@ fn bench_shr(c: &mut Criterion) {
         )
     });
 
+    shr_benchmark::<{ U256::LIMBS }>(&mut group);
+    shr_benchmark::<{ U512::LIMBS }>(&mut group);
+    shr_benchmark::<{ U1024::LIMBS }>(&mut group);
+    shr_benchmark::<{ U2048::LIMBS }>(&mut group);
+    shr_benchmark::<{ U4096::LIMBS }>(&mut group);
+    shr_benchmark::<{ U8192::LIMBS }>(&mut group);
+    shr_benchmark::<{ U16384::LIMBS }>(&mut group);
+
     group.finish();
 }
 

diff --git a/src/limb/shr.rs b/src/limb/shr.rs
@@ -1,6 +1,6 @@
 //! Limb right bitshift
 
-use crate::{Limb, WrappingShr};
+use crate::{ConstChoice, Limb, WrappingShr};
 use core::ops::{Shr, ShrAssign};
 
 impl Limb {
@@ -16,6 +16,23 @@ impl Limb {
     pub(crate) const fn shr1(self) -> (Self, Self) {
         (Self(self.0 >> 1), Self(self.0 << Self::HI_BIT))
     }
+
+    /// Computes `self >> shift`, returning the shifted limb together with a carry limb.
+    ///
+    /// The `shift` _most_ significant bits of the carry equal the `shift` _least_ significant bits
+    /// of `self`; the carry is zero when `shift` is zero.
+    ///
+    /// Panics if `shift` overflows `Limb::BITS`.
+    #[inline(always)]
+    pub const fn carrying_shr(self, shift: u32) -> (Self, Self) {
+        // For `shift > 0` the carry is `self << (Self::BITS - shift)`. For `shift == 0` that
+        // left shift would be out of range, so its operand and its amount both become zero.
+        let no_shift = ConstChoice::from_u32_eq(shift, 0);
+        let carry_shift = no_shift.select_u32(Self::BITS - shift, 0);
+        let carry_source = Self::select(self, Self::ZERO, no_shift);
+        let shifted = self.shr(shift);
+        (shifted, carry_source.shl(carry_shift))
+    }
 }
 
 macro_rules! impl_shr {

diff --git a/src/uint.rs b/src/uint.rs
@@ -165,7 +165,7 @@ impl<const LIMBS: usize> Uint<LIMBS> {
     }
 
     /// Borrow the limbs of this [`Uint`] mutably.
-    pub fn as_limbs_mut(&mut self) -> &mut [Limb; LIMBS] {
+    pub const fn as_limbs_mut(&mut self) -> &mut [Limb; LIMBS] {
         &mut self.limbs
     }
 

diff --git a/src/uint/shr.rs b/src/uint/shr.rs
@@ -47,6 +47,112 @@ impl<const LIMBS: usize> Uint<LIMBS> {
         ConstCtOption::new(Uint::select(&result, &Self::ZERO, overflow), overflow.not())
     }
 
+    /// Computes `self >> shift` as a sub-limb bit shift followed by a whole-limb shift.
+    ///
+    /// Returns `None` if `shift >= Self::BITS`.
+    pub const fn split_overflowing_shr(&self, shift: u32) -> ConstCtOption<Self> {
+        let whole_limbs = shift / Limb::BITS;
+        let bit_shifted = self.intra_limb_carrying_shr_internal(shift % Limb::BITS);
+        bit_shifted.full_limb_shr(whole_limbs)
+    }
+
+    /// Computes `self >> shift` for shifts that stay within a single limb.
+    ///
+    /// Returns `None` if `shift >= Limb::BITS`.
+    pub const fn intra_limb_overflowing_shr(&self, shift: u32) -> ConstCtOption<Self> {
+        let in_range = ConstChoice::from_u32_lt(shift, Limb::BITS);
+        let shifted = self.intra_limb_carrying_shr_internal(shift % Limb::BITS);
+        ConstCtOption::new(Uint::select(&Self::ZERO, &shifted, in_range), in_range)
+    }
+
+    /// Shifts `self` right by `shift` bits, where `shift` must be smaller than `Limb::BITS`.
+    ///
+    /// The precondition is checked with a `debug_assert!` only.
+    #[inline(always)]
+    const fn intra_limb_carrying_shr_internal(&self, shift: u32) -> Self {
+        debug_assert!(shift < Limb::BITS);
+
+        let mut shifted = *self;
+        let mut carry_in = Limb::ZERO;
+        // Walk from the highest-indexed limb down, handing the bits shifted out of each limb
+        // to the limb below it. The carry and the shifted limb never overlap, so XOR merges them.
+        let mut idx = LIMBS;
+        while idx > 0 {
+            idx -= 1;
+            let (limb, carry_out) = shifted.limbs[idx].carrying_shr(shift);
+            shifted.limbs[idx] = limb.bitxor(carry_in);
+            carry_in = carry_out;
+        }
+        shifted
+    }
+
+    /// Shifts `self` right by `limb_shift` whole limbs, i.e. by `Limb::BITS * limb_shift` bits.
+    ///
+    /// Returns `None` if `limb_shift >= Self::LIMBS`.
+    #[inline(always)]
+    pub const fn full_limb_shr(&self, limb_shift: u32) -> ConstCtOption<Self> {
+        let in_range = ConstChoice::from_u32_lt(limb_shift, LIMBS as u32);
+        // Number of bits needed to represent any in-range shift (`0..LIMBS`).
+        let shift_bits = u32::BITS - (LIMBS as u32 - 1).leading_zeros();
+        let limb_shift = limb_shift % LIMBS as u32;
+
+        // Apply a shift of `2^k` limbs for every set bit `k` of `limb_shift`. The candidate is
+        // always computed and then selected, so the steps taken do not depend on `limb_shift`.
+        let mut acc = *self;
+        let mut k = 0;
+        while k < shift_bits {
+            let stepped = acc
+                .overflowing_shr_vartime(Limb::BITS << k)
+                .expect("shift within range");
+            let take = ConstChoice::from_u32_lsb((limb_shift >> k) & 1);
+            acc = Uint::select(&acc, &stepped, take);
+            k += 1;
+        }
+
+        ConstCtOption::new(Uint::select(&Self::ZERO, &acc, in_range), in_range)
+    }
+
+    /// Computes `self >> shift` as a sub-limb bit shift followed by [`Self::fast_full_limb_shr`].
+    ///
+    /// Returns `None` if `shift >= Self::BITS`.
+    pub const fn fast_split_overflowing_shr(&self, shift: u32) -> ConstCtOption<Self> {
+        let whole_limbs = shift / Limb::BITS;
+        let bit_shifted = self.intra_limb_carrying_shr_internal(shift % Limb::BITS);
+        bit_shifted.fast_full_limb_shr(whole_limbs)
+    }
+
+    /// Shifts `self` right by `limb_shift` whole limbs, moving the limbs of a copy in place.
+    ///
+    /// Returns `None` if `limb_shift >= Self::LIMBS`.
+    #[inline(always)]
+    pub const fn fast_full_limb_shr(&self, limb_shift: u32) -> ConstCtOption<Self> {
+        let in_range = ConstChoice::from_u32_lt(limb_shift, LIMBS as u32);
+        let shift_bits = u32::BITS - (LIMBS as u32 - 1).leading_zeros();
+        let limb_shift = limb_shift % LIMBS as u32;
+
+        let mut shifted = *self;
+        let mut k = 0;
+        while k < shift_bits {
+            // When bit `k` is set, pull every limb down by `2^k` positions; ascending order
+            // means `limbs[pos + step]` is read before this pass overwrites it.
+            let take = ConstChoice::from_u32_lsb((limb_shift >> k) & 1);
+            let step = 1 << k;
+            let limbs = shifted.as_limbs_mut();
+            let mut pos = 0;
+            while pos < Self::LIMBS.saturating_sub(step) {
+                limbs[pos] = Limb::select(limbs[pos], limbs[pos + step], take);
+                pos += 1;
+            }
+            // The highest-indexed `step` limbs have no source limb, so they are cleared instead.
+            while pos < Self::LIMBS {
+                limbs[pos] = Limb::select(limbs[pos], Limb::ZERO, take);
+                pos += 1;
+            }
+            k += 1;
+        }
+        ConstCtOption::new(Uint::select(&Self::ZERO, &shifted, in_range), in_range)
+    }
+
     /// Computes `self >> shift`.
     ///
     /// Returns `None` if `shift >= Self::BITS`.

diff --git a/tests/limb.rs b/tests/limb.rs
@@ -0,0 +1,23 @@
+use crypto_bigint::{Limb, Word};
+use proptest::prelude::*;
+
+// Strategy producing arbitrary `Limb` values for the properties in this file.
+prop_compose! {
+    // Any `Word` converts into a `Limb`, so the strategy needs no filtering.
+    fn limb()(word in any::<Word>()) -> Limb { Limb::from(word) }
+}
+proptest! {
+    // `shift` covers every valid amount (`0..Limb::BITS`), so 64-bit limbs are fully exercised.
+    #[test]
+    fn carrying_shr_doesnt_panic(limb in limb(), shift in 0..Limb::BITS) {
+        limb.carrying_shr(shift);
+    }
+
+    // The carry must hold the shifted-out bits left-aligned, and be zero when `shift` is zero.
+    #[test]
+    fn carrying_shr(limb in limb(), shift in 0..Limb::BITS) {
+        // `limb.shl(Limb::BITS)` would be out of range, so the zero-shift carry is spelled out.
+        let carry = if shift == 0 { Limb::ZERO } else { limb.shl(Limb::BITS - shift) };
+        assert_eq!(limb.carrying_shr(shift), (limb.shr(shift), carry));
+    }
+}