diff --git a/crates/prover/src/core/backend/simd/cm31.rs b/crates/prover/src/core/backend/simd/cm31.rs
index 2155e8ff1..6bb21dd1d 100644
--- a/crates/prover/src/core/backend/simd/cm31.rs
+++ b/crates/prover/src/core/backend/simd/cm31.rs
@@ -5,8 +5,9 @@ use bytemuck::{Pod, Zeroable};
 use num_traits::{One, Zero};
 
 use super::m31::{PackedM31, N_LANES};
+use super::PACKED_CM31_BATCH_INVERSE_CHUNK_SIZE;
 use crate::core::fields::cm31::CM31;
-use crate::core::fields::FieldExpOps;
+use crate::core::fields::{batch_inverse_chunked, FieldExpOps};
 
 /// SIMD implementation of [`CM31`].
 #[derive(Copy, Clone, Debug)]
@@ -132,6 +133,11 @@ impl FieldExpOps for PackedCM31 {
         // 1 / (a + bi) = (a - bi) / (a^2 + b^2).
         Self([self.a(), -self.b()]) * (self.a().square() + self.b().square()).inverse()
     }
+
+    /// Delegates to [`batch_inverse_chunked`] with [`PACKED_CM31_BATCH_INVERSE_CHUNK_SIZE`].
+    fn batch_inverse(column: &[Self]) -> Vec<Self> {
+        batch_inverse_chunked(column, PACKED_CM31_BATCH_INVERSE_CHUNK_SIZE)
+    }
 }
 
 impl Add for PackedCM31 {
diff --git a/crates/prover/src/core/backend/simd/m31.rs b/crates/prover/src/core/backend/simd/m31.rs
index 3d10be8c0..1ebcd408f 100644
--- a/crates/prover/src/core/backend/simd/m31.rs
+++ b/crates/prover/src/core/backend/simd/m31.rs
@@ -10,9 +10,10 @@ use num_traits::{One, Zero};
 use rand::distributions::{Distribution, Standard};
 
 use super::qm31::PackedQM31;
+use super::PACKED_M31_BATCH_INVERSE_CHUNK_SIZE;
 use crate::core::fields::m31::{pow2147483645, BaseField, M31, P};
 use crate::core::fields::qm31::QM31;
-use crate::core::fields::FieldExpOps;
+use crate::core::fields::{batch_inverse_chunked, FieldExpOps};
 
 pub const LOG_N_LANES: u32 = 4;
 
@@ -251,6 +252,11 @@ impl FieldExpOps for PackedM31 {
         assert!(!self.is_zero(), "0 has no inverse");
         pow2147483645(*self)
     }
+
+    /// Delegates to [`batch_inverse_chunked`] with [`PACKED_M31_BATCH_INVERSE_CHUNK_SIZE`].
+    fn batch_inverse(column: &[Self]) -> Vec<Self> {
+        batch_inverse_chunked(column, PACKED_M31_BATCH_INVERSE_CHUNK_SIZE)
+    }
 }
 
 unsafe impl Pod for PackedM31 {}
diff --git a/crates/prover/src/core/backend/simd/mod.rs b/crates/prover/src/core/backend/simd/mod.rs
index 0576ea726..2a0df61af 100644
--- a/crates/prover/src/core/backend/simd/mod.rs
+++ b/crates/prover/src/core/backend/simd/mod.rs
@@ -33,3 +33,8 @@ impl Backend for SimdBackend {}
 impl BackendForChannel for SimdBackend {}
 #[cfg(not(target_arch = "wasm32"))]
 impl BackendForChannel for SimdBackend {}
+
+// Optimal chunk sizes were determined empirically on an Intel 155U machine.
+pub(super) const PACKED_M31_BATCH_INVERSE_CHUNK_SIZE: usize = 1 << 9;
+pub(super) const PACKED_CM31_BATCH_INVERSE_CHUNK_SIZE: usize = 1 << 10;
+pub(super) const PACKED_QM31_BATCH_INVERSE_CHUNK_SIZE: usize = 1 << 11;
diff --git a/crates/prover/src/core/backend/simd/qm31.rs b/crates/prover/src/core/backend/simd/qm31.rs
index ce7231d0a..9dde0a92b 100644
--- a/crates/prover/src/core/backend/simd/qm31.rs
+++ b/crates/prover/src/core/backend/simd/qm31.rs
@@ -8,9 +8,10 @@ use rand::distributions::{Distribution, Standard};
 
 use super::cm31::PackedCM31;
 use super::m31::{PackedM31, N_LANES};
+use super::PACKED_QM31_BATCH_INVERSE_CHUNK_SIZE;
 use crate::core::fields::m31::M31;
 use crate::core::fields::qm31::QM31;
-use crate::core::fields::FieldExpOps;
+use crate::core::fields::{batch_inverse_chunked, FieldExpOps};
 
 pub type PackedSecureField = PackedQM31;
 
@@ -171,6 +172,11 @@ impl FieldExpOps for PackedQM31 {
         let denom_inverse = denom.inverse();
         Self([self.a() * denom_inverse, -self.b() * denom_inverse])
     }
+
+    /// Delegates to [`batch_inverse_chunked`] with [`PACKED_QM31_BATCH_INVERSE_CHUNK_SIZE`].
+    fn batch_inverse(column: &[Self]) -> Vec<Self> {
+        batch_inverse_chunked(column, PACKED_QM31_BATCH_INVERSE_CHUNK_SIZE)
+    }
 }
 
 impl Add for PackedQM31 {