Improve handling of constant bits in scalar mul for SW curves (#43)

* We add a double_and_add method that computes 2 * self + other more efficiently than just doubling + addition; this is not used anywhere yet, but I am planning on fiddling with it to see if we can leverage it somehow. (See zcash/zcash#3924 for details) * We handle constant scalars better: * We skip the most-significant constant zeroes to avoid unnecessary doubling * When intermediate bits of the scalar are constants, instead of conditionally adding, we directly use the value of the bit to decide whether to add or not. Co-authored-by: Dev Ojha <ValarDragon@users.noreply.github.com> Co-authored-by: weikeng <w.k@berkeley.edu>
2026-02-28 12:46:41 +01:00 · 2021-01-22 14:55:55 -08:00
parent 5e4114b19c
commit d9e0200433
3 changed files with 67 additions and 12 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,7 @@
 ## Pending

 ### Breaking changes
- #12 Make the `ToBitsGadget` impl for `FpVar` output fixed-size
+- #12 Make the output of the `ToBitsGadget` impl for `FpVar` fixed-size

 ### Features

@@ -17,7 +17,7 @@
 - #33 Speedup scalar multiplication by a constant
 - #35 Construct a `FpVar` from bits
 - #36 Implement `ToConstraintFieldGadget` for `Vec<Uint8>`
- #40 Faster scalar multiplication for Short Weierstrass curves by relying on affine formulae
+- #40, #43 Faster scalar multiplication for Short Weierstrass curves by relying on affine formulae

 ### Bug fixes
 - #8 Fix bug in `three_bit_cond_neg_lookup` when using a constant lookup bit
--- a/src/groups/curves/short_weierstrass/mod.rs
+++ b/src/groups/curves/short_weierstrass/mod.rs
@@ -2,7 +2,7 @@ use ark_ec::{
    short_weierstrass_jacobian::{GroupAffine as SWAffine, GroupProjective as SWProjective},
    AffineCurve, ProjectiveCurve, SWModelParameters,
 };
-use ark_ff::{BigInteger, BitIteratorBE, Field, FpParameters, One, PrimeField, Zero};
+use ark_ff::{BigInteger, BitIteratorBE, Field, One, PrimeField, Zero};
 use ark_relations::r1cs::{ConstraintSystemRef, Namespace, SynthesisError};
 use core::{borrow::Borrow, marker::PhantomData};
 use non_zero_affine::NonZeroAffineVar;
@@ -276,7 +276,7 @@ where
        multiple_of_power_of_two: &mut NonZeroAffineVar<P, F>,
        bits: &[&Boolean<<P::BaseField as Field>::BasePrimeField>],
    ) -> Result<(), SynthesisError> {
-        let scalar_modulus_bits = <<P::ScalarField as PrimeField>::Params>::MODULUS_BITS as usize;
+        let scalar_modulus_bits = <P::ScalarField as PrimeField>::size_in_bits();

        assert!(scalar_modulus_bits >= bits.len());
        let split_len = ark_std::cmp::min(scalar_modulus_bits - 2, bits.len());
@@ -317,8 +317,14 @@ where

        // As mentioned, we will skip the LSB, and will later handle it via a conditional subtraction.
        for bit in affine_bits.iter().skip(1) {
-            let temp = accumulator.add_unchecked(&multiple_of_power_of_two)?;
-            accumulator = bit.select(&temp, &accumulator)?;
+            if bit.is_constant() {
+                if *bit == &Boolean::TRUE {
+                    accumulator = accumulator.add_unchecked(&multiple_of_power_of_two)?;
+                }
+            } else {
+                let temp = accumulator.add_unchecked(&multiple_of_power_of_two)?;
+                accumulator = bit.select(&temp, &accumulator)?;
+            }
            multiple_of_power_of_two.double_in_place()?;
        }
        // Perform conditional subtraction:
@@ -332,8 +338,14 @@ where

        // Now, let's finish off the rest of the bits using our complete formulae
        for bit in proj_bits {
-            let temp = &*mul_result + &multiple_of_power_of_two.into_projective();
-            *mul_result = bit.select(&temp, &mul_result)?;
+            if bit.is_constant() {
+                if *bit == &Boolean::TRUE {
+                    *mul_result += &multiple_of_power_of_two.into_projective();
+                }
+            } else {
+                let temp = &*mul_result + &multiple_of_power_of_two.into_projective();
+                *mul_result = bit.select(&temp, &mul_result)?;
+            }
            multiple_of_power_of_two.double_in_place()?;
        }
        Ok(())
@@ -485,11 +497,23 @@ where
        // will conditionally select zero if `self` was zero.
        let non_zero_self = NonZeroAffineVar::new(x, y);

-        let bits = bits.collect::<Vec<_>>();
+        let mut bits = bits.collect::<Vec<_>>();
        if bits.len() == 0 {
            return Ok(Self::zero());
        }
-        let scalar_modulus_bits = <<P::ScalarField as PrimeField>::Params>::MODULUS_BITS as usize;
+        // Remove unnecessary constant zeros in the most-significant positions.
+        bits = bits
+            .into_iter()
+            // We iterate from the MSB down.
+            .rev()
+            // Skip leading zeros, if they are constants.
+            .skip_while(|b| b.is_constant() && (b.value().unwrap() == false))
+            .collect();
+        // After collecting we are in big-endian form; we have to reverse to get back to
+        // little-endian.
+        bits.reverse();
+
+        let scalar_modulus_bits = <P::ScalarField as PrimeField>::size_in_bits();
        let mut mul_result = Self::zero();
        let mut power_of_two_times_self = non_zero_self;
        // We chunk up `bits` into `p`-sized chunks.
@@ -497,7 +521,7 @@ where
            self.fixed_scalar_mul_le(&mut mul_result, &mut power_of_two_times_self, bits)?;
        }

-        // The foregoing algorithms rely on mixed/incomplete addition, and so do not
+        // The foregoing algorithm relies on incomplete addition, and so does not
        // work when the input (`self`) is zero. We hence have to perform
        // a check to ensure that if the input is zero, then so is the output.
        // The cost of this check should be less than the benefit of using
--- a/src/groups/curves/short_weierstrass/non_zero_affine.rs
+++ b/src/groups/curves/short_weierstrass/non_zero_affine.rs
@@ -74,7 +74,7 @@ where
            let (x1, y1) = (&self.x, &self.y);
            let x1_sqr = x1.square()?;
            // Then,
-            // tangent lambda := (3 * x1^2 + a) / y1·;
+            // tangent lambda := (3 * x1^2 + a) / (2 * y1);
            // x3 = lambda^2 - 2x1
            // y3 = lambda * (x1 - x3) - y1
            let numerator = x1_sqr.double()? + &x1_sqr + P::COEFF_A;
@@ -86,6 +86,37 @@ where
        }
    }

+    /// Computes `(self + other) + self`. This method requires only 5 constraints,
+    /// less than the 7 required when computing via `self.double() + other`.
+    ///
+    /// This follows the formulae from [\[ELM03\]](https://arxiv.org/abs/math/0208038).
+    #[tracing::instrument(target = "r1cs", skip(self))]
+    pub(crate) fn double_and_add(&self, other: &Self) -> Result<Self, SynthesisError> {
+        if [self].is_constant() || other.is_constant() {
+            self.double()?.add_unchecked(other)
+        } else {
+            let (x1, y1) = (&self.x, &self.y);
+            let (x2, y2) = (&other.x, &other.y);
+
+            // Calculate self + other:
+            // slope lambda := (y2 - y1)/(x2 - x1);
+            // x3 = lambda^2 - x1 - x2;
+            // y3 = lambda * (x1 - x3) - y1
+            let numerator = y2 - y1;
+            let denominator = x2 - x1;
+            let lambda_1 = numerator.mul_by_inverse(&denominator)?;
+
+            let x3 = lambda_1.square()? - x1 - x2;
+
+            // Calculate final addition slope:
+            let lambda_2 = (lambda_1 + y1.double()?.mul_by_inverse(&(&x3 - x1))?).negate()?;
+
+            let x4 = lambda_2.square()? - x1 - x3;
+            let y4 = lambda_2 * &(x1 - &x4) - y1;
+            Ok(Self::new(x4, y4))
+        }
+    }
+
    /// Doubles `self` in place.
    #[tracing::instrument(target = "r1cs", skip(self))]
    pub(crate) fn double_in_place(&mut self) -> Result<(), SynthesisError> {