Better scalar multiplication for Short Weierstrass curves (#40)

Co-authored-by: Dev Ojha <ValarDragon@users.noreply.github.com> Co-authored-by: Pratyush Mishra <pratyushmishra@berkeley.edu>
3 years ago · 5e4114b19c
5 changed files with 294 additions and 107 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,26 +1,27 @@
 ## Pending

 ### Breaking changes
 - #12 Make FpVar's ToBitsGadget output constant length bit vectors.
 - #12 Make the `ToBitsGadget` impl for `FpVar` output fixed-size

 ### Features


 ### Improvements
 - #5 Speedup BLS-12 pairing
 - #13 Add ToConstraintFieldGadget to ProjectiveVar
 - #15, #16 Allow cs to be none when converting a montgomery point into a twisted edwards point
 - #20 Add conditional select gadget to Uints
 - #21 Add Uint128
 - #13 Add `ToConstraintFieldGadget` to `ProjectiveVar`
 - #15, #16 Allow `cs` to be `None` when converting a Montgomery point into a Twisted Edwards point
 - #20 Add `CondSelectGadget` impl for `UInt`s
 - #21 Add `UInt128`
 - #22 Reduce density of `three_bit_cond_neg_lookup`
 - #23 Reduce allocations in Uints
 - #23 Reduce allocations in `UInt`s
 - #33 Speedup scalar multiplication by a constant
 - #35 Construct a FpVar from bits
 - #36 ImplementToConstraintFieldGadget for `Vec<Uint8>`
 - #35 Construct a `FpVar` from bits
 - #36 Implement `ToConstraintFieldGadget` for `Vec<Uint8>`
 - #40 Faster scalar multiplication for Short Weierstrass curves by relying on affine formulae

 ### Bug fixes
 - #8 Fix bug in three_bit_cond_neg_lookup when using a constant lookup bit
 - #9 Fix bug in short weierstrass projective curve point's to_affine method
 - #8 Fix bug in `three_bit_cond_neg_lookup` when using a constant lookup bit
 - #9 Fix bug in `short_weierstrass::ProjectiveVar::to_affine` 
 - #29 Fix `to_non_unique_bytes` for `BLS12::G1Prepared`
 - #34 Fix `mul_by_inverse` for constants
 - #42 Fix regression in `mul_by_inverse` constraint count
--- a/src/groups/curves/short_weierstrass/mod.rs
+++ b/src/groups/curves/short_weierstrass/mod.rs
@ -2,9 +2,10 @@ use ark_ec::{
    short_weierstrass_jacobian::{GroupAffine as SWAffine, GroupProjective as SWProjective},
    AffineCurve, ProjectiveCurve, SWModelParameters,
 };
 use ark_ff::{BigInteger, BitIteratorBE, Field, One, PrimeField, Zero};
 use ark_ff::{BigInteger, BitIteratorBE, Field, FpParameters, One, PrimeField, Zero};
 use ark_relations::r1cs::{ConstraintSystemRef, Namespace, SynthesisError};
 use core::{borrow::Borrow, marker::PhantomData};
 use non_zero_affine::NonZeroAffineVar;

 use crate::{fields::fp::FpVar, prelude::*, ToConstraintFieldGadget, Vec};

@ -21,6 +22,7 @@ pub mod mnt4;
 ///  family of bilinear groups.
 pub mod mnt6;

 mod non_zero_affine;
 /// An implementation of arithmetic for Short Weierstrass curves that relies on
 /// the complete formulae derived in the paper of
 /// [[Renes, Costello, Batina 2015]](https://eprint.iacr.org/2015/1060).
@ -223,8 +225,8 @@ where
    }

    /// Mixed addition, which is useful when `other = (x2, y2)` is known to have z = 1.
    #[tracing::instrument(target = "r1cs", skip(self, x2, y2))]
    pub(crate) fn add_mixed(&self, (x2, y2): (&F, &F)) -> Result<Self, SynthesisError> {
    #[tracing::instrument(target = "r1cs", skip(self, other))]
    pub(crate) fn add_mixed(&self, other: &NonZeroAffineVar<P, F>) -> Result<Self, SynthesisError> {
        // Complete mixed addition formula from Renes-Costello-Batina 2015
        // Algorithm 2
        // (https://eprint.iacr.org/2015/1060).
@ -235,6 +237,7 @@ where
        // https://github.com/RustCrypto/elliptic-curves/blob/master/p256/src/arithmetic/projective.rs
        let three_b = P::COEFF_B.double() + &P::COEFF_B;
        let (x1, y1, z1) = (&self.x, &self.y, &self.z);
        let (x2, y2) = (&other.x, &other.y);

        let xx = x1 * x2; // 1
        let yy = y1 * y2; // 2
@ -261,6 +264,80 @@ where

        Ok(ProjectiveVar::new(x, y, z))
    }

    /// Computes a scalar multiplication with a little-endian scalar of size `P::ScalarField::MODULUS_BITS`.
    #[tracing::instrument(
        target = "r1cs",
        skip(self, mul_result, multiple_of_power_of_two, bits)
    )]
    fn fixed_scalar_mul_le(
        &self,
        mul_result: &mut Self,
        multiple_of_power_of_two: &mut NonZeroAffineVar<P, F>,
        bits: &[&Boolean<<P::BaseField as Field>::BasePrimeField>],
    ) -> Result<(), SynthesisError> {
        let scalar_modulus_bits = <<P::ScalarField as PrimeField>::Params>::MODULUS_BITS as usize;

        assert!(scalar_modulus_bits >= bits.len());
        let split_len = ark_std::cmp::min(scalar_modulus_bits - 2, bits.len());
        let (affine_bits, proj_bits) = bits.split_at(split_len);
        // Computes the standard little-endian double-and-add algorithm
        // (Algorithm 3.26, Guide to Elliptic Curve Cryptography)
        //
        // We rely on *incomplete* affine formulae for partially computing this.
        // However, we avoid exceptional edge cases because we partition the scalar
        // into two chunks: one guaranteed to be less than p - 2, and the rest.
        // We only use incomplete formulae for the first chunk, which means we avoid exceptions:
        //
        // `add_unchecked(a, b)` is incomplete when either `b.is_zero()`, or when
        // `b = ±a`. During scalar multiplication, we don't hit either case:
        // * `b = ±a`: `b = accumulator = k * a`, where `2 <= k < p - 1`.
        //   This implies that `k != p ± 1`, and so `b != (p ± 1) * a`.
        //   Because the group is finite, this in turn means that `b != ±a`, as required.
        // * `a` or `b` is zero: for `a`, we handle the zero case after the loop; for `b`, notice
        //    that it is monotonically increasing, and furthermore, equals `k * a`, where
        //    `k != p = 0 mod p`.

        // Unlike normal double-and-add, here we start off with a non-zero `accumulator`,
        // because `NonZeroAffineVar::add_unchecked` doesn't support addition with `zero`.
        // In more detail, we initialize `accumulator` to be the initial value of
        // `multiple_of_power_of_two`. This ensures that all unchecked additions of `accumulator`
        // with later values of `multiple_of_power_of_two` are safe.
        // However, to do this correctly, we need to perform two steps:
        // * We must skip the LSB, and instead proceed assuming that it was 1. Later, we will
        //   conditionally subtract the initial value of `accumulator`:
        //     if LSB == 0: subtract initial_acc_value; else, subtract 0.
        // * Because we are assuming the first bit, we must double `multiple_of_power_of_two`.

        let mut accumulator = multiple_of_power_of_two.clone();
        let initial_acc_value = accumulator.into_projective();

        // The powers start at 2 (instead of 1) because we're skipping the first bit.
        multiple_of_power_of_two.double_in_place()?;

        // As mentioned, we will skip the LSB, and will later handle it via a conditional subtraction.
        for bit in affine_bits.iter().skip(1) {
            let temp = accumulator.add_unchecked(&multiple_of_power_of_two)?;
            accumulator = bit.select(&temp, &accumulator)?;
            multiple_of_power_of_two.double_in_place()?;
        }
        // Perform conditional subtraction:

        // We can convert to projective safely because the result is guaranteed to be non-zero
        // by the condition on `affine_bits.len()`, and by the fact that `accumulator` is non-zero
        let result = accumulator.into_projective();
        // If bits[0] is 0, then we have to subtract `self`; else, we subtract zero.
        let subtrahend = bits[0].select(&Self::zero(), &initial_acc_value)?;
        *mul_result += result - subtrahend;

        // Now, let's finish off the rest of the bits using our complete formulae
        for bit in proj_bits {
            let temp = &*mul_result + &multiple_of_power_of_two.into_projective();
            *mul_result = bit.select(&temp, &mul_result)?;
            multiple_of_power_of_two.double_in_place()?;
        }
        Ok(())
    }
 }

 impl<P, F> CurveVar<SWProjective<P>, <P::BaseField as Field>::BasePrimeField>
@ -398,47 +475,52 @@ where
        bits: impl Iterator<Item = &'a Boolean<<P::BaseField as Field>::BasePrimeField>>,
    ) -> Result<Self, SynthesisError> {
        if self.is_constant() {
            // Compute 2^i * self for i in 0..bits.len()
            // (these will be used by`precomputed_base_scalar_mul_le`, to perform
            // a conditional addition.).
            //
            // TODO: if `bits.len()` is small n, it might be cheaper to
            // conditionally select between 2^n options.
            let mut value = self.value()?;
            let bits_and_multiples = bits
                .map(|b| {
                    let multiple = value;
                    value.double_in_place();
                    (b, multiple)
                })
                .collect::<ark_std::vec::Vec<_>>();
            let mut result = self.clone();
            result.precomputed_base_scalar_mul_le(
                bits_and_multiples.iter().map(|&(ref b, ref c)| (*b, c)),
            )?;
            Ok(result)
        } else {
            // Computes the standard big-endian double-and-add algorithm
            // (Algorithm 3.27, Guide to Elliptic Curve Cryptography)
            // the input is little-endian, so we need to reverse it to get it in
            // big-endian.
            let mut bits = bits.collect::<Vec<_>>();
            bits.reverse();

            let mut res = Self::zero();
            let self_affine = self.to_affine()?;
            for bit in bits {
                res.double_in_place()?;
                let tmp = res.add_mixed((&self_affine.x, &self_affine.y))?;
                res = bit.select(&tmp, &res)?;
            if self.value().unwrap().is_zero() {
                return Ok(self.clone());
            }
            // The foregoing algorithm relies on mixed addition, and so does not
            // work when the input (`self`) is zero. We hence have to perform
            // a check to ensure that if the input is zero, then so is the output.
            // The cost of this check should be less than the benefit of using
            // mixed addition in almost all cases.
            self_affine.infinity.select(&Self::zero(), &res)
        }
        let self_affine = self.to_affine()?;
        let (x, y, infinity) = (self_affine.x, self_affine.y, self_affine.infinity);
        // We first handle the non-zero case, and then later
        // will conditionally select zero if `self` was zero.
        let non_zero_self = NonZeroAffineVar::new(x, y);

        let bits = bits.collect::<Vec<_>>();
        if bits.len() == 0 {
            return Ok(Self::zero());
        }
        let scalar_modulus_bits = <<P::ScalarField as PrimeField>::Params>::MODULUS_BITS as usize;
        let mut mul_result = Self::zero();
        let mut power_of_two_times_self = non_zero_self;
        // We chunk up `bits` into `p`-sized chunks.
        for bits in bits.chunks(scalar_modulus_bits) {
            self.fixed_scalar_mul_le(&mut mul_result, &mut power_of_two_times_self, bits)?;
        }

        // The foregoing algorithms rely on mixed/incomplete addition, and so do not
        // work when the input (`self`) is zero. We hence have to perform
        // a check to ensure that if the input is zero, then so is the output.
        // The cost of this check should be less than the benefit of using
        // mixed addition in almost all cases.
        infinity.select(&Self::zero(), &mul_result)
    }

    #[tracing::instrument(target = "r1cs", skip(scalar_bits_with_bases))]
    fn precomputed_base_scalar_mul_le<'a, I, B>(
        &mut self,
        scalar_bits_with_bases: I,
    ) -> Result<(), SynthesisError>
    where
        I: Iterator<Item = (B, &'a SWProjective<P>)>,
        B: Borrow<Boolean<<P::BaseField as Field>::BasePrimeField>>,
    {
        // We just ignore the provided bases and use the faster scalar multiplication.
        let (bits, bases): (Vec<_>, Vec<_>) = scalar_bits_with_bases
            .map(|(b, c)| (b.borrow().clone(), *c))
            .unzip();
        let base = bases[0];
        *self = Self::constant(base).scalar_mul_le(bits.iter())?;
        Ok(())
    }
 }

@ -500,7 +582,7 @@ impl_bounded_ops!(
                // We'll use mixed addition to add non-zero constants.
                let x = F::constant(other.x);
                let y = F::constant(other.y);
                this.add_mixed((&x, &y)).unwrap()
                this.add_mixed(&NonZeroAffineVar::new(x, y)).unwrap()
            }
        } else {
            // Complete addition formula from Renes-Costello-Batina 2015
--- a/src/groups/curves/short_weierstrass/non_zero_affine.rs
+++ b/src/groups/curves/short_weierstrass/non_zero_affine.rs
@ -0,0 +1,132 @@
 use super::*;
 /// An affine representation of a prime order curve point that is guaranteed
 /// to *not* be the point at infinity.
 #[derive(Derivative)]
 #[derivative(Debug, Clone)]
 #[must_use]
 pub struct NonZeroAffineVar<
    P: SWModelParameters,
    F: FieldVar<P::BaseField, <P::BaseField as Field>::BasePrimeField>,
 > where
    for<'a> &'a F: FieldOpsBounds<'a, P::BaseField, F>,
 {
    /// The x-coordinate.
    pub x: F,
    /// The y-coordinate.
    pub y: F,
    #[derivative(Debug = "ignore")]
    _params: PhantomData<P>,
 }

 impl<P, F> NonZeroAffineVar<P, F>
 where
    P: SWModelParameters,
    F: FieldVar<P::BaseField, <P::BaseField as Field>::BasePrimeField>,
    for<'a> &'a F: FieldOpsBounds<'a, P::BaseField, F>,
 {
    pub(crate) fn new(x: F, y: F) -> Self {
        Self {
            x,
            y,
            _params: PhantomData,
        }
    }

    /// Converts self into a non-zero projective point.
    #[tracing::instrument(target = "r1cs", skip(self))]
    pub(crate) fn into_projective(&self) -> ProjectiveVar<P, F> {
        ProjectiveVar::new(self.x.clone(), self.y.clone(), F::one())
    }

    /// Performs an addition without checking that other != ±self.
    #[tracing::instrument(target = "r1cs", skip(self, other))]
    pub(crate) fn add_unchecked(&self, other: &Self) -> Result<Self, SynthesisError> {
        if [self, other].is_constant() {
            let result =
                (self.value()?.into_projective() + other.value()?.into_projective()).into_affine();
            Ok(Self::new(F::constant(result.x), F::constant(result.y)))
        } else {
            let (x1, y1) = (&self.x, &self.y);
            let (x2, y2) = (&other.x, &other.y);
            // Then,
            // slope lambda := (y2 - y1)/(x2 - x1);
            // x3 = lambda^2 - x1 - x2;
            // y3 = lambda * (x1 - x3) - y1
            let numerator = y2 - y1;
            let denominator = x2 - x1;
            let lambda = numerator.mul_by_inverse(&denominator)?;
            let x3 = lambda.square()? - x1 - x2;
            let y3 = lambda * &(x1 - &x3) - y1;
            Ok(Self::new(x3, y3))
        }
    }

    /// Doubles `self`. As this is a prime order curve point,
    /// the output is guaranteed to not be the point at infinity.
    #[tracing::instrument(target = "r1cs", skip(self))]
    pub(crate) fn double(&self) -> Result<Self, SynthesisError> {
        if [self].is_constant() {
            let result = self.value()?.into_projective().double().into_affine();
            // Panic if the result is zero.
            assert!(!result.is_zero());
            Ok(Self::new(F::constant(result.x), F::constant(result.y)))
        } else {
            let (x1, y1) = (&self.x, &self.y);
            let x1_sqr = x1.square()?;
            // Then,
            // tangent lambda := (3 * x1^2 + a) / y1·;
            // x3 = lambda^2 - 2x1
            // y3 = lambda * (x1 - x3) - y1
            let numerator = x1_sqr.double()? + &x1_sqr + P::COEFF_A;
            let denominator = y1.double()?;
            let lambda = numerator.mul_by_inverse(&denominator)?;
            let x3 = lambda.square()? - x1.double()?;
            let y3 = lambda * &(x1 - &x3) - y1;
            Ok(Self::new(x3, y3))
        }
    }

    /// Doubles `self` in place.
    #[tracing::instrument(target = "r1cs", skip(self))]
    pub(crate) fn double_in_place(&mut self) -> Result<(), SynthesisError> {
        *self = self.double()?;
        Ok(())
    }
 }

 impl<P, F> R1CSVar<<P::BaseField as Field>::BasePrimeField> for NonZeroAffineVar<P, F>
 where
    P: SWModelParameters,
    F: FieldVar<P::BaseField, <P::BaseField as Field>::BasePrimeField>,
    for<'a> &'a F: FieldOpsBounds<'a, P::BaseField, F>,
 {
    type Value = SWAffine<P>;

    fn cs(&self) -> ConstraintSystemRef<<P::BaseField as Field>::BasePrimeField> {
        self.x.cs().or(self.y.cs())
    }

    fn value(&self) -> Result<SWAffine<P>, SynthesisError> {
        Ok(SWAffine::new(self.x.value()?, self.y.value()?, false))
    }
 }

 impl<P, F> CondSelectGadget<<P::BaseField as Field>::BasePrimeField> for NonZeroAffineVar<P, F>
 where
    P: SWModelParameters,
    F: FieldVar<P::BaseField, <P::BaseField as Field>::BasePrimeField>,
    for<'a> &'a F: FieldOpsBounds<'a, P::BaseField, F>,
 {
    #[inline]
    #[tracing::instrument(target = "r1cs")]
    fn conditionally_select(
        cond: &Boolean<<P::BaseField as Field>::BasePrimeField>,
        true_value: &Self,
        false_value: &Self,
    ) -> Result<Self, SynthesisError> {
        let x = cond.select(&true_value.x, &false_value.x)?;
        let y = cond.select(&true_value.y, &false_value.y)?;

        Ok(Self::new(x, y))
    }
 }
--- a/src/groups/curves/twisted_edwards/mod.rs
+++ b/src/groups/curves/twisted_edwards/mod.rs
@ -523,43 +523,34 @@ where
        Ok(Self::new(self.x.negate()?, self.y.clone()))
    }

    #[tracing::instrument(target = "r1cs", skip(scalar_bits_with_base_powers))]
    #[tracing::instrument(target = "r1cs", skip(scalar_bits_with_base_multiples))]
    fn precomputed_base_scalar_mul_le<'a, I, B>(
        &mut self,
        scalar_bits_with_base_powers: I,
        scalar_bits_with_base_multiples: I,
    ) -> Result<(), SynthesisError>
    where
        I: Iterator<Item = (B, &'a TEProjective<P>)>,
        B: Borrow<Boolean<<P::BaseField as Field>::BasePrimeField>>,
    {
        let scalar_bits_with_base_powers = scalar_bits_with_base_powers
        let (bits, multiples): (Vec<_>, Vec<_>) = scalar_bits_with_base_multiples
            .map(|(bit, base)| (bit.borrow().clone(), *base))
            .collect::<Vec<(_, TEProjective<P>)>>();
        let zero = TEProjective::zero();
        for bits_base_powers in scalar_bits_with_base_powers.chunks(2) {
            if bits_base_powers.len() == 2 {
                let bits = [bits_base_powers[0].0.clone(), bits_base_powers[1].0.clone()];
                let base_powers = [&bits_base_powers[0].1, &bits_base_powers[1].1];

                let mut table = [
                    zero,
                    *base_powers[0],
                    *base_powers[1],
                    *base_powers[0] + base_powers[1],
                ];
            .unzip();
        let zero: TEAffine<P> = TEProjective::zero().into_affine();
        for (bits, multiples) in bits.chunks(2).zip(multiples.chunks(2)) {
            if bits.len() == 2 {
                let mut table = [multiples[0], multiples[1], multiples[0] + multiples[1]];

                TEProjective::batch_normalization(&mut table);
                let x_s = [table[0].x, table[1].x, table[2].x, table[3].x];
                let y_s = [table[0].y, table[1].y, table[2].y, table[3].y];
                let x_s = [zero.x, table[0].x, table[1].x, table[2].x];
                let y_s = [zero.y, table[0].y, table[1].y, table[2].y];

                let x = F::two_bit_lookup(&bits, &x_s)?;
                let y = F::two_bit_lookup(&bits, &y_s)?;
                *self += Self::new(x, y);
            } else if bits_base_powers.len() == 1 {
                let bit = bits_base_powers[0].0.clone();
                let base_power = bits_base_powers[0].1;
                let new_encoded = &*self + base_power;
                *self = bit.select(&new_encoded, &self)?;
            } else if bits.len() == 1 {
                let bit = &bits[0];
                let tmp = &*self + multiples[0];
                *self = bit.select(&tmp, &*self)?;
            }
        }

--- a/src/groups/mod.rs
+++ b/src/groups/mod.rs
@ -92,38 +92,19 @@ pub trait CurveVar:
        &self,
        bits: impl Iterator<Item = &'a Boolean<ConstraintF>>,
    ) -> Result<Self, SynthesisError> {
        if self.is_constant() {
            // Compute 2^i * self for i in 0..bits.len()
            // (these will be used by`precomputed_base_scalar_mul_le`, to perform
            // a conditional addition.).
            //
            // TODO: if `bits.len()` is small n, it might be cheaper to
            // conditinally select between 2^n options.
            let mut value = self.value().unwrap();
            let bits_and_multiples = bits
                .map(|b| {
                    let multiple = value;
                    value.double_in_place();
                    (b, multiple)
                })
                .collect::<ark_std::vec::Vec<_>>();
            let mut result = self.clone();
            result.precomputed_base_scalar_mul_le(
                bits_and_multiples.iter().map(|&(ref b, ref c)| (*b, c)),
            )?;
            Ok(result)
        } else {
            // Computes the standard little-endian double-and-add algorithm
            // (Algorithm 3.27, Guide to Elliptic Curve Cryptography)
            let mut res = Self::zero();
            let mut multiple = self.clone();
            for bit in bits {
                let tmp = res.clone() + &multiple;
                res = bit.select(&tmp, &res)?;
                multiple.double_in_place()?;
            }
            Ok(res)
        // TODO: in the constant case we should call precomputed_scalar_mul_le,
        // but rn there's a bug when doing this with TE curves.

        // Computes the standard little-endian double-and-add algorithm
        // (Algorithm 3.26, Guide to Elliptic Curve Cryptography)
        let mut res = Self::zero();
        let mut multiple = self.clone();
        for bit in bits {
            let tmp = res.clone() + &multiple;
            res = bit.select(&tmp, &res)?;
            multiple.double_in_place()?;
        }
        Ok(res)
    }

    /// Computes a `I * self` in place, where `I` is a `Boolean` *little-endian*