From 5e4114b19c5486707e01758bf5eb4e73f386f646 Mon Sep 17 00:00:00 2001 From: Pratyush Mishra Date: Fri, 15 Jan 2021 13:18:26 -0800 Subject: [PATCH] Better scalar multiplication for Short Weierstrass curves (#40) Co-authored-by: Dev Ojha Co-authored-by: Pratyush Mishra --- CHANGELOG.md | 21 +-- src/groups/curves/short_weierstrass/mod.rs | 168 +++++++++++++----- .../short_weierstrass/non_zero_affine.rs | 132 ++++++++++++++ src/groups/curves/twisted_edwards/mod.rs | 37 ++-- src/groups/mod.rs | 43 ++--- 5 files changed, 294 insertions(+), 107 deletions(-) create mode 100644 src/groups/curves/short_weierstrass/non_zero_affine.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a9f89c..f7cfa4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,26 +1,27 @@ ## Pending ### Breaking changes -- #12 Make FpVar's ToBitsGadget output constant length bit vectors. +- #12 Make the `ToBitsGadget` impl for `FpVar` output fixed-size ### Features ### Improvements - #5 Speedup BLS-12 pairing -- #13 Add ToConstraintFieldGadget to ProjectiveVar -- #15, #16 Allow cs to be none when converting a montgomery point into a twisted edwards point -- #20 Add conditional select gadget to Uints -- #21 Add Uint128 +- #13 Add `ToConstraintFieldGadget` to `ProjectiveVar` +- #15, #16 Allow `cs` to be `None` when converting a Montgomery point into a Twisted Edwards point +- #20 Add `CondSelectGadget` impl for `UInt`s +- #21 Add `UInt128` - #22 Reduce density of `three_bit_cond_neg_lookup` -- #23 Reduce allocations in Uints +- #23 Reduce allocations in `UInt`s - #33 Speedup scalar multiplication by a constant -- #35 Construct a FpVar from bits -- #36 ImplementToConstraintFieldGadget for `Vec` +- #35 Construct a `FpVar` from bits +- #36 Implement `ToConstraintFieldGadget` for `Vec` +- #40 Faster scalar multiplication for Short Weierstrass curves by relying on affine formulae ### Bug fixes -- #8 Fix bug in three_bit_cond_neg_lookup when using a constant lookup bit -- #9 Fix bug in short weierstrass projective curve point's to_affine method +- #8 Fix bug in `three_bit_cond_neg_lookup` when using a constant lookup bit +- #9 Fix bug in `short_weierstrass::ProjectiveVar::to_affine` - #29 Fix `to_non_unique_bytes` for `BLS12::G1Prepared` - #34 Fix `mul_by_inverse` for constants - #42 Fix regression in `mul_by_inverse` constraint count diff --git a/src/groups/curves/short_weierstrass/mod.rs b/src/groups/curves/short_weierstrass/mod.rs index 1bf3b83..47afc1f 100644 --- a/src/groups/curves/short_weierstrass/mod.rs +++ b/src/groups/curves/short_weierstrass/mod.rs @@ -2,9 +2,10 @@ use ark_ec::{ short_weierstrass_jacobian::{GroupAffine as SWAffine, GroupProjective as SWProjective}, AffineCurve, ProjectiveCurve, SWModelParameters, }; -use ark_ff::{BigInteger, BitIteratorBE, Field, One, PrimeField, Zero}; +use ark_ff::{BigInteger, BitIteratorBE, Field, FpParameters, One, PrimeField, Zero}; use ark_relations::r1cs::{ConstraintSystemRef, Namespace, SynthesisError}; use core::{borrow::Borrow, marker::PhantomData}; +use non_zero_affine::NonZeroAffineVar; use crate::{fields::fp::FpVar, prelude::*, ToConstraintFieldGadget, Vec}; @@ -21,6 +22,7 @@ pub mod mnt4; /// family of bilinear groups. pub mod mnt6; +mod non_zero_affine; /// An implementation of arithmetic for Short Weierstrass curves that relies on /// the complete formulae derived in the paper of /// [[Renes, Costello, Batina 2015]](https://eprint.iacr.org/2015/1060). @@ -223,8 +225,8 @@ where } /// Mixed addition, which is useful when `other = (x2, y2)` is known to have z = 1. - #[tracing::instrument(target = "r1cs", skip(self, x2, y2))] - pub(crate) fn add_mixed(&self, (x2, y2): (&F, &F)) -> Result { + #[tracing::instrument(target = "r1cs", skip(self, other))] + pub(crate) fn add_mixed(&self, other: &NonZeroAffineVar) -> Result { // Complete mixed addition formula from Renes-Costello-Batina 2015 // Algorithm 2 // (https://eprint.iacr.org/2015/1060). @@ -235,6 +237,7 @@ where // https://github.com/RustCrypto/elliptic-curves/blob/master/p256/src/arithmetic/projective.rs let three_b = P::COEFF_B.double() + &P::COEFF_B; let (x1, y1, z1) = (&self.x, &self.y, &self.z); + let (x2, y2) = (&other.x, &other.y); let xx = x1 * x2; // 1 let yy = y1 * y2; // 2 @@ -261,6 +264,80 @@ where Ok(ProjectiveVar::new(x, y, z)) } + + /// Computes a scalar multiplication with a little-endian scalar of size `P::ScalarField::MODULUS_BITS`. + #[tracing::instrument( + target = "r1cs", + skip(self, mul_result, multiple_of_power_of_two, bits) + )] + fn fixed_scalar_mul_le( + &self, + mul_result: &mut Self, + multiple_of_power_of_two: &mut NonZeroAffineVar, + bits: &[&Boolean<::BasePrimeField>], + ) -> Result<(), SynthesisError> { + let scalar_modulus_bits = <::Params>::MODULUS_BITS as usize; + + assert!(scalar_modulus_bits >= bits.len()); + let split_len = ark_std::cmp::min(scalar_modulus_bits - 2, bits.len()); + let (affine_bits, proj_bits) = bits.split_at(split_len); + // Computes the standard little-endian double-and-add algorithm + // (Algorithm 3.26, Guide to Elliptic Curve Cryptography) + // + // We rely on *incomplete* affine formulae for partially computing this. + // However, we avoid exceptional edge cases because we partition the scalar + // into two chunks: one guaranteed to be less than p - 2, and the rest. + // We only use incomplete formulae for the first chunk, which means we avoid exceptions: + // + // `add_unchecked(a, b)` is incomplete when either `b.is_zero()`, or when + // `b = ±a`. During scalar multiplication, we don't hit either case: + // * `b = ±a`: `b = accumulator = k * a`, where `2 <= k < p - 1`. + // This implies that `k != p ± 1`, and so `b != (p ± 1) * a`. + // Because the group is finite, this in turn means that `b != ±a`, as required. + // * `a` or `b` is zero: for `a`, we handle the zero case after the loop; for `b`, notice + // that it is monotonically increasing, and furthermore, equals `k * a`, where + // `k != p = 0 mod p`. + + // Unlike normal double-and-add, here we start off with a non-zero `accumulator`, + // because `NonZeroAffineVar::add_unchecked` doesn't support addition with `zero`. + // In more detail, we initialize `accumulator` to be the initial value of + // `multiple_of_power_of_two`. This ensures that all unchecked additions of `accumulator` + // with later values of `multiple_of_power_of_two` are safe. + // However, to do this correctly, we need to perform two steps: + // * We must skip the LSB, and instead proceed assuming that it was 1. Later, we will + // conditionally subtract the initial value of `accumulator`: + // if LSB == 0: subtract initial_acc_value; else, subtract 0. + // * Because we are assuming the first bit, we must double `multiple_of_power_of_two`. + + let mut accumulator = multiple_of_power_of_two.clone(); + let initial_acc_value = accumulator.into_projective(); + + // The powers start at 2 (instead of 1) because we're skipping the first bit. + multiple_of_power_of_two.double_in_place()?; + + // As mentioned, we will skip the LSB, and will later handle it via a conditional subtraction. + for bit in affine_bits.iter().skip(1) { + let temp = accumulator.add_unchecked(&multiple_of_power_of_two)?; + accumulator = bit.select(&temp, &accumulator)?; + multiple_of_power_of_two.double_in_place()?; + } + // Perform conditional subtraction: + + // We can convert to projective safely because the result is guaranteed to be non-zero + // by the condition on `affine_bits.len()`, and by the fact that `accumulator` is non-zero + let result = accumulator.into_projective(); + // If bits[0] is 0, then we have to subtract `self`; else, we subtract zero. + let subtrahend = bits[0].select(&Self::zero(), &initial_acc_value)?; + *mul_result += result - subtrahend; + + // Now, let's finish off the rest of the bits using our complete formulae + for bit in proj_bits { + let temp = &*mul_result + &multiple_of_power_of_two.into_projective(); + *mul_result = bit.select(&temp, &mul_result)?; + multiple_of_power_of_two.double_in_place()?; + } + Ok(()) + } } impl CurveVar, ::BasePrimeField> @@ -398,47 +475,52 @@ where bits: impl Iterator::BasePrimeField>>, ) -> Result { if self.is_constant() { - // Compute 2^i * self for i in 0..bits.len() - // (these will be used by`precomputed_base_scalar_mul_le`, to perform - // a conditional addition.). - // - // TODO: if `bits.len()` is small n, it might be cheaper to - // conditionally select between 2^n options. - let mut value = self.value()?; - let bits_and_multiples = bits - .map(|b| { - let multiple = value; - value.double_in_place(); - (b, multiple) - }) - .collect::>(); - let mut result = self.clone(); - result.precomputed_base_scalar_mul_le( - bits_and_multiples.iter().map(|&(ref b, ref c)| (*b, c)), - )?; - Ok(result) - } else { - // Computes the standard big-endian double-and-add algorithm - // (Algorithm 3.27, Guide to Elliptic Curve Cryptography) - // the input is little-endian, so we need to reverse it to get it in - // big-endian. - let mut bits = bits.collect::>(); - bits.reverse(); - - let mut res = Self::zero(); - let self_affine = self.to_affine()?; - for bit in bits { - res.double_in_place()?; - let tmp = res.add_mixed((&self_affine.x, &self_affine.y))?; - res = bit.select(&tmp, &res)?; + if self.value().unwrap().is_zero() { + return Ok(self.clone()); } - // The foregoing algorithm relies on mixed addition, and so does not - // work when the input (`self`) is zero. We hence have to perform - // a check to ensure that if the input is zero, then so is the output. - // The cost of this check should be less than the benefit of using - // mixed addition in almost all cases. - self_affine.infinity.select(&Self::zero(), &res) } + let self_affine = self.to_affine()?; + let (x, y, infinity) = (self_affine.x, self_affine.y, self_affine.infinity); + // We first handle the non-zero case, and then later + // will conditionally select zero if `self` was zero. + let non_zero_self = NonZeroAffineVar::new(x, y); + + let bits = bits.collect::>(); + if bits.len() == 0 { + return Ok(Self::zero()); + } + let scalar_modulus_bits = <::Params>::MODULUS_BITS as usize; + let mut mul_result = Self::zero(); + let mut power_of_two_times_self = non_zero_self; + // We chunk up `bits` into `p`-sized chunks. + for bits in bits.chunks(scalar_modulus_bits) { + self.fixed_scalar_mul_le(&mut mul_result, &mut power_of_two_times_self, bits)?; + } + + // The foregoing algorithms rely on mixed/incomplete addition, and so do not + // work when the input (`self`) is zero. We hence have to perform + // a check to ensure that if the input is zero, then so is the output. + // The cost of this check should be less than the benefit of using + // mixed addition in almost all cases. + infinity.select(&Self::zero(), &mul_result) + } + + #[tracing::instrument(target = "r1cs", skip(scalar_bits_with_bases))] + fn precomputed_base_scalar_mul_le<'a, I, B>( + &mut self, + scalar_bits_with_bases: I, + ) -> Result<(), SynthesisError> + where + I: Iterator)>, + B: Borrow::BasePrimeField>>, + { + // We just ignore the provided bases and use the faster scalar multiplication. + let (bits, bases): (Vec<_>, Vec<_>) = scalar_bits_with_bases + .map(|(b, c)| (b.borrow().clone(), *c)) + .unzip(); + let base = bases[0]; + *self = Self::constant(base).scalar_mul_le(bits.iter())?; + Ok(()) } } @@ -500,7 +582,7 @@ impl_bounded_ops!( // We'll use mixed addition to add non-zero constants. let x = F::constant(other.x); let y = F::constant(other.y); - this.add_mixed((&x, &y)).unwrap() + this.add_mixed(&NonZeroAffineVar::new(x, y)).unwrap() } } else { // Complete addition formula from Renes-Costello-Batina 2015 diff --git a/src/groups/curves/short_weierstrass/non_zero_affine.rs b/src/groups/curves/short_weierstrass/non_zero_affine.rs new file mode 100644 index 0000000..2a69582 --- /dev/null +++ b/src/groups/curves/short_weierstrass/non_zero_affine.rs @@ -0,0 +1,132 @@ +use super::*; +/// An affine representation of a prime order curve point that is guaranteed +/// to *not* be the point at infinity. +#[derive(Derivative)] +#[derivative(Debug, Clone)] +#[must_use] +pub struct NonZeroAffineVar< + P: SWModelParameters, + F: FieldVar::BasePrimeField>, +> where + for<'a> &'a F: FieldOpsBounds<'a, P::BaseField, F>, +{ + /// The x-coordinate. + pub x: F, + /// The y-coordinate. + pub y: F, + #[derivative(Debug = "ignore")] + _params: PhantomData

, +} + +impl NonZeroAffineVar +where + P: SWModelParameters, + F: FieldVar::BasePrimeField>, + for<'a> &'a F: FieldOpsBounds<'a, P::BaseField, F>, +{ + pub(crate) fn new(x: F, y: F) -> Self { + Self { + x, + y, + _params: PhantomData, + } + } + + /// Converts self into a non-zero projective point. + #[tracing::instrument(target = "r1cs", skip(self))] + pub(crate) fn into_projective(&self) -> ProjectiveVar { + ProjectiveVar::new(self.x.clone(), self.y.clone(), F::one()) + } + + /// Performs an addition without checking that other != ±self. + #[tracing::instrument(target = "r1cs", skip(self, other))] + pub(crate) fn add_unchecked(&self, other: &Self) -> Result { + if [self, other].is_constant() { + let result = + (self.value()?.into_projective() + other.value()?.into_projective()).into_affine(); + Ok(Self::new(F::constant(result.x), F::constant(result.y))) + } else { + let (x1, y1) = (&self.x, &self.y); + let (x2, y2) = (&other.x, &other.y); + // Then, + // slope lambda := (y2 - y1)/(x2 - x1); + // x3 = lambda^2 - x1 - x2; + // y3 = lambda * (x1 - x3) - y1 + let numerator = y2 - y1; + let denominator = x2 - x1; + let lambda = numerator.mul_by_inverse(&denominator)?; + let x3 = lambda.square()? - x1 - x2; + let y3 = lambda * &(x1 - &x3) - y1; + Ok(Self::new(x3, y3)) + } + } + + /// Doubles `self`. As this is a prime order curve point, + /// the output is guaranteed to not be the point at infinity. + #[tracing::instrument(target = "r1cs", skip(self))] + pub(crate) fn double(&self) -> Result { + if [self].is_constant() { + let result = self.value()?.into_projective().double().into_affine(); + // Panic if the result is zero. + assert!(!result.is_zero()); + Ok(Self::new(F::constant(result.x), F::constant(result.y))) + } else { + let (x1, y1) = (&self.x, &self.y); + let x1_sqr = x1.square()?; + // Then, + // tangent lambda := (3 * x1^2 + a) / y1·; + // x3 = lambda^2 - 2x1 + // y3 = lambda * (x1 - x3) - y1 + let numerator = x1_sqr.double()? + &x1_sqr + P::COEFF_A; + let denominator = y1.double()?; + let lambda = numerator.mul_by_inverse(&denominator)?; + let x3 = lambda.square()? - x1.double()?; + let y3 = lambda * &(x1 - &x3) - y1; + Ok(Self::new(x3, y3)) + } + } + + /// Doubles `self` in place. + #[tracing::instrument(target = "r1cs", skip(self))] + pub(crate) fn double_in_place(&mut self) -> Result<(), SynthesisError> { + *self = self.double()?; + Ok(()) + } +} + +impl R1CSVar<::BasePrimeField> for NonZeroAffineVar +where + P: SWModelParameters, + F: FieldVar::BasePrimeField>, + for<'a> &'a F: FieldOpsBounds<'a, P::BaseField, F>, +{ + type Value = SWAffine

; + + fn cs(&self) -> ConstraintSystemRef<::BasePrimeField> { + self.x.cs().or(self.y.cs()) + } + + fn value(&self) -> Result, SynthesisError> { + Ok(SWAffine::new(self.x.value()?, self.y.value()?, false)) + } +} + +impl CondSelectGadget<::BasePrimeField> for NonZeroAffineVar +where + P: SWModelParameters, + F: FieldVar::BasePrimeField>, + for<'a> &'a F: FieldOpsBounds<'a, P::BaseField, F>, +{ + #[inline] + #[tracing::instrument(target = "r1cs")] + fn conditionally_select( + cond: &Boolean<::BasePrimeField>, + true_value: &Self, + false_value: &Self, + ) -> Result { + let x = cond.select(&true_value.x, &false_value.x)?; + let y = cond.select(&true_value.y, &false_value.y)?; + + Ok(Self::new(x, y)) + } +} diff --git a/src/groups/curves/twisted_edwards/mod.rs b/src/groups/curves/twisted_edwards/mod.rs index 6753995..aed8f82 100644 --- a/src/groups/curves/twisted_edwards/mod.rs +++ b/src/groups/curves/twisted_edwards/mod.rs @@ -523,43 +523,34 @@ where Ok(Self::new(self.x.negate()?, self.y.clone())) } - #[tracing::instrument(target = "r1cs", skip(scalar_bits_with_base_powers))] + #[tracing::instrument(target = "r1cs", skip(scalar_bits_with_base_multiples))] fn precomputed_base_scalar_mul_le<'a, I, B>( &mut self, - scalar_bits_with_base_powers: I, + scalar_bits_with_base_multiples: I, ) -> Result<(), SynthesisError> where I: Iterator)>, B: Borrow::BasePrimeField>>, { - let scalar_bits_with_base_powers = scalar_bits_with_base_powers + let (bits, multiples): (Vec<_>, Vec<_>) = scalar_bits_with_base_multiples .map(|(bit, base)| (bit.borrow().clone(), *base)) - .collect::)>>(); - let zero = TEProjective::zero(); - for bits_base_powers in scalar_bits_with_base_powers.chunks(2) { - if bits_base_powers.len() == 2 { - let bits = [bits_base_powers[0].0.clone(), bits_base_powers[1].0.clone()]; - let base_powers = [&bits_base_powers[0].1, &bits_base_powers[1].1]; - - let mut table = [ - zero, - *base_powers[0], - *base_powers[1], - *base_powers[0] + base_powers[1], - ]; + .unzip(); + let zero: TEAffine

= TEProjective::zero().into_affine(); + for (bits, multiples) in bits.chunks(2).zip(multiples.chunks(2)) { + if bits.len() == 2 { + let mut table = [multiples[0], multiples[1], multiples[0] + multiples[1]]; TEProjective::batch_normalization(&mut table); - let x_s = [table[0].x, table[1].x, table[2].x, table[3].x]; - let y_s = [table[0].y, table[1].y, table[2].y, table[3].y]; + let x_s = [zero.x, table[0].x, table[1].x, table[2].x]; + let y_s = [zero.y, table[0].y, table[1].y, table[2].y]; let x = F::two_bit_lookup(&bits, &x_s)?; let y = F::two_bit_lookup(&bits, &y_s)?; *self += Self::new(x, y); - } else if bits_base_powers.len() == 1 { - let bit = bits_base_powers[0].0.clone(); - let base_power = bits_base_powers[0].1; - let new_encoded = &*self + base_power; - *self = bit.select(&new_encoded, &self)?; + } else if bits.len() == 1 { + let bit = &bits[0]; + let tmp = &*self + multiples[0]; + *self = bit.select(&tmp, &*self)?; } } diff --git a/src/groups/mod.rs b/src/groups/mod.rs index eb79051..881c566 100644 --- a/src/groups/mod.rs +++ b/src/groups/mod.rs @@ -92,38 +92,19 @@ pub trait CurveVar: &self, bits: impl Iterator>, ) -> Result { - if self.is_constant() { - // Compute 2^i * self for i in 0..bits.len() - // (these will be used by`precomputed_base_scalar_mul_le`, to perform - // a conditional addition.). - // - // TODO: if `bits.len()` is small n, it might be cheaper to - // conditinally select between 2^n options. - let mut value = self.value().unwrap(); - let bits_and_multiples = bits - .map(|b| { - let multiple = value; - value.double_in_place(); - (b, multiple) - }) - .collect::>(); - let mut result = self.clone(); - result.precomputed_base_scalar_mul_le( - bits_and_multiples.iter().map(|&(ref b, ref c)| (*b, c)), - )?; - Ok(result) - } else { - // Computes the standard little-endian double-and-add algorithm - // (Algorithm 3.27, Guide to Elliptic Curve Cryptography) - let mut res = Self::zero(); - let mut multiple = self.clone(); - for bit in bits { - let tmp = res.clone() + &multiple; - res = bit.select(&tmp, &res)?; - multiple.double_in_place()?; - } - Ok(res) + // TODO: in the constant case we should call precomputed_scalar_mul_le, + // but rn there's a bug when doing this with TE curves. + + // Computes the standard little-endian double-and-add algorithm + // (Algorithm 3.26, Guide to Elliptic Curve Cryptography) + let mut res = Self::zero(); + let mut multiple = self.clone(); + for bit in bits { + let tmp = res.clone() + &multiple; + res = bit.select(&tmp, &res)?; + multiple.double_in_place()?; } + Ok(res) } /// Computes a `I * self` in place, where `I` is a `Boolean` *little-endian*