// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
#include "textflag.h"
|
|
#include "funcdata.h"
|
|
|
|
// q<> holds the 256-bit modulus as four 64-bit limbs, least-significant
// limb first (offset 0 is the lowest word, offset 24 the highest).
// It is file-local, read-only, and contains no pointers.
DATA q<>+0x00(SB)/8, $0x43e1f593f0000001 // limb 0 (low)
DATA q<>+0x08(SB)/8, $0x2833e84879b97091 // limb 1
DATA q<>+0x10(SB)/8, $0xb85045b68181585d // limb 2
DATA q<>+0x18(SB)/8, $0x30644e72e131a029 // limb 3 (high)
GLOBL q<>(SB), RODATA|NOPTR, $32
|
|
|
|
// qInv0<> is the single 64-bit word q'[0].
// NOTE(review): presumably the Montgomery constant with
// q[0] * qInv0 == -1 (mod 2^64); it is not referenced by the routines
// visible in this section, so confirm against the multiplication code.
DATA qInv0<>+0x00(SB)/8, $0xc2e1f593efffffff
GLOBL qInv0<>(SB), RODATA|NOPTR, $8
|
|
|
|
// REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3)
//
// Conditionally subtracts the modulus q<> once from the 4-limb value held in
// (ra0,ra1,ra2,ra3), ra0 being the least-significant limb:
//
//   (ra) = (ra) >= q ? (ra) - q : (ra)
//
// How it works: each limb is first copied to its backup register rbN, then
// q is subtracted in place with a borrow chain (SUBQ/SBBQ). MOVQ does not
// touch the flags, so the copies can be interleaved with the chain. If the
// final SBBQ leaves the carry flag set, the subtraction borrowed, meaning the
// value was smaller than q, and the CMOVQCS instructions restore the backup.
//
// Inputs/outputs: ra0..ra3 (modified in place).
// Clobbers:       rb0..rb3 and the flags.
// The sequence is branch-free.
//
// The macro body deliberately ends on its last instruction, with no trailing
// line continuation, so it never absorbs the line that follows the definition.
#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \
	MOVQ    ra0, rb0;        \
	SUBQ    q<>(SB), ra0;    \
	MOVQ    ra1, rb1;        \
	SBBQ    q<>+8(SB), ra1;  \
	MOVQ    ra2, rb2;        \
	SBBQ    q<>+16(SB), ra2; \
	MOVQ    ra3, rb3;        \
	SBBQ    q<>+24(SB), ra3; \
	CMOVQCS rb0, ra0;        \
	CMOVQCS rb1, ra1;        \
	CMOVQCS rb2, ra2;        \
	CMOVQCS rb3, ra3
|
|
|
|
// add(res, x, y *Element)
//
// Stores x + y into res, followed by one conditional subtraction of q.
// All limbs of x and y are read before res is written, so res may alias
// either operand.
// NOTE(review): the carry out of the top limb is discarded, so this assumes
// the operands are small enough that x + y fits in 256 bits - confirm with
// the callers.
TEXT ·add(SB), NOSPLIT, $0-24
	// fetch the three pointers up front
	MOVQ x+8(FP), AX
	MOVQ y+16(FP), DX
	MOVQ res+0(FP), R12

	// (CX,BX,SI,DI) = x
	MOVQ 0(AX), CX
	MOVQ 8(AX), BX
	MOVQ 16(AX), SI
	MOVQ 24(AX), DI

	// (CX,BX,SI,DI) += y, carry propagated limb to limb
	ADDQ 0(DX), CX
	ADCQ 8(DX), BX
	ADCQ 16(DX), SI
	ADCQ 24(DX), DI

	// subtract q once if the sum is >= q; scratch: R8,R9,R10,R11
	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11)

	// res = (CX,BX,SI,DI)
	MOVQ CX, 0(R12)
	MOVQ BX, 8(R12)
	MOVQ SI, 16(R12)
	MOVQ DI, 24(R12)
	RET
|
|
|
|
// sub(res, x, y *Element)
//
// Stores x - y into res. The raw 256-bit difference is computed first; if
// the subtraction borrowed (x < y as unsigned integers), q is added back,
// otherwise zero is added. The selection is done with conditional moves, so
// the routine is branch-free. All loads happen before the stores, so res may
// alias x or y.
TEXT ·sub(SB), NOSPLIT, $0-24
	MOVQ x+8(FP), SI
	MOVQ y+16(FP), R12

	// (AX,DX,CX,BX) = x
	MOVQ 0(SI), AX
	MOVQ 8(SI), DX
	MOVQ 16(SI), CX
	MOVQ 24(SI), BX

	// x's pointer is no longer needed; reuse SI for the destination
	MOVQ res+0(FP), SI

	// (R8,R9,R10,R11) = q, DI = 0. Prepared before the borrow chain:
	// XORQ changes the flags, so it must not sit between SBBQ and CMOVQCC.
	XORQ DI, DI
	MOVQ $0x43e1f593f0000001, R8
	MOVQ $0x2833e84879b97091, R9
	MOVQ $0xb85045b68181585d, R10
	MOVQ $0x30644e72e131a029, R11

	// (AX,DX,CX,BX) -= y, borrow propagated limb to limb
	SUBQ 0(R12), AX
	SBBQ 8(R12), DX
	SBBQ 16(R12), CX
	SBBQ 24(R12), BX

	// no borrow (carry clear): replace the correction term by zero
	CMOVQCC DI, R8
	CMOVQCC DI, R9
	CMOVQCC DI, R10
	CMOVQCC DI, R11

	// add the correction term (q or 0)
	ADDQ R8, AX
	ADCQ R9, DX
	ADCQ R10, CX
	ADCQ R11, BX

	// res = (AX,DX,CX,BX)
	MOVQ AX, 0(SI)
	MOVQ DX, 8(SI)
	MOVQ CX, 16(SI)
	MOVQ BX, 24(SI)
	RET
|
|
|
|
// double(res, x *Element)
//
// Stores 2*x into res, followed by one conditional subtraction of q.
// x is fully read before res is written, so res may alias x.
// NOTE(review): the bit shifted out of the top limb is discarded, so this
// assumes 2*x fits in 256 bits - confirm with the callers.
TEXT ·double(SB), NOSPLIT, $0-16
	MOVQ x+8(FP), AX
	MOVQ res+0(FP), R11

	// (DX,CX,BX,SI) = x
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// (DX,CX,BX,SI) += (DX,CX,BX,SI), i.e. shift left by one bit
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI

	// subtract q once if the result is >= q; scratch: DI,R8,R9,R10
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// res = (DX,CX,BX,SI)
	MOVQ DX, 0(R11)
	MOVQ CX, 8(R11)
	MOVQ BX, 16(R11)
	MOVQ SI, 24(R11)
	RET
|
|
|
|
// neg(res, x *Element)
//
// Stores the negation of x into res:
//   - if every limb of x is zero, res is set to zero;
//   - otherwise res = q - x.
// x is fully read before res is written, so res may alias x.
TEXT ·neg(SB), NOSPLIT, $0-16
	MOVQ x+8(FP), AX
	MOVQ res+0(FP), DI

	// (DX,CX,BX,SI) = x
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// AX = OR of all limbs; it is zero exactly when x == 0
	MOVQ  DX, AX
	ORQ   CX, AX
	ORQ   BX, AX
	ORQ   SI, AX
	TESTQ AX, AX
	JEQ   x_is_zero

	// (R8,R9,R10,R11) = q
	MOVQ $0x43e1f593f0000001, R8
	MOVQ $0x2833e84879b97091, R9
	MOVQ $0xb85045b68181585d, R10
	MOVQ $0x30644e72e131a029, R11

	// (R8,R9,R10,R11) -= x, borrow propagated limb to limb
	SUBQ DX, R8
	SBBQ CX, R9
	SBBQ BX, R10
	SBBQ SI, R11

	// res = q - x
	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	MOVQ R11, 24(DI)
	RET

x_is_zero:
	// AX is known to be zero here; use it to clear res
	MOVQ AX, 0(DI)
	MOVQ AX, 8(DI)
	MOVQ AX, 16(DI)
	MOVQ AX, 24(DI)
	RET
|
|
|
|
// reduce(res *Element)
//
// In-place conditional subtraction: replaces res by res - q when res >= q
// and leaves it unchanged otherwise. Only a single subtraction is attempted.
TEXT ·reduce(SB), NOSPLIT, $0-8
	MOVQ res+0(FP), AX

	// (DX,CX,BX,SI) = res
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// subtract q once if the value is >= q; scratch: DI,R8,R9,R10
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// res = (DX,CX,BX,SI)
	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	RET
|
|
|
|
// MulBy3(x *Element)
//
// Multiplies x by 3 in place, as 2*x + x, with one conditional subtraction
// of q after each of the two additions.
TEXT ·MulBy3(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX

	// (DX,CX,BX,SI) = x
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// (DX,CX,BX,SI) = 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// (DX,CX,BX,SI) = 2x + x; x is still untouched in memory.
	// The scratch registers of the previous REDUCE are dead and reused.
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// x = (DX,CX,BX,SI)
	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	RET
|
|
|
|
// MulBy5(x *Element)
//
// Multiplies x by 5 in place, as 2*(2*x) + x, with one conditional
// subtraction of q after each step.
TEXT ·MulBy5(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX

	// (DX,CX,BX,SI) = x
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// (DX,CX,BX,SI) = 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// (DX,CX,BX,SI) = 4x
	// Scratch registers are dead after each REDUCE, so one set is reused.
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// (DX,CX,BX,SI) = 4x + x; x is still untouched in memory
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// x = (DX,CX,BX,SI)
	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	RET
|
|
|
|
// MulBy13(x *Element)
//
// Multiplies x by 13 in place, as 8x + 4x + x, with one conditional
// subtraction of q after each step.
//
// Register roles:
//   AX             pointer to x
//   (DX,CX,BX,SI)  running value
//   (R11..R14)     saved copy of 4x
//   (DI,R8,R9,R10) scratch for every REDUCE
TEXT ·MulBy13(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX

	// (DX,CX,BX,SI) = x
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// (DX,CX,BX,SI) = 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// (DX,CX,BX,SI) = 4x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// keep 4x aside in (R11,R12,R13,R14)
	MOVQ DX, R11
	MOVQ CX, R12
	MOVQ BX, R13
	MOVQ SI, R14

	// (DX,CX,BX,SI) = 8x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// (DX,CX,BX,SI) = 8x + 4x = 12x
	ADDQ R11, DX
	ADCQ R12, CX
	ADCQ R13, BX
	ADCQ R14, SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// (DX,CX,BX,SI) = 12x + x = 13x; x is still untouched in memory
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// x = (DX,CX,BX,SI)
	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	RET
|
|
|
|
// Butterfly(a, b *Element) sets a = a + b; b = a - b
//
// Both results are computed from the values a and b had on entry:
//   - the sum is followed by one conditional subtraction of q;
//   - the difference gets q added back when the subtraction borrowed.
// b is written before a, and b is only read before either store.
//
// Register roles:
//   DX              pointer to b
//   (CX,BX,SI,DI)   a, then a + b
//   (R8,R9,R10,R11) copy of a, then a - b, then REDUCE scratch
//   (R12..R15)      q, or zero when the subtraction did not borrow
//   AX              pointer to a, then constant zero, then pointer to a again
TEXT ·Butterfly(SB), NOSPLIT, $0-16
	MOVQ a+0(FP), AX
	MOVQ b+8(FP), DX

	// load a twice: once for the sum, once for the difference
	MOVQ 0(AX), CX
	MOVQ CX, R8
	MOVQ 8(AX), BX
	MOVQ BX, R9
	MOVQ 16(AX), SI
	MOVQ SI, R10
	MOVQ 24(AX), DI
	MOVQ DI, R11

	// (R12,R13,R14,R15) = q and AX = 0, prepared before the arithmetic so
	// that nothing disturbs the flags between the borrow chain and CMOVQCC
	MOVQ $0x43e1f593f0000001, R12
	MOVQ $0x2833e84879b97091, R13
	MOVQ $0xb85045b68181585d, R14
	MOVQ $0x30644e72e131a029, R15
	XORQ AX, AX

	// (CX,BX,SI,DI) = a + b
	ADDQ 0(DX), CX
	ADCQ 8(DX), BX
	ADCQ 16(DX), SI
	ADCQ 24(DX), DI

	// (R8,R9,R10,R11) = a - b
	SUBQ 0(DX), R8
	SBBQ 8(DX), R9
	SBBQ 16(DX), R10
	SBBQ 24(DX), R11

	// no borrow (carry clear): replace the correction term by zero
	CMOVQCC AX, R12
	CMOVQCC AX, R13
	CMOVQCC AX, R14
	CMOVQCC AX, R15

	// add the correction term (q or 0) to the difference
	ADDQ R12, R8
	ADCQ R13, R9
	ADCQ R14, R10
	ADCQ R15, R11

	// b = a - b
	MOVQ R8, 0(DX)
	MOVQ R9, 8(DX)
	MOVQ R10, 16(DX)
	MOVQ R11, 24(DX)

	// subtract q once from the sum if it is >= q; R8..R11 are free again
	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11)

	// a = a + b; AX was used as the zero register, so reload the pointer
	MOVQ a+0(FP), AX
	MOVQ CX, 0(AX)
	MOVQ BX, 8(AX)
	MOVQ SI, 16(AX)
	MOVQ DI, 24(AX)
	RET
|