|
|
// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "textflag.h"
#include "funcdata.h"
// q is the modulus: a 256-bit value laid out as four 64-bit limbs, with the
// least-significant limb at offset 0 and the most-significant at offset 24.
// File-local (q<>), read-only, and pointer-free.
DATA q<>+0(SB)/8, $0x43e1f593f0000001
DATA q<>+8(SB)/8, $0x2833e84879b97091
DATA q<>+16(SB)/8, $0xb85045b68181585d
DATA q<>+24(SB)/8, $0x30644e72e131a029
GLOBL q<>(SB), (RODATA+NOPTR), $32
// qInv0 is q'[0], a single read-only 64-bit constant.
// NOTE(review): presumably -q^(-1) mod 2^64, the Montgomery reduction
// constant; none of the routines visible here reference it -- confirm
// against the multiplication code.
DATA qInv0<>(SB)/8, $0xc2e1f593efffffff
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8
// REDUCE(ra0..ra3, rb0..rb3) performs one conditional subtraction of the
// modulus q on the 4-limb value held in (ra0, ra1, ra2, ra3), where ra0 is
// the least-significant limb:
//
//   1. each limb is backed up into the matching rb register,
//   2. q is subtracted limb by limb with a SUBQ/SBBQ borrow chain,
//   3. if the final subtraction borrowed (CF set, i.e. the value was below
//      q), the backup is moved back with CMOVQCS.
//
// The result is therefore the input reduced mod q whenever the input is
// below 2q. Only one subtraction is attempted.
//
// Clobbers: rb0..rb3 and the flags. MOVQ does not modify the flags, which is
// why the backups may be interleaved with the borrow chain.
//
// Keep comments out of the macro body: a // comment would swallow the
// trailing backslash. The last line deliberately has no continuation so the
// macro cannot absorb whatever line follows it.
#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \
	MOVQ    ra0, rb0;        \
	SUBQ    q<>(SB), ra0;    \
	MOVQ    ra1, rb1;        \
	SBBQ    q<>+8(SB), ra1;  \
	MOVQ    ra2, rb2;        \
	SBBQ    q<>+16(SB), ra2; \
	MOVQ    ra3, rb3;        \
	SBBQ    q<>+24(SB), ra3; \
	CMOVQCS rb0, ra0;        \
	CMOVQCS rb1, ra1;        \
	CMOVQCS rb2, ra2;        \
	CMOVQCS rb3, ra3;
// add(res, x, y *Element)
//
// Stores x + y into res, followed by one conditional subtraction of q.
// Register roles:
//   AX, DX        pointers to x and y
//   R12           pointer to res
//   CX,BX,SI,DI   running 4-limb value (least-significant limb in CX)
//   R8..R11       scratch for REDUCE
TEXT ·add(SB), NOSPLIT, $0-24
	// Fetch the three pointers up front; MOVQ does not touch the flags.
	MOVQ x+8(FP), AX
	MOVQ y+16(FP), DX
	MOVQ res+0(FP), R12

	// (CX,BX,SI,DI) = x
	MOVQ 0(AX), CX
	MOVQ 8(AX), BX
	MOVQ 16(AX), SI
	MOVQ 24(AX), DI

	// (CX,BX,SI,DI) += y, carry propagated from low limb to high limb
	ADDQ 0(DX), CX
	ADCQ 8(DX), BX
	ADCQ 16(DX), SI
	ADCQ 24(DX), DI

	// subtract q once if the sum is not below q
	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11)

	// res = (CX,BX,SI,DI)
	MOVQ CX, 0(R12)
	MOVQ BX, 8(R12)
	MOVQ SI, 16(R12)
	MOVQ DI, 24(R12)
	RET
// sub(res, x, y *Element)
//
// Stores x - y into res. The limbs are subtracted with a borrow chain; when
// the chain ends with a borrow, q is added back, otherwise zero is added.
// There are no branches: the addend is chosen with conditional moves.
// Register roles:
//   SI            pointer to x, then to y
//   AX,DX,CX,BX   running 4-limb value (least-significant limb in AX)
//   R8..R11       q, or 0 when no borrow occurred
//   DI            constant zero
//   R12           pointer to res
TEXT ·sub(SB), NOSPLIT, $0-24
	// DI = 0. This has to happen before the SUBQ/SBBQ chain because XORQ
	// overwrites the carry flag that the CMOVQCC instructions test later.
	XORQ DI, DI

	// (R8,R9,R10,R11) = q. MOVQ leaves the flags alone, so loading the
	// constants (and the res pointer) ahead of the subtraction is harmless.
	MOVQ $0x43e1f593f0000001, R8
	MOVQ $0x2833e84879b97091, R9
	MOVQ $0xb85045b68181585d, R10
	MOVQ $0x30644e72e131a029, R11
	MOVQ res+0(FP), R12

	// (AX,DX,CX,BX) = x
	MOVQ x+8(FP), SI
	MOVQ 0(SI), AX
	MOVQ 8(SI), DX
	MOVQ 16(SI), CX
	MOVQ 24(SI), BX

	// (AX,DX,CX,BX) -= y; CF is set afterwards iff the subtraction borrowed
	MOVQ y+16(FP), SI
	SUBQ 0(SI), AX
	SBBQ 8(SI), DX
	SBBQ 16(SI), CX
	SBBQ 24(SI), BX

	// no borrow: replace the addend q by 0
	CMOVQCC DI, R8
	CMOVQCC DI, R9
	CMOVQCC DI, R10
	CMOVQCC DI, R11

	// add back q (or 0)
	ADDQ R8, AX
	ADCQ R9, DX
	ADCQ R10, CX
	ADCQ R11, BX

	// res = (AX,DX,CX,BX)
	MOVQ AX, 0(R12)
	MOVQ DX, 8(R12)
	MOVQ CX, 16(R12)
	MOVQ BX, 24(R12)
	RET
// double(res, x *Element)
//
// Stores 2*x into res: the four limbs are added to themselves with carry
// propagation, then q is conditionally subtracted once.
// Register roles:
//   AX            pointer to x
//   R11           pointer to res
//   DX,CX,BX,SI   running 4-limb value (least-significant limb in DX)
//   DI,R8,R9,R10  scratch for REDUCE
TEXT ·double(SB), NOSPLIT, $0-16
	MOVQ x+8(FP), AX
	MOVQ res+0(FP), R11

	// (DX,CX,BX,SI) = x
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// (DX,CX,BX,SI) *= 2
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI

	// subtract q once if the doubled value is not below q
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// res = (DX,CX,BX,SI)
	MOVQ DX, 0(R11)
	MOVQ CX, 8(R11)
	MOVQ BX, 16(R11)
	MOVQ SI, 24(R11)
	RET
// neg(res, x *Element)
//
// Stores q - x into res, except that x == 0 yields res == 0 (not q).
// Register roles:
//   DI            pointer to res
//   AX            pointer to x, then the OR of all limbs of x
//   DX,CX,BX,SI   limbs of x (least-significant limb in DX)
//   R8            scratch: one limb of q at a time
TEXT ·neg(SB), NOSPLIT, $0-16
	MOVQ res+0(FP), DI
	MOVQ x+8(FP), AX

	// (DX,CX,BX,SI) = x
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// AX = OR of the four limbs. The last ORQ sets ZF exactly when every
	// limb is zero, so no separate TESTQ is needed before the branch.
	MOVQ DX, AX
	ORQ  CX, AX
	ORQ  BX, AX
	ORQ  SI, AX
	JEQ  zero

	// res = q - x, one limb at a time. The MOVQ instructions between the
	// SUBQ/SBBQ steps do not modify the flags, so the borrow carries
	// through.
	MOVQ $0x43e1f593f0000001, R8
	SUBQ DX, R8
	MOVQ R8, 0(DI)
	MOVQ $0x2833e84879b97091, R8
	SBBQ CX, R8
	MOVQ R8, 8(DI)
	MOVQ $0xb85045b68181585d, R8
	SBBQ BX, R8
	MOVQ R8, 16(DI)
	MOVQ $0x30644e72e131a029, R8
	SBBQ SI, R8
	MOVQ R8, 24(DI)
	RET

zero:
	// x == 0: AX is known to be zero here, so use it to clear res
	MOVQ AX, 0(DI)
	MOVQ AX, 8(DI)
	MOVQ AX, 16(DI)
	MOVQ AX, 24(DI)
	RET
// reduce(res *Element)
//
// Conditionally subtracts q from res in place: the element is loaded, passed
// through REDUCE once, and written back to the same location.
// Register roles:
//   AX            pointer to res
//   DX,CX,BX,SI   the 4-limb value (least-significant limb in DX)
//   DI,R8,R9,R10  scratch for REDUCE
TEXT ·reduce(SB), NOSPLIT, $0-8
	MOVQ res+0(FP), AX

	// (DX,CX,BX,SI) = res
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// subtract q once if the value is not below q
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// res = (DX,CX,BX,SI)
	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	RET
// MulBy3(x *Element)
//
// Multiplies x by 3 in place, as 2x + x, with a conditional subtraction of q
// after each of the two additions.
// Register roles:
//   AX            pointer to x (read for the final "+ x", then written)
//   DX,CX,BX,SI   running 4-limb value (least-significant limb in DX)
//   DI,R8..R14    scratch for REDUCE
TEXT ·MulBy3(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX

	// (DX,CX,BX,SI) = x
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// 2x + x = 3x; x is still unmodified in memory at this point
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	REDUCE(DX,CX,BX,SI,R11,R12,R13,R14)

	// x = (DX,CX,BX,SI)
	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	RET
// MulBy5(x *Element)
//
// Multiplies x by 5 in place, as ((2x) * 2) + x, with a conditional
// subtraction of q after each doubling and after the final addition.
// Register roles:
//   AX            pointer to x (read for the final "+ x", then written)
//   DX,CX,BX,SI   running 4-limb value (least-significant limb in DX)
//   DI,R8..R15    scratch for REDUCE
TEXT ·MulBy5(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX

	// (DX,CX,BX,SI) = x
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// 4x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	REDUCE(DX,CX,BX,SI,R11,R12,R13,R14)

	// 4x + x = 5x; x is still unmodified in memory at this point
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	REDUCE(DX,CX,BX,SI,R15,DI,R8,R9)

	// x = (DX,CX,BX,SI)
	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	RET
// MulBy13(x *Element)
//
// Multiplies x by 13 in place, as 8x + 4x + x. The value is doubled three
// times; the intermediate 4x is kept aside and added to 8x, and x itself is
// added last. q is conditionally subtracted after every doubling/addition.
// Register roles:
//   AX               pointer to x (read for the final "+ x", then written)
//   DX,CX,BX,SI      running 4-limb value (least-significant limb in DX)
//   R11,R12,R13,R14  REDUCE scratch first, then the saved copy of 4x
//   DI,R8,R9,R10     scratch for REDUCE
TEXT ·MulBy13(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX

	// (DX,CX,BX,SI) = x
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// 4x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	REDUCE(DX,CX,BX,SI,R11,R12,R13,R14)

	// (R11,R12,R13,R14) = 4x, saved for later. From here on the REDUCE
	// calls must use other scratch registers.
	MOVQ DX, R11
	MOVQ CX, R12
	MOVQ BX, R13
	MOVQ SI, R14

	// 8x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// 8x + 4x = 12x
	ADDQ R11, DX
	ADCQ R12, CX
	ADCQ R13, BX
	ADCQ R14, SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// 12x + x = 13x; x is still unmodified in memory at this point
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)

	// x = (DX,CX,BX,SI)
	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	RET
// Butterfly(a, b *Element)
//
// Replaces the pair (a, b) in place, both results being computed from the
// values held on entry:
//
//   a = a + b   (followed by one conditional subtraction of q)
//   b = a - b   (q is added back when the subtraction borrows)
//
// Every read of a and b happens before the first store; b is written first,
// a last.
// Register roles:
//   CX,BX,SI,DI      a, then a + b (least-significant limb in CX)
//   R8,R9,R10,R11    copy of a, then a - b; reused as REDUCE scratch once
//                    b has been stored
//   R12,R13,R14,R15  q, or 0 when the subtraction did not borrow
//   AX               pointer to a, then constant zero, then pointer to a
//   DX               pointer to b
TEXT ·Butterfly(SB), NOSPLIT, $0-16
	// (R12,R13,R14,R15) = q. MOVQ does not affect the flags, so the
	// constants can be loaded before any arithmetic.
	MOVQ $0x43e1f593f0000001, R12
	MOVQ $0x2833e84879b97091, R13
	MOVQ $0xb85045b68181585d, R14
	MOVQ $0x30644e72e131a029, R15

	// (CX,BX,SI,DI) = a, with a second copy in (R8,R9,R10,R11)
	MOVQ a+0(FP), AX
	MOVQ 0(AX), CX
	MOVQ CX, R8
	MOVQ 8(AX), BX
	MOVQ BX, R9
	MOVQ 16(AX), SI
	MOVQ SI, R10
	MOVQ 24(AX), DI
	MOVQ DI, R11

	// AX = 0 for the conditional moves below; the pointer to a is reloaded
	// before the final stores. XORQ clobbers the flags, so it comes before
	// the carry/borrow chains.
	XORQ AX, AX
	MOVQ b+8(FP), DX

	// (CX,BX,SI,DI) = a + b
	ADDQ 0(DX), CX
	ADCQ 8(DX), BX
	ADCQ 16(DX), SI
	ADCQ 24(DX), DI

	// (R8,R9,R10,R11) = a - b; CF is set afterwards iff it borrowed
	SUBQ 0(DX), R8
	SBBQ 8(DX), R9
	SBBQ 16(DX), R10
	SBBQ 24(DX), R11

	// no borrow: replace the addend q by 0
	CMOVQCC AX, R12
	CMOVQCC AX, R13
	CMOVQCC AX, R14
	CMOVQCC AX, R15

	// add back q (or 0)
	ADDQ R12, R8
	ADCQ R13, R9
	ADCQ R14, R10
	ADCQ R15, R11

	// b = a - b
	MOVQ R8, 0(DX)
	MOVQ R9, 8(DX)
	MOVQ R10, 16(DX)
	MOVQ R11, 24(DX)

	// subtract q once from the sum if it is not below q; R8..R11 are free
	// again now that b has been written
	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11)

	// a = a + b
	MOVQ a+0(FP), AX
	MOVQ CX, 0(AX)
	MOVQ BX, 8(AX)
	MOVQ SI, 16(AX)
	MOVQ DI, 24(AX)
	RET
|