mirror of
https://github.com/arnaucube/go-iden3-crypto.git
synced 2026-02-07 19:46:41 +01:00
Faster ff arithmetics (regenerated code with the newest goff) (#43)
This commit is contained in:
committed by
GitHub
parent
f597e20569
commit
69354ae29c
340
ff/element_ops_amd64.s
Normal file
340
ff/element_ops_amd64.s
Normal file
@@ -0,0 +1,340 @@
|
||||
// Copyright 2020 ConsenSys Software Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "textflag.h"
|
||||
#include "funcdata.h"
|
||||
|
||||
// q is the field modulus, laid out as four 64-bit limbs.
// The limb at offset 0 is the least significant one: REDUCE subtracts
// q<>+0 first and propagates the borrow upward through +8, +16, +24.
DATA q<>+0(SB)/8,  $0x43e1f593f0000001
DATA q<>+8(SB)/8,  $0x2833e84879b97091
DATA q<>+16(SB)/8, $0xb85045b68181585d
DATA q<>+24(SB)/8, $0x30644e72e131a029
GLOBL q<>(SB), (RODATA+NOPTR), $32

// qInv0 q'[0]
// NOTE(review): presumably the Montgomery constant -q^{-1} mod 2^64; it is not
// referenced by any routine visible in this part of the file — confirm at its use site.
DATA qInv0<>+0(SB)/8, $0xc2e1f593efffffff
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8
|
||||
|
||||
// REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3)
//
// Performs one conditional subtraction of the modulus q on the 4-limb value
// held in (ra0, ra1, ra2, ra3), least-significant limb in ra0:
//
//   1. each limb is copied into its rb counterpart,
//   2. q is subtracted from ra with borrow propagation (SUBQ/SBBQ),
//   3. if the subtraction borrowed (CF set, i.e. ra < q) the saved copy is
//      moved back with CMOVQCS, so ra is left unchanged.
//
// Result: ra = ra - q when ra >= q, otherwise ra is untouched.
// Only a single subtraction is performed, so the result is fully reduced
// only when the input is below 2q.
//
// Clobbers: rb0..rb3 and the flags. There are no branches.
//
// The last line intentionally carries no trailing line continuation, so the
// macro body ends here regardless of what follows the definition.
#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \
	MOVQ    ra0, rb0;        \
	SUBQ    q<>(SB), ra0;    \
	MOVQ    ra1, rb1;        \
	SBBQ    q<>+8(SB), ra1;  \
	MOVQ    ra2, rb2;        \
	SBBQ    q<>+16(SB), ra2; \
	MOVQ    ra3, rb3;        \
	SBBQ    q<>+24(SB), ra3; \
	CMOVQCS rb0, ra0;        \
	CMOVQCS rb1, ra1;        \
	CMOVQCS rb2, ra2;        \
	CMOVQCS rb3, ra3;
|
||||
|
||||
// add(res, x, y *Element)
//
// Stores x + y into res after one conditional subtraction of q.
// All limbs of x and y are read before res is written, so res may
// point at the same element as x or y.
// NOTE(review): the carry out of the top limb is discarded; this is only
// safe if both inputs are already below q (q's top limb is 0x3064…, so such
// a sum cannot overflow 256 bits) — confirm callers guarantee this.
TEXT ·add(SB), NOSPLIT, $0-24
	MOVQ x+8(FP), AX
	MOVQ y+16(FP), DX
	MOVQ res+0(FP), R12

	// (R8,R9,R10,R11) = x
	MOVQ 0(AX), R8
	MOVQ 8(AX), R9
	MOVQ 16(AX), R10
	MOVQ 24(AX), R11

	// (R8,R9,R10,R11) += y, carry chained through the limbs
	ADDQ 0(DX), R8
	ADCQ 8(DX), R9
	ADCQ 16(DX), R10
	ADCQ 24(DX), R11

	// conditional subtraction of q; (CX,BX,SI,DI) are scratch
	REDUCE(R8,R9,R10,R11,CX,BX,SI,DI)

	MOVQ R8, 0(R12)
	MOVQ R9, 8(R12)
	MOVQ R10, 16(R12)
	MOVQ R11, 24(R12)
	RET
|
||||
|
||||
// sub(res, x, y *Element)
//
// Stores x - y into res. When the 256-bit subtraction borrows (x < y),
// q is added back; otherwise zero is added. The selection is done with
// conditional moves, so there are no branches.
TEXT ·sub(SB), NOSPLIT, $0-24
	MOVQ x+8(FP), SI
	MOVQ y+16(FP), DI
	MOVQ res+0(FP), R12

	// AX = 0. Zeroed here, before the subtraction, because XORQ rewrites
	// the flags and the borrow must survive until the CMOVQCC group below.
	XORQ AX, AX

	// (R8,R9,R10,R11) = x
	MOVQ 0(SI), R8
	MOVQ 8(SI), R9
	MOVQ 16(SI), R10
	MOVQ 24(SI), R11

	// (R8,R9,R10,R11) -= y; CF = final borrow
	SUBQ 0(DI), R8
	SBBQ 8(DI), R9
	SBBQ 16(DI), R10
	SBBQ 24(DI), R11

	// (DX,CX,BX,SI) = q (same limbs as the q<> table); MOVQ leaves CF intact
	MOVQ $0x43e1f593f0000001, DX
	MOVQ $0x2833e84879b97091, CX
	MOVQ $0xb85045b68181585d, BX
	MOVQ $0x30644e72e131a029, SI

	// no borrow => replace the correction term with zero
	CMOVQCC AX, DX
	CMOVQCC AX, CX
	CMOVQCC AX, BX
	CMOVQCC AX, SI

	// add the correction term (q or 0)
	ADDQ DX, R8
	ADCQ CX, R9
	ADCQ BX, R10
	ADCQ SI, R11

	MOVQ R8, 0(R12)
	MOVQ R9, 8(R12)
	MOVQ R10, 16(R12)
	MOVQ R11, 24(R12)
	RET
|
||||
|
||||
// double(res, x *Element)
//
// Stores x + x into res after one conditional subtraction of q.
// x is fully loaded before res is written.
TEXT ·double(SB), NOSPLIT, $0-16
	MOVQ x+8(FP), SI
	MOVQ res+0(FP), DI

	// (R8,R9,R10,R11) = x
	MOVQ 0(SI), R8
	MOVQ 8(SI), R9
	MOVQ 16(SI), R10
	MOVQ 24(SI), R11

	// (R8,R9,R10,R11) = 2x, carry chained through the limbs
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11

	// conditional subtraction of q; (AX,DX,CX,BX) are scratch
	REDUCE(R8,R9,R10,R11,AX,DX,CX,BX)

	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	MOVQ R11, 24(DI)
	RET
|
||||
|
||||
// neg(res, x *Element)
//
// Stores the additive inverse of x into res:
//   - x == 0 (all four limbs zero): res = 0
//   - otherwise:                    res = q - x
// NOTE(review): q - x is computed without a final reduction, so the result
// is in [1, q-1] only if x is already below q — confirm with callers.
TEXT ·neg(SB), NOSPLIT, $0-16
	MOVQ res+0(FP), DI
	MOVQ x+8(FP), AX

	// (DX,CX,BX,SI) = x
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI

	// AX = x[0] | x[1] | x[2] | x[3].
	// The last ORQ sets ZF from its result, so ZF is set exactly when x == 0;
	// no separate TESTQ is needed before the branch.
	MOVQ DX, AX
	ORQ  CX, AX
	ORQ  BX, AX
	ORQ  SI, AX
	JEQ  x_is_zero

	// res = q - x, limb by limb. The MOVQ loads and stores between the
	// SUBQ/SBBQ instructions do not modify the flags, so the borrow chain
	// is preserved. The immediates are the limbs of q.
	MOVQ $0x43e1f593f0000001, R8
	SUBQ DX, R8
	MOVQ R8, 0(DI)
	MOVQ $0x2833e84879b97091, R8
	SBBQ CX, R8
	MOVQ R8, 8(DI)
	MOVQ $0xb85045b68181585d, R8
	SBBQ BX, R8
	MOVQ R8, 16(DI)
	MOVQ $0x30644e72e131a029, R8
	SBBQ SI, R8
	MOVQ R8, 24(DI)
	RET

x_is_zero:
	// AX is zero on this path; res = 0
	MOVQ AX, 0(DI)
	MOVQ AX, 8(DI)
	MOVQ AX, 16(DI)
	MOVQ AX, 24(DI)
	RET
|
||||
|
||||
// reduce(res *Element)
//
// Applies one conditional subtraction of q to res, in place:
// res = res - q when res >= q, otherwise res is left as is.
TEXT ·reduce(SB), NOSPLIT, $0-8
	MOVQ res+0(FP), DI

	// (R8,R9,R10,R11) = res
	MOVQ 0(DI), R8
	MOVQ 8(DI), R9
	MOVQ 16(DI), R10
	MOVQ 24(DI), R11

	// conditional subtraction of q; (AX,DX,CX,BX) are scratch
	REDUCE(R8,R9,R10,R11,AX,DX,CX,BX)

	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	MOVQ R11, 24(DI)
	RET
|
||||
|
||||
// MulBy3(x *Element)
//
// Replaces x with 3x in place, computed as (2x) + x with a conditional
// subtraction of q after each addition.
TEXT ·MulBy3(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), DI

	// (R8,R9,R10,R11) = x
	MOVQ 0(DI), R8
	MOVQ 8(DI), R9
	MOVQ 16(DI), R10
	MOVQ 24(DI), R11

	// 2x
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11
	REDUCE(R8,R9,R10,R11,AX,DX,CX,BX)

	// 3x = 2x + x; x is still unmodified in memory at this point
	ADDQ 0(DI), R8
	ADCQ 8(DI), R9
	ADCQ 16(DI), R10
	ADCQ 24(DI), R11
	REDUCE(R8,R9,R10,R11,AX,DX,CX,BX)

	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	MOVQ R11, 24(DI)
	RET
|
||||
|
||||
// MulBy5(x *Element)
//
// Replaces x with 5x in place, computed as 2*(2x) + x with a conditional
// subtraction of q after each doubling/addition.
TEXT ·MulBy5(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), DI

	// (R8,R9,R10,R11) = x
	MOVQ 0(DI), R8
	MOVQ 8(DI), R9
	MOVQ 16(DI), R10
	MOVQ 24(DI), R11

	// 2x
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11
	REDUCE(R8,R9,R10,R11,AX,DX,CX,BX)

	// 4x
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11
	REDUCE(R8,R9,R10,R11,AX,DX,CX,BX)

	// 5x = 4x + x; x is still unmodified in memory at this point
	ADDQ 0(DI), R8
	ADCQ 8(DI), R9
	ADCQ 16(DI), R10
	ADCQ 24(DI), R11
	REDUCE(R8,R9,R10,R11,AX,DX,CX,BX)

	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	MOVQ R11, 24(DI)
	RET
|
||||
|
||||
// MulBy13(x *Element)
//
// Replaces x with 13x in place, computed as 8x + 4x + x. Each doubling or
// addition is followed by a conditional subtraction of q.
//
// Register roles:
//   DI                 pointer to x
//   (R8,R9,R10,R11)    running value
//   (SI,R12,R13,R14)   saved copy of 4x
//   (AX,DX,CX,BX)      scratch for REDUCE
TEXT ·MulBy13(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), DI

	// (R8,R9,R10,R11) = x
	MOVQ 0(DI), R8
	MOVQ 8(DI), R9
	MOVQ 16(DI), R10
	MOVQ 24(DI), R11

	// 2x
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11
	REDUCE(R8,R9,R10,R11,AX,DX,CX,BX)

	// 4x
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11
	REDUCE(R8,R9,R10,R11,AX,DX,CX,BX)

	// keep 4x for later
	MOVQ R8, SI
	MOVQ R9, R12
	MOVQ R10, R13
	MOVQ R11, R14

	// 8x
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11
	REDUCE(R8,R9,R10,R11,AX,DX,CX,BX)

	// 12x = 8x + 4x
	ADDQ SI, R8
	ADCQ R12, R9
	ADCQ R13, R10
	ADCQ R14, R11
	REDUCE(R8,R9,R10,R11,AX,DX,CX,BX)

	// 13x = 12x + x; x is still unmodified in memory at this point
	ADDQ 0(DI), R8
	ADCQ 8(DI), R9
	ADCQ 16(DI), R10
	ADCQ 24(DI), R11
	REDUCE(R8,R9,R10,R11,AX,DX,CX,BX)

	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	MOVQ R11, 24(DI)
	RET
|
||||
|
||||
// Butterfly(a, b *Element) sets a = a + b; b = a - b
//
// Both results are computed from the original values of a and b.
// The difference is written to b first, then the sum is written to a.
//
// Register roles:
//   AX                  pointer to a, then the constant 0, then pointer to a again
//   DX                  pointer to b
//   (R8,R9,R10,R11)     sum  = a + b
//   (R12,R13,R14,R15)   diff = a - b
//   (CX,BX,SI,DI)       q-or-zero correction for diff, then REDUCE scratch
TEXT ·Butterfly(SB), NOSPLIT, $0-16
	MOVQ a+0(FP), AX
	MOVQ b+8(FP), DX

	// sum = a
	MOVQ 0(AX), R8
	MOVQ 8(AX), R9
	MOVQ 16(AX), R10
	MOVQ 24(AX), R11

	// diff = a
	MOVQ R8, R12
	MOVQ R9, R13
	MOVQ R10, R14
	MOVQ R11, R15

	// AX = 0. Done before any arithmetic because XORQ rewrites the flags;
	// the pointer to a is reloaded from the frame before the final store.
	XORQ AX, AX

	// sum += b
	ADDQ 0(DX), R8
	ADCQ 8(DX), R9
	ADCQ 16(DX), R10
	ADCQ 24(DX), R11

	// diff -= b; CF = final borrow
	SUBQ 0(DX), R12
	SBBQ 8(DX), R13
	SBBQ 16(DX), R14
	SBBQ 24(DX), R15

	// (CX,BX,SI,DI) = q (same limbs as the q<> table); MOVQ leaves CF intact
	MOVQ $0x43e1f593f0000001, CX
	MOVQ $0x2833e84879b97091, BX
	MOVQ $0xb85045b68181585d, SI
	MOVQ $0x30644e72e131a029, DI

	// no borrow => replace the correction term with zero
	CMOVQCC AX, CX
	CMOVQCC AX, BX
	CMOVQCC AX, SI
	CMOVQCC AX, DI

	// diff += correction term (q or 0)
	ADDQ CX, R12
	ADCQ BX, R13
	ADCQ SI, R14
	ADCQ DI, R15

	// b = diff
	MOVQ R12, 0(DX)
	MOVQ R13, 8(DX)
	MOVQ R14, 16(DX)
	MOVQ R15, 24(DX)

	// conditional subtraction of q on the sum; (CX,BX,SI,DI) are free again
	REDUCE(R8,R9,R10,R11,CX,BX,SI,DI)

	// a = sum
	MOVQ a+0(FP), AX
	MOVQ R8, 0(AX)
	MOVQ R9, 8(AX)
	MOVQ R10, 16(AX)
	MOVQ R11, 24(AX)
	RET
|
||||
Reference in New Issue
Block a user