|
|
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build amd64,!gccgo,!appengine
#include "textflag.h"
// POLY1305_ADD folds one 16-byte message block into the accumulator and
// advances the message pointer past it.
//
//   ptr               register holding the address of the block (in/out)
//   acc0, acc1, acc2  accumulator limbs, least significant first (in/out)
//
// The 16 bytes at ptr are added as two little-endian 64-bit words to
// acc1:acc0, and 1 plus the carry out of that addition is added to acc2,
// i.e. the value added is the block plus 2^128. ptr is then bumped by 16
// with LEAQ, which leaves the flags untouched. Flags are clobbered by the
// additions.
#define POLY1305_ADD(ptr, acc0, acc1, acc2) \
	ADDQ (ptr), acc0; \
	ADCQ 8(ptr), acc1; \
	ADCQ $1, acc2; \
	LEAQ 16(ptr), ptr
// POLY1305_MUL multiplies the accumulator h = h2:h1:h0 by r = r1:r0 and
// leaves the product, partially reduced modulo 2^130 - 5, in h2:h1:h0.
//
//   h0, h1, h2      accumulator limbs, least significant first (in/out);
//                   h0 is also used as scratch in the middle of the macro
//   r0, r1          multiplier limbs (only read)
//   t0, t1, t2, t3  scratch, overwritten
//
// AX and DX are overwritten by the MULQ instructions and the flags are
// clobbered. Comments are kept out of the macro body because a // comment
// would swallow the line-continuation backslash.
//
// The macro has three groups, separated by the empty continuation lines:
//
//   1. t2:t1:t0 = r0 * h. The h2 term uses IMULQ, so only the low 64 bits
//      of r0*h2 are kept (presumably h2 is always small enough for this to
//      be exact; the macro does not check).
//   2. r1 * h is added one limb higher, giving t3:t2:t1:t0 = r * h. The
//      high half of r1*h0 is parked in h0 until it can be added into t2.
//   3. Reduction. Let c be the part of the product at and above bit 130,
//      i.e. c = (t3:t2) >> 2. h is set to the low 130 bits
//      (t1:t0 plus t2 & 3), then (t3 : t2 & ~3) = 4*c is added, then
//      (t3:t2) >> 2 = c is added, for a total of 5*c. This uses
//      2^130 = 5 (mod 2^130 - 5). The result is not fully reduced.
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
	MOVQ r0, AX; \
	MULQ h0; \
	MOVQ AX, t0; \
	MOVQ DX, t1; \
	MOVQ r0, AX; \
	MULQ h1; \
	ADDQ AX, t1; \
	ADCQ $0, DX; \
	MOVQ r0, t2; \
	IMULQ h2, t2; \
	ADDQ DX, t2; \
	\
	MOVQ r1, AX; \
	MULQ h0; \
	ADDQ AX, t1; \
	ADCQ $0, DX; \
	MOVQ DX, h0; \
	MOVQ r1, t3; \
	IMULQ h2, t3; \
	MOVQ r1, AX; \
	MULQ h1; \
	ADDQ AX, t2; \
	ADCQ DX, t3; \
	ADDQ h0, t2; \
	ADCQ $0, t3; \
	\
	MOVQ t0, h0; \
	MOVQ t1, h1; \
	MOVQ t2, h2; \
	ANDQ $3, h2; \
	MOVQ t2, t0; \
	ANDQ $0xFFFFFFFFFFFFFFFC, t0; \
	ADDQ t0, h0; \
	ADCQ t3, h1; \
	ADCQ $0, h2; \
	SHRQ $2, t3, t2; \
	SHRQ $2, t3; \
	ADDQ t2, h0; \
	ADCQ t3, h1; \
	ADCQ $0, h2
// poly1305Mask is a 16-byte read-only table of two 64-bit masks. poly1305
// ANDs them with the first two 64-bit words of the key to form r0 and r1.
DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
GLOBL ·poly1305Mask<>(SB), RODATA, $16
// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
//
// poly1305 computes a 16-byte tag over the mlen bytes at m using the
// 32-byte key and writes it to out.
//
// Register use:
//   DI            out
//   SI            current position in the message
//   R15           number of message bytes still to process
//   R11, R12      r0, r1: key bytes 0..15 ANDed with poly1305Mask
//   R8, R9, R10   h0, h1, h2: the accumulator
//   BX, CX        scratch for POLY1305_MUL; also hold the padded final block
//   R13, R14      scratch for POLY1305_MUL; R13 also holds the byte being
//                 loaded while the final block is assembled
//   AX, DX        clobbered by POLY1305_MUL
TEXT ·poly1305(SB), $0-32
	MOVQ out+0(FP), DI
	MOVQ m+8(FP), SI
	MOVQ mlen+16(FP), R15
	MOVQ key+24(FP), AX

	// Load the first half of the key and mask it to obtain r; start with h = 0.
	MOVQ 0(AX), R11
	MOVQ 8(AX), R12
	ANDQ ·poly1305Mask<>(SB), R11   // r0
	ANDQ ·poly1305Mask<>+8(SB), R12 // r1
	XORQ R8, R8                     // h0
	XORQ R9, R9                     // h1
	XORQ R10, R10                   // h2

	// Fewer than 16 bytes in total: go straight to the partial-block path.
	CMPQ R15, $16
	JB bytes_between_0_and_15

	// Main loop: for every full 16-byte block, h = (h + block + 2^128) * r.
loop:
	POLY1305_ADD(SI, R8, R9, R10)

	// Also entered from the partial-block path below, after the padded final
	// block has already been added to h.
multiply:
	POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
	SUBQ $16, R15
	CMPQ R15, $16
	JAE loop

	// Here 0 <= R15 < 16. If nothing is left, finish; otherwise assemble the
	// remaining bytes into CX:BX. BX starts at 1 so that, after being shifted
	// up once per message byte, a single 1 byte sits just above the last
	// message byte. SI is moved to the end of the message because the bytes
	// are consumed from last to first.
bytes_between_0_and_15:
	TESTQ R15, R15
	JZ done
	MOVQ $1, BX
	XORQ CX, CX
	XORQ R13, R13
	ADDQ R15, SI

	// Each iteration shifts CX:BX left by one byte and inserts the preceding
	// message byte at the bottom. MOVB writes only the low byte of R13; the
	// rest of R13 is still zero from the XORQ above, and the low byte of BX
	// is zero after the shift, so the XORQ simply places the byte there.
flush_buffer:
	SHLQ $8, BX, CX
	SHLQ $8, BX
	MOVB -1(SI), R13
	XORQ R13, BX
	DECQ SI
	DECQ R15
	JNZ flush_buffer

	// Add the padded final block to h. Unlike POLY1305_ADD, only the carry is
	// added to h2, because the padding byte is already part of CX:BX. R15 is
	// set to 16 so that, after the multiply, the SUBQ leaves it at 0 and
	// control falls through to done.
	ADDQ BX, R8
	ADCQ CX, R9
	ADCQ $0, R10
	MOVQ $16, R15
	JMP multiply

	// Final reduction and tag. Compute h - (2^130 - 5) across all three limbs
	// (the constants are the low, middle and high words of 2^130 - 5). If the
	// subtraction borrows (carry set), h was already below 2^130 - 5, so the
	// conditional moves restore the original low 128 bits; otherwise the
	// difference is kept. The selection uses CMOVQCS rather than a branch.
done:
	MOVQ R8, AX
	MOVQ R9, BX
	SUBQ $0xFFFFFFFFFFFFFFFB, AX
	SBBQ $0xFFFFFFFFFFFFFFFF, BX
	SBBQ $3, R10
	CMOVQCS R8, AX
	CMOVQCS R9, BX

	// Reload the key pointer (AX was clobbered by the multiplies) and add key
	// bytes 16..31 as a 128-bit integer; the carry out of the top word is
	// discarded.
	MOVQ key+24(FP), R8
	ADDQ 16(R8), AX
	ADCQ 24(R8), BX

	// Store the 16-byte tag.
	MOVQ AX, 0(DI)
	MOVQ BX, 8(DI)
	RET
|