You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1920 lines
54 KiB

// Use of this source code is governed by an ISC
// license that can be found in the LICENSE file.
package simd
import (
"fmt"
"gitlab.com/nitya-sattva/go-x11/hash"
)
const (
	// HashSize is the length, in bytes, of a finished hash value.
	HashSize int = 64

	// BlockSize is the length, in bytes, of one input block.
	BlockSize uintptr = 128
)
////////////////
// digest holds the running state of a SIMD512 hash computation.
type digest struct {
// ptr is the number of input bytes currently buffered in b.
ptr uintptr
// ch is the high 32 bits of the count of full blocks compressed by Write.
ch uint32
// cl is the low 32 bits of the count of full blocks compressed by Write.
cl uint32
// h is the chaining state; Reset loads it from kInit.
h [32]uint32
// b buffers input bytes until a whole block of BlockSize bytes is available.
b [BlockSize]byte
}
// New allocates a digest for computing a SIMD512 hash and returns it
// already reset to its initial state.
func New() hash.Digest {
	d := new(digest)
	d.Reset()
	return d
}
////////////////
// Reset returns the digest to its initial state: the chaining value is
// reloaded from kInit, the block counter is cleared and any buffered
// input is forgotten.
func (ref *digest) Reset() {
	copy(ref.h[:], kInit[:])
	ref.ch, ref.cl = 0, 0
	ref.ptr = 0
}
// Sum appends the hash of everything written so far to dst and returns
// the extended slice. The receiver is left untouched: finalisation is
// performed on a copy of the digest.
func (ref *digest) Sum(dst []byte) []byte {
	var out [64]byte
	snapshot := *ref
	// out is exactly HashSize bytes long, so Close cannot fail here.
	snapshot.Close(out[:], 0, 0)
	return append(dst, out[:]...)
}
// Write absorbs src into the running hash. It always consumes all of src
// and never returns an error.
func (ref *digest) Write(src []byte) (int, error) {
	total := len(src)
	for len(src) > 0 {
		// copy moves as many bytes as fit in the free tail of the block
		// buffer, i.e. min(BlockSize-ref.ptr, len(src)).
		n := uintptr(copy(ref.b[ref.ptr:], src))
		src = src[n:]
		ref.ptr += n
		if ref.ptr != BlockSize {
			continue
		}
		// The buffer is full: compress it and bump the 64-bit block
		// counter kept in (ch, cl), carrying into ch on wrap-around.
		ref.compress(0)
		ref.ptr = 0
		ref.cl++
		if ref.cl == 0 {
			ref.ch++
		}
	}
	return total, nil
}
// Close finishes the hash and stores the HashSize-byte result in dst.
// The top bcnt bits of bits are appended to the message as a trailing
// partial byte before finalisation. Afterwards the digest is reset and
// may be reused. An error is returned, and the digest left unchanged,
// when dst is smaller than HashSize.
func (ref *digest) Close(dst []byte, bits uint8, bcnt uint8) error {
	if got := len(dst); got < HashSize {
		return fmt.Errorf("Simd Close: dst min length: %d, got %d", HashSize, got)
	}

	buf := ref.b[:]
	if bcnt > 0 || ref.ptr > 0 {
		// Zero-pad the partially filled block, keeping only the bcnt most
		// significant bits of the extra byte, and compress it.
		memset(buf[ref.ptr:], 0)
		mask := uint8(0xFF) << (8 - bcnt)
		buf[ref.ptr] = bits & mask
		ref.compress(0)
	}

	// The final block carries only the message length in bits as a 64-bit
	// little-endian value: full blocks * 1024 + buffered bytes * 8 + bcnt.
	memset(buf, 0)
	low := ref.cl<<10 + uint32(ref.ptr<<3) + uint32(bcnt)
	high := ref.ch<<10 + ref.cl>>22
	encUInt32le(buf, low)
	encUInt32le(buf[4:], high)
	ref.compress(1)

	// The hash is the first 16 state words, little-endian.
	for i, word := range ref.h[:16] {
		encUInt32le(dst[i<<2:], word)
	}

	ref.Reset()
	return nil
}
// Size reports how many bytes a finished hash occupies (HashSize).
func (ref *digest) Size() int {
	return HashSize
}
// BlockSize reports the number of bytes the hash consumes per
// compression step, i.e. the package-level BlockSize as an int.
func (ref *digest) BlockSize() int {
	const size = int(BlockSize)
	return size
}
////////////////
// memset overwrites every byte of dst with src.
func memset(dst []byte, src byte) {
	for i := 0; i < len(dst); i++ {
		dst[i] = src
	}
}
// decUInt32le reads the first four bytes of src as a little-endian
// 32-bit unsigned integer. It panics if src holds fewer than four bytes.
func decUInt32le(src []byte) uint32 {
	var v uint32
	for i := uint(0); i < 4; i++ {
		v |= uint32(src[i]) << (8 * i)
	}
	return v
}
// encUInt32le stores src into the first four bytes of dst in
// little-endian order. It panics if dst holds fewer than four bytes.
func encUInt32le(dst []uint8, src uint32) {
	for i := uint(0); i < 4; i++ {
		dst[i] = uint8(src >> (8 * i))
	}
}
// compress folds the block currently held in ref.b into the chaining
// state ref.h. Pass last == 1 for the final (length) block, which selects
// the kYOffB offset table instead of kYOffA; any other value is treated
// as an ordinary block.
func (ref *digest) compress(last int) {
	var (
		q  [256]int32
		w  [64]uint32
		st [32]uint32
	)

	// butterfly merges q[base:base+half] with q[base+half:base+2*half].
	// Element 0 is combined directly; element i > 0 first has its upper
	// operand multiplied by kAlphaTab[i*stride] and partially reduced.
	butterfly := func(base, half, stride uintptr) {
		lo, hi := q[base], q[base+half]
		q[base], q[base+half] = lo+hi, lo-hi
		for i := uintptr(1); i < half; i++ {
			t := q[base+i+half] * kAlphaTab[i*stride]
			t = (t & 0xFFFF) + (t >> 16)
			lo = q[base+i]
			q[base+i], q[base+i+half] = lo+t, lo-t
		}
	}

	// Transform the block: four 64-element pieces built from interleaved
	// input bytes, merged pairwise into two 128-element halves and then
	// into the full 256-element vector.
	mixoutRound(ref.b[:], q[:], 1<<2)
	mixoutRound(ref.b[2:], q[64:], 1<<2)
	butterfly(0, 64, 2)
	mixoutRound(ref.b[1:], q[128:], 1<<2)
	mixoutRound(ref.b[3:], q[192:], 1<<2)
	butterfly(128, 64, 2)
	butterfly(0, 128, 1)

	// Add the per-position offset and bring every element into the
	// range [-128, 128].
	final := last == 1
	for i := range q {
		var tq int32
		if final {
			tq = q[i] + kYOffB[i]
		} else {
			tq = q[i] + kYOffA[i]
		}
		tq = (tq & 0xFFFF) + (tq >> 16)
		tq = (tq & 0xFF) - (tq >> 8)
		tq = (tq & 0xFF) - (tq >> 8)
		if tq > 128 {
			tq -= 257
		}
		q[i] = tq
	}

	// Working state: the chaining value XORed with the block read as
	// little-endian 32-bit words.
	for i := range st {
		st[i] = ref.h[i] ^ decUInt32le(ref.b[4*i:])
	}

	// Four expansion rounds. Each packs pairs of q values, scaled by mul,
	// into the low and high 16 bits of 64 words and feeds them to
	// mixinRound. wbp selects where in q each group of eight words starts;
	// lo and hi are the offsets of the two halves relative to that start.
	type expandRound struct {
		sb                  uintptr
		lo, hi              int
		mul                 int32
		isp, p0, p1, p2, p3 uint32
	}
	rounds := [4]expandRound{
		{0, 0, 1, 185, 0, 3, 23, 17, 27},
		{8, 0, 1, 185, 1, 28, 19, 22, 7},
		{16, -256, -128, 233, 2, 29, 9, 15, 5},
		{24, -383, -255, 233, 3, 4, 13, 10, 25},
	}
	for _, r := range rounds {
		// Negative offsets are applied through wrapping uintptr
		// arithmetic, which equals subtracting their magnitude.
		offLo, offHi := uintptr(r.lo), uintptr(r.hi)
		for blk := uintptr(0); blk < 8; blk++ {
			v := uintptr(wbp[blk+r.sb])
			for k := uintptr(0); k < 8; k++ {
				lo := uint32(q[v+2*k+offLo] * r.mul)
				hi := uint32(q[v+2*k+offHi] * r.mul)
				w[8*blk+k] = (lo & 0xFFFF) + (hi << 16)
			}
		}
		mixinRound(st[:], w[:], r.isp, r.p0, r.p1, r.p2, r.p3)
	}

	// Feed-forward: four more steps that mix the previous chaining value
	// back into the state. Step s consumes prev[8*s : 8*s+8].
	steps := [4]struct {
		rot, mix uint32
		perm     int
	}{
		{4, 13, 4},
		{13, 10, 5},
		{10, 25, 6},
		{25, 4, 0},
	}
	prev := ref.h[:]
	var tA [8]uint32
	for s, sp := range steps {
		// All eight rotations are taken before the state is modified.
		for j := range tA {
			tA[j] = st[j]<<sp.rot | st[j]>>(32-sp.rot)
		}
		for j := 0; j < 8; j++ {
			a, b, c, d := kIdxA[j], kIdxB[j], kIdxC[j], kIdxD[j]
			tp := st[d] + prev[8*s+j] + (((st[b] ^ st[c]) & st[a]) ^ st[c])
			st[a] = (tp<<sp.mix | tp>>(32-sp.mix)) + tA[kPrem[sp.perm][j]]
			st[d] = st[c]
			st[c] = st[b]
			st[b] = tA[j]
		}
	}

	copy(ref.h[:], st[:])
}
// mixoutRound computes a 64-point transform modulo 257 of the 32 input
// bytes x[0], x[xt], x[2*xt], ..., x[31*xt] (implicitly zero-extended to
// 64 points) and stores the result in q[0:64]. The outputs are only
// partially reduced: they are congruent to the transform values modulo
// 257 but are not brought into a canonical range.
//
// x must hold more than 31*xt bytes and q at least 64 elements. Every
// element of q[0:64] is written before it is read, so the previous
// contents of q do not influence the result.
func mixoutRound(x []uint8, q []int32, xt uintptr) {
	xd := xt << 1
	span := xd << 2 // distance between the four inputs of one 8-point step

	// fft8 is an 8-point transform (root 4 modulo 257) of the four bytes
	// x[base], x[base+span], x[base+2*span], x[base+3*span]; the upper
	// four inputs are implicitly zero.
	fft8 := func(base uintptr) (d [8]int32) {
		x0 := int32(x[base])
		x1 := int32(x[base+span])
		x2 := int32(x[base+2*span])
		x3 := int32(x[base+3*span])

		a0 := x0 + x2
		a1 := x0 + (x2 << 4)
		a2 := x0 - x2
		a3 := x0 - (x2 << 4)

		b0 := x1 + x3
		t := (x1 << 2) + (x3 << 6)
		b1 := (t & 0xFF) - (t >> 8) // 256 == -1 (mod 257)
		b2 := (x1 << 4) - (x3 << 4)
		t = (x1 << 6) + (x3 << 2)
		b3 := (t & 0xFF) - (t >> 8)

		d[0], d[1], d[2], d[3] = a0+b0, a1+b1, a2+b2, a3+b3
		d[4], d[5], d[6], d[7] = a0-b0, a1-b1, a2-b2, a3-b3
		return d
	}

	// fft16 combines two 8-point transforms, taken from inputs starting at
	// xb and xb+2*xd, into q[rb:rb+16]. The twiddle factors are powers of
	// two, hence the plain shifts.
	fft16 := func(xb, rb uintptr) {
		even := fft8(xb)
		odd := fft8(xb + (xd << 1))
		for k := uintptr(0); k < 8; k++ {
			t := odd[k] << k
			q[rb+k] = even[k] + t
			q[rb+8+k] = even[k] - t
		}
	}

	// butterfly merges q[rb:rb+half] with q[rb+half:rb+2*half]. Element 0
	// needs no twiddle factor; element u > 0 uses kAlphaTab[u*stride].
	butterfly := func(rb, half, stride uintptr) {
		m, n := q[rb], q[rb+half]
		q[rb], q[rb+half] = m+n, m-n
		for u := uintptr(1); u < half; u++ {
			t := q[rb+u+half] * kAlphaTab[u*stride]
			t = (t & 0xFFFF) + (t >> 16) // 65536 == 1 (mod 257)
			m = q[rb+u]
			q[rb+u], q[rb+u+half] = m+t, m-t
		}
	}

	// Lower 32 outputs from the even-numbered inputs.
	fft16(0, 0)
	fft16(xd, 16)
	// This pass must cover exactly q[0:32]; it previously ran one step too
	// far and touched q[32:36] before those elements had been computed.
	butterfly(0, 16, 8)

	// Upper 32 outputs from the odd-numbered inputs.
	fft16(xt, 32)
	fft16(xt+xd, 48)
	butterfly(32, 16, 8)

	// Merge both halves into the 64-point result.
	butterfly(0, 32, 4)
}
func mixinRound(h, w []uint32, isp, p0, p1, p2, p3 uint32) {
var tA [8]uint32
var tp uint32
tA[0] = ((h[0] << p0) | (h[0] >> (32 - p0)))
tA[1] = ((h[1] << p0) | (h[1] >> (32 - p0)))
tA[2] = ((h[2] << p0) | (h[2] >> (32 - p0)))
tA[3] = ((h[3] << p0) | (h[3] >> (32 - p0)))
tA[4] = ((h[4] << p0) | (h[4] >> (32 - p0)))
tA[5] = ((h[5] << p0) | (h[5] >> (32 - p0)))
tA[6] = ((h[6] << p0) | (h[6] >> (32 - p0)))
tA[7] = ((h[7] << p0) | (h[7] >> (32 - p0)))
tp = uint32(h[kIdxD[0]] + w[0] +
(((h[kIdxB[0]] ^ h[kIdxC[0]]) & h[kIdxA[0]]) ^ h[kIdxC[0]]))
h[kIdxA[0]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]]
h[kIdxD[0]] = h[kIdxC[0]]
h[kIdxC[0]] = h[kIdxB[0]]
h[kIdxB[0]] = tA[0]
tp = uint32(h[kIdxD[1]] + w[1] +
(((h[kIdxB[1]] ^ h[kIdxC[1]]) & h[kIdxA[1]]) ^ h[kIdxC[1]]))
h[kIdxA[1]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^1]
h[kIdxD[1]] = h[kIdxC[1]]
h[kIdxC[1]] = h[kIdxB[1]]
h[kIdxB[1]] = tA[1]
tp = uint32(h[kIdxD[2]] + w[2] +
(((h[kIdxB[2]] ^ h[kIdxC[2]]) & h[kIdxA[2]]) ^ h[kIdxC[2]]))
h[kIdxA[2]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^2]
h[kIdxD[2]] = h[kIdxC[2]]
h[kIdxC[2]] = h[kIdxB[2]]
h[kIdxB[2]] = tA[2]
tp = uint32(h[kIdxD[3]] + w[3] +
(((h[kIdxB[3]] ^ h[kIdxC[3]]) & h[kIdxA[3]]) ^ h[kIdxC[3]]))
h[kIdxA[3]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^3]
h[kIdxD[3]] = h[kIdxC[3]]
h[kIdxC[3]] = h[kIdxB[3]]
h[kIdxB[3]] = tA[3]
tp = uint32(h[kIdxD[4]] + w[4] +
(((h[kIdxB[4]] ^ h[kIdxC[4]]) & h[kIdxA[4]]) ^ h[kIdxC[4]]))
h[kIdxA[4]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^4]
h[kIdxD[4]] = h[kIdxC[4]]
h[kIdxC[4]] = h[kIdxB[4]]
h[kIdxB[4]] = tA[4]
tp = uint32(h[kIdxD[5]] + w[5] +
(((h[kIdxB[5]] ^ h[kIdxC[5]]) & h[kIdxA[5]]) ^ h[kIdxC[5]]))
h[kIdxA[5]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^5]
h[kIdxD[5]] = h[kIdxC[5]]
h[kIdxC[5]] = h[kIdxB[5]]
h[kIdxB[5]] = tA[5]
tp = uint32(h[kIdxD[6]] + w[6] +
(((h[kIdxB[6]] ^ h[kIdxC[6]]) & h[kIdxA[6]]) ^ h[kIdxC[6]]))
h[kIdxA[6]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^6]
h[kIdxD[6]] = h[kIdxC[6]]
h[kIdxC[6]] = h[kIdxB[6]]
h[kIdxB[6]] = tA[6]
tp = uint32(h[kIdxD[7]] + w[7] +
(((h[kIdxB[7]] ^ h[kIdxC[7]]) & h[kIdxA[7]]) ^ h[kIdxC[7]]))
h[kIdxA[7]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^7]
h[kIdxD[7]] = h[kIdxC[7]]
h[kIdxC[7]] = h[kIdxB[7]]
h[kIdxB[7]] = tA[7]
tA[0] = ((h[0] << p1) | (h[0] >> (32 - p1)))
tA[1] = ((h[1] << p1) | (h[1] >> (32 - p1)))
tA[2] = ((h[2] << p1) | (h[2] >> (32 - p1)))
tA[3] = ((h[3] << p1) | (h[3] >> (32 - p1)))
tA[4] = ((h[4] << p1) | (h[4] >> (32 - p1)))
tA[5] = ((h[5] << p1) | (h[5] >> (32 - p1)))
tA[6] = ((h[6] << p1) | (h[6] >> (32 - p1)))
tA[7] = ((h[7] << p1) | (h[7] >> (32 - p1)))
tp = uint32(h[kIdxD[0]] + w[8] +
(((h[kIdxB[0]] ^ h[kIdxC[0]]) & h[kIdxA[0]]) ^ h[kIdxC[0]]))
h[kIdxA[0]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]]
h[kIdxD[0]] = h[kIdxC[0]]
h[kIdxC[0]] = h[kIdxB[0]]
h[kIdxB[0]] = tA[0]
tp = uint32(h[kIdxD[1]] + w[9] +
(((h[kIdxB[1]] ^ h[kIdxC[1]]) & h[kIdxA[1]]) ^ h[kIdxC[1]]))
h[kIdxA[1]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^1]
h[kIdxD[1]] = h[kIdxC[1]]
h[kIdxC[1]] = h[kIdxB[1]]
h[kIdxB[1]] = tA[1]
tp = uint32(h[kIdxD[2]] + w[10] +
(((h[kIdxB[2]] ^ h[kIdxC[2]]) & h[kIdxA[2]]) ^ h[kIdxC[2]]))
h[kIdxA[2]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^2]
h[kIdxD[2]] = h[kIdxC[2]]
h[kIdxC[2]] = h[kIdxB[2]]
h[kIdxB[2]] = tA[2]
tp = uint32(h[kIdxD[3]] + w[11] +
(((h[kIdxB[3]] ^ h[kIdxC[3]]) & h[kIdxA[3]]) ^ h[kIdxC[3]]))
h[kIdxA[3]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^3]
h[kIdxD[3]] = h[kIdxC[3]]
h[kIdxC[3]] = h[kIdxB[3]]
h[kIdxB[3]] = tA[3]
tp = uint32(h[kIdxD[4]] + w[12] +
(((h[kIdxB[4]] ^ h[kIdxC[4]]) & h[kIdxA[4]]) ^ h[kIdxC[4]]))
h[kIdxA[4]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^4]
h[kIdxD[4]] = h[kIdxC[4]]
h[kIdxC[4]] = h[kIdxB[4]]
h[kIdxB[4]] = tA[4]
tp = uint32(h[kIdxD[5]] + w[13] +
(((h[kIdxB[5]] ^ h[kIdxC[5]]) & h[kIdxA[5]]) ^ h[kIdxC[5]]))
h[kIdxA[5]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^5]
h[kIdxD[5]] = h[kIdxC[5]]
h[kIdxC[5]] = h[kIdxB[5]]
h[kIdxB[5]] = tA[5]
tp = uint32(h[kIdxD[6]] + w[14] +
(((h[kIdxB[6]] ^ h[kIdxC[6]]) & h[kIdxA[6]]) ^ h[kIdxC[6]]))
h[kIdxA[6]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^6]
h[kIdxD[6]] = h[kIdxC[6]]
h[kIdxC[6]] = h[kIdxB[6]]
h[kIdxB[6]] = tA[6]
tp = uint32(h[kIdxD[7]] + w[15] +
(((h[kIdxB[7]] ^ h[kIdxC[7]]) & h[kIdxA[7]]) ^ h[kIdxC[7]]))
h[kIdxA[7]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^7]
h[kIdxD[7]] = h[kIdxC[7]]
h[kIdxC[7]] = h[kIdxB[7]]
h[kIdxB[7]] = tA[7]
tA[0] = ((h[0] << p2) | (h[0] >> (32 - p2)))
tA[1] = ((h[1] << p2) | (h[1] >> (32 - p2)))
tA[2] = ((h[2] << p2) | (h[2] >> (32 - p2)))
tA[3] = ((h[3] << p2) | (h[3] >> (32 - p2)))
tA[4] = ((h[4] << p2) | (h[4] >> (32 - p2)))
tA[5] = ((h[5] << p2) | (h[5] >> (32 - p2)))
tA[6] = ((h[6] << p2) | (h[6] >> (32 - p2)))
tA[7] = ((h[7] << p2) | (h[7] >> (32 - p2)))
tp = uint32(h[kIdxD[0]] + w[16] +
(((h[kIdxB[0]] ^ h[kIdxC[0]]) & h[kIdxA[0]]) ^ h[kIdxC[0]]))
h[kIdxA[0]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]]
h[kIdxD[0]] = h[kIdxC[0]]
h[kIdxC[0]] = h[kIdxB[0]]
h[kIdxB[0]] = tA[0]
tp = uint32(h[kIdxD[1]] + w[17] +
(((h[kIdxB[1]] ^ h[kIdxC[1]]) & h[kIdxA[1]]) ^ h[kIdxC[1]]))
h[kIdxA[1]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^1]
h[kIdxD[1]] = h[kIdxC[1]]
h[kIdxC[1]] = h[kIdxB[1]]
h[kIdxB[1]] = tA[1]
tp = uint32(h[kIdxD[2]] + w[18] +
(((h[kIdxB[2]] ^ h[kIdxC[2]]) & h[kIdxA[2]]) ^ h[kIdxC[2]]))
h[kIdxA[2]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^2]
h[kIdxD[2]] = h[kIdxC[2]]
h[kIdxC[2]] = h[kIdxB[2]]
h[kIdxB[2]] = tA[2]
tp = uint32(h[kIdxD[3]] + w[19] +
(((h[kIdxB[3]] ^ h[kIdxC[3]]) & h[kIdxA[3]]) ^ h[kIdxC[3]]))
h[kIdxA[3]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^3]
h[kIdxD[3]] = h[kIdxC[3]]
h[kIdxC[3]] = h[kIdxB[3]]
h[kIdxB[3]] = tA[3]
tp = uint32(h[kIdxD[4]] + w[20] +
(((h[kIdxB[4]] ^ h[kIdxC[4]]) & h[kIdxA[4]]) ^ h[kIdxC[4]]))
h[kIdxA[4]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^4]
h[kIdxD[4]] = h[kIdxC[4]]
h[kIdxC[4]] = h[kIdxB[4]]
h[kIdxB[4]] = tA[4]
tp = uint32(h[kIdxD[5]] + w[21] +
(((h[kIdxB[5]] ^ h[kIdxC[5]]) & h[kIdxA[5]]) ^ h[kIdxC[5]]))
h[kIdxA[5]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^5]
h[kIdxD[5]] = h[kIdxC[5]]
h[kIdxC[5]] = h[kIdxB[5]]
h[kIdxB[5]] = tA[5]
tp = uint32(h[kIdxD[6]] + w[22] +
(((h[kIdxB[6]] ^ h[kIdxC[6]]) & h[kIdxA[6]]) ^ h[kIdxC[6]]))
h[kIdxA[6]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^6]
h[kIdxD[6]] = h[kIdxC[6]]
h[kIdxC[6]] = h[kIdxB[6]]
h[kIdxB[6]] = tA[6]
tp = uint32(h[kIdxD[7]] + w[23] +
(((h[kIdxB[7]] ^ h[kIdxC[7]]) & h[kIdxA[7]]) ^ h[kIdxC[7]]))
h[kIdxA[7]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^7]
h[kIdxD[7]] = h[kIdxC[7]]
h[kIdxC[7]] = h[kIdxB[7]]
h[kIdxB[7]] = tA[7]
tA[0] = ((h[0] << p3) | (h[0] >> (32 - p3)))
tA[1] = ((h[1] << p3) | (h[1] >> (32 - p3)))
tA[2] = ((h[2] << p3) | (h[2] >> (32 - p3)))
tA[3] = ((h[3] << p3) | (h[3] >> (32 - p3)))
tA[4] = ((h[4] << p3) | (h[4] >> (32 - p3)))
tA[5] = ((h[5] << p3) | (h[5] >> (32 - p3)))
tA[6] = ((h[6] << p3) | (h[6] >> (32 - p3)))
tA[7] = ((h[7] << p3) | (h[7] >> (32 - p3)))
tp = uint32(h[kIdxD[0]] + w[24] +
(((h[kIdxB[0]] ^ h[kIdxC[0]]) & h[kIdxA[0]]) ^ h[kIdxC[0]]))
h[kIdxA[0]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]]
h[kIdxD[0]] = h[kIdxC[0]]
h[kIdxC[0]] = h[kIdxB[0]]
h[kIdxB[0]] = tA[0]
tp = uint32(h[kIdxD[1]] + w[25] +
(((h[kIdxB[1]] ^ h[kIdxC[1]]) & h[kIdxA[1]]) ^ h[kIdxC[1]]))
h[kIdxA[1]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^1]
h[kIdxD[1]] = h[kIdxC[1]]
h[kIdxC[1]] = h[kIdxB[1]]
h[kIdxB[1]] = tA[1]
tp = uint32(h[kIdxD[2]] + w[26] +
(((h[kIdxB[2]] ^ h[kIdxC[2]]) & h[kIdxA[2]]) ^ h[kIdxC[2]]))
h[kIdxA[2]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^2]
h[kIdxD[2]] = h[kIdxC[2]]
h[kIdxC[2]] = h[kIdxB[2]]
h[kIdxB[2]] = tA[2]
tp = uint32(h[kIdxD[3]] + w[27] +
(((h[kIdxB[3]] ^ h[kIdxC[3]]) & h[kIdxA[3]]) ^ h[kIdxC[3]]))
h[kIdxA[3]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^3]
h[kIdxD[3]] = h[kIdxC[3]]
h[kIdxC[3]] = h[kIdxB[3]]
h[kIdxB[3]] = tA[3]
tp = uint32(h[kIdxD[4]] + w[28] +
(((h[kIdxB[4]] ^ h[kIdxC[4]]) & h[kIdxA[4]]) ^ h[kIdxC[4]]))
h[kIdxA[4]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^4]
h[kIdxD[4]] = h[kIdxC[4]]
h[kIdxC[4]] = h[kIdxB[4]]
h[kIdxB[4]] = tA[4]
tp = uint32(h[kIdxD[5]] + w[29] +
(((h[kIdxB[5]] ^ h[kIdxC[5]]) & h[kIdxA[5]]) ^ h[kIdxC[5]]))
h[kIdxA[5]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^5]
h[kIdxD[5]] = h[kIdxC[5]]
h[kIdxC[5]] = h[kIdxB[5]]
h[kIdxB[5]] = tA[5]
tp = uint32(h[kIdxD[6]] + w[30] +
(((h[kIdxB[6]] ^ h[kIdxC[6]]) & h[kIdxA[6]]) ^ h[kIdxC[6]]))
h[kIdxA[6]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^6]
h[kIdxD[6]] = h[kIdxC[6]]
h[kIdxC[6]] = h[kIdxB[6]]
h[kIdxB[6]] = tA[6]
tp = uint32(h[kIdxD[7]] + w[31] +
(((h[kIdxB[7]] ^ h[kIdxC[7]]) & h[kIdxA[7]]) ^ h[kIdxC[7]]))
h[kIdxA[7]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^7]
h[kIdxD[7]] = h[kIdxC[7]]
h[kIdxC[7]] = h[kIdxB[7]]
h[kIdxB[7]] = tA[7]
tA[0] = ((h[0] << p0) | (h[0] >> (32 - p0)))
tA[1] = ((h[1] << p0) | (h[1] >> (32 - p0)))
tA[2] = ((h[2] << p0) | (h[2] >> (32 - p0)))
tA[3] = ((h[3] << p0) | (h[3] >> (32 - p0)))
tA[4] = ((h[4] << p0) | (h[4] >> (32 - p0)))
tA[5] = ((h[5] << p0) | (h[5] >> (32 - p0)))
tA[6] = ((h[6] << p0) | (h[6] >> (32 - p0)))
tA[7] = ((h[7] << p0) | (h[7] >> (32 - p0)))
tp = uint32(h[kIdxD[0]] + w[32] +
((h[kIdxA[0]] & h[kIdxB[0]]) | ((h[kIdxA[0]] | h[kIdxB[0]]) & h[kIdxC[0]])))
h[kIdxA[0]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]]
h[kIdxD[0]] = h[kIdxC[0]]
h[kIdxC[0]] = h[kIdxB[0]]
h[kIdxB[0]] = tA[0]
tp = uint32(h[kIdxD[1]] + w[33] +
((h[kIdxA[1]] & h[kIdxB[1]]) | ((h[kIdxA[1]] | h[kIdxB[1]]) & h[kIdxC[1]])))
h[kIdxA[1]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^1]
h[kIdxD[1]] = h[kIdxC[1]]
h[kIdxC[1]] = h[kIdxB[1]]
h[kIdxB[1]] = tA[1]
tp = uint32(h[kIdxD[2]] + w[34] +
((h[kIdxA[2]] & h[kIdxB[2]]) | ((h[kIdxA[2]] | h[kIdxB[2]]) & h[kIdxC[2]])))
h[kIdxA[2]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^2]
h[kIdxD[2]] = h[kIdxC[2]]
h[kIdxC[2]] = h[kIdxB[2]]
h[kIdxB[2]] = tA[2]
tp = uint32(h[kIdxD[3]] + w[35] +
((h[kIdxA[3]] & h[kIdxB[3]]) | ((h[kIdxA[3]] | h[kIdxB[3]]) & h[kIdxC[3]])))
h[kIdxA[3]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^3]
h[kIdxD[3]] = h[kIdxC[3]]
h[kIdxC[3]] = h[kIdxB[3]]
h[kIdxB[3]] = tA[3]
tp = uint32(h[kIdxD[4]] + w[36] +
((h[kIdxA[4]] & h[kIdxB[4]]) | ((h[kIdxA[4]] | h[kIdxB[4]]) & h[kIdxC[4]])))
h[kIdxA[4]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^4]
h[kIdxD[4]] = h[kIdxC[4]]
h[kIdxC[4]] = h[kIdxB[4]]
h[kIdxB[4]] = tA[4]
tp = uint32(h[kIdxD[5]] + w[37] +
((h[kIdxA[5]] & h[kIdxB[5]]) | ((h[kIdxA[5]] | h[kIdxB[5]]) & h[kIdxC[5]])))
h[kIdxA[5]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^5]
h[kIdxD[5]] = h[kIdxC[5]]
h[kIdxC[5]] = h[kIdxB[5]]
h[kIdxB[5]] = tA[5]
tp = uint32(h[kIdxD[6]] + w[38] +
((h[kIdxA[6]] & h[kIdxB[6]]) | ((h[kIdxA[6]] | h[kIdxB[6]]) & h[kIdxC[6]])))
h[kIdxA[6]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^6]
h[kIdxD[6]] = h[kIdxC[6]]
h[kIdxC[6]] = h[kIdxB[6]]
h[kIdxB[6]] = tA[6]
tp = uint32(h[kIdxD[7]] + w[39] +
((h[kIdxA[7]] & h[kIdxB[7]]) | ((h[kIdxA[7]] | h[kIdxB[7]]) & h[kIdxC[7]])))
h[kIdxA[7]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^7]
h[kIdxD[7]] = h[kIdxC[7]]
h[kIdxC[7]] = h[kIdxB[7]]
h[kIdxB[7]] = tA[7]
tA[0] = ((h[0] << p1) | (h[0] >> (32 - p1)))
tA[1] = ((h[1] << p1) | (h[1] >> (32 - p1)))
tA[2] = ((h[2] << p1) | (h[2] >> (32 - p1)))
tA[3] = ((h[3] << p1) | (h[3] >> (32 - p1)))
tA[4] = ((h[4] << p1) | (h[4] >> (32 - p1)))
tA[5] = ((h[5] << p1) | (h[5] >> (32 - p1)))
tA[6] = ((h[6] << p1) | (h[6] >> (32 - p1)))
tA[7] = ((h[7] << p1) | (h[7] >> (32 - p1)))
tp = uint32(h[kIdxD[0]] + w[40] +
((h[kIdxA[0]] & h[kIdxB[0]]) | ((h[kIdxA[0]] | h[kIdxB[0]]) & h[kIdxC[0]])))
h[kIdxA[0]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]]
h[kIdxD[0]] = h[kIdxC[0]]
h[kIdxC[0]] = h[kIdxB[0]]
h[kIdxB[0]] = tA[0]
tp = uint32(h[kIdxD[1]] + w[41] +
((h[kIdxA[1]] & h[kIdxB[1]]) | ((h[kIdxA[1]] | h[kIdxB[1]]) & h[kIdxC[1]])))
h[kIdxA[1]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^1]
h[kIdxD[1]] = h[kIdxC[1]]
h[kIdxC[1]] = h[kIdxB[1]]
h[kIdxB[1]] = tA[1]
tp = uint32(h[kIdxD[2]] + w[42] +
((h[kIdxA[2]] & h[kIdxB[2]]) | ((h[kIdxA[2]] | h[kIdxB[2]]) & h[kIdxC[2]])))
h[kIdxA[2]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^2]
h[kIdxD[2]] = h[kIdxC[2]]
h[kIdxC[2]] = h[kIdxB[2]]
h[kIdxB[2]] = tA[2]
tp = uint32(h[kIdxD[3]] + w[43] +
((h[kIdxA[3]] & h[kIdxB[3]]) | ((h[kIdxA[3]] | h[kIdxB[3]]) & h[kIdxC[3]])))
h[kIdxA[3]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^3]
h[kIdxD[3]] = h[kIdxC[3]]
h[kIdxC[3]] = h[kIdxB[3]]
h[kIdxB[3]] = tA[3]
tp = uint32(h[kIdxD[4]] + w[44] +
((h[kIdxA[4]] & h[kIdxB[4]]) | ((h[kIdxA[4]] | h[kIdxB[4]]) & h[kIdxC[4]])))
h[kIdxA[4]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^4]
h[kIdxD[4]] = h[kIdxC[4]]
h[kIdxC[4]] = h[kIdxB[4]]
h[kIdxB[4]] = tA[4]
tp = uint32(h[kIdxD[5]] + w[45] +
((h[kIdxA[5]] & h[kIdxB[5]]) | ((h[kIdxA[5]] | h[kIdxB[5]]) & h[kIdxC[5]])))
h[kIdxA[5]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^5]
h[kIdxD[5]] = h[kIdxC[5]]
h[kIdxC[5]] = h[kIdxB[5]]
h[kIdxB[5]] = tA[5]
tp = uint32(h[kIdxD[6]] + w[46] +
((h[kIdxA[6]] & h[kIdxB[6]]) | ((h[kIdxA[6]] | h[kIdxB[6]]) & h[kIdxC[6]])))
h[kIdxA[6]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^6]
h[kIdxD[6]] = h[kIdxC[6]]
h[kIdxC[6]] = h[kIdxB[6]]
h[kIdxB[6]] = tA[6]
tp = uint32(h[kIdxD[7]] + w[47] +
((h[kIdxA[7]] & h[kIdxB[7]]) | ((h[kIdxA[7]] | h[kIdxB[7]]) & h[kIdxC[7]])))
h[kIdxA[7]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^7]
h[kIdxD[7]] = h[kIdxC[7]]
h[kIdxC[7]] = h[kIdxB[7]]
h[kIdxB[7]] = tA[7]
tA[0] = ((h[0] << p2) | (h[0] >> (32 - p2)))
tA[1] = ((h[1] << p2) | (h[1] >> (32 - p2)))
tA[2] = ((h[2] << p2) | (h[2] >> (32 - p2)))
tA[3] = ((h[3] << p2) | (h[3] >> (32 - p2)))
tA[4] = ((h[4] << p2) | (h[4] >> (32 - p2)))
tA[5] = ((h[5] << p2) | (h[5] >> (32 - p2)))
tA[6] = ((h[6] << p2) | (h[6] >> (32 - p2)))
tA[7] = ((h[7] << p2) | (h[7] >> (32 - p2)))
tp = uint32(h[kIdxD[0]] + w[48] +
((h[kIdxA[0]] & h[kIdxB[0]]) | ((h[kIdxA[0]] | h[kIdxB[0]]) & h[kIdxC[0]])))
h[kIdxA[0]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]]
h[kIdxD[0]] = h[kIdxC[0]]
h[kIdxC[0]] = h[kIdxB[0]]
h[kIdxB[0]] = tA[0]
tp = uint32(h[kIdxD[1]] + w[49] +
((h[kIdxA[1]] & h[kIdxB[1]]) | ((h[kIdxA[1]] | h[kIdxB[1]]) & h[kIdxC[1]])))
h[kIdxA[1]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^1]
h[kIdxD[1]] = h[kIdxC[1]]
h[kIdxC[1]] = h[kIdxB[1]]
h[kIdxB[1]] = tA[1]
tp = uint32(h[kIdxD[2]] + w[50] +
((h[kIdxA[2]] & h[kIdxB[2]]) | ((h[kIdxA[2]] | h[kIdxB[2]]) & h[kIdxC[2]])))
h[kIdxA[2]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^2]
h[kIdxD[2]] = h[kIdxC[2]]
h[kIdxC[2]] = h[kIdxB[2]]
h[kIdxB[2]] = tA[2]
tp = uint32(h[kIdxD[3]] + w[51] +
((h[kIdxA[3]] & h[kIdxB[3]]) | ((h[kIdxA[3]] | h[kIdxB[3]]) & h[kIdxC[3]])))
h[kIdxA[3]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^3]
h[kIdxD[3]] = h[kIdxC[3]]
h[kIdxC[3]] = h[kIdxB[3]]
h[kIdxB[3]] = tA[3]
tp = uint32(h[kIdxD[4]] + w[52] +
((h[kIdxA[4]] & h[kIdxB[4]]) | ((h[kIdxA[4]] | h[kIdxB[4]]) & h[kIdxC[4]])))
h[kIdxA[4]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^4]
h[kIdxD[4]] = h[kIdxC[4]]
h[kIdxC[4]] = h[kIdxB[4]]
h[kIdxB[4]] = tA[4]
tp = uint32(h[kIdxD[5]] + w[53] +
((h[kIdxA[5]] & h[kIdxB[5]]) | ((h[kIdxA[5]] | h[kIdxB[5]]) & h[kIdxC[5]])))
h[kIdxA[5]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^5]
h[kIdxD[5]] = h[kIdxC[5]]
h[kIdxC[5]] = h[kIdxB[5]]
h[kIdxB[5]] = tA[5]
tp = uint32(h[kIdxD[6]] + w[54] +
((h[kIdxA[6]] & h[kIdxB[6]]) | ((h[kIdxA[6]] | h[kIdxB[6]]) & h[kIdxC[6]])))
h[kIdxA[6]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^6]
h[kIdxD[6]] = h[kIdxC[6]]
h[kIdxC[6]] = h[kIdxB[6]]
h[kIdxB[6]] = tA[6]
tp = uint32(h[kIdxD[7]] + w[55] +
((h[kIdxA[7]] & h[kIdxB[7]]) | ((h[kIdxA[7]] | h[kIdxB[7]]) & h[kIdxC[7]])))
h[kIdxA[7]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^7]
h[kIdxD[7]] = h[kIdxC[7]]
h[kIdxC[7]] = h[kIdxB[7]]
h[kIdxB[7]] = tA[7]
tA[0] = ((h[0] << p3) | (h[0] >> (32 - p3)))
tA[1] = ((h[1] << p3) | (h[1] >> (32 - p3)))
tA[2] = ((h[2] << p3) | (h[2] >> (32 - p3)))
tA[3] = ((h[3] << p3) | (h[3] >> (32 - p3)))
tA[4] = ((h[4] << p3) | (h[4] >> (32 - p3)))
tA[5] = ((h[5] << p3) | (h[5] >> (32 - p3)))
tA[6] = ((h[6] << p3) | (h[6] >> (32 - p3)))
tA[7] = ((h[7] << p3) | (h[7] >> (32 - p3)))
tp = uint32(h[kIdxD[0]] + w[56] +
((h[kIdxA[0]] & h[kIdxB[0]]) | ((h[kIdxA[0]] | h[kIdxB[0]]) & h[kIdxC[0]])))
h[kIdxA[0]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]]
h[kIdxD[0]] = h[kIdxC[0]]
h[kIdxC[0]] = h[kIdxB[0]]
h[kIdxB[0]] = tA[0]
tp = uint32(h[kIdxD[1]] + w[57] +
((h[kIdxA[1]] & h[kIdxB[1]]) | ((h[kIdxA[1]] | h[kIdxB[1]]) & h[kIdxC[1]])))
h[kIdxA[1]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^1]
h[kIdxD[1]] = h[kIdxC[1]]
h[kIdxC[1]] = h[kIdxB[1]]
h[kIdxB[1]] = tA[1]
tp = uint32(h[kIdxD[2]] + w[58] +
((h[kIdxA[2]] & h[kIdxB[2]]) | ((h[kIdxA[2]] | h[kIdxB[2]]) & h[kIdxC[2]])))
h[kIdxA[2]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^2]
h[kIdxD[2]] = h[kIdxC[2]]
h[kIdxC[2]] = h[kIdxB[2]]
h[kIdxB[2]] = tA[2]
tp = uint32(h[kIdxD[3]] + w[59] +
((h[kIdxA[3]] & h[kIdxB[3]]) | ((h[kIdxA[3]] | h[kIdxB[3]]) & h[kIdxC[3]])))
h[kIdxA[3]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^3]
h[kIdxD[3]] = h[kIdxC[3]]
h[kIdxC[3]] = h[kIdxB[3]]
h[kIdxB[3]] = tA[3]
tp = uint32(h[kIdxD[4]] + w[60] +
((h[kIdxA[4]] & h[kIdxB[4]]) | ((h[kIdxA[4]] | h[kIdxB[4]]) & h[kIdxC[4]])))
h[kIdxA[4]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^4]
h[kIdxD[4]] = h[kIdxC[4]]
h[kIdxC[4]] = h[kIdxB[4]]
h[kIdxB[4]] = tA[4]
tp = uint32(h[kIdxD[5]] + w[61] +
((h[kIdxA[5]] & h[kIdxB[5]]) | ((h[kIdxA[5]] | h[kIdxB[5]]) & h[kIdxC[5]])))
h[kIdxA[5]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^5]
h[kIdxD[5]] = h[kIdxC[5]]
h[kIdxC[5]] = h[kIdxB[5]]
h[kIdxB[5]] = tA[5]
tp = uint32(h[kIdxD[6]] + w[62] +
((h[kIdxA[6]] & h[kIdxB[6]]) | ((h[kIdxA[6]] | h[kIdxB[6]]) & h[kIdxC[6]])))
h[kIdxA[6]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^6]
h[kIdxD[6]] = h[kIdxC[6]]
h[kIdxC[6]] = h[kIdxB[6]]
h[kIdxB[6]] = tA[6]
tp = uint32(h[kIdxD[7]] + w[63] +
((h[kIdxA[7]] & h[kIdxB[7]]) | ((h[kIdxA[7]] | h[kIdxB[7]]) & h[kIdxC[7]])))
h[kIdxA[7]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^7]
h[kIdxD[7]] = h[kIdxC[7]]
h[kIdxC[7]] = h[kIdxB[7]]
h[kIdxB[7]] = tA[7]
}
////////////////
// kInit holds the 32 initial state words that Reset copies into the
// digest's h array.
var kInit = []uint32{
	0x0BA16B95, 0x72F999AD, 0x9FECC2AE, 0xBA3264FC,
	0x5E894929, 0x8E9F30E5, 0x2F1DAA37, 0xF0F2C558,
	0xAC506643, 0xA90635A5, 0xE25B878B, 0xAAB7878F,
	0x88817F7A, 0x0A02892B, 0x559A7550, 0x598F657E,
	0x7EEF60A1, 0x6B70E3E8, 0x9C1714D1, 0xB958E2A8,
	0xAB02675E, 0xED1C014F, 0xCD8D65BB, 0xFDB7A257,
	0x09254899, 0xD699C7BC, 0x9019B6DC, 0x2B9022E4,
	0x8FA14956, 0x21BF9BD3, 0xB94D0943, 0x6FFDDC22,
}
// Lane index tables used by mixinRound. For lane j the four words it works
// on are h[kIdxA[j]], h[kIdxB[j]], h[kIdxC[j]] and h[kIdxD[j]]; the tables
// map lane j to j, 8+j, 16+j and 24+j respectively.
var (
	kIdxA = [...]uint8{0, 1, 2, 3, 4, 5, 6, 7}
	kIdxB = [...]uint8{8, 9, 10, 11, 12, 13, 14, 15}
	kIdxC = [...]uint8{16, 17, 18, 19, 20, 21, 22, 23}
	kIdxD = [...]uint8{24, 25, 26, 27, 28, 29, 30, 31}
)
// kPrems lists the XOR masks mixinRound applies to a lane index to pick
// which saved word is added to that lane: step s of a call uses
// kPrems[isp+s]. The seven distinct masks are followed by a repeat of the
// first four, so a run of eight entries can start at any isp from 0 to 3.
var kPrems = []uint8{
	1, 6, 2, 3, 5, 7, 4,
	1, 6, 2, 3,
}
// kPrem is the expanded form of the seven lane permutations: row r, column j
// holds m^j, where m is the r-th mask of the sequence 1, 6, 2, 3, 5, 7, 4.
var kPrem = [7][8]uint8{
	0: {1, 0, 3, 2, 5, 4, 7, 6}, // j ^ 1
	1: {6, 7, 4, 5, 2, 3, 0, 1}, // j ^ 6
	2: {2, 3, 0, 1, 6, 7, 4, 5}, // j ^ 2
	3: {3, 2, 1, 0, 7, 6, 5, 4}, // j ^ 3
	4: {5, 4, 7, 6, 1, 0, 3, 2}, // j ^ 5
	5: {7, 6, 5, 4, 3, 2, 1, 0}, // j ^ 7
	6: {4, 5, 6, 7, 0, 1, 2, 3}, // j ^ 4
}
// wbp holds 32 offsets, each a multiple of 16. The multipliers form a
// permutation of 0..31 that stays within each group of eight (0-7, 8-15,
// 16-23, 24-31).
// NOTE(review): presumably these are starting positions of 16-word blocks in
// the expanded message; the code that reads wbp is outside this section.
var wbp = [32]uintptr{
	16 * 4, 16 * 6, 16 * 0, 16 * 2, 16 * 7, 16 * 5, 16 * 3, 16 * 1,
	16 * 15, 16 * 11, 16 * 12, 16 * 8, 16 * 9, 16 * 13, 16 * 10, 16 * 14,
	16 * 17, 16 * 18, 16 * 23, 16 * 20, 16 * 22, 16 * 21, 16 * 16, 16 * 19,
	16 * 30, 16 * 24, 16 * 25, 16 * 31, 16 * 27, 16 * 29, 16 * 28, 16 * 26,
}
// kAlphaTab holds the 256 powers of 41 modulo 257: kAlphaTab[i] is
// 41^i mod 257. The butterfly passes multiply by these entries. With sixteen
// entries per row, each row starts at the next power of two (41^16 is 2
// modulo 257) and row 8 starts at 256, which is -1 modulo 257.
var kAlphaTab = []int32{
	1, 41, 139, 45, 46, 87, 226, 14, 60, 147, 116, 130, 190, 80, 196, 69,
	2, 82, 21, 90, 92, 174, 195, 28, 120, 37, 232, 3, 123, 160, 135, 138,
	4, 164, 42, 180, 184, 91, 133, 56, 240, 74, 207, 6, 246, 63, 13, 19,
	8, 71, 84, 103, 111, 182, 9, 112, 223, 148, 157, 12, 235, 126, 26, 38,
	16, 142, 168, 206, 222, 107, 18, 224, 189, 39, 57, 24, 213, 252, 52, 76,
	32, 27, 79, 155, 187, 214, 36, 191, 121, 78, 114, 48, 169, 247, 104, 152,
	64, 54, 158, 53, 117, 171, 72, 125, 242, 156, 228, 96, 81, 237, 208, 47,
	128, 108, 59, 106, 234, 85, 144, 250, 227, 55, 199, 192, 162, 217, 159, 94,
	256, 216, 118, 212, 211, 170, 31, 243, 197, 110, 141, 127, 67, 177, 61, 188,
	255, 175, 236, 167, 165, 83, 62, 229, 137, 220, 25, 254, 134, 97, 122, 119,
	253, 93, 215, 77, 73, 166, 124, 201, 17, 183, 50, 251, 11, 194, 244, 238,
	249, 186, 173, 154, 146, 75, 248, 145, 34, 109, 100, 245, 22, 131, 231, 219,
	241, 115, 89, 51, 35, 150, 239, 33, 68, 218, 200, 233, 44, 5, 205, 181,
	225, 230, 178, 102, 70, 43, 221, 66, 136, 179, 143, 209, 88, 10, 153, 105,
	193, 203, 99, 204, 140, 86, 185, 132, 15, 101, 29, 161, 176, 20, 49, 210,
	129, 149, 198, 151, 23, 172, 113, 7, 30, 202, 58, 65, 95, 40, 98, 163,
}
// kYOffA holds the 256 powers of 163 modulo 257: kYOffA[i] is 163^i mod 257.
// Because 163 is the inverse of 41 modulo 257, this is kAlphaTab read
// backwards: kYOffA[i] equals kAlphaTab[(256-i)%256].
// NOTE(review): the code that reads this table is outside this section;
// confirm which block type it is applied to.
var kYOffA = []int32{
	1, 163, 98, 40, 95, 65, 58, 202, 30, 7, 113, 172, 23, 151, 198, 149,
	129, 210, 49, 20, 176, 161, 29, 101, 15, 132, 185, 86, 140, 204, 99, 203,
	193, 105, 153, 10, 88, 209, 143, 179, 136, 66, 221, 43, 70, 102, 178, 230,
	225, 181, 205, 5, 44, 233, 200, 218, 68, 33, 239, 150, 35, 51, 89, 115,
	241, 219, 231, 131, 22, 245, 100, 109, 34, 145, 248, 75, 146, 154, 173, 186,
	249, 238, 244, 194, 11, 251, 50, 183, 17, 201, 124, 166, 73, 77, 215, 93,
	253, 119, 122, 97, 134, 254, 25, 220, 137, 229, 62, 83, 165, 167, 236, 175,
	255, 188, 61, 177, 67, 127, 141, 110, 197, 243, 31, 170, 211, 212, 118, 216,
	256, 94, 159, 217, 162, 192, 199, 55, 227, 250, 144, 85, 234, 106, 59, 108,
	128, 47, 208, 237, 81, 96, 228, 156, 242, 125, 72, 171, 117, 53, 158, 54,
	64, 152, 104, 247, 169, 48, 114, 78, 121, 191, 36, 214, 187, 155, 79, 27,
	32, 76, 52, 252, 213, 24, 57, 39, 189, 224, 18, 107, 222, 206, 168, 142,
	16, 38, 26, 126, 235, 12, 157, 148, 223, 112, 9, 182, 111, 103, 84, 71,
	8, 19, 13, 63, 246, 6, 207, 74, 240, 56, 133, 91, 184, 180, 42, 164,
	4, 138, 135, 160, 123, 3, 232, 37, 120, 28, 195, 174, 92, 90, 21, 82,
	2, 69, 196, 80, 190, 130, 116, 147, 60, 14, 226, 87, 46, 45, 139, 41,
}
// kYOffB is a second table of 256 offsets, all in the range 0..255, laid out
// sixteen per row.
// NOTE(review): the code that reads this table is outside this section;
// presumably it is the counterpart of kYOffA for a different block type --
// confirm against the caller.
var kYOffB = []int32{
	2, 203, 156, 47, 118, 214, 107, 106, 45, 93, 212, 20, 111, 73, 162, 251,
	97, 215, 249, 53, 211, 19, 3, 89, 49, 207, 101, 67, 151, 130, 223, 23,
	189, 202, 178, 239, 253, 127, 204, 49, 76, 236, 82, 137, 232, 157, 65, 79,
	96, 161, 176, 130, 161, 30, 47, 9, 189, 247, 61, 226, 248, 90, 107, 64,
	0, 88, 131, 243, 133, 59, 113, 115, 17, 236, 33, 213, 12, 191, 111, 19,
	251, 61, 103, 208, 57, 35, 148, 248, 47, 116, 65, 119, 249, 178, 143, 40,
	189, 129, 8, 163, 204, 227, 230, 196, 205, 122, 151, 45, 187, 19, 227, 72,
	247, 125, 111, 121, 140, 220, 6, 107, 77, 69, 10, 101, 21, 65, 149, 171,
	255, 54, 101, 210, 139, 43, 150, 151, 212, 164, 45, 237, 146, 184, 95, 6,
	160, 42, 8, 204, 46, 238, 254, 168, 208, 50, 156, 190, 106, 127, 34, 234,
	68, 55, 79, 18, 4, 130, 53, 208, 181, 21, 175, 120, 25, 100, 192, 178,
	161, 96, 81, 127, 96, 227, 210, 248, 68, 10, 196, 31, 9, 167, 150, 193,
	0, 169, 126, 14, 124, 198, 144, 142, 240, 21, 224, 44, 245, 66, 146, 238,
	6, 196, 154, 49, 200, 222, 109, 9, 210, 141, 192, 138, 8, 79, 114, 217,
	68, 128, 249, 94, 53, 30, 27, 61, 52, 135, 106, 212, 70, 238, 30, 185,
	10, 132, 146, 136, 117, 37, 251, 150, 180, 188, 247, 156, 236, 192, 108, 86,
}