// Use of this source code is governed by an ISC // license that can be found in the LICENSE file. package simd import ( "fmt" "gitlab.com/nitya-sattva/go-x11/hash" ) // HashSize holds the size of a hash in bytes. const HashSize = int(64) // BlockSize holds the size of a block in bytes. const BlockSize = uintptr(128) //////////////// type digest struct { ptr uintptr ch uint32 cl uint32 h [32]uint32 b [BlockSize]byte } // New returns a new digest to compute a SIMD512 hash. func New() hash.Digest { ref := &digest{} ref.Reset() return ref } //////////////// // Reset resets the digest to its initial state. func (ref *digest) Reset() { ref.ptr = 0 ref.cl, ref.ch = 0, 0 copy(ref.h[:], kInit[:]) } // Sum appends the current hash to dst and returns the result // as a slice. It does not change the underlying hash state. func (ref *digest) Sum(dst []byte) []byte { dgt := *ref hsh := [64]byte{} dgt.Close(hsh[:], 0, 0) return append(dst, hsh[:]...) } // Write more data to the running hash, never returns an error. func (ref *digest) Write(src []byte) (int, error) { sln := uintptr(len(src)) fln := len(src) for sln > 0 { cln := BlockSize - ref.ptr if cln > sln { cln = sln } sln -= cln copy(ref.b[ref.ptr:], src[:cln]) src = src[cln:] ref.ptr += cln if ref.ptr == BlockSize { ref.compress(0) ref.ptr = 0 ref.cl += 1 if ref.cl == 0 { ref.ch++ } } } return fln, nil } // Close the digest by writing the last bits and storing the hash // in dst. This prepares the digest for reuse by calling reset. A call // to Close with a dst that is smaller then HashSize will return an error. func (ref *digest) Close(dst []byte, bits uint8, bcnt uint8) error { if ln := len(dst); HashSize > ln { return fmt.Errorf("Simd Close: dst min length: %d, got %d", HashSize, ln) } if ref.ptr > 0 || bcnt > 0 { memset(ref.b[ref.ptr:], 0) ref.b[ref.ptr] = uint8(bits & (0xFF << (8 - bcnt))) ref.compress(0) } memset(ref.b[:], 0) { low := uint32(ref.cl << 10) low += uint32(ref.ptr<<3) + uint32(bcnt) high := uint32(ref.ch<<10) + (ref.cl >> 22) encUInt32le(ref.b[:], low) encUInt32le(ref.b[4:], high) } ref.compress(1) for u := int(0); u < 16; u++ { encUInt32le(dst[(u<<2):], ref.h[u]) } ref.Reset() return nil } // Size returns the number of bytes required to store the hash. func (*digest) Size() int { return HashSize } // BlockSize returns the block size of the hash. func (*digest) BlockSize() int { return int(BlockSize) } //////////////// func memset(dst []byte, src byte) { for i := range dst { dst[i] = src } } func decUInt32le(src []byte) uint32 { return (uint32(src[0]) | uint32(src[1])<<8 | uint32(src[2])<<16 | uint32(src[3])<<24) } func encUInt32le(dst []uint8, src uint32) { dst[0] = uint8(src) dst[1] = uint8(src >> 8) dst[2] = uint8(src >> 16) dst[3] = uint8(src >> 24) } func (ref *digest) compress(last int) { var q [256]int32 var w [64]uint32 var st [32]uint32 mixoutRound(ref.b[:], q[:], 1<<2) mixoutRound(ref.b[2:], q[64:], 1<<2) { var t int32 var u, v uintptr m := q[0] n := q[64] q[0] = m + n q[64] = m - n m = q[u+1] n = q[u+1+64] t = (n * kAlphaTab[v+1*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u+1] = m + t q[u+1+64] = m - t m = q[u+2] n = q[u+2+64] t = (n * kAlphaTab[v+2*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u+2] = m + t q[u+2+64] = m - t m = q[u+3] n = q[u+3+64] t = (n * kAlphaTab[v+3*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u+3] = m + t q[u+3+64] = m - t u = 4 v = 4 * 2 for u < 64 { m = q[u] n = q[u+(64)] t = (n * kAlphaTab[v+0*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u] = m + t q[u+(64)] = m - t m = q[u+1] n = q[u+1+64] t = (n * kAlphaTab[v+1*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u+1] = m + t q[u+1+64] = m - t m = q[u+2] n = q[u+2+64] t = (n * kAlphaTab[v+2*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u+2] = m + t q[u+2+64] = m - t m = q[u+3] n = q[u+3+64] t = (n * kAlphaTab[v+3*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u+3] = m + t q[u+3+64] = m - t u += 4 v += 4 * 2 } } mixoutRound(ref.b[1:], q[128:], 1<<2) mixoutRound(ref.b[3:], q[192:], 1<<2) { var t int32 var u, v uintptr m := q[128] n := q[128+64] q[128] = m + n q[128+64] = m - n m = q[128+u+1] n = q[128+u+1+64] t = (n * kAlphaTab[v+1*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[128+u+1] = m + t q[128+u+1+64] = m - t m = q[128+u+2] n = q[128+u+2+64] t = (n * kAlphaTab[v+2*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[128+u+2] = m + t q[128+u+2+64] = m - t m = q[128+u+3] n = q[128+u+3+64] t = (n * kAlphaTab[v+3*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[128+u+3] = m + t q[128+u+3+64] = m - t u = 4 v = 4 * 2 for u < 64 { m = q[128+u] n = q[128+u+64] t = (n * kAlphaTab[v+0*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[128+u] = m + t q[128+u+64] = m - t m = q[128+u+1] n = q[128+u+1+64] t = (n * kAlphaTab[v+1*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[128+u+1] = m + t q[128+u+1+64] = m - t m = q[128+u+2] n = q[128+u+2+64] t = (n * kAlphaTab[v+2*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[128+u+2] = m + t q[128+u+2+64] = m - t m = q[128+u+3] n = q[128+u+3+64] t = (n * kAlphaTab[v+3*2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[128+u+3] = m + t q[128+u+3+64] = m - t u += 4 v += 4 * 2 } } { var t int32 var u, v uintptr m := q[0] n := q[128] q[0] = m + n q[128] = m - n m = q[u+1] n = q[u+1+128] t = (n * kAlphaTab[v+1]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u+1] = m + t q[u+1+128] = m - t m = q[u+2] n = q[u+2+128] t = (n * kAlphaTab[v+2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u+2] = m + t q[u+2+128] = m - t m = q[u+3] n = q[u+3+128] t = (n * kAlphaTab[v+3]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u+3] = m + t q[u+3+128] = m - t u = 4 v = 4 for u < 128 { m = q[u] n = q[u+128] t = (n * kAlphaTab[v+0]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u] = m + t q[u+128] = m - t m = q[u+1] n = q[u+1+128] t = (n * kAlphaTab[v+1]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u+1] = m + t q[u+1+128] = m - t m = q[u+2] n = q[u+2+128] t = (n * kAlphaTab[v+2]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u+2] = m + t q[u+2+128] = m - t m = q[u+3] n = q[u+3+128] t = (n * kAlphaTab[v+3]) t = ((t) & 0xFFFF) + ((t) >> 16) q[u+3] = m + t q[u+3+128] = m - t u += 4 v += 4 } } if last == 1 { var tq int32 for i := uintptr(0); i < 256; i++ { tq = q[i] + kYOffB[i] tq = (((tq) & 0xFFFF) + ((tq) >> 16)) tq = (((tq) & 0xFF) - ((tq) >> 8)) tq = (((tq) & 0xFF) - ((tq) >> 8)) if tq <= 128 { q[i] = tq } else { q[i] = tq - 257 } } } else { var tq int32 for i := uintptr(0); i < 256; i++ { tq = q[i] + kYOffA[i] tq = (((tq) & 0xFFFF) + ((tq) >> 16)) tq = (((tq) & 0xFF) - ((tq) >> 8)) tq = (((tq) & 0xFF) - ((tq) >> 8)) if tq <= 128 { q[i] = tq } else { q[i] = tq - 257 } } } { b := ref.b[:] s := ref.h[:] for i := uintptr(0); i < 32; i += 8 { st[i+0] = s[i+0] ^ decUInt32le(b[4*(i+0):]) st[i+1] = s[i+1] ^ decUInt32le(b[4*(i+1):]) st[i+2] = s[i+2] ^ decUInt32le(b[4*(i+2):]) st[i+3] = s[i+3] ^ decUInt32le(b[4*(i+3):]) st[i+4] = s[i+4] ^ decUInt32le(b[4*(i+4):]) st[i+5] = s[i+5] ^ decUInt32le(b[4*(i+5):]) st[i+6] = s[i+6] ^ decUInt32le(b[4*(i+6):]) st[i+7] = s[i+7] ^ decUInt32le(b[4*(i+7):]) } } for u := uintptr(0); u < 64; u += 8 { v := uintptr(wbp[(u >> 3)]) w[u+0] = ((uint32(q[v+2*0]*185) & uint32(0xFFFF)) + (uint32(q[v+2*0+1]*185) << 16)) w[u+1] = ((uint32(q[v+2*1]*185) & uint32(0xFFFF)) + (uint32(q[v+2*1+1]*185) << 16)) w[u+2] = ((uint32(q[v+2*2]*185) & uint32(0xFFFF)) + (uint32(q[v+2*2+1]*185) << 16)) w[u+3] = ((uint32(q[v+2*3]*185) & uint32(0xFFFF)) + (uint32(q[v+2*3+1]*185) << 16)) w[u+4] = ((uint32(q[v+2*4]*185) & uint32(0xFFFF)) + (uint32(q[v+2*4+1]*185) << 16)) w[u+5] = ((uint32(q[v+2*5]*185) & uint32(0xFFFF)) + (uint32(q[v+2*5+1]*185) << 16)) w[u+6] = ((uint32(q[v+2*6]*185) & uint32(0xFFFF)) + (uint32(q[v+2*6+1]*185) << 16)) w[u+7] = ((uint32(q[v+2*7]*185) & uint32(0xFFFF)) + (uint32(q[v+2*7+1]*185) << 16)) } mixinRound(st[:], w[:], 0, 3, 23, 17, 27) for u := uintptr(0); u < 64; u += 8 { v := uintptr(wbp[(u>>3)+8]) w[u+0] = (uint32(q[v+2*0]*185) & uint32(0xFFFF)) + (uint32(q[v+2*0+1]*185) << 16) w[u+1] = (uint32(q[v+2*1]*185) & uint32(0xFFFF)) + (uint32(q[v+2*1+1]*185) << 16) w[u+2] = (uint32(q[v+2*2]*185) & uint32(0xFFFF)) + (uint32(q[v+2*2+1]*185) << 16) w[u+3] = (uint32(q[v+2*3]*185) & uint32(0xFFFF)) + (uint32(q[v+2*3+1]*185) << 16) w[u+4] = (uint32(q[v+2*4]*185) & uint32(0xFFFF)) + (uint32(q[v+2*4+1]*185) << 16) w[u+5] = (uint32(q[v+2*5]*185) & uint32(0xFFFF)) + (uint32(q[v+2*5+1]*185) << 16) w[u+6] = (uint32(q[v+2*6]*185) & uint32(0xFFFF)) + (uint32(q[v+2*6+1]*185) << 16) w[u+7] = (uint32(q[v+2*7]*185) & uint32(0xFFFF)) + (uint32(q[v+2*7+1]*185) << 16) } mixinRound(st[:], w[:], 1, 28, 19, 22, 7) for u := uintptr(0); u < 64; u += 8 { v := uintptr(wbp[(u>>3)+16]) w[u+0] = ((uint32(q[v+2*0-256]*(233)) & uint32(0xFFFF)) + (uint32((q[v+2*0-128])*(233)) << 16)) w[u+1] = ((uint32(q[v+2*1-256]*(233)) & uint32(0xFFFF)) + (uint32((q[v+2*1-128])*(233)) << 16)) w[u+2] = ((uint32(q[v+2*2-256]*(233)) & uint32(0xFFFF)) + (uint32((q[v+2*2-128])*(233)) << 16)) w[u+3] = ((uint32(q[v+2*3-256]*(233)) & uint32(0xFFFF)) + (uint32((q[v+2*3-128])*(233)) << 16)) w[u+4] = ((uint32(q[v+2*4-256]*(233)) & uint32(0xFFFF)) + (uint32((q[v+2*4-128])*(233)) << 16)) w[u+5] = ((uint32(q[v+2*5-256]*(233)) & uint32(0xFFFF)) + (uint32((q[v+2*5-128])*(233)) << 16)) w[u+6] = ((uint32(q[v+2*6-256]*(233)) & uint32(0xFFFF)) + (uint32((q[v+2*6-128])*(233)) << 16)) w[u+7] = ((uint32(q[v+2*7-256]*(233)) & uint32(0xFFFF)) + (uint32((q[v+2*7-128])*(233)) << 16)) } mixinRound(st[:], w[:], 2, 29, 9, 15, 5) for u := uintptr(0); u < 64; u += 8 { v := uintptr(wbp[(u>>3)+24]) w[u+0] = ((uint32(q[v+2*0-383]*233) & uint32(0xFFFF)) + (uint32(q[v+2*0-255]*233) << 16)) w[u+1] = ((uint32(q[v+2*1-383]*233) & uint32(0xFFFF)) + (uint32(q[v+2*1-255]*233) << 16)) w[u+2] = ((uint32(q[v+2*2-383]*233) & uint32(0xFFFF)) + (uint32(q[v+2*2-255]*233) << 16)) w[u+3] = ((uint32(q[v+2*3-383]*233) & uint32(0xFFFF)) + (uint32(q[v+2*3-255]*233) << 16)) w[u+4] = ((uint32(q[v+2*4-383]*233) & uint32(0xFFFF)) + (uint32(q[v+2*4-255]*233) << 16)) w[u+5] = ((uint32(q[v+2*5-383]*233) & uint32(0xFFFF)) + (uint32(q[v+2*5-255]*233) << 16)) w[u+6] = ((uint32(q[v+2*6-383]*233) & uint32(0xFFFF)) + (uint32(q[v+2*6-255]*233) << 16)) w[u+7] = ((uint32(q[v+2*7-383]*233) & uint32(0xFFFF)) + (uint32(q[v+2*7-255]*233) << 16)) } mixinRound(st[:], w[:], 3, 4, 13, 10, 25) { var tp uint32 var tA [8]uint32 sta := ref.h[:] tA[0] = ((st[0] << 4) | (st[0] >> (32 - 4))) tA[1] = ((st[1] << 4) | (st[1] >> (32 - 4))) tA[2] = ((st[2] << 4) | (st[2] >> (32 - 4))) tA[3] = ((st[3] << 4) | (st[3] >> (32 - 4))) tA[4] = ((st[4] << 4) | (st[4] >> (32 - 4))) tA[5] = ((st[5] << 4) | (st[5] >> (32 - 4))) tA[6] = ((st[6] << 4) | (st[6] >> (32 - 4))) tA[7] = ((st[7] << 4) | (st[7] >> (32 - 4))) tp = uint32(st[kIdxD[0]] + sta[0] + (((st[kIdxB[0]] ^ st[kIdxC[0]]) & st[kIdxA[0]]) ^ st[kIdxC[0]])) st[kIdxA[0]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][0]] st[kIdxD[0]] = st[kIdxC[0]] st[kIdxC[0]] = st[kIdxB[0]] st[kIdxB[0]] = tA[0] tp = uint32(st[kIdxD[1]] + sta[1] + (((st[kIdxB[1]] ^ st[kIdxC[1]]) & st[kIdxA[1]]) ^ st[kIdxC[1]])) st[kIdxA[1]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][1]] st[kIdxD[1]] = st[kIdxC[1]] st[kIdxC[1]] = st[kIdxB[1]] st[kIdxB[1]] = tA[1] tp = uint32(st[kIdxD[2]] + sta[2] + (((st[kIdxB[2]] ^ st[kIdxC[2]]) & st[kIdxA[2]]) ^ st[kIdxC[2]])) st[kIdxA[2]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][2]] st[kIdxD[2]] = st[kIdxC[2]] st[kIdxC[2]] = st[kIdxB[2]] st[kIdxB[2]] = tA[2] tp = uint32(st[kIdxD[3]] + sta[3] + (((st[kIdxB[3]] ^ st[kIdxC[3]]) & st[kIdxA[3]]) ^ st[kIdxC[3]])) st[kIdxA[3]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][3]] st[kIdxD[3]] = st[kIdxC[3]] st[kIdxC[3]] = st[kIdxB[3]] st[kIdxB[3]] = tA[3] tp = uint32(st[kIdxD[4]] + sta[4] + (((st[kIdxB[4]] ^ st[kIdxC[4]]) & st[kIdxA[4]]) ^ st[kIdxC[4]])) st[kIdxA[4]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][4]] st[kIdxD[4]] = st[kIdxC[4]] st[kIdxC[4]] = st[kIdxB[4]] st[kIdxB[4]] = tA[4] tp = uint32(st[kIdxD[5]] + sta[5] + (((st[kIdxB[5]] ^ st[kIdxC[5]]) & st[kIdxA[5]]) ^ st[kIdxC[5]])) st[kIdxA[5]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][5]] st[kIdxD[5]] = st[kIdxC[5]] st[kIdxC[5]] = st[kIdxB[5]] st[kIdxB[5]] = tA[5] tp = uint32(st[kIdxD[6]] + sta[6] + (((st[kIdxB[6]] ^ st[kIdxC[6]]) & st[kIdxA[6]]) ^ st[kIdxC[6]])) st[kIdxA[6]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][6]] st[kIdxD[6]] = st[kIdxC[6]] st[kIdxC[6]] = st[kIdxB[6]] st[kIdxB[6]] = tA[6] tp = uint32(st[kIdxD[7]] + sta[7] + (((st[kIdxB[7]] ^ st[kIdxC[7]]) & st[kIdxA[7]]) ^ st[kIdxC[7]])) st[kIdxA[7]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][7]] st[kIdxD[7]] = st[kIdxC[7]] st[kIdxC[7]] = st[kIdxB[7]] st[kIdxB[7]] = tA[7] tA[0] = ((st[0] << 13) | (st[0] >> (32 - 13))) tA[1] = ((st[1] << 13) | (st[1] >> (32 - 13))) tA[2] = ((st[2] << 13) | (st[2] >> (32 - 13))) tA[3] = ((st[3] << 13) | (st[3] >> (32 - 13))) tA[4] = ((st[4] << 13) | (st[4] >> (32 - 13))) tA[5] = ((st[5] << 13) | (st[5] >> (32 - 13))) tA[6] = ((st[6] << 13) | (st[6] >> (32 - 13))) tA[7] = ((st[7] << 13) | (st[7] >> (32 - 13))) tp = uint32(st[kIdxD[0]] + sta[8] + (((st[kIdxB[0]] ^ st[kIdxC[0]]) & st[kIdxA[0]]) ^ st[kIdxC[0]])) st[kIdxA[0]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][0]] st[kIdxD[0]] = st[kIdxC[0]] st[kIdxC[0]] = st[kIdxB[0]] st[kIdxB[0]] = tA[0] tp = uint32(st[kIdxD[1]] + sta[9] + (((st[kIdxB[1]] ^ st[kIdxC[1]]) & st[kIdxA[1]]) ^ st[kIdxC[1]])) st[kIdxA[1]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][1]] st[kIdxD[1]] = st[kIdxC[1]] st[kIdxC[1]] = st[kIdxB[1]] st[kIdxB[1]] = tA[1] tp = uint32(st[kIdxD[2]] + sta[10] + (((st[kIdxB[2]] ^ st[kIdxC[2]]) & st[kIdxA[2]]) ^ st[kIdxC[2]])) st[kIdxA[2]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][2]] st[kIdxD[2]] = st[kIdxC[2]] st[kIdxC[2]] = st[kIdxB[2]] st[kIdxB[2]] = tA[2] tp = uint32(st[kIdxD[3]] + sta[11] + (((st[kIdxB[3]] ^ st[kIdxC[3]]) & st[kIdxA[3]]) ^ st[kIdxC[3]])) st[kIdxA[3]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][3]] st[kIdxD[3]] = st[kIdxC[3]] st[kIdxC[3]] = st[kIdxB[3]] st[kIdxB[3]] = tA[3] tp = uint32(st[kIdxD[4]] + sta[12] + (((st[kIdxB[4]] ^ st[kIdxC[4]]) & st[kIdxA[4]]) ^ st[kIdxC[4]])) st[kIdxA[4]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][4]] st[kIdxD[4]] = st[kIdxC[4]] st[kIdxC[4]] = st[kIdxB[4]] st[kIdxB[4]] = tA[4] tp = uint32(st[kIdxD[5]] + sta[13] + (((st[kIdxB[5]] ^ st[kIdxC[5]]) & st[kIdxA[5]]) ^ st[kIdxC[5]])) st[kIdxA[5]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][5]] st[kIdxD[5]] = st[kIdxC[5]] st[kIdxC[5]] = st[kIdxB[5]] st[kIdxB[5]] = tA[5] tp = uint32(st[kIdxD[6]] + sta[14] + (((st[kIdxB[6]] ^ st[kIdxC[6]]) & st[kIdxA[6]]) ^ st[kIdxC[6]])) st[kIdxA[6]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][6]] st[kIdxD[6]] = st[kIdxC[6]] st[kIdxC[6]] = st[kIdxB[6]] st[kIdxB[6]] = tA[6] tp = uint32(st[kIdxD[7]] + sta[15] + (((st[kIdxB[7]] ^ st[kIdxC[7]]) & st[kIdxA[7]]) ^ st[kIdxC[7]])) st[kIdxA[7]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][7]] st[kIdxD[7]] = st[kIdxC[7]] st[kIdxC[7]] = st[kIdxB[7]] st[kIdxB[7]] = tA[7] tA[0] = ((st[0] << 10) | (st[0] >> (32 - 10))) tA[1] = ((st[1] << 10) | (st[1] >> (32 - 10))) tA[2] = ((st[2] << 10) | (st[2] >> (32 - 10))) tA[3] = ((st[3] << 10) | (st[3] >> (32 - 10))) tA[4] = ((st[4] << 10) | (st[4] >> (32 - 10))) tA[5] = ((st[5] << 10) | (st[5] >> (32 - 10))) tA[6] = ((st[6] << 10) | (st[6] >> (32 - 10))) tA[7] = ((st[7] << 10) | (st[7] >> (32 - 10))) tp = uint32(st[kIdxD[0]] + sta[16] + (((st[kIdxB[0]] ^ st[kIdxC[0]]) & st[kIdxA[0]]) ^ st[kIdxC[0]])) st[kIdxA[0]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][0]] st[kIdxD[0]] = st[kIdxC[0]] st[kIdxC[0]] = st[kIdxB[0]] st[kIdxB[0]] = tA[0] tp = uint32(st[kIdxD[1]] + sta[17] + (((st[kIdxB[1]] ^ st[kIdxC[1]]) & st[kIdxA[1]]) ^ st[kIdxC[1]])) st[kIdxA[1]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][1]] st[kIdxD[1]] = st[kIdxC[1]] st[kIdxC[1]] = st[kIdxB[1]] st[kIdxB[1]] = tA[1] tp = uint32(st[kIdxD[2]] + sta[18] + (((st[kIdxB[2]] ^ st[kIdxC[2]]) & st[kIdxA[2]]) ^ st[kIdxC[2]])) st[kIdxA[2]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][2]] st[kIdxD[2]] = st[kIdxC[2]] st[kIdxC[2]] = st[kIdxB[2]] st[kIdxB[2]] = tA[2] tp = uint32(st[kIdxD[3]] + sta[19] + (((st[kIdxB[3]] ^ st[kIdxC[3]]) & st[kIdxA[3]]) ^ st[kIdxC[3]])) st[kIdxA[3]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][3]] st[kIdxD[3]] = st[kIdxC[3]] st[kIdxC[3]] = st[kIdxB[3]] st[kIdxB[3]] = tA[3] tp = uint32(st[kIdxD[4]] + sta[20] + (((st[kIdxB[4]] ^ st[kIdxC[4]]) & st[kIdxA[4]]) ^ st[kIdxC[4]])) st[kIdxA[4]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][4]] st[kIdxD[4]] = st[kIdxC[4]] st[kIdxC[4]] = st[kIdxB[4]] st[kIdxB[4]] = tA[4] tp = uint32(st[kIdxD[5]] + sta[21] + (((st[kIdxB[5]] ^ st[kIdxC[5]]) & st[kIdxA[5]]) ^ st[kIdxC[5]])) st[kIdxA[5]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][5]] st[kIdxD[5]] = st[kIdxC[5]] st[kIdxC[5]] = st[kIdxB[5]] st[kIdxB[5]] = tA[5] tp = uint32(st[kIdxD[6]] + sta[22] + (((st[kIdxB[6]] ^ st[kIdxC[6]]) & st[kIdxA[6]]) ^ st[kIdxC[6]])) st[kIdxA[6]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][6]] st[kIdxD[6]] = st[kIdxC[6]] st[kIdxC[6]] = st[kIdxB[6]] st[kIdxB[6]] = tA[6] tp = uint32(st[kIdxD[7]] + sta[23] + (((st[kIdxB[7]] ^ st[kIdxC[7]]) & st[kIdxA[7]]) ^ st[kIdxC[7]])) st[kIdxA[7]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][7]] st[kIdxD[7]] = st[kIdxC[7]] st[kIdxC[7]] = st[kIdxB[7]] st[kIdxB[7]] = tA[7] tA[0] = ((st[0] << 25) | (st[0] >> (32 - 25))) tA[1] = ((st[1] << 25) | (st[1] >> (32 - 25))) tA[2] = ((st[2] << 25) | (st[2] >> (32 - 25))) tA[3] = ((st[3] << 25) | (st[3] >> (32 - 25))) tA[4] = ((st[4] << 25) | (st[4] >> (32 - 25))) tA[5] = ((st[5] << 25) | (st[5] >> (32 - 25))) tA[6] = ((st[6] << 25) | (st[6] >> (32 - 25))) tA[7] = ((st[7] << 25) | (st[7] >> (32 - 25))) tp = uint32(st[kIdxD[0]] + sta[24] + (((st[kIdxB[0]] ^ st[kIdxC[0]]) & st[kIdxA[0]]) ^ st[kIdxC[0]])) st[kIdxA[0]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][0]] st[kIdxD[0]] = st[kIdxC[0]] st[kIdxC[0]] = st[kIdxB[0]] st[kIdxB[0]] = tA[0] tp = uint32(st[kIdxD[1]] + sta[25] + (((st[kIdxB[1]] ^ st[kIdxC[1]]) & st[kIdxA[1]]) ^ st[kIdxC[1]])) st[kIdxA[1]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][1]] st[kIdxD[1]] = st[kIdxC[1]] st[kIdxC[1]] = st[kIdxB[1]] st[kIdxB[1]] = tA[1] tp = uint32(st[kIdxD[2]] + sta[26] + (((st[kIdxB[2]] ^ st[kIdxC[2]]) & st[kIdxA[2]]) ^ st[kIdxC[2]])) st[kIdxA[2]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][2]] st[kIdxD[2]] = st[kIdxC[2]] st[kIdxC[2]] = st[kIdxB[2]] st[kIdxB[2]] = tA[2] tp = uint32(st[kIdxD[3]] + sta[27] + (((st[kIdxB[3]] ^ st[kIdxC[3]]) & st[kIdxA[3]]) ^ st[kIdxC[3]])) st[kIdxA[3]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][3]] st[kIdxD[3]] = st[kIdxC[3]] st[kIdxC[3]] = st[kIdxB[3]] st[kIdxB[3]] = tA[3] tp = uint32(st[kIdxD[4]] + sta[28] + (((st[kIdxB[4]] ^ st[kIdxC[4]]) & st[kIdxA[4]]) ^ st[kIdxC[4]])) st[kIdxA[4]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][4]] st[kIdxD[4]] = st[kIdxC[4]] st[kIdxC[4]] = st[kIdxB[4]] st[kIdxB[4]] = tA[4] tp = uint32(st[kIdxD[5]] + sta[29] + (((st[kIdxB[5]] ^ st[kIdxC[5]]) & st[kIdxA[5]]) ^ st[kIdxC[5]])) st[kIdxA[5]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][5]] st[kIdxD[5]] = st[kIdxC[5]] st[kIdxC[5]] = st[kIdxB[5]] st[kIdxB[5]] = tA[5] tp = uint32(st[kIdxD[6]] + sta[30] + (((st[kIdxB[6]] ^ st[kIdxC[6]]) & st[kIdxA[6]]) ^ st[kIdxC[6]])) st[kIdxA[6]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][6]] st[kIdxD[6]] = st[kIdxC[6]] st[kIdxC[6]] = st[kIdxB[6]] st[kIdxB[6]] = tA[6] tp = uint32(st[kIdxD[7]] + sta[31] + (((st[kIdxB[7]] ^ st[kIdxC[7]]) & st[kIdxA[7]]) ^ st[kIdxC[7]])) st[kIdxA[7]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][7]] st[kIdxD[7]] = st[kIdxC[7]] st[kIdxC[7]] = st[kIdxB[7]] st[kIdxB[7]] = tA[7] } copy(ref.h[:], st[:]) } func mixoutRound(x []uint8, q []int32, xt uintptr) { var tx int32 var d1_0, d1_1, d1_2, d1_3, d1_4, d1_5, d1_6, d1_7 int32 var d2_0, d2_1, d2_2, d2_3, d2_4, d2_5, d2_6, d2_7 int32 xd := xt << 1 { var sa, sb uintptr var x0, x1, x2, x3 int32 var a0, a1, a2, a3 int32 var b0, b1, b2, b3 int32 sb = xd << 2 x0 = int32(x[0]) x1 = int32(x[sb]) x2 = int32(x[2*sb]) x3 = int32(x[3*sb]) a0 = x0 + x2 a1 = x0 + (x2 << 4) a2 = x0 - x2 a3 = x0 - (x2 << 4) b0 = x1 + x3 tx = ((x1 << 2) + (x3 << 6)) b1 = ((tx & 0xFF) - (tx >> 8)) b2 = (x1 << 4) - (x3 << 4) tx = (x1 << 6) + (x3 << 2) b3 = ((tx & 0xFF) - (tx >> 8)) d1_0 = a0 + b0 d1_1 = a1 + b1 d1_2 = a2 + b2 d1_3 = a3 + b3 d1_4 = a0 - b0 d1_5 = a1 - b1 d1_6 = a2 - b2 d1_7 = a3 - b3 sa = xd << 1 sb = xd << 2 x0 = int32(x[sa]) x1 = int32(x[sa+sb]) x2 = int32(x[sa+2*sb]) x3 = int32(x[sa+3*sb]) a0 = x0 + x2 a1 = x0 + (x2 << 4) a2 = x0 - x2 a3 = x0 - (x2 << 4) b0 = x1 + x3 tx = ((x1 << 2) + (x3 << 6)) b1 = ((tx & 0xFF) - (tx >> 8)) b2 = (x1 << 4) - (x3 << 4) tx = (x1 << 6) + (x3 << 2) b3 = ((tx & 0xFF) - (tx >> 8)) d2_0 = a0 + b0 d2_1 = a1 + b1 d2_2 = a2 + b2 d2_3 = a3 + b3 d2_4 = a0 - b0 d2_5 = a1 - b1 d2_6 = a2 - b2 d2_7 = a3 - b3 } q[0] = d1_0 + d2_0 q[1] = d1_1 + (d2_1 << 1) q[2] = d1_2 + (d2_2 << 2) q[3] = d1_3 + (d2_3 << 3) q[4] = d1_4 + (d2_4 << 4) q[5] = d1_5 + (d2_5 << 5) q[6] = d1_6 + (d2_6 << 6) q[7] = d1_7 + (d2_7 << 7) q[8] = d1_0 - d2_0 q[9] = d1_1 - (d2_1 << 1) q[10] = d1_2 - (d2_2 << 2) q[11] = d1_3 - (d2_3 << 3) q[12] = d1_4 - (d2_4 << 4) q[13] = d1_5 - (d2_5 << 5) q[14] = d1_6 - (d2_6 << 6) q[15] = d1_7 - (d2_7 << 7) { var sa, sb uintptr var x0, x1, x2, x3 int32 var a0, a1, a2, a3 int32 var b0, b1, b2, b3 int32 sb = xd << 2 x0 = int32(x[(xd)]) x1 = int32(x[(xd)+sb]) x2 = int32(x[(xd)+2*sb]) x3 = int32(x[(xd)+3*sb]) a0 = x0 + x2 a1 = x0 + (x2 << 4) a2 = x0 - x2 a3 = x0 - (x2 << 4) b0 = x1 + x3 tx = ((x1 << 2) + (x3 << 6)) b1 = ((tx & 0xFF) - (tx >> 8)) b2 = (x1 << 4) - (x3 << 4) tx = (x1 << 6) + (x3 << 2) b3 = ((tx & 0xFF) - (tx >> 8)) d1_0 = a0 + b0 d1_1 = a1 + b1 d1_2 = a2 + b2 d1_3 = a3 + b3 d1_4 = a0 - b0 d1_5 = a1 - b1 d1_6 = a2 - b2 d1_7 = a3 - b3 sa = xd + (xd << 1) sb = xd << 2 x0 = int32(x[sa]) x1 = int32(x[sa+sb]) x2 = int32(x[sa+2*sb]) x3 = int32(x[sa+3*sb]) a0 = x0 + x2 a1 = x0 + (x2 << 4) a2 = x0 - x2 a3 = x0 - (x2 << 4) b0 = x1 + x3 tx = ((x1 << 2) + (x3 << 6)) b1 = ((tx & 0xFF) - (tx >> 8)) b2 = (x1 << 4) - (x3 << 4) tx = (x1 << 6) + (x3 << 2) b3 = ((tx & 0xFF) - (tx >> 8)) d2_0 = a0 + b0 d2_1 = a1 + b1 d2_2 = a2 + b2 d2_3 = a3 + b3 d2_4 = a0 - b0 d2_5 = a1 - b1 d2_6 = a2 - b2 d2_7 = a3 - b3 } q[16+0] = d1_0 + d2_0 q[16+1] = d1_1 + (d2_1 << 1) q[16+2] = d1_2 + (d2_2 << 2) q[16+3] = d1_3 + (d2_3 << 3) q[16+4] = d1_4 + (d2_4 << 4) q[16+5] = d1_5 + (d2_5 << 5) q[16+6] = d1_6 + (d2_6 << 6) q[16+7] = d1_7 + (d2_7 << 7) q[16+8] = d1_0 - d2_0 q[16+9] = d1_1 - (d2_1 << 1) q[16+10] = d1_2 - (d2_2 << 2) q[16+11] = d1_3 - (d2_3 << 3) q[16+12] = d1_4 - (d2_4 << 4) q[16+13] = d1_5 - (d2_5 << 5) q[16+14] = d1_6 - (d2_6 << 6) q[16+15] = d1_7 - (d2_7 << 7) { var u, v uintptr m := q[0] n := q[16] q[0] = m + n q[16] = m - n m = q[u+1] n = q[u+1+16] tx = (n * kAlphaTab[v+1*8]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u+1] = m + tx q[u+1+16] = m - tx m = q[u+2] n = q[u+2+16] tx = (n * kAlphaTab[v+2*8]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u+2] = m + tx q[u+2+16] = m - tx m = q[u+3] n = q[u+3+16] tx = (n * kAlphaTab[v+3*8]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u+3] = m + tx q[u+3+16] = m - tx for u < 16 { u += 4 v += 4 * 8 m = q[u+0] n = q[u+0+16] tx = (n * kAlphaTab[v+0*8]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u+0] = m + tx q[u+0+16] = m - tx m = q[u+1] n = q[u+1+16] tx = (n * kAlphaTab[v+1*8]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u+1] = m + tx q[u+1+16] = m - tx m = q[u+2] n = q[u+2+16] tx = (n * kAlphaTab[v+2*8]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u+2] = m + tx q[u+2+16] = m - tx m = q[u+3] n = q[u+3+16] tx = (n * kAlphaTab[v+3*8]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u+3] = m + tx q[u+3+16] = m - tx } } { var sa, sb uintptr var x0, x1, x2, x3 int32 var a0, a1, a2, a3 int32 var b0, b1, b2, b3 int32 sb = uintptr(xd << 2) x0 = int32(x[xt]) x1 = int32(x[xt+sb]) x2 = int32(x[xt+2*sb]) x3 = int32(x[xt+3*sb]) a0 = x0 + x2 a1 = x0 + (x2 << 4) a2 = x0 - x2 a3 = x0 - (x2 << 4) b0 = x1 + x3 tx = ((x1 << 2) + (x3 << 6)) b1 = ((tx & 0xFF) - (tx >> 8)) b2 = (x1 << 4) - (x3 << 4) tx = (x1 << 6) + (x3 << 2) b3 = ((tx & 0xFF) - (tx >> 8)) d1_0 = a0 + b0 d1_1 = a1 + b1 d1_2 = a2 + b2 d1_3 = a3 + b3 d1_4 = a0 - b0 d1_5 = a1 - b1 d1_6 = a2 - b2 d1_7 = a3 - b3 sa = xt + (xd << 1) sb = xd << 2 x0 = int32(x[sa]) x1 = int32(x[sa+sb]) x2 = int32(x[sa+2*sb]) x3 = int32(x[sa+3*sb]) a0 = x0 + x2 a1 = x0 + (x2 << 4) a2 = x0 - x2 a3 = x0 - (x2 << 4) b0 = x1 + x3 tx = ((x1 << 2) + (x3 << 6)) b1 = ((tx & 0xFF) - (tx >> 8)) b2 = (x1 << 4) - (x3 << 4) tx = (x1 << 6) + (x3 << 2) b3 = ((tx & 0xFF) - (tx >> 8)) d2_0 = a0 + b0 d2_1 = a1 + b1 d2_2 = a2 + b2 d2_3 = a3 + b3 d2_4 = a0 - b0 d2_5 = a1 - b1 d2_6 = a2 - b2 d2_7 = a3 - b3 } q[32+0] = d1_0 + d2_0 q[32+1] = d1_1 + (d2_1 << 1) q[32+2] = d1_2 + (d2_2 << 2) q[32+3] = d1_3 + (d2_3 << 3) q[32+4] = d1_4 + (d2_4 << 4) q[32+5] = d1_5 + (d2_5 << 5) q[32+6] = d1_6 + (d2_6 << 6) q[32+7] = d1_7 + (d2_7 << 7) q[32+8] = d1_0 - d2_0 q[32+9] = d1_1 - (d2_1 << 1) q[32+10] = d1_2 - (d2_2 << 2) q[32+11] = d1_3 - (d2_3 << 3) q[32+12] = d1_4 - (d2_4 << 4) q[32+13] = d1_5 - (d2_5 << 5) q[32+14] = d1_6 - (d2_6 << 6) q[32+15] = d1_7 - (d2_7 << 7) { var sa, sb uintptr var x0, x1, x2, x3 int32 var a0, a1, a2, a3 int32 var b0, b1, b2, b3 int32 sa = (xt) + (xd) sb = xd << 2 x0 = int32(x[sa]) x1 = int32(x[sa+sb]) x2 = int32(x[sa+2*sb]) x3 = int32(x[sa+3*sb]) a0 = x0 + x2 a1 = x0 + (x2 << 4) a2 = x0 - x2 a3 = x0 - (x2 << 4) b0 = x1 + x3 tx = ((x1 << 2) + (x3 << 6)) b1 = ((tx & 0xFF) - (tx >> 8)) b2 = (x1 << 4) - (x3 << 4) tx = (x1 << 6) + (x3 << 2) b3 = ((tx & 0xFF) - (tx >> 8)) d1_0 = a0 + b0 d1_1 = a1 + b1 d1_2 = a2 + b2 d1_3 = a3 + b3 d1_4 = a0 - b0 d1_5 = a1 - b1 d1_6 = a2 - b2 d1_7 = a3 - b3 sa = (xt + xd) + (xd << 1) sb = xd << 2 x0 = int32(x[sa]) x1 = int32(x[sa+sb]) x2 = int32(x[sa+2*sb]) x3 = int32(x[sa+3*sb]) a0 = x0 + x2 a1 = x0 + (x2 << 4) a2 = x0 - x2 a3 = x0 - (x2 << 4) b0 = x1 + x3 tx = ((x1 << 2) + (x3 << 6)) b1 = ((tx & 0xFF) - (tx >> 8)) b2 = (x1 << 4) - (x3 << 4) tx = (x1 << 6) + (x3 << 2) b3 = ((tx & 0xFF) - (tx >> 8)) d2_0 = a0 + b0 d2_1 = a1 + b1 d2_2 = a2 + b2 d2_3 = a3 + b3 d2_4 = a0 - b0 d2_5 = a1 - b1 d2_6 = a2 - b2 d2_7 = a3 - b3 } q[48+0] = d1_0 + d2_0 q[48+1] = d1_1 + (d2_1 << 1) q[48+2] = d1_2 + (d2_2 << 2) q[48+3] = d1_3 + (d2_3 << 3) q[48+4] = d1_4 + (d2_4 << 4) q[48+5] = d1_5 + (d2_5 << 5) q[48+6] = d1_6 + (d2_6 << 6) q[48+7] = d1_7 + (d2_7 << 7) q[48+8] = d1_0 - d2_0 q[48+9] = d1_1 - (d2_1 << 1) q[48+10] = d1_2 - (d2_2 << 2) q[48+11] = d1_3 - (d2_3 << 3) q[48+12] = d1_4 - (d2_4 << 4) q[48+13] = d1_5 - (d2_5 << 5) q[48+14] = d1_6 - (d2_6 << 6) q[48+15] = d1_7 - (d2_7 << 7) { var u, v uintptr m := q[(32)] n := q[(32)+(16)] q[(32)] = m + n q[(32)+(16)] = m - n m = q[(32)+u+1] n = q[(32)+u+1+(16)] tx = (n * kAlphaTab[v+1*(8)]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[(32)+u+1] = m + tx q[(32)+u+1+(16)] = m - tx m = q[(32)+u+2] n = q[(32)+u+2+(16)] tx = (n * kAlphaTab[v+2*(8)]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[(32)+u+2] = m + tx q[(32)+u+2+(16)] = m - tx m = q[(32)+u+3] n = q[(32)+u+3+(16)] tx = (n * kAlphaTab[v+3*(8)]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[(32)+u+3] = m + tx q[(32)+u+3+(16)] = m - tx u = 4 v = 4 * (8) for u < 16 { m = q[(32)+u] n = q[(32)+u+(16)] tx = (n * kAlphaTab[v+0*(8)]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[(32)+u+0] = m + tx q[(32)+u+0+(16)] = m - tx m = q[(32)+u+1] n = q[(32)+u+1+(16)] tx = (n * kAlphaTab[v+1*(8)]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[(32)+u+1] = m + tx q[(32)+u+1+(16)] = m - tx m = q[(32)+u+2] n = q[(32)+u+2+(16)] tx = (n * kAlphaTab[v+2*(8)]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[(32)+u+2] = m + tx q[(32)+u+2+(16)] = m - tx m = q[(32)+u+3] n = q[(32)+u+3+(16)] tx = (n * kAlphaTab[v+3*(8)]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[(32)+u+3] = m + tx q[(32)+u+3+(16)] = m - tx u += 4 v += 4 * (8) } } { var u, v uintptr m := q[0] n := q[32] q[0] = m + n q[32] = m - n m = q[u+1] n = q[u+1+32] tx = (n * kAlphaTab[v+1*4]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u+1] = m + tx q[u+1+32] = m - tx m = q[u+2] n = q[u+2+32] tx = (n * kAlphaTab[v+2*4]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u+2] = m + tx q[u+2+32] = m - tx m = q[u+3] n = q[u+3+32] tx = (n * kAlphaTab[v+3*4]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u+3] = m + tx q[u+3+32] = m - tx u = 4 v = 4 * 4 for u < 32 { m = q[u] n = q[u+32] tx = (n * kAlphaTab[v+0*4]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u] = m + tx q[u+(32)] = m - tx m = q[u+1] n = q[u+1+32] tx = (n * kAlphaTab[v+1*4]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u+1] = m + tx q[u+1+32] = m - tx m = q[u+2] n = q[u+2+32] tx = (n * kAlphaTab[v+2*4]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u+2] = m + tx q[u+2+32] = m - tx m = q[u+3] n = q[u+3+32] tx = (n * kAlphaTab[v+3*4]) tx = ((tx & 0xFFFF) + (tx >> 16)) q[u+3] = m + tx q[u+3+32] = m - tx u += 4 v += 4 * 4 } } } func mixinRound(h, w []uint32, isp, p0, p1, p2, p3 uint32) { var tA [8]uint32 var tp uint32 tA[0] = ((h[0] << p0) | (h[0] >> (32 - p0))) tA[1] = ((h[1] << p0) | (h[1] >> (32 - p0))) tA[2] = ((h[2] << p0) | (h[2] >> (32 - p0))) tA[3] = ((h[3] << p0) | (h[3] >> (32 - p0))) tA[4] = ((h[4] << p0) | (h[4] >> (32 - p0))) tA[5] = ((h[5] << p0) | (h[5] >> (32 - p0))) tA[6] = ((h[6] << p0) | (h[6] >> (32 - p0))) tA[7] = ((h[7] << p0) | (h[7] >> (32 - p0))) tp = uint32(h[kIdxD[0]] + w[0] + (((h[kIdxB[0]] ^ h[kIdxC[0]]) & h[kIdxA[0]]) ^ h[kIdxC[0]])) h[kIdxA[0]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]] h[kIdxD[0]] = h[kIdxC[0]] h[kIdxC[0]] = h[kIdxB[0]] h[kIdxB[0]] = tA[0] tp = uint32(h[kIdxD[1]] + w[1] + (((h[kIdxB[1]] ^ h[kIdxC[1]]) & h[kIdxA[1]]) ^ h[kIdxC[1]])) h[kIdxA[1]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^1] h[kIdxD[1]] = h[kIdxC[1]] h[kIdxC[1]] = h[kIdxB[1]] h[kIdxB[1]] = tA[1] tp = uint32(h[kIdxD[2]] + w[2] + (((h[kIdxB[2]] ^ h[kIdxC[2]]) & h[kIdxA[2]]) ^ h[kIdxC[2]])) h[kIdxA[2]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^2] h[kIdxD[2]] = h[kIdxC[2]] h[kIdxC[2]] = h[kIdxB[2]] h[kIdxB[2]] = tA[2] tp = uint32(h[kIdxD[3]] + w[3] + (((h[kIdxB[3]] ^ h[kIdxC[3]]) & h[kIdxA[3]]) ^ h[kIdxC[3]])) h[kIdxA[3]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^3] h[kIdxD[3]] = h[kIdxC[3]] h[kIdxC[3]] = h[kIdxB[3]] h[kIdxB[3]] = tA[3] tp = uint32(h[kIdxD[4]] + w[4] + (((h[kIdxB[4]] ^ h[kIdxC[4]]) & h[kIdxA[4]]) ^ h[kIdxC[4]])) h[kIdxA[4]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^4] h[kIdxD[4]] = h[kIdxC[4]] h[kIdxC[4]] = h[kIdxB[4]] h[kIdxB[4]] = tA[4] tp = uint32(h[kIdxD[5]] + w[5] + (((h[kIdxB[5]] ^ h[kIdxC[5]]) & h[kIdxA[5]]) ^ h[kIdxC[5]])) h[kIdxA[5]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^5] h[kIdxD[5]] = h[kIdxC[5]] h[kIdxC[5]] = h[kIdxB[5]] h[kIdxB[5]] = tA[5] tp = uint32(h[kIdxD[6]] + w[6] + (((h[kIdxB[6]] ^ h[kIdxC[6]]) & h[kIdxA[6]]) ^ h[kIdxC[6]])) h[kIdxA[6]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^6] h[kIdxD[6]] = h[kIdxC[6]] h[kIdxC[6]] = h[kIdxB[6]] h[kIdxB[6]] = tA[6] tp = uint32(h[kIdxD[7]] + w[7] + (((h[kIdxB[7]] ^ h[kIdxC[7]]) & h[kIdxA[7]]) ^ h[kIdxC[7]])) h[kIdxA[7]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^7] h[kIdxD[7]] = h[kIdxC[7]] h[kIdxC[7]] = h[kIdxB[7]] h[kIdxB[7]] = tA[7] tA[0] = ((h[0] << p1) | (h[0] >> (32 - p1))) tA[1] = ((h[1] << p1) | (h[1] >> (32 - p1))) tA[2] = ((h[2] << p1) | (h[2] >> (32 - p1))) tA[3] = ((h[3] << p1) | (h[3] >> (32 - p1))) tA[4] = ((h[4] << p1) | (h[4] >> (32 - p1))) tA[5] = ((h[5] << p1) | (h[5] >> (32 - p1))) tA[6] = ((h[6] << p1) | (h[6] >> (32 - p1))) tA[7] = ((h[7] << p1) | (h[7] >> (32 - p1))) tp = uint32(h[kIdxD[0]] + w[8] + (((h[kIdxB[0]] ^ h[kIdxC[0]]) & h[kIdxA[0]]) ^ h[kIdxC[0]])) h[kIdxA[0]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]] h[kIdxD[0]] = h[kIdxC[0]] h[kIdxC[0]] = h[kIdxB[0]] h[kIdxB[0]] = tA[0] tp = uint32(h[kIdxD[1]] + w[9] + (((h[kIdxB[1]] ^ h[kIdxC[1]]) & h[kIdxA[1]]) ^ h[kIdxC[1]])) h[kIdxA[1]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^1] h[kIdxD[1]] = h[kIdxC[1]] h[kIdxC[1]] = h[kIdxB[1]] h[kIdxB[1]] = tA[1] tp = uint32(h[kIdxD[2]] + w[10] + (((h[kIdxB[2]] ^ h[kIdxC[2]]) & h[kIdxA[2]]) ^ h[kIdxC[2]])) h[kIdxA[2]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^2] h[kIdxD[2]] = h[kIdxC[2]] h[kIdxC[2]] = h[kIdxB[2]] h[kIdxB[2]] = tA[2] tp = uint32(h[kIdxD[3]] + w[11] + (((h[kIdxB[3]] ^ h[kIdxC[3]]) & h[kIdxA[3]]) ^ h[kIdxC[3]])) h[kIdxA[3]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^3] h[kIdxD[3]] = h[kIdxC[3]] h[kIdxC[3]] = h[kIdxB[3]] h[kIdxB[3]] = tA[3] tp = uint32(h[kIdxD[4]] + w[12] + (((h[kIdxB[4]] ^ h[kIdxC[4]]) & h[kIdxA[4]]) ^ h[kIdxC[4]])) h[kIdxA[4]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^4] h[kIdxD[4]] = h[kIdxC[4]] h[kIdxC[4]] = h[kIdxB[4]] h[kIdxB[4]] = tA[4] tp = uint32(h[kIdxD[5]] + w[13] + (((h[kIdxB[5]] ^ h[kIdxC[5]]) & h[kIdxA[5]]) ^ h[kIdxC[5]])) h[kIdxA[5]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^5] h[kIdxD[5]] = h[kIdxC[5]] h[kIdxC[5]] = h[kIdxB[5]] h[kIdxB[5]] = tA[5] tp = uint32(h[kIdxD[6]] + w[14] + (((h[kIdxB[6]] ^ h[kIdxC[6]]) & h[kIdxA[6]]) ^ h[kIdxC[6]])) h[kIdxA[6]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^6] h[kIdxD[6]] = h[kIdxC[6]] h[kIdxC[6]] = h[kIdxB[6]] h[kIdxB[6]] = tA[6] tp = uint32(h[kIdxD[7]] + w[15] + (((h[kIdxB[7]] ^ h[kIdxC[7]]) & h[kIdxA[7]]) ^ h[kIdxC[7]])) h[kIdxA[7]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^7] h[kIdxD[7]] = h[kIdxC[7]] h[kIdxC[7]] = h[kIdxB[7]] h[kIdxB[7]] = tA[7] tA[0] = ((h[0] << p2) | (h[0] >> (32 - p2))) tA[1] = ((h[1] << p2) | (h[1] >> (32 - p2))) tA[2] = ((h[2] << p2) | (h[2] >> (32 - p2))) tA[3] = ((h[3] << p2) | (h[3] >> (32 - p2))) tA[4] = ((h[4] << p2) | (h[4] >> (32 - p2))) tA[5] = ((h[5] << p2) | (h[5] >> (32 - p2))) tA[6] = ((h[6] << p2) | (h[6] >> (32 - p2))) tA[7] = ((h[7] << p2) | (h[7] >> (32 - p2))) tp = uint32(h[kIdxD[0]] + w[16] + (((h[kIdxB[0]] ^ h[kIdxC[0]]) & h[kIdxA[0]]) ^ h[kIdxC[0]])) h[kIdxA[0]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]] h[kIdxD[0]] = h[kIdxC[0]] h[kIdxC[0]] = h[kIdxB[0]] h[kIdxB[0]] = tA[0] tp = uint32(h[kIdxD[1]] + w[17] + (((h[kIdxB[1]] ^ h[kIdxC[1]]) & h[kIdxA[1]]) ^ h[kIdxC[1]])) h[kIdxA[1]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^1] h[kIdxD[1]] = h[kIdxC[1]] h[kIdxC[1]] = h[kIdxB[1]] h[kIdxB[1]] = tA[1] tp = uint32(h[kIdxD[2]] + w[18] + (((h[kIdxB[2]] ^ h[kIdxC[2]]) & h[kIdxA[2]]) ^ h[kIdxC[2]])) h[kIdxA[2]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^2] h[kIdxD[2]] = h[kIdxC[2]] h[kIdxC[2]] = h[kIdxB[2]] h[kIdxB[2]] = tA[2] tp = uint32(h[kIdxD[3]] + w[19] + (((h[kIdxB[3]] ^ h[kIdxC[3]]) & h[kIdxA[3]]) ^ h[kIdxC[3]])) h[kIdxA[3]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^3] h[kIdxD[3]] = h[kIdxC[3]] h[kIdxC[3]] = h[kIdxB[3]] h[kIdxB[3]] = tA[3] tp = uint32(h[kIdxD[4]] + w[20] + (((h[kIdxB[4]] ^ h[kIdxC[4]]) & h[kIdxA[4]]) ^ h[kIdxC[4]])) h[kIdxA[4]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^4] h[kIdxD[4]] = h[kIdxC[4]] h[kIdxC[4]] = h[kIdxB[4]] h[kIdxB[4]] = tA[4] tp = uint32(h[kIdxD[5]] + w[21] + (((h[kIdxB[5]] ^ h[kIdxC[5]]) & h[kIdxA[5]]) ^ h[kIdxC[5]])) h[kIdxA[5]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^5] h[kIdxD[5]] = h[kIdxC[5]] h[kIdxC[5]] = h[kIdxB[5]] h[kIdxB[5]] = tA[5] tp = uint32(h[kIdxD[6]] + w[22] + (((h[kIdxB[6]] ^ h[kIdxC[6]]) & h[kIdxA[6]]) ^ h[kIdxC[6]])) h[kIdxA[6]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^6] h[kIdxD[6]] = h[kIdxC[6]] h[kIdxC[6]] = h[kIdxB[6]] h[kIdxB[6]] = tA[6] tp = uint32(h[kIdxD[7]] + w[23] + (((h[kIdxB[7]] ^ h[kIdxC[7]]) & h[kIdxA[7]]) ^ h[kIdxC[7]])) h[kIdxA[7]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^7] h[kIdxD[7]] = h[kIdxC[7]] h[kIdxC[7]] = h[kIdxB[7]] h[kIdxB[7]] = tA[7] tA[0] = ((h[0] << p3) | (h[0] >> (32 - p3))) tA[1] = ((h[1] << p3) | (h[1] >> (32 - p3))) tA[2] = ((h[2] << p3) | (h[2] >> (32 - p3))) tA[3] = ((h[3] << p3) | (h[3] >> (32 - p3))) tA[4] = ((h[4] << p3) | (h[4] >> (32 - p3))) tA[5] = ((h[5] << p3) | (h[5] >> (32 - p3))) tA[6] = ((h[6] << p3) | (h[6] >> (32 - p3))) tA[7] = ((h[7] << p3) | (h[7] >> (32 - p3))) tp = uint32(h[kIdxD[0]] + w[24] + (((h[kIdxB[0]] ^ h[kIdxC[0]]) & h[kIdxA[0]]) ^ h[kIdxC[0]])) h[kIdxA[0]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]] h[kIdxD[0]] = h[kIdxC[0]] h[kIdxC[0]] = h[kIdxB[0]] h[kIdxB[0]] = tA[0] tp = uint32(h[kIdxD[1]] + w[25] + (((h[kIdxB[1]] ^ h[kIdxC[1]]) & h[kIdxA[1]]) ^ h[kIdxC[1]])) h[kIdxA[1]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^1] h[kIdxD[1]] = h[kIdxC[1]] h[kIdxC[1]] = h[kIdxB[1]] h[kIdxB[1]] = tA[1] tp = uint32(h[kIdxD[2]] + w[26] + (((h[kIdxB[2]] ^ h[kIdxC[2]]) & h[kIdxA[2]]) ^ h[kIdxC[2]])) h[kIdxA[2]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^2] h[kIdxD[2]] = h[kIdxC[2]] h[kIdxC[2]] = h[kIdxB[2]] h[kIdxB[2]] = tA[2] tp = uint32(h[kIdxD[3]] + w[27] + (((h[kIdxB[3]] ^ h[kIdxC[3]]) & h[kIdxA[3]]) ^ h[kIdxC[3]])) h[kIdxA[3]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^3] h[kIdxD[3]] = h[kIdxC[3]] h[kIdxC[3]] = h[kIdxB[3]] h[kIdxB[3]] = tA[3] tp = uint32(h[kIdxD[4]] + w[28] + (((h[kIdxB[4]] ^ h[kIdxC[4]]) & h[kIdxA[4]]) ^ h[kIdxC[4]])) h[kIdxA[4]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^4] h[kIdxD[4]] = h[kIdxC[4]] h[kIdxC[4]] = h[kIdxB[4]] h[kIdxB[4]] = tA[4] tp = uint32(h[kIdxD[5]] + w[29] + (((h[kIdxB[5]] ^ h[kIdxC[5]]) & h[kIdxA[5]]) ^ h[kIdxC[5]])) h[kIdxA[5]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^5] h[kIdxD[5]] = h[kIdxC[5]] h[kIdxC[5]] = h[kIdxB[5]] h[kIdxB[5]] = tA[5] tp = uint32(h[kIdxD[6]] + w[30] + (((h[kIdxB[6]] ^ h[kIdxC[6]]) & h[kIdxA[6]]) ^ h[kIdxC[6]])) h[kIdxA[6]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^6] h[kIdxD[6]] = h[kIdxC[6]] h[kIdxC[6]] = h[kIdxB[6]] h[kIdxB[6]] = tA[6] tp = uint32(h[kIdxD[7]] + w[31] + (((h[kIdxB[7]] ^ h[kIdxC[7]]) & h[kIdxA[7]]) ^ h[kIdxC[7]])) h[kIdxA[7]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^7] h[kIdxD[7]] = h[kIdxC[7]] h[kIdxC[7]] = h[kIdxB[7]] h[kIdxB[7]] = tA[7] tA[0] = ((h[0] << p0) | (h[0] >> (32 - p0))) tA[1] = ((h[1] << p0) | (h[1] >> (32 - p0))) tA[2] = ((h[2] << p0) | (h[2] >> (32 - p0))) tA[3] = ((h[3] << p0) | (h[3] >> (32 - p0))) tA[4] = ((h[4] << p0) | (h[4] >> (32 - p0))) tA[5] = ((h[5] << p0) | (h[5] >> (32 - p0))) tA[6] = ((h[6] << p0) | (h[6] >> (32 - p0))) tA[7] = ((h[7] << p0) | (h[7] >> (32 - p0))) tp = uint32(h[kIdxD[0]] + w[32] + ((h[kIdxA[0]] & h[kIdxB[0]]) | ((h[kIdxA[0]] | h[kIdxB[0]]) & h[kIdxC[0]]))) h[kIdxA[0]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]] h[kIdxD[0]] = h[kIdxC[0]] h[kIdxC[0]] = h[kIdxB[0]] h[kIdxB[0]] = tA[0] tp = uint32(h[kIdxD[1]] + w[33] + ((h[kIdxA[1]] & h[kIdxB[1]]) | ((h[kIdxA[1]] | h[kIdxB[1]]) & h[kIdxC[1]]))) h[kIdxA[1]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^1] h[kIdxD[1]] = h[kIdxC[1]] h[kIdxC[1]] = h[kIdxB[1]] h[kIdxB[1]] = tA[1] tp = uint32(h[kIdxD[2]] + w[34] + ((h[kIdxA[2]] & h[kIdxB[2]]) | ((h[kIdxA[2]] | h[kIdxB[2]]) & h[kIdxC[2]]))) h[kIdxA[2]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^2] h[kIdxD[2]] = h[kIdxC[2]] h[kIdxC[2]] = h[kIdxB[2]] h[kIdxB[2]] = tA[2] tp = uint32(h[kIdxD[3]] + w[35] + ((h[kIdxA[3]] & h[kIdxB[3]]) | ((h[kIdxA[3]] | h[kIdxB[3]]) & h[kIdxC[3]]))) h[kIdxA[3]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^3] h[kIdxD[3]] = h[kIdxC[3]] h[kIdxC[3]] = h[kIdxB[3]] h[kIdxB[3]] = tA[3] tp = uint32(h[kIdxD[4]] + w[36] + ((h[kIdxA[4]] & h[kIdxB[4]]) | ((h[kIdxA[4]] | h[kIdxB[4]]) & h[kIdxC[4]]))) h[kIdxA[4]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^4] h[kIdxD[4]] = h[kIdxC[4]] h[kIdxC[4]] = h[kIdxB[4]] h[kIdxB[4]] = tA[4] tp = uint32(h[kIdxD[5]] + w[37] + ((h[kIdxA[5]] & h[kIdxB[5]]) | ((h[kIdxA[5]] | h[kIdxB[5]]) & h[kIdxC[5]]))) h[kIdxA[5]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^5] h[kIdxD[5]] = h[kIdxC[5]] h[kIdxC[5]] = h[kIdxB[5]] h[kIdxB[5]] = tA[5] tp = uint32(h[kIdxD[6]] + w[38] + ((h[kIdxA[6]] & h[kIdxB[6]]) | ((h[kIdxA[6]] | h[kIdxB[6]]) & h[kIdxC[6]]))) h[kIdxA[6]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^6] h[kIdxD[6]] = h[kIdxC[6]] h[kIdxC[6]] = h[kIdxB[6]] h[kIdxB[6]] = tA[6] tp = uint32(h[kIdxD[7]] + w[39] + ((h[kIdxA[7]] & h[kIdxB[7]]) | ((h[kIdxA[7]] | h[kIdxB[7]]) & h[kIdxC[7]]))) h[kIdxA[7]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^7] h[kIdxD[7]] = h[kIdxC[7]] h[kIdxC[7]] = h[kIdxB[7]] h[kIdxB[7]] = tA[7] tA[0] = ((h[0] << p1) | (h[0] >> (32 - p1))) tA[1] = ((h[1] << p1) | (h[1] >> (32 - p1))) tA[2] = ((h[2] << p1) | (h[2] >> (32 - p1))) tA[3] = ((h[3] << p1) | (h[3] >> (32 - p1))) tA[4] = ((h[4] << p1) | (h[4] >> (32 - p1))) tA[5] = ((h[5] << p1) | (h[5] >> (32 - p1))) tA[6] = ((h[6] << p1) | (h[6] >> (32 - p1))) tA[7] = ((h[7] << p1) | (h[7] >> (32 - p1))) tp = uint32(h[kIdxD[0]] + w[40] + ((h[kIdxA[0]] & h[kIdxB[0]]) | ((h[kIdxA[0]] | h[kIdxB[0]]) & h[kIdxC[0]]))) h[kIdxA[0]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]] h[kIdxD[0]] = h[kIdxC[0]] h[kIdxC[0]] = h[kIdxB[0]] h[kIdxB[0]] = tA[0] tp = uint32(h[kIdxD[1]] + w[41] + ((h[kIdxA[1]] & h[kIdxB[1]]) | ((h[kIdxA[1]] | h[kIdxB[1]]) & h[kIdxC[1]]))) h[kIdxA[1]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^1] h[kIdxD[1]] = h[kIdxC[1]] h[kIdxC[1]] = h[kIdxB[1]] h[kIdxB[1]] = tA[1] tp = uint32(h[kIdxD[2]] + w[42] + ((h[kIdxA[2]] & h[kIdxB[2]]) | ((h[kIdxA[2]] | h[kIdxB[2]]) & h[kIdxC[2]]))) h[kIdxA[2]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^2] h[kIdxD[2]] = h[kIdxC[2]] h[kIdxC[2]] = h[kIdxB[2]] h[kIdxB[2]] = tA[2] tp = uint32(h[kIdxD[3]] + w[43] + ((h[kIdxA[3]] & h[kIdxB[3]]) | ((h[kIdxA[3]] | h[kIdxB[3]]) & h[kIdxC[3]]))) h[kIdxA[3]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^3] h[kIdxD[3]] = h[kIdxC[3]] h[kIdxC[3]] = h[kIdxB[3]] h[kIdxB[3]] = tA[3] tp = uint32(h[kIdxD[4]] + w[44] + ((h[kIdxA[4]] & h[kIdxB[4]]) | ((h[kIdxA[4]] | h[kIdxB[4]]) & h[kIdxC[4]]))) h[kIdxA[4]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^4] h[kIdxD[4]] = h[kIdxC[4]] h[kIdxC[4]] = h[kIdxB[4]] h[kIdxB[4]] = tA[4] tp = uint32(h[kIdxD[5]] + w[45] + ((h[kIdxA[5]] & h[kIdxB[5]]) | ((h[kIdxA[5]] | h[kIdxB[5]]) & h[kIdxC[5]]))) h[kIdxA[5]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^5] h[kIdxD[5]] = h[kIdxC[5]] h[kIdxC[5]] = h[kIdxB[5]] h[kIdxB[5]] = tA[5] tp = uint32(h[kIdxD[6]] + w[46] + ((h[kIdxA[6]] & h[kIdxB[6]]) | ((h[kIdxA[6]] | h[kIdxB[6]]) & h[kIdxC[6]]))) h[kIdxA[6]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^6] h[kIdxD[6]] = h[kIdxC[6]] h[kIdxC[6]] = h[kIdxB[6]] h[kIdxB[6]] = tA[6] tp = uint32(h[kIdxD[7]] + w[47] + ((h[kIdxA[7]] & h[kIdxB[7]]) | ((h[kIdxA[7]] | h[kIdxB[7]]) & h[kIdxC[7]]))) h[kIdxA[7]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^7] h[kIdxD[7]] = h[kIdxC[7]] h[kIdxC[7]] = h[kIdxB[7]] h[kIdxB[7]] = tA[7] tA[0] = ((h[0] << p2) | (h[0] >> (32 - p2))) tA[1] = ((h[1] << p2) | (h[1] >> (32 - p2))) tA[2] = ((h[2] << p2) | (h[2] >> (32 - p2))) tA[3] = ((h[3] << p2) | (h[3] >> (32 - p2))) tA[4] = ((h[4] << p2) | (h[4] >> (32 - p2))) tA[5] = ((h[5] << p2) | (h[5] >> (32 - p2))) tA[6] = ((h[6] << p2) | (h[6] >> (32 - p2))) tA[7] = ((h[7] << p2) | (h[7] >> (32 - p2))) tp = uint32(h[kIdxD[0]] + w[48] + ((h[kIdxA[0]] & h[kIdxB[0]]) | ((h[kIdxA[0]] | h[kIdxB[0]]) & h[kIdxC[0]]))) h[kIdxA[0]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]] h[kIdxD[0]] = h[kIdxC[0]] h[kIdxC[0]] = h[kIdxB[0]] h[kIdxB[0]] = tA[0] tp = uint32(h[kIdxD[1]] + w[49] + ((h[kIdxA[1]] & h[kIdxB[1]]) | ((h[kIdxA[1]] | h[kIdxB[1]]) & h[kIdxC[1]]))) h[kIdxA[1]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^1] h[kIdxD[1]] = h[kIdxC[1]] h[kIdxC[1]] = h[kIdxB[1]] h[kIdxB[1]] = tA[1] tp = uint32(h[kIdxD[2]] + w[50] + ((h[kIdxA[2]] & h[kIdxB[2]]) | ((h[kIdxA[2]] | h[kIdxB[2]]) & h[kIdxC[2]]))) h[kIdxA[2]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^2] h[kIdxD[2]] = h[kIdxC[2]] h[kIdxC[2]] = h[kIdxB[2]] h[kIdxB[2]] = tA[2] tp = uint32(h[kIdxD[3]] + w[51] + ((h[kIdxA[3]] & h[kIdxB[3]]) | ((h[kIdxA[3]] | h[kIdxB[3]]) & h[kIdxC[3]]))) h[kIdxA[3]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^3] h[kIdxD[3]] = h[kIdxC[3]] h[kIdxC[3]] = h[kIdxB[3]] h[kIdxB[3]] = tA[3] tp = uint32(h[kIdxD[4]] + w[52] + ((h[kIdxA[4]] & h[kIdxB[4]]) | ((h[kIdxA[4]] | h[kIdxB[4]]) & h[kIdxC[4]]))) h[kIdxA[4]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^4] h[kIdxD[4]] = h[kIdxC[4]] h[kIdxC[4]] = h[kIdxB[4]] h[kIdxB[4]] = tA[4] tp = uint32(h[kIdxD[5]] + w[53] + ((h[kIdxA[5]] & h[kIdxB[5]]) | ((h[kIdxA[5]] | h[kIdxB[5]]) & h[kIdxC[5]]))) h[kIdxA[5]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^5] h[kIdxD[5]] = h[kIdxC[5]] h[kIdxC[5]] = h[kIdxB[5]] h[kIdxB[5]] = tA[5] tp = uint32(h[kIdxD[6]] + w[54] + ((h[kIdxA[6]] & h[kIdxB[6]]) | ((h[kIdxA[6]] | h[kIdxB[6]]) & h[kIdxC[6]]))) h[kIdxA[6]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^6] h[kIdxD[6]] = h[kIdxC[6]] h[kIdxC[6]] = h[kIdxB[6]] h[kIdxB[6]] = tA[6] tp = uint32(h[kIdxD[7]] + w[55] + ((h[kIdxA[7]] & h[kIdxB[7]]) | ((h[kIdxA[7]] | h[kIdxB[7]]) & h[kIdxC[7]]))) h[kIdxA[7]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^7] h[kIdxD[7]] = h[kIdxC[7]] h[kIdxC[7]] = h[kIdxB[7]] h[kIdxB[7]] = tA[7] tA[0] = ((h[0] << p3) | (h[0] >> (32 - p3))) tA[1] = ((h[1] << p3) | (h[1] >> (32 - p3))) tA[2] = ((h[2] << p3) | (h[2] >> (32 - p3))) tA[3] = ((h[3] << p3) | (h[3] >> (32 - p3))) tA[4] = ((h[4] << p3) | (h[4] >> (32 - p3))) tA[5] = ((h[5] << p3) | (h[5] >> (32 - p3))) tA[6] = ((h[6] << p3) | (h[6] >> (32 - p3))) tA[7] = ((h[7] << p3) | (h[7] >> (32 - p3))) tp = uint32(h[kIdxD[0]] + w[56] + ((h[kIdxA[0]] & h[kIdxB[0]]) | ((h[kIdxA[0]] | h[kIdxB[0]]) & h[kIdxC[0]]))) h[kIdxA[0]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]] h[kIdxD[0]] = h[kIdxC[0]] h[kIdxC[0]] = h[kIdxB[0]] h[kIdxB[0]] = tA[0] tp = uint32(h[kIdxD[1]] + w[57] + ((h[kIdxA[1]] & h[kIdxB[1]]) | ((h[kIdxA[1]] | h[kIdxB[1]]) & h[kIdxC[1]]))) h[kIdxA[1]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^1] h[kIdxD[1]] = h[kIdxC[1]] h[kIdxC[1]] = h[kIdxB[1]] h[kIdxB[1]] = tA[1] tp = uint32(h[kIdxD[2]] + w[58] + ((h[kIdxA[2]] & h[kIdxB[2]]) | ((h[kIdxA[2]] | h[kIdxB[2]]) & h[kIdxC[2]]))) h[kIdxA[2]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^2] h[kIdxD[2]] = h[kIdxC[2]] h[kIdxC[2]] = h[kIdxB[2]] h[kIdxB[2]] = tA[2] tp = uint32(h[kIdxD[3]] + w[59] + ((h[kIdxA[3]] & h[kIdxB[3]]) | ((h[kIdxA[3]] | h[kIdxB[3]]) & h[kIdxC[3]]))) h[kIdxA[3]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^3] h[kIdxD[3]] = h[kIdxC[3]] h[kIdxC[3]] = h[kIdxB[3]] h[kIdxB[3]] = tA[3] tp = uint32(h[kIdxD[4]] + w[60] + ((h[kIdxA[4]] & h[kIdxB[4]]) | ((h[kIdxA[4]] | h[kIdxB[4]]) & h[kIdxC[4]]))) h[kIdxA[4]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^4] h[kIdxD[4]] = h[kIdxC[4]] h[kIdxC[4]] = h[kIdxB[4]] h[kIdxB[4]] = tA[4] tp = uint32(h[kIdxD[5]] + w[61] + ((h[kIdxA[5]] & h[kIdxB[5]]) | ((h[kIdxA[5]] | h[kIdxB[5]]) & h[kIdxC[5]]))) h[kIdxA[5]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^5] h[kIdxD[5]] = h[kIdxC[5]] h[kIdxC[5]] = h[kIdxB[5]] h[kIdxB[5]] = tA[5] tp = uint32(h[kIdxD[6]] + w[62] + ((h[kIdxA[6]] & h[kIdxB[6]]) | ((h[kIdxA[6]] | h[kIdxB[6]]) & h[kIdxC[6]]))) h[kIdxA[6]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^6] h[kIdxD[6]] = h[kIdxC[6]] h[kIdxC[6]] = h[kIdxB[6]] h[kIdxB[6]] = tA[6] tp = uint32(h[kIdxD[7]] + w[63] + ((h[kIdxA[7]] & h[kIdxB[7]]) | ((h[kIdxA[7]] | h[kIdxB[7]]) & h[kIdxC[7]]))) h[kIdxA[7]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^7] h[kIdxD[7]] = h[kIdxC[7]] h[kIdxC[7]] = h[kIdxB[7]] h[kIdxB[7]] = tA[7] } //////////////// var kInit = []uint32{ uint32(0x0BA16B95), uint32(0x72F999AD), uint32(0x9FECC2AE), uint32(0xBA3264FC), uint32(0x5E894929), uint32(0x8E9F30E5), uint32(0x2F1DAA37), uint32(0xF0F2C558), uint32(0xAC506643), uint32(0xA90635A5), uint32(0xE25B878B), uint32(0xAAB7878F), uint32(0x88817F7A), uint32(0x0A02892B), uint32(0x559A7550), uint32(0x598F657E), uint32(0x7EEF60A1), uint32(0x6B70E3E8), uint32(0x9C1714D1), uint32(0xB958E2A8), uint32(0xAB02675E), uint32(0xED1C014F), uint32(0xCD8D65BB), uint32(0xFDB7A257), uint32(0x09254899), uint32(0xD699C7BC), uint32(0x9019B6DC), uint32(0x2B9022E4), uint32(0x8FA14956), uint32(0x21BF9BD3), uint32(0xB94D0943), uint32(0x6FFDDC22), } var kIdxA = [8]uint8{ 0, 1, 2, 3, 4, 5, 6, 7, } var kIdxB = [8]uint8{ 8, 9, 10, 11, 12, 13, 14, 15, } var kIdxC = [8]uint8{ 16, 17, 18, 19, 20, 21, 22, 23, } var kIdxD = [8]uint8{ 24, 25, 26, 27, 28, 29, 30, 31, } var kPrems = []uint8{ 1, 6, 2, 3, 5, 7, 4, 1, 6, 2, 3, } var kPrem = [7][8]uint8{ {1, 0, 3, 2, 5, 4, 7, 6}, {6, 7, 4, 5, 2, 3, 0, 1}, {2, 3, 0, 1, 6, 7, 4, 5}, {3, 2, 1, 0, 7, 6, 5, 4}, {5, 4, 7, 6, 1, 0, 3, 2}, {7, 6, 5, 4, 3, 2, 1, 0}, {4, 5, 6, 7, 0, 1, 2, 3}, } var wbp = [32]uintptr{ 4 << 4, 6 << 4, 0 << 4, 2 << 4, 7 << 4, 5 << 4, 3 << 4, 1 << 4, 15 << 4, 11 << 4, 12 << 4, 8 << 4, 9 << 4, 13 << 4, 10 << 4, 14 << 4, 17 << 4, 18 << 4, 23 << 4, 20 << 4, 22 << 4, 21 << 4, 16 << 4, 19 << 4, 30 << 4, 24 << 4, 25 << 4, 31 << 4, 27 << 4, 29 << 4, 28 << 4, 26 << 4, } var kAlphaTab = []int32{ 1, 41, 139, 45, 46, 87, 226, 14, 60, 147, 116, 130, 190, 80, 196, 69, 2, 82, 21, 90, 92, 174, 195, 28, 120, 37, 232, 3, 123, 160, 135, 138, 4, 164, 42, 180, 184, 91, 133, 56, 240, 74, 207, 6, 246, 63, 13, 19, 8, 71, 84, 103, 111, 182, 9, 112, 223, 148, 157, 12, 235, 126, 26, 38, 16, 142, 168, 206, 222, 107, 18, 224, 189, 39, 57, 24, 213, 252, 52, 76, 32, 27, 79, 155, 187, 214, 36, 191, 121, 78, 114, 48, 169, 247, 104, 152, 64, 54, 158, 53, 117, 171, 72, 125, 242, 156, 228, 96, 81, 237, 208, 47, 128, 108, 59, 106, 234, 85, 144, 250, 227, 55, 199, 192, 162, 217, 159, 94, 256, 216, 118, 212, 211, 170, 31, 243, 197, 110, 141, 127, 67, 177, 61, 188, 255, 175, 236, 167, 165, 83, 62, 229, 137, 220, 25, 254, 134, 97, 122, 119, 253, 93, 215, 77, 73, 166, 124, 201, 17, 183, 50, 251, 11, 194, 244, 238, 249, 186, 173, 154, 146, 75, 248, 145, 34, 109, 100, 245, 22, 131, 231, 219, 241, 115, 89, 51, 35, 150, 239, 33, 68, 218, 200, 233, 44, 5, 205, 181, 225, 230, 178, 102, 70, 43, 221, 66, 136, 179, 143, 209, 88, 10, 153, 105, 193, 203, 99, 204, 140, 86, 185, 132, 15, 101, 29, 161, 176, 20, 49, 210, 129, 149, 198, 151, 23, 172, 113, 7, 30, 202, 58, 65, 95, 40, 98, 163, } var kYOffA = []int32{ 1, 163, 98, 40, 95, 65, 58, 202, 30, 7, 113, 172, 23, 151, 198, 149, 129, 210, 49, 20, 176, 161, 29, 101, 15, 132, 185, 86, 140, 204, 99, 203, 193, 105, 153, 10, 88, 209, 143, 179, 136, 66, 221, 43, 70, 102, 178, 230, 225, 181, 205, 5, 44, 233, 200, 218, 68, 33, 239, 150, 35, 51, 89, 115, 241, 219, 231, 131, 22, 245, 100, 109, 34, 145, 248, 75, 146, 154, 173, 186, 249, 238, 244, 194, 11, 251, 50, 183, 17, 201, 124, 166, 73, 77, 215, 93, 253, 119, 122, 97, 134, 254, 25, 220, 137, 229, 62, 83, 165, 167, 236, 175, 255, 188, 61, 177, 67, 127, 141, 110, 197, 243, 31, 170, 211, 212, 118, 216, 256, 94, 159, 217, 162, 192, 199, 55, 227, 250, 144, 85, 234, 106, 59, 108, 128, 47, 208, 237, 81, 96, 228, 156, 242, 125, 72, 171, 117, 53, 158, 54, 64, 152, 104, 247, 169, 48, 114, 78, 121, 191, 36, 214, 187, 155, 79, 27, 32, 76, 52, 252, 213, 24, 57, 39, 189, 224, 18, 107, 222, 206, 168, 142, 16, 38, 26, 126, 235, 12, 157, 148, 223, 112, 9, 182, 111, 103, 84, 71, 8, 19, 13, 63, 246, 6, 207, 74, 240, 56, 133, 91, 184, 180, 42, 164, 4, 138, 135, 160, 123, 3, 232, 37, 120, 28, 195, 174, 92, 90, 21, 82, 2, 69, 196, 80, 190, 130, 116, 147, 60, 14, 226, 87, 46, 45, 139, 41, } var kYOffB = []int32{ 2, 203, 156, 47, 118, 214, 107, 106, 45, 93, 212, 20, 111, 73, 162, 251, 97, 215, 249, 53, 211, 19, 3, 89, 49, 207, 101, 67, 151, 130, 223, 23, 189, 202, 178, 239, 253, 127, 204, 49, 76, 236, 82, 137, 232, 157, 65, 79, 96, 161, 176, 130, 161, 30, 47, 9, 189, 247, 61, 226, 248, 90, 107, 64, 0, 88, 131, 243, 133, 59, 113, 115, 17, 236, 33, 213, 12, 191, 111, 19, 251, 61, 103, 208, 57, 35, 148, 248, 47, 116, 65, 119, 249, 178, 143, 40, 189, 129, 8, 163, 204, 227, 230, 196, 205, 122, 151, 45, 187, 19, 227, 72, 247, 125, 111, 121, 140, 220, 6, 107, 77, 69, 10, 101, 21, 65, 149, 171, 255, 54, 101, 210, 139, 43, 150, 151, 212, 164, 45, 237, 146, 184, 95, 6, 160, 42, 8, 204, 46, 238, 254, 168, 208, 50, 156, 190, 106, 127, 34, 234, 68, 55, 79, 18, 4, 130, 53, 208, 181, 21, 175, 120, 25, 100, 192, 178, 161, 96, 81, 127, 96, 227, 210, 248, 68, 10, 196, 31, 9, 167, 150, 193, 0, 169, 126, 14, 124, 198, 144, 142, 240, 21, 224, 44, 245, 66, 146, 238, 6, 196, 154, 49, 200, 222, 109, 9, 210, 141, 192, 138, 8, 79, 114, 217, 68, 128, 249, 94, 53, 30, 27, 61, 52, 135, 106, 212, 70, 238, 30, 185, 10, 132, 146, 136, 117, 37, 251, 150, 180, 188, 247, 156, 236, 192, 108, 86, }