You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1920 lines
54 KiB

  1. // Use of this source code is governed by an ISC
  2. // license that can be found in the LICENSE file.
  3. package simd
  4. import (
  5. "fmt"
  6. "gitlab.com/nitya-sattva/go-x11/hash"
  7. )
  // Sizes, in bytes, of the produced hash and of the input block.
  const (
  	// HashSize holds the size of a hash in bytes.
  	HashSize = int(64)

  	// BlockSize holds the size of a block in bytes. It is a uintptr so it
  	// can be compared directly against the digest's buffer position.
  	BlockSize = uintptr(128)
  )
  12. ////////////////
  13. type digest struct {
  14. ptr uintptr
  15. ch uint32
  16. cl uint32
  17. h [32]uint32
  18. b [BlockSize]byte
  19. }
  20. // New returns a new digest to compute a SIMD512 hash.
  21. func New() hash.Digest {
  22. ref := &digest{}
  23. ref.Reset()
  24. return ref
  25. }
  26. ////////////////
  27. // Reset resets the digest to its initial state.
  28. func (ref *digest) Reset() {
  29. ref.ptr = 0
  30. ref.cl, ref.ch = 0, 0
  31. copy(ref.h[:], kInit[:])
  32. }
  33. // Sum appends the current hash to dst and returns the result
  34. // as a slice. It does not change the underlying hash state.
  35. func (ref *digest) Sum(dst []byte) []byte {
  36. dgt := *ref
  37. hsh := [64]byte{}
  38. dgt.Close(hsh[:], 0, 0)
  39. return append(dst, hsh[:]...)
  40. }
  41. // Write more data to the running hash, never returns an error.
  42. func (ref *digest) Write(src []byte) (int, error) {
  43. sln := uintptr(len(src))
  44. fln := len(src)
  45. for sln > 0 {
  46. cln := BlockSize - ref.ptr
  47. if cln > sln {
  48. cln = sln
  49. }
  50. sln -= cln
  51. copy(ref.b[ref.ptr:], src[:cln])
  52. src = src[cln:]
  53. ref.ptr += cln
  54. if ref.ptr == BlockSize {
  55. ref.compress(0)
  56. ref.ptr = 0
  57. ref.cl += 1
  58. if ref.cl == 0 {
  59. ref.ch++
  60. }
  61. }
  62. }
  63. return fln, nil
  64. }
  65. // Close the digest by writing the last bits and storing the hash
  66. // in dst. This prepares the digest for reuse by calling reset. A call
  67. // to Close with a dst that is smaller then HashSize will return an error.
  68. func (ref *digest) Close(dst []byte, bits uint8, bcnt uint8) error {
  69. if ln := len(dst); HashSize > ln {
  70. return fmt.Errorf("Simd Close: dst min length: %d, got %d", HashSize, ln)
  71. }
  72. if ref.ptr > 0 || bcnt > 0 {
  73. memset(ref.b[ref.ptr:], 0)
  74. ref.b[ref.ptr] = uint8(bits & (0xFF << (8 - bcnt)))
  75. ref.compress(0)
  76. }
  77. memset(ref.b[:], 0)
  78. {
  79. low := uint32(ref.cl << 10)
  80. low += uint32(ref.ptr<<3) + uint32(bcnt)
  81. high := uint32(ref.ch<<10) + (ref.cl >> 22)
  82. encUInt32le(ref.b[:], low)
  83. encUInt32le(ref.b[4:], high)
  84. }
  85. ref.compress(1)
  86. for u := int(0); u < 16; u++ {
  87. encUInt32le(dst[(u<<2):], ref.h[u])
  88. }
  89. ref.Reset()
  90. return nil
  91. }
  92. // Size returns the number of bytes required to store the hash.
  93. func (*digest) Size() int {
  94. return HashSize
  95. }
  96. // BlockSize returns the block size of the hash.
  97. func (*digest) BlockSize() int {
  98. return int(BlockSize)
  99. }
  100. ////////////////
  // memset sets every byte of dst to src. A nil or empty dst is a no-op.
  func memset(dst []byte, src byte) {
  	for i := range dst {
  		dst[i] = src
  	}
  }
  // decUInt32le decodes the first four bytes of src as a little-endian
  // uint32. It panics if src holds fewer than four bytes.
  func decUInt32le(src []byte) uint32 {
  	_ = src[3] // single up-front bounds check for the four reads below
  	return uint32(src[0]) |
  		uint32(src[1])<<8 |
  		uint32(src[2])<<16 |
  		uint32(src[3])<<24
  }
  // encUInt32le stores src into the first four bytes of dst in
  // little-endian order. It panics, before writing anything, if dst holds
  // fewer than four bytes.
  func encUInt32le(dst []uint8, src uint32) {
  	_ = dst[3] // single up-front bounds check; avoids a partial write
  	dst[0] = uint8(src)
  	dst[1] = uint8(src >> 8)
  	dst[2] = uint8(src >> 16)
  	dst[3] = uint8(src >> 24)
  }
  118. func (ref *digest) compress(last int) {
  119. var q [256]int32
  120. var w [64]uint32
  121. var st [32]uint32
  122. mixoutRound(ref.b[:], q[:], 1<<2)
  123. mixoutRound(ref.b[2:], q[64:], 1<<2)
  124. {
  125. var t int32
  126. var u, v uintptr
  127. m := q[0]
  128. n := q[64]
  129. q[0] = m + n
  130. q[64] = m - n
  131. m = q[u+1]
  132. n = q[u+1+64]
  133. t = (n * kAlphaTab[v+1*2])
  134. t = ((t) & 0xFFFF) + ((t) >> 16)
  135. q[u+1] = m + t
  136. q[u+1+64] = m - t
  137. m = q[u+2]
  138. n = q[u+2+64]
  139. t = (n * kAlphaTab[v+2*2])
  140. t = ((t) & 0xFFFF) + ((t) >> 16)
  141. q[u+2] = m + t
  142. q[u+2+64] = m - t
  143. m = q[u+3]
  144. n = q[u+3+64]
  145. t = (n * kAlphaTab[v+3*2])
  146. t = ((t) & 0xFFFF) + ((t) >> 16)
  147. q[u+3] = m + t
  148. q[u+3+64] = m - t
  149. u = 4
  150. v = 4 * 2
  151. for u < 64 {
  152. m = q[u]
  153. n = q[u+(64)]
  154. t = (n * kAlphaTab[v+0*2])
  155. t = ((t) & 0xFFFF) + ((t) >> 16)
  156. q[u] = m + t
  157. q[u+(64)] = m - t
  158. m = q[u+1]
  159. n = q[u+1+64]
  160. t = (n * kAlphaTab[v+1*2])
  161. t = ((t) & 0xFFFF) + ((t) >> 16)
  162. q[u+1] = m + t
  163. q[u+1+64] = m - t
  164. m = q[u+2]
  165. n = q[u+2+64]
  166. t = (n * kAlphaTab[v+2*2])
  167. t = ((t) & 0xFFFF) + ((t) >> 16)
  168. q[u+2] = m + t
  169. q[u+2+64] = m - t
  170. m = q[u+3]
  171. n = q[u+3+64]
  172. t = (n * kAlphaTab[v+3*2])
  173. t = ((t) & 0xFFFF) + ((t) >> 16)
  174. q[u+3] = m + t
  175. q[u+3+64] = m - t
  176. u += 4
  177. v += 4 * 2
  178. }
  179. }
  180. mixoutRound(ref.b[1:], q[128:], 1<<2)
  181. mixoutRound(ref.b[3:], q[192:], 1<<2)
  182. {
  183. var t int32
  184. var u, v uintptr
  185. m := q[128]
  186. n := q[128+64]
  187. q[128] = m + n
  188. q[128+64] = m - n
  189. m = q[128+u+1]
  190. n = q[128+u+1+64]
  191. t = (n * kAlphaTab[v+1*2])
  192. t = ((t) & 0xFFFF) + ((t) >> 16)
  193. q[128+u+1] = m + t
  194. q[128+u+1+64] = m - t
  195. m = q[128+u+2]
  196. n = q[128+u+2+64]
  197. t = (n * kAlphaTab[v+2*2])
  198. t = ((t) & 0xFFFF) + ((t) >> 16)
  199. q[128+u+2] = m + t
  200. q[128+u+2+64] = m - t
  201. m = q[128+u+3]
  202. n = q[128+u+3+64]
  203. t = (n * kAlphaTab[v+3*2])
  204. t = ((t) & 0xFFFF) + ((t) >> 16)
  205. q[128+u+3] = m + t
  206. q[128+u+3+64] = m - t
  207. u = 4
  208. v = 4 * 2
  209. for u < 64 {
  210. m = q[128+u]
  211. n = q[128+u+64]
  212. t = (n * kAlphaTab[v+0*2])
  213. t = ((t) & 0xFFFF) + ((t) >> 16)
  214. q[128+u] = m + t
  215. q[128+u+64] = m - t
  216. m = q[128+u+1]
  217. n = q[128+u+1+64]
  218. t = (n * kAlphaTab[v+1*2])
  219. t = ((t) & 0xFFFF) + ((t) >> 16)
  220. q[128+u+1] = m + t
  221. q[128+u+1+64] = m - t
  222. m = q[128+u+2]
  223. n = q[128+u+2+64]
  224. t = (n * kAlphaTab[v+2*2])
  225. t = ((t) & 0xFFFF) + ((t) >> 16)
  226. q[128+u+2] = m + t
  227. q[128+u+2+64] = m - t
  228. m = q[128+u+3]
  229. n = q[128+u+3+64]
  230. t = (n * kAlphaTab[v+3*2])
  231. t = ((t) & 0xFFFF) + ((t) >> 16)
  232. q[128+u+3] = m + t
  233. q[128+u+3+64] = m - t
  234. u += 4
  235. v += 4 * 2
  236. }
  237. }
  238. {
  239. var t int32
  240. var u, v uintptr
  241. m := q[0]
  242. n := q[128]
  243. q[0] = m + n
  244. q[128] = m - n
  245. m = q[u+1]
  246. n = q[u+1+128]
  247. t = (n * kAlphaTab[v+1])
  248. t = ((t) & 0xFFFF) + ((t) >> 16)
  249. q[u+1] = m + t
  250. q[u+1+128] = m - t
  251. m = q[u+2]
  252. n = q[u+2+128]
  253. t = (n * kAlphaTab[v+2])
  254. t = ((t) & 0xFFFF) + ((t) >> 16)
  255. q[u+2] = m + t
  256. q[u+2+128] = m - t
  257. m = q[u+3]
  258. n = q[u+3+128]
  259. t = (n * kAlphaTab[v+3])
  260. t = ((t) & 0xFFFF) + ((t) >> 16)
  261. q[u+3] = m + t
  262. q[u+3+128] = m - t
  263. u = 4
  264. v = 4
  265. for u < 128 {
  266. m = q[u]
  267. n = q[u+128]
  268. t = (n * kAlphaTab[v+0])
  269. t = ((t) & 0xFFFF) + ((t) >> 16)
  270. q[u] = m + t
  271. q[u+128] = m - t
  272. m = q[u+1]
  273. n = q[u+1+128]
  274. t = (n * kAlphaTab[v+1])
  275. t = ((t) & 0xFFFF) + ((t) >> 16)
  276. q[u+1] = m + t
  277. q[u+1+128] = m - t
  278. m = q[u+2]
  279. n = q[u+2+128]
  280. t = (n * kAlphaTab[v+2])
  281. t = ((t) & 0xFFFF) + ((t) >> 16)
  282. q[u+2] = m + t
  283. q[u+2+128] = m - t
  284. m = q[u+3]
  285. n = q[u+3+128]
  286. t = (n * kAlphaTab[v+3])
  287. t = ((t) & 0xFFFF) + ((t) >> 16)
  288. q[u+3] = m + t
  289. q[u+3+128] = m - t
  290. u += 4
  291. v += 4
  292. }
  293. }
  294. if last == 1 {
  295. var tq int32
  296. for i := uintptr(0); i < 256; i++ {
  297. tq = q[i] + kYOffB[i]
  298. tq = (((tq) & 0xFFFF) + ((tq) >> 16))
  299. tq = (((tq) & 0xFF) - ((tq) >> 8))
  300. tq = (((tq) & 0xFF) - ((tq) >> 8))
  301. if tq <= 128 {
  302. q[i] = tq
  303. } else {
  304. q[i] = tq - 257
  305. }
  306. }
  307. } else {
  308. var tq int32
  309. for i := uintptr(0); i < 256; i++ {
  310. tq = q[i] + kYOffA[i]
  311. tq = (((tq) & 0xFFFF) + ((tq) >> 16))
  312. tq = (((tq) & 0xFF) - ((tq) >> 8))
  313. tq = (((tq) & 0xFF) - ((tq) >> 8))
  314. if tq <= 128 {
  315. q[i] = tq
  316. } else {
  317. q[i] = tq - 257
  318. }
  319. }
  320. }
  321. {
  322. b := ref.b[:]
  323. s := ref.h[:]
  324. for i := uintptr(0); i < 32; i += 8 {
  325. st[i+0] = s[i+0] ^ decUInt32le(b[4*(i+0):])
  326. st[i+1] = s[i+1] ^ decUInt32le(b[4*(i+1):])
  327. st[i+2] = s[i+2] ^ decUInt32le(b[4*(i+2):])
  328. st[i+3] = s[i+3] ^ decUInt32le(b[4*(i+3):])
  329. st[i+4] = s[i+4] ^ decUInt32le(b[4*(i+4):])
  330. st[i+5] = s[i+5] ^ decUInt32le(b[4*(i+5):])
  331. st[i+6] = s[i+6] ^ decUInt32le(b[4*(i+6):])
  332. st[i+7] = s[i+7] ^ decUInt32le(b[4*(i+7):])
  333. }
  334. }
  335. for u := uintptr(0); u < 64; u += 8 {
  336. v := uintptr(wbp[(u >> 3)])
  337. w[u+0] = ((uint32(q[v+2*0]*185) & uint32(0xFFFF)) +
  338. (uint32(q[v+2*0+1]*185) << 16))
  339. w[u+1] = ((uint32(q[v+2*1]*185) & uint32(0xFFFF)) +
  340. (uint32(q[v+2*1+1]*185) << 16))
  341. w[u+2] = ((uint32(q[v+2*2]*185) & uint32(0xFFFF)) +
  342. (uint32(q[v+2*2+1]*185) << 16))
  343. w[u+3] = ((uint32(q[v+2*3]*185) & uint32(0xFFFF)) +
  344. (uint32(q[v+2*3+1]*185) << 16))
  345. w[u+4] = ((uint32(q[v+2*4]*185) & uint32(0xFFFF)) +
  346. (uint32(q[v+2*4+1]*185) << 16))
  347. w[u+5] = ((uint32(q[v+2*5]*185) & uint32(0xFFFF)) +
  348. (uint32(q[v+2*5+1]*185) << 16))
  349. w[u+6] = ((uint32(q[v+2*6]*185) & uint32(0xFFFF)) +
  350. (uint32(q[v+2*6+1]*185) << 16))
  351. w[u+7] = ((uint32(q[v+2*7]*185) & uint32(0xFFFF)) +
  352. (uint32(q[v+2*7+1]*185) << 16))
  353. }
  354. mixinRound(st[:], w[:], 0, 3, 23, 17, 27)
  355. for u := uintptr(0); u < 64; u += 8 {
  356. v := uintptr(wbp[(u>>3)+8])
  357. w[u+0] = (uint32(q[v+2*0]*185) & uint32(0xFFFF)) +
  358. (uint32(q[v+2*0+1]*185) << 16)
  359. w[u+1] = (uint32(q[v+2*1]*185) & uint32(0xFFFF)) +
  360. (uint32(q[v+2*1+1]*185) << 16)
  361. w[u+2] = (uint32(q[v+2*2]*185) & uint32(0xFFFF)) +
  362. (uint32(q[v+2*2+1]*185) << 16)
  363. w[u+3] = (uint32(q[v+2*3]*185) & uint32(0xFFFF)) +
  364. (uint32(q[v+2*3+1]*185) << 16)
  365. w[u+4] = (uint32(q[v+2*4]*185) & uint32(0xFFFF)) +
  366. (uint32(q[v+2*4+1]*185) << 16)
  367. w[u+5] = (uint32(q[v+2*5]*185) & uint32(0xFFFF)) +
  368. (uint32(q[v+2*5+1]*185) << 16)
  369. w[u+6] = (uint32(q[v+2*6]*185) & uint32(0xFFFF)) +
  370. (uint32(q[v+2*6+1]*185) << 16)
  371. w[u+7] = (uint32(q[v+2*7]*185) & uint32(0xFFFF)) +
  372. (uint32(q[v+2*7+1]*185) << 16)
  373. }
  374. mixinRound(st[:], w[:], 1, 28, 19, 22, 7)
  375. for u := uintptr(0); u < 64; u += 8 {
  376. v := uintptr(wbp[(u>>3)+16])
  377. w[u+0] = ((uint32(q[v+2*0-256]*(233)) & uint32(0xFFFF)) +
  378. (uint32((q[v+2*0-128])*(233)) << 16))
  379. w[u+1] = ((uint32(q[v+2*1-256]*(233)) & uint32(0xFFFF)) +
  380. (uint32((q[v+2*1-128])*(233)) << 16))
  381. w[u+2] = ((uint32(q[v+2*2-256]*(233)) & uint32(0xFFFF)) +
  382. (uint32((q[v+2*2-128])*(233)) << 16))
  383. w[u+3] = ((uint32(q[v+2*3-256]*(233)) & uint32(0xFFFF)) +
  384. (uint32((q[v+2*3-128])*(233)) << 16))
  385. w[u+4] = ((uint32(q[v+2*4-256]*(233)) & uint32(0xFFFF)) +
  386. (uint32((q[v+2*4-128])*(233)) << 16))
  387. w[u+5] = ((uint32(q[v+2*5-256]*(233)) & uint32(0xFFFF)) +
  388. (uint32((q[v+2*5-128])*(233)) << 16))
  389. w[u+6] = ((uint32(q[v+2*6-256]*(233)) & uint32(0xFFFF)) +
  390. (uint32((q[v+2*6-128])*(233)) << 16))
  391. w[u+7] = ((uint32(q[v+2*7-256]*(233)) & uint32(0xFFFF)) +
  392. (uint32((q[v+2*7-128])*(233)) << 16))
  393. }
  394. mixinRound(st[:], w[:], 2, 29, 9, 15, 5)
  395. for u := uintptr(0); u < 64; u += 8 {
  396. v := uintptr(wbp[(u>>3)+24])
  397. w[u+0] = ((uint32(q[v+2*0-383]*233) & uint32(0xFFFF)) +
  398. (uint32(q[v+2*0-255]*233) << 16))
  399. w[u+1] = ((uint32(q[v+2*1-383]*233) & uint32(0xFFFF)) +
  400. (uint32(q[v+2*1-255]*233) << 16))
  401. w[u+2] = ((uint32(q[v+2*2-383]*233) & uint32(0xFFFF)) +
  402. (uint32(q[v+2*2-255]*233) << 16))
  403. w[u+3] = ((uint32(q[v+2*3-383]*233) & uint32(0xFFFF)) +
  404. (uint32(q[v+2*3-255]*233) << 16))
  405. w[u+4] = ((uint32(q[v+2*4-383]*233) & uint32(0xFFFF)) +
  406. (uint32(q[v+2*4-255]*233) << 16))
  407. w[u+5] = ((uint32(q[v+2*5-383]*233) & uint32(0xFFFF)) +
  408. (uint32(q[v+2*5-255]*233) << 16))
  409. w[u+6] = ((uint32(q[v+2*6-383]*233) & uint32(0xFFFF)) +
  410. (uint32(q[v+2*6-255]*233) << 16))
  411. w[u+7] = ((uint32(q[v+2*7-383]*233) & uint32(0xFFFF)) +
  412. (uint32(q[v+2*7-255]*233) << 16))
  413. }
  414. mixinRound(st[:], w[:], 3, 4, 13, 10, 25)
  415. {
  416. var tp uint32
  417. var tA [8]uint32
  418. sta := ref.h[:]
  419. tA[0] = ((st[0] << 4) | (st[0] >> (32 - 4)))
  420. tA[1] = ((st[1] << 4) | (st[1] >> (32 - 4)))
  421. tA[2] = ((st[2] << 4) | (st[2] >> (32 - 4)))
  422. tA[3] = ((st[3] << 4) | (st[3] >> (32 - 4)))
  423. tA[4] = ((st[4] << 4) | (st[4] >> (32 - 4)))
  424. tA[5] = ((st[5] << 4) | (st[5] >> (32 - 4)))
  425. tA[6] = ((st[6] << 4) | (st[6] >> (32 - 4)))
  426. tA[7] = ((st[7] << 4) | (st[7] >> (32 - 4)))
  427. tp = uint32(st[kIdxD[0]] + sta[0] +
  428. (((st[kIdxB[0]] ^ st[kIdxC[0]]) & st[kIdxA[0]]) ^ st[kIdxC[0]]))
  429. st[kIdxA[0]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][0]]
  430. st[kIdxD[0]] = st[kIdxC[0]]
  431. st[kIdxC[0]] = st[kIdxB[0]]
  432. st[kIdxB[0]] = tA[0]
  433. tp = uint32(st[kIdxD[1]] + sta[1] +
  434. (((st[kIdxB[1]] ^ st[kIdxC[1]]) & st[kIdxA[1]]) ^ st[kIdxC[1]]))
  435. st[kIdxA[1]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][1]]
  436. st[kIdxD[1]] = st[kIdxC[1]]
  437. st[kIdxC[1]] = st[kIdxB[1]]
  438. st[kIdxB[1]] = tA[1]
  439. tp = uint32(st[kIdxD[2]] + sta[2] +
  440. (((st[kIdxB[2]] ^ st[kIdxC[2]]) & st[kIdxA[2]]) ^ st[kIdxC[2]]))
  441. st[kIdxA[2]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][2]]
  442. st[kIdxD[2]] = st[kIdxC[2]]
  443. st[kIdxC[2]] = st[kIdxB[2]]
  444. st[kIdxB[2]] = tA[2]
  445. tp = uint32(st[kIdxD[3]] + sta[3] +
  446. (((st[kIdxB[3]] ^ st[kIdxC[3]]) & st[kIdxA[3]]) ^ st[kIdxC[3]]))
  447. st[kIdxA[3]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][3]]
  448. st[kIdxD[3]] = st[kIdxC[3]]
  449. st[kIdxC[3]] = st[kIdxB[3]]
  450. st[kIdxB[3]] = tA[3]
  451. tp = uint32(st[kIdxD[4]] + sta[4] +
  452. (((st[kIdxB[4]] ^ st[kIdxC[4]]) & st[kIdxA[4]]) ^ st[kIdxC[4]]))
  453. st[kIdxA[4]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][4]]
  454. st[kIdxD[4]] = st[kIdxC[4]]
  455. st[kIdxC[4]] = st[kIdxB[4]]
  456. st[kIdxB[4]] = tA[4]
  457. tp = uint32(st[kIdxD[5]] + sta[5] +
  458. (((st[kIdxB[5]] ^ st[kIdxC[5]]) & st[kIdxA[5]]) ^ st[kIdxC[5]]))
  459. st[kIdxA[5]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][5]]
  460. st[kIdxD[5]] = st[kIdxC[5]]
  461. st[kIdxC[5]] = st[kIdxB[5]]
  462. st[kIdxB[5]] = tA[5]
  463. tp = uint32(st[kIdxD[6]] + sta[6] +
  464. (((st[kIdxB[6]] ^ st[kIdxC[6]]) & st[kIdxA[6]]) ^ st[kIdxC[6]]))
  465. st[kIdxA[6]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][6]]
  466. st[kIdxD[6]] = st[kIdxC[6]]
  467. st[kIdxC[6]] = st[kIdxB[6]]
  468. st[kIdxB[6]] = tA[6]
  469. tp = uint32(st[kIdxD[7]] + sta[7] +
  470. (((st[kIdxB[7]] ^ st[kIdxC[7]]) & st[kIdxA[7]]) ^ st[kIdxC[7]]))
  471. st[kIdxA[7]] = ((tp << 13) | (tp >> (32 - 13))) + tA[kPrem[4][7]]
  472. st[kIdxD[7]] = st[kIdxC[7]]
  473. st[kIdxC[7]] = st[kIdxB[7]]
  474. st[kIdxB[7]] = tA[7]
  475. tA[0] = ((st[0] << 13) | (st[0] >> (32 - 13)))
  476. tA[1] = ((st[1] << 13) | (st[1] >> (32 - 13)))
  477. tA[2] = ((st[2] << 13) | (st[2] >> (32 - 13)))
  478. tA[3] = ((st[3] << 13) | (st[3] >> (32 - 13)))
  479. tA[4] = ((st[4] << 13) | (st[4] >> (32 - 13)))
  480. tA[5] = ((st[5] << 13) | (st[5] >> (32 - 13)))
  481. tA[6] = ((st[6] << 13) | (st[6] >> (32 - 13)))
  482. tA[7] = ((st[7] << 13) | (st[7] >> (32 - 13)))
  483. tp = uint32(st[kIdxD[0]] + sta[8] +
  484. (((st[kIdxB[0]] ^ st[kIdxC[0]]) & st[kIdxA[0]]) ^ st[kIdxC[0]]))
  485. st[kIdxA[0]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][0]]
  486. st[kIdxD[0]] = st[kIdxC[0]]
  487. st[kIdxC[0]] = st[kIdxB[0]]
  488. st[kIdxB[0]] = tA[0]
  489. tp = uint32(st[kIdxD[1]] + sta[9] +
  490. (((st[kIdxB[1]] ^ st[kIdxC[1]]) & st[kIdxA[1]]) ^ st[kIdxC[1]]))
  491. st[kIdxA[1]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][1]]
  492. st[kIdxD[1]] = st[kIdxC[1]]
  493. st[kIdxC[1]] = st[kIdxB[1]]
  494. st[kIdxB[1]] = tA[1]
  495. tp = uint32(st[kIdxD[2]] + sta[10] +
  496. (((st[kIdxB[2]] ^ st[kIdxC[2]]) & st[kIdxA[2]]) ^ st[kIdxC[2]]))
  497. st[kIdxA[2]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][2]]
  498. st[kIdxD[2]] = st[kIdxC[2]]
  499. st[kIdxC[2]] = st[kIdxB[2]]
  500. st[kIdxB[2]] = tA[2]
  501. tp = uint32(st[kIdxD[3]] + sta[11] +
  502. (((st[kIdxB[3]] ^ st[kIdxC[3]]) & st[kIdxA[3]]) ^ st[kIdxC[3]]))
  503. st[kIdxA[3]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][3]]
  504. st[kIdxD[3]] = st[kIdxC[3]]
  505. st[kIdxC[3]] = st[kIdxB[3]]
  506. st[kIdxB[3]] = tA[3]
  507. tp = uint32(st[kIdxD[4]] + sta[12] +
  508. (((st[kIdxB[4]] ^ st[kIdxC[4]]) & st[kIdxA[4]]) ^ st[kIdxC[4]]))
  509. st[kIdxA[4]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][4]]
  510. st[kIdxD[4]] = st[kIdxC[4]]
  511. st[kIdxC[4]] = st[kIdxB[4]]
  512. st[kIdxB[4]] = tA[4]
  513. tp = uint32(st[kIdxD[5]] + sta[13] +
  514. (((st[kIdxB[5]] ^ st[kIdxC[5]]) & st[kIdxA[5]]) ^ st[kIdxC[5]]))
  515. st[kIdxA[5]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][5]]
  516. st[kIdxD[5]] = st[kIdxC[5]]
  517. st[kIdxC[5]] = st[kIdxB[5]]
  518. st[kIdxB[5]] = tA[5]
  519. tp = uint32(st[kIdxD[6]] + sta[14] +
  520. (((st[kIdxB[6]] ^ st[kIdxC[6]]) & st[kIdxA[6]]) ^ st[kIdxC[6]]))
  521. st[kIdxA[6]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][6]]
  522. st[kIdxD[6]] = st[kIdxC[6]]
  523. st[kIdxC[6]] = st[kIdxB[6]]
  524. st[kIdxB[6]] = tA[6]
  525. tp = uint32(st[kIdxD[7]] + sta[15] +
  526. (((st[kIdxB[7]] ^ st[kIdxC[7]]) & st[kIdxA[7]]) ^ st[kIdxC[7]]))
  527. st[kIdxA[7]] = ((tp << 10) | (tp >> (32 - 10))) + tA[kPrem[5][7]]
  528. st[kIdxD[7]] = st[kIdxC[7]]
  529. st[kIdxC[7]] = st[kIdxB[7]]
  530. st[kIdxB[7]] = tA[7]
  531. tA[0] = ((st[0] << 10) | (st[0] >> (32 - 10)))
  532. tA[1] = ((st[1] << 10) | (st[1] >> (32 - 10)))
  533. tA[2] = ((st[2] << 10) | (st[2] >> (32 - 10)))
  534. tA[3] = ((st[3] << 10) | (st[3] >> (32 - 10)))
  535. tA[4] = ((st[4] << 10) | (st[4] >> (32 - 10)))
  536. tA[5] = ((st[5] << 10) | (st[5] >> (32 - 10)))
  537. tA[6] = ((st[6] << 10) | (st[6] >> (32 - 10)))
  538. tA[7] = ((st[7] << 10) | (st[7] >> (32 - 10)))
  539. tp = uint32(st[kIdxD[0]] + sta[16] +
  540. (((st[kIdxB[0]] ^ st[kIdxC[0]]) & st[kIdxA[0]]) ^ st[kIdxC[0]]))
  541. st[kIdxA[0]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][0]]
  542. st[kIdxD[0]] = st[kIdxC[0]]
  543. st[kIdxC[0]] = st[kIdxB[0]]
  544. st[kIdxB[0]] = tA[0]
  545. tp = uint32(st[kIdxD[1]] + sta[17] +
  546. (((st[kIdxB[1]] ^ st[kIdxC[1]]) & st[kIdxA[1]]) ^ st[kIdxC[1]]))
  547. st[kIdxA[1]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][1]]
  548. st[kIdxD[1]] = st[kIdxC[1]]
  549. st[kIdxC[1]] = st[kIdxB[1]]
  550. st[kIdxB[1]] = tA[1]
  551. tp = uint32(st[kIdxD[2]] + sta[18] +
  552. (((st[kIdxB[2]] ^ st[kIdxC[2]]) & st[kIdxA[2]]) ^ st[kIdxC[2]]))
  553. st[kIdxA[2]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][2]]
  554. st[kIdxD[2]] = st[kIdxC[2]]
  555. st[kIdxC[2]] = st[kIdxB[2]]
  556. st[kIdxB[2]] = tA[2]
  557. tp = uint32(st[kIdxD[3]] + sta[19] +
  558. (((st[kIdxB[3]] ^ st[kIdxC[3]]) & st[kIdxA[3]]) ^ st[kIdxC[3]]))
  559. st[kIdxA[3]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][3]]
  560. st[kIdxD[3]] = st[kIdxC[3]]
  561. st[kIdxC[3]] = st[kIdxB[3]]
  562. st[kIdxB[3]] = tA[3]
  563. tp = uint32(st[kIdxD[4]] + sta[20] +
  564. (((st[kIdxB[4]] ^ st[kIdxC[4]]) & st[kIdxA[4]]) ^ st[kIdxC[4]]))
  565. st[kIdxA[4]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][4]]
  566. st[kIdxD[4]] = st[kIdxC[4]]
  567. st[kIdxC[4]] = st[kIdxB[4]]
  568. st[kIdxB[4]] = tA[4]
  569. tp = uint32(st[kIdxD[5]] + sta[21] +
  570. (((st[kIdxB[5]] ^ st[kIdxC[5]]) & st[kIdxA[5]]) ^ st[kIdxC[5]]))
  571. st[kIdxA[5]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][5]]
  572. st[kIdxD[5]] = st[kIdxC[5]]
  573. st[kIdxC[5]] = st[kIdxB[5]]
  574. st[kIdxB[5]] = tA[5]
  575. tp = uint32(st[kIdxD[6]] + sta[22] +
  576. (((st[kIdxB[6]] ^ st[kIdxC[6]]) & st[kIdxA[6]]) ^ st[kIdxC[6]]))
  577. st[kIdxA[6]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][6]]
  578. st[kIdxD[6]] = st[kIdxC[6]]
  579. st[kIdxC[6]] = st[kIdxB[6]]
  580. st[kIdxB[6]] = tA[6]
  581. tp = uint32(st[kIdxD[7]] + sta[23] +
  582. (((st[kIdxB[7]] ^ st[kIdxC[7]]) & st[kIdxA[7]]) ^ st[kIdxC[7]]))
  583. st[kIdxA[7]] = ((tp << 25) | (tp >> (32 - 25))) + tA[kPrem[6][7]]
  584. st[kIdxD[7]] = st[kIdxC[7]]
  585. st[kIdxC[7]] = st[kIdxB[7]]
  586. st[kIdxB[7]] = tA[7]
  587. tA[0] = ((st[0] << 25) | (st[0] >> (32 - 25)))
  588. tA[1] = ((st[1] << 25) | (st[1] >> (32 - 25)))
  589. tA[2] = ((st[2] << 25) | (st[2] >> (32 - 25)))
  590. tA[3] = ((st[3] << 25) | (st[3] >> (32 - 25)))
  591. tA[4] = ((st[4] << 25) | (st[4] >> (32 - 25)))
  592. tA[5] = ((st[5] << 25) | (st[5] >> (32 - 25)))
  593. tA[6] = ((st[6] << 25) | (st[6] >> (32 - 25)))
  594. tA[7] = ((st[7] << 25) | (st[7] >> (32 - 25)))
  595. tp = uint32(st[kIdxD[0]] + sta[24] +
  596. (((st[kIdxB[0]] ^ st[kIdxC[0]]) & st[kIdxA[0]]) ^ st[kIdxC[0]]))
  597. st[kIdxA[0]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][0]]
  598. st[kIdxD[0]] = st[kIdxC[0]]
  599. st[kIdxC[0]] = st[kIdxB[0]]
  600. st[kIdxB[0]] = tA[0]
  601. tp = uint32(st[kIdxD[1]] + sta[25] +
  602. (((st[kIdxB[1]] ^ st[kIdxC[1]]) & st[kIdxA[1]]) ^ st[kIdxC[1]]))
  603. st[kIdxA[1]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][1]]
  604. st[kIdxD[1]] = st[kIdxC[1]]
  605. st[kIdxC[1]] = st[kIdxB[1]]
  606. st[kIdxB[1]] = tA[1]
  607. tp = uint32(st[kIdxD[2]] + sta[26] +
  608. (((st[kIdxB[2]] ^ st[kIdxC[2]]) & st[kIdxA[2]]) ^ st[kIdxC[2]]))
  609. st[kIdxA[2]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][2]]
  610. st[kIdxD[2]] = st[kIdxC[2]]
  611. st[kIdxC[2]] = st[kIdxB[2]]
  612. st[kIdxB[2]] = tA[2]
  613. tp = uint32(st[kIdxD[3]] + sta[27] +
  614. (((st[kIdxB[3]] ^ st[kIdxC[3]]) & st[kIdxA[3]]) ^ st[kIdxC[3]]))
  615. st[kIdxA[3]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][3]]
  616. st[kIdxD[3]] = st[kIdxC[3]]
  617. st[kIdxC[3]] = st[kIdxB[3]]
  618. st[kIdxB[3]] = tA[3]
  619. tp = uint32(st[kIdxD[4]] + sta[28] +
  620. (((st[kIdxB[4]] ^ st[kIdxC[4]]) & st[kIdxA[4]]) ^ st[kIdxC[4]]))
  621. st[kIdxA[4]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][4]]
  622. st[kIdxD[4]] = st[kIdxC[4]]
  623. st[kIdxC[4]] = st[kIdxB[4]]
  624. st[kIdxB[4]] = tA[4]
  625. tp = uint32(st[kIdxD[5]] + sta[29] +
  626. (((st[kIdxB[5]] ^ st[kIdxC[5]]) & st[kIdxA[5]]) ^ st[kIdxC[5]]))
  627. st[kIdxA[5]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][5]]
  628. st[kIdxD[5]] = st[kIdxC[5]]
  629. st[kIdxC[5]] = st[kIdxB[5]]
  630. st[kIdxB[5]] = tA[5]
  631. tp = uint32(st[kIdxD[6]] + sta[30] +
  632. (((st[kIdxB[6]] ^ st[kIdxC[6]]) & st[kIdxA[6]]) ^ st[kIdxC[6]]))
  633. st[kIdxA[6]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][6]]
  634. st[kIdxD[6]] = st[kIdxC[6]]
  635. st[kIdxC[6]] = st[kIdxB[6]]
  636. st[kIdxB[6]] = tA[6]
  637. tp = uint32(st[kIdxD[7]] + sta[31] +
  638. (((st[kIdxB[7]] ^ st[kIdxC[7]]) & st[kIdxA[7]]) ^ st[kIdxC[7]]))
  639. st[kIdxA[7]] = ((tp << 4) | (tp >> (32 - 4))) + tA[kPrem[0][7]]
  640. st[kIdxD[7]] = st[kIdxC[7]]
  641. st[kIdxC[7]] = st[kIdxB[7]]
  642. st[kIdxB[7]] = tA[7]
  643. }
  644. copy(ref.h[:], st[:])
  645. }
  646. func mixoutRound(x []uint8, q []int32, xt uintptr) {
  647. var tx int32
  648. var d1_0, d1_1, d1_2, d1_3, d1_4, d1_5, d1_6, d1_7 int32
  649. var d2_0, d2_1, d2_2, d2_3, d2_4, d2_5, d2_6, d2_7 int32
  650. xd := xt << 1
  651. {
  652. var sa, sb uintptr
  653. var x0, x1, x2, x3 int32
  654. var a0, a1, a2, a3 int32
  655. var b0, b1, b2, b3 int32
  656. sb = xd << 2
  657. x0 = int32(x[0])
  658. x1 = int32(x[sb])
  659. x2 = int32(x[2*sb])
  660. x3 = int32(x[3*sb])
  661. a0 = x0 + x2
  662. a1 = x0 + (x2 << 4)
  663. a2 = x0 - x2
  664. a3 = x0 - (x2 << 4)
  665. b0 = x1 + x3
  666. tx = ((x1 << 2) + (x3 << 6))
  667. b1 = ((tx & 0xFF) - (tx >> 8))
  668. b2 = (x1 << 4) - (x3 << 4)
  669. tx = (x1 << 6) + (x3 << 2)
  670. b3 = ((tx & 0xFF) - (tx >> 8))
  671. d1_0 = a0 + b0
  672. d1_1 = a1 + b1
  673. d1_2 = a2 + b2
  674. d1_3 = a3 + b3
  675. d1_4 = a0 - b0
  676. d1_5 = a1 - b1
  677. d1_6 = a2 - b2
  678. d1_7 = a3 - b3
  679. sa = xd << 1
  680. sb = xd << 2
  681. x0 = int32(x[sa])
  682. x1 = int32(x[sa+sb])
  683. x2 = int32(x[sa+2*sb])
  684. x3 = int32(x[sa+3*sb])
  685. a0 = x0 + x2
  686. a1 = x0 + (x2 << 4)
  687. a2 = x0 - x2
  688. a3 = x0 - (x2 << 4)
  689. b0 = x1 + x3
  690. tx = ((x1 << 2) + (x3 << 6))
  691. b1 = ((tx & 0xFF) - (tx >> 8))
  692. b2 = (x1 << 4) - (x3 << 4)
  693. tx = (x1 << 6) + (x3 << 2)
  694. b3 = ((tx & 0xFF) - (tx >> 8))
  695. d2_0 = a0 + b0
  696. d2_1 = a1 + b1
  697. d2_2 = a2 + b2
  698. d2_3 = a3 + b3
  699. d2_4 = a0 - b0
  700. d2_5 = a1 - b1
  701. d2_6 = a2 - b2
  702. d2_7 = a3 - b3
  703. }
  704. q[0] = d1_0 + d2_0
  705. q[1] = d1_1 + (d2_1 << 1)
  706. q[2] = d1_2 + (d2_2 << 2)
  707. q[3] = d1_3 + (d2_3 << 3)
  708. q[4] = d1_4 + (d2_4 << 4)
  709. q[5] = d1_5 + (d2_5 << 5)
  710. q[6] = d1_6 + (d2_6 << 6)
  711. q[7] = d1_7 + (d2_7 << 7)
  712. q[8] = d1_0 - d2_0
  713. q[9] = d1_1 - (d2_1 << 1)
  714. q[10] = d1_2 - (d2_2 << 2)
  715. q[11] = d1_3 - (d2_3 << 3)
  716. q[12] = d1_4 - (d2_4 << 4)
  717. q[13] = d1_5 - (d2_5 << 5)
  718. q[14] = d1_6 - (d2_6 << 6)
  719. q[15] = d1_7 - (d2_7 << 7)
  720. {
  721. var sa, sb uintptr
  722. var x0, x1, x2, x3 int32
  723. var a0, a1, a2, a3 int32
  724. var b0, b1, b2, b3 int32
  725. sb = xd << 2
  726. x0 = int32(x[(xd)])
  727. x1 = int32(x[(xd)+sb])
  728. x2 = int32(x[(xd)+2*sb])
  729. x3 = int32(x[(xd)+3*sb])
  730. a0 = x0 + x2
  731. a1 = x0 + (x2 << 4)
  732. a2 = x0 - x2
  733. a3 = x0 - (x2 << 4)
  734. b0 = x1 + x3
  735. tx = ((x1 << 2) + (x3 << 6))
  736. b1 = ((tx & 0xFF) - (tx >> 8))
  737. b2 = (x1 << 4) - (x3 << 4)
  738. tx = (x1 << 6) + (x3 << 2)
  739. b3 = ((tx & 0xFF) - (tx >> 8))
  740. d1_0 = a0 + b0
  741. d1_1 = a1 + b1
  742. d1_2 = a2 + b2
  743. d1_3 = a3 + b3
  744. d1_4 = a0 - b0
  745. d1_5 = a1 - b1
  746. d1_6 = a2 - b2
  747. d1_7 = a3 - b3
  748. sa = xd + (xd << 1)
  749. sb = xd << 2
  750. x0 = int32(x[sa])
  751. x1 = int32(x[sa+sb])
  752. x2 = int32(x[sa+2*sb])
  753. x3 = int32(x[sa+3*sb])
  754. a0 = x0 + x2
  755. a1 = x0 + (x2 << 4)
  756. a2 = x0 - x2
  757. a3 = x0 - (x2 << 4)
  758. b0 = x1 + x3
  759. tx = ((x1 << 2) + (x3 << 6))
  760. b1 = ((tx & 0xFF) - (tx >> 8))
  761. b2 = (x1 << 4) - (x3 << 4)
  762. tx = (x1 << 6) + (x3 << 2)
  763. b3 = ((tx & 0xFF) - (tx >> 8))
  764. d2_0 = a0 + b0
  765. d2_1 = a1 + b1
  766. d2_2 = a2 + b2
  767. d2_3 = a3 + b3
  768. d2_4 = a0 - b0
  769. d2_5 = a1 - b1
  770. d2_6 = a2 - b2
  771. d2_7 = a3 - b3
  772. }
  773. q[16+0] = d1_0 + d2_0
  774. q[16+1] = d1_1 + (d2_1 << 1)
  775. q[16+2] = d1_2 + (d2_2 << 2)
  776. q[16+3] = d1_3 + (d2_3 << 3)
  777. q[16+4] = d1_4 + (d2_4 << 4)
  778. q[16+5] = d1_5 + (d2_5 << 5)
  779. q[16+6] = d1_6 + (d2_6 << 6)
  780. q[16+7] = d1_7 + (d2_7 << 7)
  781. q[16+8] = d1_0 - d2_0
  782. q[16+9] = d1_1 - (d2_1 << 1)
  783. q[16+10] = d1_2 - (d2_2 << 2)
  784. q[16+11] = d1_3 - (d2_3 << 3)
  785. q[16+12] = d1_4 - (d2_4 << 4)
  786. q[16+13] = d1_5 - (d2_5 << 5)
  787. q[16+14] = d1_6 - (d2_6 << 6)
  788. q[16+15] = d1_7 - (d2_7 << 7)
  789. {
  790. var u, v uintptr
  791. m := q[0]
  792. n := q[16]
  793. q[0] = m + n
  794. q[16] = m - n
  795. m = q[u+1]
  796. n = q[u+1+16]
  797. tx = (n * kAlphaTab[v+1*8])
  798. tx = ((tx & 0xFFFF) + (tx >> 16))
  799. q[u+1] = m + tx
  800. q[u+1+16] = m - tx
  801. m = q[u+2]
  802. n = q[u+2+16]
  803. tx = (n * kAlphaTab[v+2*8])
  804. tx = ((tx & 0xFFFF) + (tx >> 16))
  805. q[u+2] = m + tx
  806. q[u+2+16] = m - tx
  807. m = q[u+3]
  808. n = q[u+3+16]
  809. tx = (n * kAlphaTab[v+3*8])
  810. tx = ((tx & 0xFFFF) + (tx >> 16))
  811. q[u+3] = m + tx
  812. q[u+3+16] = m - tx
  813. for u < 16 {
  814. u += 4
  815. v += 4 * 8
  816. m = q[u+0]
  817. n = q[u+0+16]
  818. tx = (n * kAlphaTab[v+0*8])
  819. tx = ((tx & 0xFFFF) + (tx >> 16))
  820. q[u+0] = m + tx
  821. q[u+0+16] = m - tx
  822. m = q[u+1]
  823. n = q[u+1+16]
  824. tx = (n * kAlphaTab[v+1*8])
  825. tx = ((tx & 0xFFFF) + (tx >> 16))
  826. q[u+1] = m + tx
  827. q[u+1+16] = m - tx
  828. m = q[u+2]
  829. n = q[u+2+16]
  830. tx = (n * kAlphaTab[v+2*8])
  831. tx = ((tx & 0xFFFF) + (tx >> 16))
  832. q[u+2] = m + tx
  833. q[u+2+16] = m - tx
  834. m = q[u+3]
  835. n = q[u+3+16]
  836. tx = (n * kAlphaTab[v+3*8])
  837. tx = ((tx & 0xFFFF) + (tx >> 16))
  838. q[u+3] = m + tx
  839. q[u+3+16] = m - tx
  840. }
  841. }
  842. {
  843. var sa, sb uintptr
  844. var x0, x1, x2, x3 int32
  845. var a0, a1, a2, a3 int32
  846. var b0, b1, b2, b3 int32
  847. sb = uintptr(xd << 2)
  848. x0 = int32(x[xt])
  849. x1 = int32(x[xt+sb])
  850. x2 = int32(x[xt+2*sb])
  851. x3 = int32(x[xt+3*sb])
  852. a0 = x0 + x2
  853. a1 = x0 + (x2 << 4)
  854. a2 = x0 - x2
  855. a3 = x0 - (x2 << 4)
  856. b0 = x1 + x3
  857. tx = ((x1 << 2) + (x3 << 6))
  858. b1 = ((tx & 0xFF) - (tx >> 8))
  859. b2 = (x1 << 4) - (x3 << 4)
  860. tx = (x1 << 6) + (x3 << 2)
  861. b3 = ((tx & 0xFF) - (tx >> 8))
  862. d1_0 = a0 + b0
  863. d1_1 = a1 + b1
  864. d1_2 = a2 + b2
  865. d1_3 = a3 + b3
  866. d1_4 = a0 - b0
  867. d1_5 = a1 - b1
  868. d1_6 = a2 - b2
  869. d1_7 = a3 - b3
  870. sa = xt + (xd << 1)
  871. sb = xd << 2
  872. x0 = int32(x[sa])
  873. x1 = int32(x[sa+sb])
  874. x2 = int32(x[sa+2*sb])
  875. x3 = int32(x[sa+3*sb])
  876. a0 = x0 + x2
  877. a1 = x0 + (x2 << 4)
  878. a2 = x0 - x2
  879. a3 = x0 - (x2 << 4)
  880. b0 = x1 + x3
  881. tx = ((x1 << 2) + (x3 << 6))
  882. b1 = ((tx & 0xFF) - (tx >> 8))
  883. b2 = (x1 << 4) - (x3 << 4)
  884. tx = (x1 << 6) + (x3 << 2)
  885. b3 = ((tx & 0xFF) - (tx >> 8))
  886. d2_0 = a0 + b0
  887. d2_1 = a1 + b1
  888. d2_2 = a2 + b2
  889. d2_3 = a3 + b3
  890. d2_4 = a0 - b0
  891. d2_5 = a1 - b1
  892. d2_6 = a2 - b2
  893. d2_7 = a3 - b3
  894. }
  895. q[32+0] = d1_0 + d2_0
  896. q[32+1] = d1_1 + (d2_1 << 1)
  897. q[32+2] = d1_2 + (d2_2 << 2)
  898. q[32+3] = d1_3 + (d2_3 << 3)
  899. q[32+4] = d1_4 + (d2_4 << 4)
  900. q[32+5] = d1_5 + (d2_5 << 5)
  901. q[32+6] = d1_6 + (d2_6 << 6)
  902. q[32+7] = d1_7 + (d2_7 << 7)
  903. q[32+8] = d1_0 - d2_0
  904. q[32+9] = d1_1 - (d2_1 << 1)
  905. q[32+10] = d1_2 - (d2_2 << 2)
  906. q[32+11] = d1_3 - (d2_3 << 3)
  907. q[32+12] = d1_4 - (d2_4 << 4)
  908. q[32+13] = d1_5 - (d2_5 << 5)
  909. q[32+14] = d1_6 - (d2_6 << 6)
  910. q[32+15] = d1_7 - (d2_7 << 7)
  911. {
  912. var sa, sb uintptr
  913. var x0, x1, x2, x3 int32
  914. var a0, a1, a2, a3 int32
  915. var b0, b1, b2, b3 int32
  916. sa = (xt) + (xd)
  917. sb = xd << 2
  918. x0 = int32(x[sa])
  919. x1 = int32(x[sa+sb])
  920. x2 = int32(x[sa+2*sb])
  921. x3 = int32(x[sa+3*sb])
  922. a0 = x0 + x2
  923. a1 = x0 + (x2 << 4)
  924. a2 = x0 - x2
  925. a3 = x0 - (x2 << 4)
  926. b0 = x1 + x3
  927. tx = ((x1 << 2) + (x3 << 6))
  928. b1 = ((tx & 0xFF) - (tx >> 8))
  929. b2 = (x1 << 4) - (x3 << 4)
  930. tx = (x1 << 6) + (x3 << 2)
  931. b3 = ((tx & 0xFF) - (tx >> 8))
  932. d1_0 = a0 + b0
  933. d1_1 = a1 + b1
  934. d1_2 = a2 + b2
  935. d1_3 = a3 + b3
  936. d1_4 = a0 - b0
  937. d1_5 = a1 - b1
  938. d1_6 = a2 - b2
  939. d1_7 = a3 - b3
  940. sa = (xt + xd) + (xd << 1)
  941. sb = xd << 2
  942. x0 = int32(x[sa])
  943. x1 = int32(x[sa+sb])
  944. x2 = int32(x[sa+2*sb])
  945. x3 = int32(x[sa+3*sb])
  946. a0 = x0 + x2
  947. a1 = x0 + (x2 << 4)
  948. a2 = x0 - x2
  949. a3 = x0 - (x2 << 4)
  950. b0 = x1 + x3
  951. tx = ((x1 << 2) + (x3 << 6))
  952. b1 = ((tx & 0xFF) - (tx >> 8))
  953. b2 = (x1 << 4) - (x3 << 4)
  954. tx = (x1 << 6) + (x3 << 2)
  955. b3 = ((tx & 0xFF) - (tx >> 8))
  956. d2_0 = a0 + b0
  957. d2_1 = a1 + b1
  958. d2_2 = a2 + b2
  959. d2_3 = a3 + b3
  960. d2_4 = a0 - b0
  961. d2_5 = a1 - b1
  962. d2_6 = a2 - b2
  963. d2_7 = a3 - b3
  964. }
  965. q[48+0] = d1_0 + d2_0
  966. q[48+1] = d1_1 + (d2_1 << 1)
  967. q[48+2] = d1_2 + (d2_2 << 2)
  968. q[48+3] = d1_3 + (d2_3 << 3)
  969. q[48+4] = d1_4 + (d2_4 << 4)
  970. q[48+5] = d1_5 + (d2_5 << 5)
  971. q[48+6] = d1_6 + (d2_6 << 6)
  972. q[48+7] = d1_7 + (d2_7 << 7)
  973. q[48+8] = d1_0 - d2_0
  974. q[48+9] = d1_1 - (d2_1 << 1)
  975. q[48+10] = d1_2 - (d2_2 << 2)
  976. q[48+11] = d1_3 - (d2_3 << 3)
  977. q[48+12] = d1_4 - (d2_4 << 4)
  978. q[48+13] = d1_5 - (d2_5 << 5)
  979. q[48+14] = d1_6 - (d2_6 << 6)
  980. q[48+15] = d1_7 - (d2_7 << 7)
  981. {
  982. var u, v uintptr
  983. m := q[(32)]
  984. n := q[(32)+(16)]
  985. q[(32)] = m + n
  986. q[(32)+(16)] = m - n
  987. m = q[(32)+u+1]
  988. n = q[(32)+u+1+(16)]
  989. tx = (n * kAlphaTab[v+1*(8)])
  990. tx = ((tx & 0xFFFF) + (tx >> 16))
  991. q[(32)+u+1] = m + tx
  992. q[(32)+u+1+(16)] = m - tx
  993. m = q[(32)+u+2]
  994. n = q[(32)+u+2+(16)]
  995. tx = (n * kAlphaTab[v+2*(8)])
  996. tx = ((tx & 0xFFFF) + (tx >> 16))
  997. q[(32)+u+2] = m + tx
  998. q[(32)+u+2+(16)] = m - tx
  999. m = q[(32)+u+3]
  1000. n = q[(32)+u+3+(16)]
  1001. tx = (n * kAlphaTab[v+3*(8)])
  1002. tx = ((tx & 0xFFFF) + (tx >> 16))
  1003. q[(32)+u+3] = m + tx
  1004. q[(32)+u+3+(16)] = m - tx
  1005. u = 4
  1006. v = 4 * (8)
  1007. for u < 16 {
  1008. m = q[(32)+u]
  1009. n = q[(32)+u+(16)]
  1010. tx = (n * kAlphaTab[v+0*(8)])
  1011. tx = ((tx & 0xFFFF) + (tx >> 16))
  1012. q[(32)+u+0] = m + tx
  1013. q[(32)+u+0+(16)] = m - tx
  1014. m = q[(32)+u+1]
  1015. n = q[(32)+u+1+(16)]
  1016. tx = (n * kAlphaTab[v+1*(8)])
  1017. tx = ((tx & 0xFFFF) + (tx >> 16))
  1018. q[(32)+u+1] = m + tx
  1019. q[(32)+u+1+(16)] = m - tx
  1020. m = q[(32)+u+2]
  1021. n = q[(32)+u+2+(16)]
  1022. tx = (n * kAlphaTab[v+2*(8)])
  1023. tx = ((tx & 0xFFFF) + (tx >> 16))
  1024. q[(32)+u+2] = m + tx
  1025. q[(32)+u+2+(16)] = m - tx
  1026. m = q[(32)+u+3]
  1027. n = q[(32)+u+3+(16)]
  1028. tx = (n * kAlphaTab[v+3*(8)])
  1029. tx = ((tx & 0xFFFF) + (tx >> 16))
  1030. q[(32)+u+3] = m + tx
  1031. q[(32)+u+3+(16)] = m - tx
  1032. u += 4
  1033. v += 4 * (8)
  1034. }
  1035. }
  1036. {
  1037. var u, v uintptr
  1038. m := q[0]
  1039. n := q[32]
  1040. q[0] = m + n
  1041. q[32] = m - n
  1042. m = q[u+1]
  1043. n = q[u+1+32]
  1044. tx = (n * kAlphaTab[v+1*4])
  1045. tx = ((tx & 0xFFFF) + (tx >> 16))
  1046. q[u+1] = m + tx
  1047. q[u+1+32] = m - tx
  1048. m = q[u+2]
  1049. n = q[u+2+32]
  1050. tx = (n * kAlphaTab[v+2*4])
  1051. tx = ((tx & 0xFFFF) + (tx >> 16))
  1052. q[u+2] = m + tx
  1053. q[u+2+32] = m - tx
  1054. m = q[u+3]
  1055. n = q[u+3+32]
  1056. tx = (n * kAlphaTab[v+3*4])
  1057. tx = ((tx & 0xFFFF) + (tx >> 16))
  1058. q[u+3] = m + tx
  1059. q[u+3+32] = m - tx
  1060. u = 4
  1061. v = 4 * 4
  1062. for u < 32 {
  1063. m = q[u]
  1064. n = q[u+32]
  1065. tx = (n * kAlphaTab[v+0*4])
  1066. tx = ((tx & 0xFFFF) + (tx >> 16))
  1067. q[u] = m + tx
  1068. q[u+(32)] = m - tx
  1069. m = q[u+1]
  1070. n = q[u+1+32]
  1071. tx = (n * kAlphaTab[v+1*4])
  1072. tx = ((tx & 0xFFFF) + (tx >> 16))
  1073. q[u+1] = m + tx
  1074. q[u+1+32] = m - tx
  1075. m = q[u+2]
  1076. n = q[u+2+32]
  1077. tx = (n * kAlphaTab[v+2*4])
  1078. tx = ((tx & 0xFFFF) + (tx >> 16))
  1079. q[u+2] = m + tx
  1080. q[u+2+32] = m - tx
  1081. m = q[u+3]
  1082. n = q[u+3+32]
  1083. tx = (n * kAlphaTab[v+3*4])
  1084. tx = ((tx & 0xFFFF) + (tx >> 16))
  1085. q[u+3] = m + tx
  1086. q[u+3+32] = m - tx
  1087. u += 4
  1088. v += 4 * 4
  1089. }
  1090. }
  1091. }
  1092. func mixinRound(h, w []uint32, isp, p0, p1, p2, p3 uint32) {
  1093. var tA [8]uint32
  1094. var tp uint32
  1095. tA[0] = ((h[0] << p0) | (h[0] >> (32 - p0)))
  1096. tA[1] = ((h[1] << p0) | (h[1] >> (32 - p0)))
  1097. tA[2] = ((h[2] << p0) | (h[2] >> (32 - p0)))
  1098. tA[3] = ((h[3] << p0) | (h[3] >> (32 - p0)))
  1099. tA[4] = ((h[4] << p0) | (h[4] >> (32 - p0)))
  1100. tA[5] = ((h[5] << p0) | (h[5] >> (32 - p0)))
  1101. tA[6] = ((h[6] << p0) | (h[6] >> (32 - p0)))
  1102. tA[7] = ((h[7] << p0) | (h[7] >> (32 - p0)))
  1103. tp = uint32(h[kIdxD[0]] + w[0] +
  1104. (((h[kIdxB[0]] ^ h[kIdxC[0]]) & h[kIdxA[0]]) ^ h[kIdxC[0]]))
  1105. h[kIdxA[0]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]]
  1106. h[kIdxD[0]] = h[kIdxC[0]]
  1107. h[kIdxC[0]] = h[kIdxB[0]]
  1108. h[kIdxB[0]] = tA[0]
  1109. tp = uint32(h[kIdxD[1]] + w[1] +
  1110. (((h[kIdxB[1]] ^ h[kIdxC[1]]) & h[kIdxA[1]]) ^ h[kIdxC[1]]))
  1111. h[kIdxA[1]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^1]
  1112. h[kIdxD[1]] = h[kIdxC[1]]
  1113. h[kIdxC[1]] = h[kIdxB[1]]
  1114. h[kIdxB[1]] = tA[1]
  1115. tp = uint32(h[kIdxD[2]] + w[2] +
  1116. (((h[kIdxB[2]] ^ h[kIdxC[2]]) & h[kIdxA[2]]) ^ h[kIdxC[2]]))
  1117. h[kIdxA[2]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^2]
  1118. h[kIdxD[2]] = h[kIdxC[2]]
  1119. h[kIdxC[2]] = h[kIdxB[2]]
  1120. h[kIdxB[2]] = tA[2]
  1121. tp = uint32(h[kIdxD[3]] + w[3] +
  1122. (((h[kIdxB[3]] ^ h[kIdxC[3]]) & h[kIdxA[3]]) ^ h[kIdxC[3]]))
  1123. h[kIdxA[3]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^3]
  1124. h[kIdxD[3]] = h[kIdxC[3]]
  1125. h[kIdxC[3]] = h[kIdxB[3]]
  1126. h[kIdxB[3]] = tA[3]
  1127. tp = uint32(h[kIdxD[4]] + w[4] +
  1128. (((h[kIdxB[4]] ^ h[kIdxC[4]]) & h[kIdxA[4]]) ^ h[kIdxC[4]]))
  1129. h[kIdxA[4]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^4]
  1130. h[kIdxD[4]] = h[kIdxC[4]]
  1131. h[kIdxC[4]] = h[kIdxB[4]]
  1132. h[kIdxB[4]] = tA[4]
  1133. tp = uint32(h[kIdxD[5]] + w[5] +
  1134. (((h[kIdxB[5]] ^ h[kIdxC[5]]) & h[kIdxA[5]]) ^ h[kIdxC[5]]))
  1135. h[kIdxA[5]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^5]
  1136. h[kIdxD[5]] = h[kIdxC[5]]
  1137. h[kIdxC[5]] = h[kIdxB[5]]
  1138. h[kIdxB[5]] = tA[5]
  1139. tp = uint32(h[kIdxD[6]] + w[6] +
  1140. (((h[kIdxB[6]] ^ h[kIdxC[6]]) & h[kIdxA[6]]) ^ h[kIdxC[6]]))
  1141. h[kIdxA[6]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^6]
  1142. h[kIdxD[6]] = h[kIdxC[6]]
  1143. h[kIdxC[6]] = h[kIdxB[6]]
  1144. h[kIdxB[6]] = tA[6]
  1145. tp = uint32(h[kIdxD[7]] + w[7] +
  1146. (((h[kIdxB[7]] ^ h[kIdxC[7]]) & h[kIdxA[7]]) ^ h[kIdxC[7]]))
  1147. h[kIdxA[7]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp]^7]
  1148. h[kIdxD[7]] = h[kIdxC[7]]
  1149. h[kIdxC[7]] = h[kIdxB[7]]
  1150. h[kIdxB[7]] = tA[7]
  1151. tA[0] = ((h[0] << p1) | (h[0] >> (32 - p1)))
  1152. tA[1] = ((h[1] << p1) | (h[1] >> (32 - p1)))
  1153. tA[2] = ((h[2] << p1) | (h[2] >> (32 - p1)))
  1154. tA[3] = ((h[3] << p1) | (h[3] >> (32 - p1)))
  1155. tA[4] = ((h[4] << p1) | (h[4] >> (32 - p1)))
  1156. tA[5] = ((h[5] << p1) | (h[5] >> (32 - p1)))
  1157. tA[6] = ((h[6] << p1) | (h[6] >> (32 - p1)))
  1158. tA[7] = ((h[7] << p1) | (h[7] >> (32 - p1)))
  1159. tp = uint32(h[kIdxD[0]] + w[8] +
  1160. (((h[kIdxB[0]] ^ h[kIdxC[0]]) & h[kIdxA[0]]) ^ h[kIdxC[0]]))
  1161. h[kIdxA[0]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]]
  1162. h[kIdxD[0]] = h[kIdxC[0]]
  1163. h[kIdxC[0]] = h[kIdxB[0]]
  1164. h[kIdxB[0]] = tA[0]
  1165. tp = uint32(h[kIdxD[1]] + w[9] +
  1166. (((h[kIdxB[1]] ^ h[kIdxC[1]]) & h[kIdxA[1]]) ^ h[kIdxC[1]]))
  1167. h[kIdxA[1]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^1]
  1168. h[kIdxD[1]] = h[kIdxC[1]]
  1169. h[kIdxC[1]] = h[kIdxB[1]]
  1170. h[kIdxB[1]] = tA[1]
  1171. tp = uint32(h[kIdxD[2]] + w[10] +
  1172. (((h[kIdxB[2]] ^ h[kIdxC[2]]) & h[kIdxA[2]]) ^ h[kIdxC[2]]))
  1173. h[kIdxA[2]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^2]
  1174. h[kIdxD[2]] = h[kIdxC[2]]
  1175. h[kIdxC[2]] = h[kIdxB[2]]
  1176. h[kIdxB[2]] = tA[2]
  1177. tp = uint32(h[kIdxD[3]] + w[11] +
  1178. (((h[kIdxB[3]] ^ h[kIdxC[3]]) & h[kIdxA[3]]) ^ h[kIdxC[3]]))
  1179. h[kIdxA[3]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^3]
  1180. h[kIdxD[3]] = h[kIdxC[3]]
  1181. h[kIdxC[3]] = h[kIdxB[3]]
  1182. h[kIdxB[3]] = tA[3]
  1183. tp = uint32(h[kIdxD[4]] + w[12] +
  1184. (((h[kIdxB[4]] ^ h[kIdxC[4]]) & h[kIdxA[4]]) ^ h[kIdxC[4]]))
  1185. h[kIdxA[4]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^4]
  1186. h[kIdxD[4]] = h[kIdxC[4]]
  1187. h[kIdxC[4]] = h[kIdxB[4]]
  1188. h[kIdxB[4]] = tA[4]
  1189. tp = uint32(h[kIdxD[5]] + w[13] +
  1190. (((h[kIdxB[5]] ^ h[kIdxC[5]]) & h[kIdxA[5]]) ^ h[kIdxC[5]]))
  1191. h[kIdxA[5]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^5]
  1192. h[kIdxD[5]] = h[kIdxC[5]]
  1193. h[kIdxC[5]] = h[kIdxB[5]]
  1194. h[kIdxB[5]] = tA[5]
  1195. tp = uint32(h[kIdxD[6]] + w[14] +
  1196. (((h[kIdxB[6]] ^ h[kIdxC[6]]) & h[kIdxA[6]]) ^ h[kIdxC[6]]))
  1197. h[kIdxA[6]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^6]
  1198. h[kIdxD[6]] = h[kIdxC[6]]
  1199. h[kIdxC[6]] = h[kIdxB[6]]
  1200. h[kIdxB[6]] = tA[6]
  1201. tp = uint32(h[kIdxD[7]] + w[15] +
  1202. (((h[kIdxB[7]] ^ h[kIdxC[7]]) & h[kIdxA[7]]) ^ h[kIdxC[7]]))
  1203. h[kIdxA[7]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+1]^7]
  1204. h[kIdxD[7]] = h[kIdxC[7]]
  1205. h[kIdxC[7]] = h[kIdxB[7]]
  1206. h[kIdxB[7]] = tA[7]
  1207. tA[0] = ((h[0] << p2) | (h[0] >> (32 - p2)))
  1208. tA[1] = ((h[1] << p2) | (h[1] >> (32 - p2)))
  1209. tA[2] = ((h[2] << p2) | (h[2] >> (32 - p2)))
  1210. tA[3] = ((h[3] << p2) | (h[3] >> (32 - p2)))
  1211. tA[4] = ((h[4] << p2) | (h[4] >> (32 - p2)))
  1212. tA[5] = ((h[5] << p2) | (h[5] >> (32 - p2)))
  1213. tA[6] = ((h[6] << p2) | (h[6] >> (32 - p2)))
  1214. tA[7] = ((h[7] << p2) | (h[7] >> (32 - p2)))
  1215. tp = uint32(h[kIdxD[0]] + w[16] +
  1216. (((h[kIdxB[0]] ^ h[kIdxC[0]]) & h[kIdxA[0]]) ^ h[kIdxC[0]]))
  1217. h[kIdxA[0]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]]
  1218. h[kIdxD[0]] = h[kIdxC[0]]
  1219. h[kIdxC[0]] = h[kIdxB[0]]
  1220. h[kIdxB[0]] = tA[0]
  1221. tp = uint32(h[kIdxD[1]] + w[17] +
  1222. (((h[kIdxB[1]] ^ h[kIdxC[1]]) & h[kIdxA[1]]) ^ h[kIdxC[1]]))
  1223. h[kIdxA[1]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^1]
  1224. h[kIdxD[1]] = h[kIdxC[1]]
  1225. h[kIdxC[1]] = h[kIdxB[1]]
  1226. h[kIdxB[1]] = tA[1]
  1227. tp = uint32(h[kIdxD[2]] + w[18] +
  1228. (((h[kIdxB[2]] ^ h[kIdxC[2]]) & h[kIdxA[2]]) ^ h[kIdxC[2]]))
  1229. h[kIdxA[2]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^2]
  1230. h[kIdxD[2]] = h[kIdxC[2]]
  1231. h[kIdxC[2]] = h[kIdxB[2]]
  1232. h[kIdxB[2]] = tA[2]
  1233. tp = uint32(h[kIdxD[3]] + w[19] +
  1234. (((h[kIdxB[3]] ^ h[kIdxC[3]]) & h[kIdxA[3]]) ^ h[kIdxC[3]]))
  1235. h[kIdxA[3]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^3]
  1236. h[kIdxD[3]] = h[kIdxC[3]]
  1237. h[kIdxC[3]] = h[kIdxB[3]]
  1238. h[kIdxB[3]] = tA[3]
  1239. tp = uint32(h[kIdxD[4]] + w[20] +
  1240. (((h[kIdxB[4]] ^ h[kIdxC[4]]) & h[kIdxA[4]]) ^ h[kIdxC[4]]))
  1241. h[kIdxA[4]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^4]
  1242. h[kIdxD[4]] = h[kIdxC[4]]
  1243. h[kIdxC[4]] = h[kIdxB[4]]
  1244. h[kIdxB[4]] = tA[4]
  1245. tp = uint32(h[kIdxD[5]] + w[21] +
  1246. (((h[kIdxB[5]] ^ h[kIdxC[5]]) & h[kIdxA[5]]) ^ h[kIdxC[5]]))
  1247. h[kIdxA[5]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^5]
  1248. h[kIdxD[5]] = h[kIdxC[5]]
  1249. h[kIdxC[5]] = h[kIdxB[5]]
  1250. h[kIdxB[5]] = tA[5]
  1251. tp = uint32(h[kIdxD[6]] + w[22] +
  1252. (((h[kIdxB[6]] ^ h[kIdxC[6]]) & h[kIdxA[6]]) ^ h[kIdxC[6]]))
  1253. h[kIdxA[6]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^6]
  1254. h[kIdxD[6]] = h[kIdxC[6]]
  1255. h[kIdxC[6]] = h[kIdxB[6]]
  1256. h[kIdxB[6]] = tA[6]
  1257. tp = uint32(h[kIdxD[7]] + w[23] +
  1258. (((h[kIdxB[7]] ^ h[kIdxC[7]]) & h[kIdxA[7]]) ^ h[kIdxC[7]]))
  1259. h[kIdxA[7]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+2]^7]
  1260. h[kIdxD[7]] = h[kIdxC[7]]
  1261. h[kIdxC[7]] = h[kIdxB[7]]
  1262. h[kIdxB[7]] = tA[7]
  1263. tA[0] = ((h[0] << p3) | (h[0] >> (32 - p3)))
  1264. tA[1] = ((h[1] << p3) | (h[1] >> (32 - p3)))
  1265. tA[2] = ((h[2] << p3) | (h[2] >> (32 - p3)))
  1266. tA[3] = ((h[3] << p3) | (h[3] >> (32 - p3)))
  1267. tA[4] = ((h[4] << p3) | (h[4] >> (32 - p3)))
  1268. tA[5] = ((h[5] << p3) | (h[5] >> (32 - p3)))
  1269. tA[6] = ((h[6] << p3) | (h[6] >> (32 - p3)))
  1270. tA[7] = ((h[7] << p3) | (h[7] >> (32 - p3)))
  1271. tp = uint32(h[kIdxD[0]] + w[24] +
  1272. (((h[kIdxB[0]] ^ h[kIdxC[0]]) & h[kIdxA[0]]) ^ h[kIdxC[0]]))
  1273. h[kIdxA[0]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]]
  1274. h[kIdxD[0]] = h[kIdxC[0]]
  1275. h[kIdxC[0]] = h[kIdxB[0]]
  1276. h[kIdxB[0]] = tA[0]
  1277. tp = uint32(h[kIdxD[1]] + w[25] +
  1278. (((h[kIdxB[1]] ^ h[kIdxC[1]]) & h[kIdxA[1]]) ^ h[kIdxC[1]]))
  1279. h[kIdxA[1]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^1]
  1280. h[kIdxD[1]] = h[kIdxC[1]]
  1281. h[kIdxC[1]] = h[kIdxB[1]]
  1282. h[kIdxB[1]] = tA[1]
  1283. tp = uint32(h[kIdxD[2]] + w[26] +
  1284. (((h[kIdxB[2]] ^ h[kIdxC[2]]) & h[kIdxA[2]]) ^ h[kIdxC[2]]))
  1285. h[kIdxA[2]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^2]
  1286. h[kIdxD[2]] = h[kIdxC[2]]
  1287. h[kIdxC[2]] = h[kIdxB[2]]
  1288. h[kIdxB[2]] = tA[2]
  1289. tp = uint32(h[kIdxD[3]] + w[27] +
  1290. (((h[kIdxB[3]] ^ h[kIdxC[3]]) & h[kIdxA[3]]) ^ h[kIdxC[3]]))
  1291. h[kIdxA[3]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^3]
  1292. h[kIdxD[3]] = h[kIdxC[3]]
  1293. h[kIdxC[3]] = h[kIdxB[3]]
  1294. h[kIdxB[3]] = tA[3]
  1295. tp = uint32(h[kIdxD[4]] + w[28] +
  1296. (((h[kIdxB[4]] ^ h[kIdxC[4]]) & h[kIdxA[4]]) ^ h[kIdxC[4]]))
  1297. h[kIdxA[4]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^4]
  1298. h[kIdxD[4]] = h[kIdxC[4]]
  1299. h[kIdxC[4]] = h[kIdxB[4]]
  1300. h[kIdxB[4]] = tA[4]
  1301. tp = uint32(h[kIdxD[5]] + w[29] +
  1302. (((h[kIdxB[5]] ^ h[kIdxC[5]]) & h[kIdxA[5]]) ^ h[kIdxC[5]]))
  1303. h[kIdxA[5]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^5]
  1304. h[kIdxD[5]] = h[kIdxC[5]]
  1305. h[kIdxC[5]] = h[kIdxB[5]]
  1306. h[kIdxB[5]] = tA[5]
  1307. tp = uint32(h[kIdxD[6]] + w[30] +
  1308. (((h[kIdxB[6]] ^ h[kIdxC[6]]) & h[kIdxA[6]]) ^ h[kIdxC[6]]))
  1309. h[kIdxA[6]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^6]
  1310. h[kIdxD[6]] = h[kIdxC[6]]
  1311. h[kIdxC[6]] = h[kIdxB[6]]
  1312. h[kIdxB[6]] = tA[6]
  1313. tp = uint32(h[kIdxD[7]] + w[31] +
  1314. (((h[kIdxB[7]] ^ h[kIdxC[7]]) & h[kIdxA[7]]) ^ h[kIdxC[7]]))
  1315. h[kIdxA[7]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+3]^7]
  1316. h[kIdxD[7]] = h[kIdxC[7]]
  1317. h[kIdxC[7]] = h[kIdxB[7]]
  1318. h[kIdxB[7]] = tA[7]
  1319. tA[0] = ((h[0] << p0) | (h[0] >> (32 - p0)))
  1320. tA[1] = ((h[1] << p0) | (h[1] >> (32 - p0)))
  1321. tA[2] = ((h[2] << p0) | (h[2] >> (32 - p0)))
  1322. tA[3] = ((h[3] << p0) | (h[3] >> (32 - p0)))
  1323. tA[4] = ((h[4] << p0) | (h[4] >> (32 - p0)))
  1324. tA[5] = ((h[5] << p0) | (h[5] >> (32 - p0)))
  1325. tA[6] = ((h[6] << p0) | (h[6] >> (32 - p0)))
  1326. tA[7] = ((h[7] << p0) | (h[7] >> (32 - p0)))
  1327. tp = uint32(h[kIdxD[0]] + w[32] +
  1328. ((h[kIdxA[0]] & h[kIdxB[0]]) | ((h[kIdxA[0]] | h[kIdxB[0]]) & h[kIdxC[0]])))
  1329. h[kIdxA[0]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]]
  1330. h[kIdxD[0]] = h[kIdxC[0]]
  1331. h[kIdxC[0]] = h[kIdxB[0]]
  1332. h[kIdxB[0]] = tA[0]
  1333. tp = uint32(h[kIdxD[1]] + w[33] +
  1334. ((h[kIdxA[1]] & h[kIdxB[1]]) | ((h[kIdxA[1]] | h[kIdxB[1]]) & h[kIdxC[1]])))
  1335. h[kIdxA[1]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^1]
  1336. h[kIdxD[1]] = h[kIdxC[1]]
  1337. h[kIdxC[1]] = h[kIdxB[1]]
  1338. h[kIdxB[1]] = tA[1]
  1339. tp = uint32(h[kIdxD[2]] + w[34] +
  1340. ((h[kIdxA[2]] & h[kIdxB[2]]) | ((h[kIdxA[2]] | h[kIdxB[2]]) & h[kIdxC[2]])))
  1341. h[kIdxA[2]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^2]
  1342. h[kIdxD[2]] = h[kIdxC[2]]
  1343. h[kIdxC[2]] = h[kIdxB[2]]
  1344. h[kIdxB[2]] = tA[2]
  1345. tp = uint32(h[kIdxD[3]] + w[35] +
  1346. ((h[kIdxA[3]] & h[kIdxB[3]]) | ((h[kIdxA[3]] | h[kIdxB[3]]) & h[kIdxC[3]])))
  1347. h[kIdxA[3]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^3]
  1348. h[kIdxD[3]] = h[kIdxC[3]]
  1349. h[kIdxC[3]] = h[kIdxB[3]]
  1350. h[kIdxB[3]] = tA[3]
  1351. tp = uint32(h[kIdxD[4]] + w[36] +
  1352. ((h[kIdxA[4]] & h[kIdxB[4]]) | ((h[kIdxA[4]] | h[kIdxB[4]]) & h[kIdxC[4]])))
  1353. h[kIdxA[4]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^4]
  1354. h[kIdxD[4]] = h[kIdxC[4]]
  1355. h[kIdxC[4]] = h[kIdxB[4]]
  1356. h[kIdxB[4]] = tA[4]
  1357. tp = uint32(h[kIdxD[5]] + w[37] +
  1358. ((h[kIdxA[5]] & h[kIdxB[5]]) | ((h[kIdxA[5]] | h[kIdxB[5]]) & h[kIdxC[5]])))
  1359. h[kIdxA[5]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^5]
  1360. h[kIdxD[5]] = h[kIdxC[5]]
  1361. h[kIdxC[5]] = h[kIdxB[5]]
  1362. h[kIdxB[5]] = tA[5]
  1363. tp = uint32(h[kIdxD[6]] + w[38] +
  1364. ((h[kIdxA[6]] & h[kIdxB[6]]) | ((h[kIdxA[6]] | h[kIdxB[6]]) & h[kIdxC[6]])))
  1365. h[kIdxA[6]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^6]
  1366. h[kIdxD[6]] = h[kIdxC[6]]
  1367. h[kIdxC[6]] = h[kIdxB[6]]
  1368. h[kIdxB[6]] = tA[6]
  1369. tp = uint32(h[kIdxD[7]] + w[39] +
  1370. ((h[kIdxA[7]] & h[kIdxB[7]]) | ((h[kIdxA[7]] | h[kIdxB[7]]) & h[kIdxC[7]])))
  1371. h[kIdxA[7]] = ((tp << p1) | (tp >> (32 - p1))) + tA[kPrems[isp+4]^7]
  1372. h[kIdxD[7]] = h[kIdxC[7]]
  1373. h[kIdxC[7]] = h[kIdxB[7]]
  1374. h[kIdxB[7]] = tA[7]
  1375. tA[0] = ((h[0] << p1) | (h[0] >> (32 - p1)))
  1376. tA[1] = ((h[1] << p1) | (h[1] >> (32 - p1)))
  1377. tA[2] = ((h[2] << p1) | (h[2] >> (32 - p1)))
  1378. tA[3] = ((h[3] << p1) | (h[3] >> (32 - p1)))
  1379. tA[4] = ((h[4] << p1) | (h[4] >> (32 - p1)))
  1380. tA[5] = ((h[5] << p1) | (h[5] >> (32 - p1)))
  1381. tA[6] = ((h[6] << p1) | (h[6] >> (32 - p1)))
  1382. tA[7] = ((h[7] << p1) | (h[7] >> (32 - p1)))
  1383. tp = uint32(h[kIdxD[0]] + w[40] +
  1384. ((h[kIdxA[0]] & h[kIdxB[0]]) | ((h[kIdxA[0]] | h[kIdxB[0]]) & h[kIdxC[0]])))
  1385. h[kIdxA[0]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]]
  1386. h[kIdxD[0]] = h[kIdxC[0]]
  1387. h[kIdxC[0]] = h[kIdxB[0]]
  1388. h[kIdxB[0]] = tA[0]
  1389. tp = uint32(h[kIdxD[1]] + w[41] +
  1390. ((h[kIdxA[1]] & h[kIdxB[1]]) | ((h[kIdxA[1]] | h[kIdxB[1]]) & h[kIdxC[1]])))
  1391. h[kIdxA[1]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^1]
  1392. h[kIdxD[1]] = h[kIdxC[1]]
  1393. h[kIdxC[1]] = h[kIdxB[1]]
  1394. h[kIdxB[1]] = tA[1]
  1395. tp = uint32(h[kIdxD[2]] + w[42] +
  1396. ((h[kIdxA[2]] & h[kIdxB[2]]) | ((h[kIdxA[2]] | h[kIdxB[2]]) & h[kIdxC[2]])))
  1397. h[kIdxA[2]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^2]
  1398. h[kIdxD[2]] = h[kIdxC[2]]
  1399. h[kIdxC[2]] = h[kIdxB[2]]
  1400. h[kIdxB[2]] = tA[2]
  1401. tp = uint32(h[kIdxD[3]] + w[43] +
  1402. ((h[kIdxA[3]] & h[kIdxB[3]]) | ((h[kIdxA[3]] | h[kIdxB[3]]) & h[kIdxC[3]])))
  1403. h[kIdxA[3]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^3]
  1404. h[kIdxD[3]] = h[kIdxC[3]]
  1405. h[kIdxC[3]] = h[kIdxB[3]]
  1406. h[kIdxB[3]] = tA[3]
  1407. tp = uint32(h[kIdxD[4]] + w[44] +
  1408. ((h[kIdxA[4]] & h[kIdxB[4]]) | ((h[kIdxA[4]] | h[kIdxB[4]]) & h[kIdxC[4]])))
  1409. h[kIdxA[4]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^4]
  1410. h[kIdxD[4]] = h[kIdxC[4]]
  1411. h[kIdxC[4]] = h[kIdxB[4]]
  1412. h[kIdxB[4]] = tA[4]
  1413. tp = uint32(h[kIdxD[5]] + w[45] +
  1414. ((h[kIdxA[5]] & h[kIdxB[5]]) | ((h[kIdxA[5]] | h[kIdxB[5]]) & h[kIdxC[5]])))
  1415. h[kIdxA[5]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^5]
  1416. h[kIdxD[5]] = h[kIdxC[5]]
  1417. h[kIdxC[5]] = h[kIdxB[5]]
  1418. h[kIdxB[5]] = tA[5]
  1419. tp = uint32(h[kIdxD[6]] + w[46] +
  1420. ((h[kIdxA[6]] & h[kIdxB[6]]) | ((h[kIdxA[6]] | h[kIdxB[6]]) & h[kIdxC[6]])))
  1421. h[kIdxA[6]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^6]
  1422. h[kIdxD[6]] = h[kIdxC[6]]
  1423. h[kIdxC[6]] = h[kIdxB[6]]
  1424. h[kIdxB[6]] = tA[6]
  1425. tp = uint32(h[kIdxD[7]] + w[47] +
  1426. ((h[kIdxA[7]] & h[kIdxB[7]]) | ((h[kIdxA[7]] | h[kIdxB[7]]) & h[kIdxC[7]])))
  1427. h[kIdxA[7]] = ((tp << p2) | (tp >> (32 - p2))) + tA[kPrems[isp+5]^7]
  1428. h[kIdxD[7]] = h[kIdxC[7]]
  1429. h[kIdxC[7]] = h[kIdxB[7]]
  1430. h[kIdxB[7]] = tA[7]
  1431. tA[0] = ((h[0] << p2) | (h[0] >> (32 - p2)))
  1432. tA[1] = ((h[1] << p2) | (h[1] >> (32 - p2)))
  1433. tA[2] = ((h[2] << p2) | (h[2] >> (32 - p2)))
  1434. tA[3] = ((h[3] << p2) | (h[3] >> (32 - p2)))
  1435. tA[4] = ((h[4] << p2) | (h[4] >> (32 - p2)))
  1436. tA[5] = ((h[5] << p2) | (h[5] >> (32 - p2)))
  1437. tA[6] = ((h[6] << p2) | (h[6] >> (32 - p2)))
  1438. tA[7] = ((h[7] << p2) | (h[7] >> (32 - p2)))
  1439. tp = uint32(h[kIdxD[0]] + w[48] +
  1440. ((h[kIdxA[0]] & h[kIdxB[0]]) | ((h[kIdxA[0]] | h[kIdxB[0]]) & h[kIdxC[0]])))
  1441. h[kIdxA[0]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]]
  1442. h[kIdxD[0]] = h[kIdxC[0]]
  1443. h[kIdxC[0]] = h[kIdxB[0]]
  1444. h[kIdxB[0]] = tA[0]
  1445. tp = uint32(h[kIdxD[1]] + w[49] +
  1446. ((h[kIdxA[1]] & h[kIdxB[1]]) | ((h[kIdxA[1]] | h[kIdxB[1]]) & h[kIdxC[1]])))
  1447. h[kIdxA[1]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^1]
  1448. h[kIdxD[1]] = h[kIdxC[1]]
  1449. h[kIdxC[1]] = h[kIdxB[1]]
  1450. h[kIdxB[1]] = tA[1]
  1451. tp = uint32(h[kIdxD[2]] + w[50] +
  1452. ((h[kIdxA[2]] & h[kIdxB[2]]) | ((h[kIdxA[2]] | h[kIdxB[2]]) & h[kIdxC[2]])))
  1453. h[kIdxA[2]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^2]
  1454. h[kIdxD[2]] = h[kIdxC[2]]
  1455. h[kIdxC[2]] = h[kIdxB[2]]
  1456. h[kIdxB[2]] = tA[2]
  1457. tp = uint32(h[kIdxD[3]] + w[51] +
  1458. ((h[kIdxA[3]] & h[kIdxB[3]]) | ((h[kIdxA[3]] | h[kIdxB[3]]) & h[kIdxC[3]])))
  1459. h[kIdxA[3]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^3]
  1460. h[kIdxD[3]] = h[kIdxC[3]]
  1461. h[kIdxC[3]] = h[kIdxB[3]]
  1462. h[kIdxB[3]] = tA[3]
  1463. tp = uint32(h[kIdxD[4]] + w[52] +
  1464. ((h[kIdxA[4]] & h[kIdxB[4]]) | ((h[kIdxA[4]] | h[kIdxB[4]]) & h[kIdxC[4]])))
  1465. h[kIdxA[4]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^4]
  1466. h[kIdxD[4]] = h[kIdxC[4]]
  1467. h[kIdxC[4]] = h[kIdxB[4]]
  1468. h[kIdxB[4]] = tA[4]
  1469. tp = uint32(h[kIdxD[5]] + w[53] +
  1470. ((h[kIdxA[5]] & h[kIdxB[5]]) | ((h[kIdxA[5]] | h[kIdxB[5]]) & h[kIdxC[5]])))
  1471. h[kIdxA[5]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^5]
  1472. h[kIdxD[5]] = h[kIdxC[5]]
  1473. h[kIdxC[5]] = h[kIdxB[5]]
  1474. h[kIdxB[5]] = tA[5]
  1475. tp = uint32(h[kIdxD[6]] + w[54] +
  1476. ((h[kIdxA[6]] & h[kIdxB[6]]) | ((h[kIdxA[6]] | h[kIdxB[6]]) & h[kIdxC[6]])))
  1477. h[kIdxA[6]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^6]
  1478. h[kIdxD[6]] = h[kIdxC[6]]
  1479. h[kIdxC[6]] = h[kIdxB[6]]
  1480. h[kIdxB[6]] = tA[6]
  1481. tp = uint32(h[kIdxD[7]] + w[55] +
  1482. ((h[kIdxA[7]] & h[kIdxB[7]]) | ((h[kIdxA[7]] | h[kIdxB[7]]) & h[kIdxC[7]])))
  1483. h[kIdxA[7]] = ((tp << p3) | (tp >> (32 - p3))) + tA[kPrems[isp+6]^7]
  1484. h[kIdxD[7]] = h[kIdxC[7]]
  1485. h[kIdxC[7]] = h[kIdxB[7]]
  1486. h[kIdxB[7]] = tA[7]
  1487. tA[0] = ((h[0] << p3) | (h[0] >> (32 - p3)))
  1488. tA[1] = ((h[1] << p3) | (h[1] >> (32 - p3)))
  1489. tA[2] = ((h[2] << p3) | (h[2] >> (32 - p3)))
  1490. tA[3] = ((h[3] << p3) | (h[3] >> (32 - p3)))
  1491. tA[4] = ((h[4] << p3) | (h[4] >> (32 - p3)))
  1492. tA[5] = ((h[5] << p3) | (h[5] >> (32 - p3)))
  1493. tA[6] = ((h[6] << p3) | (h[6] >> (32 - p3)))
  1494. tA[7] = ((h[7] << p3) | (h[7] >> (32 - p3)))
  1495. tp = uint32(h[kIdxD[0]] + w[56] +
  1496. ((h[kIdxA[0]] & h[kIdxB[0]]) | ((h[kIdxA[0]] | h[kIdxB[0]]) & h[kIdxC[0]])))
  1497. h[kIdxA[0]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]]
  1498. h[kIdxD[0]] = h[kIdxC[0]]
  1499. h[kIdxC[0]] = h[kIdxB[0]]
  1500. h[kIdxB[0]] = tA[0]
  1501. tp = uint32(h[kIdxD[1]] + w[57] +
  1502. ((h[kIdxA[1]] & h[kIdxB[1]]) | ((h[kIdxA[1]] | h[kIdxB[1]]) & h[kIdxC[1]])))
  1503. h[kIdxA[1]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^1]
  1504. h[kIdxD[1]] = h[kIdxC[1]]
  1505. h[kIdxC[1]] = h[kIdxB[1]]
  1506. h[kIdxB[1]] = tA[1]
  1507. tp = uint32(h[kIdxD[2]] + w[58] +
  1508. ((h[kIdxA[2]] & h[kIdxB[2]]) | ((h[kIdxA[2]] | h[kIdxB[2]]) & h[kIdxC[2]])))
  1509. h[kIdxA[2]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^2]
  1510. h[kIdxD[2]] = h[kIdxC[2]]
  1511. h[kIdxC[2]] = h[kIdxB[2]]
  1512. h[kIdxB[2]] = tA[2]
  1513. tp = uint32(h[kIdxD[3]] + w[59] +
  1514. ((h[kIdxA[3]] & h[kIdxB[3]]) | ((h[kIdxA[3]] | h[kIdxB[3]]) & h[kIdxC[3]])))
  1515. h[kIdxA[3]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^3]
  1516. h[kIdxD[3]] = h[kIdxC[3]]
  1517. h[kIdxC[3]] = h[kIdxB[3]]
  1518. h[kIdxB[3]] = tA[3]
  1519. tp = uint32(h[kIdxD[4]] + w[60] +
  1520. ((h[kIdxA[4]] & h[kIdxB[4]]) | ((h[kIdxA[4]] | h[kIdxB[4]]) & h[kIdxC[4]])))
  1521. h[kIdxA[4]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^4]
  1522. h[kIdxD[4]] = h[kIdxC[4]]
  1523. h[kIdxC[4]] = h[kIdxB[4]]
  1524. h[kIdxB[4]] = tA[4]
  1525. tp = uint32(h[kIdxD[5]] + w[61] +
  1526. ((h[kIdxA[5]] & h[kIdxB[5]]) | ((h[kIdxA[5]] | h[kIdxB[5]]) & h[kIdxC[5]])))
  1527. h[kIdxA[5]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^5]
  1528. h[kIdxD[5]] = h[kIdxC[5]]
  1529. h[kIdxC[5]] = h[kIdxB[5]]
  1530. h[kIdxB[5]] = tA[5]
  1531. tp = uint32(h[kIdxD[6]] + w[62] +
  1532. ((h[kIdxA[6]] & h[kIdxB[6]]) | ((h[kIdxA[6]] | h[kIdxB[6]]) & h[kIdxC[6]])))
  1533. h[kIdxA[6]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^6]
  1534. h[kIdxD[6]] = h[kIdxC[6]]
  1535. h[kIdxC[6]] = h[kIdxB[6]]
  1536. h[kIdxB[6]] = tA[6]
  1537. tp = uint32(h[kIdxD[7]] + w[63] +
  1538. ((h[kIdxA[7]] & h[kIdxB[7]]) | ((h[kIdxA[7]] | h[kIdxB[7]]) & h[kIdxC[7]])))
  1539. h[kIdxA[7]] = ((tp << p0) | (tp >> (32 - p0))) + tA[kPrems[isp+7]^7]
  1540. h[kIdxD[7]] = h[kIdxC[7]]
  1541. h[kIdxC[7]] = h[kIdxB[7]]
  1542. h[kIdxB[7]] = tA[7]
  1543. }
  1544. ////////////////
  // kInit holds the 32 words that Reset copies into the digest state.
  var kInit = []uint32{
  	0x0BA16B95, 0x72F999AD, 0x9FECC2AE, 0xBA3264FC,
  	0x5E894929, 0x8E9F30E5, 0x2F1DAA37, 0xF0F2C558,
  	0xAC506643, 0xA90635A5, 0xE25B878B, 0xAAB7878F,
  	0x88817F7A, 0x0A02892B, 0x559A7550, 0x598F657E,
  	0x7EEF60A1, 0x6B70E3E8, 0x9C1714D1, 0xB958E2A8,
  	0xAB02675E, 0xED1C014F, 0xCD8D65BB, 0xFDB7A257,
  	0x09254899, 0xD699C7BC, 0x9019B6DC, 0x2B9022E4,
  	0x8FA14956, 0x21BF9BD3, 0xB94D0943, 0x6FFDDC22,
  }
  // kIdxA, kIdxB, kIdxC and kIdxD give, for each of the eight lanes handled by
  // mixinRound, the position in the state slice of that lane's A, B, C and D
  // word respectively.
  var (
  	kIdxA = [8]uint8{0, 1, 2, 3, 4, 5, 6, 7}
  	kIdxB = [8]uint8{8, 9, 10, 11, 12, 13, 14, 15}
  	kIdxC = [8]uint8{16, 17, 18, 19, 20, 21, 22, 23}
  	kIdxD = [8]uint8{24, 25, 26, 27, 28, 29, 30, 31}
  )
  // kPrems lists the XOR masks mixinRound applies to a lane number to select
  // the rotated word it adds in: step s of a call started at isp uses
  // kPrems[isp+s]. The seven distinct masks are followed by a repeat of the
  // first four.
  var kPrems = []uint8{
  	1, 6, 2, 3, 5, 7, 4,
  	1, 6, 2, 3,
  }
  // kPrem holds seven permutations of the lane numbers 0..7. Row r maps lane
  // j to j^m, with m taking the values 1, 6, 2, 3, 5, 7, 4 in row order.
  var kPrem = [7][8]uint8{
  	0: {1, 0, 3, 2, 5, 4, 7, 6}, // j ^ 1
  	1: {6, 7, 4, 5, 2, 3, 0, 1}, // j ^ 6
  	2: {2, 3, 0, 1, 6, 7, 4, 5}, // j ^ 2
  	3: {3, 2, 1, 0, 7, 6, 5, 4}, // j ^ 3
  	4: {5, 4, 7, 6, 1, 0, 3, 2}, // j ^ 5
  	5: {7, 6, 5, 4, 3, 2, 1, 0}, // j ^ 7
  	6: {4, 5, 6, 7, 0, 1, 2, 3}, // j ^ 4
  }
  // wbp holds 32 offsets, each a multiple of 16. The multipliers form a
  // permutation of 0..31.
  var wbp = [32]uintptr{
  	4 * 16, 6 * 16, 0 * 16, 2 * 16, 7 * 16, 5 * 16, 3 * 16, 1 * 16,
  	15 * 16, 11 * 16, 12 * 16, 8 * 16, 9 * 16, 13 * 16, 10 * 16, 14 * 16,
  	17 * 16, 18 * 16, 23 * 16, 20 * 16, 22 * 16, 21 * 16, 16 * 16, 19 * 16,
  	30 * 16, 24 * 16, 25 * 16, 31 * 16, 27 * 16, 29 * 16, 28 * 16, 26 * 16,
  }
  // kAlphaTab holds the 256 successive powers of 41 modulo 257:
  // kAlphaTab[i] == 41^i mod 257. With sixteen entries per row, each row is
  // the previous one doubled modulo 257. The transform code multiplies
  // intermediate values by entries of this table.
  var kAlphaTab = []int32{
  	1, 41, 139, 45, 46, 87, 226, 14, 60, 147, 116, 130, 190, 80, 196, 69,
  	2, 82, 21, 90, 92, 174, 195, 28, 120, 37, 232, 3, 123, 160, 135, 138,
  	4, 164, 42, 180, 184, 91, 133, 56, 240, 74, 207, 6, 246, 63, 13, 19,
  	8, 71, 84, 103, 111, 182, 9, 112, 223, 148, 157, 12, 235, 126, 26, 38,
  	16, 142, 168, 206, 222, 107, 18, 224, 189, 39, 57, 24, 213, 252, 52, 76,
  	32, 27, 79, 155, 187, 214, 36, 191, 121, 78, 114, 48, 169, 247, 104, 152,
  	64, 54, 158, 53, 117, 171, 72, 125, 242, 156, 228, 96, 81, 237, 208, 47,
  	128, 108, 59, 106, 234, 85, 144, 250, 227, 55, 199, 192, 162, 217, 159, 94,
  	256, 216, 118, 212, 211, 170, 31, 243, 197, 110, 141, 127, 67, 177, 61, 188,
  	255, 175, 236, 167, 165, 83, 62, 229, 137, 220, 25, 254, 134, 97, 122, 119,
  	253, 93, 215, 77, 73, 166, 124, 201, 17, 183, 50, 251, 11, 194, 244, 238,
  	249, 186, 173, 154, 146, 75, 248, 145, 34, 109, 100, 245, 22, 131, 231, 219,
  	241, 115, 89, 51, 35, 150, 239, 33, 68, 218, 200, 233, 44, 5, 205, 181,
  	225, 230, 178, 102, 70, 43, 221, 66, 136, 179, 143, 209, 88, 10, 153, 105,
  	193, 203, 99, 204, 140, 86, 185, 132, 15, 101, 29, 161, 176, 20, 49, 210,
  	129, 149, 198, 151, 23, 172, 113, 7, 30, 202, 58, 65, 95, 40, 98, 163,
  }
  // kYOffA holds the 256 successive powers of 163 modulo 257:
  // kYOffA[i] == 163^i mod 257. Since 163 is the inverse of 41 modulo 257,
  // this is kAlphaTab read backwards: kYOffA[i] == kAlphaTab[(256-i)%256].
  var kYOffA = []int32{
  	1, 163, 98, 40, 95, 65, 58, 202, 30, 7, 113, 172, 23, 151, 198, 149,
  	129, 210, 49, 20, 176, 161, 29, 101, 15, 132, 185, 86, 140, 204, 99, 203,
  	193, 105, 153, 10, 88, 209, 143, 179, 136, 66, 221, 43, 70, 102, 178, 230,
  	225, 181, 205, 5, 44, 233, 200, 218, 68, 33, 239, 150, 35, 51, 89, 115,
  	241, 219, 231, 131, 22, 245, 100, 109, 34, 145, 248, 75, 146, 154, 173, 186,
  	249, 238, 244, 194, 11, 251, 50, 183, 17, 201, 124, 166, 73, 77, 215, 93,
  	253, 119, 122, 97, 134, 254, 25, 220, 137, 229, 62, 83, 165, 167, 236, 175,
  	255, 188, 61, 177, 67, 127, 141, 110, 197, 243, 31, 170, 211, 212, 118, 216,
  	256, 94, 159, 217, 162, 192, 199, 55, 227, 250, 144, 85, 234, 106, 59, 108,
  	128, 47, 208, 237, 81, 96, 228, 156, 242, 125, 72, 171, 117, 53, 158, 54,
  	64, 152, 104, 247, 169, 48, 114, 78, 121, 191, 36, 214, 187, 155, 79, 27,
  	32, 76, 52, 252, 213, 24, 57, 39, 189, 224, 18, 107, 222, 206, 168, 142,
  	16, 38, 26, 126, 235, 12, 157, 148, 223, 112, 9, 182, 111, 103, 84, 71,
  	8, 19, 13, 63, 246, 6, 207, 74, 240, 56, 133, 91, 184, 180, 42, 164,
  	4, 138, 135, 160, 123, 3, 232, 37, 120, 28, 195, 174, 92, 90, 21, 82,
  	2, 69, 196, 80, 190, 130, 116, 147, 60, 14, 226, 87, 46, 45, 139, 41,
  }
  // kYOffB holds 256 offsets modulo 257 with
  // kYOffB[i] == (163^i + 163^(3*i)) mod 257, which equals
  // (kYOffA[i] + kYOffA[(3*i)%256]) % 257. As a consequence
  // kYOffB[i+128] == (257 - kYOffB[i]) % 257.
  var kYOffB = []int32{
  	2, 203, 156, 47, 118, 214, 107, 106, 45, 93, 212, 20, 111, 73, 162, 251,
  	97, 215, 249, 53, 211, 19, 3, 89, 49, 207, 101, 67, 151, 130, 223, 23,
  	189, 202, 178, 239, 253, 127, 204, 49, 76, 236, 82, 137, 232, 157, 65, 79,
  	96, 161, 176, 130, 161, 30, 47, 9, 189, 247, 61, 226, 248, 90, 107, 64,
  	0, 88, 131, 243, 133, 59, 113, 115, 17, 236, 33, 213, 12, 191, 111, 19,
  	251, 61, 103, 208, 57, 35, 148, 248, 47, 116, 65, 119, 249, 178, 143, 40,
  	189, 129, 8, 163, 204, 227, 230, 196, 205, 122, 151, 45, 187, 19, 227, 72,
  	247, 125, 111, 121, 140, 220, 6, 107, 77, 69, 10, 101, 21, 65, 149, 171,
  	255, 54, 101, 210, 139, 43, 150, 151, 212, 164, 45, 237, 146, 184, 95, 6,
  	160, 42, 8, 204, 46, 238, 254, 168, 208, 50, 156, 190, 106, 127, 34, 234,
  	68, 55, 79, 18, 4, 130, 53, 208, 181, 21, 175, 120, 25, 100, 192, 178,
  	161, 96, 81, 127, 96, 227, 210, 248, 68, 10, 196, 31, 9, 167, 150, 193,
  	0, 169, 126, 14, 124, 198, 144, 142, 240, 21, 224, 44, 245, 66, 146, 238,
  	6, 196, 154, 49, 200, 222, 109, 9, 210, 141, 192, 138, 8, 79, 114, 217,
  	68, 128, 249, 94, 53, 30, 27, 61, 52, 135, 106, 212, 70, 238, 30, 185,
  	10, 132, 146, 136, 117, 37, 251, 150, 180, 188, 247, 156, 236, 192, 108, 86,
  }