Lines Matching +full:c +full:- +full:22
2 # Implement fast SHA-256 with AVX2 instructions. (x86_64)
4 # Copyright (C) 2013 Intel Corporation.
9 # Tim Chen <tim.c.chen@linux.intel.com>
21 # - Redistributions of source code must retain the above
25 # - Redistributions in binary form must reproduce the above
41 # This code is described in an Intel White-Paper:
42 # "Fast SHA-256 Implementations on Intel Architecture Processors"
60 # Add reg to mem using reg-mem add and store
87 SHUF_00BA = %ymm10 # shuffle xBxA -> 00BA
88 SHUF_DC00 = %ymm12 # shuffle xDxC -> DC00
96 c = %ecx define
148 d = c
149 c = b define
161 addl \disp(%rsp, SRND), h # h = k + w + h # --
162 or c, y3 # y3 = a|c # MAJA
163 vpalignr $4, X2, X3, XTMP0 # XTMP0 = W[-7]
169 vpaddd X0, XTMP0, XTMP0 # XTMP0 = W[-7] + W[-16]
174 rorx $22, a, y1 # y1 = a >> 22 # S0A
175 add h, d # d = k + w + h + d # --
177 and b, y3 # y3 = (a|c)&b # MAJA
178 vpalignr $4, X0, X1, XTMP1 # XTMP1 = W[-15]
179 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
184 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
186 and c, T1 # T1 = a&c # MAJB
188 add y0, y2 # y2 = S1 + CH # --
189 vpslld $(32-7), XTMP1, XTMP3
190 or T1, y3 # y3 = MAJ = ((a|c)&b)|(a&c) # MAJ
191 add y1, h # h = k + w + h + S0 # --
193 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
194 vpor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] ror 7
197 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
198 add y3, h # h = t1 + S0 + MAJ # --
209 addl offset(%rsp, SRND), h # h = k + w + h # --
210 or c, y3 # y3 = a|c # MAJA
213 vpsrld $3, XTMP1, XTMP4 # XTMP4 = W[-15] >> 3
222 rorx $22, a, y1 # y1 = a >> 22 # S0A
224 add h, d # d = k + w + h + d # --
226 vpslld $(32-18), XTMP1, XTMP1
227 and b, y3 # y3 = (a|c)&b # MAJA
228 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
234 vpxor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] ror 7 ^ W[-15] ror 18
235 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
237 and c, T1 # T1 = a&c # MAJB
238 add y0, y2 # y2 = S1 + CH # --
241 vpshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
242 or T1, y3 # y3 = MAJ = ((a|c)&b)|(a&c) # MAJ
243 add y1, h # h = k + w + h + S0 # --
245 vpaddd XTMP1, XTMP0, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
246 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
247 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
248 add y3, h # h = t1 + S0 + MAJ # --
250 vpsrld $10, XTMP2, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
260 addl offset(%rsp, SRND), h # h = k + w + h # --
262 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] ror 19 {xBxA}
264 or c, y3 # y3 = a|c # MAJA
270 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] ror 17 {xBxA}
275 add h, d # d = k + w + h + d # --
276 and b, y3 # y3 = (a|c)&b # MAJA
279 rorx $22, a, y1 # y1 = a >> 22 # S0A
284 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
288 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
290 and c, T1 # T1 = a&c # MAJB
291 add y0, y2 # y2 = S1 + CH # --
292 vpshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
294 or T1, y3 # y3 = MAJ = ((a|c)&b)|(a&c) # MAJ
295 add y1,h # h = k + w + h + S0 # --
296 add y2,d # d = k + w + h + d + S1 + CH = d + t1 # --
297 add y2,h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
299 add y3,h # h = t1 + S0 + MAJ # --
310 addl offset(%rsp, SRND), h # h = k + w + h # --
311 or c, y3 # y3 = a|c # MAJA
314 vpsrld $10, XTMP2, XTMP5 # XTMP5 = W[-2] >> 10 {DDCC}
321 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] ror 19 {xDxC}
324 add h, d # d = k + w + h + d # --
325 and b, y3 # y3 = (a|c)&b # MAJA
327 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] ror 17 {xDxC}
332 rorx $22, a, y1 # y1 = a >> 22 # S0A
333 add y0, y2 # y2 = S1 + CH # --
336 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
337 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
343 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
345 and c, T1 # T1 = a&c # MAJB
346 or T1, y3 # y3 = MAJ = ((a|c)&b)|(a&c) # MAJ
348 add y1, h # h = k + w + h + S0 # --
349 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
350 add y3, h # h = t1 + S0 + MAJ # --
371 rorx $22, a, y1 # y1 = a >> 22 # S0A
374 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
376 addl \disp(%rsp, SRND), h # h = k + w + h # --
377 or c, y3 # y3 = a|c # MAJA
379 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
381 and b, y3 # y3 = (a|c)&b # MAJA
382 and c, T1 # T1 = a&c # MAJB
383 add y0, y2 # y2 = S1 + CH # --
386 add h, d # d = k + w + h + d # --
387 or T1, y3 # y3 = MAJ = ((a|c)&b)|(a&c) # MAJ
388 add y1, h # h = k + w + h + S0 # --
389 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
395 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
404 add y3, old_h # h = t1 + S0 + MAJ # --
409 rorx $22, a, y1 # y1 = a >> 22 # S0A
412 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
415 addl offset(%rsp, SRND), h # h = k + w + h # --
416 or c, y3 # y3 = a|c # MAJA
418 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
420 and b, y3 # y3 = (a|c)&b # MAJA
421 and c, T1 # T1 = a&c # MAJB
422 add y0, y2 # y2 = S1 + CH # --
425 add h, d # d = k + w + h + d # --
426 or T1, y3 # y3 = MAJ = ((a|c)&b)|(a&c) # MAJ
427 add y1, h # h = k + w + h + S0 # --
429 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
435 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
444 add y3, old_h # h = t1 + S0 + MAJ # --
449 rorx $22, a, y1 # y1 = a >> 22 # S0A
452 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
455 addl offset(%rsp, SRND), h # h = k + w + h # --
456 or c, y3 # y3 = a|c # MAJA
458 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
460 and b, y3 # y3 = (a|c)&b # MAJA
461 and c, T1 # T1 = a&c # MAJB
462 add y0, y2 # y2 = S1 + CH # --
465 add h, d # d = k + w + h + d # --
466 or T1, y3 # y3 = MAJ = ((a|c)&b)|(a&c) # MAJ
467 add y1, h # h = k + w + h + S0 # --
469 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
475 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
484 add y3, old_h # h = t1 + S0 + MAJ # --
489 rorx $22, a, y1 # y1 = a >> 22 # S0A
492 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
495 addl offset(%rsp, SRND), h # h = k + w + h # --
496 or c, y3 # y3 = a|c # MAJA
498 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
500 and b, y3 # y3 = (a|c)&b # MAJA
501 and c, T1 # T1 = a&c # MAJB
502 add y0, y2 # y2 = S1 + CH # --
505 add h, d # d = k + w + h + d # --
506 or T1, y3 # y3 = MAJ = ((a|c)&b)|(a&c) # MAJ
507 add y1, h # h = k + w + h + S0 # --
509 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
512 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
514 add y3, h # h = t1 + S0 + MAJ # --
538 and $-32, %rsp # align rsp to 32 byte boundary
542 lea -64(INP, NUM_BLKS), NUM_BLKS # pointer to last block
551 mov 4*2(CTX), c
640 addm (4*2)(CTX),c
666 addm (4*2)(CTX),c
695 mov (4*2)(CTX),c
764 # shuffle xBxA -> 00BA
770 # shuffle xDxC -> DC00