Lines Matching full:h1

180 my ($h0,$h1,$h2)=("%r14","%rbx","%r10");
196 mulq $h1 # h1*r0
201 mulq $h1 # h1*s1
202 mov $h2,$h1 # borrow $h1
206 imulq $s1,$h1 # h2*s1
207 add $h1,$d2
208 mov $d1,$h1
212 add $d2,$h1
222 adc \$0,$h1
330 mov 8($ctx),$h1
342 adc 8($inp),$h1
358 mov $h1,8($ctx)
419 my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) =
440 mov $r1,$h1
472 mov $h1,%rax
485 mov $h1,$d1
530 mov $h1,%rax
536 mov $h1,$d1
570 mov $h1,%rax
576 mov $h1,$d1
649 mov $d2#d,$h1#d
655 shr \$12,$h1
658 adc $d2,$h1
663 add $d1,$h1
673 adc \$0,$h1
682 adc 8($inp),$h1
695 mov $h1,$r0
696 mov $h1,$r1
701 shr \$14,$h1
706 and \$0x3ffffff,$h1 # h[3]
713 vmovd %rdx#d,$H1
715 vmovd $h1#d,$H3
722 mov $h1,8($ctx)
731 mov $h1#d,12($ctx)
776 mov 8($ctx),$h1
788 adc 8($inp),$h1
800 mov $h1,$d1
801 mov $h1,$d2
806 shr \$14,$h1
811 and \$0x3ffffff,$h1 # h[3]
815 vmovd %rdx#d,$H1
817 vmovd $h1#d,$H3
845 vmovd 4*1($ctx),$H1
970 # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
971 # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
972 # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
973 # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
974 # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
980 vpmuludq $T1,$D4,$D1 # d1 = h1*r0
989 vmovdqa $H1,0x10(%r11) #
990 vpmuludq $T3,$H2,$H1 # h3*r1
992 vpaddq $H1,$D4,$D4 # d4 += h3*r1
995 vpmuludq $T1,$H2,$H1 # h1*r1
998 vpaddq $H1,$D2,$D2 # d2 += h1*r1
1006 vpmuludq $T1,$H3,$H1 # h1*r2
1008 vpaddq $H1,$D3,$D3 # d3 += h1*r2
1018 vpmuludq $T1,$H2,$H1 # h1*r3
1020 vpaddq $H1,$D4,$D4 # d4 += h1*r3
1023 vpmuludq $T3,$H3,$H1 # h3*s3
1026 vpaddq $H1,$D1,$D1 # d1 += h3*s3
1031 vmovdqu 16*1($inp),$H1 #
1038 vpsrldq \$6,$H1,$H3 #
1040 vpmuludq $T1,$H4,$T0 # h1*s4
1041 vpunpckhqdq $H1,$H0,$H4 # 4
1044 vpaddq $T0,$D0,$D0 # d0 += h1*s4
1046 vpunpcklqdq $H1,$H0,$H0 # 0:1
1051 vpsrlq \$26,$H0,$H1
1054 vpand $MASK,$H1,$H1 # 1
1062 vpaddq 0x10(%r11),$H1,$H1
1075 # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
1076 # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
1077 # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
1078 # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
1079 # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
1082 vpmuludq $H1,$T4,$T1 # h1*r0
1100 vpmuludq $H1,$T2,$T1 # h1*r1
1102 vpaddq $T1,$D2,$D2 # d2 += h1*r1
1107 vpmuludq $H1,$T3,$T1 # h1*r2
1109 vpaddq $T1,$D3,$D3 # d3 += h1*r2
1117 vpmuludq $H1,$T2,$T1 # h1*r3
1121 vpaddq $T1,$D4,$D4 # d4 += h1*r3
1142 vpmuludq $H1,$T4,$H0
1145 vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4
1173 vpaddq $D0,$D1,$H1 # h0 -> h1
1178 vpsrlq \$26,$H1,$D1
1179 vpand $MASK,$H1,$H1
1180 vpaddq $D1,$H2,$H2 # h1 -> h2
1192 vpaddq $D0,$H1,$H1 # h0 -> h1
1210 vpaddq $H1,$T1,$T1
1217 vmovdqa $H1,0x10(%r11)
1221 # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
1222 # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
1223 # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
1224 # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
1225 # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
1230 vpmuludq $T1,$D4,$D1 # d1 = h1*r0
1237 vpmuludq $T2,$H2,$H1 # h2*r1
1238 vpaddq $H1,$D3,$D3 # d3 += h2*r1
1240 vpmuludq $T1,$H2,$H0 # h1*r1
1241 vpaddq $H0,$D2,$D2 # d2 += h1*r1
1248 vpmuludq $T2,$H4,$H1 # h2*r2
1249 vpaddq $H1,$D4,$D4 # d4 += h2*r2
1250 vpmuludq $T1,$H4,$H0 # h1*r2
1251 vpaddq $H0,$D3,$D3 # d3 += h1*r2
1255 vpmuludq $T4,$H2,$H1 # h4*s2
1256 vpaddq $H1,$D1,$D1 # d1 += h4*s2
1261 vpmuludq $T1,$H3,$H0 # h1*r3
1262 vpaddq $H0,$D4,$D4 # d4 += h1*r3
1266 vpmuludq $T4,$H4,$H1 # h4*s3
1267 vpaddq $H1,$D2,$D2 # d2 += h4*s3
1276 vpmuludq $T4,$H3,$H1 # h4*s4
1277 vpaddq $H1,$D3,$D3 # h3 = d3 + h4*s4
1280 vpmuludq $T2,$H3,$H1 # h2*s4
1281 vpaddq $H1,$D1,$D1 # h1 = d1 + h2*s4
1282 vpmuludq $T1,$H3,$H3 # h1*s4
1283 vpaddq $H3,$D0,$D0 # h0 = d0 + h1*s4
1288 vmovdqu 16*1($inp),$H1
1291 vpsrldq \$6,$H1,$H3
1292 vpunpckhqdq $H1,$H0,$H4 # 4
1293 vpunpcklqdq $H1,$H0,$H0 # 0:1
1297 vpsrlq \$26,$H0,$H1
1300 vpand $MASK,$H1,$H1 # 1
1308 vpaddq 0x10(%r11),$H1,$H1
1318 vpmuludq $H1,$T4,$T1 # h1*r0
1319 vpaddq $T1,$D1,$D1 # d1 += h1*r0
1334 vpmuludq $H1,$T2,$T0 # h1*r1
1335 vpaddq $T0,$D2,$D2 # d2 += h1*r1
1344 vpmuludq $H1,$T4,$T0 # h1*r2
1345 vpaddq $T0,$D3,$D3 # d3 += h1*r2
1355 vpmuludq $H1,$T3,$T0 # h1*r3
1356 vpaddq $T0,$D4,$D4 # d4 += h1*r3
1376 vpmuludq $H1,$T3,$T3 # h1*s4
1377 vpaddq $T3,$D0,$D0 # d0 += h1*s4
1403 vpaddq $H0,$D1,$D1 # h0 -> h1
1408 vpsrlq \$26,$D1,$H1
1410 vpaddq $H1,$D2,$D2 # h1 -> h2
1422 vpaddq $H0,$D1,$D1 # h0 -> h1
1483 add %rax,%r9 # h1
1516 my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
1571 mov $d2#d,$h1#d
1577 shr \$12,$h1
1580 adc $d2,$h1
1585 add $d1,$h1
1595 adc \$0,$h1
1605 adc 8($inp),$h1
1623 mov $h1,$r0
1624 mov $h1,$r1
1629 shr \$14,$h1
1634 and \$0x3ffffff,$h1 # h[3]
1641 vmovd %rdx#d,%x#$H1
1643 vmovd $h1#d,%x#$H3
1650 mov $h1,8($ctx)
1659 mov $h1#d,12($ctx)
1704 mov 8($ctx),$h1
1717 adc 8($inp),$h1
1733 mov $h1,$d1
1734 mov $h1,$d2
1739 shr \$14,$h1
1744 and \$0x3ffffff,$h1 # h[3]
1748 vmovd %rdx#d,%x#$H1
1750 vmovd $h1#d,%x#$H3
1789 vmovd 4*1($ctx),%x#$H1
1906 vpaddq $H1,$T1,$H1
1914 # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
1915 # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
1916 # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
1917 # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
1918 # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
1923 # d4 = h2*r2 + h4*r0 + h3*r1 + h1*r3 + h0*r4
1924 # d3 = h2*r1 + h3*r0 + h1*r2 + h0*r3 + h4*5*r4
1925 # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
1926 # d1 = h2*5*r4 + h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3
1927 # d0 = h2*5*r3 + h0*r0 + h4*5*r1 + h3*5*r2 + h1*5*r4
1936 vpmuludq $H1,$T1,$H2 # h1*r1, borrow $H2 as temp
1938 vpaddq $H2,$D2,$D2 # d2 += h1*r1
1946 vpmuludq $H1,$T0,$H2 # h1*r0
1948 vpaddq $H2,$D1,$D1 # d1 += h1*r0
1962 vpmuludq $H1,$T2,$T4 # h1*r2
1964 vpaddq $T4,$D3,$D3 # d3 += h1*r2
1969 vpmuludq $H1,$H2,$T4 # h1*r3
1972 vpaddq $T4,$D4,$D4 # d4 += h1*r3
1988 vpmuludq $H1,$S4,$H0 # h1*s4
1991 vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4
2002 vpaddq $D0,$D1,$H1 # h0 -> h1
2009 vpsrlq \$26,$H1,$D1
2010 vpand $MASK,$H1,$H1
2011 vpaddq $D1,$H2,$H2 # h1 -> h2
2029 vpaddq $D0,$H1,$H1 # h0 -> h1
2056 vpaddq $H1,$T1,$H1
2071 vpmuludq $H1,$T1,$H2 # h1*r1
2073 vpaddq $H2,$D2,$D2 # d2 += h1*r1
2080 vpmuludq $H1,$T0,$H2 # h1*r0
2083 vpaddq $H2,$D1,$D1 # d1 += h1*r0
2094 vpmuludq $H1,$T2,$T4 # h1*r2
2096 vpaddq $T4,$D3,$D3 # d3 += h1*r2
2099 vpmuludq $H1,$H2,$T4 # h1*r3
2101 vpaddq $T4,$D4,$D4 # d4 += h1*r3
2113 vpmuludq $H1,$S4,$H0 # h1*s4
2116 vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4
2152 vpaddq $D0,$D1,$H1 # h0 -> h1
2157 vpsrlq \$26,$H1,$D1
2158 vpand $MASK,$H1,$H1
2159 vpaddq $D1,$H2,$H2 # h1 -> h2
2171 vpaddq $D0,$H1,$H1 # h0 -> h1
2178 vmovd %x#$H1,`4*1-48-64`($ctx)
2213 map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4));
2454 # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
2455 # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
2456 # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
2457 # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
2458 # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
2463 # d3 = h2*r1 + h0*r3 + h1*r2 + h3*r0 + h4*5*r4
2464 # d4 = h2*r2 + h0*r4 + h1*r3 + h3*r1 + h4*r0
2465 # d0 = h2*5*r3 + h0*r0 + h1*5*r4 + h3*5*r2 + h4*5*r1
2466 # d1 = h2*5*r4 + h0*r1 + h1*r0 + h3*5*r3 + h4*5*r2
2467 # d2 = h2*r0 + h0*r2 + h1*r1 + h3*5*r4 + h4*5*r3
2478 vpaddq $H1,$T1,$H1 # accumulate input
2494 vpmuludq $H1,$R2,$M3
2495 vpmuludq $H1,$R3,$M4
2496 vpmuludq $H1,$S4,$M0
2498 vpaddq $M3,$D3,$D3 # d3 += h1*r2
2499 vpaddq $M4,$D4,$D4 # d4 += h1*r3
2500 vpaddq $M0,$D0,$D0 # d0 += h1*s4
2508 vpmuludq $H1,$R0,$M1
2509 vpmuludq $H1,$R1,$M2
2512 vpaddq $M1,$D1,$D1 # d1 += h1*r0
2513 vpaddq $M2,$D2,$D2 # d2 += h1*r1
2530 vpaddq $M1,$D1,$H1 # h1 = d1 + h4*s2
2547 vpaddq $D0,$H1,$H1 # h0 -> h1
2554 vpsrlq \$26,$H1,$D1
2555 vpandq $MASK,$H1,$H1
2556 vpaddq $D1,$H2,$H2 # h1 -> h2
2573 vpaddq $D0,$H1,$H1 # h0 -> h1
2620 vpaddq $H1,$T1,$H1 # accumulate input
2635 vpmuludq $H1,$R2,$M3
2636 vpmuludq $H1,$R3,$M4
2637 vpmuludq $H1,$S4,$M0
2639 vpaddq $M3,$D3,$D3 # d3 += h1*r2
2640 vpaddq $M4,$D4,$D4 # d4 += h1*r3
2641 vpaddq $M0,$D0,$D0 # d0 += h1*s4
2647 vpmuludq $H1,$R0,$M1
2648 vpmuludq $H1,$R1,$M2
2651 vpaddq $M1,$D1,$D1 # d1 += h1*r0
2652 vpaddq $M2,$D2,$D2 # d2 += h1*r1
2670 vpaddq $M1,$D1,$H1 # h1 = d1 + h4*s2
2680 vpermq \$0xb1,$H1,$D1
2685 vpaddq $D1,$H1,$H1
2692 vpermq \$0x2,$H1,$D1
2697 vpaddq $D1,$H1,$H1
2703 vextracti64x4 \$0x1,$H1,%y#$D1
2708 vpaddq $D1,$H1,${H1}{%k3}{z}
2712 map(s/%z/%y/,($H0,$H1,$H2,$H3,$H4, $D0,$D1,$D2,$D3,$D4, $MASK));
2728 vpaddq $D0,$H1,$H1 # h0 -> h1
2733 vpsrlq \$26,$H1,$D1
2734 vpand $MASK,$H1,$H1
2737 vpaddq $D1,$H2,$H2 # h1 -> h2
2755 vpaddq $D0,$H1,$H1 # h0 -> h1
2769 vmovd %x#$H1,`4*1-48-64`($ctx)
2899 my ($H0,$H1,$H2,$r2r1r0,$r1r0s2,$r0s2s1,$Dlo,$Dhi) = map("%ymm$_",(0..5,16,17));
2964 vpermq \$0b01010101,$Dlo,${H1}{%k7}{z}
2973 vpmadd52luq $r1r0s2,$H1,$Dlo
2974 vpmadd52huq $r1r0s2,$H1,$Dhi
3022 my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17));
3048 vmovq 8($ctx),%x#$H1
3100 vmovdqa $R1,$H1
3126 vpmadd52luq $H1,$S2,$D0lo
3127 vpmadd52huq $H1,$S2,$D0hi
3128 vpmadd52luq $H1,$R0,$D1lo
3129 vpmadd52huq $H1,$R0,$D1hi
3130 vpmadd52luq $H1,$R1,$D2lo
3131 vpmadd52huq $H1,$R1,$D2hi
3144 vpandq $mask44,$D1lo,$H1
3162 vpaddq $tmp,$H1,$H1
3167 vpunpcklqdq $R1,$H1,$R1 # 1,2
3168 vpbroadcastq %x#$H1,%x#$H1 # 2,2
3186 vinserti128 \$1,%x#$R1,$H1,$R1 # 1,2,3,4
3199 vmovq 8($ctx),%x#$H1
3263 vpaddq $T1,$H1,$H1
3290 vpmadd52luq $H1,$S2,$D0lo
3291 vpmadd52huq $H1,$S2,$D0hi
3292 vpmadd52luq $H1,$R0,$D1lo
3293 vpmadd52huq $H1,$R0,$D1hi
3294 vpmadd52luq $H1,$R1,$D2lo
3295 vpmadd52huq $H1,$R1,$D2hi
3310 vpandq $mask44,$D1lo,$H1
3334 vpaddq $tmp,$H1,$H1
3352 vpaddq $T1,$H1,$H1
3374 vpmadd52luq $H1,$S2,$D0lo
3375 vpmadd52huq $H1,$S2,$D0hi
3376 vpmadd52luq $H1,$R0,$D1lo
3377 vpmadd52huq $H1,$R0,$D1hi
3378 vpmadd52luq $H1,$R1,$D2lo
3379 vpmadd52huq $H1,$R1,$D2hi
3389 vpsrldq \$8,$D1hi,$H1
3395 vpaddq $H1,$D1hi,$D1hi
3402 vpermq \$0x2,$D1hi,$H1
3408 vpaddq $H1,$D1hi,${D1hi}{%k1}{z}
3423 vpandq $mask44,$D1lo,$H1
3441 vpaddq $tmp,$H1,$H1
3448 vmovq %x#$H1,8($ctx)
3463 my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17));
3485 vmovq 8($ctx),%x#$H1
3575 map(s/%y/%z/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2);
3628 vpaddq $T1,$H1,$H1
3655 vpmadd52luq $H1,$S2,$D0lo
3656 vpmadd52huq $H1,$S2,$D0hi
3657 vpmadd52luq $H1,$R0,$D1lo
3658 vpmadd52huq $H1,$R0,$D1hi
3659 vpmadd52luq $H1,$R1,$D2lo
3660 vpmadd52huq $H1,$R1,$D2hi
3675 vpandq $mask44,$D1lo,$H1
3699 vpaddq $tmp,$H1,$H1
3707 vpaddq $T1,$H1,$H1
3729 vpmadd52luq $H1,$SS2,$D0lo
3730 vpmadd52huq $H1,$SS2,$D0hi
3731 vpmadd52luq $H1,$RR0,$D1lo
3732 vpmadd52huq $H1,$RR0,$D1hi
3733 vpmadd52luq $H1,$RR1,$D2lo
3734 vpmadd52huq $H1,$RR1,$D2hi
3744 vpsrldq \$8,$D1hi,$H1
3750 vpaddq $H1,$D1hi,$D1hi
3757 vpermq \$0x2,$D1hi,$H1
3763 vpaddq $H1,$D1hi,$D1hi
3770 vextracti64x4 \$1,$D1hi,%y#$H1
3775 map(s/%z/%y/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2);
3783 vpaddq $H1,$D1hi,${D1hi}{%k1}{z}
3798 vpandq $mask44,$D1lo,$H1
3816 vpaddq $tmp,$H1,$H1
3821 vmovq %x#$H1,8($ctx)