Lines Matching refs:T0
419 my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) =
886 vmovdqu 16*2($inp),$T0
890 vpsrldq \$6,$T0,$T2 # splat input
892 vpunpckhqdq $T1,$T0,$T4 # 4
893 vpunpcklqdq $T1,$T0,$T0 # 0:1
897 vpsrlq \$26,$T0,$T1
898 vpand $MASK,$T0,$T0 # 0
979 vpmuludq $T0,$D4,$D0 # d0 = h0*r0
1000 vpmuludq $T0,$H2,$H2 # h0*r1
1007 vpmuludq $T0,$H3,$H3 # h0*r2
1019 vpmuludq $T0,$H2,$H2 # h0*r3
1039 vpmuludq 0x70(%rsp),$T0,$T4 # h0*r4
1040 vpmuludq $T1,$H4,$T0 # h1*s4
1044 vpaddq $T0,$D0,$D0 # d0 += h1*s4
1081 vpmuludq $H0,$T4,$T0 # h0*r0
1083 vpaddq $T0,$D0,$D0
1086 vpmuludq $H2,$T4,$T0 # h2*r0
1088 vpaddq $T0,$D2,$D2
1091 vpmuludq -0x70(%r11),$H4,$T0 # h4*s1
1094 vpaddq $T0,$D0,$D0 # d0 += h4*s1
1096 vpmuludq $H3,$T2,$T0 # h3*r1
1099 vpaddq $T0,$D4,$D4 # d4 += h3*r1
1106 vpmuludq $H2,$T3,$T0 # h2*r2
1108 vpaddq $T0,$D4,$D4 # d4 += h2*r2
1112 vpmuludq $H4,$T4,$T0 # h4*s2
1114 vpaddq $T0,$D1,$D1 # d1 += h4*s2
1123 vpmuludq $H4,$T3,$T0 # h4*s3
1125 vpaddq $T0,$D2,$D2 # d2 += h4*s3
1126 vmovdqu 16*2($inp),$T0 # load input
1135 vpsrldq \$6,$T0,$T2 # splat input
1143 vpunpckhqdq $T1,$T0,$T4 # 4
1147 vpunpcklqdq $T1,$T0,$T0 # 0:1
1152 vpsrlq \$26,$T0,$T1
1154 vpand $MASK,$T0,$T0 # 0
1209 vpaddq $H0,$T0,$T0
1228 vpmuludq $T0,$D4,$D0 # d0 = h0*r0
1242 vpmuludq $T0,$H2,$H2 # h0*r1
1253 vpmuludq $T0,$H4,$H4 # h0*r2
1263 vpmuludq $T0,$H3,$H3 # h0*r3
1274 vpmuludq $T0,$H2,$H2 # h0*r4
1316 vpmuludq $H0,$T4,$T0 # h0*r0
1317 vpaddq $T0,$D0,$D0 # d0 += h0*r0
1320 vpmuludq $H2,$T4,$T0 # h2*r0
1321 vpaddq $T0,$D2,$D2 # d2 += h2*r0
1328 vpmuludq $H3,$T2,$T0 # h3*r1
1329 vpaddq $T0,$D4,$D4 # d4 += h3*r1
1334 vpmuludq $H1,$T2,$T0 # h1*r1
1335 vpaddq $T0,$D2,$D2 # d2 += h1*r1
1344 vpmuludq $H1,$T4,$T0 # h1*r2
1345 vpaddq $T0,$D3,$D3 # d3 += h1*r2
1355 vpmuludq $H1,$T3,$T0 # h1*r3
1356 vpaddq $T0,$D4,$D4 # d4 += h1*r3
1363 vpmuludq $H3,$T4,$T0 # h3*s3
1364 vpaddq $T0,$D1,$D1 # d1 += h3*s3
1372 vpmuludq $H3,$T3,$T0 # h3*s4
1373 vpaddq $T0,$D2,$D2 # d2 += h3*s4
1386 vpsrldq \$8,$D0,$T0
1390 vpaddq $T0,$D0,$D0
1516 my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
1831 vmovdqa 96(%rcx),$T0 # .Lpermd_avx2
1843 vpermd $T2,$T0,$T2 # 00003412 -> 14243444
1845 vpermd $T3,$T0,$T3
1847 vpermd $T4,$T0,$T4
1849 vpermd $D0,$T0,$D0
1851 vpermd $D1,$T0,$D1
1853 vpermd $D2,$T0,$D2
1855 vpermd $D3,$T0,$D3
1857 vpermd $D4,$T0,$D4
1859 vpermd $MASK,$T0,$MASK
1867 vmovdqu 16*0($inp),%x#$T0
1869 vinserti128 \$1,16*2($inp),$T0,$T0
1873 vpsrldq \$6,$T0,$T2 # splat input
1875 vpunpckhqdq $T1,$T0,$T4 # 4
1877 vpunpcklqdq $T1,$T0,$T0 # 0:1
1881 vpsrlq \$26,$T0,$T1
1884 vpand $MASK,$T0,$T0 # 0
1904 vpaddq $H0,$T0,$H0
1905 vmovdqa `32*0`(%rsp),$T0 # r0^4
1929 vpmuludq $H2,$T0,$D2 # d2 = h2*r0
1945 vpmuludq $H0,$T0,$T4 # h0*r0
1946 vpmuludq $H1,$T0,$H2 # h1*r0
1949 vpmuludq $H3,$T0,$T4 # h3*r0
1950 vpmuludq $H4,$T0,$H2 # h4*r0
1951 vmovdqu 16*0($inp),%x#$T0 # load input
1954 vinserti128 \$1,16*2($inp),$T0,$T0
1971 vpsrldq \$6,$T0,$T2 # splat input
1979 vpunpckhqdq $T1,$T0,$T4 # 4
1983 vpunpcklqdq $T1,$T0,$T0 # 0:1
2018 vpsrlq \$26,$T0,$T1
2037 vpand $MASK,$T0,$T0 # 0
2054 vpaddq $H0,$T0,$H0
2055 vmovdqu `32*0+4`(%rsp),$T0 # r0^4
2064 vpmuludq $H2,$T0,$D2 # d2 = h2*r0
2079 vpmuludq $H0,$T0,$T4 # h0*r0
2080 vpmuludq $H1,$T0,$H2 # h1*r0
2084 vpmuludq $H3,$T0,$T4 # h3*r0
2085 vpmuludq $H4,$T0,$H2 # h4*r0
2125 vpsrldq \$8,$H0,$T0
2130 vpaddq $T0,$H0,$H0
2134 vpermq \$0x2,$H0,$T0
2139 vpaddq $T0,$H0,$H0
2211 map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3)); # switch to %zmm domain
2252 vmovdqu `16*2-64`($ctx),%x#$T0 # ... ${S1}
2262 vpermd $T0,$T2,$S1
2265 vpsrlq \$32,$R0,$T0 # 14243444 -> 01020304
2290 vpmuludq $T0,$R0,$D0 # d0 = r0'*r0
2291 vpmuludq $T0,$R1,$D1 # d1 = r0'*r1
2292 vpmuludq $T0,$R2,$D2 # d2 = r0'*r2
2293 vpmuludq $T0,$R3,$D3 # d3 = r0'*r3
2294 vpmuludq $T0,$R4,$D4 # d4 = r0'*r4
2387 vpunpcklqdq $T4,$T3,$T0 # transpose input
2422 vpsrlq \$52,$T0,$T2 # splat input
2425 vpsrlq \$26,$T0,$T1
2429 vpandq $MASK,$T0,$T0 # 0
2470 vpaddq $H0,$T0,$H0
2503 vpunpcklqdq $T4,$T3,$T0 # transpose input
2536 vpsrlq \$52,$T0,$T2 # splat input
2563 vpsrlq \$26,$T0,$T1
2581 vpandq $MASK,$T0,$T0 # 0
2610 vpaddq $H0,$T0,$H0
2624 vmovdqu 16*0($inp),%x#$T0
2644 vinserti128 \$1,16*2($inp),%y#$T0,%y#$T0
2711 map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT));
2719 vpsrldq \$6,$T0,$T2 # splat input
2721 vpunpckhqdq $T1,$T0,$T4 # 4
2727 vpunpcklqdq $T1,$T0,$T0 # 0:1
2741 vpsrlq \$26,$T0,$T1
2748 vpand $MASK,$T0,$T0 # 0
2900 my ($T0,$inp_permd,$inp_shift,$PAD) = map("%ymm$_",(18..21));
2953 vmovdqu32 0($inp),%x#$T0 # load input as ----3210
2956 vpermd $T0,$inp_permd,$T0 # ----3210 -> --322110
2957 vpsrlvq $inp_shift,$T0,$T0
2958 vpandq $reduc_mask,$T0,$T0
2959 vporq $PAD,$T0,$T0
2961 vpaddq $T0,$Dlo,$Dlo # accumulate input
2979 vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost qword
2983 vpaddq $T0,$Dhi,$Dhi
2989 vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost word
2992 vpermq \$0b10010011,$T0,$T0
2994 vpaddq $T0,$Dlo,$Dlo
2996 vpermq \$0b10010011,$Dlo,${T0}{%k1}{z}
2998 vpaddq $T0,$Dlo,$Dlo
2999 vpsllq \$2,$T0,$T0
3001 vpaddq $T0,$Dlo,$Dlo
3024 my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
3080 vpandq $mask44,$T1,$T0
3250 vpandq $mask44,$T1,$T0
3262 vpaddq $T0,$H0,$H0
3313 vpandq $mask44,$T1,$T0
3351 vpaddq $T0,$H0,$H0
3386 vpsrldq \$8,$D0lo,$T0
3390 vpaddq $T0,$D0lo,$D0lo
3396 vpermq \$0x2,$D0lo,$T0
3403 vpaddq $T0,$D0lo,${D0lo}{%k1}{z}
3465 my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
3569 vpunpcklqdq $R0,$RR0,$T0
3577 map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
3582 vshufi64x2 \$0x44,$R0,$T0,$RR0
3614 vpandq $mask44,$T1,$T0
3627 vpaddq $T0,$H0,$H0
3678 vpandq $mask44,$T1,$T0
3706 vpaddq $T0,$H0,$H0
3741 vpsrldq \$8,$D0lo,$T0
3745 vpaddq $T0,$D0lo,$D0lo
3751 vpermq \$0x2,$D0lo,$T0
3758 vpaddq $T0,$D0lo,$D0lo
3764 vextracti64x4 \$1,$D0lo,%y#$T0
3777 map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
3780 vpaddq $T0,$D0lo,${D0lo}{%k1}{z}