Lines Matching +full:4 +full:l
19 * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
39 * Finally, the original 'folding' approach is to split the long into 4 unsigned shorts
40 * add 4 ushorts, resulting in ushort/carry
58 .align 4
64 ldq_u $0,0($16) # L : Latency: 3
65 inslh $18,7,$4 # U : 0000000000AABBCC
66 ldq_u $1,8($16) # L : Latency: 3
67 sll $19,8,$7 # U : U L U L : 0x00000000 00aabb00
70 ldq_u $5,15($16) # L : Latency: 3
72 ldq_u $2,0($17) # L : U L U L : Latency: 3
76 ldq_u $3,8($17) # L : Latency: 3
77 sll $19,24,$19 # U : U U L U : 0x000000aa bb000000
80 ldq_u $23,15($17) # L : Latency: 3
82 addl $19,$7,$19 # E : U L U L : <sign bits>bbaabb00
86 or $18,$4,$18 # E : 000000CCDDAABBCC
87 extqh $5,$6,$5 # U : L U L U
92 extqh $3,$6,$22 # U : L U L U :
97 extqh $23,$6,$23 # U : L U L U :
99 srl $18,16,$4 # U : 0000000000CCDDAA
102 or $3,$23,$3 # E : U L U L : 2nd dst word complete
107 zap $4,0xa,$4 # U : U U L L : 0000000000CC00AA
109 or $18,$4,$18 # E : 00000000DDCCBBAA
112 addq $20,$2,$20 # E : U L U L
117 addq $20,$18,$20 # E : U L U L (1 cycle stall on $20)
127 addq $20,$18,$20 # E : U L U L :
133 srl $0,32,$0 # U : U L U L : (1 cycle stall on $0)
138 extwl $1,4,$1 # U : ushort[2] (1 cycle stall on $1)
144 nop # E : L U L U
147 not $0,$4 # E : complement (1 cycle stall on $0)
148 zapnot $4,3,$0 # U : clear upper garbage bits
149 /* (1 cycle stall on $4) */
150 ret # L0 : L U L U