Lines Matching refs:VEC_SIZE
31 # ifndef VEC_SIZE
32 # define VEC_SIZE 32
68 and $((VEC_SIZE * 4) - 1), %ecx
69 cmp $(VEC_SIZE * 2), %ecx
72 and $-VEC_SIZE, %rsi
73 and $(VEC_SIZE - 1), %ecx
81 mov $VEC_SIZE, %r10
85 mov $(VEC_SIZE + 1), %r10
94 vpcmpeqb VEC_SIZE(%rsi), %ymmZ, %ymm2
98 add $VEC_SIZE, %r10
117 mov $VEC_SIZE, %rcx
120 vmovdqa VEC_SIZE(%rsi, %rcx), %ymm2
123 add $VEC_SIZE, %rcx
125 sub $(VEC_SIZE * 3), %r8
136 vmovdqa VEC_SIZE(%rsi, %rcx), %ymm3
139 add $VEC_SIZE, %rcx
141 sub $VEC_SIZE, %r8
152 vmovdqa VEC_SIZE(%rsi, %rcx), %ymm4
155 add $VEC_SIZE, %rcx
157 sub $VEC_SIZE, %r8
168 vmovdqa VEC_SIZE(%rsi, %rcx), %ymm2
171 add $VEC_SIZE, %rcx
173 sub $VEC_SIZE, %r8
184 vmovdqa VEC_SIZE(%rsi, %rcx), %ymm2
187 add $VEC_SIZE, %rcx
189 sub $VEC_SIZE, %r8
199 vmovdqa VEC_SIZE(%rsi, %rcx), %ymm3
203 add $VEC_SIZE, %rcx
205 sub $VEC_SIZE, %r8
217 lea VEC_SIZE(%rsi, %rcx), %rsi
218 and $-(VEC_SIZE * 4), %rsi
222 lea (VEC_SIZE * 8)(%r8, %rdx), %r8
226 vmovdqa VEC_SIZE(%rsi), %ymm5
227 vmovdqa (VEC_SIZE * 2)(%rsi), %ymm6
228 vmovdqa (VEC_SIZE * 3)(%rsi), %ymm7
235 sub $(VEC_SIZE * 4), %r8
242 add $(VEC_SIZE * 4), %rdi
243 add $(VEC_SIZE * 4), %rsi
244 vmovdqu %ymm4, -(VEC_SIZE * 4)(%rdi)
246 vmovdqu %ymm5, -(VEC_SIZE * 3)(%rdi)
247 vmovdqa VEC_SIZE(%rsi), %ymm5
249 vmovdqu %ymm6, -(VEC_SIZE * 2)(%rdi)
250 vmovdqa (VEC_SIZE * 2)(%rsi), %ymm6
251 vmovdqu %ymm7, -VEC_SIZE(%rdi)
252 vmovdqa (VEC_SIZE * 3)(%rsi), %ymm7
258 sub $(VEC_SIZE * 4), %r8
284 vmovdqu %ymm5, VEC_SIZE(%rdi)
285 vmovdqu %ymm6, (VEC_SIZE * 2)(%rdi)
288 lea (VEC_SIZE * 3)(%rdi, %rdx), %rax
290 vmovdqu %ymm7, (VEC_SIZE * 3)(%rdi)
291 add $(VEC_SIZE - 1), %r8
293 lea ((VEC_SIZE * 3) + 1)(%rdi, %rdx), %rdi
296 add $(VEC_SIZE * 3), %rsi
297 add $(VEC_SIZE * 3), %rdi
305 vmovdqu VEC_SIZE(%rsi), %ymm2
311 cmp $VEC_SIZE, %r8
313 cmp $(VEC_SIZE + 1), %r8
326 cmp $(VEC_SIZE * 2), %r8
328 cmp $((VEC_SIZE * 2) + 1), %r8
335 and $-VEC_SIZE, %rsi
336 and $(VEC_SIZE - 1), %ecx
380 add $VEC_SIZE, %rsi
381 add $VEC_SIZE, %rdi
383 sub $VEC_SIZE, %r8
391 add $VEC_SIZE, %edx
403 add $((VEC_SIZE * 4) - 1), %r8
417 lea VEC_SIZE(%rdi, %rdx), %rax
419 vmovdqu %ymm5, VEC_SIZE(%rdi)
420 add $((VEC_SIZE * 3) - 1), %r8
422 lea (VEC_SIZE + 1)(%rdi, %rdx), %rdi
425 add $VEC_SIZE, %rsi
426 add $VEC_SIZE, %rdi
434 vmovdqu %ymm5, VEC_SIZE(%rdi)
437 lea (VEC_SIZE * 2)(%rdi, %rdx), %rax
439 vmovdqu %ymm6, (VEC_SIZE * 2)(%rdi)
440 add $((VEC_SIZE * 2) - 1), %r8
442 lea ((VEC_SIZE * 2) + 1)(%rdi, %rdx), %rdi
445 add $(VEC_SIZE * 2), %rsi
446 add $(VEC_SIZE * 2), %rdi
477 add $VEC_SIZE, %r8
489 add $VEC_SIZE, %edx
515 add $VEC_SIZE, %r8
536 add $VEC_SIZE, %rdi
537 add $VEC_SIZE, %rsi
538 sub $VEC_SIZE, %r8
744 vmovdqu -VEC_SIZE(%rsi, %r8), %ymm3
746 vmovdqu %ymm3, -VEC_SIZE(%rdi, %r8)
815 add $(VEC_SIZE - 1), %r8
826 sub $VEC_SIZE, %r8
830 add $VEC_SIZE, %rdi
833 and $(VEC_SIZE - 1), %esi
836 sub $(VEC_SIZE * 4), %r8
841 vmovdqa %ymmZ, VEC_SIZE(%rdi)
842 vmovdqa %ymmZ, (VEC_SIZE * 2)(%rdi)
843 vmovdqa %ymmZ, (VEC_SIZE * 3)(%rdi)
844 add $(VEC_SIZE * 4), %rdi
845 sub $(VEC_SIZE * 4), %r8
849 add $(VEC_SIZE * 2), %r8
852 vmovdqa %ymmZ, VEC_SIZE(%rdi)
853 add $(VEC_SIZE * 2), %rdi
854 sub $VEC_SIZE, %r8
857 add $VEC_SIZE, %rdi
862 add $VEC_SIZE, %r8
865 add $VEC_SIZE, %rdi
870 add $VEC_SIZE, %r8
893 lea (VEC_SIZE * 4)(%r8), %rcx
894 and $-VEC_SIZE, %rcx
895 add $(VEC_SIZE * 3), %r8
898 sub $VEC_SIZE, %r8
900 vmovdqu %ymm5, VEC_SIZE(%rdi)
901 sub $VEC_SIZE, %r8
903 vmovdqu %ymm6, (VEC_SIZE * 2)(%rdi)
904 sub $VEC_SIZE, %r8
906 vmovdqu %ymm7, (VEC_SIZE * 3)(%rdi)
908 lea (VEC_SIZE * 4)(%rdi), %rax
911 movb $0, (VEC_SIZE * 4)(%rdi)
920 add $(VEC_SIZE * 3), %r8
931 add $VEC_SIZE, %rcx
932 sub $VEC_SIZE, %r8
943 vmovdqu %ymm5, VEC_SIZE(%rdi)
944 add $VEC_SIZE, %rcx
945 sub $VEC_SIZE, %r8
956 vmovdqu %ymm6, (VEC_SIZE * 2)(%rdi)
957 lea VEC_SIZE(%rdi, %rcx), %rdi
958 lea VEC_SIZE(%rsi, %rcx), %rsi
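
The matches above all come from one AVX2 string-copy routine (the %ymmZ zero stores, the terminating movb $0, and the 32-byte VEC_SIZE define suggest glibc's strcpy-avx2.S family, though the listing itself does not name the file). Two idioms account for most of the hits: masking a pointer with -VEC_SIZE or (VEC_SIZE - 1) to split it into an aligned base plus an in-vector offset, and a copy loop unrolled to move four vectors (VEC_SIZE * 4 bytes) per iteration with aligned loads (vmovdqa) and unaligned stores (vmovdqu). Below is a minimal C sketch of both, assuming AVX2 intrinsics; copy_4vec and its parameters are illustrative names, not the library's API.

#include <immintrin.h>
#include <stdint.h>
#include <stddef.h>

#define VEC_SIZE 32  /* one 32-byte YMM vector, matching the define at line 32 */

/* The and-masks seen at lines 68-73, 217-218, 335-336, and 894, in C:
     and $-VEC_SIZE, %rsi        ->  p & -(uintptr_t)VEC_SIZE          round down to a vector boundary
     and $(VEC_SIZE - 1), %ecx   ->  p & (VEC_SIZE - 1)                byte offset inside a vector
     and $-(VEC_SIZE * 4), %rsi  ->  p & -(uintptr_t)(VEC_SIZE * 4)    round down to a 4-vector block */

/* Sketch of the unrolled loop at lines 226-258: aligned loads from src,
   unaligned stores to dst, both pointers advanced by VEC_SIZE * 4 per
   iteration.  src must be VEC_SIZE-aligned for the vmovdqa-style loads. */
static void copy_4vec(char *dst, const char *src, size_t nblocks)
{
    while (nblocks--) {
        __m256i v4 = _mm256_load_si256((const __m256i *)(src + 0 * VEC_SIZE));
        __m256i v5 = _mm256_load_si256((const __m256i *)(src + 1 * VEC_SIZE));
        __m256i v6 = _mm256_load_si256((const __m256i *)(src + 2 * VEC_SIZE));
        __m256i v7 = _mm256_load_si256((const __m256i *)(src + 3 * VEC_SIZE));
        _mm256_storeu_si256((__m256i *)(dst + 0 * VEC_SIZE), v4);
        _mm256_storeu_si256((__m256i *)(dst + 1 * VEC_SIZE), v5);
        _mm256_storeu_si256((__m256i *)(dst + 2 * VEC_SIZE), v6);
        _mm256_storeu_si256((__m256i *)(dst + 3 * VEC_SIZE), v7);
        src += 4 * VEC_SIZE;
        dst += 4 * VEC_SIZE;
    }
}

The %ymmZ stores around lines 841-853 reuse the same loop shape with a zeroed register to NUL-pad the tail of the destination, as strncpy-style copies require.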